├── .gitattributes ├── .github └── workflows │ └── cargo.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── list_recent_versions.rs ├── sparse_http_reqwest.rs ├── sparse_http_ureq.rs ├── update_and_get_latest.rs └── update_and_get_most_recent_version.rs ├── rustfmt.toml ├── src ├── config.rs ├── dedupe.rs ├── dirs.rs ├── error.rs ├── git │ ├── changes.rs │ ├── config.rs │ ├── impl_.rs │ ├── mod.rs │ └── test.rs ├── lib.rs ├── names.rs ├── sparse.rs └── types.rs └── tests ├── crates_index.rs ├── fixtures ├── autocfg.txt ├── config.json ├── crates-index.txt └── sparse_registry_cache │ └── cargo_home │ └── registry │ └── index │ └── index.crates.io-6f17d22bba15001f │ └── .cache │ └── au │ └── to │ └── autocfg ├── git └── mod.rs ├── mem.rs ├── names └── mod.rs └── sparse_index └── mod.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | *.txt text eol=lf 2 | -------------------------------------------------------------------------------- /.github/workflows/cargo.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | env: 4 | CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse 5 | 6 | on: 7 | push: 8 | pull_request: 9 | 10 | jobs: 11 | test: 12 | name: Test 13 | strategy: 14 | matrix: 15 | os: 16 | - ubuntu-latest 17 | - macos-latest 18 | - windows-latest 19 | rust: 20 | - stable 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - uses: actions-rs/toolchain@v1 25 | with: 26 | profile: minimal 27 | toolchain: ${{ matrix.rust }} 28 | override: true 29 | - uses: Swatinem/rust-cache@v2 30 | - uses: actions-rs/cargo@v1 31 | with: 32 | command: test 33 | args: --features=git-performance,git-https --release 34 | - uses: actions-rs/cargo@v1 35 | with: 36 | command: check 37 | args: --all-targets --no-default-features 38 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | _test 4 | 5 | # May be created during tests, but should not be present 6 | tests/fixtures/sparse_registry_cache/cargo_home/registry/index/index.crates.io-6f17d22bba15001f/.cache/cr/at/crates-index 7 | /tests/fixtures/git-registry/ 8 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crates-index" 3 | description = "Library for retrieving and interacting with the crates.io index" 4 | version = "3.10.0" 5 | homepage = "https://crates.io/crates/crates-index" 6 | authors = ["Corey Farwell ", "Kornel "] 7 | keywords = ["packaging", "index", "dependencies", "crate", "meta"] 8 | categories = ["development-tools", "database"] 9 | repository = "https://github.com/frewsxcv/rust-crates-index" 10 | license = "Apache-2.0" 11 | documentation = "https://docs.rs/crates-index/" 12 | edition = "2021" 13 | rust-version = "1.70" 14 | include = ["src/**/*", "examples/*.rs", "LICENSE", "README.md", "CHANGELOG.md"] 15 | 16 | [[example]] 17 | name = "sparse_http_reqwest" 18 | required-features = ["sparse"] 19 | 20 | [[example]] 21 | name = "sparse_http_ureq" 22 | required-features = ["sparse"] 23 | 24 | [[example]] 25 | name = "list_recent_versions" 26 | required-features = ["sparse"] 27 | 28 | [[example]] 29 | name = "update_and_get_latest" 30 | required-features = ["git-https"] 31 | 32 | [[example]] 33 | name = "update_and_get_most_recent_version" 34 | required-features = ["git-https"] 35 | 36 | [dependencies] 37 | gix = { version = "0.72.1", default-features = false, features = [ 38 | "max-performance-safe", 39 | "blocking-network-client", 40 | "revision", 41 | ], optional = true } 42 | hex = { version = "0.4.3", features = ["serde"] } 43 | home = "0.5.4" 44 | http = { version = "1", optional = true } 45 | memchr = 
"2.5.0" 46 | rayon = { version = "1.7.0", optional = true } 47 | rustc-hash = "2.0.0" 48 | rustc-stable-hash = "0.1.1" 49 | semver = "1.0.17" 50 | serde = { version = "1.0.160", features = ["rc"] } 51 | serde_derive = "1.0.160" 52 | serde_json = "1.0.96" 53 | smol_str = { version = "0.3.2", features = ["serde"] } 54 | thiserror = "2.0.0" 55 | toml = "0.8.0" 56 | 57 | document-features = { version = "0.2.0", optional = true } 58 | 59 | [dev-dependencies] 60 | bytesize = "1.2.0" 61 | cap = { version = "0.1.2", features = ["stats"] } 62 | is_ci = "1.1.1" 63 | tempfile = "3.5.0" 64 | ureq = "3.0" 65 | reqwest = { version = "0.12", features = ["blocking", "gzip"] } 66 | serial_test = "3.1.1" 67 | parking_lot = "0.12.1" 68 | 69 | [package.metadata.docs.rs] 70 | targets = ["x86_64-unknown-linux-gnu"] 71 | features = ["sparse", "git", "git-https", "parallel", "document-features"] 72 | rustdoc-args = ["--cfg", "docsrs"] 73 | 74 | 75 | [features] 76 | default = ["sparse"] 77 | ## Activate support for accessing git-powered cargo indices. These have been superseded by the sparse-index, see the `sparse` feature. 78 | git = ["dep:gix"] 79 | ## Activate support for git-powered cargo indices, but configure the underlying `gix` crate to use higher-performance sub-crates at the expense of platform compatibility. 80 | ## 81 | ## If it compiles for you, this should be preferred over `git`. 82 | git-performance = ["git", "gix?/max-performance"] 83 | ## Add support for https based URLs in git indices. 84 | ## 85 | ## Most will want this enabled unless they know to only have to access non-https URLs for their indices. 86 | git-https = ["git", "gix?/blocking-http-transport-curl"] 87 | ## Like `git-https` but using a reqwest backend instead of curl 88 | git-https-reqwest = ["git", "gix?/blocking-http-transport-reqwest"] 89 | ## Allow some functions to receive `rayon`-powered siblings for higher performance. 
90 | parallel = ["dep:rayon"] 91 | ## Add support for communicating with sparse indices. 92 | sparse = ["dep:http"] 93 | 94 | [badges] 95 | maintenance = { status = "passively-maintained" } 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # crates-index 2 | 3 | [![crates-index on Crates.io](https://img.shields.io/crates/v/crates-index.svg)](https://crates.io/crates/crates-index) 4 | 5 | Library for retrieving and interacting with the [crates.io registry index](https://doc.rust-lang.org/cargo/reference/registry-index.html) using either the `git` or `sparse` protocol. 6 | 7 | The index contains metadata for all Rust libraries and programs published on crates.io: their versions, dependencies, and feature flags. 
8 | 9 | [Documentation](https://docs.rs/crates-index/) 10 | 11 | ## Example 12 | 13 | ```rust 14 | let index = crates_index::GitIndex::new_cargo_default()?; 15 | 16 | for crate_releases in index.crates() { 17 | let _ = crate_releases.most_recent_version(); // newest version 18 | let crate_version = crate_releases.highest_version(); // max version by semver 19 | println!("crate name: {}", crate_version.name()); 20 | println!("crate version: {}", crate_version.version()); 21 | } 22 | ``` 23 | 24 | ## Changelog 25 | 26 | Please find the changelog in [CHANGELOG.md](https://github.com/frewsxcv/rust-crates-index/blob/master/CHANGELOG.md). 27 | 28 | ## Similar crates 29 | 30 | - [`tame-index`](https://github.com/EmbarkStudios/tame-index) - a hard fork with many improvements and advantages 31 | - [`crates_io_api`](https://github.com/theduke/crates_io_api) - a way to talk to the HTTP API of crates.io 32 | 33 | ## License 34 | 35 | Licensed under version 2 of the Apache License 36 | -------------------------------------------------------------------------------- /examples/list_recent_versions.rs: -------------------------------------------------------------------------------- 1 | //! Print the 5 most recent versions of a given crate name. 2 | //! 3 | //! It first checks all possible [names](Names) using the local cache and on failure 4 | //! updates the cache by fetching the most likely [names](Names) from the sparse registry. 5 | 6 | use crates_index::{Crate, Names, SparseIndex}; 7 | use std::error::Error; 8 | 9 | fn main() -> Result<(), Box> { 10 | let sparse_index = SparseIndex::new_cargo_default()?; 11 | let mut count = 0; 12 | let mut missing = Vec::new(); 13 | for name in std::env::args().skip(1) { 14 | count += 1; 15 | let krate = match find_in_cache(&name, &sparse_index)? { 16 | Some(krate) => krate, 17 | None => match fetch_crate(&name, &sparse_index)? 
{ 18 | Some(krate) => krate, 19 | None => { 20 | eprintln!("{name} not found"); 21 | missing.push(name); 22 | continue; 23 | } 24 | }, 25 | }; 26 | 27 | print_crate(krate); 28 | } 29 | 30 | if count == 0 { 31 | Err("Please provide one or more crate names to lookup".into()) 32 | } else if !missing.is_empty() { 33 | Err(format!("The following crates could not be found: {}", missing.join(", ")).into()) 34 | } else { 35 | Ok(()) 36 | } 37 | } 38 | 39 | /// Loop though all possible permutations of `name` and return the crate if found. 40 | /// This is feasible as local lookups are fast. 41 | /// 42 | /// Read more about [name permutations](Names). 43 | fn find_in_cache(name: &str, sparse_index: &SparseIndex) -> Result, Box> { 44 | for name in names(name)? { 45 | eprintln!("checking for '{}' locally", name); 46 | 47 | if let Ok(krate) = sparse_index.crate_from_cache(&name) { 48 | return Ok(Some(krate)); 49 | } 50 | } 51 | Ok(None) 52 | } 53 | 54 | /// Fetch the first 3 permutations of `name` from the sparse registry and return the crate if found. 55 | /// 56 | /// Here we only use the first 3 names which are most likely to be correct 57 | /// and skip the rest to minimize the performance hit if a crate does not exist. 58 | /// 59 | /// Read more about [name permutations](Names). 60 | fn fetch_crate(name: &str, sparse_index: &SparseIndex) -> Result, Box> { 61 | for name in names(name)? { 62 | eprintln!("fetching for '{}'", name); 63 | 64 | if let Some(krate) = update_cache(&name, sparse_index)? { 65 | return Ok(Some(krate)); 66 | } 67 | } 68 | Ok(None) 69 | } 70 | 71 | fn names(name: &str) -> Result, Box> { 72 | Ok(Names::new(name) 73 | .ok_or_else(|| "Too many hyphens in crate name")? 74 | .take(3)) 75 | } 76 | 77 | /// Create a request to the sparse `index` and parse the response with the side-effect of yielding 78 | /// the desired crate and updating the local cache. 
79 | fn update_cache(name: &str, index: &SparseIndex) -> Result, Box> { 80 | let request = index 81 | .make_cache_request(name)? 82 | .version(ureq::http::Version::HTTP_11) 83 | .body(())?; 84 | 85 | let response = ureq::run(request)?; 86 | 87 | let (parts, mut body) = response.into_parts(); 88 | let response = http::Response::from_parts(parts, body.read_to_vec()?); 89 | Ok(index.parse_cache_response(name, response, true)?) 90 | } 91 | 92 | fn print_crate(krate: Crate) { 93 | const MAX_VERSIONS: usize = 5; 94 | println!("{}", krate.name()); 95 | 96 | let versions = krate 97 | .versions() 98 | .iter() 99 | .rev() 100 | .take(5) 101 | .map(|version| version.version()) 102 | .collect::>(); 103 | 104 | print!("versions: {}", versions.join(", ")); 105 | if krate.versions().len() > MAX_VERSIONS { 106 | println!(", [{} more skipped]", krate.versions().len() - MAX_VERSIONS) 107 | } else { 108 | println!() 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /examples/sparse_http_reqwest.rs: -------------------------------------------------------------------------------- 1 | use crates_index::SparseIndex; 2 | 3 | /// 4 | /// **important**:
5 | /// don't forget to enable the **["blocking", "gzip"]** features of **reqwest** 6 | /// 7 | /// command to run:
8 | /// cargo run --example sparse_http_reqwest -F sparse 9 | /// 10 | 11 | const CRATE_TO_FETCH: &str = "names"; 12 | 13 | fn main() { 14 | let mut index = SparseIndex::new_cargo_default().unwrap(); 15 | 16 | print_crate(&mut index); 17 | update(&mut index); 18 | print_crate(&mut index); 19 | } 20 | 21 | fn print_crate(index: &mut SparseIndex) { 22 | match index.crate_from_cache(CRATE_TO_FETCH) { 23 | Ok(krate) => { 24 | println!("{:?}", krate.highest_normal_version().unwrap().version()); 25 | } 26 | Err(_err) => { 27 | println!("could not find crate {}", CRATE_TO_FETCH) 28 | } 29 | } 30 | } 31 | 32 | fn update(index: &mut SparseIndex) { 33 | let req = index.make_cache_request(CRATE_TO_FETCH).unwrap().body(()).unwrap(); 34 | 35 | let (parts, _) = req.into_parts(); 36 | let req = http::Request::from_parts(parts, vec![]); 37 | 38 | let req: reqwest::blocking::Request = req.try_into().unwrap(); 39 | 40 | let client = reqwest::blocking::ClientBuilder::new().gzip(true).build().unwrap(); 41 | 42 | let res = client.execute(req).unwrap(); 43 | 44 | let mut builder = http::Response::builder().status(res.status()).version(res.version()); 45 | 46 | builder 47 | .headers_mut() 48 | .unwrap() 49 | .extend(res.headers().iter().map(|(k, v)| (k.clone(), v.clone()))); 50 | 51 | let body = res.bytes().unwrap(); 52 | let res = builder.body(body.to_vec()).unwrap(); 53 | 54 | index.parse_cache_response(CRATE_TO_FETCH, res, true).unwrap(); 55 | } 56 | -------------------------------------------------------------------------------- /examples/sparse_http_ureq.rs: -------------------------------------------------------------------------------- 1 | use crates_index::SparseIndex; 2 | 3 | /// 4 | /// command to run:
5 | /// cargo run --example sparse_http_ureq -F sparse 6 | /// 7 | 8 | const CRATE_TO_FETCH: &str = "inferno"; 9 | 10 | fn main() { 11 | let mut index = SparseIndex::new_cargo_default().unwrap(); 12 | 13 | print_crate(&mut index); 14 | update(&mut index); 15 | print_crate(&mut index); 16 | } 17 | 18 | fn print_crate(index: &mut SparseIndex) { 19 | match index.crate_from_cache(CRATE_TO_FETCH) { 20 | Ok(krate) => { 21 | println!("{:?}", krate.highest_normal_version().unwrap().version()); 22 | } 23 | Err(_err) => { 24 | println!("could not find crate {}", CRATE_TO_FETCH) 25 | } 26 | } 27 | } 28 | 29 | fn update(index: &mut SparseIndex) { 30 | // ureq doesn't support HTTP/2, so we have to set the version to HTTP/1.1 31 | let request = index 32 | .make_cache_request(CRATE_TO_FETCH) 33 | .unwrap() 34 | .version(ureq::http::Version::HTTP_11) 35 | .body(()) 36 | .unwrap(); 37 | 38 | let response = ureq::run(request).unwrap(); 39 | 40 | let (parts, mut body) = response.into_parts(); 41 | let response = http::Response::from_parts(parts, body.read_to_vec().unwrap()); 42 | index.parse_cache_response(CRATE_TO_FETCH, response, true).unwrap(); 43 | } 44 | -------------------------------------------------------------------------------- /examples/update_and_get_latest.rs: -------------------------------------------------------------------------------- 1 | //! Updates the local git registry and extracts the latest most recent changes. 
2 | fn main() -> Result<(), Box> { 3 | let mut index = crates_index::GitIndex::new_cargo_default()?; 4 | eprintln!("Updating index…"); 5 | index.update()?; 6 | 7 | let limit = 10; 8 | eprintln!("The most recent {limit} changes:\n"); 9 | for change in index.changes()?.take(limit) { 10 | let change = change?; 11 | println!( 12 | "{name} changed in {commit}", 13 | name = change.crate_name(), 14 | commit = change.commit_hex() 15 | ); 16 | } 17 | Ok(()) 18 | } 19 | -------------------------------------------------------------------------------- /examples/update_and_get_most_recent_version.rs: -------------------------------------------------------------------------------- 1 | //! Updates the local git registry and extracts the latest most recent changes. 2 | fn main() -> Result<(), Box> { 3 | let crate_name = std::env::args() 4 | .nth(1) 5 | .ok_or("The first argument must be the name of the crate to get the most recent version of")?; 6 | let mut index = crates_index::GitIndex::new_cargo_default()?; 7 | eprintln!("Updating index…"); 8 | index.update()?; 9 | 10 | let krate = index 11 | .crate_(&crate_name) 12 | .ok_or_else(|| format!("Crate named '{crate_name}' does not exist in git index"))?; 13 | println!("most recent : {}", krate.most_recent_version().version()); 14 | println!( 15 | "highest normal: {:?}", 16 | krate.highest_normal_version().map(|v| v.version()) 17 | ); 18 | println!("highest : {}", krate.highest_version().version()); 19 | println!("earliest : {}", krate.earliest_version().version()); 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | disable_all_formatting = false 3 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | use crate::dirs::crate_prefix; 2 
| use serde_derive::Deserialize; 3 | 4 | /// Global configuration of an index, reflecting the [contents of config.json](https://doc.rust-lang.org/cargo/reference/registries.html#index-format). 5 | #[derive(Clone, Debug, Deserialize)] 6 | pub struct IndexConfig { 7 | /// Pattern for creating download URLs. Use [`IndexConfig::download_url`] instead. 8 | pub dl: String, 9 | /// Base URL for publishing, etc. 10 | pub api: Option, 11 | } 12 | 13 | impl IndexConfig { 14 | /// Get the URL from where the specified package can be downloaded. 15 | /// This method assumes the particular version is present in the registry, 16 | /// and does not verify that it is. 17 | #[must_use] 18 | pub fn download_url(&self, name: &str, version: &str) -> Option { 19 | if !self.dl.contains("{crate}") 20 | && !self.dl.contains("{version}") 21 | && !self.dl.contains("{prefix}") 22 | && !self.dl.contains("{lowerprefix}") 23 | { 24 | let mut new = String::with_capacity(self.dl.len() + name.len() + version.len() + 10); 25 | new.push_str(&self.dl); 26 | new.push('/'); 27 | new.push_str(name); 28 | new.push('/'); 29 | new.push_str(version); 30 | new.push_str("/download"); 31 | Some(new) 32 | } else { 33 | let mut prefix = String::with_capacity(5); 34 | crate_prefix(&mut prefix, name, '/')?; 35 | Some( 36 | self.dl 37 | .replace("{crate}", name) 38 | .replace("{version}", version) 39 | .replace("{prefix}", &prefix) 40 | .replace("{lowerprefix}", &prefix.to_ascii_lowercase()), 41 | ) 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/dedupe.rs: -------------------------------------------------------------------------------- 1 | use crate::Dependency; 2 | use rustc_hash::FxHashSet; 3 | use std::collections::HashMap; 4 | use std::hash::Hash; 5 | use std::hash::Hasher; 6 | use std::sync::Arc; 7 | 8 | /// Many crates (their versions) have the same features and dependencies 9 | pub(crate) struct DedupeContext { 10 | features: FxHashSet>>, 11 | 
deps: FxHashSet>, 12 | } 13 | 14 | impl DedupeContext { 15 | #[inline] 16 | pub(crate) fn new() -> Self { 17 | Self { 18 | deps: FxHashSet::default(), 19 | features: FxHashSet::default(), 20 | } 21 | } 22 | 23 | pub(crate) fn features(&mut self, features: &mut Arc>>) { 24 | let features_to_dedupe = HashableHashMap::new(Arc::clone(features)); 25 | if let Some(has_feats) = self.features.get(&features_to_dedupe) { 26 | *features = Arc::clone(&has_feats.map); 27 | } else { 28 | if self.features.len() > 16384 { 29 | // keeps peak memory low (must clear, remove is leaving tombstones) 30 | self.features.clear(); 31 | } 32 | self.features.insert(features_to_dedupe); 33 | } 34 | } 35 | 36 | pub(crate) fn deps(&mut self, deps: &mut Arc<[Dependency]>) { 37 | if let Some(has_deps) = self.deps.get(&*deps) { 38 | *deps = Arc::clone(has_deps); 39 | } else { 40 | if self.deps.len() > 16384 { 41 | // keeps peak memory low (must clear, remove is leaving tombstones) 42 | self.deps.clear(); 43 | } 44 | self.deps.insert(Arc::clone(deps)); 45 | } 46 | } 47 | } 48 | 49 | /// Newtype that caches hash of the hashmap (the default hashmap has a random order of the keys, so it's not cheap to hash) 50 | #[derive(PartialEq, Eq)] 51 | pub struct HashableHashMap { 52 | pub map: Arc>, 53 | hash: u64, 54 | } 55 | 56 | impl Hash for HashableHashMap { 57 | fn hash(&self, hasher: &mut H) 58 | where 59 | H: Hasher, 60 | { 61 | hasher.write_u64(self.hash); 62 | } 63 | } 64 | 65 | impl HashableHashMap { 66 | pub(crate) fn new(map: Arc>) -> Self { 67 | let mut hash = 0; 68 | for (k, v) in map.iter() { 69 | let mut hasher = rustc_hash::FxHasher::default(); 70 | k.hash(&mut hasher); 71 | v.hash(&mut hasher); 72 | hash ^= hasher.finish(); // XOR makes it order-independent 73 | } 74 | Self { map, hash } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/dirs.rs: -------------------------------------------------------------------------------- 1 | use 
crate::Error; 2 | 3 | /// Get the disk location of the specified `url`, as well as its canonical form, 4 | /// exactly as cargo would. 5 | /// 6 | /// `cargo_home` is used to root the directory at specific location, if not 7 | /// specified `CARGO_HOME` or else the default cargo location is used as the root. 8 | pub fn local_path_and_canonical_url( 9 | url: &str, 10 | cargo_home: Option<&std::path::Path>, 11 | ) -> Result<(std::path::PathBuf, String), Error> { 12 | local_path_and_canonical_url_with_hash_kind(url, cargo_home, &DEFAULT_HASHER_KIND) 13 | } 14 | 15 | /// Like [`local_path_and_canonical_url`] but accepts [`HashKind`] for determining the crate index path. 16 | pub fn local_path_and_canonical_url_with_hash_kind( 17 | url: &str, 18 | cargo_home: Option<&std::path::Path>, 19 | hash_kind: &HashKind, 20 | ) -> Result<(std::path::PathBuf, String), Error> { 21 | let (dir_name, canonical_url) = url_to_local_dir(url, hash_kind)?; 22 | 23 | let mut path = match cargo_home { 24 | Some(path) => path.to_owned(), 25 | None => home::cargo_home()?, 26 | }; 27 | 28 | path.push("registry"); 29 | path.push("index"); 30 | path.push(dir_name); 31 | 32 | Ok((path, canonical_url)) 33 | } 34 | 35 | pub(crate) fn crate_prefix(accumulator: &mut String, crate_name: &str, separator: char) -> Option<()> { 36 | match crate_name.len() { 37 | 0 => return None, 38 | 1 => accumulator.push('1'), 39 | 2 => accumulator.push('2'), 40 | 3 => { 41 | accumulator.push('3'); 42 | accumulator.push(separator); 43 | accumulator.extend( 44 | crate_name 45 | .as_bytes() 46 | .get(0..1)? 47 | .iter() 48 | .map(|c| c.to_ascii_lowercase() as char), 49 | ); 50 | } 51 | _ => { 52 | accumulator.extend( 53 | crate_name 54 | .as_bytes() 55 | .get(0..2)? 56 | .iter() 57 | .map(|c| c.to_ascii_lowercase() as char), 58 | ); 59 | accumulator.push(separator); 60 | accumulator.extend( 61 | crate_name 62 | .as_bytes() 63 | .get(2..4)? 
/// Appends the ASCII-lowercased characters of `text` to `out`.
fn push_ascii_lowercase(out: &mut String, text: &str) {
    out.extend(text.chars().map(|c| c.to_ascii_lowercase()));
}

/// Appends the lowercased directory prefix of `crate_name` to `accumulator`.
///
/// The prefix is `1` or `2` for names of that length, `3<separator><first char>` for
/// names of three characters, and `<chars 1-2><separator><chars 3-4>` for longer names.
///
/// Returns `None`, leaving `accumulator` untouched, if `crate_name` is empty or
/// contains non-ASCII characters.
pub(crate) fn crate_prefix(accumulator: &mut String, crate_name: &str, separator: char) -> Option<()> {
    // The prefix is cut at byte offsets, which only coincide with characters for ASCII.
    // Anything else would yield a garbled path, so treat it as an invalid name.
    if !crate_name.is_ascii() {
        return None;
    }
    match crate_name.len() {
        0 => return None,
        1 => accumulator.push('1'),
        2 => accumulator.push('2'),
        3 => {
            accumulator.push('3');
            accumulator.push(separator);
            push_ascii_lowercase(accumulator, crate_name.get(..1)?);
        }
        _ => {
            push_ascii_lowercase(accumulator, crate_name.get(..2)?);
            accumulator.push(separator);
            push_ascii_lowercase(accumulator, crate_name.get(2..4)?);
        }
    }
    Some(())
}

/// Builds the lowercased relative path `<prefix><separator><crate name>` for `crate_name`.
///
/// `separator` defaults to [`std::path::MAIN_SEPARATOR`]. Returns `None` if
/// [`crate_prefix`] rejects the name.
pub(crate) fn crate_name_to_relative_path(crate_name: &str, separator: Option<char>) -> Option<String> {
    let separator = separator.unwrap_or(std::path::MAIN_SEPARATOR);
    let mut rel_path = String::with_capacity(crate_name.len() + 6);
    crate_prefix(&mut rel_path, crate_name, separator)?;
    rel_path.push(separator);
    push_ascii_lowercase(&mut rel_path, crate_name);

    Some(rel_path)
}

/// Matches <https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/crates/cargo-util-schemas/src/core/source_kind.rs#L5>
type SourceKind = u64;
const SOURCE_KIND_REGISTRY: SourceKind = 2;
// NOTE(review): "SPASE" is presumably a typo for "SPARSE"; the name is kept as code
// outside of this block refers to it.
const SOURCE_KIND_SPASE_REGISTRY: SourceKind = 3;

/// Determine the crate registry hashing strategy for locating local crate indexes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashKind {
    /// Use the new hashing behavior introduced in Rust `1.85.0`.
    Stable,

    /// Use a hashing strategy that matches Cargo versions less than `1.85.0`
    Legacy,
}

// For now, this acts as a centralized place to change the default.
Ideally 98 | // this would be compiled conditionally based on the version of rustc as 99 | // a nice approximation of when consumers will be using the associated hash 100 | // implementation but this behavior is not yet stable: https://github.com/rust-lang/rust/issues/64796 101 | pub(crate) const DEFAULT_HASHER_KIND: HashKind = HashKind::Legacy; 102 | 103 | /// Converts a full url, eg https://github.com/rust-lang/crates.io-index, into 104 | /// the root directory name where cargo itself will fetch it on disk 105 | fn url_to_local_dir(url: &str, hash_kind: &HashKind) -> Result<(String, String), Error> { 106 | #[allow(deprecated)] 107 | fn legacy_hash_u64(url: &str, registry_kind: u64) -> u64 { 108 | use std::hash::{Hash, Hasher, SipHasher}; 109 | 110 | let mut hasher = SipHasher::new_with_keys(0, 0); 111 | // Registry 112 | registry_kind.hash(&mut hasher); 113 | // Url 114 | url.hash(&mut hasher); 115 | hasher.finish() 116 | } 117 | 118 | // Matches https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/src/cargo/util/hasher.rs#L6 119 | fn stable_hash_u64(url: &str, registry_kind: u64) -> u64 { 120 | use rustc_stable_hash::StableSipHasher128 as StableHasher; 121 | use std::hash::{Hash, Hasher}; 122 | 123 | let mut hasher = StableHasher::new(); 124 | 125 | // Type has an impact in the `rustc_stable_hasher`. 
126 | (registry_kind as isize).hash(&mut hasher); 127 | 128 | url.hash(&mut hasher); 129 | 130 | Hasher::finish(&hasher) 131 | } 132 | 133 | fn has_path_past_base(url: &str) -> bool { 134 | if let Some(protocol_end) = url.find("://") { 135 | // skip past protocol 136 | let base_url_end = protocol_end + 3; 137 | let rest_of_url = &url[base_url_end..]; 138 | 139 | // Check if there's any path or meaningful content after the domain (ignoring any trailing slashes) 140 | return rest_of_url.trim_end_matches('/').contains('/'); 141 | } 142 | false 143 | } 144 | 145 | // Matches https://github.com/rust-lang/cargo/blob/2928e32734b04925ee51e1ae88bea9a83d2fd451/src/cargo/util/hex.rs#L6 146 | fn to_hex(num: u64) -> String { 147 | hex::encode(num.to_le_bytes()) 148 | } 149 | 150 | let hash_u64 = match hash_kind { 151 | HashKind::Stable => stable_hash_u64, 152 | HashKind::Legacy => legacy_hash_u64, 153 | }; 154 | 155 | let mut registry_kind = SOURCE_KIND_REGISTRY; 156 | 157 | // Ensure we have a registry or bare url 158 | let (mut url, scheme_ind) = { 159 | let scheme_ind = url 160 | .find("://") 161 | .ok_or_else(|| Error::Url(format!("'{url}' is not a valid url")))?; 162 | let scheme_str = &url[..scheme_ind]; 163 | if scheme_str.starts_with("sparse+http") { 164 | registry_kind = SOURCE_KIND_SPASE_REGISTRY; 165 | (url, scheme_ind) 166 | } else if let Some(ind) = scheme_str.find('+') { 167 | if &scheme_str[..ind] != "registry" { 168 | return Err(Error::Url(format!("'{url}' is not a valid registry url"))); 169 | } 170 | 171 | (&url[ind + 1..], scheme_ind - ind - 1) 172 | } else { 173 | (url, scheme_ind) 174 | } 175 | }; 176 | 177 | // Could use the Url crate for this, but it's simple enough and we don't 178 | // need to deal with every possible url (I hope...) 
179 | let host = match url[scheme_ind + 3..].find('/') { 180 | Some(end) => &url[scheme_ind + 3..scheme_ind + 3 + end], 181 | None => &url[scheme_ind + 3..], 182 | }; 183 | 184 | // if a custom url ends with a slash it messes up the 185 | // hash. But if we remove it from just a base url such as 186 | // https://index.crates.io/ it messes it up 187 | // as well. So we strip if it has a path 188 | // past the base url 189 | if has_path_past_base(url) { 190 | if let Some(stripped_url) = url.strip_suffix('/') { 191 | url = stripped_url; 192 | } 193 | } 194 | 195 | // trim port 196 | let host = host.split(':').next().unwrap(); 197 | 198 | let (ident, url) = if registry_kind == SOURCE_KIND_REGISTRY { 199 | // cargo special cases github.com for reasons, so do the same 200 | let mut canonical = if host == "github.com" { 201 | url.to_lowercase() 202 | } else { 203 | url.to_owned() 204 | }; 205 | 206 | let ident = match hash_kind { 207 | HashKind::Stable => { 208 | // Locate the the first instance of params/fragments. 209 | let mut params_index = { 210 | let question = canonical.find('?'); 211 | let hash = canonical.rfind('#'); 212 | 213 | question.zip(hash).map(|(q, h)| q.min(h)).or(question).or(hash) 214 | }; 215 | 216 | // Attempt to trim `.git` from the end of url paths. 
217 | canonical = if let Some(idx) = params_index { 218 | let base_url = &canonical[..idx]; 219 | let params = &canonical[idx..]; 220 | 221 | if let Some(sanitized) = base_url.strip_suffix(".git") { 222 | params_index = Some(idx - 4); 223 | format!("{}{}", sanitized, params) 224 | } else { 225 | canonical 226 | } 227 | } else { 228 | if canonical.ends_with(".git") { 229 | canonical.truncate(canonical.len() - 4); 230 | } 231 | canonical 232 | }; 233 | 234 | let ident = to_hex(hash_u64(&canonical, registry_kind)); 235 | 236 | // Strip params 237 | if let Some(idx) = params_index { 238 | canonical.truncate(canonical.len() - (canonical.len() - idx)); 239 | } 240 | 241 | ident 242 | } 243 | HashKind::Legacy => { 244 | // Chop off any query params/fragments 245 | if let Some(hash) = canonical.rfind('#') { 246 | canonical.truncate(hash); 247 | } 248 | 249 | if let Some(query) = canonical.rfind('?') { 250 | canonical.truncate(query); 251 | } 252 | 253 | if canonical.ends_with('/') { 254 | canonical.pop(); 255 | } 256 | 257 | let ident = to_hex(hash_u64(&canonical, registry_kind)); 258 | 259 | // Only GitHub (crates.io) repositories have their .git suffix truncated 260 | if canonical.contains("github.com/") && canonical.ends_with(".git") { 261 | canonical.truncate(canonical.len() - 4); 262 | } 263 | 264 | ident 265 | } 266 | }; 267 | 268 | (ident, canonical) 269 | } else { 270 | (to_hex(hash_u64(url, registry_kind)), url.to_owned()) 271 | }; 272 | Ok((format!("{host}-{ident}"), url)) 273 | } 274 | 275 | #[cfg(test)] 276 | mod test { 277 | use crate::dirs::HashKind; 278 | 279 | #[test] 280 | fn http_index_url_matches_cargo() { 281 | use crate::sparse::URL; 282 | assert_eq!( 283 | super::url_to_local_dir(URL, &HashKind::Legacy).unwrap(), 284 | ("index.crates.io-6f17d22bba15001f".to_owned(), URL.to_owned(),) 285 | ); 286 | assert_eq!( 287 | super::url_to_local_dir(URL, &HashKind::Stable).unwrap(), 288 | ("index.crates.io-1949cf8c6b5b557f".to_owned(), URL.to_owned(),) 289 | ); 
290 | 291 | // I've confirmed this also works with a custom registry, unfortunately 292 | // that one includes a secret key as part of the url which would allow 293 | // anyone to publish to the registry, so uhh...here's a fake one instead 294 | assert_eq!( 295 | super::url_to_local_dir( 296 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index.git", 297 | &HashKind::Legacy 298 | ) 299 | .unwrap(), 300 | ( 301 | "dl.cloudsmith.io-ff79e51ddd2b38fd".to_owned(), 302 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index.git".to_owned() 303 | ) 304 | ); 305 | assert_eq!( 306 | super::url_to_local_dir( 307 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index.git", 308 | &HashKind::Stable 309 | ) 310 | .unwrap(), 311 | ( 312 | "dl.cloudsmith.io-5e6de3fada793d05".to_owned(), 313 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index".to_owned() 314 | ) 315 | ); 316 | } 317 | 318 | #[test] 319 | fn http_index_url_matches_index_slash() { 320 | assert_eq!( 321 | super::url_to_local_dir( 322 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index/", 323 | &HashKind::Stable 324 | ) 325 | .unwrap(), 326 | ( 327 | "dl.cloudsmith.io-5e6de3fada793d05".to_owned(), 328 | "https://dl.cloudsmith.io/aBcW1234aBcW1234/embark/rust/cargo/index".to_owned() 329 | ) 330 | ); 331 | } 332 | 333 | #[test] 334 | #[cfg(feature = "git")] 335 | fn git_url_matches_cargo() { 336 | use crate::git::URL; 337 | assert_eq!( 338 | crate::dirs::url_to_local_dir(URL, &HashKind::Legacy).unwrap(), 339 | ("github.com-1ecc6299db9ec823".to_owned(), URL.to_owned()) 340 | ); 341 | assert_eq!( 342 | crate::dirs::url_to_local_dir(URL, &HashKind::Stable).unwrap(), 343 | ("github.com-25cdd57fae9f0462".to_owned(), URL.to_owned()) 344 | ); 345 | 346 | // Ensure we actually strip off the irrelevant parts of a url, note that 347 | // the .git suffix is not part of the canonical url, but *is* used when hashing 348 | assert_eq!( 349 | 
crate::dirs::url_to_local_dir(&format!("registry+{}.git?one=1&two=2#fragment", URL), &HashKind::Legacy) 350 | .unwrap(), 351 | ("github.com-c786010fb7ef2e6e".to_owned(), URL.to_owned()) 352 | ); 353 | assert_eq!( 354 | crate::dirs::url_to_local_dir(&format!("registry+{}.git?one=1&two=2#fragment", URL), &HashKind::Stable) 355 | .unwrap(), 356 | ("github.com-e78ed0bbfe5f35d7".to_owned(), URL.to_owned()) 357 | ); 358 | } 359 | } 360 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | pub use serde_json::Error as SerdeJsonError; 2 | use std::io; 3 | use std::path::PathBuf; 4 | pub use toml::de::Error as TomlDeError; 5 | 6 | /// The catch-all error for the entire crate. 7 | #[derive(Debug, thiserror::Error)] 8 | #[allow(missing_docs)] 9 | pub enum Error { 10 | #[error("\"gix\" crate failed. If problems persist, consider deleting `~/.cargo/registry/index/github.com-1ecc6299db9ec823/`")] 11 | #[cfg(feature = "git")] 12 | Git(#[from] GixError), 13 | #[error("{0}")] 14 | Url(String), 15 | #[error("Could not obtain the most recent head commit in repo at {}. Tried {}, had {} available", repo_path.display(), refs_tried.join(", "), refs_available.join(", "))] 16 | MissingHead { 17 | /// The references we tried to get commits for. 18 | refs_tried: &'static [&'static str], 19 | /// The references that were actually present in the repository. 20 | refs_available: Vec, 21 | /// The path of the repository we tried 22 | repo_path: PathBuf, 23 | }, 24 | #[error(transparent)] 25 | Io(#[from] io::Error), 26 | #[error("If this happens, the registry is seriously corrupted. Consider deleting `~/.cargo/registry/index/`")] 27 | Json(#[from] SerdeJsonError), 28 | #[error(transparent)] 29 | Toml(#[from] TomlDeError), 30 | } 31 | 32 | /// Any error produced by `gix` or the `gix-*` family of crates. 
#[cfg(feature = "git")]
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum GixError {
    /// Forwarded [`gix::remote::init::Error`].
    #[error(transparent)]
    CreateInMemoryRemote(#[from] gix::remote::init::Error),
    /// Forwarded [`gix::reference::head_commit::Error`].
    #[error(transparent)]
    HeadCommit(#[from] gix::reference::head_commit::Error),
    /// Forwarded [`gix::object::commit::Error`].
    #[error(transparent)]
    TreeOfCommit(#[from] gix::object::commit::Error),
    /// Forwarded [`gix::objs::decode::Error`].
    #[error(transparent)]
    DecodeObject(#[from] gix::objs::decode::Error),
    /// Forwarded [`gix::object::find::existing::Error`].
    #[error(transparent)]
    FindExistingObject(#[from] gix::object::find::existing::Error),
    /// Forwarded [`gix::object::find::Error`].
    #[error(transparent)]
    FindObject(#[from] gix::object::find::Error),
    /// Forwarded [`gix::object::try_into::Error`].
    #[error(transparent)]
    IntoObjectKind(#[from] gix::object::try_into::Error),
    /// A file expected at the root of the index tree was not found.
    #[error("The '{}' file is missing at the root of the tree of the crates index", path.display())]
    PathMissing { path: std::path::PathBuf },
    /// Forwarded [`gix::lock::acquire::Error`].
    #[error(transparent)]
    #[deprecated(note = "This variant can't happen anymore as locks aren't used when opening the index")]
    LockAcquire(#[from] gix::lock::acquire::Error),
    /// Forwarded [`gix::refspec::parse::Error`].
    #[error(transparent)]
    ParseRefSpec(#[from] gix::refspec::parse::Error),
    /// Forwarded [`gix::remote::connect::Error`].
    #[error(transparent)]
    RemoteConnect(#[from] gix::remote::connect::Error),
    /// Forwarded [`gix::remote::fetch::prepare::Error`].
    #[error(transparent)]
    PrepareFetch(#[from] gix::remote::fetch::prepare::Error),
    /// Forwarded [`gix::remote::fetch::Error`].
    #[error(transparent)]
    Fetch(#[from] gix::remote::fetch::Error),
    /// Forwarded [`gix::clone::Error`].
    #[error(transparent)]
    PrepareClone(#[from] gix::clone::Error),
    /// Forwarded [`gix::remote::name::Error`].
    #[error(transparent)]
    RemoteName(#[from] gix::remote::name::Error),
    /// Forwarded [`gix::clone::fetch::Error`].
    #[error(transparent)]
    FetchDuringClone(#[from] gix::clone::fetch::Error),
    /// Forwarded [`gix::object::peel::to_kind::Error`].
    #[error(transparent)]
    PeelToKind(#[from] gix::object::peel::to_kind::Error),
}

/// Unknown error from [`crate::GitIndex::crates_parallel`]
#[cfg(feature = "parallel")]
#[derive(Debug, thiserror::Error)]
#[error("error while iterating git repository")]
pub struct CratesIterError;
-------------------------------------------------------------------------------- /src/git/changes.rs: -------------------------------------------------------------------------------- 1 | use crate::error::GixError; 2 | use crate::git::{fetch_remote, Change}; 3 | use crate::Error; 4 | use crate::GitIndex; 5 | use gix::bstr::ByteSlice; 6 | use gix::prelude::TreeEntryRefExt; 7 | use std::collections::{HashSet, VecDeque}; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | const INDEX_GIT_ARCHIVE_URL: &str = "https://github.com/rust-lang/crates.io-index-archive"; 11 | 12 | /// An iterator over individual changes, see [`GitIndex::changes`] for more. 13 | pub struct Changes<'repo> { 14 | repo: &'repo gix::Repository, 15 | current: gix::Commit<'repo>, 16 | current_tree: gix::Tree<'repo>, 17 | out: VecDeque, 18 | } 19 | 20 | impl<'repo> Iterator for Changes<'repo> { 21 | type Item = Result; 22 | 23 | fn next(&mut self) -> Option { 24 | while self.out.is_empty() { 25 | let parent = match self.get_parent() { 26 | Ok(Some(parent)) => parent, 27 | Ok(None) => return None, 28 | Err(e) => return Some(Err(e.into())), 29 | }; 30 | let parent_tree = parent.tree().ok()?; 31 | let time = SystemTime::UNIX_EPOCH + Duration::from_secs(self.current.time().ok()?.seconds.max(0) as _); 32 | Self::tree_additions( 33 | &self.repo, 34 | &mut self.out, 35 | time, 36 | &self.current.id(), 37 | &self.current_tree, 38 | &parent_tree, 39 | ) 40 | .ok()?; 41 | self.current_tree = parent_tree; 42 | self.current = parent; 43 | } 44 | self.out.pop_front().map(Ok) 45 | } 46 | } 47 | 48 | impl<'repo> Changes<'repo> { 49 | pub(crate) fn new(index: &'repo GitIndex) -> Result { 50 | let current = index.repo.find_object(index.head_commit)?.peel_to_commit()?; 51 | let current_tree = current.tree()?; 52 | 53 | Ok(Self { 54 | repo: &index.repo, 55 | current, 56 | current_tree, 57 | out: VecDeque::new(), 58 | }) 59 | } 60 | 61 | fn get_parent(&self) -> Result>, GixError> { 62 | match self 63 | .current 64 | 
.parent_ids() 65 | .next() 66 | .map(|id| id.try_object()) 67 | .transpose()? 68 | .flatten() 69 | { 70 | Some(obj) => Ok(Some(obj.try_into_commit()?)), 71 | None => { 72 | let msg = self.current.message_raw_sloppy().to_str_lossy(); 73 | let (oid, branch) = match oid_and_branch_from_commit_message(msg.as_ref()) { 74 | Some(res) => res, 75 | None => return Ok(None), 76 | }; 77 | match self.repo.try_find_object(oid)? { 78 | Some(obj) => Ok(Some(obj.try_into_commit()?)), 79 | None => { 80 | let mut remote = self.repo.remote_at(INDEX_GIT_ARCHIVE_URL)?; 81 | fetch_remote(&mut remote, &[&format!("+refs/heads/{}", branch)])?; 82 | Ok(Some(self.repo.find_object(oid)?.try_into_commit()?)) 83 | } 84 | } 85 | } 86 | } 87 | } 88 | 89 | fn tree_additions( 90 | repo: &gix::Repository, 91 | out: &mut VecDeque, 92 | change_time: SystemTime, 93 | commit: &gix::hash::oid, 94 | new: &gix::Tree<'_>, 95 | old: &gix::Tree<'_>, 96 | ) -> Result<(), GixError> { 97 | let old_oids = old 98 | .iter() 99 | .map(|old| old.map(|e| e.object_id())) 100 | .collect::, _>>()?; 101 | let old = old.decode()?; 102 | for new_entry in new.iter().filter_map(Result::ok) { 103 | if old_oids.contains(new_entry.oid()) { 104 | continue; 105 | } 106 | if new_entry.mode().is_tree() { 107 | let new_tree = new_entry.object()?.into_tree(); 108 | let name = new_entry.filename(); 109 | // Recurse only into crate subdirs, and they all happen to be 1 or 2 letters long 110 | let is_crates_subdir = name.len() <= 2 && name.iter().copied().all(valid_crate_name_char); 111 | let old_obj = if is_crates_subdir { 112 | old.bisect_entry(name, true).map(|entry| entry.attach(repo)) 113 | } else { 114 | None 115 | } 116 | .map(|o| o.object()) 117 | .transpose()?; 118 | let old_tree = match old_obj.and_then(|o| o.try_into_tree().ok()) { 119 | Some(t) => t, 120 | None => repo.empty_tree(), 121 | }; 122 | Self::tree_additions(repo, out, change_time, commit, &new_tree, &old_tree)?; 123 | } else { 124 | let name = new_entry.filename(); 
125 | // filter out config.json 126 | if name.iter().copied().all(valid_crate_name_char) { 127 | out.push_back(Change { 128 | time: change_time, 129 | crate_name: name.to_string().into(), 130 | commit: commit.into(), 131 | }); 132 | } 133 | } 134 | } 135 | Ok(()) 136 | } 137 | } 138 | 139 | #[inline] 140 | fn valid_crate_name_char(c: u8) -> bool { 141 | c.is_ascii_alphanumeric() || c == b'-' || c == b'_' 142 | } 143 | 144 | fn oid_and_branch_from_commit_message(msg: &str) -> Option<(gix::ObjectId, &str)> { 145 | let hash_start = msg 146 | .split_once("Previous HEAD was ")? 147 | .1 148 | .trim_start_matches(|c: char| !c.is_ascii_hexdigit()); 149 | let (hash_str, rest) = hash_start.split_once(|c: char| !c.is_ascii_hexdigit())?; 150 | let hash = gix::ObjectId::from_hex(hash_str.as_bytes()).ok()?; 151 | let snapshot_start = rest.find("snapshot-")?; 152 | let branch = rest.get(snapshot_start..snapshot_start + "snapshot-xxxx-xx-xx".len())?; 153 | 154 | Some((hash, branch)) 155 | } 156 | 157 | #[cfg(test)] 158 | pub(crate) mod test { 159 | use super::oid_and_branch_from_commit_message; 160 | 161 | #[test] 162 | fn changes_parse_split_message() { 163 | let (id, branch) = oid_and_branch_from_commit_message( 164 | "Previous HEAD was 4181c62812c70fafb2b56cbbd66c31056671b445, now on the `snapshot-2021-07-02` branch 165 | 166 | More information about this change can be found [online] and on [this issue]. 
167 | 168 | [online]: https://internals.rust-lang.org/t/cargos-crate-index-upcoming-squash-into-one-commit/8440 169 | [this issue]: https://github.com/rust-lang/crates-io-cargo-teams/issues/47", 170 | ) 171 | .unwrap(); 172 | assert_eq!("4181c62812c70fafb2b56cbbd66c31056671b445", id.to_string()); 173 | assert_eq!("snapshot-2021-07-02", branch); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/git/config.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | use std::path::{Path, PathBuf}; 3 | 4 | /// Calls the specified function for each cargo config located according to 5 | /// cargo's standard hierarchical structure 6 | /// 7 | /// Note that this only supports the use of `.cargo/config.toml`, which is not 8 | /// supported below cargo 1.39.0 9 | /// 10 | /// See https://doc.rust-lang.org/cargo/reference/config.html#hierarchical-structure 11 | fn read_cargo_config( 12 | root: Option<&Path>, 13 | cargo_home: Option<&Path>, 14 | callback: impl Fn(&toml::Value) -> Option, 15 | ) -> Result, Error> { 16 | use std::borrow::Cow; 17 | 18 | if let Some(mut path) = root.map(PathBuf::from).or_else(|| std::env::current_dir().ok()) { 19 | loop { 20 | path.push(".cargo/config.toml"); 21 | if let Some(toml) = try_read_toml(&path)? { 22 | if let Some(value) = callback(&toml) { 23 | return Ok(Some(value)); 24 | } 25 | } 26 | path.pop(); 27 | path.pop(); 28 | 29 | // Walk up to the next potential config root 30 | if !path.pop() { 31 | break; 32 | } 33 | } 34 | } 35 | 36 | if let Some(home) = cargo_home 37 | .map(Cow::Borrowed) 38 | .or_else(|| home::cargo_home().ok().map(Cow::Owned)) 39 | { 40 | let path = home.join("config.toml"); 41 | if let Some(toml) = try_read_toml(&path)? 
{ 42 | if let Some(value) = callback(&toml) { 43 | return Ok(Some(value)); 44 | } 45 | } 46 | } 47 | 48 | Ok(None) 49 | } 50 | 51 | fn try_read_toml(path: &Path) -> Result, Error> { 52 | if !path.exists() { 53 | return Ok(None); 54 | } 55 | 56 | let toml = toml::from_str(&std::fs::read_to_string(path)?).map_err(Error::Toml)?; 57 | Ok(Some(toml)) 58 | } 59 | 60 | /// Gets the url of a replacement registry for crates.io if one has been configured 61 | /// 62 | /// See https://doc.rust-lang.org/cargo/reference/source-replacement.html 63 | #[inline] 64 | pub(crate) fn get_crates_io_replacement( 65 | root: Option<&Path>, 66 | cargo_home: Option<&Path>, 67 | ) -> Result, Error> { 68 | read_cargo_config(root, cargo_home, |config| { 69 | config.get("source").and_then(|sources| { 70 | sources 71 | .get("crates-io") 72 | .and_then(|v| v.get("replace-with")) 73 | .and_then(|v| v.as_str()) 74 | .and_then(|v| sources.get(v)) 75 | .and_then(|v| v.get("registry")) 76 | .and_then(|v| v.as_str().map(String::from)) 77 | }) 78 | }) 79 | } 80 | -------------------------------------------------------------------------------- /src/git/impl_.rs: -------------------------------------------------------------------------------- 1 | use crate::dedupe::DedupeContext; 2 | use crate::dirs::{ 3 | crate_name_to_relative_path, local_path_and_canonical_url_with_hash_kind, HashKind, DEFAULT_HASHER_KIND, 4 | }; 5 | use crate::error::GixError; 6 | use crate::git::{changes, config, URL}; 7 | use crate::{path_max_byte_len, Crate, Error, GitIndex, IndexConfig}; 8 | use gix::bstr::ByteSlice; 9 | use gix::config::tree::Key; 10 | use std::io; 11 | use std::path::{Path, PathBuf}; 12 | use std::time::Duration; 13 | use std::time::SystemTime; 14 | 15 | /// An individual change to a crate in the crates.io index, returned by [the changes iterator](GitIndex::changes). 
16 | #[derive(Debug, Clone)] 17 | pub struct Change { 18 | /// Name of a crate, can be used in [`GitIndex::crate_`] 19 | pub(super) crate_name: Box, 20 | /// Timestamp in the crates.io index repository 21 | pub(super) time: SystemTime, 22 | pub(super) commit: gix::ObjectId, 23 | } 24 | 25 | impl Change { 26 | /// Name of a crate, can be used in [`GitIndex::crate_`] 27 | #[inline] 28 | #[must_use] 29 | pub fn crate_name(&self) -> &str { 30 | &*self.crate_name 31 | } 32 | 33 | /// Timestamp in the crates.io index repository, which may be publication or modification date 34 | #[inline] 35 | #[must_use] 36 | pub fn time(&self) -> SystemTime { 37 | self.time 38 | } 39 | 40 | /// git hash of a commit in the crates.io repository 41 | #[must_use] 42 | pub fn commit(&self) -> &[u8; 20] { 43 | self.commit.as_bytes().try_into().unwrap() 44 | } 45 | 46 | /// git hash of a commit in the crates.io repository 47 | #[must_use] 48 | pub fn commit_hex(&self) -> String { 49 | self.commit.to_string() 50 | } 51 | } 52 | 53 | impl GitIndex { 54 | #[doc(hidden)] 55 | #[deprecated(note = "use new_cargo_default()")] 56 | pub fn new>(path: P) -> Self { 57 | Self::from_path_and_url(path.into(), URL.into(), Mode::ReadOnly) 58 | .unwrap() 59 | .expect("repo present after possibly cloning index") 60 | } 61 | 62 | /// Creates an index for the default crates.io registry, using the same 63 | /// disk location as Cargo itself. 64 | /// 65 | /// This is the recommended way to access Cargo's index. 66 | /// *Note that this clones a new index if none is present yet. 67 | /// 68 | /// Note this function takes the `CARGO_HOME` environment variable into account 69 | /// 70 | /// ### Concurrency 71 | /// 72 | /// Concurrent invocations may fail if the index needs to be cloned. To prevent that, 73 | /// use synchronization mechanisms like mutexes or file locks as needed by the application. 
74 | pub fn new_cargo_default() -> Result { 75 | let url = config::get_crates_io_replacement(None, None)?; 76 | Self::from_url(url.as_deref().unwrap_or(URL)) 77 | } 78 | 79 | /// Like [`Self::new_cargo_default()`], but read-only without auto-cloning the cargo default git index. 80 | pub fn try_new_cargo_default() -> Result, Error> { 81 | let url = config::get_crates_io_replacement(None, None)?; 82 | Self::try_from_url(url.as_deref().unwrap_or(URL)) 83 | } 84 | 85 | /// Creates a bare index from a provided URL, opening the same location on 86 | /// disk that Cargo uses for that registry index. 87 | /// 88 | /// *Note that this clones a new index if none is present yet. 89 | /// 90 | /// It can be used to access custom registries. 91 | /// 92 | /// ### Concurrency 93 | /// 94 | /// Concurrent invocations may fail if the index needs to be cloned. To prevent that, 95 | /// use synchronization mechanisms like mutexes or file locks as needed by the application. 96 | pub fn from_url(url: &str) -> Result { 97 | Self::from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND) 98 | } 99 | 100 | /// Like [`Self::from_url`], but accepts an explicit [`HashKind`] for determining the crates index path. 101 | pub fn from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result { 102 | let (path, canonical_url) = local_path_and_canonical_url_with_hash_kind(url, None, hash_kind)?; 103 | Ok( 104 | Self::from_path_and_url(path, canonical_url, Mode::CloneUrlToPathIfRepoMissing)? 105 | .expect("repo present after possibly cloning it"), 106 | ) 107 | } 108 | 109 | /// Like [`Self::from_url()`], but read-only without auto-cloning the index at `url`. 110 | pub fn try_from_url(url: &str) -> Result, Error> { 111 | Self::try_from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND) 112 | } 113 | 114 | /// Like [`Self::try_from_url`], but accepts an explicit [`HashKind`] for determining the crates index path. 
115 | pub fn try_from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result, Error> { 116 | let (path, canonical_url) = local_path_and_canonical_url_with_hash_kind(url, None, hash_kind)?; 117 | Self::from_path_and_url(path, canonical_url, Mode::ReadOnly) 118 | } 119 | 120 | /// Creates a bare index at the provided `path` with the specified repository `URL`. 121 | /// 122 | /// *Note that this clones a new index to `path` if none is present there yet. 123 | /// 124 | /// ### Concurrency 125 | /// 126 | /// Concurrent invocations may fail if the index needs to be cloned. To prevent that, 127 | /// use synchronization mechanisms like mutexes or file locks as needed by the application. 128 | pub fn with_path, S: Into>(path: P, url: S) -> Result { 129 | Ok( 130 | Self::from_path_and_url(path.into(), url.into(), Mode::CloneUrlToPathIfRepoMissing)? 131 | .expect("repo present after possibly cloning it"), 132 | ) 133 | } 134 | 135 | /// Like [`Self::with_path()`], but read-only without auto-cloning the index at `url` if it's not already 136 | /// present at `path`. 137 | pub fn try_with_path, S: Into>(path: P, url: S) -> Result, Error> { 138 | Self::from_path_and_url(path.into(), url.into(), Mode::ReadOnly) 139 | } 140 | 141 | /// Get the index directory. 142 | #[inline] 143 | #[must_use] 144 | pub fn path(&self) -> &Path { 145 | &self.path 146 | } 147 | 148 | /// Get the index url. 149 | #[inline] 150 | #[must_use] 151 | pub fn url(&self) -> &str { 152 | &self.url 153 | } 154 | 155 | /// Timestamp of the commit of repository being read, which may be the publication or modification date. 156 | /// 157 | /// Note that currently only times at or past the Unix epoch are supported. 158 | #[inline] 159 | #[must_use] 160 | pub fn time(&self) -> Result { 161 | Ok(SystemTime::UNIX_EPOCH 162 | + Duration::from_secs( 163 | self.repo 164 | .find_object(self.head_commit)? 165 | .peel_to_commit()? 166 | .time()? 
167 | .seconds 168 | .max(0) as _, 169 | )) 170 | } 171 | 172 | /// git hash of the commit of repository being read 173 | #[must_use] 174 | pub fn commit(&self) -> &[u8; 20] { 175 | self.head_commit.as_bytes().try_into().unwrap() 176 | } 177 | 178 | /// git hash of the commit of repository being read 179 | #[must_use] 180 | pub fn commit_hex(&self) -> String { 181 | self.head_commit.to_string() 182 | } 183 | 184 | fn lookup_commit(&self, rev: &str) -> Option { 185 | self.repo 186 | .rev_parse_single(rev) 187 | .ok()? 188 | .object() 189 | .ok()? 190 | .try_into_commit() 191 | .ok()? 192 | .id 193 | .into() 194 | } 195 | 196 | /// Change the commit of repository being read to the commit pointed to by a refspec. 197 | /// Note that this is *in-memory* only, the repository will not be changed! 198 | pub fn set_commit_from_refspec(&mut self, rev: &str) -> Result<(), Error> { 199 | self.head_commit = self.lookup_commit(rev).ok_or_else(|| Error::MissingHead { 200 | repo_path: self.path.to_owned(), 201 | refs_tried: &[], 202 | refs_available: self 203 | .repo 204 | .references() 205 | .ok() 206 | .and_then(|p| { 207 | p.all() 208 | .ok()? 209 | .map(|r| r.ok().map(|r| r.name().as_bstr().to_string())) 210 | .collect() 211 | }) 212 | .unwrap_or_default(), 213 | })?; 214 | Ok(()) 215 | } 216 | 217 | /// List crates that have changed (published or yanked), in reverse chronological order. 218 | /// 219 | /// This iterator is aware of periodic index squashing crates.io performs, 220 | /// and will perform (slow and blocking) network requests to fetch the additional history from if needed. 221 | /// 222 | /// If you want to track newly added/changed crates over time, make a note of the last [`commit`](Change::commit) or [`timestamp`](Change) you've processed, 223 | /// and stop iteration on it next time. 224 | /// 225 | /// Crates will be reported multiple times, once for each publish/yank/unyank event that happened. 
226 | /// 227 | /// If you like to know publication dates of all crates, consider instead. 228 | pub fn changes(&self) -> Result, Error> { 229 | Ok(changes::Changes::new(self)?) 230 | } 231 | 232 | fn from_path_and_url(path: PathBuf, url: String, mode: Mode) -> Result, Error> { 233 | let open_with_complete_config = gix::open::Options::default().permissions(gix::open::Permissions { 234 | config: gix::open::permissions::Config { 235 | // Be sure to get all configuration, some of which is only known by the git binary. 236 | // That way we are sure to see all the systems credential helpers 237 | git_binary: true, 238 | ..Default::default() 239 | }, 240 | ..Default::default() 241 | }); 242 | 243 | if let Some(parent) = path.parent() { 244 | std::fs::create_dir_all(parent)?; 245 | } 246 | let repo = gix::open_opts(&path, open_with_complete_config.clone()) 247 | .ok() 248 | .filter(|repo| { 249 | // The `cargo` standard registry clone has no configured origin (when created with `git2`). 250 | repo.find_remote("origin").map_or(true, |remote| { 251 | remote 252 | .url(gix::remote::Direction::Fetch) 253 | .map_or(false, |remote_url| remote_url.to_bstring().starts_with_str(&url)) 254 | }) 255 | }); 256 | 257 | let repo = match mode { 258 | Mode::ReadOnly => repo, 259 | Mode::CloneUrlToPathIfRepoMissing => Some(match repo { 260 | Some(repo) => repo, 261 | None => match gix::open_opts(&path, open_with_complete_config).ok() { 262 | None => clone_url(&url, &path)?, 263 | Some(repo) => repo, 264 | }, 265 | }), 266 | }; 267 | 268 | match repo { 269 | None => Ok(None), 270 | Some(repo) => { 271 | let head_commit = Self::find_repo_head(&repo, &path)?; 272 | Ok(Some(Self { 273 | path, 274 | url, 275 | repo, 276 | head_commit, 277 | })) 278 | } 279 | } 280 | } 281 | 282 | fn tree(&self) -> Result, GixError> { 283 | Ok(self.repo.find_object(self.head_commit)?.try_into_commit()?.tree()?) 
284 | } 285 | 286 | #[doc(hidden)] 287 | #[deprecated(note = "use update()")] 288 | pub fn retrieve_or_update(&mut self) -> Result<(), Error> { 289 | self.update() 290 | } 291 | 292 | #[doc(hidden)] 293 | #[deprecated(note = "it's always retrieved. there's no need to call it any more")] 294 | pub fn retrieve(&self) -> Result<(), Error> { 295 | Ok(()) 296 | } 297 | 298 | #[doc(hidden)] 299 | #[deprecated(note = "it's always retrieved, so it's assumed to always exist")] 300 | #[must_use] 301 | pub fn exists(&self) -> bool { 302 | true 303 | } 304 | 305 | /// Fetches latest from the remote index repository. Note that using this 306 | /// method will mean no cache entries will be used, if a new commit is fetched 307 | /// from the repository, as their commit version will no longer match. 308 | pub fn update(&mut self) -> Result<(), Error> { 309 | let mut remote = self 310 | .repo 311 | .find_remote("origin") 312 | .ok() 313 | .unwrap_or_else(|| self.repo.remote_at(self.url.as_str()).expect("own URL is always valid")); 314 | fetch_remote( 315 | &mut remote, 316 | &["+HEAD:refs/remotes/origin/HEAD", "+master:refs/remotes/origin/master"], 317 | )?; 318 | 319 | let head_commit = Self::find_repo_head(&self.repo, &self.path)?; 320 | self.head_commit = head_commit; 321 | 322 | Ok(()) 323 | } 324 | 325 | /// Reads a crate from the index, it will attempt to use a cached entry if 326 | /// one is available, otherwise it will fallback to reading the crate 327 | /// directly from the git blob containing the crate information. 328 | /// 329 | /// Use this only if you need to get very few crates. If you're going 330 | /// to read the majority of crates, prefer the [`GitIndex::crates()`] iterator. 
331 | #[must_use] 332 | pub fn crate_(&self, name: &str) -> Option { 333 | let rel_path = crate_name_to_relative_path(name, None)?; 334 | 335 | // Attempt to load the .cache/ entry first, this is purely an acceleration 336 | // mechanism and can fail for a few reasons that are non-fatal 337 | { 338 | // avoid realloc on each push 339 | let mut cache_path = PathBuf::with_capacity(path_max_byte_len(&self.path) + 8 + rel_path.len()); 340 | cache_path.push(&self.path); 341 | cache_path.push(".cache"); 342 | cache_path.push(&rel_path); 343 | if let Ok(cache_bytes) = std::fs::read(&cache_path) { 344 | if let Ok(krate) = Crate::from_cache_slice(&cache_bytes, None) { 345 | return Some(krate); 346 | } 347 | } 348 | } 349 | 350 | // Fallback to reading the blob directly via git if we don't have a 351 | // valid cache entry 352 | self.crate_from_rel_path(rel_path).ok() 353 | } 354 | 355 | fn crate_from_rel_path(&self, rel_path: String) -> Result { 356 | let object = self.object_at_path(rel_path.into())?; 357 | Crate::from_slice(&object.data).map_err(Error::Io) 358 | } 359 | 360 | /// Single-threaded iterator over all the crates in the index. 361 | /// 362 | /// [`GitIndex::crates_parallel`] is typically 4 times faster. 363 | /// 364 | /// Skips crates that can not be parsed (but there shouldn't be any such crates in the crates-io index). 365 | /// Also consider to enable `git-index-performance` feature toggle for better performance. 366 | #[inline] 367 | #[must_use] 368 | pub fn crates(&self) -> Crates<'_> { 369 | Crates { 370 | blobs: self.crates_blobs().expect("HEAD commit disappeared"), 371 | dedupe: MaybeOwned::Owned(DedupeContext::new()), 372 | } 373 | } 374 | 375 | /// Iterate over all crates using rayon. 376 | /// 377 | /// This method is available only if the "parallel" feature is enabled. 378 | /// Also consider to enable `git-index-performance` feature toggle for better performance. 
379 | #[cfg(feature = "parallel")] 380 | #[must_use] 381 | pub fn crates_parallel( 382 | &self, 383 | ) -> impl rayon::iter::ParallelIterator> + '_ { 384 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 385 | let tree_oids = match self.crates_top_level_ids() { 386 | Ok(objs) => objs, 387 | Err(_) => vec![self.repo.object_hash().null()], // intentionally broken oid to return error from the iterator 388 | }; 389 | 390 | tree_oids 391 | .into_par_iter() 392 | .map_init( 393 | { 394 | let repo = self.repo.clone().into_sync(); 395 | move || { 396 | ( 397 | { 398 | let mut repo = repo.to_thread_local(); 399 | repo.objects.unset_pack_cache(); 400 | repo 401 | }, 402 | DedupeContext::new(), 403 | ) 404 | } 405 | }, 406 | |(repo, ctx), oid| { 407 | let mut stack = Vec::with_capacity(64); 408 | match repo.find_object(oid) { 409 | Ok(obj) => stack.push(obj.detach()), 410 | Err(_) => return vec![Err(crate::error::CratesIterError)], 411 | }; 412 | let blobs = CratesTreesToBlobs { 413 | stack, 414 | repo: repo.clone(), 415 | }; 416 | Crates { 417 | blobs, 418 | dedupe: MaybeOwned::Borrowed(ctx), 419 | } 420 | .map(Ok) 421 | .collect::>() 422 | }, 423 | ) 424 | .flat_map_iter(|chunk| chunk.into_iter()) 425 | } 426 | 427 | fn crates_blobs(&self) -> Result { 428 | let repo = with_delta_cache(self.repo.clone()); 429 | Ok(CratesTreesToBlobs { 430 | stack: self 431 | .crates_top_level_ids()? 432 | .into_iter() 433 | .map(|id| self.repo.find_object(id).map(|tree| tree.detach())) 434 | .collect::>()?, 435 | repo, 436 | }) 437 | } 438 | 439 | fn crates_top_level_ids(&self) -> Result, GixError> { 440 | let mut stack = Vec::with_capacity(800); 441 | for entry in self.tree()?.iter() { 442 | let entry = entry?; 443 | // crates are in directories no longer than 2 letters. 444 | if !is_top_level_dir(&entry) { 445 | continue; 446 | }; 447 | stack.push(entry.oid().to_owned()); 448 | } 449 | Ok(stack) 450 | } 451 | 452 | /// Get the global configuration of the index. 
453 | pub fn index_config(&self) -> Result { 454 | let blob = self.object_at_path("config.json".into())?; 455 | serde_json::from_slice(&blob.data).map_err(Error::Json) 456 | } 457 | 458 | fn object_at_path(&self, path: PathBuf) -> Result, GixError> { 459 | let entry = self 460 | .tree()? 461 | .peel_to_entry_by_path(&path)? 462 | .ok_or(GixError::PathMissing { path })?; 463 | Ok(entry.object()?) 464 | } 465 | 466 | /// Find the most recent commit of `repo` at `path`. 467 | /// 468 | /// This is complicated by a few specialities of the cargo git index. 469 | /// 470 | /// * it's possible for `origin/HEAD` and `origin/master` to be stalled and out of date if they have been fetched with 471 | /// non-force refspecs. 472 | /// This was done by this crate as well, but is not done by cargo. 473 | /// * if `origin/master` is out of date, `FETCH_HEAD` is the only chance for getting the most recent commit. 474 | /// * if `gix` is updating the index, `FETCH_HEAD` will not be written at all, *only* the references are. Note that 475 | /// `cargo` does not rely on `FETCH_HEAD`, but relies on `origin/master` directly. 476 | /// 477 | /// This, we get a list of candidates and use the most recent commit. 
478 | fn find_repo_head(repo: &gix::Repository, path: &Path) -> Result { 479 | #[rustfmt::skip] 480 | const CANDIDATE_REFS: &[&str] = &[ 481 | "FETCH_HEAD", /* the location with the most-recent updates, as written by git2 */ 482 | "origin/HEAD", /* typical refspecs update this symbolic ref to point to the actual remote ref with the fetched commit */ 483 | "origin/master", /* for good measure, resolve this branch by hand in case origin/HEAD is broken */ 484 | ]; 485 | let mut candidates: Vec<_> = CANDIDATE_REFS 486 | .iter() 487 | .filter_map(|refname| repo.find_reference(*refname).ok()?.into_fully_peeled_id().ok()) 488 | .filter_map(|r| { 489 | let c = r.object().ok()?.try_into_commit().ok()?; 490 | Some((c.id, c.time().ok()?.seconds)) 491 | }) 492 | .collect(); 493 | 494 | candidates.sort_by_key(|t| t.1); 495 | // get the most recent commit, the one with most time passed since unix epoch. 496 | Ok(candidates 497 | .last() 498 | .ok_or_else(|| Error::MissingHead { 499 | repo_path: path.to_owned(), 500 | refs_tried: CANDIDATE_REFS, 501 | refs_available: repo 502 | .references() 503 | .ok() 504 | .and_then(|p| { 505 | p.all() 506 | .ok()? 507 | .map(|r| r.ok().map(|r| r.name().as_bstr().to_string())) 508 | .collect() 509 | }) 510 | .unwrap_or_default(), 511 | })? 512 | .0) 513 | } 514 | } 515 | 516 | fn is_top_level_dir(entry: &gix::object::tree::EntryRef<'_, '_>) -> bool { 517 | entry.mode().is_tree() && entry.filename().len() <= 2 518 | } 519 | 520 | fn with_delta_cache(mut repo: gix::Repository) -> gix::Repository { 521 | if repo 522 | .config_snapshot() 523 | .integer(gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT.logical_name().as_str()) 524 | .is_none() 525 | { 526 | let mut config = repo.config_snapshot_mut(); 527 | // Set a memory-backed delta-cache to the same size as git for ~40% more speed in this workload. 
528 | config 529 | .set_value(&gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT, "96m") 530 | .expect("in memory always works"); 531 | } 532 | repo 533 | } 534 | 535 | pub(super) fn fetch_remote(remote: &mut gix::Remote<'_>, refspecs: &[&str]) -> Result<(), GixError> { 536 | remote.replace_refspecs(refspecs, gix::remote::Direction::Fetch)?; 537 | 538 | remote 539 | .connect(gix::remote::Direction::Fetch)? 540 | .prepare_fetch(gix::progress::Discard, Default::default())? 541 | .receive(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?; 542 | Ok(()) 543 | } 544 | 545 | fn clone_url(url: &str, destination: &Path) -> Result { 546 | // Clones and fetches already know they need `bin_config` to work, so nothing to do here. 547 | let (repo, _outcome) = gix::prepare_clone_bare(url, destination)? 548 | .with_remote_name("origin")? 549 | .configure_remote(|remote| { 550 | Ok(remote.with_refspecs( 551 | ["+HEAD:refs/remotes/origin/HEAD", "+master:refs/remotes/origin/master"], 552 | gix::remote::Direction::Fetch, 553 | )?) 554 | }) 555 | .fetch_only(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?; 556 | Ok(repo) 557 | } 558 | 559 | /// Iterator over all crates in the index, but returns opaque objects that can be parsed separately. 560 | struct CratesTreesToBlobs { 561 | stack: Vec, 562 | repo: gix::Repository, 563 | } 564 | 565 | /// Opaque representation of a crate in the index. See [`CrateUnparsed::parse`]. 
566 | struct CrateUnparsed(Vec); 567 | 568 | impl CrateUnparsed { 569 | #[inline] 570 | fn parse(&self, ctx: &mut DedupeContext) -> io::Result { 571 | Crate::from_slice_with_context(self.0.as_slice(), ctx) 572 | } 573 | } 574 | 575 | impl Iterator for CratesTreesToBlobs { 576 | type Item = CrateUnparsed; 577 | 578 | fn next(&mut self) -> Option { 579 | while let Some(obj) = self.stack.pop() { 580 | if obj.kind.is_tree() { 581 | let tree = gix::objs::TreeRef::from_bytes(&obj.data).unwrap(); 582 | for entry in tree.entries.into_iter().rev() { 583 | self.stack.push(self.repo.find_object(entry.oid).unwrap().detach()); 584 | } 585 | continue; 586 | } else { 587 | return Some(CrateUnparsed(obj.data)); 588 | } 589 | } 590 | None 591 | } 592 | } 593 | 594 | enum MaybeOwned<'a, T> { 595 | Owned(T), 596 | #[cfg_attr(not(feature = "parallel"), allow(dead_code))] 597 | Borrowed(&'a mut T), 598 | } 599 | 600 | /// Iterator over all crates in the index. Skips crates that failed to parse. 601 | pub struct Crates<'a> { 602 | blobs: CratesTreesToBlobs, 603 | dedupe: MaybeOwned<'a, DedupeContext>, 604 | } 605 | 606 | impl<'a> Iterator for Crates<'a> { 607 | type Item = Crate; 608 | 609 | fn next(&mut self) -> Option { 610 | for next in self.blobs.by_ref() { 611 | let dedupe = match &mut self.dedupe { 612 | MaybeOwned::Owned(d) => d, 613 | MaybeOwned::Borrowed(d) => d, 614 | }; 615 | if let Ok(k) = CrateUnparsed::parse(&next, dedupe) { 616 | return Some(k); 617 | } 618 | } 619 | None 620 | } 621 | } 622 | 623 | enum Mode { 624 | ReadOnly, 625 | CloneUrlToPathIfRepoMissing, 626 | } 627 | 628 | #[cfg(test)] 629 | #[cfg(feature = "git-https")] 630 | mod tests { 631 | use crate::dedupe::DedupeContext; 632 | use crate::{git, GitIndex}; 633 | use gix::bstr::ByteSlice; 634 | 635 | #[test] 636 | #[cfg_attr(debug_assertions, ignore = "too slow in debug mode")] 637 | fn parse_all_blobs() { 638 | std::thread::scope(|scope| { 639 | let (tx, rx) = std::sync::mpsc::channel(); 640 | let blobs = 
scope.spawn(move || { 641 | let index = shared_index(); 642 | for c in index.crates_blobs().unwrap() { 643 | tx.send(c).unwrap(); 644 | } 645 | }); 646 | let parse = scope.spawn(move || { 647 | let mut found_gcc_crate = false; 648 | let mut ctx = DedupeContext::new(); 649 | for c in rx { 650 | match c.parse(&mut ctx) { 651 | Ok(c) => { 652 | if c.name() == "gcc" { 653 | found_gcc_crate = true; 654 | } 655 | } 656 | Err(e) => panic!("can't parse :( {:?}: {e}", c.0.as_bstr()), 657 | } 658 | } 659 | assert!(found_gcc_crate); 660 | }); 661 | parse.join().unwrap(); 662 | blobs.join().unwrap(); 663 | }); 664 | } 665 | 666 | fn shared_index() -> GitIndex { 667 | static LOCK: parking_lot::Mutex<()> = parking_lot::Mutex::new(()); 668 | let _guard = LOCK.lock(); 669 | 670 | let index_path = "tests/fixtures/git-registry"; 671 | if is_ci::cached() { 672 | GitIndex::new_cargo_default().expect("CI has just cloned this index and its ours and valid") 673 | } else { 674 | GitIndex::with_path(index_path, git::URL).expect("clone works and there is no racing") 675 | } 676 | } 677 | } 678 | -------------------------------------------------------------------------------- /src/git/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::result_large_err)] 2 | 3 | #[cfg(all(doc, feature = "git"))] 4 | use crate::GitIndex; 5 | 6 | /// The default URL of the crates.io index for use with git, see [`GitIndex::with_path`] 7 | pub const URL: &str = "https://github.com/rust-lang/crates.io-index"; 8 | 9 | /// 10 | #[cfg(feature = "git")] 11 | mod changes; 12 | #[cfg(feature = "git")] 13 | pub use changes::Changes; 14 | 15 | #[cfg(feature = "git")] 16 | mod config; 17 | 18 | #[cfg(feature = "git")] 19 | mod impl_; 20 | #[cfg(feature = "git")] 21 | use impl_::fetch_remote; 22 | #[cfg(feature = "git")] 23 | pub use impl_::{Change, Crates}; 24 | -------------------------------------------------------------------------------- /src/git/test.rs: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Corey Farwell 2 | // Copyright 2015 Contributors of github.com/huonw/crates.io-graph 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | //! Library for retrieving and interacting with the 17 | //! [crates.io index](https://github.com/rust-lang/crates.io-index). 18 | //! 19 | //! ## Examples 20 | //! 21 | //! ### Getting information about a single crate 22 | //! 23 | //! ```rust 24 | //! # #[cfg(all(not(debug_assertions), feature = "git"))] 25 | //! # { 26 | //! let index = crates_index::GitIndex::new_cargo_default()?; 27 | //! let serde_crate = index.crate_("serde").expect("you should handle errors here"); 28 | //! println!("Serde is at v{}", serde_crate.highest_normal_version().unwrap().version()); 29 | //! # } 30 | //! # Ok::<_, crates_index::Error>(()) 31 | //! ``` 32 | //! 33 | //! ### Iterating over *all* crates in the index 34 | //! 35 | //! ```rust 36 | //! # #[cfg(all(not(debug_assertions), feature = "parallel", feature = "git"))] 37 | //! # { 38 | //! let index = crates_index::GitIndex::new_cargo_default()?; 39 | //! for crate_ in index.crates() { 40 | //! let latest = crate_.most_recent_version(); 41 | //! 
println!("crate name: {}", latest.name()); 42 | //! println!("most recently released version: {}", latest.version()); 43 | //! } 44 | //! 45 | //! // or faster: 46 | //! use rayon::prelude::*; 47 | //! index.crates_parallel().for_each(|crate_| { 48 | //! /* etc. */ 49 | //! }); 50 | //! 51 | //! # } 52 | //! # Ok::<_, crates_index::Error>(()) 53 | //! ``` 54 | //! 55 | //! ### Getting most recently published or yanked crates 56 | //! 57 | //! ```rust 58 | //! # #[cfg(feature = "git")] 59 | //! # { 60 | //! let index = crates_index::GitIndex::new_cargo_default()?; 61 | //! 62 | //! for c in index.changes()?.take(20) { 63 | //! let c = c?; 64 | //! println!("{} has changed in the index commit {}", c.crate_name(), c.commit_hex()); 65 | //! } 66 | //! 67 | //! # } 68 | //! # Ok::<_, crates_index::Error>(()) 69 | //! ``` 70 | //! 71 | //! ## Auto-cloning and parallelism 72 | //! 73 | //! When using any means of instantiating the [`GitIndex`] type, we will 74 | //! clone the default crates index (or the given one) if no git 75 | //! repository is present at the destination path. 76 | //! 77 | //! This operation is racy and opening the index concurrently can lead to errors 78 | //! as multiple threads may try to clone the index at the same time if it wasn't there yet. 79 | //! 80 | //! To prevent that, consider using synchronization primitives on application level that 81 | //! synchronize methods like [`GitIndex::new_cargo_default()`] and its siblings. 82 | //! 83 | //! ## Git Repository Performance 84 | //! 85 | //! By default, `gix` is compiled with `max-performance-safe`, which maximizes support for compilation environments but which 86 | //! may be slower as it uses a pure-Rust Zlib implementation. 87 | //! To get best possible performance, use the `git-performance` feature toggle. 88 | //! 89 | //! ## Using `rustls` instead of `openssl` when using the `git-https` feature in applications 90 | //! 91 | //!
When using the `git-https` feature, a choice will be made for you that involves selecting the `curl` backend for making 92 | //! the `https` protocol available. As using a different backend isn't additive, as cargo features should be, one will have 93 | //! to resort to the following. 94 | //! 95 | //! * Change the `crates-index` dependency to `features = ["git-index", …(everything else *but* "git-https")]` 96 | //! * Add the `gix` dependency with `default-features = false` and `features = ["blocking-http-transport-reqwest-rust-tls"]`. 97 | //! Consider renaming the crate to `gix-for-configuration-only = { package = "gix", … }` to make the intend clear. 98 | //! 99 | //! Please note that this should only be done in application manifests, who have the final say over the protocol and backend choices. 100 | //! ## Feature Flags 101 | #![cfg_attr( 102 | feature = "document-features", 103 | cfg_attr(doc, doc = ::document_features::document_features!()) 104 | )] 105 | #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] 106 | #![deny(unsafe_code, rust_2018_compatibility, missing_docs)] 107 | use std::path::{Path, PathBuf}; 108 | 109 | /// Wrapper around managing the crates.io-index git repository 110 | /// 111 | /// Uses a "bare" git index that fetches files directly from the repo instead of local checkout. 112 | /// Uses Cargo's cache. 113 | /// 114 | /// ### Instantiation 115 | /// 116 | /// When creating an instance of this type, the crates-index will be cloned automatically should it not 117 | /// be present. If a repository is present at the location but the remote doesn't match the desired index URL, 118 | /// a new remote will be added and fetched from. 119 | /// 120 | /// Please note that concurrent calls to [`GitIndex::new_cargo_default()`] (and related) will automatically block 121 | /// and wait for each other, so only one instance will try to clone the index while the others will wait for completion. 
122 | /// 123 | /// This, however, only protects from itself and `cargo` cloning the index at the same time might interfere. 124 | #[cfg(feature = "git")] 125 | pub struct GitIndex { 126 | path: std::path::PathBuf, 127 | url: String, 128 | 129 | pub(crate) repo: gix::Repository, 130 | pub(crate) head_commit: gix::ObjectId, 131 | } 132 | 133 | /// The Git based index implementation 134 | pub mod git; 135 | 136 | mod config; 137 | pub use config::IndexConfig; 138 | 139 | mod dedupe; 140 | mod dirs; 141 | pub use dirs::{local_path_and_canonical_url, local_path_and_canonical_url_with_hash_kind, HashKind}; 142 | 143 | /// Re-exports in case you want to inspect specific error details 144 | pub mod error; 145 | #[doc(hidden)] 146 | #[cfg(feature = "parallel")] 147 | pub use error::CratesIterError; 148 | #[doc(hidden)] 149 | pub use error::Error; 150 | 151 | /// Wrapper around managing a sparse HTTP index, re-using Cargo's local disk caches. 152 | /// 153 | /// Currently it only uses local Cargo cache, and does not access the network in any way. 154 | /// For examples of how to update the local cache, 155 | /// see [`examples/sparse_http_reqwest.rs`][reqwest] and [`examples/sparse_http_ureq.rs`][ureq]. 156 | /// 157 | /// [reqwest]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_reqwest.rs 158 | /// [ureq]: https://github.com/frewsxcv/rust-crates-index/blob/HEAD/examples/sparse_http_ureq.rs 159 | #[derive(Debug)] 160 | pub struct SparseIndex { 161 | path: PathBuf, 162 | url: String, 163 | } 164 | 165 | /// The sparse index implementation. 166 | pub mod sparse; 167 | /// The matching `http` types for use in the [`sparse`] API. 
168 | #[cfg(feature = "sparse")] 169 | pub use http; 170 | 171 | mod names; 172 | pub use names::Names; 173 | 174 | mod types; 175 | pub use types::{Crate, Dependency, DependencyKind, Version}; 176 | 177 | pub(crate) fn split(haystack: &[u8], needle: u8) -> impl Iterator + '_ { 178 | struct Split<'a> { 179 | haystack: &'a [u8], 180 | needle: u8, 181 | } 182 | 183 | impl<'a> Iterator for Split<'a> { 184 | type Item = &'a [u8]; 185 | 186 | #[inline] 187 | fn next(&mut self) -> Option<&'a [u8]> { 188 | if self.haystack.is_empty() { 189 | return None; 190 | } 191 | let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) { 192 | Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]), 193 | None => (self.haystack, &[][..]), 194 | }; 195 | self.haystack = remaining; 196 | Some(ret) 197 | } 198 | } 199 | 200 | Split { haystack, needle } 201 | } 202 | 203 | #[cfg(unix)] 204 | fn path_max_byte_len(path: &Path) -> usize { 205 | use std::os::unix::prelude::OsStrExt; 206 | path.as_os_str().as_bytes().len() 207 | } 208 | 209 | #[cfg(not(unix))] 210 | fn path_max_byte_len(path: &Path) -> usize { 211 | path.to_str().map_or(0, |p| p.len()) 212 | } 213 | -------------------------------------------------------------------------------- /src/names.rs: -------------------------------------------------------------------------------- 1 | /// An iterator over all possible permutations of hyphens (`-`) and underscores (`_`) of a crate name. 2 | /// 3 | /// The sequence yields the input name first, then an all-hyphens variant of it followed by an 4 | /// all-underscores variant to maximize the chance of finding a match. Then follow all remaining permutations. 5 | /// 6 | /// For instance, the name `parking_lot` is turned into the sequence `parking_lot` and `parking-lot`, while 7 | /// `serde-yaml` is turned into `serde-yaml` and `serde_yaml`. 8 | /// Finally, `a-b_c` is returned as `a-b_c`, `a-b-c`, `a_b_c`, `a_b-c`. 
#[derive(Clone)]
pub struct Names {
    /// `None` until the initial name was yielded, then the index of the next permutation.
    count: Option<u16>,
    initial: String,
    /// Number of permutations, i.e. `2^separator_count`.
    max_count: u16,
    current: String,
    /// Byte offsets of the separators within `initial` and `current`.
    separator_indexes: [usize; 17],
    separator_count: usize,
}

impl Names {
    /// Creates a new iterator over all permutations of `-` and `_` of the given `name`,
    /// or `None` if there are more than 15 `-` or `_` characters.
    pub fn new(name: impl Into<String>) -> Option<Names> {
        let mut separator_indexes = [0; 17];
        let mut separator_count = 0;

        let name = name.into();
        let mut current = String::with_capacity(name.len());
        // Record *byte* offsets: they are used to patch `current` in place later on. Both
        // separators are one byte long, so offsets in `name` and `current` are identical.
        for (byte_index, ch) in name.char_indices() {
            if ch == '-' || ch == '_' {
                // Too many separators to even record: give up instead of indexing out of bounds.
                *separator_indexes.get_mut(separator_count)? = byte_index;
                separator_count += 1;
                current.push('_');
            } else {
                current.push(ch);
            }
        }

        // `2^separator_count` must fit into `u16`, which limits us to 15 separators.
        let max_count = 2u16.checked_pow(separator_count.try_into().ok()?)?;

        Some(Names {
            count: None,
            initial: name,
            max_count,
            current,
            separator_indexes,
            separator_count,
        })
    }
}

impl Iterator for Names {
    type Item = String;

    fn next(&mut self) -> Option<Self::Item> {
        match self.count.as_mut() {
            None => {
                self.count = Some(0);
                Some(self.initial.clone())
            }
            Some(count) => {
                // At most two rounds: the second one is only needed to skip over the
                // permutation that equals the initial name, which was yielded already.
                for _round in 0..2 {
                    if *count == self.max_count {
                        return None;
                    }

                    // map the count so the first value is the last one (all "-"), the second one is the first one (all "_")...
                    let used_count = u32::from(*count) + u32::from(self.max_count) - 1;
                    for (sep_index, &byte_index) in self.separator_indexes[..self.separator_count].iter().enumerate() {
                        let separator = if used_count & (1 << sep_index) == 0 { "_" } else { "-" };
                        // `byte_index` is the offset of a one-byte separator, so this swaps exactly that character.
                        self.current.replace_range(byte_index..=byte_index, separator);
                    }

                    *count += 1;
                    if self.current != self.initial {
                        break;
                    }
                }
                Some(self.current.clone())
            }
        }
    }

    /// The number of names that are still to be yielded.
    fn count(mut self) -> usize
    where
        Self: Sized,
    {
        // Fast path: nothing was consumed yet, so every permutation is still outstanding.
        if self.count.is_none() {
            return usize::from(self.max_count);
        }
        let mut remaining = 0;
        while self.next().is_some() {
            remaining += 1;
        }
        remaining
    }
}

--------------------------------------------------------------------------------
/src/sparse.rs:
--------------------------------------------------------------------------------
use std::io;
use std::path::{Path, PathBuf};

use crate::dirs::{
    crate_name_to_relative_path, local_path_and_canonical_url_with_hash_kind, HashKind, DEFAULT_HASHER_KIND,
};
use crate::{path_max_byte_len, Crate, Error, IndexConfig, SparseIndex};

/// The default URL of the crates.io HTTP index, see [`SparseIndex::from_url`] and [`SparseIndex::new_cargo_default`]
pub const URL: &str = "sparse+https://index.crates.io/";

impl SparseIndex {
    /// Creates a view over the sparse HTTP index from a provided URL, opening
    /// the same location on disk that Cargo uses for that registry index's
    /// metadata and cache.
    ///
    /// Note this function takes the `CARGO_HOME` environment variable into account
    #[inline]
    pub fn from_url(url: &str) -> Result<Self, Error> {
        Self::from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND)
    }

    /// Like [`Self::from_url`] but accepts an explicit [`HashKind`] for determining the crates index path.
24 | #[inline] 25 | pub fn from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result { 26 | Self::with_path_and_hash_kind(home::cargo_home()?, url, hash_kind) 27 | } 28 | 29 | /// Creates an index for the default crates.io registry, using the same 30 | /// disk location as Cargo itself. 31 | /// 32 | /// This is the recommended way to access the crates.io sparse index. 33 | /// 34 | /// Note this function takes the `CARGO_HOME` environment variable into account 35 | #[inline] 36 | pub fn new_cargo_default() -> Result { 37 | Self::from_url(URL) 38 | } 39 | 40 | /// Creates a view over the sparse HTTP index from the provided URL, rooted 41 | /// at the specified location 42 | #[inline] 43 | pub fn with_path(cargo_home: impl AsRef, url: impl AsRef) -> Result { 44 | Self::with_path_and_hash_kind(cargo_home, url, &DEFAULT_HASHER_KIND) 45 | } 46 | 47 | /// Like [`Self::with_path`] but accepts an explicit [`HashKind`] for determining the crates index path. 48 | #[inline] 49 | pub fn with_path_and_hash_kind( 50 | cargo_home: impl AsRef, 51 | url: impl AsRef, 52 | hash_kind: &HashKind, 53 | ) -> Result { 54 | let url = url.as_ref(); 55 | // It is required to have the sparse+ scheme modifier for sparse urls as 56 | // they are part of the short ident hash calculation done by cargo 57 | if !url.starts_with("sparse+http") { 58 | return Err(Error::Url(url.to_owned())); 59 | } 60 | 61 | let (path, url) = local_path_and_canonical_url_with_hash_kind(url, Some(cargo_home.as_ref()), hash_kind)?; 62 | Ok(Self::at_path(path, url)) 63 | } 64 | 65 | /// Creates a view over the sparse HTTP index at the exact specified path 66 | #[inline] 67 | #[must_use] 68 | pub fn at_path(path: PathBuf, mut url: String) -> Self { 69 | if !url.ends_with('/') { 70 | url.push('/'); 71 | } 72 | Self { path, url } 73 | } 74 | 75 | /// Get the global configuration of the index. There are no guarantees around freshness, 76 | /// and if the config is not available, no fetch will be performed. 
77 | pub fn index_config(&self) -> Result { 78 | let path = self.path.join("config.json"); 79 | let bytes = std::fs::read(path).map_err(Error::Io)?; 80 | 81 | serde_json::from_slice(&bytes).map_err(Error::Json) 82 | } 83 | 84 | /// Reads a crate from the local cache of the index. There are no guarantees around freshness, 85 | /// and if the crate is not known in the cache, no fetch will be performed. 86 | pub fn crate_from_cache(&self, name: &str) -> Result { 87 | let cache_path = self 88 | .cache_path(name) 89 | .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "bad name"))?; 90 | 91 | let cache_bytes = std::fs::read(&cache_path) 92 | .map_err(|e| io::Error::new(e.kind(), format!("{}: `{}`", e, cache_path.display())))?; 93 | Ok(Crate::from_cache_slice(&cache_bytes, None)?) 94 | } 95 | 96 | /// The HTTP url of the index 97 | #[inline] 98 | #[must_use] 99 | pub fn url(&self) -> &str { 100 | self.url.strip_prefix("sparse+").unwrap_or(&self.url) 101 | } 102 | 103 | /// Get the URL that can be used to fetch the index entry for the specified 104 | /// crate 105 | /// 106 | /// The body of a successful response for the returned URL can be parsed 107 | /// via [`Crate::from_slice`] 108 | #[inline] 109 | #[must_use] 110 | pub fn crate_url(&self, name: &str) -> Option { 111 | let rel_path = crate_name_to_relative_path(name, Some('/'))?; 112 | Some(format!("{}{rel_path}", self.url())) 113 | } 114 | 115 | /// Gets the full path to the cache file for the specified crate 116 | fn cache_path(&self, name: &str) -> Option { 117 | let rel_path = crate_name_to_relative_path(name, None)?; 118 | 119 | // avoid realloc on each push 120 | let mut cache_path = PathBuf::with_capacity(path_max_byte_len(&self.path) + 8 + rel_path.len()); 121 | cache_path.push(&self.path); 122 | cache_path.push(".cache"); 123 | cache_path.push(rel_path); 124 | 125 | Some(cache_path) 126 | } 127 | 128 | /// Reads the version of the cache entry for the specified crate, if it exists 129 | /// 130 | /// 
The version is of the form `key:value`, where, currently, the key is either 131 | /// `etag` or `last-modified` 132 | #[cfg(feature = "sparse")] 133 | fn read_cache_version(&self, name: &str) -> Option { 134 | let cache_path = self.cache_path(name)?; 135 | let bytes = std::fs::read(cache_path).ok()?; 136 | 137 | const CURRENT_CACHE_VERSION: u8 = 3; 138 | const CURRENT_INDEX_FORMAT_VERSION: u32 = 2; 139 | 140 | let (&first_byte, rest) = bytes.split_first()?; 141 | 142 | if first_byte != CURRENT_CACHE_VERSION { 143 | return None; 144 | } 145 | 146 | let index_v_bytes = rest.get(..4)?; 147 | let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap()); 148 | if index_v != CURRENT_INDEX_FORMAT_VERSION { 149 | return None; 150 | } 151 | let rest = &rest[4..]; 152 | 153 | let version = crate::split(rest, 0) 154 | .next() 155 | .and_then(|version| std::str::from_utf8(version).ok().map(String::from)); 156 | 157 | version 158 | } 159 | 160 | #[cfg(feature = "sparse")] 161 | fn make_request(&self, url: &str, cache_version: Option<&str>) -> Result { 162 | use http::header; 163 | 164 | let mut req = http::Request::get(url).version(http::Version::HTTP_2); 165 | 166 | { 167 | let headers = req.headers_mut().unwrap(); 168 | 169 | // AFAICT this does not affect responses at the moment, but could in the future 170 | // if there are changes 171 | headers.insert("cargo-protocol", header::HeaderValue::from_static("version=1")); 172 | // All index entries are just files with lines of JSON 173 | headers.insert(header::ACCEPT, header::HeaderValue::from_static("text/plain")); 174 | // We need to accept both identity and gzip, as otherwise cloudfront will 175 | // always respond to requests with strong etag's, which will differ from 176 | // cache entries generated by cargo 177 | headers.insert( 178 | header::ACCEPT_ENCODING, 179 | header::HeaderValue::from_static("gzip,identity"), 180 | ); 181 | 182 | // If we have a local cache entry, include its version with the 183 | // 
appropriate header, this allows the server to respond with a 184 | // cached, or even better, empty response if its version matches 185 | // the local one making the request/response loop basically free 186 | if let Some(cache_version) = cache_version { 187 | if let Some((key, value)) = cache_version.split_once(':') { 188 | if let Ok(value) = header::HeaderValue::from_str(value.trim()) { 189 | if key == header::ETAG { 190 | headers.insert(header::IF_NONE_MATCH, value); 191 | } else if key == header::LAST_MODIFIED { 192 | headers.insert(header::IF_MODIFIED_SINCE, value); 193 | } else { 194 | // We could error here, but that's kind of pointless 195 | // since the response will be sent in full if we haven't 196 | // specified one of the above headers. Though it does 197 | // potentially indicate something weird is going on 198 | } 199 | } 200 | } 201 | } 202 | } 203 | 204 | Ok(req) 205 | } 206 | 207 | /// Creates an HTTP request that can be sent via your HTTP client of choice 208 | /// to retrieve the config for this index. 209 | /// 210 | /// See [`Self::parse_config_response()`] for processing the response from the remote 211 | /// index. 212 | /// 213 | /// It is highly recommended to assume HTTP/2 when making requests to remote 214 | /// indices, at least crates.io. 215 | #[cfg(feature = "sparse")] 216 | pub fn make_config_request(&self) -> Result { 217 | self.make_request(&format!("{}config.json", self.url()), None) 218 | } 219 | 220 | /// Creates an HTTP request that can be sent via your HTTP client of choice 221 | /// to retrieve the current metadata for the specified crate `name`. 222 | /// 223 | /// See [`Self::parse_cache_response()`] for processing the response from the remote 224 | /// index. 225 | /// 226 | /// It is highly recommended to assume HTTP/2 when making requests to remote 227 | /// indices, at least crates.io. 
228 | #[cfg(feature = "sparse")] 229 | pub fn make_cache_request(&self, name: &str) -> Result { 230 | self.make_request( 231 | &self 232 | .crate_url(name) 233 | .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "crate name is invalid"))?, 234 | self.read_cache_version(name).as_deref(), 235 | ) 236 | } 237 | 238 | /// Process the response to a request created by [`Self::make_config_request()`]. 239 | /// 240 | /// If `write_config` is `true`, write the configuration to disk after parsing it. 241 | /// Note that the write operation may fail, and as opposed to the similar parameter 242 | /// in [`Self::parse_cache_response()`], write errors will not be ignored. 243 | /// 244 | /// Note that the `response` from sparse HTTP indices, at least crates.io, may 245 | /// send responses with `gzip` compression, it is your responsibility to 246 | /// decompress it before sending to this function. 247 | #[cfg(feature = "sparse")] 248 | pub fn parse_config_response( 249 | &self, 250 | response: http::Response>, 251 | write_config: bool, 252 | ) -> Result { 253 | use http::StatusCode; 254 | let (parts, body) = response.into_parts(); 255 | 256 | match parts.status { 257 | StatusCode::OK => { 258 | let res = serde_json::from_slice(&body).map_err(Error::Json); 259 | if write_config { 260 | let path = self.path.join("config.json"); 261 | std::fs::create_dir_all(path.parent().unwrap())?; 262 | std::fs::write(&path, &body)?; 263 | } 264 | res 265 | } 266 | StatusCode::UNAUTHORIZED => { 267 | Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into()) 268 | } 269 | StatusCode::NOT_FOUND => { 270 | Err(io::Error::new(io::ErrorKind::NotFound, "config.json not found in registry").into()) 271 | } 272 | other => Err(io::Error::new( 273 | io::ErrorKind::Unsupported, 274 | format!( 275 | "the server responded with status code '{other}', which is not supported in the current protocol" 276 | ), 277 | ) 278 | .into()), 279 | } 280 | } 281 | 282 | /// 
Process the response to a request created by [`Self::make_cache_request`] 283 | /// 284 | /// This handles both the scenario where the local cache is missing the specified 285 | /// crate, or it is out of date, as well as the local entry being up to date 286 | /// and can just be read from disk 287 | /// 288 | /// You may specify whether an updated index entry is written locally to the 289 | /// cache or not 290 | /// 291 | /// Note that responses from sparse HTTP indices, at least crates.io, may 292 | /// send responses with `gzip` compression, it is your responsibility to 293 | /// decompress it before sending to this function 294 | #[cfg(feature = "sparse")] 295 | pub fn parse_cache_response( 296 | &self, 297 | name: &str, 298 | response: http::Response>, 299 | write_cache_entry: bool, 300 | ) -> Result, Error> { 301 | use http::{header, StatusCode}; 302 | let (parts, body) = response.into_parts(); 303 | 304 | match parts.status { 305 | // The server responded with the full contents of the index entry 306 | StatusCode::OK => { 307 | let krate = Crate::from_slice(&body)?; 308 | 309 | if write_cache_entry { 310 | // The same as cargo, prefer etag over last-modified 311 | let version = if let Some(etag) = parts.headers.get(header::ETAG) { 312 | etag.to_str().ok().map(|etag| format!("{}: {etag}", header::ETAG)) 313 | } else if let Some(lm) = parts.headers.get(header::LAST_MODIFIED) { 314 | lm.to_str().ok().map(|lm| format!("{}: {lm}", header::LAST_MODIFIED)) 315 | } else { 316 | None 317 | }; 318 | 319 | let version = version.unwrap_or_else(|| "Unknown".to_owned()); 320 | 321 | // This should always succeed, but no need to panic or fail 322 | if let Some(cache_path) = self.cache_path(name) { 323 | if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() { 324 | // It's unfortunate if this fails for some reason, but 325 | // not writing the cache entry shouldn't stop the user 326 | // from getting the crate's metadata 327 | let _ = 
krate.write_cache_entry(&cache_path, &version); 328 | } 329 | } 330 | } 331 | 332 | Ok(Some(krate)) 333 | } 334 | // The local cache entry is up to date with the latest entry on the 335 | // server, we can just return the local one 336 | StatusCode::NOT_MODIFIED => self.crate_from_cache(name).map(Option::Some), 337 | // The server requires authorization but the user didn't provide it 338 | StatusCode::UNAUTHORIZED => { 339 | Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into()) 340 | } 341 | // The crate does not exist, or has been removed 342 | StatusCode::NOT_FOUND | StatusCode::GONE | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => Ok(None), 343 | other => Err(io::Error::new( 344 | io::ErrorKind::Unsupported, 345 | format!( 346 | "the server responded with status code '{other}', which is not supported in the current protocol" 347 | ), 348 | ) 349 | .into()), 350 | } 351 | } 352 | } 353 | 354 | #[cfg(test)] 355 | #[cfg(feature = "sparse")] 356 | mod tests { 357 | use crate::SparseIndex; 358 | use http::header; 359 | 360 | #[inline] 361 | fn crates_io() -> SparseIndex { 362 | SparseIndex::with_path( 363 | std::path::Path::new(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) 364 | .join("tests/fixtures/sparse_registry_cache/cargo_home"), 365 | crate::sparse::URL, 366 | ) 367 | .unwrap() 368 | } 369 | 370 | // curl -v -H 'accept-encoding: gzip,identity' https://index.crates.io/cr/at/crates-index 371 | const CRATES_INDEX_INDEX_ENTRY: &[u8] = include_bytes!("../tests/fixtures/crates-index.txt"); 372 | 373 | // Validates that a valid cache entry is written if the index entry has been 374 | // modified 375 | #[test] 376 | fn writes_cache_entry() { 377 | let index = crates_io(); 378 | 379 | let cache_path = index.cache_path("crates-index").unwrap(); 380 | if cache_path.exists() { 381 | std::fs::remove_file(&cache_path).expect("failed to remove existing crates-index cache file"); 382 | } 383 | 384 | let response = 
http::Response::builder() 385 | .status(http::StatusCode::OK) 386 | .header(header::ETAG, "W/\"7fbfc422231ec53a9283f2eb2fb4f459\"") 387 | .body(CRATES_INDEX_INDEX_ENTRY.to_vec()) 388 | .unwrap(); 389 | 390 | let http_krate = index 391 | .parse_cache_response("crates-index", response, true /* write cache entry */) 392 | .unwrap() 393 | .unwrap(); 394 | assert!(cache_path.is_file(), "the cache entry was indeed written"); 395 | let cache_krate = index.crate_from_cache("crates-index").unwrap(); 396 | 397 | for (http, cache) in http_krate.versions().iter().zip(cache_krate.versions().iter()) { 398 | assert_eq!(http.version(), cache.version()); 399 | } 400 | } 401 | } 402 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use crate::dedupe::DedupeContext; 2 | 3 | use crate::IndexConfig; 4 | use semver::Version as SemverVersion; 5 | use serde_derive::{Deserialize, Serialize}; 6 | use smol_str::SmolStr; 7 | use std::collections::HashMap; 8 | use std::io; 9 | use std::path::Path; 10 | use std::sync::Arc; 11 | 12 | /// A single version of a crate (package) published to the index 13 | #[derive(Serialize, Deserialize, Clone, Debug)] 14 | pub struct Version { 15 | name: SmolStr, 16 | vers: SmolStr, 17 | deps: Arc<[Dependency]>, 18 | features: Arc>>, 19 | /// It's wrapped in `Option` to reduce size of the struct when the field is unused (i.e. 
almost always) 20 | /// 21 | #[serde(default, skip_serializing_if = "Option::is_none")] 22 | #[allow(clippy::box_collection)] 23 | features2: Option>>>, 24 | #[serde(skip_serializing_if = "Option::is_none")] 25 | links: Option>, 26 | #[serde(default)] 27 | rust_version: Option, 28 | #[serde(with = "hex")] 29 | cksum: [u8; 32], 30 | #[serde(default)] 31 | yanked: bool, 32 | } 33 | 34 | impl Version { 35 | /// Name of the crate 36 | #[inline] 37 | #[must_use] 38 | pub fn name(&self) -> &str { 39 | &self.name 40 | } 41 | 42 | /// Name of this version 43 | #[inline] 44 | #[must_use] 45 | pub fn version(&self) -> &str { 46 | &self.vers 47 | } 48 | 49 | /// Dependencies for this version 50 | #[inline] 51 | #[must_use] 52 | pub fn dependencies(&self) -> &[Dependency] { 53 | &self.deps 54 | } 55 | 56 | /// Checksum of the package for this version 57 | /// 58 | /// SHA256 of the .crate file 59 | #[inline] 60 | #[must_use] 61 | pub fn checksum(&self) -> &[u8; 32] { 62 | &self.cksum 63 | } 64 | 65 | /// Explicit features this crate has. This list is not exhaustive, 66 | /// because any optional dependency becomes a feature automatically. 67 | /// 68 | /// `default` is a special feature name for implicitly enabled features. 69 | #[inline] 70 | #[must_use] 71 | pub fn features(&self) -> &HashMap> { 72 | &self.features 73 | } 74 | 75 | /// combines features and features2 76 | /// 77 | /// dedupes dependencies and features 78 | fn build_data(&mut self, dedupe: &mut DedupeContext) { 79 | if let Some(features2) = self.features2.take() { 80 | if let Some(f1) = Arc::get_mut(&mut self.features) { 81 | for (key, mut val) in features2.into_iter() { 82 | f1.entry(key).or_insert_with(Vec::new).append(&mut val); 83 | } 84 | } 85 | } 86 | 87 | // Many versions have identical dependencies and features 88 | dedupe.deps(&mut self.deps); 89 | dedupe.features(&mut self.features); 90 | } 91 | 92 | /// Exclusivity flag. 
If this is a sys crate, it informs it 93 | /// conflicts with any other crate with the same links string. 94 | /// 95 | /// It does not involve linker or libraries in any way. 96 | #[inline] 97 | #[must_use] 98 | pub fn links(&self) -> Option<&str> { 99 | self.links.as_ref().map(|s| s.as_str()) 100 | } 101 | 102 | /// Whether this version was [yanked](http://doc.crates.io/crates-io.html#cargo-yank) from the 103 | /// index 104 | #[inline] 105 | #[must_use] 106 | pub fn is_yanked(&self) -> bool { 107 | self.yanked 108 | } 109 | 110 | /// Required version of rust 111 | /// 112 | /// Corresponds to `package.rust-version`. 113 | /// 114 | /// Added in 2023 (see ), 115 | /// can be `None` if published before then or if not set in the manifest. 116 | #[inline] 117 | #[must_use] 118 | pub fn rust_version(&self) -> Option<&str> { 119 | self.rust_version.as_deref() 120 | } 121 | 122 | /// Where to find crate tarball 123 | #[must_use] 124 | pub fn download_url(&self, index: &IndexConfig) -> Option { 125 | index.download_url(&self.name, &self.vers) 126 | } 127 | } 128 | 129 | /// A single dependency of a specific crate version 130 | #[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq, Hash)] 131 | pub struct Dependency { 132 | name: SmolStr, 133 | req: SmolStr, 134 | /// Double indirection to remove size from this struct, since the features are rarely set 135 | features: Box>, 136 | #[serde(skip_serializing_if = "Option::is_none")] 137 | package: Option>, 138 | #[serde(skip_serializing_if = "Option::is_none")] 139 | kind: Option, 140 | #[serde(skip_serializing_if = "Option::is_none")] 141 | registry: Option, 142 | #[serde(skip_serializing_if = "Option::is_none")] 143 | target: Option>, 144 | optional: bool, 145 | default_features: bool, 146 | } 147 | 148 | impl Dependency { 149 | /// Dependency's arbitrary nickname (it may be an alias). Use [`Dependency::crate_name`] for actual crate name. 
150 | #[inline] 151 | #[must_use] 152 | pub fn name(&self) -> &str { 153 | &self.name 154 | } 155 | 156 | /// Semver version pattern 157 | #[inline] 158 | #[must_use] 159 | pub fn requirement(&self) -> &str { 160 | &self.req 161 | } 162 | 163 | /// Features unconditionally enabled when using this dependency, 164 | /// in addition to [`Dependency::has_default_features`] and features enabled through 165 | /// parent crate's feature list. 166 | #[inline] 167 | #[must_use] 168 | pub fn features(&self) -> &[String] { 169 | &self.features 170 | } 171 | 172 | /// If it's optional, it implies a feature of its [`Dependency::name`], and can be enabled through 173 | /// the crate's features. 174 | #[inline] 175 | #[must_use] 176 | pub fn is_optional(&self) -> bool { 177 | self.optional 178 | } 179 | 180 | /// If `true` (default), enable `default` feature of this dependency 181 | #[inline] 182 | #[must_use] 183 | pub fn has_default_features(&self) -> bool { 184 | self.default_features 185 | } 186 | 187 | /// This dependency is only used when compiling for this `cfg` expression 188 | #[inline] 189 | #[must_use] 190 | pub fn target(&self) -> Option<&str> { 191 | self.target.as_ref().map(|s| s.as_str()) 192 | } 193 | 194 | /// Dev or not 195 | #[inline] 196 | #[must_use] 197 | pub fn kind(&self) -> DependencyKind { 198 | self.kind.unwrap_or_default() 199 | } 200 | 201 | /// The registry URL, if available. 202 | /// 203 | /// Example: `https://github.com/rust-lang/crates.io-index.git` 204 | #[inline] 205 | #[must_use] 206 | pub fn registry(&self) -> Option<&str> { 207 | self.registry.as_deref() 208 | } 209 | 210 | /// Set if dependency's crate name is different from the `name` (alias) 211 | #[inline] 212 | #[must_use] 213 | pub fn package(&self) -> Option<&str> { 214 | self.package.as_ref().map(|s| s.as_str()) 215 | } 216 | 217 | /// Returns the name of the crate providing the dependency. 
218 | /// This is equivalent to `name()` unless `self.package()` 219 | /// is not `None`, in which case it's equal to `self.package()`. 220 | /// 221 | /// Basically, you can define a dependency in your `Cargo.toml` 222 | /// like this: 223 | /// 224 | /// ```toml 225 | /// serde_lib = {version = "1", package = "serde"} 226 | /// ``` 227 | /// 228 | /// ...which means that it uses the crate `serde` but imports 229 | /// it under the name `serde_lib`. 230 | #[inline] 231 | #[must_use] 232 | pub fn crate_name(&self) -> &str { 233 | match self.package { 234 | Some(ref s) => s, 235 | None => self.name(), 236 | } 237 | } 238 | } 239 | 240 | /// Section in which this dependency was defined 241 | #[derive(Debug, Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] 242 | #[serde(rename_all = "lowercase")] 243 | pub enum DependencyKind { 244 | /// Used at run time 245 | Normal, 246 | /// Not fetched and not used, except for when used directly in a workspace 247 | Dev, 248 | /// Used at build time, not available at run time 249 | Build, 250 | } 251 | 252 | impl Default for DependencyKind { 253 | fn default() -> Self { 254 | Self::Normal 255 | } 256 | } 257 | 258 | /// A whole crate with all its versions 259 | #[derive(Serialize, Deserialize, Clone, Debug)] 260 | pub struct Crate { 261 | versions: Box<[Version]>, 262 | } 263 | 264 | impl Crate { 265 | /// Parse crate file from in-memory JSON data 266 | #[inline(never)] 267 | pub(crate) fn from_slice_with_context(mut bytes: &[u8], dedupe: &mut DedupeContext) -> io::Result { 268 | // Trim last newline 269 | while bytes.last() == Some(&b'\n') { 270 | bytes = &bytes[..bytes.len() - 1]; 271 | } 272 | 273 | #[inline(always)] 274 | fn is_newline(&c: &u8) -> bool { 275 | c == b'\n' 276 | } 277 | let num_versions = bytes.split(is_newline).count(); 278 | let mut versions = Vec::with_capacity(num_versions); 279 | for line in bytes.split(is_newline) { 280 | let mut version: Version = 281 | serde_json::from_slice(line).map_err(|e| 
io::Error::new(io::ErrorKind::Other, e))?; 282 | 283 | version.build_data(dedupe); 284 | 285 | versions.push(version); 286 | } 287 | if versions.is_empty() { 288 | return Err(io::ErrorKind::UnexpectedEof.into()); 289 | } 290 | debug_assert_eq!(versions.len(), versions.capacity()); 291 | Ok(Crate { 292 | versions: versions.into_boxed_slice(), 293 | }) 294 | } 295 | 296 | /// Parse crate index entry from a .cache file, this can fail for a number of reasons 297 | /// 298 | /// 1. There is no entry for this crate 299 | /// 2. The entry was created with an older version than the one specified 300 | /// 3. The entry is a newer version than what can be read, would only 301 | /// happen if a future version of cargo changed the format of the cache entries 302 | /// 4. The cache entry is malformed somehow 303 | #[inline(never)] 304 | pub(crate) fn from_cache_slice(bytes: &[u8], index_version: Option<&str>) -> io::Result { 305 | const CURRENT_CACHE_VERSION: u8 = 3; 306 | const CURRENT_INDEX_FORMAT_VERSION: u32 = 2; 307 | 308 | // See src/cargo/sources/registry/index.rs 309 | let (first_byte, mut rest) = bytes.split_first().ok_or(io::ErrorKind::UnexpectedEof)?; 310 | 311 | match *first_byte { 312 | // This is the current 1.54.0 - 1.70.0+ version of cache entries 313 | CURRENT_CACHE_VERSION => { 314 | let index_v_bytes = rest.get(..4).ok_or(io::ErrorKind::UnexpectedEof)?; 315 | let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap()); 316 | if index_v != CURRENT_INDEX_FORMAT_VERSION { 317 | return Err(io::Error::new( 318 | io::ErrorKind::Unsupported, 319 | format!("wrong index format version: {index_v} (expected {CURRENT_INDEX_FORMAT_VERSION}))"), 320 | )); 321 | } 322 | rest = &rest[4..]; 323 | } 324 | // This is only to support ancient <1.52.0 versions of cargo https://github.com/rust-lang/cargo/pull/9161 325 | 1 => {} 326 | // Note that the change from 2 -> 3 was only to invalidate cache 327 | // entries https://github.com/rust-lang/cargo/pull/9476 and 328 | // 
version 2 entries should only be emitted by cargo 1.52.0 and 1.53.0, 329 | // but rather than _potentially_ parse bad cache entries as noted in 330 | // the PR we explicitly tell the user their version of cargo is suspect 331 | // these versions are so old (and specific) it shouldn't affect really anyone 332 | 2 => { 333 | return Err(io::Error::new( 334 | io::ErrorKind::Other, 335 | "potentially invalid version 2 cache entry found", 336 | )); 337 | } 338 | version => { 339 | return Err(io::Error::new( 340 | io::ErrorKind::Unsupported, 341 | format!("cache version '{version}' not currently supported"), 342 | )); 343 | } 344 | } 345 | 346 | let mut iter = crate::split(rest, 0); 347 | let update = iter.next().ok_or(io::ErrorKind::UnexpectedEof)?; 348 | if let Some(index_version) = index_version { 349 | if update != index_version.as_bytes() { 350 | return Err(io::Error::new( 351 | io::ErrorKind::Other, 352 | format!( 353 | "cache out of date: current index ({index_version}) != cache ({})", 354 | String::from_utf8_lossy(update) 355 | ), 356 | )); 357 | } 358 | } 359 | 360 | Self::from_version_entries_iter(iter) 361 | } 362 | 363 | pub(crate) fn from_version_entries_iter<'a, I: Iterator + 'a>(mut iter: I) -> io::Result { 364 | let mut versions = Vec::new(); 365 | 366 | let mut dedupe = DedupeContext::new(); 367 | 368 | // Each entry is a tuple of (semver, version_json) 369 | while let Some(_version) = iter.next() { 370 | let version_slice = iter.next().ok_or(io::ErrorKind::UnexpectedEof)?; 371 | let mut version: Version = 372 | serde_json::from_slice(version_slice).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; 373 | 374 | version.build_data(&mut dedupe); 375 | 376 | versions.push(version); 377 | } 378 | 379 | Ok(Self { 380 | versions: versions.into_boxed_slice(), 381 | }) 382 | } 383 | 384 | /// Writes a cache entry to disk in the same format as cargo 385 | #[cfg(feature = "sparse")] 386 | pub(crate) fn write_cache_entry(&self, path: &Path, version: &str) 
-> io::Result<()> { 387 | const CURRENT_CACHE_VERSION: u8 = 3; 388 | const CURRENT_INDEX_FORMAT_VERSION: u32 = 2; 389 | 390 | let mut v = Vec::new(); 391 | v.push(CURRENT_CACHE_VERSION); 392 | v.extend_from_slice(&CURRENT_INDEX_FORMAT_VERSION.to_le_bytes()); 393 | v.extend_from_slice(version.as_bytes()); 394 | v.push(0); 395 | 396 | for version in self.versions() { 397 | v.extend_from_slice(version.version().as_bytes()); 398 | v.push(0); 399 | v.append(&mut serde_json::to_vec(version).unwrap()); 400 | v.push(0); 401 | } 402 | 403 | std::fs::write(path, v) 404 | } 405 | 406 | /// All versions of this crate sorted chronologically by date originally published 407 | /// 408 | /// Warning: may be yanked or duplicate 409 | #[inline] 410 | #[must_use] 411 | pub fn versions(&self) -> &[Version] { 412 | &self.versions 413 | } 414 | 415 | /// The highest version as per semantic versioning specification 416 | /// 417 | /// Warning: may be pre-release or yanked 418 | #[must_use] 419 | pub fn highest_version(&self) -> &Version { 420 | self.versions 421 | .iter() 422 | .max_by_key(|v| SemverVersion::parse(&v.vers).ok()) 423 | // Safety: Versions inside the index will always adhere to 424 | // semantic versioning. If a crate is inside the index, at 425 | // least one version is available. 426 | .unwrap() 427 | } 428 | 429 | /// Returns crate version with the highest version number according to semver, 430 | /// but excludes pre-release and yanked versions. 431 | /// 432 | /// 0.x.y versions are included. 433 | /// 434 | /// May return `None` if the crate has only pre-release or yanked versions. 435 | #[must_use] 436 | pub fn highest_normal_version(&self) -> Option<&Version> { 437 | self.versions 438 | .iter() 439 | .filter(|v| !v.is_yanked()) 440 | .filter_map(|v| Some((v, SemverVersion::parse(&v.vers).ok()?))) 441 | .filter(|(_, sem)| sem.pre.is_empty()) 442 | .max_by(|a, b| a.1.cmp(&b.1)) 443 | .map(|(v, _)| v) 444 | } 445 | 446 | /// Crate's unique registry name. 
Case-sensitive, mostly. 447 | #[inline] 448 | #[must_use] 449 | pub fn name(&self) -> &str { 450 | self.versions[0].name() 451 | } 452 | 453 | /// The last release by date, even if it's yanked or less than highest version. 454 | /// 455 | /// See [`Crate::highest_normal_version`] 456 | #[inline] 457 | #[must_use] 458 | pub fn most_recent_version(&self) -> &Version { 459 | &self.versions[self.versions.len() - 1] 460 | } 461 | 462 | /// First version ever published. May be yanked. 463 | /// 464 | /// It is not guaranteed to be the lowest version number. 465 | #[inline] 466 | #[must_use] 467 | pub fn earliest_version(&self) -> &Version { 468 | &self.versions[0] 469 | } 470 | 471 | /// Unconstrained Latest version 472 | /// 473 | /// Warning: may not be the highest version and may be yanked 474 | #[cold] 475 | #[doc(hidden)] 476 | #[deprecated(note = "use most_recent_version")] 477 | #[must_use] 478 | pub fn latest_version(&self) -> &Version { 479 | self.most_recent_version() 480 | } 481 | 482 | /// Returns the highest version as per semantic versioning specification, 483 | /// filtering out versions with pre-release identifiers. 484 | /// 485 | /// Warning: may be yanked 486 | #[cold] 487 | #[doc(hidden)] 488 | #[deprecated(note = "use highest_normal_version")] 489 | #[must_use] 490 | pub fn highest_stable_version(&self) -> Option<&Version> { 491 | self.versions 492 | .iter() 493 | .filter_map(|v| Some((v, SemverVersion::parse(&v.vers).ok()?))) 494 | .filter(|(_, sem)| sem.pre.is_empty()) 495 | .max_by(|a, b| a.1.cmp(&b.1)) 496 | .map(|(v, _)| v) 497 | } 498 | 499 | /// Parse an index file with all of crate's versions. 500 | /// 501 | /// The file must contain at least one version. 
502 | #[inline] 503 | pub fn new>(index_path: P) -> io::Result { 504 | let lines = std::fs::read(index_path)?; 505 | Self::from_slice(&lines) 506 | } 507 | 508 | /// Parse crate file from in-memory JSON-lines data 509 | #[inline] 510 | pub fn from_slice(bytes: &[u8]) -> io::Result { 511 | let mut dedupe = DedupeContext::new(); 512 | Self::from_slice_with_context(bytes, &mut dedupe) 513 | } 514 | } 515 | -------------------------------------------------------------------------------- /tests/crates_index.rs: -------------------------------------------------------------------------------- 1 | mod git; 2 | mod names; 3 | mod sparse_index; 4 | mod error { 5 | #[test] 6 | fn error_is_send() { 7 | fn is_send() {} 8 | is_send::(); 9 | } 10 | } 11 | 12 | use crates_index::{Crate, Dependency, Version}; 13 | 14 | #[test] 15 | fn sizes() { 16 | assert!(std::mem::size_of::() <= 152); 17 | assert!(std::mem::size_of::() <= 16); 18 | assert!(std::mem::size_of::() <= 104); 19 | } 20 | 21 | #[test] 22 | fn semver() { 23 | let c = Crate::from_slice(r#"{"vers":"1.0.0", "name":"test", "deps":[], "features":{}, "cksum":"1234567890123456789012345678901234567890123456789012345678901234", "yanked":false} 24 | {"vers":"1.2.0-alpha.1", "name":"test", "deps":[], "features":{}, "cksum":"1234567890123456789012345678901234567890123456789012345678901234", "yanked":false} 25 | {"vers":"1.0.1", "name":"test", "deps":[], "features":{}, "cksum":"1234567890123456789012345678901234567890123456789012345678901234", "yanked":false}"#.as_bytes()).unwrap(); 26 | assert_eq!(c.most_recent_version().version(), "1.0.1"); 27 | assert_eq!(c.highest_version().version(), "1.2.0-alpha.1"); 28 | assert_eq!(c.highest_normal_version().unwrap().version(), "1.0.1"); 29 | } 30 | 31 | #[test] 32 | fn features2() { 33 | let c = Crate::from_slice(br#"{"vers":"1.0.0", "name":"test", "deps":[], "features":{"a":["one"], "b":["x"]},"features2":{"a":["two"], "c":["y"]}, 
"cksum":"1234567890123456789012345678901234567890123456789012345678901234"}"#).unwrap(); 34 | let f2 = c.most_recent_version().features(); 35 | 36 | assert_eq!(3, f2.len()); 37 | assert_eq!(["one", "two"], &f2["a"][..]); 38 | assert_eq!(["x"], &f2["b"][..]); 39 | assert_eq!(["y"], &f2["c"][..]); 40 | } 41 | 42 | #[test] 43 | fn rust_version() { 44 | let c = Crate::from_slice(br#"{"vers":"1.0.0", "name":"test", "deps":[], "features":{},"features2":{}, "cksum":"1234567890123456789012345678901234567890123456789012345678901234", "rust_version":"1.64.0"}"#).unwrap(); 45 | assert_eq!(c.most_recent_version().rust_version(), Some("1.64.0")); 46 | } 47 | -------------------------------------------------------------------------------- /tests/fixtures/autocfg.txt: -------------------------------------------------------------------------------- 1 | {"name":"autocfg","vers":"0.0.1","deps":[],"cksum":"ea7663289d0f977de588bc604662f450e05e5ad47e728f93062394291b68be8f","features":{},"yanked":false} 2 | {"name":"autocfg","vers":"0.1.0","deps":[],"cksum":"23213af7601f0f2d929f73d2a772804562cb09063f50bba9c361f86d6a0376f8","features":{},"yanked":false} 3 | {"name":"autocfg","vers":"0.1.1","deps":[],"cksum":"4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727","features":{},"yanked":false} 4 | {"name":"autocfg","vers":"0.1.2","deps":[],"cksum":"a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799","features":{},"yanked":false} 5 | {"name":"autocfg","vers":"0.1.3","deps":[],"cksum":"dfb37ca32a3d9d88f18d08bac8d28368b8ee1f14f8b08eb62999c51720035b55","features":{},"yanked":true} 6 | {"name":"autocfg","vers":"0.1.4","deps":[],"cksum":"0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf","features":{},"yanked":false} 7 | {"name":"autocfg","vers":"0.1.5","deps":[],"cksum":"22130e92352b948e7e82a49cdb0aa94f2211761117f29e052dd397c1ac33542b","features":{},"yanked":false} 8 | 
{"name":"autocfg","vers":"0.1.6","deps":[],"cksum":"b671c8fb71b457dd4ae18c4ba1e59aa81793daacc361d82fcd410cef0d491875","features":{},"yanked":false} 9 | {"name":"autocfg","vers":"0.1.7","deps":[],"cksum":"1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2","features":{},"yanked":false} 10 | {"name":"autocfg","vers":"1.0.0","deps":[],"cksum":"f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d","features":{},"yanked":false} 11 | {"name":"autocfg","vers":"1.0.1","deps":[],"cksum":"cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a","features":{},"yanked":false} 12 | {"name":"autocfg","vers":"1.1.0","deps":[],"cksum":"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa","features":{},"yanked":false} 13 | {"name":"autocfg","vers":"0.1.8","deps":[{"name":"autocfg","req":"^1.1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78","features":{},"yanked":false} 14 | -------------------------------------------------------------------------------- /tests/fixtures/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "dl": "https://static.crates.io/crates", 3 | "api": "https://crates.io" 4 | } 5 | -------------------------------------------------------------------------------- /tests/fixtures/sparse_registry_cache/cargo_home/registry/index/index.crates.io-6f17d22bba15001f/.cache/au/to/autocfg: -------------------------------------------------------------------------------- 1 | etag: 
W/"aa975a09419f9c8f61762a3d06fdb67d"0.0.1{"name":"autocfg","vers":"0.0.1","deps":[],"cksum":"ea7663289d0f977de588bc604662f450e05e5ad47e728f93062394291b68be8f","features":{},"yanked":false,"links":null}0.1.0{"name":"autocfg","vers":"0.1.0","deps":[],"cksum":"23213af7601f0f2d929f73d2a772804562cb09063f50bba9c361f86d6a0376f8","features":{},"yanked":false,"links":null}0.1.1{"name":"autocfg","vers":"0.1.1","deps":[],"cksum":"4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727","features":{},"yanked":false,"links":null}0.1.2{"name":"autocfg","vers":"0.1.2","deps":[],"cksum":"a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799","features":{},"yanked":false,"links":null}0.1.3{"name":"autocfg","vers":"0.1.3","deps":[],"cksum":"dfb37ca32a3d9d88f18d08bac8d28368b8ee1f14f8b08eb62999c51720035b55","features":{},"yanked":true,"links":null}0.1.4{"name":"autocfg","vers":"0.1.4","deps":[],"cksum":"0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf","features":{},"yanked":false,"links":null}0.1.5{"name":"autocfg","vers":"0.1.5","deps":[],"cksum":"22130e92352b948e7e82a49cdb0aa94f2211761117f29e052dd397c1ac33542b","features":{},"yanked":false,"links":null}0.1.6{"name":"autocfg","vers":"0.1.6","deps":[],"cksum":"b671c8fb71b457dd4ae18c4ba1e59aa81793daacc361d82fcd410cef0d491875","features":{},"yanked":false,"links":null}0.1.7{"name":"autocfg","vers":"0.1.7","deps":[],"cksum":"1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2","features":{},"yanked":false,"links":null}1.0.0{"name":"autocfg","vers":"1.0.0","deps":[],"cksum":"f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d","features":{},"yanked":false,"links":null}1.0.1{"name":"autocfg","vers":"1.0.1","deps":[],"cksum":"cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a","features":{},"yanked":false,"links":null}1.1.0{"name":"autocfg","vers":"1.1.0","deps":[],"cksum":"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa","features":{},"yanked":fals
e,"links":null}0.1.8{"name":"autocfg","vers":"0.1.8","deps":[{"name":"autocfg","req":"^1.1.0","features":[],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78","features":{},"yanked":false,"links":null} -------------------------------------------------------------------------------- /tests/git/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "git-https")] 2 | pub(crate) mod with_https { 3 | use crates_index::git::URL; 4 | use crates_index::GitIndex; 5 | use std::time::SystemTime; 6 | 7 | #[test] 8 | fn changes() { 9 | let index = shared_index(); 10 | let ch = index.changes().unwrap(); 11 | let mut last_time = SystemTime::now(); 12 | let desired = 500; 13 | let mut count = 0; 14 | let mut missing = 0; 15 | for c in ch.take(desired) { 16 | let c = c.unwrap(); 17 | count += 1; 18 | if index.crate_(&c.crate_name()).is_none() { 19 | eprintln!( 20 | "{} is changed but couldn't be found in the Git database", 21 | c.crate_name() 22 | ); 23 | missing += 1 24 | } 25 | assert!(last_time >= c.time()); 26 | last_time = c.time(); 27 | } 28 | assert_eq!(count, desired); 29 | if missing != 0 { 30 | eprintln!("Couldn't find {missing} crates when looking them up - strange") 31 | } 32 | } 33 | 34 | #[test] 35 | fn crates() { 36 | let repo = shared_index(); 37 | assert_eq!("time", repo.crate_("time").unwrap().name()); 38 | 39 | let mut found_first_crate = false; 40 | let mut found_second_crate = false; 41 | 42 | // Note that crates are roughly ordered in reverse. 
43 | for c in repo.crates() { 44 | if c.name() == "zzzz" { 45 | found_first_crate = true; 46 | } else if c.name() == "zulip" { 47 | found_second_crate = true; 48 | } 49 | if found_first_crate && found_second_crate { 50 | break; 51 | } 52 | } 53 | assert!(found_first_crate); 54 | assert!(found_second_crate); 55 | 56 | assert!( 57 | GitIndex::try_with_path(repo.path(), repo.url()) 58 | .expect("no error opening") 59 | .is_some(), 60 | "index present as we worked with it" 61 | ); 62 | } 63 | 64 | #[test] 65 | fn open_without_auto_clone() { 66 | let tmp_dir = tempfile::TempDir::new().unwrap(); 67 | assert!( 68 | GitIndex::try_with_path(tmp_dir.path(), crates_index::git::URL) 69 | .unwrap() 70 | .is_none(), 71 | "no index present and none checked out" 72 | ); 73 | assert!( 74 | GitIndex::try_from_url("https://example.com/repo/doesnotexist") 75 | .unwrap() 76 | .is_none(), 77 | "no index present and none checked out" 78 | ); 79 | } 80 | 81 | #[test] 82 | #[serial_test::serial] 83 | fn with_path_clones_bare_index_automatically() { 84 | let tmp_dir = tempfile::TempDir::new().unwrap(); 85 | let path = tmp_dir.path().join("some/sub/dir/testing/abc"); 86 | 87 | let mut repo = GitIndex::with_path(path, URL).expect("Failed to clone crates.io index"); 88 | 89 | fn test_sval(repo: &GitIndex) { 90 | let krate = repo.crate_("sval").expect("Could not find the crate sval in the index"); 91 | 92 | let version = krate 93 | .versions() 94 | .iter() 95 | .find(|v| v.version() == "0.0.1") 96 | .expect("Version 0.0.1 of sval does not exist?"); 97 | let dep_with_package_name = version 98 | .dependencies() 99 | .iter() 100 | .find(|d| d.name() == "serde_lib") 101 | .expect("sval does not have expected dependency?"); 102 | assert_ne!(dep_with_package_name.name(), dep_with_package_name.package().unwrap()); 103 | assert_eq!( 104 | dep_with_package_name.crate_name(), 105 | dep_with_package_name.package().unwrap() 106 | ); 107 | } 108 | 109 | test_sval(&repo); 110 | repo.update().expect("Failed 
to fetch crates.io index"); 111 | test_sval(&repo); 112 | } 113 | 114 | #[test] 115 | #[serial_test::serial] 116 | fn opens_bare_index_and_can_update_it() { 117 | let mut repo = shared_index(); 118 | fn test_sval(repo: &GitIndex) { 119 | let krate = repo.crate_("sval").expect("Could not find the crate sval in the index"); 120 | 121 | let version = krate 122 | .versions() 123 | .iter() 124 | .find(|v| v.version() == "0.0.1") 125 | .expect("Version 0.0.1 of sval does not exist?"); 126 | let dep_with_package_name = version 127 | .dependencies() 128 | .iter() 129 | .find(|d| d.name() == "serde_lib") 130 | .expect("sval does not have expected dependency?"); 131 | assert_ne!(dep_with_package_name.name(), dep_with_package_name.package().unwrap()); 132 | assert_eq!( 133 | dep_with_package_name.crate_name(), 134 | dep_with_package_name.package().unwrap() 135 | ); 136 | } 137 | 138 | test_sval(&repo); 139 | repo.update().expect("Failed to fetch crates.io index"); 140 | test_sval(&repo); 141 | 142 | let time_before_setting_commit_to_past = repo.time().unwrap(); 143 | repo.set_commit_from_refspec("@~100").unwrap(); 144 | assert_ne!( 145 | repo.time().unwrap(), 146 | time_before_setting_commit_to_past, 147 | "different commits have different times" 148 | ); 149 | } 150 | 151 | #[test] 152 | fn reads_replaced_source() { 153 | let index = shared_index(); 154 | let _config = index 155 | .index_config() 156 | .expect("we are able to obtain and parse the configuration of the default registry"); 157 | } 158 | 159 | #[test] 160 | fn crate_dependencies_can_be_read() { 161 | let index = shared_index(); 162 | 163 | let crate_ = index 164 | .crate_("sval") 165 | .expect("Could not find the crate libnotify in the index"); 166 | let _ = format!("supports debug {crate_:?}"); 167 | 168 | let version = crate_ 169 | .versions() 170 | .iter() 171 | .find(|v| v.version() == "0.0.1") 172 | .expect("Version 0.0.1 of sval does not exist?"); 173 | let dep_with_package_name = version 174 | 
.dependencies() 175 | .iter() 176 | .find(|d| d.name() == "serde_lib") 177 | .expect("sval does not have expected dependency?"); 178 | assert_ne!(dep_with_package_name.name(), dep_with_package_name.package().unwrap()); 179 | assert_eq!( 180 | dep_with_package_name.crate_name(), 181 | dep_with_package_name.package().unwrap() 182 | ); 183 | } 184 | 185 | #[test] 186 | #[serial_test::serial] 187 | fn can_update_index_explicitly() { 188 | let mut index = shared_index(); 189 | index 190 | .update() 191 | .map_err(|e| format!("could not fetch cargo's index in {}: {}", index.path().display(), e)) 192 | .unwrap(); 193 | assert!(index.crate_("crates-index").is_some()); 194 | assert!(index.crate_("toml").is_some()); 195 | assert!(index.crate_("gcc").is_some()); 196 | assert!(index.crate_("cc").is_some()); 197 | assert!(index.crate_("CC").is_some()); 198 | assert!(index.crate_("無").is_none()); 199 | } 200 | 201 | pub(crate) fn shared_index() -> GitIndex { 202 | static LOCK: parking_lot::Mutex<()> = parking_lot::Mutex::new(()); 203 | let _guard = LOCK.lock(); 204 | 205 | let index_path = "tests/fixtures/git-registry"; 206 | if is_ci::cached() { 207 | let index = GitIndex::new_cargo_default().expect("CI has just cloned this index and its ours and valid"); 208 | assert!( 209 | GitIndex::try_new_cargo_default().unwrap().is_some(), 210 | "index should exist if we just retrieved it" 211 | ); 212 | index 213 | } else { 214 | GitIndex::with_path(index_path, URL).expect("clone works and there is no racing") 215 | } 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /tests/mem.rs: -------------------------------------------------------------------------------- 1 | #[cfg(all(feature = "parallel", feature = "git"))] 2 | mod mem { 3 | use bytesize::ByteSize; 4 | use cap::Cap; 5 | use std::alloc; 6 | use std::time::Instant; 7 | 8 | #[global_allocator] 9 | static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::max_value()); 10 | 11 | 
#[test] 12 | #[cfg_attr(debug_assertions, ignore = "too slow when running in debug mode")] 13 | fn usage() { 14 | use crates_index::GitIndex; 15 | use rayon::iter::ParallelIterator; 16 | 17 | let index = GitIndex::new_cargo_default().unwrap(); 18 | 19 | let before = ALLOCATOR.allocated(); 20 | // let all_crates: Vec<_> = index.crates().collect(); 21 | let start = Instant::now(); 22 | let all_crates: Vec<_> = index.crates_parallel().map(|c| c.unwrap()).collect(); 23 | let after = ALLOCATOR.allocated(); 24 | let used = after - before; 25 | assert!(all_crates.len() > 89000); 26 | let elapsed = start.elapsed().as_secs_f32(); 27 | let per_crate = used / all_crates.len(); 28 | eprintln!( 29 | "used mem: {}B for {} crates, {}B per crate, took {elapsed:.02}s [total-mem: {total}, peak-mem: {peak}]", 30 | ByteSize(used as u64), 31 | all_crates.len(), 32 | per_crate, 33 | total = ByteSize(ALLOCATOR.total_allocated() as u64), 34 | peak = ByteSize(ALLOCATOR.max_allocated() as u64), 35 | ); 36 | assert!( 37 | per_crate < 10_000, 38 | "per crate limit {per_crate}B should remain below memory limit" 39 | ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/names/mod.rs: -------------------------------------------------------------------------------- 1 | use crates_index::Names; 2 | 3 | #[test] 4 | fn empty_string_is_nothing_special() { 5 | assert_eq!(assert_count(Names::new("").unwrap()), 1); 6 | } 7 | 8 | #[test] 9 | fn name_without_separators_yields_name() { 10 | assert_eq!(assert_count(Names::new("serde").unwrap()), 1); 11 | } 12 | 13 | #[test] 14 | fn permutation_counts() { 15 | assert_eq!(assert_count(Names::new("a-b").unwrap()), 2); 16 | assert_eq!(assert_count(Names::new("a-b_c").unwrap()), 4); 17 | assert_eq!(assert_count(Names::new("a_b_c").unwrap()), 4); 18 | assert_eq!(assert_count(Names::new("a_b_c-d").unwrap()), 8); 19 | } 20 | 21 | #[test] 22 | fn max_permutation_count_causes_error() { 23 | assert_eq!( 24 | 
assert_count(Names::new("a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p").expect("15 separators are fine")), 25 | 32768 26 | ); 27 | assert!( 28 | Names::new("a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r").is_none(), 29 | "16 are not fine anymore" 30 | ); 31 | } 32 | 33 | #[test] 34 | fn permutations() { 35 | for (name, expected) in [ 36 | ("parking_lot", &["parking_lot", "parking-lot"] as &[_]), // the input name is always the first one returned. 37 | ( 38 | "a-b_c-d", // input name -> all-hyphens -> all_underscores -> rest 39 | &[ 40 | "a-b_c-d", "a-b-c-d", "a_b_c_d", "a-b_c_d", "a_b-c_d", "a-b-c_d", "a_b_c-d", "a_b-c-d", 41 | ], 42 | ), 43 | ("a_b", &["a_b", "a-b"]), 44 | ("a-b", &["a-b", "a_b"]), 45 | ("a-b-c", &["a-b-c", "a_b_c", "a-b_c", "a_b-c"]), 46 | ( 47 | "a-b-c-d", 48 | &[ 49 | "a-b-c-d", "a_b_c_d", "a-b_c_d", "a_b-c_d", "a-b-c_d", "a_b_c-d", "a-b_c-d", "a_b-c-d", 50 | ], 51 | ), 52 | ( 53 | "a_b_c_d", 54 | &[ 55 | "a_b_c_d", "a-b-c-d", "a-b_c_d", "a_b-c_d", "a-b-c_d", "a_b_c-d", "a-b_c-d", "a_b-c-d", 56 | ], 57 | ), 58 | ] { 59 | let names: Vec<String> = Names::new(name).unwrap().collect(); 60 | assert_eq!(&names, expected); 61 | } 62 | } 63 | 64 | fn assert_count(names: Names) -> usize { 65 | let expected = names.clone().collect::<Vec<_>>().len(); 66 | assert_eq!( 67 | names.count(), 68 | expected, 69 | "the computed count should match the actual one" 70 | ); 71 | expected 72 | } 73 | -------------------------------------------------------------------------------- /tests/sparse_index/mod.rs: -------------------------------------------------------------------------------- 1 | #[test] 2 | fn crate_from_cache() { 3 | let index = crates_index::SparseIndex::with_path( 4 | std::path::Path::new(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) 5 | .join("tests/fixtures/sparse_registry_cache/cargo_home"), 6 | crates_index::sparse::URL, 7 | ) 8 | .unwrap(); 9 | 10 | let crate_ = index.crate_from_cache("autocfg").unwrap(); 11 | 12 | assert_eq!(crate_.name(), "autocfg"); 13 | 
assert_eq!(crate_.versions().len(), 13); 14 | assert_eq!(crate_.earliest_version().version(), "0.0.1"); 15 | assert_eq!(crate_.highest_version().version(), "1.1.0"); 16 | } 17 | 18 | #[cfg(all(test, feature = "sparse"))] 19 | mod with_sparse_http_feature { 20 | use crates_index::SparseIndex; 21 | 22 | #[inline] 23 | fn crates_io() -> SparseIndex { 24 | SparseIndex::with_path( 25 | std::path::Path::new(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) 26 | .join("tests/fixtures/sparse_registry_cache/cargo_home"), 27 | crates_index::sparse::URL, 28 | ) 29 | .unwrap() 30 | } 31 | 32 | mod make_cache_request { 33 | use crate::sparse_index::with_sparse_http_feature::crates_io; 34 | use http::{header, Request}; 35 | 36 | // Validates that a valid request is generated when there is no cache entry 37 | // for a crate 38 | #[test] 39 | fn generate_request_for_missing_cache_entry() { 40 | let index = crates_io(); 41 | let builder = index.make_cache_request("serde").unwrap(); 42 | let req: Request<Vec<u8>> = builder.body(vec![]).unwrap(); 43 | 44 | assert_eq!(req.uri(), format!("{}se/rd/serde", index.url()).as_str()); 45 | assert!(req.headers().get(header::IF_NONE_MATCH).is_none()); 46 | assert!(req.headers().get(header::IF_MODIFIED_SINCE).is_none()); 47 | assert_eq!(req.headers().get(header::ACCEPT_ENCODING).unwrap(), "gzip,identity"); 48 | assert_eq!( 49 | req.headers() 50 | .get(header::HeaderName::from_static("cargo-protocol")) 51 | .unwrap(), 52 | "version=1" 53 | ); 54 | assert_eq!(req.headers().get(header::ACCEPT).unwrap(), "text/plain"); 55 | } 56 | 57 | // Validates that a valid request is generated when there is a local cache 58 | // entry for a crate 59 | #[test] 60 | fn generate_request_for_local_cache_entry() { 61 | let index = crates_io(); 62 | let builder = index.make_cache_request("autocfg").unwrap(); 63 | let req: Request<Vec<u8>> = builder.body(vec![]).unwrap(); 64 | 65 | assert_eq!(req.uri(), format!("{}au/to/autocfg", index.url()).as_str()); 66 | assert_eq!( 67 | 
req.headers().get(header::IF_NONE_MATCH).unwrap(), 68 | "W/\"aa975a09419f9c8f61762a3d06fdb67d\"" 69 | ); 70 | assert!(req.headers().get(header::IF_MODIFIED_SINCE).is_none()); 71 | } 72 | } 73 | 74 | mod parse_cache_response { 75 | use crate::sparse_index::with_sparse_http_feature::crates_io; 76 | use http::header; 77 | 78 | // curl -v -H 'accept-encoding: gzip,identity' -H 'if-none-match: W/"aa975a09419f9c8f61762a3d06fdb67d"' https://index.crates.io/au/to/autocfg 79 | // as of 2023-06-15 80 | const AUTOCFG_INDEX_ENTRY: &[u8] = include_bytes!("../../tests/fixtures/autocfg.txt"); 81 | 82 | // Validates that a response with the full index contents is properly parsed 83 | #[test] 84 | fn modified_response() { 85 | let index = crates_io(); 86 | let response = http::Response::builder() 87 | .status(http::StatusCode::OK) 88 | .header(header::ETAG, "W/\"5f15de4a723e10b3f9eaf048d693cccc\"") 89 | .body(AUTOCFG_INDEX_ENTRY.to_vec()) 90 | .unwrap(); 91 | 92 | let krate = index.parse_cache_response("autocfg", response, false).unwrap().unwrap(); 93 | assert_eq!(krate.highest_version().version(), "1.1.0"); 94 | } 95 | 96 | // Validates that a response for an index entry that has not been modified is 97 | // parsed correctly 98 | #[test] 99 | fn unmodified_response() { 100 | let index = crates_io(); 101 | let response = http::Response::builder() 102 | .status(http::StatusCode::NOT_MODIFIED) 103 | .header(header::ETAG, "W/\"5f15de4a723e10b3f9eaf048d693cccc\"") 104 | .body(Vec::new()) 105 | .unwrap(); 106 | 107 | let krate = index.parse_cache_response("autocfg", response, false).unwrap().unwrap(); 108 | assert_eq!(krate.name(), "autocfg"); 109 | assert_eq!(krate.versions().len(), 13); 110 | assert_eq!(krate.earliest_version().version(), "0.0.1"); 111 | assert_eq!(krate.highest_version().version(), "1.1.0"); 112 | } 113 | 114 | // Validates that a response for an index entry that does not exist is 115 | // parsed correctly 116 | #[test] 117 | fn missing_response() { 118 | let index = 
crates_io(); 119 | let response = http::Response::builder() 120 | .status(http::StatusCode::NOT_FOUND) 121 | .body(Vec::new()) 122 | .unwrap(); 123 | 124 | assert!(index.parse_cache_response("serde", response, false).unwrap().is_none()); 125 | } 126 | } 127 | 128 | #[test] 129 | fn make_config_request() { 130 | use crate::sparse_index::with_sparse_http_feature::crates_io; 131 | use http::{header, Request}; 132 | 133 | let index = crates_io(); 134 | let builder = index.make_config_request().unwrap(); 135 | let req: Request<Vec<u8>> = builder.body(vec![]).unwrap(); 136 | 137 | assert_eq!(req.uri(), format!("{}config.json", index.url()).as_str()); 138 | assert!(req.headers().get(header::IF_NONE_MATCH).is_none()); 139 | assert!(req.headers().get(header::IF_MODIFIED_SINCE).is_none()); 140 | assert_eq!(req.headers().get(header::ACCEPT_ENCODING).unwrap(), "gzip,identity"); 141 | assert_eq!( 142 | req.headers() 143 | .get(header::HeaderName::from_static("cargo-protocol")) 144 | .unwrap(), 145 | "version=1" 146 | ); 147 | assert_eq!(req.headers().get(header::ACCEPT).unwrap(), "text/plain"); 148 | } 149 | 150 | mod parse_config_response { 151 | use crates_index::{Error, SparseIndex}; 152 | use std::io; 153 | 154 | // curl -v -H 'accept-encoding: gzip,identity' https://index.crates.io/config.json 155 | const CONFIG_JSON: &[u8] = include_bytes!("../../tests/fixtures/config.json"); 156 | 157 | fn crates_io_tmp() -> (tempfile::TempDir, SparseIndex) { 158 | let dir = tempfile::tempdir().unwrap(); 159 | let index = SparseIndex::with_path(dir.path(), crates_index::sparse::URL).unwrap(); 160 | (dir, index) 161 | } 162 | 163 | fn make_response() -> http::Response<Vec<u8>> { 164 | http::Response::builder() 165 | .status(http::StatusCode::OK) 166 | .body(CONFIG_JSON.to_vec()) 167 | .unwrap() 168 | } 169 | 170 | #[test] 171 | fn parse() { 172 | let (_dir, index) = crates_io_tmp(); 173 | 174 | let config = index.parse_config_response(make_response(), false).unwrap(); 175 | 176 | assert_eq!(config.dl, 
"https://static.crates.io/crates"); 177 | assert_eq!(config.api.as_deref(), Some("https://crates.io")); 178 | 179 | assert!( 180 | matches!(index.index_config(), Err(Error::Io(_))), 181 | "the configuration shouldn't exist, hence we cannot query the index configuration" 182 | ); 183 | } 184 | 185 | #[test] 186 | fn parse_and_store() { 187 | let (_dir, index) = crates_io_tmp(); 188 | 189 | match index.index_config() { 190 | Err(Error::Io(err)) => { 191 | assert_eq!(err.kind(), io::ErrorKind::NotFound); 192 | } 193 | _ => unreachable!("precondition: there is no configuration and this triggers an IO error"), 194 | }; 195 | 196 | let config = index.parse_config_response(make_response(), true).unwrap(); 197 | let stored_config = index.index_config().unwrap(); 198 | 199 | assert_eq!(config.dl, stored_config.dl); 200 | assert_eq!(config.api, stored_config.api); 201 | } 202 | } 203 | } 204 | --------------------------------------------------------------------------------