├── .github └── workflows │ └── release.yml ├── .gitignore ├── Cargo.toml ├── Changes.md ├── LICENSE ├── README.md ├── README_CN.md ├── logo.jpg └── src ├── download.rs ├── main.rs └── utilities.rs /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Tests and release 2 | on: 3 | push: 4 | branches: [main] 5 | tags: ["*"] 6 | pull_request: 7 | branches: [main] 8 | env: 9 | CRATE_NAME: rustyface 10 | # GITHUB_TOKEN: ${{ github.token }} 11 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 12 | RUST_BACKTRACE: 1 13 | jobs: 14 | test-matrix: 15 | name: ${{ matrix.platform.os-name }} with rust ${{ matrix.toolchain }} 16 | runs-on: ${{ matrix.platform.runs-on }} 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | platform: 21 | # Platforms that don't work: 22 | # 23 | # - sparc64-unknown-linux-gnu - cannot compile openssl-sys 24 | # - x86_64-unknown-illumos - weird error compiling openssl - "bin/sh: 1: granlib: not found" 25 | 26 | # - os-name: FreeBSD-x86_64 27 | # runs-on: ubuntu-24.04 28 | # target: x86_64-unknown-freebsd 29 | # skip-tests: true 30 | - os-name: Linux-x86_64 31 | runs-on: ubuntu-24.04 32 | target: x86_64-unknown-linux-musl 33 | - os-name: Linux-x86_64 (rustls-native-roots) 34 | runs-on: ubuntu-24.04 35 | target: x86_64-unknown-linux-musl 36 | features: rustls-tls-native-roots 37 | - os-name: Linux-x86_64 (native-tls) 38 | runs-on: ubuntu-24.04 39 | # We need to use gnu or we cannot link against openssl. 
40 | target: x86_64-unknown-linux-gnu 41 | features: native-tls 42 | - os-name: Linux-x86_64 (native-tls-vendored) 43 | runs-on: ubuntu-24.04 44 | target: x86_64-unknown-linux-musl 45 | features: native-tls-vendored 46 | - os-name: Linux-aarch64 47 | runs-on: ubuntu-24.04 48 | target: aarch64-unknown-linux-musl 49 | - os-name: Linux-arm 50 | runs-on: ubuntu-24.04 51 | target: arm-unknown-linux-musleabi 52 | # - os-name: Linux-i686 53 | # runs-on: ubuntu-24.04 54 | # target: i686-unknown-linux-musl 55 | # skip-tests: true 56 | # - os-name: Linux-powerpc 57 | # runs-on: ubuntu-24.04 58 | # target: powerpc-unknown-linux-gnu 59 | # skip-tests: true 60 | # - os-name: Linux-powerpc64 61 | # runs-on: ubuntu-24.04 62 | # target: powerpc64-unknown-linux-gnu 63 | # skip-tests: true 64 | # - os-name: Linux-powerpc64le 65 | # runs-on: ubuntu-24.04 66 | # target: powerpc64le-unknown-linux-gnu 67 | # skip-tests: true 68 | # - os-name: Linux-riscv64 # Unsupported for now 69 | # runs-on: ubuntu-24.04 70 | # target: riscv64gc-unknown-linux-gnu 71 | # - os-name: Linux-s390x 72 | # runs-on: ubuntu-24.04 73 | # target: s390x-unknown-linux-gnu 74 | # skip-tests: true 75 | # - os-name: NetBSD-x86_64 # Unsupported for now 76 | # runs-on: ubuntu-24.04 77 | # target: x86_64-unknown-netbsd 78 | # skip-tests: true 79 | - os-name: Windows-aarch64 80 | runs-on: windows-latest 81 | target: aarch64-pc-windows-msvc 82 | skip-tests: true 83 | - os-name: Windows-i686 84 | runs-on: windows-latest 85 | target: i686-pc-windows-msvc 86 | skip-tests: true 87 | - os-name: Windows-x86_64 88 | runs-on: windows-latest 89 | target: x86_64-pc-windows-msvc 90 | - os-name: macOS-x86_64 91 | runs-on: macOS-latest 92 | target: x86_64-apple-darwin 93 | - os-name: macOS-aarch64 94 | runs-on: macOS-latest 95 | target: aarch64-apple-darwin 96 | toolchain: 97 | - stable 98 | include: 99 | - platform: 100 | os-name: Linux-x86_64 101 | runs-on: ubuntu-24.04 102 | target: x86_64-unknown-linux-musl 103 | bin: rustyface 
104 | toolchain: beta 105 | - platform: 106 | os-name: Linux-x86_64 107 | runs-on: ubuntu-24.04 108 | target: x86_64-unknown-linux-musl 109 | bin: rustyface 110 | toolchain: nightly 111 | steps: 112 | - uses: actions/checkout@v4 113 | # - name: Set "--features" flag value 114 | # id: set-features-flag-value 115 | # shell: bash 116 | # run: | 117 | # if [ -n "${{ matrix.platform.features }}" ]; then 118 | # echo "features=${{ matrix.platform.features }}" >> "$GITHUB_OUTPUT" 119 | # else 120 | # echo "features=default" >> "$GITHUB_OUTPUT" 121 | # fi 122 | # cat "$GITHUB_OUTPUT" 123 | - name: Maybe install SSL packages 124 | run: sudo apt-get update --yes && sudo apt-get install --yes pkg-config openssl libssl-dev 125 | if: matrix.platform.features == 'native-tls' 126 | - name: Build binary 127 | uses: houseabsolute/actions-rust-cross@v1 128 | with: 129 | command: "build" 130 | target: ${{ matrix.platform.target }} 131 | toolchain: ${{ matrix.toolchain }} 132 | args: 133 | # "--locked --release --features ${{ steps.set-features-flag-value.outputs.features }}" 134 | "--release" 135 | strip: true 136 | - name: Run tests 137 | uses: houseabsolute/actions-rust-cross@v1 138 | with: 139 | command: "test" 140 | target: ${{ matrix.platform.target }} 141 | toolchain: ${{ matrix.toolchain }} 142 | args: 143 | # "--locked --release --features ${{ steps.set-features-flag-value.outputs.features }}" 144 | "--release" 145 | if: ${{ !matrix.platform.skip-tests }} 146 | - name: Publish artifacts and release 147 | uses: houseabsolute/actions-rust-release@v0 148 | with: 149 | executable-name: rustyface 150 | target: ${{ matrix.platform.target }} 151 | action-gh-release-parameters: '{ "make_latest": false }' 152 | if: matrix.toolchain == 'stable' && matrix.platform.features == '' 153 | 154 | test-alpine: 155 | name: Alpine Linux 156 | runs-on: ubuntu-24.04 157 | container: 158 | image: rust:alpine 159 | env: 160 | GITHUB_TOKEN: ${{ github.token }} 161 | volumes: 162 | - "${{ 
github.workspace }}:/workspace" 163 | options: "--workdir /workspace" 164 | steps: 165 | - uses: actions/checkout@v4 166 | - name: Install packages on Alpine 167 | shell: sh 168 | run: | 169 | apk update 170 | # file is used in an integration test 171 | apk add file musl-dev zstd-dev perl make openssl-dev 172 | - name: Run tests 173 | shell: sh 174 | # run: cargo test --locked 175 | run: cargo test 176 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # RustRover 17 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 18 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 19 | # and can be added to the global gitignore or merged into this file. For a more nuclear 20 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 21 | #.idea/ 22 | 23 | # Added by cargo 24 | 25 | /target 26 | .DS_STORE -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustyface" 3 | version = "0.1.2" 4 | authors = ["Xinyu Bao "] 5 | edition = "2021" 6 | description = "A Huggingface downloading CLI tool written in Rust." 
7 | license = "MIT" 8 | repository = "https://github.com/AspadaX/RustyFace" 9 | keywords = ["cli", "huggingface", "downloader"] 10 | categories = ["command-line-utilities", "network-programming"] 11 | 12 | [dependencies] 13 | clap = { version = "4.5.13", features = ["derive"] } 14 | fern = { version = "0.6.2", features = ["colored"] } 15 | futures-util = "0.3.30" 16 | git2 = "0.19.0" 17 | glob = "0.3.1" 18 | humantime = "2.1.0" 19 | indicatif = "0.17.8" 20 | log = "0.4.22" 21 | reqwest = { version = "0.12.5", features = ["blocking", "stream"] } 22 | sha2 = "0.10.8" 23 | tokio = { version = "1.39.2", features = ["full"] } 24 | openssl-sys = { version = "0.9", features = ["vendored"] } 25 | 26 | [[bin]] 27 | name = "rustyface" 28 | path = "src/main.rs" -------------------------------------------------------------------------------- /Changes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AspadaX/RustyFace/99f1f0129cc365577b10dbee8dec9aeafc422436/Changes.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Xinyu Bao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RustyFace 2 | A command line app for downloading Huggingface repositories with Rust. 3 | 4 |

5 | RustyFace Logo 6 |

7 | 8 |

9 | 10 | Crates.io 11 | 12 | 13 | License: MIT 14 | 15 |

16 | 17 | # Why use this? 18 | RustyFace does not require installing additional dependencies such as `git` or `git lfs`. It aims to be lightweight and portable. 19 | In addition, RustyFace is friendly to users in Mainland China, where access to HuggingFace is unstable, because this CLI app uses a mirror that can be accessed globally. 20 | 21 | The mirror site used in this project is `hf-mirror.com`. 22 | 23 | # How to Install and Use RustyFace 24 | To install RustyFace with Cargo, you first need to have Rust installed. For those new to Rust, please refer to the [official installation guide](https://doc.rust-lang.org/cargo/getting-started/installation.html). 25 | 26 | ## Quickstart - Without Installation 27 | You don't need to install Rust if you download the binary for your platform from the Release section. Then you can run this command to download Huggingface repositories: 28 | ``` 29 | rustyface_windows_x86 --repository sentence-transformers/all-MiniLM-L6-v2 --tasks 4 30 | ``` 31 | - `rustyface_windows_x86` is the name of the binary file that you downloaded from the Release section. 32 | - `--repository` is followed by the `repo_id` of the repository that you want to download from HuggingFace. 33 | - `--tasks` is followed by the number of concurrent downloads. For example, 4 means downloading 4 files at once. It is recommended to use a lower number if your network conditions do not support higher concurrency. 34 | 35 | ## Quickstart - With Installation 36 | If you would like to reuse the program, it is recommended to install RustyFace onto your system rather than using the binaries. Here is how you can do it. 
37 | 38 | ### Install Rust 39 | On Linux and macOS: 40 | ``` 41 | curl https://sh.rustup.rs -sSf | sh 42 | ``` 43 | On Windows, you can download the installation executable via this link: https://win.rustup.rs/ 44 | 45 | ### Install RustyFace 46 | After installing Rust, run this in your terminal: 47 | ``` 48 | cargo install rustyface 49 | ``` 50 | 51 | ### Use RustyFace to Download Repositories 52 | Try RustyFace out with this simple command: 53 | ``` 54 | rustyface --repository sentence-transformers/all-MiniLM-L6-v2 --tasks 4 55 | ``` 56 | - `--repository` is followed by the `repo_id` of the repository that you want to download from HuggingFace. 57 | - `--tasks` is followed by the number of concurrent downloads. For example, 4 means downloading 4 files at once. It is recommended to use a lower number if your network conditions do not support higher concurrency. 58 | 59 | 60 | # Feedback & Further Development 61 | Any participation is appreciated! Feel free to submit an issue, start a discussion, or open a pull request. You can find me on WeChat: `baoxinyu2007` or Discord: `https://discord.gg/UYfZeuPy` 62 | 63 | # License 64 | This project is licensed under the MIT License. See the LICENSE file for details. 65 | 66 | ## Packages Used 67 | - [clap](https://crates.io/crates/clap) for command line argument parsing. 68 | - [futures-util](https://crates.io/crates/futures-util) for asynchronous operations. 69 | - [indicatif](https://crates.io/crates/indicatif) for progress bars. 70 | - [log](https://crates.io/crates/log) for logging. 71 | - [reqwest](https://crates.io/crates/reqwest) for HTTP requests. 72 | - [sha2](https://crates.io/crates/sha2) for SHA-256 hashing. 73 | - [tokio](https://crates.io/crates/tokio) for asynchronous runtime. 74 | - [fern](https://crates.io/crates/fern) for logging configuration. 75 | - [humantime](https://crates.io/crates/humantime) for timestamp formatting in log output. 
76 | -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 | # RustyFace 2 | 一个使用Rust开发的命令行工具,用于下载Huggingface仓库。 3 | 4 |

5 | RustyFace Logo 6 |

7 | 8 |

9 | 10 | Crates.io 11 | 12 | 13 | License: MIT 14 | 15 |

16 | 17 | # 为什么使用RustyFace? 18 | RustyFace不需要安装额外的依赖,如`git`或`git lfs`等。它旨在轻量化和便携化。 19 | 此外,RustyFace对中国大陆用户友好,因为在中国大陆访问HuggingFace可能不稳定,而此CLI应用程序采用了可全球访问的镜像站点。 20 | 21 | 本项目使用的镜像站点是`hf-mirror.com` 22 | 23 | # 如何安装和使用RustyFace 24 | 首先,您需要安装Rust。对于Rust新手,请参考[官方安装指南](https://doc.rust-lang.org/cargo/getting-started/installation.html)。 25 | 26 | ## 快速入门 - 无需安装 27 | 如果您从Release部分下载了对应平台的二进制文件,则无需安装Rust。这样,您只需输入以下命令即可下载Huggingface仓库: 28 | ``` 29 | rustyface_windows_x86 --repository sentence-transformers/all-MiniLM-L6-v2 --tasks 4 30 | ``` 31 | - `rustyface_windows_x86`是您从Release部分下载的二进制文件名。 32 | - `--repository`后跟您想从HuggingFace下载的仓库的`repo_id`。 33 | - `--tasks`后跟并发下载数量。例如,4表示同时下载4个文件。如果您的网络条件不支持较高的并发性,建议使用较低的数值。 34 | 35 | ## 快速入门 - 安装后使用 36 | 如果您想重复使用该程序,建议将RustyFace安装到您的系统上,而不是使用二进制文件。以下是安装方法。 37 | 38 | ### 安装Rust 39 | 在Linux和macOS上: 40 | ``` 41 | curl https://sh.rustup.rs -sSf | sh 42 | ``` 43 | 在Windows上,您可以通过此链接下载安装程序:https://win.rustup.rs/ 44 | 45 | ### 安装RustyFace 46 | 安装Rust后,只需在终端中输入: 47 | ``` 48 | cargo install rustyface 49 | ``` 50 | 51 | ### 使用RustyFace下载仓库 52 | 尝试使用以下简单命令行: 53 | ``` 54 | rustyface --repository sentence-transformers/all-MiniLM-L6-v2 --tasks 4 55 | ``` 56 | - `--repository`后跟您想从HuggingFace下载的仓库的`repo_id`。 57 | - `--tasks`后跟并发下载数量。例如,4表示同时下载4个文件。如果您的网络条件不支持较高的并发性,建议使用较低的数值。 58 | 59 | # 反馈与进一步开发 60 | 非常感谢任何参与!欢迎提交问题、讨论或拉取请求。您可以在微信上找到我:`baoxinyu2007`或Discord:`https://discord.gg/UYfZeuPy` 61 | 62 | # 许可证 63 | 本项目采用MIT许可证。详情请参阅LICENSE文件。 64 | 65 | ## 使用的包 66 | - [clap](https://crates.io/crates/clap) 用于命令行参数解析。 67 | - [futures-util](https://crates.io/crates/futures-util) 用于异步操作。 68 | - [indicatif](https://crates.io/crates/indicatif) 用于进度条显示。 69 | - [log](https://crates.io/crates/log) 用于日志记录。 70 | - [reqwest](https://crates.io/crates/reqwest) 用于HTTP请求。 71 | - [sha2](https://crates.io/crates/sha2) 用于SHA-256哈希运算。 72 | - [tokio](https://crates.io/crates/tokio) 用于异步运行时。 73 | - [fern](https://crates.io/crates/fern) 用于日志配置。 74 | - 
[chrono](https://crates.io/crates/chrono) 用于日期和时间处理。 -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AspadaX/RustyFace/99f1f0129cc365577b10dbee8dec9aeafc422436/logo.jpg -------------------------------------------------------------------------------- /src/download.rs: -------------------------------------------------------------------------------- 1 | use futures_util::StreamExt; 2 | use glob::glob; 3 | use std::io::{BufRead, Seek, Write}; 4 | 5 | use clap::Parser; 6 | use git2::{build::RepoBuilder, Repository}; 7 | use indicatif; 8 | use log::{debug, error, info, warn}; 9 | use sha2::Digest; 10 | 11 | #[derive(Parser, Debug)] 12 | #[command(version, about, long_about = None)] 13 | pub struct DownloadArguments { 14 | // below are cli arguments 15 | #[arg(short, long)] 16 | pub repository: String, 17 | #[arg(short, long, default_value_t = 4)] 18 | pub tasks: usize, 19 | 20 | // below are data that are needed for the program, NO NEED to be 21 | // passed from the cli arguments 22 | pub repository_local_path: Option, 23 | } 24 | 25 | impl DownloadArguments { 26 | pub fn clone_repository(&mut self) -> Result> { 27 | info!("Attempting to clone the repository: {}", &self.repository); 28 | fn ensure_trailing_slash(s: &str) -> String { 29 | if !s.ends_with('/') { 30 | format!("{}{}", s, '/') 31 | } else { 32 | s.to_string() 33 | } 34 | } 35 | 36 | // set the url with a base url 37 | let mut url = 38 | ensure_trailing_slash(option_env!("HF_ENDPOINT").unwrap_or("https://hf-mirror.com/")); 39 | url.push_str(self.repository.as_str()); 40 | 41 | let path_to_join = std::path::Path::new(&self.repository); 42 | 43 | // get the working directory as the place to clone the repository 44 | let working_directory_buffer = std::env::current_dir()?; 45 | let working_directory = working_directory_buffer.join(path_to_join); 46 | 47 | 
let repository = RepoBuilder::new().clone(&url, &working_directory)?; 48 | 49 | // store the repository path to the struct for future calls. 50 | self.repository_local_path = Some( 51 | repository 52 | .path() 53 | .parent() 54 | .unwrap() 55 | .to_string_lossy() 56 | .to_string(), 57 | ); 58 | 59 | // clone the repository to the specified directory 60 | return Ok(repository); 61 | } 62 | 63 | pub fn read_lfs_pointers( 64 | &self, 65 | repository_path: &std::path::PathBuf, 66 | ) -> Result, Box> { 67 | let gitattributes_path = repository_path.join(".gitattributes"); 68 | debug!( 69 | "As detected, `.gitattributes` file is located at: {:?}", 70 | &gitattributes_path 71 | ); 72 | 73 | let file = std::fs::File::open(gitattributes_path)?; 74 | let reader = std::io::BufReader::new(file); 75 | let mut lfs_files: Vec = Vec::new(); 76 | 77 | for line in reader.lines() { 78 | let line = line?; 79 | if line.contains("filter=lfs") { 80 | let parts: Vec<&str> = line.split_whitespace().collect(); 81 | if let Some(pattern) = parts.get(0) { 82 | let pattern_path = repository_path.join(pattern); 83 | for entry in glob(pattern_path.to_str().unwrap())? 
{ 84 | match entry { 85 | Ok(result) => { 86 | lfs_files.push( 87 | result.strip_prefix( 88 | self.repository_local_path.clone().unwrap() 89 | )?.to_string_lossy().to_string() 90 | ); 91 | debug!("LFS filepath extracted: {:?}", result); 92 | } 93 | Err(error) => error!( 94 | "LFS filepath pattern ({}) reading error occurred: {}", 95 | pattern, error 96 | ), 97 | } 98 | } 99 | } 100 | } 101 | } 102 | 103 | return Ok(lfs_files); 104 | } 105 | 106 | pub fn extract_lfs_urls( 107 | &self, 108 | repository_path: &std::path::PathBuf, 109 | lfs_files: Vec, 110 | base_url: &String, 111 | ) -> Result, std::io::Error> { 112 | debug!("Trying to locate filepath at: {:?}", &repository_path); 113 | 114 | let mut large_file_information: Vec = Vec::new(); 115 | 116 | for lfs_file in lfs_files { 117 | let pointer_filepath = repository_path.join(&lfs_file); 118 | debug!("Pointer filepath is located at {:?}", pointer_filepath); 119 | 120 | let file = std::fs::File::open(pointer_filepath)?; 121 | let reader = std::io::BufReader::new(file); 122 | 123 | let mut oid: Option = None; 124 | 125 | for line in reader.lines() { 126 | let line = line?; 127 | if line.starts_with("oid sha256:") { 128 | oid = Some(line.replace("oid sha256:", "").trim().to_string()); 129 | break; 130 | } 131 | } 132 | 133 | if let Some(oid) = oid { 134 | let url = format!( 135 | "{}/{}/resolve/main/{}", 136 | base_url, self.repository, &lfs_file 137 | ); 138 | debug!("Constructed URL: {}", &url); 139 | 140 | large_file_information.push( 141 | LargeFileInformation::new(url, oid) 142 | ); 143 | } else { 144 | debug!("OID not found in pointer file: {}", lfs_file); 145 | } 146 | } 147 | 148 | return Ok(large_file_information); 149 | } 150 | 151 | async fn download_single_file_resume( 152 | client: &reqwest::Client, 153 | url: &String, 154 | error: impl std::error::Error, 155 | file: &mut std::fs::File, 156 | hasher: &mut sha2::Sha256, 157 | download_progress: &mut u64, 158 | progress_bar: &indicatif::ProgressBar, 159 | 
) -> Result<(), Box> { 160 | // get the ending position of the file 161 | let start = file.seek(std::io::SeekFrom::End(0))?; 162 | debug!("Resume from position: {}", &start); 163 | 164 | // set the start position to where it ended 165 | let retry = client 166 | .get(url) 167 | .header("Range", format!("bytes={}-", start)) 168 | .send() 169 | .await?; 170 | 171 | let status = retry.status().is_success().clone(); 172 | 173 | // for the purpose of debugging, we print the header 174 | let headers = retry.headers().clone(); 175 | debug!("Headers when retrying: {:?}", headers); 176 | 177 | // streaming logic 178 | if status { 179 | let mut content = retry.bytes_stream(); 180 | 181 | while let Some(chunk) = content.next().await { 182 | let chunk = chunk?; 183 | file.write_all(&chunk)?; 184 | hasher.update(&chunk); 185 | 186 | *download_progress += chunk.len() as u64; 187 | progress_bar.set_position(*download_progress); 188 | } 189 | } 190 | 191 | return Ok(()); 192 | } 193 | 194 | /// a single thread for downloading files 195 | async fn download_single_file( 196 | client: reqwest::Client, 197 | url: String, 198 | repository_local_path: String, 199 | progress_bar: std::sync::Arc, 200 | expected_sha256: String, 201 | ) -> Result<(), Box> { 202 | info!("Downloading from URL: {}", &url); 203 | 204 | // setup a hasher for verifying sha256 205 | let mut hasher = sha2::Sha256::new(); 206 | let filename = url.split("/").last().unwrap(); 207 | let template_string = "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta}) - " 208 | .to_string(); 209 | let output_string = template_string + filename; 210 | 211 | // initiate the file download request 212 | let response = client.get(&url).send().await?; 213 | let total_filesize = response.content_length().ok_or("Failed to get filesize")?; 214 | 215 | // construct the eventual filepath, 216 | // make this file downloaded to the repository folder 217 | let filepath = 
std::path::Path::new(&repository_local_path).join(filename); 218 | 219 | // create a file for writing data 220 | let mut file = std::fs::File::create(filepath)?; 221 | 222 | // initiate the progress bar 223 | let pb = progress_bar.add(indicatif::ProgressBar::new(total_filesize)); 224 | pb.set_style( 225 | indicatif::ProgressStyle::default_bar() 226 | .template(output_string.as_str()) 227 | .expect("Error when trying to render a progress bar") 228 | .progress_chars("#>-"), 229 | ); 230 | let mut download_progress: u64 = 0; 231 | 232 | // check if the response is okay to perform saving logics 233 | if response.status().is_success() { 234 | let mut stream = response.bytes_stream(); 235 | 236 | while let Some(item) = stream.next().await { 237 | match item { 238 | Ok(chunk) => { 239 | // write file chunk 240 | file.write_all(&chunk)?; 241 | 242 | // store sha256 of the chunk to the hasher 243 | hasher.update(&chunk); 244 | 245 | download_progress += chunk.len() as u64; 246 | pb.set_position(download_progress); 247 | } 248 | Err(error) => loop { 249 | match DownloadArguments::download_single_file_resume( 250 | &client, 251 | &url, 252 | &error, 253 | &mut file, 254 | &mut hasher, 255 | &mut download_progress, 256 | &pb, 257 | ) 258 | .await 259 | { 260 | Ok(_) => break, 261 | Err(_) => continue, 262 | }; 263 | }, 264 | }; 265 | } 266 | } else { 267 | error!( 268 | "Failed to download file: {} - Status: {}", 269 | &url, 270 | response.status() 271 | ); 272 | } 273 | 274 | let result_hash = format!("{:x}", hasher.finalize()); 275 | if result_hash == expected_sha256 { 276 | info!("SHA256 hash verification succeeded for file {}", filename); 277 | } else { 278 | warn!( 279 | "SHA256 hash mismatch for file: {}. 
Expected: {}, Got: {}", 280 | filename, expected_sha256, result_hash 281 | ); 282 | } 283 | 284 | info!("Downloaded and saved file: {}", filename); 285 | 286 | // file.write_all(&content)?; 287 | pb.finish_with_message(format!("Downloaded {}", filename)); 288 | 289 | return Ok(()); 290 | } 291 | 292 | pub async fn download_files( 293 | &self, 294 | large_file_information: Vec, 295 | ) -> Result<(), Box> { 296 | let client = reqwest::Client::new(); 297 | 298 | info!( 299 | "Downloading client has initiated. {} large file(s) to be downloaded.", 300 | large_file_information.len() 301 | ); 302 | 303 | if large_file_information.len() == 0 { 304 | warn!("No LFS urls had been detected. This could potentially be an error?"); 305 | } 306 | 307 | let progress_bar = std::sync::Arc::new(indicatif::MultiProgress::new()); 308 | let mut handlers = Vec::new(); 309 | let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(self.tasks)); 310 | 311 | for single_large_file_information in large_file_information { 312 | let client_in_thread = client.clone(); 313 | let repository_local_path_thread = self.repository_local_path.clone().unwrap(); 314 | let progress_bar_thread = progress_bar.clone(); 315 | let semaphore_thread = semaphore.clone(); 316 | 317 | let handler = tokio::task::spawn(async move { 318 | let _permit = semaphore_thread.acquire().await.unwrap(); 319 | 320 | return DownloadArguments::download_single_file( 321 | client_in_thread, 322 | single_large_file_information.url, 323 | repository_local_path_thread, 324 | progress_bar_thread, 325 | single_large_file_information.sha256, 326 | ) 327 | .await; 328 | }); 329 | handlers.push(handler); 330 | } 331 | 332 | let mut results = Vec::new(); 333 | // for calculating the number of failed tasks 334 | let total_handlers = handlers.len(); 335 | 336 | for handler in handlers { 337 | let result = handler 338 | .await? 
339 | .expect("Error happened when downloading a file"); 340 | 341 | results.push(result); 342 | } 343 | 344 | if results.len() == total_handlers { 345 | info!("All downloads had been succeeded!"); 346 | } else { 347 | let failures = total_handlers - results.len(); 348 | warn!("{} downloads had failed.", failures); 349 | } 350 | 351 | return Ok(()); 352 | } 353 | } 354 | 355 | #[derive(Debug)] 356 | pub struct LargeFileInformation { 357 | pub url: String, 358 | pub sha256: String, 359 | } 360 | 361 | impl LargeFileInformation { 362 | fn new(url: String, sha256: String) -> Self { 363 | return LargeFileInformation { 364 | url: url, 365 | sha256: sha256, 366 | }; 367 | } 368 | } 369 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use log::{debug, error, info}; 3 | 4 | mod download; 5 | mod utilities; 6 | 7 | #[tokio::main] 8 | async fn main() -> Result<(), Box> { 9 | utilities::setup_logger()?; 10 | 11 | // display the debugging mode availability 12 | debug!( 13 | "Debugging mode enabled: {}", 14 | log::log_enabled!(log::Level::Debug) 15 | ); 16 | 17 | let mut arguments = download::DownloadArguments::parse(); 18 | 19 | info!("Concurrency is sat to: {}", &arguments.tasks); 20 | 21 | // handle the case in which the repo exists locally 22 | if std::path::Path::new(&arguments.repository).exists() { 23 | println!("Repository has detected in the current working directory, overwrite?(Y/N)"); 24 | let mut buffer = String::new(); 25 | std::io::stdin() 26 | .read_line(&mut buffer) 27 | .expect("Incorrect input"); 28 | let user_input = buffer.trim(); 29 | 30 | let confirm_instructions = vec!["y", "Y"]; 31 | let negative_instructions = vec!["n", "N"]; 32 | 33 | if confirm_instructions.contains(&user_input) { 34 | std::fs::remove_dir_all(&arguments.repository) 35 | .expect("Error when trying to delete the directory"); 36 | } 
else if negative_instructions.contains(&user_input) { 37 | panic!("Aborted."); 38 | } else { 39 | panic!("Wrong input. You either input y | Y, or n | N.") 40 | } 41 | } 42 | 43 | match arguments.clone_repository() { 44 | Ok(result) => { 45 | match arguments.read_lfs_pointers(&result.path().parent().unwrap().to_path_buf()) { 46 | Ok(lfs_files) => match arguments.extract_lfs_urls( 47 | &result.path().parent().unwrap().to_path_buf(), 48 | lfs_files, 49 | &"https://hf-mirror.com".to_string(), 50 | ) { 51 | Ok(large_file_information) => { 52 | arguments.download_files(large_file_information).await? 53 | } 54 | Err(error) => error!("Downloading large files failed due to {}", error), 55 | }, 56 | Err(error) => error!("Extracting LFS urls failed due to {}", error), 57 | }; 58 | 59 | info!( 60 | "Git cloned the repository to: {}", 61 | result 62 | .path() 63 | .parent() 64 | .unwrap() 65 | .as_os_str() 66 | .to_os_string() 67 | .to_string_lossy() 68 | ); 69 | } 70 | Err(error) => error!("Git clone has failed {}", error), 71 | }; 72 | 73 | return Ok(()); 74 | } 75 | -------------------------------------------------------------------------------- /src/utilities.rs: -------------------------------------------------------------------------------- 1 | use std::time::SystemTime; 2 | 3 | use fern::{ 4 | self, 5 | colors::{self, ColoredLevelConfig}, 6 | }; 7 | use humantime; 8 | 9 | pub fn setup_logger() -> Result<(), fern::InitError> { 10 | let colors = ColoredLevelConfig::new() 11 | .info(colors::Color::Green) 12 | .warn(colors::Color::Yellow) 13 | .error(colors::Color::Red) 14 | .debug(colors::Color::White); 15 | 16 | fern::Dispatch::new() 17 | .format(move |out, message, record| { 18 | out.finish(format_args!( 19 | "[{} {} {}] {}", 20 | humantime::format_rfc3339_seconds(SystemTime::now()), 21 | colors.color(record.level()), 22 | record.target(), 23 | message 24 | )) 25 | }) 26 | .level(log::LevelFilter::Info) 27 | .chain(std::io::stdout()) 28 | .apply()?; 29 | Ok(()) 30 | } 31 
| --------------------------------------------------------------------------------