├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── waste-report-incorrect.md └── workflows │ ├── release.yml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.md ├── Makefile ├── README.md ├── criner-waste-report ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE.md ├── README.md ├── src │ ├── html.rs │ ├── lib.rs │ ├── result.rs │ └── test │ │ ├── from_package.rs │ │ └── mod.rs └── tests │ └── fixtures │ ├── avr_libc-0.1.3extract_crate-1.0.0.package.rmp │ ├── cookie_factory-0.3.1-extract_crate-0.3.1.package.rmp │ ├── curl_sys-0.4.27-extract_crate-1.0.0.package.rmp │ ├── deno_typescript-0.36.0-extract_crate-1.0.0.package.rmp │ ├── falcon_raptor-0.4.9-extract_crate-1.0.0.package.rmp │ ├── fermium-20.12.0-alpha2-extract_crate-1.0.0.package.rmp │ ├── gnir-0.14.0-alpha3-extract_crate-1.0.0.package.rmp │ ├── grpcio-sys-0.5.0_extract_crate-1.0.0.package.rmp │ ├── lw_webdriver-0.4.1-extract_crate-1.0.0.package.rmp │ ├── mozjs_sys-0.67.1-extract_crate-1.0.0.package.rmp │ ├── openblas_provider-0.4.0-extract_crate-1.0.0.package.rmp │ ├── ripgrep-12.0.0-extract_crate-1.0.0.package.rmp │ ├── sovrin-client.0.1.0-179-extract_crate-1.0.0.package.rmp │ └── threed-ice-sys-0.3.0-extract_crate-1.0.0.package.rmp ├── criner ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE.md ├── README.md ├── rustfmt.toml └── src │ ├── engine │ ├── mod.rs │ ├── report │ │ ├── generic.rs │ │ ├── mod.rs │ │ └── waste │ │ │ ├── merge.rs │ │ │ ├── mod.rs │ │ │ └── report_test │ │ │ ├── merge.rs │ │ │ └── mod.rs │ ├── run.rs │ ├── stage │ │ ├── changes.rs │ │ ├── db_download │ │ │ ├── convert.rs │ │ │ ├── csv_model.rs │ │ │ ├── from_csv.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── processing.rs │ │ └── report │ │ │ ├── git.rs │ │ │ └── mod.rs │ └── work │ │ ├── cpubound.rs │ │ ├── generic.rs │ │ ├── iobound.rs │ │ ├── mod.rs │ │ └── schedule.rs │ ├── error.rs │ ├── export │ ├── mod.rs │ ├── run.rs │ └── to_sql │ │ ├── dbdump_crate.rs │ │ ├── krate.rs │ │ ├── krate_version.rs │ │ ├── meta.rs │ │ ├── mod.rs │ │ ├── result.rs │ │ └── task.rs │ ├── lib.rs │ ├── migration.rs │ ├── model.rs │ ├── persistence │ ├── keyed.rs │ ├── merge.rs │ ├── mod.rs │ ├── serde.rs │ └── table.rs │ ├── spawn.rs │ └── utils.rs ├── rustfmt.toml └── src ├── args.rs ├── error.rs ├── lib.rs └── main.rs /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/waste-report-incorrect.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Waste Report Incorrect 3 | about: Help us produce better 'include' directives or fewer false positives 4 | title: "[INCORRECT WASTE REPORT] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | * [ ] Yes, I have read the [limitations](https://github.com/the-lean-crate/criner#limitations-of-waste-reporting) section and believe this issue can be fixed, or my crate does not have a build script. 11 | * **** 12 | * 13 | 14 | ### Expectation vs Actual 15 | 16 | _Please write down what you would want to see, and highlight what you are actually seeing_. (Please keep it short, we will ask if more details are needed) 17 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # The way this works is a little weird. But basically, the create-release job 2 | # runs purely to initialize the GitHub release itself. Once done, the upload 3 | # URL of the release is saved as an artifact. 4 | # 5 | # The build-release job runs only once create-release is finished. It gets 6 | # the release upload URL by downloading the corresponding artifact (which was 7 | # uploaded by create-release). It then builds the release executables for each 8 | # supported platform and attaches them as release assets to the previously 9 | # created release. 10 | # 11 | # The key here is that we create the release only once. 12 | 13 | name: release 14 | on: 15 | push: 16 | # Enable when testing release infrastructure on a branch. 17 | # branches: 18 | # - release 19 | tags: 20 | - 'v[0-9]+.[0-9]+.[0-9]+' 21 | jobs: 22 | create-release: 23 | name: create-release 24 | runs-on: ubuntu-latest 25 | # env: 26 | # # Set to force version number, e.g., when no tag exists. 27 | # ARTIFACT_VERSION: TEST-0.0.2 28 | steps: 29 | - name: Create artifacts directory 30 | run: mkdir artifacts 31 | 32 | - name: Get the release version from the tag 33 | if: env.ARTIFACT_VERSION == '' 34 | run: | 35 | # Apparently, this is the right way to get a tag name. Really? 36 | # 37 | # See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027 38 | echo "::set-env name=ARTIFACT_VERSION::${GITHUB_REF#refs/tags/}" 39 | echo "version is: ${{ env.ARTIFACT_VERSION }}" 40 | 41 | - name: Create GitHub release 42 | id: release 43 | uses: actions/create-release@v1 44 | env: 45 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 46 | with: 47 | tag_name: ${{ env.ARTIFACT_VERSION }} 48 | release_name: ${{ env.ARTIFACT_VERSION }} 49 | 50 | - name: Save release upload URL to artifact 51 | run: echo "${{ steps.release.outputs.upload_url }}" > artifacts/release-upload-url 52 | 53 | - name: Save version number to artifact 54 | run: echo "${{ env.ARTIFACT_VERSION }}" > artifacts/release-version 55 | 56 | - name: Upload artifacts 57 | uses: actions/upload-artifact@v1 58 | with: 59 | name: artifacts 60 | path: artifacts 61 | 62 | build-release: 63 | name: build-release 64 | needs: ['create-release'] 65 | runs-on: ${{ matrix.os }} 66 | env: 67 | # For some builds, we use cross to test on 32-bit and big-endian 68 | # systems. 69 | CARGO: cargo 70 | # When CARGO is set to CROSS, this is set to `--target matrix.target`.
71 | TARGET_FLAGS: 72 | # When CARGO is set to CROSS, TARGET_DIR includes matrix.target. 73 | TARGET_DIR: ./target 74 | # Emit backtraces on panics. 75 | RUST_BACKTRACE: 1 76 | BIN_NAME: criner 77 | strategy: 78 | matrix: 79 | # build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc] 80 | build: [macos] 81 | include: 82 | # Needs openssl installed, shouldn't be a problem, but it's for another day 83 | # - build: linux 84 | # os: ubuntu-18.04 85 | # rust: nightly 86 | # target: x86_64-unknown-linux-musl 87 | # - build: linux-arm 88 | # os: ubuntu-18.04 89 | # rust: nightly 90 | # target: arm-unknown-linux-gnueabihf 91 | - build: macos 92 | os: macos-latest 93 | rust: nightly 94 | target: x86_64-apple-darwin 95 | # no windows support, at least not with GUI (see https://github.com/Byron/dua-cli/issues/2) 96 | # - build: win-msvc 97 | # os: windows-2019 98 | # rust: nightly 99 | # target: x86_64-pc-windows-msvc 100 | # - build: win-gnu 101 | # os: windows-2019 102 | # rust: nightly-x86_64-gnu 103 | # target: x86_64-pc-windows-gnu 104 | # - build: win32-msvc 105 | # os: windows-2019 106 | # rust: nightly 107 | # target: i686-pc-windows-msvc 108 | 109 | steps: 110 | - name: Checkout repository 111 | uses: actions/checkout@v1 112 | with: 113 | fetch-depth: 1 114 | - name: Install Rust 115 | uses: actions-rs/toolchain@v1 116 | with: 117 | toolchain: ${{ matrix.rust }} 118 | profile: minimal 119 | override: true 120 | target: ${{ matrix.target }} 121 | 122 | - name: Use Cross 123 | run: | 124 | # FIXME: to work around bugs in latest cross release, install master. 125 | # ME: Still needed? Issue seems closed 126 | # See: https://github.com/rust-embedded/cross/issues/357 127 | cargo install --git https://github.com/rust-embedded/cross 128 | echo "::set-env name=CARGO::cross" 129 | echo "::set-env name=TARGET_FLAGS::--target ${{ matrix.target }}" 130 | echo "::set-env name=TARGET_DIR::./target/${{ matrix.target }}" 131 | 132 | - name: Show command used for Cargo 133 | run: | 134 | echo "cargo command is: ${{ env.CARGO }}" 135 | echo "target flag is: ${{ env.TARGET_FLAGS }}" 136 | echo "target dir is: ${{ env.TARGET_DIR }}" 137 | 138 | - name: Get release download URL 139 | uses: actions/download-artifact@v1 140 | with: 141 | name: artifacts 142 | path: artifacts 143 | 144 | - name: Set release upload URL and release version 145 | shell: bash 146 | run: | 147 | release_upload_url="$(cat artifacts/release-upload-url)" 148 | echo "::set-env name=RELEASE_UPLOAD_URL::$release_upload_url" 149 | echo "release upload url: $RELEASE_UPLOAD_URL" 150 | release_version="$(cat artifacts/release-version)" 151 | echo "::set-env name=RELEASE_VERSION::$release_version" 152 | echo "release version: $RELEASE_VERSION" 153 | 154 | - name: Build release binary 155 | run: ${{ env.CARGO }} build --verbose --release ${{ env.TARGET_FLAGS }} 156 | 157 | - name: Strip release binary (linux and macos) 158 | if: matrix.build == 'linux' || matrix.build == 'macos' 159 | run: strip "target/${{ matrix.target }}/release/${{ env.BIN_NAME }}" 160 | 161 | - name: Strip release binary (arm) 162 | if: matrix.build == 'linux-arm' 163 | run: | 164 | docker run --rm -v \ 165 | "$PWD/target:/target:Z" \ 166 | rustembedded/cross:arm-unknown-linux-gnueabihf \ 167 | arm-linux-gnueabihf-strip \ 168 | /target/arm-unknown-linux-gnueabihf/release/${{ env.BIN_NAME }} 169 | 170 | - name: Build archive 171 | shell: bash 172 | run: | 173 | staging="${{ env.BIN_NAME }}-${{ env.RELEASE_VERSION }}-${{ matrix.target }}" 174 | mkdir -p "$staging" 175 | 
176 | cp {README.md,LICENSE.md,CHANGELOG.md} "$staging/" 177 | 178 | if [ "${{ matrix.os }}" = "windows-2019" ]; then 179 | cp "target/${{ matrix.target }}/release/${{ env.BIN_NAME }}.exe" "$staging/" 180 | 7z a "$staging.zip" "$staging" 181 | echo "::set-env name=ASSET::$staging.zip" 182 | else 183 | cp "target/${{ matrix.target }}/release/${{ env.BIN_NAME }}" "$staging/" 184 | tar czf "$staging.tar.gz" "$staging" 185 | echo "::set-env name=ASSET::$staging.tar.gz" 186 | fi 187 | 188 | - name: Upload release archive 189 | uses: actions/upload-release-asset@v1.0.1 190 | env: 191 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 192 | with: 193 | upload_url: ${{ env.RELEASE_UPLOAD_URL }} 194 | asset_path: ${{ env.ASSET }} 195 | asset_name: ${{ env.ASSET }} 196 | asset_content_type: application/octet-stream 197 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build-and-test: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: tests 15 | run: make tests 16 | - name: stress 17 | run: make mine-2min-logonly && du -sch criner.db/* 18 | - name: Check crate package size 19 | run: | 20 | curl -LSfs https://raw.githubusercontent.com/the-lean-crate/cargo-diet/master/ci/install.sh | \ 21 | sh -s -- --git the-lean-crate/cargo-diet --target x86_64-unknown-linux-musl 22 | 23 | cargo diet -n --package-size-limit 15KB 24 | (cd criner && cargo diet -n --package-size-limit 50KB) 25 | (cd criner-waste-report && cargo diet -n --package-size-limit 15KB) 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/rust 3 | # Edit at https://www.gitignore.io/?templates=rust 4 | 5 | ### Rust ### 6 | # Generated by Cargo 7 | # will have compiled files and executables 8 | /target/ 9 | 10 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 11 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 12 | criner/Cargo.lock 13 | 14 | # These are backup files generated by rustfmt 15 | **/*.rs.bk 16 | 17 | # End of https://www.gitignore.io/api/rust 18 | 19 | # Databases and assets 20 | /index-bare/ 21 | /criner.db/ 22 | /*.sqlite 23 | /criner/tests/fixtures/extracted/ 24 | /crates-io-db-dump.tar.gz 25 | /????-??-??-??????/ 26 | 27 | # for standard variables that are required for pushing reports 28 | /.env 29 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.4.0 (2024-10-17) 9 | 10 | ### Chore (BREAKING) 11 | 12 | - upgrade gix-related dependencies 13 | 14 | ### Commit Statistics 15 | 16 | 17 | 18 | - 2 commits contributed to the release. 19 | - 1 commit was understood as [conventional](https://www.conventionalcommits.org). 
20 | - 0 issues like '(#ID)' were seen in commit messages 21 | 22 | ### Commit Details 23 | 24 | 25 | 26 |
view details 27 | 28 | * **Uncategorized** 29 | - Merge branch 'upgrades' ([`cf7fe54`](https://github.com/the-lean-crate/criner/commit/cf7fe541d7a40c21f06c1e256d8f1072439c27d9)) 30 | - Upgrade gix-related dependencies ([`081cc14`](https://github.com/the-lean-crate/criner/commit/081cc14b90e4718ef45190cff1239a9ff5f9a1e7)) 31 |
32 | 33 | ## 0.3.1 (2023-03-16) 34 | 35 | A maintenance release without user-facing changes. 36 | 37 | ### Commit Statistics 38 | 39 | 40 | 41 | - 10 commits contributed to the release. 42 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 43 | - 0 issues like '(#ID)' were seen in commit messages 44 | 45 | ### Commit Details 46 | 47 | 48 | 49 |
view details 50 | 51 | * **Uncategorized** 52 | - Upgrade `git2`, `crates-index-diff` and `prodash`. ([`09fb11f`](https://github.com/the-lean-crate/criner/commit/09fb11f4077b8426aadc139fe8d72dfdf6d65bbe)) 53 | - Upgrade clap ([`f54286f`](https://github.com/the-lean-crate/criner/commit/f54286f7b76ac8f5daf5d4d13670347ce79fbe08)) 54 | - Merge branch 'upgrade-index-diff' ([`85e0ca1`](https://github.com/the-lean-crate/criner/commit/85e0ca1b4c9e8abefc450fca89e1f6d8b5c9d17e)) 55 | - Add a flag to skip downloading the database entirely ([`c8908bf`](https://github.com/the-lean-crate/criner/commit/c8908bf5356626e4cfd0f0a7ddd24cd9b6f96e09)) 56 | - Fix deprectation warnings ([`2af218b`](https://github.com/the-lean-crate/criner/commit/2af218bb173f9887151f33b3d8395df6e1cddd94)) 57 | - Fix all of the time::format descriptions to v0.3 ([`25a6416`](https://github.com/the-lean-crate/criner/commit/25a64167c340b61a8f25db79293f910bf452b744)) 58 | - Upgrade to latest time/prodash at the loss of local time support ([`1100c83`](https://github.com/the-lean-crate/criner/commit/1100c830a8a9bf21c60d8e65f19953e71fa752ef)) 59 | - Upgrade to latest clap ([`9302abc`](https://github.com/the-lean-crate/criner/commit/9302abc18056fe249f42bcdd006970543c7ecb12)) 60 | - Dependency upgrade ([`6089587`](https://github.com/the-lean-crate/criner/commit/6089587fd23645ba16590eb639cbcd9eae7228d1)) 61 | - Cargo clippy ([`d285e06`](https://github.com/the-lean-crate/criner/commit/d285e0609eb699bfb164d584ca44a99dbe2c8d71)) 62 |
63 | 64 | ## v0.3.0 (2020-11-02) 65 | 66 | ### Commit Statistics 67 | 68 | 69 | 70 | - 5 commits contributed to the release over the course of 115 calendar days. 71 | - 139 days passed between releases. 72 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 73 | - 0 issues like '(#ID)' were seen in commit messages 74 | 75 | ### Commit Details 76 | 77 | 78 | 79 |
view details 80 | 81 | * **Uncategorized** 82 | - Fix build ([`257a601`](https://github.com/the-lean-crate/criner/commit/257a60192d6543648e6684b07a40024b8f894957)) 83 | - Upgrade to prodash 10 ([`72cccf7`](https://github.com/the-lean-crate/criner/commit/72cccf77a5e228fdbbe7ee60c75f1db5f3ad1a37)) 84 | - Replace structopt with Clap 3 ([`c2313b3`](https://github.com/the-lean-crate/criner/commit/c2313b3601e8a848ae68f42301a3f113bdd807af)) 85 | - Allow for more screenspace via rustfmt config file ([`50dcbac`](https://github.com/the-lean-crate/criner/commit/50dcbac5a4c629dbd292c5b57e222a171299d985)) 86 | - Upgrade to prodash 7.0 ([`83d8029`](https://github.com/the-lean-crate/criner/commit/83d8029d782e7d3a6780f66d7383c83c95df3c26)) 87 |
88 | 89 | ## v0.2.0 (2020-06-16) 90 | 91 | ## v0.1.4 (2020-07-25) 92 | 93 | ## v0.1.3 (2020-05-28) 94 | 95 | ## v0.1.2 (2020-04-12) 96 | 97 | * the first release of criner-cli. Early, but able to get you started on your personal crates.io download. 98 | 99 | ### Commit Statistics 100 | 101 | 102 | 103 | - 29 commits contributed to the release over the course of 50 calendar days. 104 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 105 | - 0 issues like '(#ID)' were seen in commit messages 106 | 107 | ### Commit Details 108 | 109 | 110 | 111 |
view details 112 | 113 | * **Uncategorized** 114 | - Add default value for db-path ([`dbffa6b`](https://github.com/the-lean-crate/criner/commit/dbffa6bf807e879b67bfbf3f1fbf396a0f60ba88)) 115 | - More efficient drawing on idle, putting CPU usage to half or a third. ([`5b34d88`](https://github.com/the-lean-crate/criner/commit/5b34d88fad62cbf58cecf713374579dcfb047ac3)) 116 | - Very first sketch on how to schedule something every 24h ([`6046420`](https://github.com/the-lean-crate/criner/commit/604642096b84ebcb2d7bb600fce054795179aa3e)) 117 | - More stable gui experience ([`a798b1f`](https://github.com/the-lean-crate/criner/commit/a798b1fb46c3e4d5d32c5207543d42d9f37ca782)) 118 | - Don't write 'yanked …' message, it's log spamming ([`bc2cff6`](https://github.com/the-lean-crate/criner/commit/bc2cff6f0e5c78c2b383a9bcf79e847224cf0008)) 119 | - Make aliases more obvious, increase scrollback buffer size ([`2fb5fb1`](https://github.com/the-lean-crate/criner/commit/2fb5fb120aa569f798bf2f4cb938114fa98021c1)) 120 | - Don't create commits if there was no change, save unnecessary history ([`d7b9c61`](https://github.com/the-lean-crate/criner/commit/d7b9c61cb2278cc0e866cf152a5c8f1781532adf)) 121 | - Some more FPS by default, we can afford it ([`abaeb61`](https://github.com/the-lean-crate/criner/commit/abaeb617b6965c6200fd43368747d7dc45afe2fe)) 122 | - Always initialize an env-logger for non-gui subcommands ([`0898d52`](https://github.com/the-lean-crate/criner/commit/0898d52affcf470807df7d86110d5f030f46b46a)) 123 | - Separate processing and reporting stage, which works due to avoiding… ([`e871dfb`](https://github.com/the-lean-crate/criner/commit/e871dfbbf8326a71b1cebcd51db63db2c81073a5)) 124 | - Since we cannot spawn futures with statements, bundle… ([`c40aa25`](https://github.com/the-lean-crate/criner/commit/c40aa25dab665188094dac24a5b645191d0d9be5)) 125 | - Add support for globbing to limit runtime of reporting ([`79bd2e3`](https://github.com/the-lean-crate/criner/commit/79bd2e31d0e01d67943b6e71253cbe89411ec789)) 126 | - Allow to run the reporting stage separately, to allow turning it off ([`0841822`](https://github.com/the-lean-crate/criner/commit/0841822d6e3e405e96b5a1a47dcc687191ee8e8b)) 127 | - Allow passing options on how often to run stages to CLI ([`e6ad22e`](https://github.com/the-lean-crate/criner/commit/e6ad22ee98305e3bea5c04fc16ca8511f4875060)) 128 | - Automatically turn on the logger in no-gui, but allow people to override it ([`b5e74b6`](https://github.com/the-lean-crate/criner/commit/b5e74b61fd2cd2301741117a43d8cd7fa292880b)) 129 | - First part of exporting crate versions ([`ee2dfa5`](https://github.com/the-lean-crate/criner/commit/ee2dfa5539ee455a1fce43a4ca4f0fa84004005c)) 130 | - Frame for exporting an sqlite database into a clearer form ([`0394e86`](https://github.com/the-lean-crate/criner/commit/0394e86193904018ef082d7e06e895607c6b7c1f)) 131 | - Control intervals from the command-line ([`d478bc5`](https://github.com/the-lean-crate/criner/commit/d478bc5f539a19632aaccee6d1218e4e653fe10c)) 132 | - Spawn cpu+output bound processors (for now dummy ones) ([`896de2b`](https://github.com/the-lean-crate/criner/commit/896de2b1e52de55beaf73107b92dbea509715d78)) 133 | - Fix args ([`2d9bea9`](https://github.com/the-lean-crate/criner/commit/2d9bea983b5baffa6f34b261af66106919f3c4d2)) 134 | - Prepare for CPU bound processors ([`a928d27`](https://github.com/the-lean-crate/criner/commit/a928d274eeb72003de43daf5cf54b041ab438ecd)) 135 | - Let processing stage handle its own workers 
([`d8d640d`](https://github.com/the-lean-crate/criner/commit/d8d640ddd3ebf6cd264f86d5fd3d2b8ac4ad944d)) 136 | - Extract engine runner ([`cdd2c0e`](https://github.com/the-lean-crate/criner/commit/cdd2c0ee03d81e6e09c52ffe191b59bd8ba33c79)) 137 | - First migration ([`fd30e97`](https://github.com/the-lean-crate/criner/commit/fd30e97e55dd37b7e8e6e9ae979d56ac6cbfadbd)) 138 | - Initial version of migration command ([`b149866`](https://github.com/the-lean-crate/criner/commit/b1498662841844b451c3240f340224d35116d9f9)) 139 | - Store downloads only in assets directory, now part of the DB ([`dc4d7aa`](https://github.com/the-lean-crate/criner/commit/dc4d7aa59539d4c0c23cfa80624061685916f392)) 140 | - First rough CLI startup ([`cfb6eb5`](https://github.com/the-lean-crate/criner/commit/cfb6eb53c80cc31a5664bb640314b42eac547315)) 141 | - Prepare criner-only CLI ([`4d5a235`](https://github.com/the-lean-crate/criner/commit/4d5a2354b90ea9f243cae8a248f2ca8fcc36dc44)) 142 | - Initial commit as copy from crates-io-cli ([`2dfefdf`](https://github.com/the-lean-crate/criner/commit/2dfefdf902c1bea243489f9deebce95c8bc8b4ac)) 143 |
144 | 145 | ## v0.1.1 (2020-03-20) 146 | 147 | ## v0.1.0 (2020-03-20) 148 | 149 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at sebastian.thiel@icloud.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 
63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | It's great to see you, and thanks for thinking about contributing! 2 | Have you seen [issues that need your help][help-wanted]? 3 | 4 | That's about it for now, and this guide will certainly be expanded as we learn more. 5 | 6 | [help-wanted]: https://github.com/the-lean-crate/criner/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22 7 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "criner-cli" 3 | version = "0.4.0" 4 | authors = ["Sebastian Thiel "] 5 | edition = "2018" 6 | description = "A command-line interface for the 'Criner' crates mining platform" 7 | license = "MIT" 8 | repository = "https://github.com/the-lean-crate/criner" 9 | readme = "README.md" 10 | include = ["src/**/*", "LICENSE.md", "README.md", "CHANGELOG.md"] 11 | 12 | [[bin]] 13 | name = "criner" 14 | path = "src/main.rs" 15 | doctest = false 16 | 17 | [features] 18 | default = [] 19 | migration = ["criner/migration"] 20 | 21 | [lib] 22 | doctest = false 23 | 24 | [dependencies] 25 | clap = { version = "4.0.22", features = ["derive"] } 26 | humantime = "2.1.0" 27 | time = { version = "0.3.5", features = ["parsing", "macros" ] } 28 | criner = { version = "^0.4.0", path = "./criner" } 29 | env_logger = { version = "0.10.0", default-features = false, features = ["auto-color", "humantime"] } 30 | 31 | [workspace] 32 | members = ["criner", "criner-waste-report"] 33 | 34 | # Turn on when needed to have faster debug builds 35 | [profile.dev.package."*"] 36 | debug = false 37 | 38 | # We need fast regex for tests 39 | [profile.test.package."*"] 40 | debug = false 41 | opt-level = 2 42 | 43 | [profile.release] 44 | # uncomment this when things stabilize - increase build times, but potentially make faster binaries 45 | #lto = "fat" 46 | #codegen-units = 1 47 | incremental = false 48 | opt-level = 3 49 | panic = "abort" 50 | overflow-checks = true 51 | build-override = { opt-level = 0 } 52 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © `2020` `Sebastian Thiel` 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the 
following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY : tests build 2 | 3 | help: ## Display this help 4 | @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) 5 | 6 | 7 | EXECUTABLE = target/debug/criner 8 | RELEASE_EXECUTABLE = target/release/criner 9 | RUST_SRC_FILES = $(shell find src criner/src -name "*.rs") Cargo.lock 10 | bare_index_path = index-bare 11 | 12 | DB = criner.db 13 | SQLITE_DB = $(DB)/db.msgpack.sqlite 14 | REPORTS = $(DB)/reports 15 | WASTE_REPORT = $(REPORTS)/waste 16 | 17 | $(bare_index_path): 18 | mkdir -p $(dir $@) 19 | git clone --bare https://github.com/rust-lang/crates.io-index $@ 20 | 21 | $(EXECUTABLE): $(RUST_SRC_FILES) 22 | cargo build 23 | 24 | $(RELEASE_EXECUTABLE): $(RUST_SRC_FILES) 25 | cargo build --release 26 | 27 | ##@ Meta 28 | 29 | nix-shell-macos: ## Enter a nix-shell able to build on macos 30 | nix-shell -p pkg-config openssl libiconv darwin.apple_sdk.frameworks.Security darwin.apple_sdk.frameworks.SystemConfiguration zlib 31 | 32 | sloc: ## Count lines of code, without tests 33 | tokei -e '*_test*' 34 | 35 | ##@ Running Criner 36 | 37 | $(WASTE_REPORT): 38 | mkdir -p $(REPORTS) 39 | git clone https://github.com/the-lean-crate/waste $@ 40 | 41 | init: $(WASTE_REPORT) ## Clone output repositories for report generation. 
Only needed if you have write permissions to https://github.com/crates-io 42 | fetch-only: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run the fetch stage once 43 | $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) -F 1 -P 0 -R 0 $(DB) 44 | process-only: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run the processing stage once 45 | $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --io 10 --cpu 2 -F 0 -P 1 -R 0 $(DB) 46 | process-only-nonstop: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run the processing stage continuously 47 | $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --io 10 --cpu 2 -F 0 -p 5min -R 0 $(DB) 48 | report-only: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run the reporting stage once 49 | $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --cpu-o 10 -F 0 -P 0 -R 1 $(DB) 50 | force-report-only: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run the reporting stage once, forcibly, rewriting everything and ignoring caches 51 | $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --cpu-o 10 -F 0 -P 0 -R 1 -g '*' $(DB) 52 | mine-nonstop: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run all operations continuously, fully automated 53 | ulimit -n 512; $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --io 10 --cpu 1 --cpu-o 10 -d 3:00 $(DB) 54 | mine-nonstop-no-report: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run all operations continuously, fully automated, but without generating reports 55 | ulimit -n 512; $(RELEASE_EXECUTABLE) mine -c $(bare_index_path) --io 10 --cpu 1 --cpu-o 10 -d 3:00 -R 0 $(DB) 56 | mine-nonstop-logonly: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run all operations continuously, fully automated, without gui 57 | ulimit -n 512; $(RELEASE_EXECUTABLE) mine --no-gui -c $(bare_index_path) --io 10 --cpu 1 --cpu-o 10 $(DB) 58 | mine-2min-logonly: $(RELEASE_EXECUTABLE) $(bare_index_path) ## Run all operations continuously, painfully often, and for two minutes only 59 | ulimit -n 512; $(RELEASE_EXECUTABLE) mine --time-limit 2min --no-gui -c $(bare_index_path) --io 10 --cpu 1 --cpu-o 10 -f 10s -p 10s -r 10s $(DB) 60 | 61 | ##@ Waste Report Maintenance 62 | 63 | waste-report-push-changes: $(WASTE_REPORT) ## add, commit and push all changed report pages 64 | cd $(WASTE_REPORT) && git add .
&& git commit -m "update" && git push origin +HEAD:master 65 | 66 | waste-report-reset-history-and-push: $(WASTE_REPORT) ## clear the history of the waste report repository to reduce its size, and push everything 67 | cd $(WASTE_REPORT); git checkout -b foo; git branch -D tmp; git checkout --orphan tmp; git branch -D foo; 68 | $(MAKE) waste-report-push-changes; 69 | 70 | waste-report-clear-state: $(SQLITE_DB) $(WASTE_REPORT) ## clear database state and local state for waste reporting, but leave all html files 71 | -sqlite3 $< 'drop table report_done;' 72 | -rm -Rf $(WASTE_REPORT)/__incremental_cache__ 73 | 74 | ##@ Testing 75 | 76 | clippy: ## Run cargo clippy 77 | cargo clippy 78 | 79 | fmt: ## Run cargo fmt in check mode 80 | cargo fmt --all -- --check 81 | 82 | tests: fmt clippy ## Run all tests we have 83 | cargo check --all --tests 84 | cd criner-waste-report && cargo check --tests && cargo check --tests --no-default-features 85 | cargo test --all 86 | 87 | ##@ Dataset 88 | 89 | crates-io-db-dump.tar.gz: 90 | curl --progress-bar https://static.crates.io/db-dump.tar.gz > $@ 91 | 92 | update-crate-db: crates-io-db-dump.tar.gz ## Pull all DB data from crates.io - updated every 24h 93 | 94 | -------------------------------------------------------------------------------- /criner-waste-report/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.1.5 (2023-03-16) 9 | 10 | A maintenance release without user-facing changes. 11 | 12 | ### Commit Statistics 13 | 14 | 15 | 16 | - 12 commits contributed to the release over the course of 843 calendar days. 17 | - 964 days passed between releases. 18 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 19 | - 0 issues like '(#ID)' were seen in commit messages 20 | 21 | ### Thanks Clippy 22 | 23 | 24 | 25 | [Clippy](https://github.com/rust-lang/rust-clippy) helped 3 times to make code idiomatic. 26 | 27 | ### Commit Details 28 | 29 | 30 | 31 |
view details 32 | 33 | * **Uncategorized** 34 | - Update chnagelogs prior to release ([`2faff3e`](https://github.com/the-lean-crate/criner/commit/2faff3e850634f6dcaaef843b041c7129d84b1d7)) 35 | - Upgrade `toml` in `criner-waste-report` ([`7be638b`](https://github.com/the-lean-crate/criner/commit/7be638bab4c6c7d7c2f753470d09d77ac9bc5ed2)) 36 | - Upgrade dia-semver ([`2e3ab36`](https://github.com/the-lean-crate/criner/commit/2e3ab36a2360ecbf50abfae20c6a25ba7889ca52)) 37 | - Thanks clippy ([`459cc26`](https://github.com/the-lean-crate/criner/commit/459cc26ef2bf0da1c74c807dc355db7ac3497a6a)) 38 | - Upgrade to rmp-serde 1.0 ([`b6b1109`](https://github.com/the-lean-crate/criner/commit/b6b1109e8feb220bdc9ddd834182cb2734a1394f)) 39 | - Update changelogs with `cargo changelog` ([`e80897e`](https://github.com/the-lean-crate/criner/commit/e80897e265ab4d5af7e095a106516bc701c3f315)) 40 | - Cleanup changelogs ([`5553dc2`](https://github.com/the-lean-crate/criner/commit/5553dc208f0463e02a25f7250a71c1c144c2f330)) 41 | - Thanks clippy ([`07c6594`](https://github.com/the-lean-crate/criner/commit/07c659410f252631f982dda39b4003f3c75da33c)) 42 | - Dependency upgrade ([`2f8c330`](https://github.com/the-lean-crate/criner/commit/2f8c3308dbbc28792471a24fbd0d0e544875de4b)) 43 | - Thanks clippy ([`b4fb778`](https://github.com/the-lean-crate/criner/commit/b4fb7783d67f9605ff0f97d299e075a2df3bc5fb)) 44 | - Dependency upgrade ([`c583f50`](https://github.com/the-lean-crate/criner/commit/c583f50ff3e8db1f81309778d06980cae5047fb5)) 45 | - Cargo clippy ([`d285e06`](https://github.com/the-lean-crate/criner/commit/d285e0609eb699bfb164d584ca44a99dbe2c8d71)) 46 |
47 | 48 | ## v0.1.4 (2020-07-25) 49 | 50 | * fix https://github.com/the-lean-crate/cargo-diet/issues/6 51 | 52 | ### Commit Statistics 53 | 54 | 55 | 56 | - 2 commits contributed to the release over the course of 14 calendar days. 57 | - 57 days passed between releases. 58 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 59 | - 0 issues like '(#ID)' were seen in commit messages 60 | 61 | ### Commit Details 62 | 63 | 64 | 65 |
view details 66 | 67 | * **Uncategorized** 68 | - Use more generous globs for exclude patterns ([`4cd591d`](https://github.com/the-lean-crate/criner/commit/4cd591d1dc0fd00bda2f632558dd73e230301c0f)) 69 | - Allow for more screenspace via rustfmt config file ([`50dcbac`](https://github.com/the-lean-crate/criner/commit/50dcbac5a4c629dbd292c5b57e222a171299d985)) 70 |
71 | 72 | ## v0.1.3 (2020-05-28) 73 | 74 | * back to the state of 0.1.1 - serde is actually required 75 | 76 | ### Commit Statistics 77 | 78 | 79 | 80 | - 2 commits contributed to the release. 81 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 82 | - 0 issues like '(#ID)' were seen in commit messages 83 | 84 | ### Commit Details 85 | 86 | 87 | 88 |
view details 89 | 90 | * **Uncategorized** 91 | - Revert previous change ([`5b6c614`](https://github.com/the-lean-crate/criner/commit/5b6c61445df49aa8ad545fb591c3f9fc7b7cd452)) 92 | - Revert "serde is now behind a feature toggle for criner-waste-report" ([`73c38a0`](https://github.com/the-lean-crate/criner/commit/73c38a0698983a24e1c14db8979c9ed5efd232d8)) 93 |
94 | 95 | ## v0.1.2 (2020-05-28) 96 | 97 | * serde serialization and deserialization capabilities are behind the feature flag 'with-serde', which is enabled by default. 98 | 99 | ### Commit Statistics 100 | 101 | 102 | 103 | - 3 commits contributed to the release over the course of 2 calendar days. 104 | - 4 days passed between releases. 105 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 106 | - 0 issues like '(#ID)' were seen in commit messages 107 | 108 | ### Commit Details 109 | 110 | 111 | 112 |
view details 113 | 114 | * **Uncategorized** 115 | - Bump patch level of criner-waste-report ([`90f5930`](https://github.com/the-lean-crate/criner/commit/90f5930c80825eed7574c0fa7cba9039c95f5687)) 116 | - Serde is now behind a feature toggle for criner-waste-report ([`821a15a`](https://github.com/the-lean-crate/criner/commit/821a15a8231597fb99851849ff1740071107e4a9)) 117 | - Update all + cargo diet ([`aa1a31e`](https://github.com/the-lean-crate/criner/commit/aa1a31e0ddea775f1c189645af0bf09ce8fa44b5)) 118 |
119 | 120 | ## v0.1.1 (2020-05-24) 121 | 122 | * remove tar dependency 123 | 124 | ### Commit Statistics 125 | 126 | 127 | 128 | - 2 commits contributed to the release. 129 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 130 | - 0 issues like '(#ID)' were seen in commit messages 131 | 132 | ### Commit Details 133 | 134 | 135 | 136 |
view details 137 | 138 | * **Uncategorized** 139 | - Bump patch level ([`7bfdaa5`](https://github.com/the-lean-crate/criner/commit/7bfdaa582633f15e30316b78836ae21224594ecd)) 140 | - Remove unnecessary tar dependency in criner-waste-report… ([`844512f`](https://github.com/the-lean-crate/criner/commit/844512ff10b678ffd750c24e066b2b246354aa88)) 141 |
142 | 143 | ## v0.1.0 (2020-05-24) 144 | 145 | * initial release 146 | 147 | ### Commit Statistics 148 | 149 | 150 | 151 | - 5 commits contributed to the release. 152 | - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 153 | - 0 issues like '(#ID)' were seen in commit messages 154 | 155 | ### Commit Details 156 | 157 | 158 | 159 |
view details 160 | 161 | * **Uncategorized** 162 | - Prepare release of criner-waste-report ([`ddac38b`](https://github.com/the-lean-crate/criner/commit/ddac38bd31ccdfb88b18370fac8d5c40c8c39a9c)) 163 | - Refactor ([`a794d02`](https://github.com/the-lean-crate/criner/commit/a794d020e2d403379edd5956666bd8113266cc1d)) 164 | - Split html related criner-waste-report crates into their own feature ([`a9a3a19`](https://github.com/the-lean-crate/criner/commit/a9a3a194cf05cf8088a045a13ad4c6e5f2a494b0)) 165 | - Organize dependencies before splitting html out as feature ([`d8d336a`](https://github.com/the-lean-crate/criner/commit/d8d336a4180b6f800567d057c4a3b1c32d546b35)) 166 | - Make use of new criner-waste-report crate within criner ([`acc520e`](https://github.com/the-lean-crate/criner/commit/acc520e065f4969024bf0ce4d5d4e5acb5bd8b33)) 167 |
168 | 169 | -------------------------------------------------------------------------------- /criner-waste-report/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "criner-waste-report" 3 | version = "0.1.5" 4 | authors = ["Sebastian Thiel "] 5 | edition = "2018" 6 | description = "Determine include directives for Cargo.toml files to slim down a crate" 7 | license = "MIT" 8 | repository = "https://github.com/the-lean-crate/criner" 9 | readme = "README.md" 10 | include = ["src/**/*", "LICENSE.md", "README.md", "CHANGELOG.md", "!**/test/**/*"] 11 | 12 | [lib] 13 | doctest = false 14 | 15 | [features] 16 | default = ["html"] 17 | html = ["horrorshow", "bytesize", "dia-semver", "humantime"] 18 | 19 | [dependencies] 20 | serde_derive = "1.0.104" 21 | serde = "1.0.104" 22 | toml = "0.7.2" 23 | globset = "0.4.4" 24 | regex = "1.3.4" 25 | lazy_static = "1.4.0" 26 | 27 | # for html 28 | horrorshow = { version = "0.8.1", optional = true } 29 | bytesize = { version = "1.0.0", optional = true } 30 | dia-semver = { version = "11.0.0", optional = true } 31 | humantime = { version = "2.0.0", optional = true } 32 | 33 | [dev-dependencies] 34 | rmp-serde = "1.0.0" 35 | -------------------------------------------------------------------------------- /criner-waste-report/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © `2020` `Sebastian Thiel` 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /criner-waste-report/README.md: -------------------------------------------------------------------------------- 1 | ![Rust](https://github.com/the-lean-crate/criner/workflows/Rust/badge.svg) 2 | [![crates.io version](https://img.shields.io/crates/v/criner.svg)](https://crates.io/crates/criner) 3 | 4 | ## TODO 5 | * [ ] Incorporate download counts into report 6 | * [ ] Make things prettier and more visual - that way we can try again for a come-back :D 7 | * [ ] See why RipGrep doesn't get any suggestions 8 | * [ ] More reporting - right now the context gathering to see how much time is spent where is neglected. 9 | 10 | ## Possible Improvements 11 | * [ ] Suggest 'top-level' globs like `/README.md` if we know the matched file is on the top-level. 
Otherwise the pattern `README.md` will actually match `*/README.md`. 12 | * [ ] Count negation patterns in includes and excludes. The latter don't seem to be working, and if nobody is using them, Cargo can either make them work or 13 | reject them properly. Maybe. Maybe first create an issue for that and see what they think. 14 | * [ ] On chunk download timeout, don't restart, but resume the download where it left off 15 | * [ ] resilience: protect against ThreadPanics - they prevent the program from shutting down 16 | * Futures has a wrapper to catch panics, even though we don't use it yet. A panic only brings down the future that panics, not the entire program. 17 | * [ ] Graceful shutdown on Ctrl+C 18 | * The current implementation relies on the database to handle aborted writes, which makes this a non-issue. However, it would be nice to have 19 | a well-behaving program. 20 | * [ ] Git is slowing down report generation, as sending reports to git and creating objects is slow. We could possibly multi-thread this by creating loose objects 21 | ourselves and sending these into an aggregator which puts them into the index. This is only interesting during the very first report generation though, so 22 | probably not worth it. 23 | * [ ] Have each sub-section of criner use its own error type, aggregated into the crate-level error. That way, individual errors will be smaller. 24 | * [ ] Parse CSV files separately and index rows and fields - from there build everything on the fly without having to allocate and copy strings. 25 | * probably warrants a different crate, and will really only be done if the 500MB budget isn't sufficient, that is, if things don't run on a Raspberry Pi 3 26 | 27 | 28 | ## Lessons learned 29 | 30 | * futures::ThreadPools - panicking futures crash only one thread 31 | * long-running futures need error and potentially panic recovery. Futures has a panic catcher that could be useful - see the panic-recovery sketch below. 32 | * sqlite needs a lot of massaging to work acceptably in concurrent applications. Takeaway: use WAL mode, and always use immediate transactions 33 | when writing. Retry yourself while waiting, and set a busy handler which waits - see the sqlite sketch below. 34 | * Trying to optimize output HTML for git by prettifying failed - I just couldn't see it improve anything. For debugging HTML, it's easiest to use the browser. 35 |
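To make the panic-recovery lesson concrete, here is a minimal sketch using `futures::FutureExt::catch_unwind`. It is written for this README rather than lifted from criner, and `long_running_task` is a made-up stand-in for a real worker:

```rust
use futures::FutureExt;
use std::panic::AssertUnwindSafe;

async fn long_running_task() {
    // Stand-in for a real worker future that may panic deep inside.
    panic!("simulated worker failure");
}

async fn run_supervised() {
    // `catch_unwind` converts the panic into an `Err`, so the supervisor can
    // log and reschedule instead of silently losing the worker. The wrapper
    // is needed because futures are not `UnwindSafe` by default.
    if let Err(panic) = AssertUnwindSafe(long_running_task()).catch_unwind().await {
        eprintln!("worker panicked: {panic:?}");
    }
}
```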
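And here is a sketch of the sqlite recipe from the lesson above, expressed with the `rusqlite` API: WAL mode, a busy timeout, and immediate transactions with a manual retry. The `results` table is invented for illustration - criner's own persistence layer wires this up differently:

```rust
use rusqlite::{Connection, TransactionBehavior};
use std::time::Duration;

fn open_for_concurrent_use(path: &str) -> rusqlite::Result<Connection> {
    let conn = Connection::open(path)?;
    // WAL mode lets readers proceed while a single writer is active.
    conn.query_row("PRAGMA journal_mode = WAL", [], |_| Ok(()))?;
    // Wait a while instead of failing immediately on a locked database.
    conn.busy_timeout(Duration::from_secs(5))?;
    Ok(conn)
}

fn write_with_retry(conn: &mut Connection) -> rusqlite::Result<()> {
    loop {
        // BEGIN IMMEDIATE takes the write lock up front, so lock contention
        // surfaces here instead of at commit time.
        match conn.transaction_with_behavior(TransactionBehavior::Immediate) {
            Ok(tx) => {
                tx.execute("INSERT INTO results (key, value) VALUES (?1, ?2)", ("key", "value"))?;
                return tx.commit();
            }
            // Another writer holds the lock - back off briefly and retry.
            Err(rusqlite::Error::SqliteFailure(e, _)) if e.code == rusqlite::ErrorCode::DatabaseBusy => {
                std::thread::sleep(Duration::from_millis(50));
            }
            Err(other) => return Err(other),
        }
    }
}
```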
36 | ### When migrating to Sqlite 37 | 38 | * sqlite… 39 | * is really not suited for many concurrent writers - you have to prepare for database locked errors, and the busy_handler doesn't help most of the time. 40 | * writing many small objects is slow, and can only be alleviated with prepared statements which are not always feasible or nice to use with a persistence 41 | design inspired by sled. To get there, the whole application must embrace Sqlite and work with statements directly. 42 | * Working with the lifetimes associated with transactions is a necessary evil, but it is also painful when trying to refactor anything! I just don't understand 43 | anymore what it's complaining about, and have the feeling the compiler is confused itself (as in theory, there is no issue). 44 | * sled databases are about 4 times bigger than an Sqlite database with the same content, and it would read about 1.2GB of a 14GB database at startup. 45 | * sled is easy to handle in a threaded/concurrent environment, but iteration isn't possible across awaits as it's not `Sync` 46 | * Sqlite is neither `Sync` nor `Send`, so it needs more treatment before it can be used with spawned futures 47 | * Zero-copy is straightforward with Sled as it provides IVec structs, which are handles into an LRU which is the backing store. 48 | * In retrospect, I would consider zero-copy a nice experiment, but also a premature optimization. It costs additional effort, 49 | and when done from the beginning, you don't even know how much time is actually saved through that. 50 | -------------------------------------------------------------------------------- /criner-waste-report/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(unsafe_code)] 2 | 3 | #[macro_use] 4 | extern crate lazy_static; 5 | 6 | #[cfg(feature = "html")] 7 | pub mod html; 8 | pub mod result; 9 | 10 | #[cfg(test)] 11 | mod test; 12 | 13 | use serde_derive::{Deserialize, Serialize}; 14 | use std::collections::BTreeMap; 15 | 16 | pub use result::{globset_from_patterns, tar_path_to_utf8_str}; 17 | 18 | pub type Patterns = Vec<String>; 19 | 20 | /// An entry in a tar archive, including the most important meta-data 21 | #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] 22 | pub struct TarHeader { 23 | /// The normalized path of the entry. May not be unicode encoded. 24 | pub path: Vec<u8>, 25 | /// The size of the file in bytes 26 | pub size: u64, 27 | /// The type of entry, to be analyzed with tar::EntryType 28 | pub entry_type: u8, 29 | } 30 | 31 | #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] 32 | pub struct TarPackage { 33 | /// Meta data of all entries in the crate 34 | pub entries_meta_data: Vec<TarHeader>, 35 | /// The actual content of selected files, Cargo.*, build.rs and lib/main 36 | /// IMPORTANT: This file may be partial and limited in size unless it is Cargo.toml, which 37 | /// is always complete. 38 | /// Note that these are also present in entries_meta_data.
39 | pub entries: Vec<(TarHeader, Vec<u8>)>, 40 | } 41 | 42 | #[derive(PartialEq, Eq, Debug, Clone, Deserialize, Serialize)] 43 | pub struct PotentialWaste { 44 | pub patterns_to_fix: Patterns, 45 | pub potential_waste: Vec<TarHeader>, 46 | } 47 | 48 | #[derive(PartialEq, Eq, Debug, Clone, Deserialize, Serialize)] 49 | pub enum Fix { 50 | ImprovedInclude { 51 | include: Patterns, 52 | include_removed: Patterns, 53 | potential: Option<PotentialWaste>, 54 | has_build_script: bool, 55 | }, 56 | EnrichedExclude { 57 | exclude: Patterns, 58 | exclude_added: Patterns, 59 | has_build_script: bool, 60 | }, 61 | NewInclude { 62 | include: Patterns, 63 | has_build_script: bool, 64 | }, 65 | RemoveExcludeAndUseInclude { 66 | include_added: Patterns, 67 | include: Patterns, 68 | include_removed: Patterns, 69 | }, 70 | RemoveExclude, 71 | } 72 | 73 | impl Fix { 74 | pub fn merge(self, rhs: Option<PotentialWaste>, mut waste: Vec<TarHeader>) -> (Fix, Vec<TarHeader>) { 75 | match (self, rhs) { 76 | ( 77 | Fix::NewInclude { 78 | mut include, 79 | has_build_script, 80 | }, 81 | Some(potential), 82 | ) => ( 83 | Fix::NewInclude { 84 | has_build_script, 85 | include: { 86 | include.extend(potential.patterns_to_fix); 87 | include 88 | }, 89 | }, 90 | { 91 | waste.extend(potential.potential_waste); 92 | waste 93 | }, 94 | ), 95 | (lhs, _) => (lhs, waste), 96 | } 97 | } 98 | } 99 | 100 | #[derive(Default, Deserialize)] 101 | pub struct CargoConfig { 102 | pub package: Option<PackageSection>, 103 | pub lib: Option<SectionWithPath>, 104 | pub bin: Option<Vec<SectionWithPath>>, 105 | } 106 | 107 | impl CargoConfig { 108 | pub fn actual_or_expected_build_script_path(&self) -> &str { 109 | self.build_script_path().unwrap_or("build.rs") 110 | } 111 | pub fn build_script_path(&self) -> Option<&str> { 112 | self.package.as_ref().and_then(|p| p.build_script_path()) 113 | } 114 | pub fn lib_path(&self) -> &str { 115 | self.lib 116 | .as_ref() 117 | .and_then(|l| l.path.as_deref()) 118 | .unwrap_or("src/lib.rs") 119 | } 120 | pub fn bin_paths(&self) -> Vec<&str> { 121 | self.bin 122 | .as_ref() 123 | .map(|l| l.iter().filter_map(|s| s.path.as_deref()).collect()) 124 | .unwrap_or_else(|| vec!["src/main.rs"]) 125 | } 126 | } 127 | 128 | impl From<&str> for CargoConfig { 129 | fn from(v: &str) -> Self { 130 | toml::from_str::<CargoConfig>(v).unwrap_or_default() // you would think all of them parse OK, but that's wrong :D 131 | } 132 | } 133 | 134 | #[derive(Default, Deserialize)] 135 | pub struct SectionWithPath { 136 | pub path: Option<String>, 137 | } 138 | 139 | #[derive(Default, Deserialize)] 140 | pub struct PackageSection { 141 | pub include: Option<Patterns>, 142 | pub exclude: Option<Patterns>, 143 | pub build: Option<toml::Value>, 144 | } 145 | 146 | impl PackageSection { 147 | pub fn build_script_path(&self) -> Option<&str> { 148 | self.build.as_ref().and_then(|s| s.as_str()) 149 | } 150 | } 151 | 152 | pub type WastedFile = (String, u64); 153 | 154 | #[derive(Default, Debug, Eq, PartialEq, Clone, Deserialize, Serialize)] 155 | pub struct AggregateFileInfo { 156 | pub total_bytes: u64, 157 | pub total_files: u64, 158 | } 159 | 160 | impl std::ops::AddAssign for AggregateFileInfo { 161 | fn add_assign(&mut self, rhs: Self) { 162 | let Self { 163 | total_bytes, 164 | total_files, 165 | } = rhs; 166 | self.total_bytes += total_bytes; 167 | self.total_files += total_files; 168 | } 169 | } 170 | 171 | impl std::ops::AddAssign for VersionInfo { 172 | fn add_assign(&mut self, rhs: Self) { 173 | let Self { 174 | all, 175 | waste, 176 | potential_gains, 177 | waste_latest_version, 178 | } = rhs; 179 | self.all += all; 180 | self.waste += waste; 181 | self.potential_gains =
add_optional_aggregate(self.potential_gains.clone(), potential_gains); 182 | self.waste_latest_version = 183 | add_named_optional_aggregate(self.waste_latest_version.clone(), waste_latest_version); 184 | } 185 | } 186 | 187 | fn add_named_optional_aggregate( 188 | lhs: Option<(String, AggregateFileInfo)>, 189 | rhs: Option<(String, AggregateFileInfo)>, 190 | ) -> Option<(String, AggregateFileInfo)> { 191 | Some(match (lhs, rhs) { 192 | (Some((lhs_name, lhs)), Some((rhs_name, _))) if lhs_name > rhs_name => (lhs_name, lhs), 193 | (Some(_), Some((rhs_name, rhs))) => (rhs_name, rhs), 194 | (Some(v), None) => v, 195 | (None, Some(v)) => v, 196 | (None, None) => return None, 197 | }) 198 | } 199 | 200 | pub fn add_optional_aggregate( 201 | lhs: Option<AggregateFileInfo>, 202 | rhs: Option<AggregateFileInfo>, 203 | ) -> Option<AggregateFileInfo> { 204 | Some(match (lhs, rhs) { 205 | (Some(mut lhs), Some(rhs)) => { 206 | lhs += rhs; 207 | lhs 208 | } 209 | (Some(v), None) => v, 210 | (None, Some(v)) => v, 211 | (None, None) => return None, 212 | }) 213 | } 214 | 215 | #[derive(Default, Debug, Eq, PartialEq, Clone, Deserialize, Serialize)] 216 | pub struct VersionInfo { 217 | pub all: AggregateFileInfo, 218 | pub waste: AggregateFileInfo, 219 | pub waste_latest_version: Option<(String, AggregateFileInfo)>, 220 | pub potential_gains: Option<AggregateFileInfo>, 221 | } 222 | 223 | pub type AggregateVersionInfo = VersionInfo; 224 | 225 | pub type Dict<T> = BTreeMap<String, T>; 226 | 227 | #[derive(Debug, Eq, PartialEq, Clone, Deserialize, Serialize)] 228 | pub enum Report { 229 | Version { 230 | crate_name: String, 231 | crate_version: String, 232 | total_size_in_bytes: u64, 233 | total_files: u64, 234 | wasted_files: Vec<WastedFile>, 235 | suggested_fix: Option<Fix>, 236 | }, 237 | Crate { 238 | crate_name: String, 239 | total_size_in_bytes: u64, 240 | total_files: u64, 241 | info_by_version: Dict<VersionInfo>, 242 | wasted_by_extension: Dict<AggregateFileInfo>, 243 | }, 244 | CrateCollection { 245 | total_size_in_bytes: u64, 246 | total_files: u64, 247 | info_by_crate: Dict<AggregateVersionInfo>, 248 | wasted_by_extension: Dict<AggregateFileInfo>, 249 | }, 250 | } 251 | 252 | fn remove_implicit_entries(entries: &mut Vec<TarHeader>) { 253 | entries.retain(|e| { 254 | let p = tar_path_to_utf8_str(&e.path); 255 | p != ".cargo_vcs_info.json" && p != "Cargo.toml.orig" 256 | }); 257 | } 258 | 259 | impl Report { 260 | pub fn from_package( 261 | crate_name: &str, 262 | crate_version: &str, 263 | TarPackage { 264 | mut entries_meta_data, 265 | entries, 266 | }: TarPackage, 267 | ) -> Report { 268 | remove_implicit_entries(&mut entries_meta_data); 269 | let total_size_in_bytes = entries_meta_data.iter().map(|e| e.size).sum(); 270 | let total_files = entries_meta_data.len() as u64; 271 | let cargo_config = Self::cargo_config_from_entries(&entries); 272 | let (includes, excludes, compile_time_includes, build_script_name) = 273 | Self::cargo_config_into_includes_excludes(cargo_config, &entries, &entries_meta_data); 274 | let (suggested_fix, wasted_files) = match (includes, excludes, build_script_name, compile_time_includes) { 275 | (Some(includes), Some(excludes), _presence_of_build_script_not_relevant, _) => { 276 | Self::compute_includes_from_includes_and_excludes(entries_meta_data, includes, excludes) 277 | } 278 | (Some(includes), None, build_script_name, _) => { 279 | Self::enrich_includes(entries_meta_data, includes, build_script_name.is_some()) 280 | } 281 | (None, Some(excludes), build_script_name, compile_time_includes) => Self::enrich_excludes( 282 | entries_meta_data, 283 | excludes, 284 | compile_time_includes, 285 | build_script_name.is_some(), 286 | ), 287 | (None, None,
build_script_name, compile_time_includes) => { 288 | Self::standard_includes(entries_meta_data, build_script_name, compile_time_includes) 289 | } 290 | }; 291 | let wasted_files = Self::convert_to_wasted_files(wasted_files); 292 | Report::Version { 293 | crate_name: crate_name.into(), 294 | crate_version: crate_version.into(), 295 | total_size_in_bytes, 296 | total_files, 297 | wasted_files, 298 | suggested_fix, 299 | } 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /criner-waste-report/src/test/mod.rs: -------------------------------------------------------------------------------- 1 | mod from_package; 2 | -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/avr_libc-0.1.3extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/avr_libc-0.1.3extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/cookie_factory-0.3.1-extract_crate-0.3.1.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/cookie_factory-0.3.1-extract_crate-0.3.1.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/curl_sys-0.4.27-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/curl_sys-0.4.27-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/deno_typescript-0.36.0-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/deno_typescript-0.36.0-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/falcon_raptor-0.4.9-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/falcon_raptor-0.4.9-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/fermium-20.12.0-alpha2-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/fermium-20.12.0-alpha2-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/gnir-0.14.0-alpha3-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/gnir-0.14.0-alpha3-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/grpcio-sys-0.5.0_extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/grpcio-sys-0.5.0_extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/lw_webdriver-0.4.1-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/lw_webdriver-0.4.1-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/mozjs_sys-0.67.1-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/mozjs_sys-0.67.1-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/openblas_provider-0.4.0-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/openblas_provider-0.4.0-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/ripgrep-12.0.0-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/ripgrep-12.0.0-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/sovrin-client.0.1.0-179-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/sovrin-client.0.1.0-179-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner-waste-report/tests/fixtures/threed-ice-sys-0.3.0-extract_crate-1.0.0.package.rmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-lean-crate/criner/a075e734dede8e1de5fe1652ec86f42da0162c41/criner-waste-report/tests/fixtures/threed-ice-sys-0.3.0-extract_crate-1.0.0.package.rmp -------------------------------------------------------------------------------- /criner/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "criner" 3 | version = "0.4.0" 4 | authors = ["Sebastian Thiel "] 5 | edition = "2018" 6 | description = "a platform for 
resumable mining of crates.io for knowledge and information" 7 | license = "MIT" 8 | repository = "https://github.com/the-lean-crate/criner" 9 | readme = "README.md" 10 | include = ["src/**/*", "LICENSE.md", "README.md", "!**/*_test/*"] 11 | 12 | [features] 13 | default = [] 14 | migration = ["jwalk"] 15 | 16 | [lib] 17 | doctest = false 18 | 19 | [dependencies] 20 | quick-error = "2.0.0" 21 | futures-util = { version = "0.3.5", default-features = false, features = ["io", "sink"]} 22 | crates-index-diff = "25.0.0" 23 | gix = { version = "0.63.0", features = ["parallel"] } 24 | git2 = { version = "0.19.0", default-features = false, features = ["https"] } 25 | prodash = { version = "28.0.0", default-features = false, features = ["render-tui", "render-tui-crossterm", "local-time", "progress-tree", "progress-tree-log"] } 26 | hex = "0.4.3" 27 | rmp-serde = "1.0.0" 28 | serde_derive = "1.0.104" 29 | serde = "1.0.104" 30 | humantime = "2.0.0" 31 | log = "0.4.8" 32 | reqwest = { version = "0.11.1", features = ["gzip"] } 33 | http = { version = "0.2.0", default-features = false } 34 | jwalk = { version = "0.8.1", optional = true } 35 | tar = "0.4.26" 36 | libflate = "1.0.0" 37 | bytesize = "1.0.0" 38 | rmpv = "1.0.0" 39 | rusqlite = { version = "0.32.1", features = ["bundled", "unlock_notify"] } 40 | parking_lot = "0.12.0" 41 | async-trait = "0.1.24" 42 | dia-semver = "11.0.0" 43 | futures-lite = "1.4.0" 44 | blocking = "1.0.0" 45 | async-channel = "1.1.1" 46 | once_cell = "1.4.0" 47 | async-executor = "1.1.0" 48 | async-io = "1.1.0" 49 | async-compat = "0.2.0" # increase this version to get more recent tokio releases 50 | # for properly shutting down the GUI when SIGTERM is sent directly 51 | ctrlc = { version = "3.1.4", features = ["termination"] } 52 | time = { version = "0.3.5", features = ["parsing", "macros"] } 53 | 54 | # for parsing csv files contained in crates-db-download 55 | csv = "1.1.3" 56 | # for deleting old database dumps - they remain on disk for a day at most 57 | glob = "0.3.0" 58 | 59 | # For 'export' functionality only (embed json in SQL text for simplicity) and for some fields in crates-io csv download 60 | serde_json = "1.0.48" 61 | 62 | # For waste report computation and html generation 63 | toml = "0.7.2" 64 | globset = "0.4.4" 65 | horrorshow = "0.8.1" 66 | regex = "1.3.4" 67 | lazy_static = "1.4.0" 68 | criner-waste-report = { version = "^0.1.5", path = "../criner-waste-report" } 69 | 70 | [dev-dependencies] 71 | common_macros = "0.1.1" 72 | -------------------------------------------------------------------------------- /criner/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © `2020` `Sebastian Thiel` 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 
17 | 
18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
25 | OTHER DEALINGS IN THE SOFTWARE. 
26 | 
--------------------------------------------------------------------------------
/criner/README.md:
--------------------------------------------------------------------------------
1 | ![Rust](https://github.com/the-lean-crate/criner/workflows/Rust/badge.svg)
2 | [![crates.io version](https://img.shields.io/crates/v/criner.svg)](https://crates.io/crates/criner)
3 | 
4 | ## TODO
5 | * [ ] Incorporate download counts into report
6 | * [ ] Make things prettier and more visual - that way we can try again for a come-back :D
7 | * [ ] See why RipGrep doesn't get any suggestions
8 | * [ ] More reporting - right now the context gathering to see how much time is spent where is neglected.
9 | 
10 | ## Possible Improvements
11 | * [ ] Suggest 'top-level' globs like `/README.md` if we know the matched file is on the top-level. Otherwise the pattern `README.md` will actually match `*/README.md`.
12 | * [ ] Count negation patterns in includes and excludes. The latter don't seem to be working, and if nobody is using them, Cargo can either make them work or
13 | reject them properly. Maybe first create an issue for that and see what the Cargo team thinks.
14 | * [ ] On chunk download timeout, don't restart, but resume the download where it left off
15 | * [ ] resilience: protect against thread panics - they prevent the program from shutting down
16 | * Futures has a wrapper to catch panics, even though we don't use it yet. A panic only brings down the future that panics, not the entire program.
17 | * [ ] Graceful shutdown on Ctrl+C
18 | * The current implementation relies on the database to handle aborted writes, so this is not a problem. However, it would be nice to have
19 | a well-behaved program.
20 | * [ ] Git is slowing down report generation, as sending reports to git and creating objects is slow. We could possibly multi-thread this by creating loose objects
21 | ourselves and sending these into an aggregator which puts them into the index. This is only interesting during the very first report generation though, so
22 | probably not worth it.
23 | * [ ] Have each sub-section of criner use its own error type, aggregated into the crate-level error. That way, individual errors will be smaller.
24 | * [ ] Parse CSV files separately and index rows and fields - from there, build everything on the fly without having to allocate and copy strings.
25 | * probably warrants a different crate, and will really only be done if the 500MB budget isn't sufficient, that is, if things don't run on the Raspberry Pi 3
26 | 
27 | 
28 | ## Lessons learned
29 | 
30 | * futures::ThreadPools - panicking futures crash only one thread
31 | * long-running futures need error and potentially panic recovery. Futures has a panic catcher that could be useful.
32 | * sqlite needs a lot of massaging to work acceptably in concurrent applications. Takeaway: use WAL mode, and always use immediate transactions
33 | when writing. Retry yourself while waiting, and set a busy handler which waits.
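A minimal sketch of that sqlite takeaway with `rusqlite` (illustrative only - the function names and the `kv` table are invented here, not criner's actual schema):

```rust
use std::time::Duration;
use rusqlite::{Connection, TransactionBehavior};

fn open_for_concurrent_writers(path: &str) -> rusqlite::Result<Connection> {
    let conn = Connection::open(path)?;
    // WAL mode lets readers proceed while a single writer is active.
    conn.pragma_update(None, "journal_mode", "WAL")?;
    // Keep retrying instead of failing immediately with 'database is locked'.
    conn.busy_handler(Some(|_attempts: i32| {
        std::thread::sleep(Duration::from_millis(50));
        true // true means: try again
    }))?;
    Ok(conn)
}

fn write(conn: &mut Connection) -> rusqlite::Result<()> {
    // Immediate transactions take the write lock up front, instead of
    // deadlocking on a read-to-write upgrade later.
    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
    tx.execute("INSERT INTO kv (key, value) VALUES (?1, ?2)", rusqlite::params!["k", "v"])?;
    tx.commit()
}
```

Criner additionally truncates the WAL itself from time to time (see the checkpointing code in `processing.rs` further below), since passive autocheckpoints don't keep up with this many writers.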
34 | * Trying to optimize output HTML for git by prettifying failed - I just couldn't see it improve anything. For debugging HTML, it's easiest to use the browser.
35 | 
36 | ### When migrating to Sqlite
37 | 
38 | * sqlite…
39 | * is really not suited for many concurrent writers - you have to prepare for 'database locked' errors, and the busy_handler doesn't help most of the time.
40 | * writing many small objects is slow, and can only be alleviated with prepared statements, which are not always feasible or nice to use with a persistence
41 | design inspired by sled. To alleviate this, the whole application must embrace Sqlite and work with statements directly.
42 | * Working with the lifetimes associated with transactions is a necessary evil, but it is painful when trying to refactor anything! I just don't understand
43 | anymore what it is trying to do, and have the feeling the compiler is confused itself (as in theory, there is no issue).
44 | * sled databases are about 4 times bigger than an Sqlite database with the same content, and it would read about 1.2GB of a 14GB database at startup.
45 | * sled is easy to handle in a threaded/concurrent environment, but iteration isn't possible across awaits as it's not Sync
46 | * Sqlite is neither Sync nor Send, so it needs more treatment before it can be used with spawned futures
47 | * Zero-copy is straightforward with sled as it provides IVec structs, which are handles into an LRU which is the backing store.
48 | * In retrospect, I would consider zero-copy a nice experiment, but also a premature optimization. It costs additional effort,
49 | and when done from the beginning, you don't even know how much time is actually saved through it.
50 | 
--------------------------------------------------------------------------------
/criner/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 120
2 | 
--------------------------------------------------------------------------------
/criner/src/engine/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod report;
2 | pub mod stage;
3 | pub mod work;
4 | 
5 | pub mod run;
6 | 
--------------------------------------------------------------------------------
/criner/src/engine/report/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod generic;
2 | pub mod waste;
3 | 
--------------------------------------------------------------------------------
/criner/src/engine/report/waste/mod.rs:
--------------------------------------------------------------------------------
1 | use crate::persistence::TableAccess;
2 | use crate::{error::Result, model::TaskResult, persistence};
3 | use async_trait::async_trait;
4 | 
5 | pub use criner_waste_report::*;
6 | 
7 | mod merge;
8 | 
9 | pub struct Generator;
10 | 
11 | // NOTE: When multiple reports should be combined, this must become a compound generator which combines
12 | // multiple implementations into one, statically.
13 | #[async_trait]
14 | impl super::generic::Generator for Generator {
15 | type Report = Report;
16 | type DBResult = TaskResult;
17 | 
18 | fn name() -> &'static str {
19 | "waste"
20 | }
21 | 
22 | fn version() -> &'static str {
23 | "1.0.0"
24 | }
25 | 
26 | fn fq_result_key(crate_name: &str, crate_version: &str, key_buf: &mut String) {
27 | let dummy_task = crate::engine::work::cpubound::default_persisted_extraction_task();
28 | let dummy_result = TaskResult::ExplodedCrate {
29 | entries_meta_data: Default::default(),
30 | selected_entries: Default::default(),
31 | };
32 | dummy_result.fq_key(crate_name, crate_version, &dummy_task, key_buf);
33 | }
34 | 
35 | fn get_result(
36 | connection: persistence::ThreadSafeConnection,
37 | crate_name: &str,
38 | crate_version: &str,
39 | key_buf: &mut String,
40 | ) -> Result<Option<TaskResult>> {
41 | Self::fq_result_key(crate_name, crate_version, key_buf);
42 | let table = persistence::TaskResultTable { inner: connection };
43 | table.get(&key_buf)
44 | }
45 | 
46 | async fn generate_report(
47 | crate_name: &str,
48 | crate_version: &str,
49 | result: TaskResult,
50 | _progress: &mut prodash::tree::Item,
51 | ) -> Result<Report> {
52 | Ok(match result {
53 | TaskResult::ExplodedCrate {
54 | entries_meta_data,
55 | selected_entries,
56 | } => Report::from_package(
57 | crate_name,
58 | crate_version,
59 | TarPackage {
60 | entries_meta_data,
61 | entries: selected_entries,
62 | },
63 | ),
64 | _ => unreachable!("caller must assure we are always an exploded entry"),
65 | })
66 | }
67 | }
68 | 
69 | #[cfg(test)]
70 | mod report_test;
71 | 
--------------------------------------------------------------------------------
/criner/src/engine/report/waste/report_test/mod.rs:
--------------------------------------------------------------------------------
1 | mod merge;
2 | 
--------------------------------------------------------------------------------
/criner/src/engine/run.rs:
--------------------------------------------------------------------------------
1 | use crate::{engine::stage, error::Result, model, persistence::Db, utils::*};
2 | use futures_util::{
3 | future::{Either, FutureExt},
4 | stream::StreamExt,
5 | };
6 | use log::{info, warn};
7 | use prodash::render::tui::{Event, Line};
8 | use std::sync::Arc;
9 | use std::{
10 | path::{Path, PathBuf},
11 | time::{Duration, SystemTime},
12 | };
13 | 
14 | pub struct StageRunSettings {
15 | /// Wait for the given duration after the stage ran
16 | pub every: Duration,
17 | /// If None, run the stage indefinitely. Otherwise run it the given amount of times. Some(0) disables the stage.
18 | pub at_most: Option<usize>,
19 | }
20 | 
21 | /// Like `StageRunSettings`, but also provides a glob pattern
22 | pub struct GlobStageRunSettings {
23 | pub glob: Option<String>,
24 | pub run: StageRunSettings,
25 | }
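// Illustrative sketch (not part of the original file): how a caller might fill
// in these settings - the values below are invented, criner's real defaults
// live in its CLI layer.
//
// let fetch_settings = StageRunSettings {
//     every: Duration::from_secs(5 * 60), // re-fetch the index every 5 minutes
//     at_most: None,                      // ... indefinitely
// };
// let report_settings = GlobStageRunSettings {
//     glob: Some("gix*".into()), // only report on crates matching the glob
//     run: StageRunSettings {
//         every: Duration::from_secs(60 * 60),
//         at_most: Some(1), // generate reports only once
//     },
// };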
26 | 
27 | #[allow(clippy::too_many_arguments)]
28 | /// Runs the statistics and mining engine.
29 | /// May run for a long time unless a deadline is specified.
30 | /// Even though timeouts can be achieved from outside of the future, knowing the deadline may be used
31 | /// by the engine to manage its time even more efficiently.
32 | pub async fn non_blocking(
33 | db: Db,
34 | crates_io_path: PathBuf,
35 | deadline: Option<SystemTime>,
36 | progress: Arc<prodash::tree::Root>,
37 | io_bound_processors: u32,
38 | cpu_bound_processors: u32,
39 | cpu_o_bound_processors: u32,
40 | interrupt_control: InterruptControlEvents,
41 | db_download: bool,
42 | fetch_settings: StageRunSettings,
43 | process_settings: StageRunSettings,
44 | report_settings: GlobStageRunSettings,
45 | download_crates_io_database_every_24_hours_starting_at: Option<time::Time>,
46 | assets_dir: PathBuf,
47 | ) -> Result<()> {
48 | check(deadline)?;
49 | let startup_time = SystemTime::now();
50 | 
51 | let db_download_handle = db_download.then(|| {
52 | crate::spawn(repeat_daily_at(
53 | download_crates_io_database_every_24_hours_starting_at,
54 | {
55 | let p = progress.clone();
56 | move || p.add_child("Crates.io DB Digest")
57 | },
58 | deadline,
59 | {
60 | let db = db.clone();
61 | let assets_dir = assets_dir.clone();
62 | let progress = progress.clone();
63 | move || {
64 | stage::db_download::schedule(
65 | db.clone(),
66 | assets_dir.clone(),
67 | progress.add_child("fetching crates-io db"),
68 | startup_time,
69 | )
70 | }
71 | },
72 | ))
73 | });
74 | 
75 | let run = fetch_settings;
76 | let fetch_handle = crate::spawn(repeat_every_s(
77 | run.every.as_secs() as usize,
78 | {
79 | let p = progress.clone();
80 | move || p.add_child("Fetch Timer")
81 | },
82 | deadline,
83 | run.at_most,
84 | {
85 | let db = db.clone();
86 | let progress = progress.clone();
87 | move || {
88 | stage::changes::fetch(
89 | crates_io_path.clone(),
90 | db.clone(),
91 | progress.add_child("crates.io refresh"),
92 | deadline,
93 | )
94 | }
95 | },
96 | ));
97 | 
98 | let stage = process_settings;
99 | let processing_handle = crate::spawn(repeat_every_s(
100 | stage.every.as_secs() as usize,
101 | {
102 | let p = progress.clone();
103 | move || p.add_child("Processing Timer")
104 | },
105 | deadline,
106 | stage.at_most,
107 | {
108 | let progress = progress.clone();
109 | let db = db.clone();
110 | let assets_dir = assets_dir.clone();
111 | move || {
112 | stage::processing::process(
113 | db.clone(),
114 | progress.add_child("Process Crate Versions"),
115 | io_bound_processors,
116 | cpu_bound_processors,
117 | progress.add_child("Downloads"),
118 | assets_dir.clone(),
119 | startup_time,
120 | )
121 | }
122 | },
123 | ));
124 | 
125 | let stage = report_settings;
126 | let report_handle = crate::spawn(repeat_every_s(
127 | stage.run.every.as_secs() as usize,
128 | {
129 | let p = progress.clone();
130 | move || p.add_child("Reporting Timer")
131 | },
132 | deadline,
133 | stage.run.at_most,
134 | {
135 | move || {
136 | let progress = progress.clone();
137 | let db = db.clone();
138 | let assets_dir = assets_dir.clone();
139 | let glob = stage.glob.clone();
140 | let interrupt_control = interrupt_control.clone();
141 | async move {
142 | let ctrl = interrupt_control;
143 | ctrl.send(Interruptible::Deferred).await.ok(); // there might be no TUI
144 | let res = stage::report::generate(
145 | db.clone(),
146 | progress.add_child("Reports"),
147 | assets_dir.clone(),
148 | glob.clone(),
149 | deadline,
150 | cpu_o_bound_processors,
151 | )
152 | .await;
153 | ctrl.send(Interruptible::Instantly).await.ok(); // there might be no TUI
154 | res
155 | }
156 | }
157 | },
158 | ));
159 | 
160 | fetch_handle.await?;
161 | if let Some(handle) = db_download_handle {
162 | handle.await? 
163 | };
164 | report_handle.await?;
165 | processing_handle.await
166 | }
167 | 
168 | pub enum Interruptible {
169 | Instantly,
170 | Deferred,
171 | }
172 | 
173 | pub type InterruptControlEvents = async_channel::Sender<Interruptible>;
174 | 
175 | impl From<Interruptible> for prodash::render::tui::Event {
176 | fn from(v: Interruptible) -> Self {
177 | match v {
178 | Interruptible::Instantly => Event::SetInterruptMode(prodash::render::tui::Interrupt::Instantly),
179 | Interruptible::Deferred => Event::SetInterruptMode(prodash::render::tui::Interrupt::Deferred),
180 | }
181 | }
182 | }
183 | 
184 | #[allow(clippy::too_many_arguments)]
185 | /// For convenience, run the engine and block until done.
186 | pub fn blocking(
187 | db: impl AsRef<Path>,
188 | crates_io_path: impl AsRef<Path>,
189 | deadline: Option<SystemTime>,
190 | io_bound_processors: u32,
191 | cpu_bound_processors: u32,
192 | cpu_o_bound_processors: u32,
193 | db_download: bool,
194 | fetch_settings: StageRunSettings,
195 | process_settings: StageRunSettings,
196 | report_settings: GlobStageRunSettings,
197 | download_crates_io_database_every_24_hours_starting_at: Option<time::Time>,
198 | root: Arc<prodash::tree::Root>,
199 | gui: Option<prodash::render::tui::Options>,
200 | ) -> Result<()> {
201 | let start_of_computation = SystemTime::now();
202 | let assets_dir = db.as_ref().join("assets");
203 | let db = Db::open(db)?;
204 | std::fs::create_dir_all(&assets_dir)?;
205 | let (interrupt_control_sink, interrupt_control_stream) = async_channel::bounded::<Interruptible>(1);
206 | 
207 | // dropping the work handle will stop (non-blocking) futures
208 | let work_handle = non_blocking(
209 | db.clone(),
210 | crates_io_path.as_ref().into(),
211 | deadline,
212 | root.clone(),
213 | io_bound_processors,
214 | cpu_bound_processors,
215 | cpu_o_bound_processors,
216 | interrupt_control_sink,
217 | db_download,
218 | fetch_settings,
219 | process_settings,
220 | report_settings,
221 | download_crates_io_database_every_24_hours_starting_at,
222 | assets_dir,
223 | );
224 | 
225 | match gui {
226 | Some(gui_options) => {
227 | let gui = crate::spawn(prodash::render::tui::render_with_input(
228 | std::io::stdout(),
229 | Arc::downgrade(&root),
230 | gui_options,
231 | futures_util::stream::select(
232 | context_stream(&db, start_of_computation),
233 | interrupt_control_stream.map(Event::from),
234 | ),
235 | )?);
236 | 
237 | let either = futures_lite::future::block_on(futures_util::future::select(
238 | handle_ctrl_c_and_sigterm(work_handle.boxed_local()).boxed_local(),
239 | gui,
240 | ));
241 | match either {
242 | Either::Left((work_result, gui)) => {
243 | futures_lite::future::block_on(gui.cancel());
244 | if let Err(e) = work_result? {
245 | warn!("work processor failed: {}", e);
246 | }
247 | }
248 | Either::Right((_, _work_handle)) => {}
249 | }
250 | }
251 | None => {
252 | drop(interrupt_control_stream);
253 | let work_result = futures_lite::future::block_on(handle_ctrl_c_and_sigterm(work_handle.boxed_local()));
254 | if let Err(e) = work_result {
255 | warn!("work processor failed: {}", e);
256 | }
257 | }
258 | };
259 | 
260 | // at this point, we forget all currently running computation, and since it's in the local thread, it's all
261 | // destroyed/dropped properly.
262 | info!("{}", wallclock(start_of_computation)); 263 | Ok(()) 264 | } 265 | 266 | fn wallclock(since: SystemTime) -> String { 267 | format!( 268 | "Wallclock elapsed: {}", 269 | humantime::format_duration(SystemTime::now().duration_since(since).unwrap_or_default()) 270 | ) 271 | } 272 | 273 | fn context_stream(db: &Db, start_of_computation: SystemTime) -> impl futures_util::stream::Stream { 274 | prodash::render::tui::ticker(Duration::from_secs(1)).map({ 275 | let db = db.clone(); 276 | move |_| { 277 | db.open_context() 278 | .ok() 279 | .and_then(|c| c.most_recent().ok()) 280 | .flatten() 281 | .map(|(_, c): (_, model::Context)| { 282 | let lines = vec![ 283 | Line::Text(wallclock(start_of_computation)), 284 | Line::Title("Durations".into()), 285 | Line::Text(format!("fetch-crate-versions: {:?}", c.durations.fetch_crate_versions)), 286 | Line::Title("Counts".into()), 287 | Line::Text(format!("crate-versions: {}", c.counts.crate_versions)), 288 | Line::Text(format!(" crates: {}", c.counts.crates)), 289 | ]; 290 | Event::SetInformation(lines) 291 | }) 292 | .unwrap_or(Event::Tick) 293 | } 294 | }) 295 | } 296 | -------------------------------------------------------------------------------- /criner/src/engine/stage/changes.rs: -------------------------------------------------------------------------------- 1 | use crate::persistence::{key_value_iter, new_key_value_query_old_to_new, CrateTable, Keyed}; 2 | use crate::{ 3 | error::{Error, Result}, 4 | model, 5 | persistence::{self, new_key_value_insertion, CrateVersionTable, TableAccess}, 6 | utils::enforce_threaded, 7 | }; 8 | use crates_index_diff::Index; 9 | use rusqlite::params; 10 | use std::convert::TryFrom; 11 | use std::sync::atomic::AtomicBool; 12 | use std::{ 13 | collections::BTreeMap, 14 | ops::Add, 15 | path::Path, 16 | time::{Duration, SystemTime}, 17 | }; 18 | 19 | pub async fn fetch( 20 | crates_io_path: impl AsRef, 21 | db: persistence::Db, 22 | mut progress: prodash::tree::Item, 23 | deadline: Option, 24 | ) -> Result<()> { 25 | let start = SystemTime::now(); 26 | let subprogress = progress.add_child("Fetching changes from crates.io index"); 27 | subprogress.blocked("potentially cloning", None); 28 | let index = enforce_threaded( 29 | deadline.unwrap_or_else(|| SystemTime::now().add(Duration::from_secs(60 * 60))), 30 | { 31 | let path = crates_io_path.as_ref().to_path_buf(); 32 | if !path.is_dir() { 33 | std::fs::create_dir(&path)?; 34 | } 35 | || Index::from_path_or_cloned(path) 36 | }, 37 | ) 38 | .await??; 39 | let (crate_versions, last_seen_git_object) = enforce_threaded( 40 | deadline.unwrap_or_else(|| SystemTime::now().add(Duration::from_secs(10 * 60))), 41 | move || { 42 | index.peek_changes_with_options( 43 | subprogress, 44 | &AtomicBool::default(), 45 | crates_index_diff::index::diff::Order::ImplementationDefined, 46 | ) 47 | }, 48 | ) 49 | .await??; 50 | 51 | progress.done(format!("Fetched {} changed crates", crate_versions.len())); 52 | 53 | let mut store_progress = progress.add_child("processing new crates"); 54 | store_progress.init(Some(crate_versions.len()), Some("crate versions".into())); 55 | 56 | let without_time_limit_unless_one_is_set = 57 | deadline.unwrap_or_else(|| SystemTime::now().add(Duration::from_secs(24 * 60 * 60))); 58 | enforce_threaded(without_time_limit_unless_one_is_set, { 59 | let db = db.clone(); 60 | let index_path = crates_io_path.as_ref().to_path_buf(); 61 | move || { 62 | let mut connection = db.open_connection_no_async_with_busy_wait()?; 63 | let mut crates_lut: BTreeMap<_, _> = 
64 | let transaction = connection.transaction()?;
65 | store_progress.blocked("caching crates", None);
66 | let mut statement = new_key_value_query_old_to_new(CrateTable::table_name(), &transaction)?;
67 | let iter = key_value_iter::<model::Crate>(&mut statement)?.flat_map(Result::ok);
68 | iter.collect()
69 | };
70 | 
71 | let mut key_buf = String::new();
72 | let crate_versions_len = crate_versions.len();
73 | let mut new_crate_versions = 0;
74 | let mut new_crates = 0;
75 | store_progress.blocked("write lock for crate versions", None);
76 | let transaction = connection.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
77 | {
78 | let mut statement = new_key_value_insertion(CrateVersionTable::table_name(), &transaction)?;
79 | for version in crate_versions
80 | .into_iter()
81 | .filter_map(|v| model::CrateVersion::try_from(v).ok())
82 | {
83 | key_buf.clear();
84 | version.key_buf(&mut key_buf);
85 | statement.execute(params![&key_buf, rmp_serde::to_vec(&version)?])?;
86 | new_crate_versions += 1;
87 | 
88 | key_buf.clear();
89 | model::Crate::key_from_version_buf(&version, &mut key_buf);
90 | if crates_lut
91 | .entry(key_buf.to_owned())
92 | .or_default()
93 | .merge_mut(&version)
94 | .versions
95 | .len()
96 | == 1
97 | {
98 | new_crates += 1;
99 | }
100 | 
101 | store_progress.inc();
102 | }
103 | }
104 | 
105 | store_progress.blocked("commit crate versions", None);
106 | transaction.commit()?;
107 | 
108 | let transaction = {
109 | store_progress.blocked("write lock for crates", None);
110 | let mut t = connection.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
111 | t.set_drop_behavior(rusqlite::DropBehavior::Commit);
112 | t
113 | };
114 | {
115 | let mut statement = new_key_value_insertion(CrateTable::table_name(), &transaction)?;
116 | store_progress.init(Some(crates_lut.len()), Some("crates".into()));
117 | for (key, value) in crates_lut.into_iter() {
118 | statement.execute(params![key, rmp_serde::to_vec(&value)?])?;
119 | store_progress.inc();
120 | }
121 | }
122 | store_progress.blocked("commit crates", None);
123 | transaction.commit()?;
124 | 
125 | Index::from_path_or_cloned(index_path)?.set_last_seen_reference(last_seen_git_object)?;
126 | db.open_context()?.update_today(|c| {
127 | c.counts.crate_versions += new_crate_versions;
128 | c.counts.crates += new_crates;
129 | c.durations.fetch_crate_versions += SystemTime::now()
130 | .duration_since(start)
131 | .unwrap_or_else(|_| Duration::default())
132 | })?;
133 | store_progress.done(format!("Stored {} crate versions to database", crate_versions_len));
134 | Ok::<_, Error>(())
135 | }
136 | })
137 | .await??;
138 | Ok(())
139 | }
140 | 
--------------------------------------------------------------------------------
/criner/src/engine/stage/db_download/csv_model.rs:
--------------------------------------------------------------------------------
1 | use serde_derive::Deserialize;
2 | use std::collections::BTreeMap;
3 | use std::time::SystemTime;
4 | 
5 | type UserId = u32;
6 | pub type Id = u32;
7 | pub type GitHubId = i32;
8 | 
9 | #[derive(Deserialize, Default, Clone)]
10 | pub struct Keyword {
11 | pub id: Id,
12 | #[serde(rename = "keyword")]
13 | pub name: String,
14 | // amount of crates using the keyword
15 | #[serde(rename = "crates_cnt")]
16 | pub crates_count: u32,
17 | }
18 | 
19 | #[derive(Deserialize, Default, Clone)]
20 | pub struct Category {
21 | pub id: Id,
22 | #[serde(rename = "category")]
23 | pub name: String,
24 | #[serde(rename = "crates_cnt")]
25 | pub crates_count: u32,
26 | pub description: String,
27 | pub path: String,
28 | pub slug: String,
29 | }
30 | 
31 | #[derive(Deserialize)]
32 | pub struct Crate {
33 | pub id: Id,
34 | pub name: String,
35 | #[serde(deserialize_with = "deserialize_timestamp")]
36 | pub created_at: SystemTime,
37 | #[serde(deserialize_with = "deserialize_timestamp")]
38 | pub updated_at: SystemTime,
39 | pub description: Option<String>,
40 | pub documentation: Option<String>,
41 | pub downloads: u64,
42 | pub homepage: Option<String>,
43 | pub readme: Option<String>,
44 | pub repository: Option<String>,
45 | }
46 | 
47 | pub enum UserKind {
48 | User,
49 | Team,
50 | }
51 | 
52 | #[derive(Deserialize)]
53 | pub struct User {
54 | pub id: Id,
55 | #[serde(rename = "gh_avatar")]
56 | pub github_avatar_url: String,
57 | #[serde(rename = "gh_id")]
58 | pub github_id: GitHubId,
59 | #[serde(rename = "gh_login")]
60 | pub github_login: String,
61 | pub name: Option<String>,
62 | }
63 | 
64 | #[derive(Deserialize)]
65 | pub struct Team {
66 | pub id: Id,
67 | #[serde(rename = "avatar")]
68 | pub github_avatar_url: String,
69 | #[serde(rename = "github_id")]
70 | pub github_id: GitHubId,
71 | #[serde(rename = "login")]
72 | pub github_login: String,
73 | pub name: Option<String>,
74 | }
75 | 
76 | fn deserialize_owner_kind<'de, D>(deserializer: D) -> Result<UserKind, D::Error>
77 | where
78 | D: serde::Deserializer<'de>,
79 | {
80 | use serde::Deserialize;
81 | let val = u8::deserialize(deserializer)?;
82 | Ok(if val == 0 { UserKind::User } else { UserKind::Team })
83 | }
84 | 
85 | fn deserialize_json_map<'de, D>(deserializer: D) -> Result<Vec<Feature>, D::Error>
86 | where
87 | D: serde::Deserializer<'de>,
88 | {
89 | use serde::Deserialize;
90 | let val = std::borrow::Cow::<'de, str>::deserialize(deserializer)?;
91 | let val: BTreeMap<String, Vec<String>> = serde_json::from_str(&val).map_err(serde::de::Error::custom)?;
92 | Ok(val.into_iter().map(|(name, crates)| Feature { name, crates }).collect())
93 | }
94 | 
95 | fn deserialize_yanked<'de, D>(deserializer: D) -> Result<bool, D::Error>
96 | where
97 | D: serde::Deserializer<'de>,
98 | {
99 | use serde::Deserialize;
100 | let val = std::borrow::Cow::<'de, str>::deserialize(deserializer)?;
101 | Ok(val == "t")
102 | }
103 | 
104 | fn deserialize_timestamp<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
105 | where
106 | D: serde::Deserializer<'de>,
107 | {
108 | use serde::Deserialize;
109 | let val = std::borrow::Cow::<'de, str>::deserialize(deserializer)?;
110 | // 2017-11-30 04:00:19.334919
111 | let t = time::PrimitiveDateTime::parse(
112 | val.as_ref().split('.').next().unwrap_or_else(|| val.as_ref()),
113 | // 2015-04-24 18:26:11
114 | &time::macros::format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"),
115 | )
116 | .map_err(serde::de::Error::custom)?;
117 | Ok(t.assume_offset(time::UtcOffset::UTC).into())
118 | }
119 | 
120 | pub struct Feature {
121 | pub name: String,
122 | /// The crates the feature depends on
123 | pub crates: Vec<String>,
124 | }
125 | 
126 | #[derive(Deserialize)]
127 | pub struct Version {
128 | pub id: Id,
129 | pub crate_id: Id,
130 | pub crate_size: Option<u32>,
131 | #[serde(deserialize_with = "deserialize_timestamp")]
132 | pub created_at: SystemTime,
133 | #[serde(deserialize_with = "deserialize_timestamp")]
134 | pub updated_at: SystemTime,
135 | pub downloads: u32,
136 | #[serde(deserialize_with = "deserialize_json_map")]
137 | pub features: Vec<Feature>,
138 | pub license: String,
139 | #[serde(rename = "num")]
140 | pub semver: String,
141 | pub published_by: Option<UserId>,
142 | #[serde(deserialize_with = "deserialize_yanked", rename = "yanked")]
143 | pub is_yanked: bool,
144 | }
145 | 
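// Illustrative sketch (not part of the original file): exercising the custom
// deserializers above through serde's `deserialize_with`, using the same `csv`
// crate the ingestion code uses. `Row` and the sample data are invented here.
#[cfg(test)]
mod deserializer_example {
    use super::*;

    #[derive(serde_derive::Deserialize)]
    struct Row {
        #[serde(deserialize_with = "deserialize_yanked")]
        yanked: bool,
        #[serde(deserialize_with = "deserialize_timestamp")]
        created_at: std::time::SystemTime,
    }

    #[test]
    fn parses_a_sample_row() {
        // 't' means yanked; fractional seconds are cut off before parsing
        let data = "yanked,created_at\nt,2017-11-30 04:00:19.334919\n";
        let mut rd = csv::Reader::from_reader(data.as_bytes());
        let row: Row = rd.deserialize().next().expect("one record").expect("valid row");
        assert!(row.yanked);
        assert!(row.created_at > std::time::UNIX_EPOCH);
    }
}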
146 | #[derive(Deserialize)]
147 | pub struct CrateOwner {
148 | pub crate_id: Id,
149 | pub created_by: Option<UserId>,
150 | pub owner_id: UserId,
151 | #[serde(deserialize_with = "deserialize_owner_kind")]
152 | pub owner_kind: UserKind,
153 | }
154 | 
155 | #[derive(Deserialize)]
156 | pub struct CratesCategory {
157 | pub category_id: Id,
158 | pub crate_id: Id,
159 | }
160 | 
161 | #[derive(Deserialize)]
162 | pub struct CratesKeyword {
163 | pub keyword_id: Id,
164 | pub crate_id: Id,
165 | }
166 | 
--------------------------------------------------------------------------------
/criner/src/engine/stage/db_download/from_csv.rs:
--------------------------------------------------------------------------------
1 | use super::csv_model;
2 | use std::collections::BTreeMap;
3 | 
4 | pub trait AsId {
5 | fn as_id(&self) -> csv_model::Id;
6 | }
7 | 
8 | macro_rules! impl_as_id {
9 | ($name:ident) => {
10 | impl AsId for csv_model::$name {
11 | fn as_id(&self) -> csv_model::Id {
12 | self.id
13 | }
14 | }
15 | };
16 | }
17 | 
18 | impl_as_id!(Keyword);
19 | impl_as_id!(Version);
20 | impl_as_id!(Category);
21 | impl_as_id!(User);
22 | impl_as_id!(Team);
23 | impl_as_id!(Crate);
24 | 
25 | pub fn records<T>(
26 | csv: impl std::io::Read,
27 | progress: &mut prodash::tree::Item,
28 | mut cb: impl FnMut(T),
29 | ) -> crate::Result<()>
30 | where
31 | T: serde::de::DeserializeOwned,
32 | {
33 | let mut rd = csv::ReaderBuilder::new()
34 | .delimiter(b',')
35 | .has_headers(true)
36 | .flexible(true)
37 | .from_reader(csv);
38 | for item in rd.deserialize() {
39 | cb(item?);
40 | progress.inc();
41 | }
42 | Ok(())
43 | }
44 | 
45 | pub fn mapping<T>(
46 | rd: impl std::io::Read,
47 | name: &'static str,
48 | progress: &mut prodash::tree::Item,
49 | ) -> crate::Result<BTreeMap<csv_model::Id, T>>
50 | where
51 | T: serde::de::DeserializeOwned + AsId,
52 | {
53 | let mut decode = progress.add_child("decoding");
54 | decode.init(None, Some(name.into()));
55 | let mut map = BTreeMap::new();
56 | records(rd, &mut decode, |v: T| {
57 | map.insert(v.as_id(), v);
58 | })?;
59 | decode.info(format!("Decoded {} {} into memory", map.len(), name));
60 | Ok(map)
61 | }
62 | 
63 | pub fn vec<T>(rd: impl std::io::Read, name: &'static str, progress: &mut prodash::tree::Item) -> crate::Result<Vec<T>>
64 | where
65 | T: serde::de::DeserializeOwned,
66 | {
67 | let mut decode = progress.add_child("decoding");
68 | decode.init(None, Some(name.into()));
69 | let mut vec = Vec::new();
70 | records(rd, &mut decode, |v: T| {
71 | vec.push(v);
72 | })?;
73 | vec.shrink_to_fit();
74 | decode.info(format!("Decoded {} {} into memory", vec.len(), name));
75 | Ok(vec)
76 | }
77 | 
--------------------------------------------------------------------------------
/criner/src/engine/stage/db_download/mod.rs:
--------------------------------------------------------------------------------
1 | use crate::model::db_dump;
2 | use crate::{
3 | engine::work, persistence::new_key_value_insertion, persistence::Db, persistence::TableAccess, Error, Result,
4 | };
5 | use bytesize::ByteSize;
6 | use futures_util::FutureExt;
7 | use rusqlite::params;
8 | use rusqlite::TransactionBehavior;
9 | use std::{collections::BTreeMap, fs::File, io::BufReader, path::PathBuf};
10 | 
11 | mod convert;
12 | mod csv_model;
13 | mod from_csv;
14 | 
15 | fn store(db: Db, crates: Vec<db_dump::Crate>, mut progress: prodash::tree::Item) -> Result<()> {
16 | let now = std::time::SystemTime::now();
17 | let crates_len = crates.len();
18 | progress.init(Some(crates_len), Some("crates stored".into()));
19 | let mut connection = db.open_connection_no_async_with_busy_wait()?;
20 | let transaction = connection.transaction_with_behavior(TransactionBehavior::Immediate)?;
21 | {
22 | let mut insert = new_key_value_insertion("crates.io-crate", &transaction)?;
23 | for mut krate in crates.into_iter() {
24 | progress.inc();
25 | krate.stored_at = now;
26 | let data = rmp_serde::to_vec(&krate)?;
27 | insert.execute(params![krate.name, data])?;
28 | }
29 | }
30 | transaction.commit()?;
31 | progress.done(format!("Stored {} crates in database", crates_len));
32 | Ok(())
33 | }
34 | 
35 | fn extract_and_ingest(db: Db, mut progress: prodash::tree::Item, db_file_path: PathBuf) -> Result<()> {
36 | progress.init(None, Some("csv files".into()));
37 | let mut archive = tar::Archive::new(libflate::gzip::Decoder::new(BufReader::new(File::open(db_file_path)?))?);
38 | let whitelist_names = [
39 | "crates",
40 | "crate_owners",
41 | "versions",
42 | "crates_categories",
43 | "categories",
44 | "crates_keywords",
45 | "keywords",
46 | "users",
47 | "teams",
48 | ];
49 | 
50 | let mut num_files_seen = 0;
51 | let mut num_bytes_seen = 0;
52 | let mut teams = None::<BTreeMap<csv_model::Id, csv_model::Team>>;
53 | let mut categories = None::<BTreeMap<csv_model::Id, csv_model::Category>>;
54 | let mut versions = None::<Vec<csv_model::Version>>;
55 | let mut keywords = None::<BTreeMap<csv_model::Id, csv_model::Keyword>>;
56 | let mut users = None::<BTreeMap<csv_model::Id, csv_model::User>>;
57 | let mut crates = None::<Vec<csv_model::Crate>>;
58 | let mut crate_owners = None::<Vec<csv_model::CrateOwner>>;
59 | let mut crates_categories = None::<Vec<csv_model::CratesCategory>>;
60 | let mut crates_keywords = None::<Vec<csv_model::CratesKeyword>>;
61 | 
62 | for (eid, entry) in archive.entries()?.enumerate() {
63 | num_files_seen = eid + 1;
64 | progress.set(eid);
65 | 
66 | let entry = entry?;
67 | let entry_size = entry.header().size()?;
68 | num_bytes_seen += entry_size;
69 | 
70 | if let Some(name) = entry
71 | .path()
72 | .ok()
73 | .and_then(|p| whitelist_names.iter().find(|n| p.ends_with(format!("{}.csv", n))))
74 | {
75 | let done_msg = format!(
76 | "extracted '{}' with size {}",
77 | entry.path()?.display(),
78 | ByteSize(entry_size)
79 | );
80 | match *name {
81 | "teams" => teams = Some(from_csv::mapping(entry, name, &mut progress)?),
82 | "categories" => {
83 | categories = Some(from_csv::mapping(entry, "categories", &mut progress)?);
84 | }
85 | "versions" => {
86 | versions = Some(from_csv::vec(entry, "versions", &mut progress)?);
87 | }
88 | "keywords" => {
89 | keywords = Some(from_csv::mapping(entry, "keywords", &mut progress)?);
90 | }
91 | "users" => {
92 | users = Some(from_csv::mapping(entry, "users", &mut progress)?);
93 | }
94 | "crates" => {
95 | crates = Some(from_csv::vec(entry, "crates", &mut progress)?);
96 | }
97 | "crate_owners" => {
98 | crate_owners = Some(from_csv::vec(entry, "crate_owners", &mut progress)?);
99 | }
100 | "crates_categories" => {
101 | crates_categories = Some(from_csv::vec(entry, "crates_categories", &mut progress)?);
102 | }
103 | "crates_keywords" => {
104 | crates_keywords = Some(from_csv::vec(entry, "crates_keywords", &mut progress)?);
105 | }
106 | _ => progress.fail(format!("bug or oversight: Could not parse table of type {:?}", name)),
107 | }
108 | progress.done(done_msg);
109 | }
110 | }
111 | progress.done(format!(
112 | "Saw {} files and a total of {}",
113 | num_files_seen,
114 | ByteSize(num_bytes_seen)
115 | ));
116 | 
117 | let users = users.ok_or(Error::Bug("expected users.csv in crates-io db dump"))?;
118 | let teams = teams.ok_or(Error::Bug("expected teams.csv in crates-io db dump"))?;
119 | let versions = versions.ok_or(Error::Bug("expected versions.csv in crates-io db dump"))?;
120 | let crates = crates.ok_or(Error::Bug("expected crates.csv in crates-io db dump"))?;
121 | let 
keywords = keywords.ok_or(Error::Bug("expected keywords.csv in crates-io db dump"))?; 122 | let crates_keywords = crates_keywords.ok_or(Error::Bug("expected crates_keywords.csv in crates-io db dump"))?; 123 | let categories = categories.ok_or(Error::Bug("expected categories.csv in crates-io db dump"))?; 124 | let crates_categories = 125 | crates_categories.ok_or(Error::Bug("expected crates_categories.csv in crates-io db dump"))?; 126 | let crate_owners = crate_owners.ok_or(Error::Bug("expected crate_owners.csv in crates-io db dump"))?; 127 | 128 | progress.init(Some(4), Some("conversion steps".into())); 129 | progress.set_name("transform actors"); 130 | progress.set(1); 131 | let actors_by_id = convert::into_actors_by_id(users, teams, progress.add_child("actors")); 132 | 133 | progress.set_name("transform versions"); 134 | progress.set(2); 135 | let versions_by_crate_id = 136 | convert::into_versions_by_crate_id(versions, &actors_by_id, progress.add_child("versions")); 137 | 138 | progress.set_name("transform crates"); 139 | progress.set(3); 140 | let crates = convert::into_crates( 141 | crates, 142 | keywords, 143 | crates_keywords, 144 | categories, 145 | crates_categories, 146 | actors_by_id, 147 | crate_owners, 148 | versions_by_crate_id, 149 | progress.add_child("crates"), 150 | ); 151 | 152 | progress.set_name("storing crates"); 153 | progress.set(4); 154 | store(db, crates, progress.add_child("persist")) 155 | } 156 | 157 | fn cleanup(db_file_path: PathBuf, mut progress: prodash::tree::Item) -> Result<()> { 158 | let glob_pattern = db_file_path 159 | .parent() 160 | .expect("parent directory for db dump") 161 | .join("[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]-*") 162 | .with_extension(db_file_path.extension().expect("file extension")); 163 | let pattern = glob::Pattern::new(glob_pattern.to_str().expect("db dump path is valid utf8 string"))?; 164 | if !pattern.matches_path(&db_file_path) { 165 | return Err(crate::Error::Message(format!( 166 | "BUG: Pattern {} did not match the original database path '{}'", 167 | pattern, 168 | db_file_path.display() 169 | ))); 170 | } 171 | 172 | for file in glob::glob(pattern.as_str())? 
{ 173 | let file = file?; 174 | if file != db_file_path { 175 | std::fs::remove_file(&file)?; 176 | progress.done(format!("Deleted old db-dump at '{}'", file.display())); 177 | } 178 | } 179 | Ok(()) 180 | } 181 | 182 | pub async fn schedule( 183 | db: Db, 184 | assets_dir: PathBuf, 185 | mut progress: prodash::tree::Item, 186 | startup_time: std::time::SystemTime, 187 | ) -> Result<()> { 188 | let (tx_result, rx_result) = async_channel::bounded(1); 189 | let tx_io = { 190 | let (tx_io, rx) = async_channel::bounded(1); 191 | let max_retries_on_timeout = 80; 192 | crate::spawn( 193 | work::generic::processor( 194 | db.clone(), 195 | progress.add_child("↓ IDLE"), 196 | rx, 197 | work::iobound::Agent::new(&db, tx_result, { 198 | move |_, _, output_file_path| Some(output_file_path.to_path_buf()) 199 | })?, 200 | max_retries_on_timeout, 201 | ) 202 | .map(|r| { 203 | if let Err(e) = r { 204 | log::warn!("db download: iobound processor failed: {}", e); 205 | } 206 | }), 207 | ) 208 | .detach(); 209 | tx_io 210 | }; 211 | 212 | let today_yyyy_mm_dd = time::OffsetDateTime::now_local() 213 | .unwrap_or_else(|_| time::OffsetDateTime::now_utc()) 214 | .format(&time::macros::format_description!("[year]-[month]-[day]")) 215 | .expect("formattable"); 216 | let file_suffix = "db-dump.tar.gz"; 217 | let task_key = format!( 218 | "{}{}{}", 219 | "crates-io-db-dump", 220 | crate::persistence::KEY_SEP_CHAR, 221 | today_yyyy_mm_dd 222 | ); 223 | 224 | let db_file_path = assets_dir 225 | .join("crates-io-db") 226 | .join(format!("{}-{}", today_yyyy_mm_dd, file_suffix)); 227 | let tasks = db.open_tasks()?; 228 | if tasks 229 | .get(&task_key)? 230 | .map(|t| t.can_be_started(startup_time) || t.state.is_complete()) // always allow the extractor to run - must be idempotent 231 | .unwrap_or(true) 232 | { 233 | tx_io 234 | .send(work::iobound::DownloadRequest { 235 | output_file_path: db_file_path.clone(), 236 | progress_name: "db dump".to_string(), 237 | task_key, 238 | crate_name_and_version: None, 239 | kind: "tar.gz", 240 | url: "https://static.crates.io/db-dump.tar.gz".to_string(), 241 | }) 242 | .await 243 | .map_err(Error::send_msg("Download Request"))?; 244 | drop(tx_io); 245 | if let Ok(db_file_path) = rx_result.recv().await { 246 | blocking::unblock({ 247 | let progress = progress.add_child("ingest"); 248 | move || extract_and_ingest(db, progress, db_file_path) 249 | }) 250 | .await 251 | .map_err(|err| { 252 | progress.fail(format!("ingestion failed: {}", err)); 253 | err 254 | })?; 255 | } 256 | } 257 | 258 | blocking::unblock(move || cleanup(db_file_path, progress.add_child("removing old db-dumps"))).await?; 259 | Ok(()) 260 | } 261 | -------------------------------------------------------------------------------- /criner/src/engine/stage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod changes; 2 | pub mod db_download; 3 | pub mod processing; 4 | 5 | pub mod report; 6 | -------------------------------------------------------------------------------- /criner/src/engine/stage/processing.rs: -------------------------------------------------------------------------------- 1 | use crate::persistence::{new_value_query_recent_first, value_iter, CrateVersionTable}; 2 | use crate::{ 3 | engine::work, 4 | error::Result, 5 | model::CrateVersion, 6 | persistence::{Db, Keyed, TableAccess}, 7 | }; 8 | use futures_util::FutureExt; 9 | use std::{path::PathBuf, time::SystemTime}; 10 | 11 | pub async fn process( 12 | db: Db, 13 | mut progress: prodash::tree::Item, 14 
| io_bound_processors: u32,
15 | cpu_bound_processors: u32,
16 | mut processing_progress: prodash::tree::Item,
17 | assets_dir: PathBuf,
18 | startup_time: SystemTime,
19 | ) -> Result<()> {
20 | processing_progress.set_name("Downloads and Extractors");
21 | let tx_cpu = {
22 | let (tx_cpu, rx) = async_channel::bounded(1);
23 | for idx in 0..cpu_bound_processors {
24 | let max_retries_on_timeout = 0;
25 | let db = db.clone();
26 | let assets_dir = assets_dir.clone();
27 | let progress = processing_progress.add_child(format!("{}:CPU IDLE", idx + 1));
28 | let rx = rx.clone();
29 | crate::spawn(blocking::unblock(move || -> Result<_> {
30 | let agent = work::cpubound::Agent::new(assets_dir, &db)?;
31 | #[allow(clippy::unit_arg)] // don't know where the unit is supposed to be
32 | Ok(futures_lite::future::block_on(
33 | work::generic::processor(db, progress, rx, agent, max_retries_on_timeout).map(|r| {
34 | if let Err(e) = r {
35 | log::warn!("CPU bound processor failed: {}", e);
36 | }
37 | }),
38 | ))
39 | }))
40 | .detach();
41 | }
42 | tx_cpu
43 | };
44 | 
45 | let tx_io = {
46 | let (tx_io, rx) = async_channel::bounded(1);
47 | for idx in 0..io_bound_processors {
48 | let max_retries_on_timeout = 40;
49 | crate::spawn(
50 | work::generic::processor(
51 | db.clone(),
52 | processing_progress.add_child(format!("{}: ↓ IDLE", idx + 1)),
53 | rx.clone(),
54 | work::iobound::Agent::new(&db, tx_cpu.clone(), |crate_name_and_version, task, _| {
55 | crate_name_and_version.map(|(crate_name, crate_version)| work::cpubound::ExtractRequest {
56 | download_task: task.clone(),
57 | crate_name,
58 | crate_version,
59 | })
60 | })?,
61 | max_retries_on_timeout,
62 | )
63 | .map(|r| {
64 | if let Err(e) = r {
65 | log::warn!("iobound processor failed: {}", e);
66 | }
67 | }),
68 | )
69 | .detach();
70 | }
71 | tx_io
72 | };
73 | 
74 | blocking::unblock(move || {
75 | let versions = db.open_crate_versions()?;
76 | let num_versions = versions.count();
77 | progress.init(Some(num_versions as usize), Some("crate versions".into()));
78 | 
79 | let auto_checkpoint_every = 10000;
80 | let checkpoint_connection = db.open_connection_with_busy_wait()?;
81 | let mut fetched_versions = 0;
82 | let mut versions = Vec::with_capacity(auto_checkpoint_every);
83 | let mut last_elapsed_for_checkpointing = None;
84 | let mut child_progress = progress.add_child("TBD");
85 | 
86 | loop {
87 | let abort_loop = {
88 | progress.blocked("fetching chunk of version to schedule", None);
89 | let connection = db.open_connection_no_async_with_busy_wait()?;
90 | let mut statement = new_value_query_recent_first(
91 | CrateVersionTable::table_name(),
92 | &connection,
93 | fetched_versions,
94 | auto_checkpoint_every,
95 | )?;
96 | let iter = value_iter::<CrateVersion>(&mut statement)?;
97 | versions.clear();
98 | versions.extend(iter);
99 | fetched_versions += versions.len();
100 | 
101 | versions.len() != auto_checkpoint_every
102 | };
103 | 
104 | let tasks = db.open_tasks()?;
105 | for (vid, version) in versions.drain(..).enumerate() {
106 | let version = version?;
107 | 
108 | progress.set(vid + fetched_versions + 1);
109 | progress.halted("wait for task consumers", None);
110 | child_progress.set_name(format!("schedule {}", version.key()));
111 | // TODO: with blocking:: API improvements, remove this block-on as all is async
112 | futures_lite::future::block_on(work::schedule::tasks(
113 | &assets_dir,
114 | &tasks,
115 | &version,
116 | &mut child_progress,
117 | work::schedule::Scheduling::AtLeastOne,
118 | &tx_io,
119 | &tx_cpu,
120 | startup_time,
121 | ))?;
122 | }
123 | 
124 | // We have too many writers, which causes the WAL to get so large that all reads slow to a crawl.
125 | // Standard SQLite autocheckpoints are passive and thus not effective in our case, as they never
126 | // kick in with too many writers. There is no way to change the autocheckpoint mode to something more suitable… :/
127 | let start = SystemTime::now();
128 | progress.blocked(
129 | "checkpointing database",
130 | last_elapsed_for_checkpointing.map(|d| start + d),
131 | );
132 | checkpoint_connection
133 | .lock()
134 | .execute_batch("PRAGMA wal_checkpoint(TRUNCATE)")?;
135 | last_elapsed_for_checkpointing = Some(SystemTime::now().duration_since(start)?);
136 | 
137 | if abort_loop {
138 | progress.running();
139 | break;
140 | }
141 | }
142 | Ok(())
143 | })
144 | .await
145 | }
146 | 
--------------------------------------------------------------------------------
/criner/src/engine/stage/report/mod.rs:
--------------------------------------------------------------------------------
1 | use crate::{
2 | engine::report,
3 | persistence::{self, new_key_value_query_old_to_new_filtered, TableAccess},
4 | utils::check,
5 | {Error, Result},
6 | };
7 | use futures_util::FutureExt;
8 | use std::{path::PathBuf, time::SystemTime};
9 | 
10 | mod git;
11 | 
12 | pub async fn generate(
13 | db: persistence::Db,
14 | mut progress: prodash::tree::Item,
15 | assets_dir: PathBuf,
16 | glob: Option<String>,
17 | deadline: Option<SystemTime>,
18 | cpu_o_bound_processors: u32,
19 | ) -> Result<()> {
20 | use report::generic::Generator;
21 | let krates = db.open_crates()?;
22 | let output_dir = assets_dir
23 | .parent()
24 | .expect("assets directory to be in criner.db")
25 | .join("reports");
26 | let glob_str = glob.as_deref();
27 | let num_crates = krates.count_filtered(glob_str) as usize;
28 | let chunk_size = 500.min(num_crates);
29 | if chunk_size == 0 {
30 | return Ok(());
31 | }
32 | progress.init(Some(num_crates), Some("crates".into()));
33 | 
34 | let (processors, rx_result) = {
35 | let (tx_task, rx_task) = async_channel::bounded(1);
36 | let (tx_result, rx_result) = async_channel::bounded(cpu_o_bound_processors as usize * 2);
37 | 
38 | for _ in 0..cpu_o_bound_processors {
39 | let task = rx_task.clone();
40 | let result = tx_result.clone();
41 | crate::spawn(blocking::unblock(move || {
42 | futures_lite::future::block_on(async move {
43 | while let Ok(f) = task.recv().await {
44 | result.send(f.await).await.map_err(Error::send_msg("send CPU result"))?;
45 | }
46 | Ok::<_, Error>(())
47 | })
48 | }))
49 | .detach();
50 | }
51 | (tx_task, rx_result)
52 | };
53 | 
54 | let waste_report_dir = output_dir.join(report::waste::Generator::name());
55 | blocking::unblock({
56 | let dir = waste_report_dir.clone();
57 | move || std::fs::create_dir_all(dir)
58 | })
59 | .await?;
60 | use crate::engine::report::generic::WriteCallback;
61 | let (cache_dir, (git_handle, git_state, maybe_join_handle)) = match glob.as_ref() {
62 | Some(_) => (None, (git::not_available as WriteCallback, None, None)),
63 | None => {
64 | let cd = waste_report_dir.join("__incremental_cache__");
65 | blocking::unblock({
66 | let cd = cd.clone();
67 | move || std::fs::create_dir_all(cd)
68 | })
69 | .await?;
70 | (
71 | Some(cd),
72 | git::select_callback(cpu_o_bound_processors, &waste_report_dir, progress.add_child("git")),
73 | )
74 | }
75 | };
76 | let merge_reports = crate::spawn({
77 | let merge_progress = progress.add_child("report aggregator");
78 | merge_progress.init(Some(num_crates / chunk_size), Some("Reports".into()));
Some("Reports".into())); 79 | report::waste::Generator::merge_reports( 80 | waste_report_dir.clone(), 81 | cache_dir.clone(), 82 | merge_progress, 83 | rx_result, 84 | git_handle, 85 | git_state.clone(), 86 | ) 87 | .map(|_| ()) 88 | .boxed() 89 | }); 90 | 91 | let mut fetched_crates = 0; 92 | let mut chunk = Vec::<(String, Vec)>::with_capacity(chunk_size as usize); 93 | let mut cid = 0; 94 | loop { 95 | let abort_loop = { 96 | progress.blocked("fetching chunk of crates to schedule", None); 97 | let connection = db.open_connection_no_async_with_busy_wait()?; 98 | let mut statement = new_key_value_query_old_to_new_filtered( 99 | persistence::CrateTable::table_name(), 100 | glob_str, 101 | &connection, 102 | Some((fetched_crates, chunk_size as usize)), 103 | )?; 104 | 105 | chunk.clear(); 106 | chunk.extend( 107 | statement 108 | .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))? 109 | .filter_map(|r| r.ok()), 110 | ); 111 | fetched_crates += chunk.len(); 112 | 113 | chunk.len() != chunk_size as usize 114 | }; 115 | 116 | cid += 1; 117 | check(deadline)?; 118 | 119 | progress.set(cid * chunk_size); 120 | progress.halted("write crate report", None); 121 | processors 122 | .send(report::waste::Generator::write_files( 123 | db.clone(), 124 | waste_report_dir.clone(), 125 | cache_dir.clone(), 126 | chunk, 127 | progress.add_child(""), 128 | git_handle, 129 | git_state.clone(), 130 | )) 131 | .await 132 | .map_err(Error::send_msg("Chunk of files to write"))?; 133 | chunk = Vec::with_capacity(chunk_size as usize); 134 | if abort_loop { 135 | break; 136 | } 137 | } 138 | drop(git_state); 139 | drop(processors); 140 | progress.set(num_crates); 141 | merge_reports.await; 142 | progress.done("Generating and merging waste report done"); 143 | 144 | if let Some(handle) = maybe_join_handle { 145 | progress.blocked("waiting for git to finish", None); 146 | if handle.join().is_err() { 147 | progress.fail("git failed with unknown error"); 148 | } 149 | }; 150 | Ok(()) 151 | } 152 | -------------------------------------------------------------------------------- /criner/src/engine/work/cpubound.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::report::waste::{tar_path_to_utf8_str, CargoConfig}; 2 | use crate::{error::Result, model, persistence, Error}; 3 | use async_trait::async_trait; 4 | use std::io::Seek; 5 | use std::{fs::File, io::BufReader, io::Read, path::PathBuf, time::SystemTime}; 6 | 7 | struct ProcessingState { 8 | downloaded_crate: PathBuf, 9 | key: String, 10 | } 11 | pub struct Agent { 12 | asset_dir: PathBuf, 13 | results: persistence::TaskResultTable, 14 | state: Option, 15 | standard_bin_path: globset::GlobMatcher, 16 | } 17 | 18 | impl Agent { 19 | pub fn new(asset_dir: PathBuf, db: &persistence::Db) -> Result { 20 | let results = db.open_results()?; 21 | Ok(Agent { 22 | asset_dir, 23 | results, 24 | state: None, 25 | standard_bin_path: globset::Glob::new("src/bin/*.rs") 26 | .expect("valid statically known glob") 27 | .compile_matcher(), 28 | }) 29 | } 30 | } 31 | 32 | #[async_trait] 33 | impl crate::engine::work::generic::Processor for Agent { 34 | type Item = ExtractRequest; 35 | 36 | fn set( 37 | &mut self, 38 | request: Self::Item, 39 | progress: &mut prodash::tree::Item, 40 | ) -> Result<(model::Task, String, String)> { 41 | progress.init(None, Some("files extracted".into())); 42 | let ExtractRequest { 43 | download_task, 44 | crate_name, 45 | crate_version, 46 | } = request; 47 | 48 | let progress_info = format!("CPU UNZIP+UNTAR 
{}:{}", crate_name, crate_version); 49 | let dummy_task = default_persisted_extraction_task(); 50 | let mut task_key = String::new(); 51 | dummy_task.fq_key(&crate_name, &crate_version, &mut task_key); 52 | 53 | let downloaded_crate = super::schedule::download_file_path( 54 | &self.asset_dir, 55 | &crate_name, 56 | &crate_version, 57 | &download_task.process, 58 | &download_task.version, 59 | "crate", 60 | ); 61 | let dummy_result = model::TaskResult::ExplodedCrate { 62 | entries_meta_data: vec![], 63 | selected_entries: vec![], 64 | }; 65 | 66 | let mut key = String::with_capacity(task_key.len() * 2); 67 | dummy_result.fq_key(&crate_name, &crate_version, &dummy_task, &mut key); 68 | 69 | self.state = Some(ProcessingState { downloaded_crate, key }); 70 | Ok((dummy_task, task_key, progress_info)) 71 | } 72 | 73 | fn idle_message(&self) -> String { 74 | "CPU IDLE".into() 75 | } 76 | 77 | async fn process(&mut self, progress: &mut prodash::tree::Item) -> std::result::Result<(), (Error, String)> { 78 | let ProcessingState { downloaded_crate, key } = self.state.take().expect("state to be set"); 79 | extract_crate(&self.results, &key, progress, downloaded_crate, &self.standard_bin_path) 80 | .map_err(|err| (err, "Failed to extract crate".into())) 81 | } 82 | } 83 | 84 | #[derive(Clone)] 85 | pub struct ExtractRequest { 86 | pub download_task: model::Task, 87 | pub crate_name: String, 88 | pub crate_version: String, 89 | } 90 | 91 | pub fn default_persisted_extraction_task() -> model::Task { 92 | const TASK_NAME: &str = "extract_crate"; 93 | const TASK_VERSION: &str = "1.0.0"; 94 | model::Task { 95 | stored_at: SystemTime::now(), 96 | process: TASK_NAME.into(), 97 | version: TASK_VERSION.into(), 98 | state: Default::default(), 99 | } 100 | } 101 | 102 | fn extract_crate( 103 | results: &persistence::TaskResultTable, 104 | key: &str, 105 | progress: &mut prodash::tree::Item, 106 | downloaded_crate: PathBuf, 107 | standard_bin_path: &globset::GlobMatcher, 108 | ) -> Result<()> { 109 | use persistence::TableAccess; 110 | let mut archive = tar::Archive::new(libflate::gzip::Decoder::new(BufReader::new(File::open( 111 | downloaded_crate, 112 | )?))?); 113 | 114 | let mut buf = Vec::new(); 115 | let mut interesting_paths = vec!["Cargo.toml".to_string(), "Cargo.lock".into()]; 116 | let mut files = Vec::new(); 117 | for e in archive.entries()? { 118 | progress.inc(); 119 | let mut e: tar::Entry<_> = e?; 120 | if tar_path_to_utf8_str(e.path_bytes().as_ref()) == "Cargo.toml" { 121 | e.read_to_end(&mut buf)?; 122 | let config = std::str::from_utf8(&buf).map(CargoConfig::from).unwrap_or_default(); 123 | interesting_paths.push(config.actual_or_expected_build_script_path().to_owned()); 124 | interesting_paths.push(config.lib_path().to_owned()); 125 | interesting_paths.extend(config.bin_paths().into_iter().map(|s| s.to_owned())); 126 | break; 127 | } 128 | } 129 | 130 | let mut archive = tar::Archive::new(libflate::gzip::Decoder::new(BufReader::new({ 131 | let mut file = archive.into_inner().into_inner(); 132 | file.seek(std::io::SeekFrom::Start(0))?; 133 | file 134 | }))?); 135 | 136 | let mut meta_data = Vec::new(); 137 | let mut meta_count = 0; 138 | let mut file_count = 0; 139 | let mut max_storage_size = [0; 128 * 1024]; 140 | for e in archive.entries()? 
{ 141 | meta_count += 1; 142 | progress.set(meta_count); 143 | let mut e: tar::Entry<_> = e?; 144 | meta_data.push(model::TarHeader { 145 | path: e.path_bytes().to_vec(), 146 | size: e.header().size()?, 147 | entry_type: e.header().entry_type().as_byte(), 148 | }); 149 | 150 | if interesting_paths 151 | .iter() 152 | .any(|p| p == tar_path_to_utf8_str(e.path_bytes().as_ref())) 153 | || standard_bin_path.is_match(tar_path_to_utf8_str(e.path_bytes().as_ref())) 154 | { 155 | file_count += 1; 156 | 157 | let slice = if tar_path_to_utf8_str(e.path_bytes().as_ref()) == "Cargo.toml" 158 | || tar_path_to_utf8_str(e.path_bytes().as_ref()) == "Cargo.lock" 159 | { 160 | buf.clear(); 161 | e.read_to_end(&mut buf)?; 162 | &buf 163 | } else { 164 | let bytes_read = e.read(&mut max_storage_size[..])?; 165 | &max_storage_size[..bytes_read] 166 | }; 167 | files.push(( 168 | meta_data.last().expect("to have pushed one just now").to_owned(), 169 | slice.to_owned(), 170 | )); 171 | } 172 | } 173 | progress.info(format!( 174 | "Recorded {} files and stored {} in full", 175 | meta_count, file_count 176 | )); 177 | 178 | let task_result = model::TaskResult::ExplodedCrate { 179 | entries_meta_data: meta_data, 180 | selected_entries: files, 181 | }; 182 | results.insert(progress, &key, &task_result)?; 183 | 184 | Ok(()) 185 | } 186 | -------------------------------------------------------------------------------- /criner/src/engine/work/generic.rs: -------------------------------------------------------------------------------- 1 | use crate::{model, persistence, persistence::TableAccess, Error, Result}; 2 | use async_trait::async_trait; 3 | 4 | #[async_trait] 5 | pub trait Processor { 6 | type Item; 7 | 8 | fn set(&mut self, request: Self::Item, progress: &mut prodash::tree::Item) 9 | -> Result<(model::Task, String, String)>; 10 | fn idle_message(&self) -> String; 11 | async fn process(&mut self, progress: &mut prodash::tree::Item) -> std::result::Result<(), (Error, String)>; 12 | async fn schedule_next(&mut self, _progress: &mut prodash::tree::Item) -> Result<()> { 13 | Ok(()) 14 | } 15 | } 16 | 17 | pub async fn processor<T: Clone + Send>( 18 | db: persistence::Db, 19 | mut progress: prodash::tree::Item, 20 | r: async_channel::Receiver<T>, 21 | mut agent: impl Processor<Item = T> + Send, 22 | max_retries_on_timeout: usize, 23 | ) -> Result<()> { 24 | let tasks = db.open_tasks()?; 25 | 26 | while let Ok(request) = r.recv().await { 27 | let mut try_count = 0; 28 | let (task, task_key) = loop { 29 | let (dummy_task, task_key, progress_name) = agent.set(request.clone(), &mut progress)?; 30 | progress.set_name(progress_name); 31 | 32 | let mut task = tasks.update(Some(&mut progress), &task_key, |mut t| { 33 | t.process = dummy_task.process.clone(); 34 | t.version = dummy_task.version.clone(); 35 | t.state.merge_with(&model::TaskState::InProgress(None)); 36 | t 37 | })?; 38 | 39 | try_count += 1; 40 | progress.blocked("working", None); 41 | let res = agent.process(&mut progress).await; 42 | progress.running(); 43 | 44 | task.state = match res { 45 | Err((err @ Error::Timeout(_, _), _)) if try_count < max_retries_on_timeout => { 46 | progress.fail(format!("{} → retrying ({}/{})", err, try_count, max_retries_on_timeout)); 47 | continue; 48 | } 49 | Err((err, msg)) => { 50 | progress.fail(format!("{}: {}", msg, err)); 51 | model::TaskState::AttemptsWithFailure(vec![err.to_string()]) 52 | } 53 | Ok(_) => { 54 | agent.schedule_next(&mut progress).await.ok(); 55 | model::TaskState::Complete 56 | } 57 | }; 58 | break (task, task_key); 59 | }; 60 |
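// The loop above re-marks the task InProgress in the database on every attempt and only breaks
// once `task.state` is terminal (Complete or AttemptsWithFailure), so the single write below is
// enough to persist the final outcome no matter how many retries it took.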
tasks.upsert(&mut progress, &task_key, &task)?; 62 | progress.set_name(agent.idle_message()); 63 | progress.init(None, None); 64 | } 65 | Ok(()) 66 | } 67 | -------------------------------------------------------------------------------- /criner/src/engine/work/iobound.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | model, 3 | persistence::{self, TableAccess}, 4 | Error, Result, 5 | }; 6 | use bytesize::ByteSize; 7 | use futures_lite::{io::AsyncWriteExt, FutureExt}; 8 | 9 | use crate::utils::timeout_after; 10 | use async_trait::async_trait; 11 | use std::{ 12 | path::{Path, PathBuf}, 13 | time::{Duration, SystemTime}, 14 | }; 15 | 16 | const CONNECT_AND_FETCH_HEAD_TIMEOUT: Duration = Duration::from_secs(15); 17 | const FETCH_CHUNK_TIMEOUT_SECONDS: Duration = Duration::from_secs(10); 18 | 19 | struct ProcessingState { 20 | url: String, 21 | kind: &'static str, 22 | output_file_path: PathBuf, 23 | result_key: Option, 24 | } 25 | pub struct Agent { 26 | client: reqwest::Client, 27 | results: persistence::TaskResultTable, 28 | channel: async_channel::Sender, 29 | state: Option, 30 | make_state: Fn, 31 | next_action_state: Option, 32 | } 33 | 34 | impl Agent 35 | where 36 | Fn: FnMut(Option<(String, String)>, &model::Task, &Path) -> Option, 37 | { 38 | pub fn new( 39 | db: &persistence::Db, 40 | channel: async_channel::Sender, 41 | make_state: Fn, 42 | ) -> Result> { 43 | let client = reqwest::ClientBuilder::new().gzip(true).build()?; 44 | 45 | let results = db.open_results()?; 46 | Ok(Agent { 47 | client, 48 | results, 49 | channel, 50 | state: None, 51 | next_action_state: None, 52 | make_state, 53 | }) 54 | } 55 | } 56 | 57 | #[async_trait] 58 | impl crate::engine::work::generic::Processor for Agent 59 | where 60 | Fn: FnMut(Option<(String, String)>, &model::Task, &Path) -> Option + Send, 61 | FnResult: Send, 62 | { 63 | type Item = DownloadRequest; 64 | 65 | fn set( 66 | &mut self, 67 | request: Self::Item, 68 | progress: &mut prodash::tree::Item, 69 | ) -> Result<(model::Task, String, String)> { 70 | progress.init(None, None); 71 | let DownloadRequest { 72 | output_file_path, 73 | progress_name, 74 | task_key, 75 | crate_name_and_version, 76 | kind, 77 | url, 78 | } = request; 79 | let dummy_task = default_persisted_download_task(); 80 | let progress_name = format!("↓ {}", progress_name); 81 | 82 | let task_result = model::TaskResult::Download { 83 | kind: kind.to_owned(), 84 | url: String::new(), 85 | content_length: 0, 86 | content_type: None, 87 | }; 88 | 89 | self.next_action_state = (self.make_state)(crate_name_and_version.clone(), &dummy_task, &output_file_path); 90 | self.state = Some(ProcessingState { 91 | url, 92 | kind, 93 | output_file_path, 94 | result_key: crate_name_and_version.as_ref().map(|(crate_name, crate_version)| { 95 | let mut result_key = String::with_capacity(task_key.len() * 2); 96 | task_result.fq_key(crate_name, crate_version, &dummy_task, &mut result_key); 97 | result_key 98 | }), 99 | }); 100 | Ok((dummy_task, task_key, progress_name)) 101 | } 102 | 103 | fn idle_message(&self) -> String { 104 | "↓ IDLE".into() 105 | } 106 | 107 | async fn process(&mut self, progress: &mut prodash::tree::Item) -> std::result::Result<(), (Error, String)> { 108 | let ProcessingState { 109 | url, 110 | kind, 111 | output_file_path, 112 | result_key, 113 | } = self.state.take().expect("initialized state"); 114 | download_file_and_store_result( 115 | progress, 116 | result_key, 117 | &self.results, 118 | &self.client, 119 | 
119 | kind, 120 | &url, 121 | output_file_path, 122 | ) 123 | .await 124 | .map_err(|err| (err, format!("Failed to download '{}'", url))) 125 | } 126 | 127 | async fn schedule_next(&mut self, progress: &mut prodash::tree::Item) -> Result<()> { 128 | if let Some(request) = self.next_action_state.take() { 129 | progress.blocked("schedule crate extraction", None); 130 | // Here we risk doing this work twice, but most of the time, we don't. And since it's fast, 131 | // we take the risk of duplicate work for keeping more processors busy. 132 | // NOTE: We assume there is no risk of double-scheduling, also we assume the consumer is faster 133 | // than the producer (us), so we are ok with blocking until the task is scheduled. 134 | self.channel 135 | .send(request) 136 | .await 137 | .map_err(Error::send_msg("IO Bound: Schedule next task"))?; 138 | } 139 | Ok(()) 140 | } 141 | } 142 | 143 | #[derive(Clone)] 144 | pub struct DownloadRequest { 145 | pub output_file_path: PathBuf, 146 | pub progress_name: String, 147 | pub task_key: String, 148 | pub crate_name_and_version: Option<(String, String)>, 149 | pub kind: &'static str, 150 | pub url: String, 151 | } 152 | 153 | pub fn default_persisted_download_task() -> model::Task { 154 | const TASK_NAME: &str = "download"; 155 | const TASK_VERSION: &str = "1.0.0"; 156 | model::Task { 157 | stored_at: SystemTime::now(), 158 | process: TASK_NAME.into(), 159 | version: TASK_VERSION.into(), 160 | state: Default::default(), 161 | } 162 | } 163 | 164 | async fn download_file_and_store_result( 165 | progress: &mut prodash::tree::Item, 166 | result_key: Option<String>, 167 | results: &persistence::TaskResultTable, 168 | client: &reqwest::Client, 169 | kind: &str, 170 | url: &str, 171 | out_file: PathBuf, 172 | ) -> Result<()> { 173 | blocking::unblock({ 174 | let out_file = out_file.clone(); 175 | move || std::fs::create_dir_all(&out_file.parent().expect("parent directory")) 176 | }) 177 | .await?; 178 | 179 | // NOTE: We assume that the files we download never change, and we assume the server supports resumption!
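// A worked example of the resumption math below, with hypothetical numbers: if 1000 bytes of a
// 4096-byte file are already on disk, the request carries `Range: bytes=1000-`, a compliant server
// answers `206 Partial Content` with `content-length: 3096`, and the total is reconstructed as
// content_length = start_byte + remaining_content_length = 1000 + 3096 = 4096. A
// `416 Range Not Satisfiable` response instead means a previous run already fetched everything.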
180 | let (start_byte, truncate) = blocking::unblock({ 181 | let out_file = out_file.clone(); 182 | move || std::fs::metadata(&out_file) 183 | }) 184 | .await 185 | .map(|meta| (meta.len(), false)) 186 | .unwrap_or((0, true)); 187 | 188 | progress.blocked("fetch HEAD", None); 189 | let mut response = timeout_after( 190 | CONNECT_AND_FETCH_HEAD_TIMEOUT, 191 | "fetching HEAD", 192 | client 193 | .get(url) 194 | .header(http::header::RANGE, format!("bytes={}-", start_byte)) 195 | .send(), 196 | ) 197 | .await??; 198 | 199 | match response.status().as_u16() { 200 | 200..=299 => {} 201 | 416 => { 202 | // we assume that this means we have fully downloaded the item previously, and that the DB result was written already 203 | // but not checked 204 | progress.running(); 205 | progress.done(format!( 206 | "GET{}:{}: body-size = {}", 207 | if start_byte != 0 { 208 | "(resumed, already completed)" 209 | } else { 210 | "" 211 | }, 212 | url, 213 | ByteSize(start_byte as u64) 214 | )); 215 | return Ok(()); 216 | } 217 | _ => return Err(Error::HttpStatus(response.status())), 218 | }; 219 | 220 | let remaining_content_length = response 221 | .content_length() 222 | .ok_or(Error::InvalidHeader("expected content-length"))?; 223 | 224 | let content_length = (start_byte + remaining_content_length) as usize; 225 | progress.init(Some(content_length / 1024), Some("Kb".into())); 226 | progress.done(format!( 227 | "HEAD{}:{}: content-length = {}", 228 | if start_byte != 0 { "(resumable)" } else { "" }, 229 | url, 230 | ByteSize(content_length as u64) 231 | )); 232 | 233 | if remaining_content_length != 0 { 234 | let mut out = blocking::Unblock::new( 235 | blocking::unblock({ 236 | let out_file = out_file.clone(); 237 | move || { 238 | std::fs::OpenOptions::new() 239 | .create(truncate) 240 | .truncate(truncate) 241 | .write(truncate) 242 | .append(!truncate) 243 | .open(out_file) 244 | } 245 | }) 246 | .await 247 | .map_err(|err| crate::Error::Message(format!("Failed to open '{}': {}", out_file.display(), err)))?, 248 | ); 249 | 250 | let mut bytes_received = start_byte as usize; 251 | while let Some(chunk) = timeout_after( 252 | FETCH_CHUNK_TIMEOUT_SECONDS, 253 | format!( 254 | "fetched {} of {}", 255 | ByteSize(bytes_received as u64), 256 | ByteSize(content_length as u64) 257 | ), 258 | response.chunk().boxed(), 259 | ) 260 | .await?? 
261 | { 262 | out.write_all(&chunk).await?; 263 | bytes_received += chunk.len(); 264 | progress.set(bytes_received / 1024); 265 | } 266 | progress.done(format!( 267 | "GET{}:{}: body-size = {}", 268 | if start_byte != 0 { "(resumed)" } else { "" }, 269 | url, 270 | ByteSize(bytes_received as u64) 271 | )); 272 | out.flush().await?; 273 | } else { 274 | progress.done(format!("{} already on disk - skipping", url)) 275 | } 276 | 277 | if let Some(result_key) = result_key { 278 | let task_result = model::TaskResult::Download { 279 | kind: kind.to_owned(), 280 | url: url.to_owned(), 281 | content_length: content_length as u32, 282 | content_type: response 283 | .headers() 284 | .get(http::header::CONTENT_TYPE) 285 | .and_then(|t| t.to_str().ok()) 286 | .map(Into::into), 287 | }; 288 | results.insert(progress, &result_key, &task_result)?; 289 | } 290 | Ok(()) 291 | } 292 | -------------------------------------------------------------------------------- /criner/src/engine/work/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod generic; 2 | pub mod iobound; 3 | pub mod schedule; 4 | 5 | pub mod cpubound; 6 | -------------------------------------------------------------------------------- /criner/src/engine/work/schedule.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | engine::{work::cpubound, work::iobound}, 3 | error::Result, 4 | model, persistence, 5 | persistence::{TableAccess, TaskTable}, 6 | }; 7 | use std::{ 8 | path::{Path, PathBuf}, 9 | time::SystemTime, 10 | }; 11 | 12 | const MAX_ATTEMPTS_BEFORE_WE_GIVE_UP: usize = 8; 13 | 14 | #[derive(Clone, Copy)] 15 | pub enum Scheduling { 16 | // /// Considers work done if everything was done. Will block to assure that 17 | // All, 18 | /// Considers the work done if at least one task was scheduled. Will block to wait otherwise. 
19 | AtLeastOne, 20 | // /// Prefer to never wait for workers to perform a task and instead return without having scheduled anything 21 | // NeverBlock, 22 | } 23 | 24 | pub enum AsyncResult { 25 | // /// The required scheduling cannot be fulfilled without blocking 26 | // WouldBlock, 27 | /// The minimal scheduling requirement was met 28 | Done, 29 | } 30 | 31 | #[allow(clippy::too_many_arguments)] 32 | pub async fn tasks( 33 | assets_dir: &Path, 34 | tasks: &persistence::TaskTable, 35 | krate: &model::CrateVersion, 36 | progress: &mut prodash::tree::Item, 37 | _mode: Scheduling, 38 | perform_io: &async_channel::Sender<iobound::DownloadRequest>, 39 | perform_cpu: &async_channel::Sender<cpubound::ExtractRequest>, 40 | startup_time: SystemTime, 41 | ) -> Result<AsyncResult> { 42 | use SubmitResult::*; 43 | let mut key_buf = String::with_capacity(32); 44 | let io_task = task_or_default(tasks, &mut key_buf, krate, iobound::default_persisted_download_task)?; 45 | 46 | let kind = "crate"; 47 | let submit_result = submit_single(startup_time, io_task, progress, perform_io, 1, 1, || { 48 | let dummy_task = iobound::default_persisted_download_task(); 49 | let mut task_key = String::new(); 50 | dummy_task.fq_key(&krate.name, &krate.version, &mut task_key); 51 | 52 | iobound::DownloadRequest { 53 | output_file_path: download_file_path( 54 | assets_dir, 55 | &krate.name, 56 | &krate.version, 57 | &dummy_task.process, 58 | &dummy_task.version, 59 | kind, 60 | ), 61 | progress_name: format!("{}:{}", krate.name, krate.version), 62 | task_key, 63 | crate_name_and_version: Some((krate.name.clone(), krate.version.clone())), 64 | kind, 65 | url: format!( 66 | "https://static.crates.io/crates/{name}/{name}-{version}.crate", 67 | name = krate.name, 68 | version = krate.version 69 | ), 70 | } 71 | }) 72 | .await; 73 | 74 | Ok(match submit_result { 75 | PermanentFailure | Submitted => AsyncResult::Done, 76 | Done(download_crate_task) => { 77 | let cpu_task = task_or_default(tasks, &mut key_buf, krate, cpubound::default_persisted_extraction_task)?; 78 | submit_single(startup_time, cpu_task, progress, perform_cpu, 2, 2, || { 79 | cpubound::ExtractRequest { 80 | download_task: download_crate_task, 81 | crate_name: krate.name.clone(), 82 | crate_version: krate.version.clone(), 83 | } 84 | }) 85 | .await; 86 | AsyncResult::Done 87 | } 88 | }) 89 | } 90 | 91 | fn task_or_default( 92 | tasks: &TaskTable, 93 | key_buf: &mut String, 94 | crate_version: &model::CrateVersion, 95 | make_task: impl FnOnce() -> model::Task, 96 | ) -> Result<model::Task> { 97 | let task = make_task(); 98 | key_buf.clear(); 99 | task.fq_key(&crate_version.name, &crate_version.version, key_buf); 100 | Ok(tasks.get(key_buf)?.unwrap_or(task)) 101 | } 102 | 103 | enum SubmitResult { 104 | Submitted, 105 | Done(model::Task), 106 | PermanentFailure, 107 | } 108 | 109 | async fn submit_single<R>( 110 | startup_time: SystemTime, 111 | task: model::Task, 112 | progress: &mut prodash::tree::Item, 113 | channel: &async_channel::Sender<R>, 114 | step: usize, 115 | max_step: usize, 116 | f: impl FnOnce() -> R, 117 | ) -> SubmitResult { 118 | use model::TaskState::*; 119 | use SubmitResult::*; 120 | let configure = || { 121 | progress.init(Some(max_step), Some("task".into())); 122 | progress.set(step); 123 | progress.blocked("wait for consumer", None); 124 | }; 125 | match task.state { 126 | InProgress(_) => { 127 | if startup_time > task.stored_at { 128 | configure(); 129 | channel.send(f()).await.unwrap(); 130 | }; 131 | Submitted 132 | } 133 | NotStarted => { 134 | configure(); 135 | channel.send(f()).await.unwrap(); 136 | Submitted
137 | } 138 | AttemptsWithFailure(ref v) if v.len() < MAX_ATTEMPTS_BEFORE_WE_GIVE_UP => { 139 | configure(); 140 | progress.info(format!("Retrying task, attempt {}", v.len() + 1)); 141 | channel.send(f()).await.unwrap(); 142 | Submitted 143 | } 144 | AttemptsWithFailure(_) => PermanentFailure, 145 | Complete => Done(task), 146 | } 147 | } 148 | 149 | fn crate_dir(assets_dir: &Path, crate_name: &str) -> PathBuf { 150 | // we can safely assume ascii here - otherwise we panic 151 | let crate_path = match crate_name.len() { 152 | 1 => Path::new("1").join(crate_name), 153 | 2 => Path::new("2").join(crate_name), 154 | 3 => Path::new("3").join(&crate_name[..1]).join(&crate_name[1..]), 155 | _ => Path::new(&crate_name[..2]).join(&crate_name[2..4]).join(crate_name), 156 | }; 157 | assets_dir.join(crate_path) 158 | } 159 | 160 | pub fn download_file_path( 161 | assets_dir: &Path, 162 | crate_name: &str, 163 | crate_version: &str, 164 | process: &str, 165 | version: &str, 166 | kind: &str, 167 | ) -> PathBuf { 168 | crate_dir(assets_dir, crate_name).join(format!( 169 | "{crate_version}-{process}{sep}{version}.{kind}", 170 | process = process, 171 | sep = crate::persistence::KEY_SEP_CHAR, 172 | version = version, 173 | kind = kind, 174 | crate_version = crate_version 175 | )) 176 | } 177 | -------------------------------------------------------------------------------- /criner/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, time}; 2 | 3 | #[derive(Debug)] 4 | pub struct FormatDeadline(pub time::SystemTime); 5 | 6 | impl fmt::Display for FormatDeadline { 7 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::result::Result<(), fmt::Error> { 8 | let now = time::SystemTime::now(); 9 | write!( 10 | f, 11 | "{} ago at {}", 12 | humantime::format_duration(now.duration_since(self.0).unwrap_or_default()), 13 | humantime::format_rfc3339(now) 14 | ) 15 | } 16 | } 17 | 18 | pub type Result<T> = std::result::Result<T, Error>; 19 | 20 | quick_error! {
21 | #[derive(Debug)] 22 | pub enum Error { 23 | Bug(d: &'static str) { 24 | display("{}", d) 25 | } 26 | Message(d: String) { 27 | display("{}", d) 28 | } 29 | InvalidHeader(d: &'static str) { 30 | display("{}", d) 31 | } 32 | HttpStatus(status: http::StatusCode) { 33 | display("{}", status) 34 | } 35 | DeadlineExceeded(d: FormatDeadline) { 36 | display("Stopped computation as deadline was reached {}.", d) 37 | } 38 | Interrupted { 39 | display("Interrupt or termination signal received") 40 | } 41 | Timeout(d: std::time::Duration, msg: String) { 42 | display("{} - timeout after {:?}.", msg, d) 43 | } 44 | RmpSerdeEncode(err: rmp_serde::encode::Error) { 45 | from() 46 | source(err) 47 | } 48 | Git2(err: git2::Error) { 49 | from() 50 | source(err) 51 | } 52 | IndexDiffInit(err: crates_index_diff::index::init::Error) { 53 | from() 54 | source(err) 55 | } 56 | IndexDiffChanges(err: crates_index_diff::index::diff::Error) { 57 | from() 58 | source(err) 59 | } 60 | Io(err: std::io::Error) { 61 | from() 62 | source(err) 63 | } 64 | FromUtf8(err: std::string::FromUtf8Error) { 65 | from() 66 | source(err) 67 | } 68 | Reqwest(err: reqwest::Error) { 69 | from() 70 | source(err) 71 | } 72 | ParseInt(err: std::num::ParseIntError) { 73 | from() 74 | source(err) 75 | } 76 | Rusqlite(err: rusqlite::Error) { 77 | from() 78 | source(err) 79 | } 80 | GlobSet(err: globset::Error) { 81 | from() 82 | source(err) 83 | } 84 | Horrorshow(err: horrorshow::Error) { 85 | from() 86 | source(err) 87 | } 88 | SystemTime(err: std::time::SystemTimeError) { 89 | from() 90 | source(err) 91 | } 92 | StripPrefixError(err: std::path::StripPrefixError) { 93 | from() 94 | source(err) 95 | } 96 | Csv(err: csv::Error) { 97 | from() 98 | source(err) 99 | } 100 | GlobPattern(err: glob::PatternError) { 101 | from() 102 | source(err) 103 | } 104 | Glob(err: glob::GlobError) { 105 | from() 106 | source(err) 107 | } 108 | ChannelSendMessage(msg: &'static str) { 109 | display("{}: Sending into a closed channel", msg) 110 | } 111 | } 112 | } 113 | 114 | impl Error { 115 | pub fn send_msg<T>(msg: &'static str) -> impl FnOnce(async_channel::SendError<T>) -> Error { 116 | move |_err| Error::ChannelSendMessage(msg) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /criner/src/export/mod.rs: -------------------------------------------------------------------------------- 1 | mod run; 2 | mod to_sql; 3 | 4 | pub use run::run_blocking; 5 | -------------------------------------------------------------------------------- /criner/src/export/run.rs: -------------------------------------------------------------------------------- 1 | use super::to_sql::SqlConvert; 2 | use crate::model; 3 | use rusqlite::Connection; 4 | use std::path::Path; 5 | 6 | pub fn run_blocking(source_db: impl AsRef<Path>, destination_db: impl AsRef<Path>) -> crate::Result<()> { 7 | if destination_db.as_ref().is_file() { 8 | return Err(crate::Error::Message(format!( 9 | "Destination database at '{}' already exists - this is currently unsupported", 10 | destination_db.as_ref().display() 11 | ))); 12 | } 13 | let mut input = Connection::open(source_db)?; 14 | let mut output = Connection::open(destination_db)?; 15 | 16 | // Turn off keychecks during insertion - we assume we can't get it wrong 17 | // However, we do embed foreign key relations as form of documentation.
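// A sketch of the trade-off being made below (assuming standard SQLite semantics): with
// `journal_mode = 'OFF'` there is no rollback journal, so a crash mid-export can leave the
// destination database corrupt. That is acceptable only because the destination is a pure
// derivative of the source - the `is_file()` guard above refuses to touch an existing file,
// and recovery is simply deleting the partial export and running it again.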
18 | output.execute_batch( 19 | " 20 | PRAGMA foreign_keys = FALSE; -- assume we don't mess up relations, save validation time 21 | PRAGMA journal_mode = 'OFF' -- no journal, direct writes 22 | ", 23 | )?; 24 | 25 | transfer::<model::Crate>(&mut input, &mut output)?; 26 | transfer::<model::CrateVersion>(&mut input, &mut output)?; 27 | transfer::<model::Task>(&mut input, &mut output)?; 28 | transfer::<model::TaskResult>(&mut input, &mut output)?; 29 | transfer::<model::Context>(&mut input, &mut output)?; 30 | transfer::<model::db_dump::Crate>(&mut input, &mut output)?; 31 | 32 | Ok(()) 33 | } 34 | 35 | fn transfer<T>(input: &mut Connection, output: &mut Connection) -> crate::Result<()> 36 | where 37 | for<'a> T: SqlConvert + From<&'a [u8]>, 38 | { 39 | output.execute_batch(T::init_table_statement())?; 40 | let mut istm = input.prepare(&format!("SELECT key, data FROM '{}'", T::source_table_name()))?; 41 | let transaction = output.transaction()?; 42 | let mut count = 0; 43 | let start = std::time::SystemTime::now(); 44 | { 45 | if let Some(res) = T::convert_to_sql(&mut istm, &transaction) { 46 | count = res?; 47 | } else { 48 | let mut ostm = transaction.prepare(T::replace_statement())?; 49 | let mut secondary_ostm = match T::secondary_replace_statement() { 50 | Some(s) => Some(transaction.prepare(s)?), 51 | None => None, 52 | }; 53 | for (uid, res) in istm 54 | .query_map([], |r| { 55 | let key: String = r.get(0)?; 56 | let value: Vec<u8> = r.get(1)?; 57 | Ok((key, value)) 58 | })? 59 | .enumerate() 60 | { 61 | count += 1; 62 | let (key, value) = res?; 63 | let value = T::from(value.as_slice()); 64 | value.insert(&key, uid as i32, &mut ostm, secondary_ostm.as_mut())?; 65 | } 66 | } 67 | } 68 | transaction.commit()?; 69 | log::info!( 70 | "Inserted {} {} in {:?}", 71 | count, 72 | T::source_table_name(), 73 | std::time::SystemTime::now().duration_since(start).unwrap() 74 | ); 75 | 76 | Ok(()) 77 | } 78 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/dbdump_crate.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | export::to_sql::{to_seconds_since_epoch, SqlConvert}, 3 | model, 4 | }; 5 | use rusqlite::{params, Statement}; 6 | 7 | impl SqlConvert for model::db_dump::Crate { 8 | fn replace_statement() -> &'static str { 9 | "will not be called" 10 | } 11 | fn source_table_name() -> &'static str { 12 | "crates.io-crate" 13 | } 14 | fn init_table_statement() -> &'static str { 15 | " 16 | BEGIN; 17 | CREATE TABLE 'crates.io-crate_version' ( 18 | parent_id INTEGER NOT NULL, 19 | crate_name TEXT NOT NULL, 20 | semver TEXT NOT NULL, 21 | created_at TIMESTAMP NOT NULL, 22 | updated_at TIMESTAMP NOT NULL, 23 | downloads INTEGER NOT NULL, 24 | features JSON NOT NULL, -- Array of Feature objects 25 | license TEXT NOT NULL, 26 | crate_size INTEGER, 27 | published_by INTEGER, -- Github user id as index into crates.io-actor table 28 | is_yanked INTEGER NOT NULL, -- is 1 if this version is yanked 29 | FOREIGN KEY (parent_id) REFERENCES 'crates.io-crate'(_row_id_) 30 | ); 31 | CREATE TABLE 'crates.io-actor' ( 32 | crates_io_id INTEGER NOT NULL, -- these IDs are not unique, so we can't use it as unique id 33 | kind TEXT NOT NULL, 34 | github_id INTEGER NOT NULL, -- This is a unique id across teams and users 35 | github_avatar_url TEXT NOT NULL, 36 | github_login TEXT NOT NULL, 37 | name TEXT, 38 | PRIMARY KEY (github_id) 39 | ); 40 | CREATE TABLE 'crates.io-crate' ( 41 | name TEXT NOT NULL, 42 | stored_at TIMESTAMP NOT NULL, 43 | created_at TIMESTAMP NOT NULL, 44 | updated_at TIMESTAMP NOT NULL,
45 | description TEXT, 46 | documentation TEXT, 47 | downloads INTEGER NOT NULL, 48 | homepage TEXT, 49 | readme TEXT, 50 | repository TEXT, 51 | created_by INTEGER, -- Github user id as index into crates.io-actor table 52 | owners JSON NOT NULL, -- Array of github user ids for indexing into the crates.io-actor table 53 | keywords JSON NOT NULL, -- Array of strings, each string being a keyword 54 | categories JSON NOT NULL, -- Array of category objects, providing a wealth of information for each 55 | PRIMARY KEY (name), 56 | FOREIGN KEY (created_by) REFERENCES actor(github_id) 57 | ); 58 | COMMIT; 59 | " 60 | } 61 | 62 | fn convert_to_sql( 63 | input_statement: &mut rusqlite::Statement, 64 | transaction: &rusqlite::Transaction, 65 | ) -> Option<crate::Result<usize>> { 66 | Some(do_it(input_statement, transaction)) 67 | } 68 | 69 | fn insert( 70 | &self, 71 | _key: &str, 72 | _uid: i32, 73 | _stm: &mut Statement<'_>, 74 | _sstm: Option<&mut rusqlite::Statement<'_>>, 75 | ) -> crate::Result<usize> { 76 | unimplemented!("we implement convert_to_sql instead (having our own loop and unlimited prepared statements)") 77 | } 78 | } 79 | 80 | fn do_it(input_statement: &mut rusqlite::Statement, transaction: &rusqlite::Transaction) -> crate::Result<usize> { 81 | let mut insert_crate = transaction 82 | .prepare(" 83 | REPLACE INTO 'crates.io-crate' 84 | (name, stored_at, created_at, updated_at, description, documentation, downloads, homepage, readme, repository, created_by, owners, keywords, categories) 85 | VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6 , ?7 , ?8 , ?9 , ?10 , ?11 , ?12 , ?13 , ?14); 86 | ",) 87 | .unwrap(); 88 | let mut insert_actor = transaction 89 | .prepare( 90 | " 91 | INSERT OR IGNORE INTO 'crates.io-actor' 92 | (crates_io_id, kind, github_id, github_avatar_url, github_login, name) 93 | VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6 ); 94 | ", 95 | ) 96 | .unwrap(); 97 | 98 | let mut insert_crate_version = transaction 99 | .prepare( 100 | " 101 | INSERT OR IGNORE INTO 'crates.io-crate_version' 102 | (parent_id, crate_name, semver, created_at, updated_at, downloads, features, license, crate_size, published_by, is_yanked) 103 | VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6 , ?7 , ?8 , ?9 , ?10 , ?11); 104 | ", 105 | ) 106 | .unwrap(); 107 | 108 | let mut count = 0; 109 | for res in input_statement.query_map([], |r| { 110 | let key: String = r.get(0)?; 111 | let value: Vec<u8> = r.get(1)?; 112 | Ok((key, value)) 113 | })? {
114 | let (_crate_name, bytes) = res?; 115 | let model::db_dump::Crate { 116 | name, 117 | stored_at, 118 | created_at, 119 | updated_at, 120 | description, 121 | documentation, 122 | downloads, 123 | homepage, 124 | readme, 125 | repository, 126 | versions, 127 | keywords, 128 | categories, 129 | created_by, 130 | owners, 131 | } = bytes.as_slice().into(); 132 | 133 | if let Some(actor) = created_by.as_ref() { 134 | insert_actor_to_db(&mut insert_actor, actor)?; 135 | } 136 | 137 | for owner in owners.iter() { 138 | insert_actor_to_db(&mut insert_actor, owner)?; 139 | } 140 | 141 | count += insert_crate.execute(params![ 142 | name, 143 | to_seconds_since_epoch(stored_at), 144 | to_seconds_since_epoch(created_at), 145 | to_seconds_since_epoch(updated_at), 146 | description, 147 | documentation, 148 | downloads as i64, 149 | homepage, 150 | readme, 151 | repository, 152 | created_by.map(|actor| actor.github_id), 153 | serde_json::to_string_pretty(&owners.iter().map(|actor| actor.github_id).collect::<Vec<_>>()).unwrap(), 154 | serde_json::to_string_pretty(&keywords).unwrap(), 155 | serde_json::to_string_pretty(&categories).unwrap(), 156 | ])?; 157 | 158 | for version in versions { 159 | let model::db_dump::CrateVersion { 160 | crate_size, 161 | created_at, 162 | updated_at, 163 | downloads, 164 | features, 165 | license, 166 | semver, 167 | published_by, 168 | is_yanked, 169 | } = version; 170 | insert_crate_version.execute(params![ 171 | count as i32, 172 | name, 173 | semver, 174 | to_seconds_since_epoch(created_at), 175 | to_seconds_since_epoch(updated_at), 176 | downloads as i64, 177 | serde_json::to_string_pretty(&features).unwrap(), 178 | license, 179 | crate_size, 180 | published_by.map(|a| a.github_id), 181 | is_yanked 182 | ])?; 183 | } 184 | } 185 | Ok(count) 186 | } 187 | 188 | fn insert_actor_to_db(insert_actor: &mut Statement, actor: &model::db_dump::Actor) -> rusqlite::Result<usize> { 189 | insert_actor.execute(params![ 190 | actor.crates_io_id, 191 | match actor.kind { 192 | model::db_dump::ActorKind::User => "user", 193 | model::db_dump::ActorKind::Team => "team", 194 | }, 195 | actor.github_id, 196 | actor.github_avatar_url, 197 | actor.github_login, 198 | actor.name 199 | ]) 200 | } 201 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/krate.rs: -------------------------------------------------------------------------------- 1 | use crate::{export::to_sql::SqlConvert, model}; 2 | use rusqlite::{params, Statement}; 3 | 4 | impl SqlConvert for model::Crate { 5 | fn replace_statement() -> &'static str { 6 | "REPLACE INTO crate 7 | (name, version) 8 | VALUES (?1, ?2)" 9 | } 10 | fn source_table_name() -> &'static str { 11 | "crate" 12 | } 13 | fn init_table_statement() -> &'static str { 14 | "CREATE TABLE crate ( 15 | name TEXT NOT NULL, 16 | version TEXT NOT NULL, 17 | PRIMARY KEY (name, version) 18 | )" 19 | } 20 | 21 | fn insert( 22 | &self, 23 | key: &str, 24 | _uid: i32, 25 | stm: &mut Statement<'_>, 26 | _sstm: Option<&mut rusqlite::Statement<'_>>, 27 | ) -> crate::Result<usize> { 28 | let mut tokens = key.split(crate::persistence::KEY_SEP_CHAR); 29 | let name = tokens.next().unwrap(); 30 | assert!(tokens.next().is_none()); 31 | 32 | let Self { versions } = self; 33 | for version in versions.iter() { 34 | stm.execute(params![name, version])?; 35 | } 36 | Ok(versions.len()) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/krate_version.rs: 
-------------------------------------------------------------------------------- 1 | use crate::{export::to_sql::SqlConvert, model}; 2 | use rusqlite::{params, Statement}; 3 | 4 | impl SqlConvert for model::CrateVersion { 5 | fn replace_statement() -> &'static str { 6 | "REPLACE INTO crate_version 7 | (id, name, version, kind, checksum, features) 8 | VALUES (?1, ?2 , ?3 , ?4 , ?5 , ?6); 9 | " 10 | } 11 | 12 | fn secondary_replace_statement() -> Option<&'static str> { 13 | Some( 14 | "REPLACE INTO crate_version_dependency 15 | (parent_id, name, required_version, features, optional, default_features, target, kind, package) 16 | VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6 , ?7 , ?8 , ?9);", 17 | ) 18 | } 19 | 20 | fn source_table_name() -> &'static str { 21 | "crate_version" 22 | } 23 | 24 | fn init_table_statement() -> &'static str { 25 | "CREATE TABLE crate_version ( 26 | id INTEGER UNIQUE NOT NULL, 27 | name TEXT NOT NULL, 28 | version TEXT NOT NULL, 29 | kind TEXT NOT NULL, 30 | checksum TEXT NOT NULL, 31 | features JSON NOT NULL, 32 | PRIMARY KEY (name, version) 33 | ); 34 | CREATE TABLE crate_version_dependency ( 35 | parent_id INTEGER NOT NULL, 36 | name TEXT NOT NULL, 37 | required_version TEXT NOT NULL, 38 | features JSON NOT NULL, 39 | optional INTEGER NOT NULL, -- BOOL 40 | default_features INTEGER NOT NULL, -- BOOL 41 | target TEXT, 42 | kind TEXT, 43 | package TEXT, 44 | FOREIGN KEY (parent_id) REFERENCES crate_version(id) 45 | ); 46 | " 47 | } 48 | 49 | fn insert( 50 | &self, 51 | _key: &str, 52 | uid: i32, 53 | stm: &mut Statement<'_>, 54 | sstm: Option<&mut Statement<'_>>, 55 | ) -> crate::Result { 56 | let model::CrateVersion { 57 | name, 58 | kind, 59 | version, 60 | checksum, 61 | features, 62 | dependencies, 63 | } = self; 64 | 65 | use crate::model::ChangeKind::*; 66 | stm.execute(params![ 67 | uid, 68 | name, 69 | version, 70 | match kind { 71 | Added => "added", 72 | Yanked => "yanked", 73 | }, 74 | checksum, 75 | serde_json::to_string_pretty(features).unwrap() 76 | ])?; 77 | 78 | let sstm = sstm.expect("secondary statement to be set"); 79 | for dep in dependencies { 80 | let model::Dependency { 81 | name, 82 | required_version, 83 | features, 84 | optional, 85 | default_features, 86 | target, 87 | kind, 88 | package, 89 | } = dep; 90 | sstm.execute(params![ 91 | uid, 92 | name, 93 | required_version, 94 | serde_json::to_string_pretty(features).unwrap(), 95 | optional, 96 | default_features, 97 | target, 98 | kind, 99 | package 100 | ])?; 101 | } 102 | Ok(1) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/meta.rs: -------------------------------------------------------------------------------- 1 | use crate::export::to_sql::SqlConvert; 2 | use crate::model; 3 | use rusqlite::{params, Statement}; 4 | 5 | impl SqlConvert for model::Context { 6 | fn replace_statement() -> &'static str { 7 | "INSERT INTO runtime_statistic 8 | (sample_day, num_new_crate_versions, num_new_crates, dur_s_fetch_new_crate_versions) 9 | VALUES (?1 , ?2 , ?3 , ?4); 10 | " 11 | } 12 | 13 | fn source_table_name() -> &'static str { 14 | "meta" 15 | } 16 | 17 | fn init_table_statement() -> &'static str { 18 | "CREATE TABLE runtime_statistic ( 19 | sample_day TIMESTAMP NOT NULL, 20 | num_new_crate_versions INTEGER NOT NULL, 21 | num_new_crates INTEGER NOT NULL, 22 | dur_s_fetch_new_crate_versions INTEGER NOT NULL, 23 | PRIMARY KEY (sample_day) 24 | ); 25 | " 26 | } 27 | 28 | fn insert( 29 | &self, 30 | key: &str, 31 | _uid: i32, 32 
| stm: &mut Statement<'_>, 33 | _sstm: Option<&mut Statement<'_>>, 34 | ) -> crate::Result { 35 | let mut tokens = key.split('/').skip(1); 36 | let day_date = tokens.next().unwrap(); 37 | assert!(tokens.next().is_none()); 38 | assert_eq!(day_date.len(), 10); 39 | let day_date = humantime::parse_rfc3339(&format!("{}T00:00:00Z", day_date)).unwrap(); 40 | let date_stamp = day_date.duration_since(std::time::UNIX_EPOCH).unwrap(); 41 | 42 | let model::Context { 43 | counts: model::Counts { crate_versions, crates }, 44 | durations: model::Durations { fetch_crate_versions }, 45 | } = self; 46 | 47 | stm.execute(params![ 48 | date_stamp.as_secs() as i64, 49 | *crate_versions as i64, 50 | *crates as i64, 51 | fetch_crate_versions.as_secs() as i64 52 | ]) 53 | .map_err(Into::into) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/mod.rs: -------------------------------------------------------------------------------- 1 | mod dbdump_crate; 2 | mod krate; 3 | mod krate_version; 4 | mod meta; 5 | mod result; 6 | mod task; 7 | 8 | pub fn to_seconds_since_epoch(time: std::time::SystemTime) -> i64 { 9 | time.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs() as i64 10 | } 11 | 12 | pub trait SqlConvert { 13 | fn convert_to_sql( 14 | _input_statement: &mut rusqlite::Statement, 15 | _transaction: &rusqlite::Transaction, 16 | ) -> Option> { 17 | None 18 | } 19 | fn replace_statement() -> &'static str; 20 | fn secondary_replace_statement() -> Option<&'static str> { 21 | None 22 | } 23 | fn source_table_name() -> &'static str; 24 | fn init_table_statement() -> &'static str; 25 | fn insert( 26 | &self, 27 | key: &str, 28 | uid: i32, 29 | stm: &mut rusqlite::Statement, 30 | sstm: Option<&mut rusqlite::Statement>, 31 | ) -> crate::Result; 32 | } 33 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/result.rs: -------------------------------------------------------------------------------- 1 | use crate::export::to_sql::SqlConvert; 2 | use crate::model; 3 | use rusqlite::{params, Statement}; 4 | 5 | impl SqlConvert for model::TaskResult { 6 | fn convert_to_sql( 7 | istm: &mut rusqlite::Statement, 8 | transaction: &rusqlite::Transaction, 9 | ) -> Option> { 10 | let res = (|| { 11 | let mut num_downloads = 0; 12 | let mut num_extract_crates = 0; 13 | let mut num_crate_entries = 0; 14 | let mut insert_download = transaction 15 | .prepare( 16 | " 17 | REPLACE INTO result_download 18 | (crate_name, crate_version, version, kind, url, content_length, content_type) 19 | VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6 , ?7); 20 | ", 21 | ) 22 | .unwrap(); 23 | let mut insert_extract_crate = transaction 24 | .prepare( 25 | " 26 | REPLACE INTO result_extract_crate 27 | (id, crate_name, crate_version, version, num_crate_entries) 28 | VALUES (?1, ?2 , ?3 , ?4 , ?5); 29 | ", 30 | ) 31 | .unwrap(); 32 | 33 | let mut insert_crate_entry = transaction 34 | .prepare( 35 | " 36 | REPLACE INTO crate_entry 37 | (parent_id, path, size, entry_type, data) 38 | VALUES (?1 , ?2 , ?3 , ?4 , ?5); 39 | ", 40 | ) 41 | .unwrap(); 42 | 43 | for res in istm.query_map([], |r| { 44 | let key: String = r.get(0)?; 45 | let value: Vec = r.get(1)?; 46 | Ok((key, value)) 47 | })? 
{ 48 | let (key, value) = res?; 49 | let mut tokens = key.split(crate::persistence::KEY_SEP_CHAR); 50 | let crate_name = tokens.next().unwrap(); 51 | let crate_version = tokens.next().unwrap(); 52 | let process = tokens.next().unwrap(); 53 | let process_version = tokens.next().unwrap(); 54 | let optional_last_key = tokens.next(); 55 | assert!(tokens.next().is_none()); 56 | 57 | let value = Self::from(value.as_slice()); 58 | 59 | use model::TaskResult; 60 | match value { 61 | TaskResult::Download { 62 | kind, 63 | url, 64 | content_length, 65 | content_type, 66 | } => { 67 | assert_eq!(process, "download"); 68 | assert_eq!(Some(kind.as_ref()), optional_last_key); 69 | insert_download.execute(params![ 70 | crate_name, 71 | crate_version, 72 | process_version, 73 | kind, 74 | url, 75 | content_length, 76 | content_type 77 | ])?; 78 | num_downloads += 1; 79 | } 80 | TaskResult::ExplodedCrate { 81 | entries_meta_data, 82 | selected_entries, 83 | } => { 84 | assert_eq!(process, "extract_crate"); 85 | let id = num_extract_crates as i32; 86 | insert_extract_crate.execute(params![ 87 | id, 88 | crate_name, 89 | crate_version, 90 | process_version, 91 | entries_meta_data.len() as i64 92 | ])?; 93 | for entry in entries_meta_data.iter() { 94 | let model::TarHeader { path, size, entry_type } = entry; 95 | insert_crate_entry.execute(params![ 96 | id, 97 | std::str::from_utf8(path).expect("utf8 path in crate - lets see how long this is true"), 98 | *size as i64, 99 | entry_type, 100 | rusqlite::types::Null 101 | ])?; 102 | num_crate_entries += 1; 103 | } 104 | for (entry, data) in selected_entries.iter() { 105 | let model::TarHeader { path, size, entry_type } = entry; 106 | insert_crate_entry.execute(params![ 107 | id, 108 | std::str::from_utf8(path).expect("utf8 path in crate - lets see how long this is true"), 109 | *size as i64, 110 | entry_type, 111 | data 112 | ])?; 113 | num_crate_entries += 1; 114 | } 115 | num_extract_crates += 1; 116 | } 117 | TaskResult::None => {} 118 | }; 119 | } 120 | Ok(num_downloads + num_extract_crates + num_crate_entries) 121 | })(); 122 | Some(res) 123 | } 124 | 125 | fn replace_statement() -> &'static str { 126 | "will not be called" 127 | } 128 | 129 | fn source_table_name() -> &'static str { 130 | "result" 131 | } 132 | 133 | fn init_table_statement() -> &'static str { 134 | " 135 | BEGIN; 136 | CREATE TABLE result_download ( 137 | crate_name TEXT NOT NULL, 138 | crate_version TEXT NOT NULL, 139 | version TEXT NOT NULL, -- version of the process that created the result 140 | kind TEXT NOT NULL, 141 | 142 | url TEXT NOT NULL, 143 | content_length INTEGER NOT NULL, 144 | content_type TEXT, 145 | PRIMARY KEY (crate_name, crate_version, version, kind) 146 | ); 147 | CREATE TABLE result_extract_crate ( 148 | id INTEGER UNIQUE NOT NULL, 149 | crate_name TEXT NOT NULL, 150 | crate_version TEXT NOT NULL, 151 | version TEXT NOT NULL, -- version of the process that created the result 152 | 153 | num_crate_entries INTEGER NOT NULL, 154 | PRIMARY KEY (crate_name, crate_version, version) 155 | ); 156 | CREATE TABLE crate_entry ( 157 | parent_id INTEGER NOT NULL, 158 | path TEXT NOT NULL, 159 | 160 | size INTEGER NOT NULL, -- size in bytes 161 | entry_type INTEGER NOT NULL, -- tar::EntryType 162 | data BLOB, -- optionally with entire content 163 | 164 | PRIMARY KEY (parent_id, path), 165 | FOREIGN KEY (parent_id) REFERENCES result_extract_crate(id) 166 | ); 167 | COMMIT; 168 | " 169 | } 170 | 171 | fn insert( 172 | &self, 173 | _key: &str, 174 | _uid: i32, 175 | _stm: &mut 
Statement<'_>, 176 | _sstm: Option<&mut Statement<'_>>, 177 | ) -> crate::Result { 178 | unimplemented!("we implement convert_to_sql instead (having our own loop and unlimited prepared statements") 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /criner/src/export/to_sql/task.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | export::to_sql::{to_seconds_since_epoch, SqlConvert}, 3 | model, 4 | }; 5 | use rusqlite::{params, Statement}; 6 | 7 | impl SqlConvert for model::Task { 8 | fn replace_statement() -> &'static str { 9 | "REPLACE INTO task 10 | (id, key, process, version, stored_at, state) 11 | VALUES (?1, ?2, ?3, ?4, ?5, ?6); " 12 | } 13 | fn secondary_replace_statement() -> Option<&'static str> { 14 | Some( 15 | "REPLACE INTO task_error 16 | (parent_id, error) 17 | VALUES (?1 , ?2);", 18 | ) 19 | } 20 | fn source_table_name() -> &'static str { 21 | "task" 22 | } 23 | fn init_table_statement() -> &'static str { 24 | "BEGIN; 25 | CREATE TABLE task ( 26 | id INTEGER UNIQUE NOT NULL, 27 | key TEXT NOT NULL, 28 | process TEXT NOT NULL, 29 | version TEXT NOT NULL, 30 | stored_at TIMESTAMP NOT NULL, 31 | state TEXT NOT NULL, 32 | PRIMARY KEY (key) 33 | ); 34 | CREATE TABLE task_error ( 35 | parent_id INTEGER NOT NULL, 36 | error TEXT NOT NULL, 37 | FOREIGN KEY (parent_id) REFERENCES task(id) 38 | ); 39 | COMMIT;" 40 | } 41 | 42 | fn insert( 43 | &self, 44 | key: &str, 45 | uid: i32, 46 | stm: &mut Statement<'_>, 47 | sstm: Option<&mut rusqlite::Statement<'_>>, 48 | ) -> crate::Result { 49 | use model::TaskState::*; 50 | 51 | let Self { 52 | stored_at, 53 | process, 54 | version, 55 | state, 56 | } = self; 57 | stm.execute(params![ 58 | uid, 59 | key, 60 | process, 61 | version, 62 | to_seconds_since_epoch(*stored_at), 63 | match state { 64 | NotStarted => "NotStarted", 65 | Complete => "Complete", 66 | InProgress(_) => "InProgress", 67 | AttemptsWithFailure(_) => "AttemptsWithFailure", 68 | }, 69 | ])?; 70 | match state { 71 | InProgress(Some(errors)) | AttemptsWithFailure(errors) => { 72 | let sstm = sstm.ok_or(crate::Error::Bug("need secondary statement"))?; 73 | for error in errors.iter() { 74 | sstm.execute(params![uid, error])?; 75 | } 76 | } 77 | _ => {} 78 | } 79 | Ok(1) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /criner/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unneeded_field_pattern)] 2 | #![deny(unsafe_code)] 3 | 4 | #[macro_use] 5 | extern crate lazy_static; 6 | 7 | #[macro_use] 8 | extern crate quick_error; 9 | 10 | #[cfg(feature = "migration")] 11 | pub mod migration; 12 | 13 | pub mod error; 14 | pub use error::{Error, Result}; 15 | 16 | pub mod export; 17 | pub(crate) mod model; 18 | pub(crate) mod persistence; 19 | pub(crate) mod utils; 20 | 21 | mod spawn; 22 | pub(crate) use spawn::spawn; 23 | 24 | mod engine; 25 | 26 | pub use engine::run; 27 | 28 | pub use prodash; 29 | -------------------------------------------------------------------------------- /criner/src/migration.rs: -------------------------------------------------------------------------------- 1 | use crate::persistence::{TableAccess, TaskResultTable}; 2 | use rusqlite::params; 3 | use std::path::Path; 4 | 5 | pub fn migrate(db_path: impl AsRef) -> crate::Result<()> { 6 | log::info!("open db"); 7 | let db = crate::persistence::Db::open(&db_path)?; 8 | let mut connection = 
db.open_connection_no_async_with_busy_wait()?; 9 | let mut keys = Vec::::new(); 10 | let table_name = TaskResultTable::table_name(); 11 | { 12 | log::info!("begin iteration"); 13 | let mut statement = connection.prepare(&format!("SELECT key FROM {}", table_name))?; 14 | let mut rows = statement.query([])?; 15 | while let Some(r) = rows.next()? { 16 | keys.push(r.get(0)?); 17 | } 18 | log::info!("got {} keys", keys.len()); 19 | } 20 | { 21 | log::info!("begin change"); 22 | let transaction = connection.transaction()?; 23 | let mut statement = transaction.prepare(&format!("UPDATE {} SET key=?1 WHERE key=?2;", table_name))?; 24 | for key in keys.into_iter() { 25 | statement.execute(params![ 26 | format!( 27 | "{}", 28 | if key.ends_with(':') { 29 | &key[..key.len() - 1] 30 | } else { 31 | &key[..] 32 | } 33 | ), 34 | key 35 | ])?; 36 | } 37 | drop(statement); 38 | transaction.commit()?; 39 | } 40 | Ok(()) 41 | } 42 | 43 | #[allow(dead_code)] 44 | fn migrate_iterate_assets_and_update_db(db_path: impl AsRef) -> crate::Result<()> { 45 | let assets_dir = db_path.as_ref().join("assets"); 46 | let db = crate::persistence::Db::open(&db_path)?; 47 | let results = db.open_results()?; 48 | let task = crate::engine::work::iobound::default_persisted_download_task(); 49 | let mut key = String::new(); 50 | let root = prodash::Tree::new(); 51 | let mut progress = root.add_child("does not matter"); 52 | 53 | for entry in jwalk::WalkDir::new(assets_dir) 54 | .preload_metadata(true) 55 | .into_iter() 56 | .filter_map(Result::ok) 57 | { 58 | let entry: jwalk::DirEntry = entry; 59 | if entry.file_type.as_ref().ok().map_or(true, |d| d.is_dir()) { 60 | continue; 61 | } 62 | 63 | if entry.file_name != std::ffi::OsString::from("download:1.0.0.crate") { 64 | let new_name = entry.path().parent().unwrap().join("download:1.0.0.crate"); 65 | std::fs::rename(entry.path(), &new_name)?; 66 | log::warn!("Renamed '{}' to '{}'", entry.path().display(), new_name.display()); 67 | } 68 | let file_size = entry.metadata.as_ref().unwrap().as_ref().unwrap().len(); 69 | let mut iter = entry.parent_path().iter().skip(3); 70 | let name = iter.next().and_then(|p| p.to_str()).unwrap(); 71 | let version = iter.next().and_then(|p| p.to_str()).unwrap(); 72 | log::info!("{} {}", name, version); 73 | 74 | key.clear(); 75 | let task_result = crate::model::TaskResult::Download { 76 | kind: "crate".into(), 77 | url: format!( 78 | "https://crates.io/api/v1/crates/{name}/{version}/download", 79 | name = name, 80 | version = version, 81 | ) 82 | .into(), 83 | content_length: file_size as u32, 84 | content_type: Some("application/x-tar".into()), 85 | }; 86 | task_result.fq_key(name, version, &task, &mut key); 87 | results.insert(&mut progress, &key, &task_result)?; 88 | } 89 | Ok(()) 90 | } 91 | -------------------------------------------------------------------------------- /criner/src/persistence/keyed.rs: -------------------------------------------------------------------------------- 1 | use crate::model::{Context, Crate, CrateVersion, Task, TaskResult}; 2 | use std::time::SystemTime; 3 | 4 | pub const KEY_SEP_CHAR: char = ':'; 5 | 6 | pub trait Keyed { 7 | fn key_buf(&self, buf: &mut String); 8 | fn key(&self) -> String { 9 | let mut buf = String::with_capacity(16); 10 | self.key_buf(&mut buf); 11 | buf 12 | } 13 | } 14 | 15 | impl Keyed for Task { 16 | fn key_buf(&self, buf: &mut String) { 17 | buf.push_str(&self.process); 18 | buf.push(KEY_SEP_CHAR); 19 | buf.push_str(&self.version); 20 | } 21 | } 22 | 23 | impl Task { 24 | pub fn 
fq_key(&self, crate_name: &str, crate_version: &str, buf: &mut String) { 25 | CrateVersion::key_from(crate_name, crate_version, buf); 26 | buf.push(KEY_SEP_CHAR); 27 | self.key_buf(buf); 28 | } 29 | } 30 | 31 | impl Keyed for CrateVersion { 32 | fn key_buf(&self, buf: &mut String) { 33 | CrateVersion::key_from(&self.name, &self.version, buf) 34 | } 35 | } 36 | 37 | impl Crate { 38 | pub fn key_from_version_buf(v: &CrateVersion, buf: &mut String) { 39 | buf.push_str(&v.name); 40 | } 41 | } 42 | 43 | impl Keyed for TaskResult { 44 | fn key_buf(&self, buf: &mut String) { 45 | match self { 46 | TaskResult::Download { kind, .. } => { 47 | buf.push(KEY_SEP_CHAR); 48 | buf.push_str(kind) 49 | } 50 | TaskResult::None | TaskResult::ExplodedCrate { .. } => {} 51 | } 52 | } 53 | } 54 | 55 | impl TaskResult { 56 | pub fn fq_key(&self, crate_name: &str, crate_version: &str, task: &Task, buf: &mut String) { 57 | task.fq_key(crate_name, crate_version, buf); 58 | self.key_buf(buf); 59 | } 60 | } 61 | 62 | impl Keyed for Context { 63 | fn key_buf(&self, buf: &mut String) { 64 | use std::fmt::Write; 65 | write!( 66 | buf, 67 | "context/{}", 68 | humantime::format_rfc3339(SystemTime::now()) 69 | .to_string() 70 | .get(..10) 71 | .expect("YYYY-MM-DD - 10 bytes") 72 | ) 73 | .ok(); 74 | } 75 | } 76 | 77 | impl CrateVersion { 78 | pub fn key_from(name: &str, version: &str, buf: &mut String) { 79 | buf.push_str(name); 80 | buf.push(KEY_SEP_CHAR); 81 | buf.push_str(version); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /criner/src/persistence/merge.rs: -------------------------------------------------------------------------------- 1 | use crate::model::{self, Context, CrateVersion, Task}; 2 | use crate::utils::parse_semver; 3 | 4 | pub trait Merge { 5 | fn merge(self, other: &T) -> Self; 6 | } 7 | 8 | impl Merge for model::Task { 9 | fn merge(mut self, other: &Task) -> Self { 10 | let my_state = self.state; 11 | self = other.clone(); 12 | self.state = my_state.merge(&other.state); 13 | self 14 | } 15 | } 16 | 17 | impl Merge for model::TaskState { 18 | fn merge(mut self, other: &model::TaskState) -> Self { 19 | fn merge_vec(mut existing: Vec, new: &[String]) -> Vec { 20 | existing.extend(new.iter().cloned()); 21 | existing 22 | } 23 | use model::TaskState::*; 24 | self = match (&self, other) { 25 | (AttemptsWithFailure(existing), AttemptsWithFailure(new)) => { 26 | AttemptsWithFailure(merge_vec(existing.clone(), new)) 27 | } 28 | (AttemptsWithFailure(existing), InProgress(None)) => InProgress(Some(existing.clone())), 29 | (AttemptsWithFailure(_), InProgress(Some(_))) => { 30 | panic!("One must not create inProgress preloaded with failed attempts, I think :D") 31 | } 32 | (InProgress(Some(existing)), AttemptsWithFailure(other)) => { 33 | AttemptsWithFailure(merge_vec(existing.clone(), other)) 34 | } 35 | (_, other) => other.clone(), 36 | }; 37 | self 38 | } 39 | } 40 | 41 | impl Merge for model::Context { 42 | fn merge(self, other: &Context) -> Self { 43 | self + other 44 | } 45 | } 46 | 47 | fn sort_semver(versions: &mut [String]) { 48 | versions.sort_by_key(|v| parse_semver(v)); 49 | } 50 | 51 | impl Merge for model::Crate { 52 | fn merge(mut self, other: &CrateVersion) -> Self { 53 | if !self.versions.contains(&other.version) { 54 | self.versions.push(other.version.to_owned()); 55 | } 56 | sort_semver(&mut self.versions); 57 | self 58 | } 59 | } 60 | 61 | impl model::Crate { 62 | pub fn merge_mut(&mut self, other: &CrateVersion) -> &mut model::Crate { 63 | 
        if !self.versions.contains(&other.version) {
            self.versions.push(other.version.to_owned());
        }
        sort_semver(&mut self.versions);
        self
    }
}
--------------------------------------------------------------------------------
/criner/src/persistence/mod.rs:
--------------------------------------------------------------------------------
use crate::Result;
use std::path::{Path, PathBuf};

mod keyed;
mod merge;
pub use keyed::*;

mod serde;
mod table;
pub use table::*;

#[derive(Clone)]
pub struct Db {
    sqlite_path: PathBuf,
}

impl Db {
    pub fn open(path: impl AsRef<Path>) -> Result<Db> {
        std::fs::create_dir_all(&path)?;
        let sqlite_path = path.as_ref().join("db.msgpack.sqlite");
        {
            let mut connection = rusqlite::Connection::open(&sqlite_path)?;
            connection.execute_batch("
                PRAGMA journal_mode = WAL; -- better write-concurrency
                PRAGMA synchronous = NORMAL; -- fsync only in critical moments
                PRAGMA wal_autocheckpoint = 1000; -- write WAL changes back every 1000 pages, for a WAL file of about 1MB on average. May affect readers if the number is increased
                PRAGMA wal_checkpoint(TRUNCATE); -- free some space by truncating possibly massive WAL files from the last run.
            ")?;

            let transaction = connection.transaction()?;
            for name in &["meta", "crate_version", "crate", "task", "result", "crates.io-crate"] {
                transaction.execute_batch(&format!(
                    "CREATE TABLE IF NOT EXISTS '{}' (
                        key TEXT PRIMARY KEY NOT NULL,
                        data BLOB NOT NULL
                    )",
                    name
                ))?;
            }
            transaction.execute_batch(
                "CREATE TABLE IF NOT EXISTS report_done (
                    key TEXT PRIMARY KEY NOT NULL
                )",
            )?;
            transaction.commit()?;
        }

        Ok(Db { sqlite_path })
    }

    pub fn open_connection(&self) -> Result<ThreadSafeConnection> {
        Ok(std::sync::Arc::new(parking_lot::Mutex::new(
            rusqlite::Connection::open(&self.sqlite_path)?,
        )))
    }

    pub fn open_connection_with_busy_wait(&self) -> Result<ThreadSafeConnection> {
        let connection = rusqlite::Connection::open(&self.sqlite_path)?;
        connection.busy_handler(Some(sleeper))?;
        Ok(std::sync::Arc::new(parking_lot::Mutex::new(connection)))
    }

    pub fn open_connection_no_async_with_busy_wait(&self) -> Result<rusqlite::Connection> {
        let connection = rusqlite::Connection::open(&self.sqlite_path)?;
        connection.busy_handler(Some(sleeper))?;
        Ok(connection)
    }

    pub fn open_crate_versions(&self) -> Result<CrateVersionTable> {
        Ok(CrateVersionTable {
            inner: self.open_connection()?,
        })
    }
    pub fn open_crates(&self) -> Result<CrateTable> {
        Ok(CrateTable {
            inner: self.open_connection()?,
        })
    }
    pub fn open_tasks(&self) -> Result<TaskTable> {
        Ok(TaskTable {
            inner: self.open_connection()?,
        })
    }
    pub fn open_results(&self) -> Result<TaskResultTable> {
        Ok(TaskResultTable {
            inner: self.open_connection()?,
        })
    }
    pub fn open_context(&self) -> Result<MetaTable> {
        Ok(MetaTable {
            inner: self.open_connection()?,
        })
    }
    pub fn open_reports(&self) -> Result<ReportsTree> {
        Ok(ReportsTree {
            inner: self.open_connection()?,
        })
    }
}

fn sleeper(attempts: i32) -> bool {
    log::warn!("SQLITE_BUSY, retrying after 50ms (attempt {})", attempts);
    std::thread::sleep(std::time::Duration::from_millis(50));
    true
}
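
// A sketch of how the API above is meant to be used (hypothetical caller, not part of this module):
//
//   let db = Db::open("criner.db")?;
//   let tasks = db.open_tasks()?;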
--------------------------------------------------------------------------------
/criner/src/persistence/serde.rs:
--------------------------------------------------------------------------------
use crate::model::{db_dump, Context, Crate, CrateVersion, ReportResult, Task, TaskResult};

fn expect<T, E>(r: std::result::Result<T, E>, panic_message: impl FnOnce(E) -> String) -> T {
    match r {
        Ok(v) => v,
        Err(e) => std::panic::panic_any(panic_message(e)),
    }
}

macro_rules! impl_deserialize {
    ($ty:ty) => {
        impl From<&[u8]> for $ty {
            fn from(b: &[u8]) -> Self {
                expect(rmp_serde::from_slice(b), |e| {
                    format!(
                        concat!("&[u8]: migration should succeed: ", stringify!($ty), "{:#?}: {}"),
                        rmpv::decode::value::read_value(&mut std::io::Cursor::new(b)).unwrap(),
                        e
                    )
                })
            }
        }
    };
}

impl_deserialize!(Crate);
impl_deserialize!(Task);
impl_deserialize!(TaskResult);
impl_deserialize!(CrateVersion);
impl_deserialize!(Context);
impl_deserialize!(ReportResult);
impl_deserialize!(db_dump::Crate);
--------------------------------------------------------------------------------
/criner/src/spawn.rs:
--------------------------------------------------------------------------------
// Copied and adapted from https://github.com/smol-rs/smol/blob/15447d6859df65fd1992f761ee46067bed62f8a5/src/spawn.rs
use std::future::Future;
use std::panic::catch_unwind;
use std::thread;

use async_executor::Executor;
pub use async_executor::Task;
use futures_lite::future;
use once_cell::sync::Lazy;

pub fn spawn<T: Send + 'static>(future: impl Future<Output = T> + Send + 'static) -> Task<T> {
    static GLOBAL: Lazy<Executor<'static>> = Lazy::new(|| {
        for i in 1..=2 {
            thread::Builder::new()
                .name(format!("smol-{}", i))
                .spawn(|| loop {
                    catch_unwind(|| async_io::block_on(GLOBAL.run(future::pending::<()>()))).ok();
                })
                .expect("cannot spawn executor thread");
        }

        Executor::new()
    });

    GLOBAL.spawn(async_compat::Compat::new(future))
}
--------------------------------------------------------------------------------
/criner/src/utils.rs:
--------------------------------------------------------------------------------
use crate::error::{Error, FormatDeadline, Result};
use async_io::Timer;
use dia_semver::Semver;
use futures_util::{
    future::{self, Either},
    FutureExt,
};
use std::{
    convert::TryInto,
    future::Future,
    time::{Duration, SystemTime},
};

pub fn parse_semver(version: &str) -> Semver {
    use std::str::FromStr;
    Semver::from_str(version)
        .or_else(|_| {
            Semver::from_str(
                &version[..version
                    .find('-')
                    .or_else(|| version.find('+'))
                    .expect("some prerelease version")],
            )
        })
        .expect("semver parsing to work if violating prerelease versions are stripped")
}

pub async fn wait_with_progress(
    duration_s: usize,
    progress: prodash::tree::Item,
    deadline: Option<SystemTime>,
    time: Option<time::Time>,
) -> Result<()> {
    progress.init(Some(duration_s), Some("s".into()));
    if let Some(time) = time {
        progress.set_name(format!(
            "{} scheduled at {}",
            progress.name().unwrap_or_else(|| "un-named".into()),
            time.format(&time::macros::format_description!("[hour]:[minute]"))
                .expect("always formattable")
        ));
    }
    for s in 1..=duration_s {
        Timer::after(Duration::from_secs(1)).await;
        check(deadline)?;
        progress.set(s);
    }
    Ok(())
}
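
// A note on the scheduling helpers below (example values are hypothetical):
// `desired_launch_at` resolves an optional local wall-clock time to the next
// instant at which it occurs, rolling over to the next day if that time has
// already passed. Assuming it is currently 14:00 local time:
//
//   desired_launch_at(Some(time::macros::time!(3:00)))  // tomorrow at 03:00
//   desired_launch_at(Some(time::macros::time!(15:00))) // today at 15:00
//   desired_launch_at(None)                             // today at 14:00, i.e. now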
fn desired_launch_at(time: Option<time::Time>) -> time::OffsetDateTime {
    let time = time.unwrap_or_else(|| {
        time::OffsetDateTime::now_local()
            .unwrap_or_else(|_| time::OffsetDateTime::now_utc())
            .time()
    });
    let now = time::OffsetDateTime::now_local().unwrap_or_else(|_| time::OffsetDateTime::now_utc());
    let mut desired = now.date().with_time(time).assume_offset(now.offset());
    if desired < now {
        desired = desired
            .date()
            .next_day()
            .expect("not running in year 9999")
            .with_time(time)
            .assume_offset(now.offset());
    }
    desired
}

fn duration_until(time: Option<time::Time>) -> Duration {
    let desired = desired_launch_at(time);
    let now_local = time::OffsetDateTime::now_local().unwrap_or_else(|_| time::OffsetDateTime::now_utc());
    (desired - now_local)
        .try_into()
        .unwrap_or_else(|_| Duration::from_secs(1))
}

pub async fn repeat_daily_at<MakeFut, MakeProgress, Fut>(
    time: Option<time::Time>,
    mut make_progress: MakeProgress,
    deadline: Option<SystemTime>,
    mut make_future: MakeFut,
) -> Result<()>
where
    Fut: Future<Output = Result<()>>,
    MakeFut: FnMut() -> Fut,
    MakeProgress: FnMut() -> prodash::tree::Item,
{
    let mut iteration = 0;
    let time = desired_launch_at(time).time();
    loop {
        iteration += 1;
        if let Err(err) = make_future().await {
            make_progress().fail(format!(
                "{} : ignored by repeat_daily_at('{:?}',…) iteration {}",
                err, time, iteration
            ))
        }
        wait_with_progress(
            duration_until(Some(time)).as_secs() as usize,
            make_progress(),
            deadline,
            Some(time),
        )
        .await?;
    }
}

pub async fn repeat_every_s<MakeFut, MakeProgress, Fut>(
    interval_s: usize,
    mut make_progress: MakeProgress,
    deadline: Option<SystemTime>,
    at_most: Option<usize>,
    mut make_future: MakeFut,
) -> Result<()>
where
    Fut: Future<Output = Result<()>>,
    MakeFut: FnMut() -> Fut,
    MakeProgress: FnMut() -> prodash::tree::Item,
{
    let max_iterations = at_most.unwrap_or(std::usize::MAX);
    let mut iteration = 0;
    loop {
        if iteration == max_iterations {
            return Ok(());
        }
        iteration += 1;
        if let Err(err) = make_future().await {
            make_progress().fail(format!(
                "{} : ignored by repeat_every({}s,…) iteration {}",
                err, interval_s, iteration
            ))
        }
        if iteration == max_iterations {
            return Ok(());
        }
        wait_with_progress(interval_s, make_progress(), deadline, None).await?;
    }
}

pub fn check(deadline: Option<SystemTime>) -> Result<()> {
    deadline
        .map(|d| {
            if SystemTime::now() >= d {
                Err(Error::DeadlineExceeded(FormatDeadline(d)))
            } else {
                Ok(())
            }
        })
        .unwrap_or(Ok(()))
}

pub async fn handle_ctrl_c_and_sigterm<F, T>(f: F) -> Result<T>
where
    F: Future<Output = T> + Unpin,
{
    let (s, r) = async_channel::bounded(100);
    ctrlc::set_handler(move || {
        s.send(()).now_or_never();
    })
    .ok();
    let selector = future::select(async move { r.recv().await }.boxed_local(), f);
    match selector.await {
        Either::Left((_, _f)) => Err(Error::Interrupted),
        Either::Right((r, _interrupt)) => Ok(r),
    }
}
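
// Both helpers below use the same select-against-a-timer pattern: race the actual
// future against a `Timer` and let whichever side completes first decide between a
// result and a timeout error. A hypothetical call site (the function and message
// are invented for illustration) could look like:
//
//   let work = Box::pin(async { download_crate_archive().await });
//   timeout_after(Duration::from_secs(30), "downloading crate archive", work).await?;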
pub async fn timeout_after<F, T>(duration: Duration, msg: impl Into<String>, f: F) -> Result<T>
where
    F: Future<Output = T> + Unpin,
{
    let selector = future::select(Timer::after(duration), f);
    match selector.await {
        Either::Left((_, _f)) => Err(Error::Timeout(duration, msg.into())),
        Either::Right((r, _delay)) => Ok(r),
    }
}

/// Use this if `f()` might block forever, for example due to code that doesn't implement timeouts,
/// like a libgit2 fetch, which has no timeout on receiving bytes.
///
/// This approach eventually fails as we accumulate more and more blocked threads, but it also buys us
/// additional days of runtime for little effort. On a Chinese network, outside of data centers, one can
/// probably restart criner on a weekly basis or so, which can easily be automated.
pub async fn enforce_threaded<F, T>(deadline: SystemTime, f: F) -> Result<T>
where
    T: Send + 'static,
    F: FnOnce() -> T + Send + 'static,
{
    let unblocked = blocking::unblock(f);
    let selector = future::select(
        Timer::after(deadline.duration_since(SystemTime::now()).unwrap_or_default()),
        unblocked.boxed(),
    );
    match selector.await {
        Either::Left((_, _f_as_future)) => Err(Error::DeadlineExceeded(FormatDeadline(deadline))),
        Either::Right((res, _delay)) => Ok(res),
    }
}
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
max_width = 120
--------------------------------------------------------------------------------
/src/args.rs:
--------------------------------------------------------------------------------
use std::path::PathBuf;

fn parse_local_time(src: &str) -> Result<time::Time, time::error::Parse> {
    time::Time::parse(
        src,
        &time::macros::format_description!("[hour repr:24 padding:none]:[minute padding:zero]"),
    )
}

#[derive(Debug, clap::Parser)]
#[clap(about = "Interact with crates.io from the command-line")]
pub struct Args {
    #[clap(subcommand)]
    pub sub: Option<SubCommands>,
}

#[derive(Debug, clap::Parser)]
pub enum SubCommands {
    /// Mine crates.io in an incorruptible and resumable fashion
    #[clap(display_order = 0)]
    #[clap(disable_version_flag(true))]
    Mine {
        /// If set, no GUI will be presented. Best with RUST_LOG=info to see basic information.
        #[clap(long)]
        no_gui: bool,

        /// The number of frames to show per second
        #[clap(long, name = "frames-per-second", default_value = "6.0")]
        fps: f32,

        /// The number of progress messages to keep in a ring buffer.
        #[clap(short = 's', long, default_value = "100")]
        progress_message_scrollback_buffer_size: usize,

        /// If set, the crates-index database for additional metadata will not be downloaded.
        ///
        /// It costs a lot of initial processing time and IO when writing changes back to the database,
        /// which isn't helpful while on a slow disk - right now it does so unconditionally and doesn't track
        /// that the work was already done.
        #[clap(long, short = 'D')]
        no_db_download: bool,

        /// The number of IO-bound processors to run concurrently.
        ///
        /// A way to choose a value is to see which part of the I/O is actually the bottleneck.
        /// Depending on that number, one should experiment with an amount of processors that saturates
        /// either input or output.
        /// Most commonly, these are bound to the input, as it is the network.
        #[clap(long, alias = "io", value_name = "io", default_value = "10")]
        io_bound_processors: u32,
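
        // An illustrative invocation (binary name and values are hypothetical, assuming
        // clap's derived kebab-case flag names): on a fast network with a slow disk one
        // could shift the balance towards output, e.g.
        //
        //   criner mine --io-bound-processors 20 --cpu-o-bound-processors 40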

        /// The number of CPU- and output-bound processors to run concurrently.
        ///
        /// These will perform a computation followed by flushing its result to disk in the form
        /// of multiple small files.
        /// It's recommended to adjust that number to whatever can saturate the speed of writing to disk,
        /// as these processors will yield when writing, allowing other processors to compute.
        /// Computations are relatively inexpensive compared to the writes.
        #[clap(long, alias = "cpu-o", value_name = "cpu-o", default_value = "20")]
        cpu_o_bound_processors: u32,

        /// The number of CPU-bound processors to run concurrently.
        ///
        /// One can assume that each of these occupies one core of a CPU.
        /// However, they will not use a lot of IO, nor will they use much memory.
        #[clap(long, alias = "cpu", value_name = "cpu", default_value = "4")]
        cpu_bound_processors: u32,

        /// Path to the possibly existing crates.io repository clone. If unset, it will be cloned to a temporary spot.
        #[clap(short = 'c', long, name = "REPO")]
        repository: Option<PathBuf>,

        /// The amount of time we can take for the computation. Specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'.
        #[clap(long, short = 't')]
        time_limit: Option<humantime::Duration>,

        /// The time between each fetch operation, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'.
        #[clap(long, short = 'f', default_value = "5min")]
        fetch_every: humantime::Duration,

        /// If set, the number of times the fetch stage will run. If set to 0, it will never run.
        #[clap(long, short = 'F')]
        fetch_at_most: Option<usize>,

        /// The time between each processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'.
        #[clap(long, short = 'p', default_value = "5min")]
        process_every: humantime::Duration,

        /// If set, the number of times the process stage will run. If set to 0, it will never run.
        #[clap(long, short = 'P')]
        process_at_most: Option<usize>,

        /// The time between each reporting and processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'.
        #[clap(long, short = 'r', default_value = "5min")]
        report_every: humantime::Duration,

        /// If set, the number of times the reporting stage will run. If set to 0, it will never run.
        #[clap(long, short = 'R')]
        report_at_most: Option<usize>,

        /// If set, declares at which local time to download the crates.io database and digest it.
        ///
        /// This job runs every 24h, as the database is updated that often.
        /// If unset, the job starts right away.
        /// Format is HH:MM, e.g. '14:30' for 2:30pm or '03:15' for a quarter past 3 in the morning.
        #[clap(long, short = 'd', value_parser = parse_local_time)]
        download_crates_io_database_every_24_hours_starting_at: Option<time::Time>,

        /// If set, the reporting stage will only iterate over crates that match the given standard unix glob.
        ///
        /// 'moz*' would match only crates whose name starts with 'moz', for example.
        #[clap(long, short = 'g')]
        glob: Option<String>,

        /// Path to the possibly existing database. It's used to persist all mining results.
        #[clap(default_value = "criner.db")]
        db_path: PathBuf,
    },
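
    // Illustrative usage of the subcommand below (binary name and paths are hypothetical):
    //
    //   criner export criner.db criner-export.db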
    /// Export all Criner data into a format friendly for exploration via SQL, best viewed with https://sqlitebrowser.org
    ///
    /// Criner stores binary blobs internally and migrates them on the fly, which is optimized for raw performance.
    /// That is impractical for exploring the data by hand, though, so the exported data explodes all types into
    /// tables, with each column being a field. Foreign key relations are set accordingly to allow joins.
    /// Use this to get an overview of what's available, and possibly contribute a report generator which implements
    /// a query using raw data and writes it into reports.
    #[clap(display_order = 1)]
    #[clap(disable_version_flag(true))]
    Export {
        /// The path to the source database in sqlite format
        input_db_path: PathBuf,

        /// Path to which to write the exported data. If it exists, the operation will fail.
        export_db_path: PathBuf,
    },
    #[cfg(feature = "migration")]
    /// A special-purpose command, only to be executed in special circumstances
    #[clap(display_order = 9)]
    Migrate,
}

impl Default for SubCommands {
    fn default() -> Self {
        SubCommands::Mine {
            no_gui: false,
            fps: 6.0,
            progress_message_scrollback_buffer_size: 100,
            io_bound_processors: 5,
            cpu_bound_processors: 2,
            cpu_o_bound_processors: 10,
            repository: None,
            time_limit: None,
            fetch_every: std::time::Duration::from_secs(60).into(),
            fetch_at_most: None,
            no_db_download: false,
            process_every: std::time::Duration::from_secs(60).into(),
            process_at_most: None,
            download_crates_io_database_every_24_hours_starting_at: Some(
                parse_local_time("3:00").expect("valid statically known time"),
            ),
            report_every: std::time::Duration::from_secs(60).into(),
            report_at_most: None,
            db_path: PathBuf::from("criner.db"),
            glob: None,
        }
    }
}
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
use std::{error::Error, fmt, process};

struct WithCauses<'a>(&'a dyn Error);

impl<'a> fmt::Display for WithCauses<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ERROR: {}", self.0)?;
        let mut cursor = self.0;
        while let Some(err) = cursor.source() {
            write!(f, "\ncaused by: \n{}", err)?;
            cursor = err;
        }
        writeln!(f)
    }
}

pub fn ok_or_exit<T, E>(result: Result<T, E>) -> T
where
    E: Error,
{
    match result {
        Ok(v) => v,
        Err(err) => {
            println!("{}", WithCauses(&err));
            process::exit(2);
        }
    }
}
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
use std::ops::Add;

mod args;
pub mod error;
pub use args::*;

pub fn run_blocking(args: Args) -> criner::error::Result<()> {
    use SubCommands::*;
    let cmd = args.sub.unwrap_or_default();
    match cmd {
        #[cfg(feature = "migration")]
        Migrate => criner::migration::migrate("./criner.db"),
        Export {
            input_db_path,
            export_db_path,
        } => criner::export::run_blocking(input_db_path, export_db_path),
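        // The `Mine` arm forwards every CLI flag into `criner::run::blocking`, pairing each
        // `*_every` interval with its optional `*_at_most` iteration cap as stage settings.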
        Mine {
            repository,
            db_path,
            fps,
            time_limit,
            io_bound_processors,
            cpu_bound_processors,
            cpu_o_bound_processors,
            no_gui,
            no_db_download,
            progress_message_scrollback_buffer_size,
            fetch_every,
            fetch_at_most,
            process_at_most,
            process_every,
            download_crates_io_database_every_24_hours_starting_at,
            report_every,
            report_at_most,
            glob,
        } => criner::run::blocking(
            db_path,
            repository.unwrap_or_else(|| std::env::temp_dir().join("criner-crates-io-bare-index.git")),
            time_limit.map(|d| std::time::SystemTime::now().add(*d)),
            io_bound_processors,
            cpu_bound_processors,
            cpu_o_bound_processors,
            !no_db_download,
            criner::run::StageRunSettings {
                every: fetch_every.into(),
                at_most: fetch_at_most,
            },
            criner::run::StageRunSettings {
                every: process_every.into(),
                at_most: process_at_most,
            },
            criner::run::GlobStageRunSettings {
                run: criner::run::StageRunSettings {
                    every: report_every.into(),
                    at_most: report_at_most,
                },
                glob,
            },
            download_crates_io_database_every_24_hours_starting_at,
            criner::prodash::tree::root::Options {
                message_buffer_capacity: progress_message_scrollback_buffer_size,
                ..criner::prodash::tree::root::Options::default()
            }
            .create()
            .into(),
            if no_gui {
                None
            } else {
                Some(criner::prodash::render::tui::Options {
                    title: "Criner".into(),
                    frames_per_second: fps,
                    recompute_column_width_every_nth_frame: Some(fps as usize),
                    ..criner::prodash::render::tui::Options::default()
                })
            },
        ),
    }
}
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
use clap::Parser;

fn main() -> criner::error::Result<()> {
    let args = criner_cli::Args::parse();
    if let Some(criner_cli::SubCommands::Mine { no_gui, .. }) = args.sub {
        if no_gui {
            env_logger::init_from_env(env_logger::Env::default().default_filter_or("info"));
        }
    } else {
        env_logger::init();
    }
    criner_cli::run_blocking(args)
}
--------------------------------------------------------------------------------