├── .github ├── dependabot.yml └── workflows │ ├── build_and_test.yml │ └── publish.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-MIT ├── README.md ├── THIRDPARTY.yml ├── UNLICENSE ├── bench-data └── shakespeare.txt ├── bench2.sh ├── benchmark.sh ├── rust-toolchain.toml └── src └── main.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CARGO_TERM_COLOR: always 7 | 8 | jobs: 9 | check: 10 | name: Check 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout sources 14 | uses: actions/checkout@v2 15 | 16 | - name: Install stable toolchain 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: stable 21 | override: false 22 | 23 | - name: Cache dependencies 24 | uses: Swatinem/rust-cache@v1 25 | 26 | - name: Run cargo check 27 | uses: actions-rs/cargo@v1 28 | with: 29 | command: check 30 | 31 | lints: 32 | name: Lints 33 | runs-on: ubuntu-latest 34 | steps: 35 | - name: Checkout sources 36 | uses: actions/checkout@v2 37 | 38 | - name: Install stable toolchain 39 | uses: actions-rs/toolchain@v1 40 | with: 41 | profile: minimal 42 | toolchain: stable 43 | 
override: false 44 | components: rustfmt, clippy 45 | 46 | - name: Cache dependencies 47 | uses: Swatinem/rust-cache@v1 48 | 49 | - name: Run cargo fmt 50 | uses: actions-rs/cargo@v1 51 | with: 52 | command: fmt 53 | args: --all -- --check 54 | 55 | - name: Run cargo clippy 56 | uses: actions-rs/cargo@v1 57 | with: 58 | command: clippy 59 | args: -- -D warnings 60 | 61 | test: 62 | name: Test Suite 63 | runs-on: ${{ matrix.os }} 64 | strategy: 65 | matrix: 66 | os: [ubuntu-latest, macOS-latest, windows-latest] 67 | steps: 68 | - name: Checkout sources 69 | uses: actions/checkout@v2 70 | 71 | - name: Install stable toolchain 72 | uses: actions-rs/toolchain@v1 73 | with: 74 | profile: minimal 75 | toolchain: stable 76 | override: false 77 | 78 | - name: Cache dependencies 79 | uses: Swatinem/rust-cache@v1 80 | 81 | - name: Run tests 82 | run: cargo test --verbose 83 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | publish: 10 | name: Publish for ${{ matrix.os }} 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | include: 15 | - os: ubuntu-latest 16 | artifact_name: crabz 17 | asset_name: crabz-linux-amd64 18 | - os: macos-latest 19 | artifact_name: crabz 20 | asset_name: crabz-macos-amd64 21 | - os: windows-latest 22 | artifact_name: crabz.exe 23 | asset_name: crabz-windows-amd64.exe 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | 28 | - name: Build 29 | shell: bash 30 | run: | 31 | cargo build --release --locked 32 | 33 | - name: Build archive 34 | shell: bash 35 | run: | 36 | staging="${{matrix.asset_name}}-src" 37 | mkdir -p "$staging" 38 | 39 | cp {README.md,UNLICENSE,LICENSE-MIT} "$staging/" 40 | cp {Cargo.toml,Cargo.lock} "$staging/" 41 | cp -R ./src "./$staging/src" 42 | 43 | if [ "${{ matrix.os }}" = "windows-latest" 
]; then 44 | 7z a "${staging}.zip" "$staging" 45 | echo "ASSET=${staging}.zip" >> $GITHUB_ENV 46 | else 47 | tar czf "${staging}.tar.gz" "${staging}" 48 | echo "ASSET=${staging}.tar.gz" >> $GITHUB_ENV 49 | fi 50 | 51 | - name: Create deb artifact 52 | shell: bash 53 | run: | 54 | if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then 55 | cd .. 56 | cargo install --locked cargo-deb 57 | cd - 58 | asset_path="${{ matrix.asset_name }}.deb" 59 | cargo deb --output ./"${asset_path}" 60 | echo "DEB=${asset_path}" >> $GITHUB_ENV 61 | fi 62 | 63 | - name: Upload deb package 64 | uses: svenstaro/upload-release-action@v2 65 | if: matrix.os == 'ubuntu-latest' 66 | with: 67 | repo_token: ${{ secrets.GITHUB_TOKEN }} 68 | file: ${{ env.DEB }} 69 | asset_name: ${{ env.DEB }} 70 | tag: ${{ github.ref }} 71 | 72 | - name: Upload src to release 73 | uses: svenstaro/upload-release-action@v2 74 | with: 75 | repo_token: ${{ secrets.GITHUB_TOKEN }} 76 | file: ${{ env.ASSET }} 77 | asset_name: ${{ env.ASSET }} 78 | tag: ${{ github.ref }} 79 | 80 | - name: Upload binaries to release 81 | uses: svenstaro/upload-release-action@v2 82 | with: 83 | repo_token: ${{ secrets.GITHUB_TOKEN }} 84 | file: target/release/${{ matrix.artifact_name }} 85 | asset_name: ${{ matrix.asset_name }} 86 | tag: ${{ github.ref }} 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .idea -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.10.0 2 | 3 | - [bugfix] Conditional compilation without snappy feature by @camlloyd 4 | 5 | # v0.9.4 6 | 7 | - [bugfix] Install `cargo-deb` outside of project directory 8 | 9 | # v0.9.3 10 | 11 | - [bugfix] Install `cargo-deb` outside of project directory 12 | 13 | # v0.9.2 14 | 15 | - [bugfix] Install 
`cargo-deb` with `--locked` in CI 16 | 17 | # v0.9.1 18 | 19 | - [bugfix](https://github.com/sstadick/crabz/pull/36) cargo update to fix dep resolution issue from @chenrui333 20 | 21 | # v0.9.0 22 | 23 | - [feat](https://github.com/sstadick/crabz/pull/34) Add `--quiet` flag from @camlloyd 24 | - [feat](https://github.com/sstadick/crabz/pull/33) Update deflate file extensions from @camlloyd 25 | 26 | # v0.7.7 27 | 28 | - [bugfix](https://github.com/sstadick/crabz/pull/24) Remove benchmark data from distribution from @Shnatsel 29 | - [bugfix](https://github.com/sstadick/crabz/issues/25) 30 | 31 | # v0.7.6 32 | 33 | - Update deps, add dependabot 34 | 35 | # v0.7.5 36 | 37 | - Update deps, update thirdparty file, use fixed version of gzp 38 | 39 | # v0.7.4 40 | 41 | - Update deps, specifically gzp to take advantage of updated flate2 42 | 43 | # v0.7.3 44 | 45 | - [bugfix](https://github.com/sstadick/crabz/issues/14) Fixes feature flags to allow for compiling rust-only backend 46 | 47 | # v0.7.2 48 | 49 | - Includes updated THIRDPARTY.yml 50 | 51 | # v0.7.1 52 | 53 | - Fix [issue 11](https://github.com/sstadick/crabz/issues/11) 54 | - Adds "in-place" mode to decompress / compress by stripping/adding a suffix like other compression tools 55 | - Switch to mimalloc which showed large improvement with more threads 56 | - Add "pin-at" api to specifically pin the compression / decompression threads to cores starting at a specific core 57 | - Added benchmarks to README 58 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "1.1.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "ansi_term" 22 | version = "0.12.1" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" 25 | dependencies = [ 26 | "winapi", 27 | ] 28 | 29 | [[package]] 30 | name = "anyhow" 31 | version = "1.0.81" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" 34 | 35 | [[package]] 36 | name = "atty" 37 | version = "0.2.14" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 40 | dependencies = [ 41 | "hermit-abi 0.1.19", 42 | "libc", 43 | "winapi", 44 | ] 45 | 46 | [[package]] 47 | name = "autocfg" 48 | version = "1.1.0" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 51 | 52 | [[package]] 53 | name = "bitflags" 54 | version = "1.3.2" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 57 | 58 | [[package]] 59 | name = "bumpalo" 60 | version = "3.15.4" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" 63 | 64 | [[package]] 65 | name = "byteorder" 66 | 
version = "1.5.0" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 69 | 70 | [[package]] 71 | name = "bytes" 72 | version = "1.5.0" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" 75 | 76 | [[package]] 77 | name = "cc" 78 | version = "1.0.90" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" 81 | 82 | [[package]] 83 | name = "cfg-if" 84 | version = "1.0.0" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 87 | 88 | [[package]] 89 | name = "clap" 90 | version = "2.34.0" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" 93 | dependencies = [ 94 | "ansi_term", 95 | "atty", 96 | "bitflags", 97 | "strsim", 98 | "textwrap", 99 | "unicode-width", 100 | "vec_map", 101 | ] 102 | 103 | [[package]] 104 | name = "cmake" 105 | version = "0.1.50" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" 108 | dependencies = [ 109 | "cc", 110 | ] 111 | 112 | [[package]] 113 | name = "core_affinity" 114 | version = "0.8.1" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "622892f5635ce1fc38c8f16dfc938553ed64af482edb5e150bf4caedbfcb2304" 117 | dependencies = [ 118 | "libc", 119 | "num_cpus", 120 | "winapi", 121 | ] 122 | 123 | [[package]] 124 | name = "crabz" 125 | version = "0.10.0" 126 | dependencies = [ 127 | "anyhow", 128 | "env_logger", 129 | "flate2", 130 | "git-version", 131 | "gzp", 132 | "lazy_static", 133 | "log", 134 | 
"mimalloc", 135 | "num_cpus", 136 | "snap", 137 | "structopt", 138 | "strum", 139 | ] 140 | 141 | [[package]] 142 | name = "crc32fast" 143 | version = "1.4.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" 146 | dependencies = [ 147 | "cfg-if", 148 | ] 149 | 150 | [[package]] 151 | name = "env_logger" 152 | version = "0.10.2" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" 155 | dependencies = [ 156 | "humantime", 157 | "is-terminal", 158 | "log", 159 | "regex", 160 | "termcolor", 161 | ] 162 | 163 | [[package]] 164 | name = "flate2" 165 | version = "1.0.28" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" 168 | dependencies = [ 169 | "crc32fast", 170 | "libz-sys", 171 | "miniz_oxide", 172 | ] 173 | 174 | [[package]] 175 | name = "flume" 176 | version = "0.10.14" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" 179 | dependencies = [ 180 | "futures-core", 181 | "futures-sink", 182 | "nanorand", 183 | "pin-project", 184 | "spin", 185 | ] 186 | 187 | [[package]] 188 | name = "futures-core" 189 | version = "0.3.30" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 192 | 193 | [[package]] 194 | name = "futures-sink" 195 | version = "0.3.30" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" 198 | 199 | [[package]] 200 | name = "getrandom" 201 | version = "0.2.12" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | 
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" 204 | dependencies = [ 205 | "cfg-if", 206 | "js-sys", 207 | "libc", 208 | "wasi", 209 | "wasm-bindgen", 210 | ] 211 | 212 | [[package]] 213 | name = "git-version" 214 | version = "0.3.9" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "1ad568aa3db0fcbc81f2f116137f263d7304f512a1209b35b85150d3ef88ad19" 217 | dependencies = [ 218 | "git-version-macro", 219 | ] 220 | 221 | [[package]] 222 | name = "git-version-macro" 223 | version = "0.3.9" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" 226 | dependencies = [ 227 | "proc-macro2", 228 | "quote", 229 | "syn 2.0.53", 230 | ] 231 | 232 | [[package]] 233 | name = "gzp" 234 | version = "0.11.3" 235 | source = "registry+https://github.com/rust-lang/crates.io-index" 236 | checksum = "e7c65d1899521a11810501b50b898464d133e1afc96703cff57726964cfa7baf" 237 | dependencies = [ 238 | "byteorder", 239 | "bytes", 240 | "core_affinity", 241 | "flate2", 242 | "flume", 243 | "libdeflater", 244 | "libz-sys", 245 | "num_cpus", 246 | "snap", 247 | "thiserror", 248 | ] 249 | 250 | [[package]] 251 | name = "heck" 252 | version = "0.3.3" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 255 | dependencies = [ 256 | "unicode-segmentation", 257 | ] 258 | 259 | [[package]] 260 | name = "heck" 261 | version = "0.4.1" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 264 | 265 | [[package]] 266 | name = "hermit-abi" 267 | version = "0.1.19" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 270 | dependencies = [ 271 | 
"libc", 272 | ] 273 | 274 | [[package]] 275 | name = "hermit-abi" 276 | version = "0.3.9" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 279 | 280 | [[package]] 281 | name = "humantime" 282 | version = "2.1.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 285 | 286 | [[package]] 287 | name = "is-terminal" 288 | version = "0.4.12" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 291 | dependencies = [ 292 | "hermit-abi 0.3.9", 293 | "libc", 294 | "windows-sys", 295 | ] 296 | 297 | [[package]] 298 | name = "js-sys" 299 | version = "0.3.69" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" 302 | dependencies = [ 303 | "wasm-bindgen", 304 | ] 305 | 306 | [[package]] 307 | name = "lazy_static" 308 | version = "1.4.0" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 311 | 312 | [[package]] 313 | name = "libc" 314 | version = "0.2.153" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 317 | 318 | [[package]] 319 | name = "libdeflate-sys" 320 | version = "0.12.0" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "e1f7b0817f85e2ba608892f30fbf4c9d03f3ebf9db0c952d1b7c8f7387b54785" 323 | dependencies = [ 324 | "cc", 325 | ] 326 | 327 | [[package]] 328 | name = "libdeflater" 329 | version = "0.12.0" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = 
"671e63282f642c7bcc7d292b212d5a4739fef02a77fe98429a75d308f96e7931" 332 | dependencies = [ 333 | "libdeflate-sys", 334 | ] 335 | 336 | [[package]] 337 | name = "libmimalloc-sys" 338 | version = "0.1.35" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" 341 | dependencies = [ 342 | "cc", 343 | "libc", 344 | ] 345 | 346 | [[package]] 347 | name = "libz-sys" 348 | version = "1.1.15" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" 351 | dependencies = [ 352 | "cc", 353 | "cmake", 354 | "libc", 355 | "pkg-config", 356 | "vcpkg", 357 | ] 358 | 359 | [[package]] 360 | name = "lock_api" 361 | version = "0.4.11" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" 364 | dependencies = [ 365 | "autocfg", 366 | "scopeguard", 367 | ] 368 | 369 | [[package]] 370 | name = "log" 371 | version = "0.4.21" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 374 | 375 | [[package]] 376 | name = "memchr" 377 | version = "2.7.1" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 380 | 381 | [[package]] 382 | name = "mimalloc" 383 | version = "0.1.39" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c" 386 | dependencies = [ 387 | "libmimalloc-sys", 388 | ] 389 | 390 | [[package]] 391 | name = "miniz_oxide" 392 | version = "0.7.2" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = 
"9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" 395 | dependencies = [ 396 | "adler", 397 | ] 398 | 399 | [[package]] 400 | name = "nanorand" 401 | version = "0.7.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" 404 | dependencies = [ 405 | "getrandom", 406 | ] 407 | 408 | [[package]] 409 | name = "num_cpus" 410 | version = "1.16.0" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 413 | dependencies = [ 414 | "hermit-abi 0.3.9", 415 | "libc", 416 | ] 417 | 418 | [[package]] 419 | name = "once_cell" 420 | version = "1.19.0" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 423 | 424 | [[package]] 425 | name = "pin-project" 426 | version = "1.1.5" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" 429 | dependencies = [ 430 | "pin-project-internal", 431 | ] 432 | 433 | [[package]] 434 | name = "pin-project-internal" 435 | version = "1.1.5" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" 438 | dependencies = [ 439 | "proc-macro2", 440 | "quote", 441 | "syn 2.0.53", 442 | ] 443 | 444 | [[package]] 445 | name = "pkg-config" 446 | version = "0.3.30" 447 | source = "registry+https://github.com/rust-lang/crates.io-index" 448 | checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" 449 | 450 | [[package]] 451 | name = "proc-macro-error" 452 | version = "1.0.4" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 
455 | dependencies = [ 456 | "proc-macro-error-attr", 457 | "proc-macro2", 458 | "quote", 459 | "syn 1.0.109", 460 | "version_check", 461 | ] 462 | 463 | [[package]] 464 | name = "proc-macro-error-attr" 465 | version = "1.0.4" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 468 | dependencies = [ 469 | "proc-macro2", 470 | "quote", 471 | "version_check", 472 | ] 473 | 474 | [[package]] 475 | name = "proc-macro2" 476 | version = "1.0.79" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" 479 | dependencies = [ 480 | "unicode-ident", 481 | ] 482 | 483 | [[package]] 484 | name = "quote" 485 | version = "1.0.35" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 488 | dependencies = [ 489 | "proc-macro2", 490 | ] 491 | 492 | [[package]] 493 | name = "regex" 494 | version = "1.10.3" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" 497 | dependencies = [ 498 | "aho-corasick", 499 | "memchr", 500 | "regex-automata", 501 | "regex-syntax", 502 | ] 503 | 504 | [[package]] 505 | name = "regex-automata" 506 | version = "0.4.6" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 509 | dependencies = [ 510 | "aho-corasick", 511 | "memchr", 512 | "regex-syntax", 513 | ] 514 | 515 | [[package]] 516 | name = "regex-syntax" 517 | version = "0.8.2" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 520 | 521 | [[package]] 522 | name = "rustversion" 523 | version = 
"1.0.14" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" 526 | 527 | [[package]] 528 | name = "scopeguard" 529 | version = "1.2.0" 530 | source = "registry+https://github.com/rust-lang/crates.io-index" 531 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 532 | 533 | [[package]] 534 | name = "snap" 535 | version = "1.1.1" 536 | source = "registry+https://github.com/rust-lang/crates.io-index" 537 | checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" 538 | 539 | [[package]] 540 | name = "spin" 541 | version = "0.9.8" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 544 | dependencies = [ 545 | "lock_api", 546 | ] 547 | 548 | [[package]] 549 | name = "strsim" 550 | version = "0.8.0" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 553 | 554 | [[package]] 555 | name = "structopt" 556 | version = "0.3.26" 557 | source = "registry+https://github.com/rust-lang/crates.io-index" 558 | checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" 559 | dependencies = [ 560 | "clap", 561 | "lazy_static", 562 | "structopt-derive", 563 | ] 564 | 565 | [[package]] 566 | name = "structopt-derive" 567 | version = "0.4.18" 568 | source = "registry+https://github.com/rust-lang/crates.io-index" 569 | checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" 570 | dependencies = [ 571 | "heck 0.3.3", 572 | "proc-macro-error", 573 | "proc-macro2", 574 | "quote", 575 | "syn 1.0.109", 576 | ] 577 | 578 | [[package]] 579 | name = "strum" 580 | version = "0.24.1" 581 | source = "registry+https://github.com/rust-lang/crates.io-index" 582 | checksum = 
"063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" 583 | dependencies = [ 584 | "strum_macros", 585 | ] 586 | 587 | [[package]] 588 | name = "strum_macros" 589 | version = "0.24.3" 590 | source = "registry+https://github.com/rust-lang/crates.io-index" 591 | checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" 592 | dependencies = [ 593 | "heck 0.4.1", 594 | "proc-macro2", 595 | "quote", 596 | "rustversion", 597 | "syn 1.0.109", 598 | ] 599 | 600 | [[package]] 601 | name = "syn" 602 | version = "1.0.109" 603 | source = "registry+https://github.com/rust-lang/crates.io-index" 604 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 605 | dependencies = [ 606 | "proc-macro2", 607 | "quote", 608 | "unicode-ident", 609 | ] 610 | 611 | [[package]] 612 | name = "syn" 613 | version = "2.0.53" 614 | source = "registry+https://github.com/rust-lang/crates.io-index" 615 | checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" 616 | dependencies = [ 617 | "proc-macro2", 618 | "quote", 619 | "unicode-ident", 620 | ] 621 | 622 | [[package]] 623 | name = "termcolor" 624 | version = "1.4.1" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 627 | dependencies = [ 628 | "winapi-util", 629 | ] 630 | 631 | [[package]] 632 | name = "textwrap" 633 | version = "0.11.0" 634 | source = "registry+https://github.com/rust-lang/crates.io-index" 635 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 636 | dependencies = [ 637 | "unicode-width", 638 | ] 639 | 640 | [[package]] 641 | name = "thiserror" 642 | version = "1.0.58" 643 | source = "registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" 645 | dependencies = [ 646 | "thiserror-impl", 647 | ] 648 | 649 | [[package]] 650 | name = 
"thiserror-impl" 651 | version = "1.0.58" 652 | source = "registry+https://github.com/rust-lang/crates.io-index" 653 | checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" 654 | dependencies = [ 655 | "proc-macro2", 656 | "quote", 657 | "syn 2.0.53", 658 | ] 659 | 660 | [[package]] 661 | name = "unicode-ident" 662 | version = "1.0.12" 663 | source = "registry+https://github.com/rust-lang/crates.io-index" 664 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 665 | 666 | [[package]] 667 | name = "unicode-segmentation" 668 | version = "1.11.0" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" 671 | 672 | [[package]] 673 | name = "unicode-width" 674 | version = "0.1.11" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" 677 | 678 | [[package]] 679 | name = "vcpkg" 680 | version = "0.2.15" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 683 | 684 | [[package]] 685 | name = "vec_map" 686 | version = "0.8.2" 687 | source = "registry+https://github.com/rust-lang/crates.io-index" 688 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 689 | 690 | [[package]] 691 | name = "version_check" 692 | version = "0.9.4" 693 | source = "registry+https://github.com/rust-lang/crates.io-index" 694 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 695 | 696 | [[package]] 697 | name = "wasi" 698 | version = "0.11.0+wasi-snapshot-preview1" 699 | source = "registry+https://github.com/rust-lang/crates.io-index" 700 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 701 | 702 | [[package]] 703 | name = "wasm-bindgen" 704 | version = "0.2.92" 
705 | source = "registry+https://github.com/rust-lang/crates.io-index" 706 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 707 | dependencies = [ 708 | "cfg-if", 709 | "wasm-bindgen-macro", 710 | ] 711 | 712 | [[package]] 713 | name = "wasm-bindgen-backend" 714 | version = "0.2.92" 715 | source = "registry+https://github.com/rust-lang/crates.io-index" 716 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" 717 | dependencies = [ 718 | "bumpalo", 719 | "log", 720 | "once_cell", 721 | "proc-macro2", 722 | "quote", 723 | "syn 2.0.53", 724 | "wasm-bindgen-shared", 725 | ] 726 | 727 | [[package]] 728 | name = "wasm-bindgen-macro" 729 | version = "0.2.92" 730 | source = "registry+https://github.com/rust-lang/crates.io-index" 731 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" 732 | dependencies = [ 733 | "quote", 734 | "wasm-bindgen-macro-support", 735 | ] 736 | 737 | [[package]] 738 | name = "wasm-bindgen-macro-support" 739 | version = "0.2.92" 740 | source = "registry+https://github.com/rust-lang/crates.io-index" 741 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" 742 | dependencies = [ 743 | "proc-macro2", 744 | "quote", 745 | "syn 2.0.53", 746 | "wasm-bindgen-backend", 747 | "wasm-bindgen-shared", 748 | ] 749 | 750 | [[package]] 751 | name = "wasm-bindgen-shared" 752 | version = "0.2.92" 753 | source = "registry+https://github.com/rust-lang/crates.io-index" 754 | checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" 755 | 756 | [[package]] 757 | name = "winapi" 758 | version = "0.3.9" 759 | source = "registry+https://github.com/rust-lang/crates.io-index" 760 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 761 | dependencies = [ 762 | "winapi-i686-pc-windows-gnu", 763 | "winapi-x86_64-pc-windows-gnu", 764 | ] 765 | 766 | [[package]] 767 | name = "winapi-i686-pc-windows-gnu" 768 | version = 
"0.4.0" 769 | source = "registry+https://github.com/rust-lang/crates.io-index" 770 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 771 | 772 | [[package]] 773 | name = "winapi-util" 774 | version = "0.1.6" 775 | source = "registry+https://github.com/rust-lang/crates.io-index" 776 | checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" 777 | dependencies = [ 778 | "winapi", 779 | ] 780 | 781 | [[package]] 782 | name = "winapi-x86_64-pc-windows-gnu" 783 | version = "0.4.0" 784 | source = "registry+https://github.com/rust-lang/crates.io-index" 785 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 786 | 787 | [[package]] 788 | name = "windows-sys" 789 | version = "0.52.0" 790 | source = "registry+https://github.com/rust-lang/crates.io-index" 791 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 792 | dependencies = [ 793 | "windows-targets", 794 | ] 795 | 796 | [[package]] 797 | name = "windows-targets" 798 | version = "0.52.4" 799 | source = "registry+https://github.com/rust-lang/crates.io-index" 800 | checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" 801 | dependencies = [ 802 | "windows_aarch64_gnullvm", 803 | "windows_aarch64_msvc", 804 | "windows_i686_gnu", 805 | "windows_i686_msvc", 806 | "windows_x86_64_gnu", 807 | "windows_x86_64_gnullvm", 808 | "windows_x86_64_msvc", 809 | ] 810 | 811 | [[package]] 812 | name = "windows_aarch64_gnullvm" 813 | version = "0.52.4" 814 | source = "registry+https://github.com/rust-lang/crates.io-index" 815 | checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" 816 | 817 | [[package]] 818 | name = "windows_aarch64_msvc" 819 | version = "0.52.4" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" 822 | 823 | [[package]] 824 | name = "windows_i686_gnu" 825 | version = 
"0.52.4" 826 | source = "registry+https://github.com/rust-lang/crates.io-index" 827 | checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" 828 | 829 | [[package]] 830 | name = "windows_i686_msvc" 831 | version = "0.52.4" 832 | source = "registry+https://github.com/rust-lang/crates.io-index" 833 | checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" 834 | 835 | [[package]] 836 | name = "windows_x86_64_gnu" 837 | version = "0.52.4" 838 | source = "registry+https://github.com/rust-lang/crates.io-index" 839 | checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" 840 | 841 | [[package]] 842 | name = "windows_x86_64_gnullvm" 843 | version = "0.52.4" 844 | source = "registry+https://github.com/rust-lang/crates.io-index" 845 | checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" 846 | 847 | [[package]] 848 | name = "windows_x86_64_msvc" 849 | version = "0.52.4" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" 852 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crabz" 3 | authors = ["Seth Stadick"] 4 | version = "0.10.0" 5 | edition = "2018" 6 | license = "Unlicense/MIT" 7 | readme = "README.md" 8 | documentation = "https://docs.rs/crabz" 9 | homepage = "https://github.com/sstadick/crabz" 10 | repository = "https://github.com/sstadick/crabz" 11 | categories = ["compression", "concurrency"] 12 | keywords = ["compression", "parallel", "pigz"] 13 | description = "Parallel Compression" 14 | exclude = ["bench-data/"] 15 | 16 | [[bin]] 17 | name = "crabz" 18 | path = "src/main.rs" 19 | 20 | [features] 21 | default = ["deflate_zlib_ng", "snap_default", "libdeflate"] 22 | 23 | deflate_zlib_ng = ["gzp/deflate_zlib_ng", 
"flate2/zlib-ng-compat", "any_zlib"] 24 | deflate_zlib = ["gzp/deflate_zlib", "flate2/zlib", "any_zlib"] 25 | deflate_rust = ["gzp/deflate_rust", "flate2/rust_backend"] 26 | snap_default = ["gzp/snappy_default", "snap", "snappy"] 27 | libdeflate = ["gzp/libdeflate"] 28 | 29 | snappy = [] # internal feature flag 30 | any_zlib = [] # internal feature flag 31 | 32 | 33 | [dependencies] 34 | anyhow = "1.0.68" 35 | env_logger = { version = "0.10.0", default-features = false, features = ["default"] } 36 | flate2 = { version = "1.0.25", default-features = false } 37 | gzp = { version = "0.11.3", default-features = false } 38 | lazy_static = "1.4.0" 39 | log = "0.4.17" 40 | num_cpus = "1.15.0" 41 | structopt = "0.3.26" 42 | strum = { version = "0.24.1", features = ["derive"] } 43 | snap = { version = "1.1.0", optional = true } 44 | mimalloc = { version = "0.1.34", default-features = false } 45 | git-version = "0.3.5" 46 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Seth Stadick 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🦀 crabz 2 | 3 | 4 |

5 | Build Status 6 | license 7 | Version info
8 | Like pigz, but rust. 9 |

10 | 11 | A cross platform, fast, compression and decompression tool. 12 | 13 | ## Synopsis 14 | 15 | This is currently a proof of concept CLI tool using the [`gzp`](https://github.com/sstadick/gzp/) crate. 16 | 17 | Supported formats: 18 | 19 | - Gzip 20 | - Zlib 21 | - Mgzip 22 | - BGZF 23 | - Raw Deflate 24 | - Snap 25 | 26 | ## Install 27 | 28 | * Homebrew / Linuxbrew 29 | 30 | ``` 31 | brew tap sstadick/crabz 32 | brew install crabz 33 | ``` 34 | 35 | * Debian (Ubuntu) 36 | 37 | ``` 38 | curl -LO https://github.com/sstadick/crabz/releases/download//crabz-linux-amd64.deb 39 | sudo dpkg -i crabz-linux-amd64.deb 40 | ``` 41 | 42 | * Cargo 43 | 44 | ``` 45 | cargo install crabz 46 | ``` 47 | 48 | * Conda 49 | 50 | ``` 51 | conda install -c conda-forge crabz 52 | ``` 53 | 54 | 55 | ## Usage 56 | 57 | ``` 58 | ❯ crabz -h 59 | Compress and decompress files 60 | 61 | USAGE: 62 | crabz [FLAGS] [OPTIONS] [FILE] 63 | 64 | FLAGS: 65 | -d, --decompress 66 | Flag to switch to decompressing inputs. Note: this flag may change in future releases 67 | 68 | -h, --help 69 | Prints help information 70 | 71 | -I, --in-place 72 | Perform the compression / decompression in place. 73 | 74 | **NOTE** this will remove the input file at completion. 75 | -V, --version 76 | Prints version information 77 | 78 | 79 | OPTIONS: 80 | -l, --compression-level 81 | Compression level [default: 6] 82 | 83 | -p, --compression-threads 84 | Number of compression threads to use, or if decompressing a format that allow for multi-threaded 85 | decompression, the number to use. Note that > 4 threads for decompression doesn't seem to help [default: 86 | 32] 87 | -f, --format 88 | The format to use [default: gzip] [possible values: gzip, bgzf, mgzip, 89 | zlib, deflate, snap] 90 | -o, --output 91 | Output path to write to, empty or "-" to write to stdout 92 | 93 | -P, --pin-at 94 | Specify the physical core to pin threads at. 
95 | 96 | This can provide a significant performance improvement, but has the downside of possibly conflicting with 97 | other pinned cores. If you are running multiple instances of `crabz` at once you can manually space out the 98 | pinned cores. 99 | 100 | # Example 101 | - Instance 1 has `-p 4 -P 0` set indicating that it will use 4 cores pinned at 0, 1, 2, 3 102 | - Instance 2 has `-p 4 -P 4` set indicating that it will use 4 cores pinned at 4, 5, 6, 7 103 | 104 | ARGS: 105 | 106 | Input file to read from, empty or "-" to read from stdin 107 | ``` 108 | 109 | ## Benchmarks 110 | 111 | These benchmarks use the data in `bench-data` catted together 100 times. Run with `bash ./benchmark.sh data.txt`. 112 | 113 | Benchmark system specs: Ubuntu 20 AMD Ryzen 9 3950X 16-Core Processor w/ 64 GB DDR4 memory and 1TB NVMe Drive 114 | 115 | `pigz` v2.4 installed via apt on Ubuntu 116 | 117 | Takeaways: 118 | 119 | - `crabz` with `zlib` backend is pretty much identical to `pigz` 120 | - `crabz` with `zlib-ng` backend is roughly 30-50% faster than `pigz` 121 | - `crabz` with `rust` backend is roughly 5-10% faster than `pigz` 122 | 123 | It is already known that `zlib-ng` is faster than `zlib`, so none of this is groundbreaking. However, I think `crabz` gets 124 | an edge due to the following: 125 | 126 | - `crabz` with `deflate_rust` backend is using Rust-only code, which is in theory more secure / safe. 127 | - `crabz` with `zlib-ng` is easier to install than `pigz` with a `zlib-ng` backend 128 | - `crabz` supports more formats than `pigz` 129 | - `crabz` is cross-platform and can run on Windows 130 | 131 | With regard to block formats like Mgzip and BGZF, `crabz` is using `libdeflater` by default which excels at compressing and 132 | decompressing known-sized blocks. This makes block compression formats very fast at a small loss to the compression ratio. 133 | 134 | See end of benchmarks section for comparison against `bgzip`. 
135 | 136 | As `crabz` is just a wrapper for the `gzp` library, the most exciting thing about these benchmarks is that `gzp` is on par with 137 | best in class CLI tools for multi-threaded compression and decompression as a library. 138 | 139 | 140 | ### Flate2 zlib-ng backend 141 | 142 | #### Compression 143 | 144 | 145 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 146 | | :------------------------------ | -------------: | ------: | ------: | ------------: | 147 | | `crabz -p 1 -c 3 < ./data.txt` | 6.450 ± 0.069 | 6.328 | 6.540 | 16.86 ± 0.24 | 148 | | `pigz -p 1 -3 < ./data.txt` | 11.404 ± 0.152 | 11.186 | 11.717 | 29.81 ± 0.49 | 149 | | `crabz -p 2 -c 3 < ./data.txt` | 3.437 ± 0.017 | 3.418 | 3.461 | 8.98 ± 0.10 | 150 | | `pigz -p 2 -3 < ./data.txt` | 5.868 ± 0.031 | 5.826 | 5.927 | 15.34 ± 0.17 | 151 | | `crabz -p 4 -c 3 < ./data.txt` | 1.741 ± 0.008 | 1.729 | 1.752 | 4.55 ± 0.05 | 152 | | `pigz -p 4 -3 < ./data.txt` | 2.952 ± 0.008 | 2.939 | 2.960 | 7.72 ± 0.08 | 153 | | `crabz -p 8 -c 3 < ./data.txt` | 0.889 ± 0.004 | 0.882 | 0.895 | 2.32 ± 0.02 | 154 | | `pigz -p 8 -3 < ./data.txt` | 1.505 ± 0.008 | 1.493 | 1.520 | 3.93 ± 0.04 | 155 | | `crabz -p 16 -c 3 < ./data.txt` | 0.485 ± 0.014 | 0.457 | 0.502 | 1.27 ± 0.04 | 156 | | `pigz -p 16 -3 < ./data.txt` | 0.775 ± 0.011 | 0.764 | 0.797 | 2.02 ± 0.04 | 157 | | `crabz -p 32 -c 3 < ./data.txt` | 0.383 ± 0.004 | 0.375 | 0.388 | 1.00 | 158 | | `pigz -p 32 -3 < ./data.txt` | 0.699 ± 0.029 | 0.668 | 0.770 | 1.83 ± 0.08 | 159 | | `crabz -p 1 -c 6 < ./data.txt` | 10.367 ± 0.211 | 10.106 | 10.642 | 27.10 ± 0.61 | 160 | | `pigz -p 1 -6 < ./data.txt` | 26.734 ± 0.345 | 26.234 | 27.135 | 69.89 ± 1.12 | 161 | | `crabz -p 2 -c 6 < ./data.txt` | 5.366 ± 0.036 | 5.299 | 5.429 | 14.03 ± 0.16 | 162 | | `pigz -p 2 -6 < ./data.txt` | 13.589 ± 0.083 | 13.428 | 13.679 | 35.52 ± 0.40 | 163 | | `crabz -p 4 -c 6 < ./data.txt` | 2.719 ± 0.021 | 2.694 | 2.757 | 7.11 ± 0.09 | 164 | | `pigz -p 4 -6 < ./data.txt` | 6.887 ± 0.013 | 
6.871 | 6.916 | 18.00 ± 0.17 | 165 | | `crabz -p 8 -c 6 < ./data.txt` | 1.381 ± 0.007 | 1.372 | 1.397 | 3.61 ± 0.04 | 166 | | `pigz -p 8 -6 < ./data.txt` | 3.479 ± 0.008 | 3.463 | 3.488 | 9.09 ± 0.09 | 167 | | `crabz -p 16 -c 6 < ./data.txt` | 0.745 ± 0.022 | 0.727 | 0.804 | 1.95 ± 0.06 | 168 | | `pigz -p 16 -6 < ./data.txt` | 1.818 ± 0.036 | 1.765 | 1.874 | 4.75 ± 0.10 | 169 | | `crabz -p 32 -c 6 < ./data.txt` | 0.549 ± 0.006 | 0.538 | 0.557 | 1.44 ± 0.02 | 170 | | `pigz -p 32 -6 < ./data.txt` | 1.187 ± 0.011 | 1.172 | 1.210 | 3.10 ± 0.04 | 171 | | `crabz -p 1 -c 9 < ./data.txt` | 30.114 ± 0.196 | 29.842 | 30.420 | 78.72 ± 0.90 | 172 | | `pigz -p 1 -9 < ./data.txt` | 51.369 ± 0.164 | 51.246 | 51.698 | 134.29 ± 1.33 | 173 | | `crabz -p 2 -c 9 < ./data.txt` | 15.371 ± 0.070 | 15.202 | 15.443 | 40.18 ± 0.42 | 174 | | `pigz -p 2 -9 < ./data.txt` | 26.452 ± 0.085 | 26.253 | 26.576 | 69.15 ± 0.69 | 175 | | `crabz -p 4 -c 9 < ./data.txt` | 7.729 ± 0.022 | 7.699 | 7.768 | 20.20 ± 0.20 | 176 | | `pigz -p 4 -9 < ./data.txt` | 13.365 ± 0.047 | 13.271 | 13.449 | 34.94 ± 0.35 | 177 | | `crabz -p 8 -c 9 < ./data.txt` | 3.901 ± 0.006 | 3.889 | 3.910 | 10.20 ± 0.10 | 178 | | `pigz -p 8 -9 < ./data.txt` | 6.749 ± 0.014 | 6.737 | 6.781 | 17.64 ± 0.17 | 179 | | `crabz -p 16 -c 9 < ./data.txt` | 2.039 ± 0.024 | 1.997 | 2.071 | 5.33 ± 0.08 | 180 | | `pigz -p 16 -9 < ./data.txt` | 3.486 ± 0.054 | 3.426 | 3.574 | 9.11 ± 0.17 | 181 | | `crabz -p 32 -c 9 < ./data.txt` | 1.337 ± 0.072 | 1.220 | 1.411 | 3.49 ± 0.19 | 182 | | `pigz -p 32 -9 < ./data.txt` | 2.203 ± 0.114 | 2.082 | 2.378 | 5.76 ± 0.30 | 183 | 184 | 185 | #### Decompression 186 | 187 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 188 | | :--------------------------- | ------------: | ------: | ------: | ----------: | 189 | | `crabz -d < ./data.3.txt.gz` | 1.422 ± 0.010 | 1.411 | 1.437 | 1.03 ± 0.02 | 190 | | `pigz -d < ./data.3.txt.gz` | 1.674 ± 0.031 | 1.621 | 1.705 | 1.21 ± 0.03 | 191 | | `crabz -d < ./data.6.txt.gz` 
| 1.403 ± 0.016 | 1.389 | 1.427 | 1.01 ± 0.02 | 192 | | `pigz -d < ./data.6.txt.gz` | 1.724 ± 0.026 | 1.697 | 1.766 | 1.24 ± 0.02 | 193 | | `crabz -d < ./data.9.txt.gz` | 1.385 ± 0.018 | 1.359 | 1.416 | 1.00 | 194 | | `pigz -d < ./data.9.txt.gz` | 1.745 ± 0.044 | 1.684 | 1.797 | 1.26 ± 0.04 | 195 | 196 | 197 | ### Flate2 zlib backend 198 | 199 | #### Compression 200 | 201 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 202 | | :------------------------------ | -------------: | ------: | ------: | -----------: | 203 | | `crabz -p 1 -c 3 < ./data.txt` | 11.248 ± 0.247 | 11.085 | 11.532 | 20.23 ± 0.45 | 204 | | `pigz -p 1 -3 < ./data.txt` | 11.296 ± 0.171 | 11.104 | 11.434 | 20.32 ± 0.31 | 205 | | `crabz -p 2 -c 3 < ./data.txt` | 5.681 ± 0.040 | 5.645 | 5.725 | 10.22 ± 0.08 | 206 | | `pigz -p 2 -3 < ./data.txt` | 5.926 ± 0.015 | 5.916 | 5.944 | 10.66 ± 0.04 | 207 | | `crabz -p 4 -c 3 < ./data.txt` | 2.891 ± 0.007 | 2.883 | 2.895 | 5.20 ± 0.02 | 208 | | `pigz -p 4 -3 < ./data.txt` | 2.966 ± 0.013 | 2.955 | 2.980 | 5.34 ± 0.03 | 209 | | `crabz -p 8 -c 3 < ./data.txt` | 1.461 ± 0.003 | 1.459 | 1.465 | 2.63 ± 0.01 | 210 | | `pigz -p 8 -3 < ./data.txt` | 1.509 ± 0.004 | 1.505 | 1.512 | 2.71 ± 0.01 | 211 | | `crabz -p 16 -c 3 < ./data.txt` | 0.784 ± 0.010 | 0.775 | 0.795 | 1.41 ± 0.02 | 212 | | `pigz -p 16 -3 < ./data.txt` | 0.772 ± 0.010 | 0.765 | 0.784 | 1.39 ± 0.02 | 213 | | `crabz -p 32 -c 3 < ./data.txt` | 0.556 ± 0.002 | 0.554 | 0.557 | 1.00 | 214 | | `pigz -p 32 -3 < ./data.txt` | 0.743 ± 0.047 | 0.694 | 0.786 | 1.34 ± 0.08 | 215 | | `crabz -p 1 -c 6 < ./data.txt` | 26.366 ± 0.154 | 26.189 | 26.469 | 47.42 ± 0.31 | 216 | | `pigz -p 1 -6 < ./data.txt` | 26.688 ± 0.103 | 26.579 | 26.783 | 48.00 ± 0.23 | 217 | | `crabz -p 2 -c 6 < ./data.txt` | 13.443 ± 0.069 | 13.400 | 13.523 | 24.18 ± 0.14 | 218 | | `pigz -p 2 -6 < ./data.txt` | 13.605 ± 0.059 | 13.567 | 13.673 | 24.47 ± 0.13 | 219 | | `crabz -p 4 -c 6 < ./data.txt` | 6.833 ± 0.005 | 6.828 | 6.837 | 12.29 ± 
0.03 | 220 | | `pigz -p 4 -6 < ./data.txt` | 6.866 ± 0.028 | 6.834 | 6.884 | 12.35 ± 0.06 | 221 | | `crabz -p 8 -c 6 < ./data.txt` | 3.446 ± 0.000 | 3.445 | 3.446 | 6.20 ± 0.02 | 222 | | `pigz -p 8 -6 < ./data.txt` | 3.482 ± 0.002 | 3.480 | 3.483 | 6.26 ± 0.02 | 223 | | `crabz -p 16 -c 6 < ./data.txt` | 1.822 ± 0.012 | 1.813 | 1.835 | 3.28 ± 0.02 | 224 | | `pigz -p 16 -6 < ./data.txt` | 1.771 ± 0.004 | 1.767 | 1.776 | 3.19 ± 0.01 | 225 | | `crabz -p 32 -c 6 < ./data.txt` | 1.178 ± 0.008 | 1.171 | 1.187 | 2.12 ± 0.02 | 226 | | `pigz -p 32 -6 < ./data.txt` | 1.184 ± 0.001 | 1.184 | 1.185 | 2.13 ± 0.01 | 227 | | `crabz -p 1 -c 9 < ./data.txt` | 52.122 ± 0.288 | 51.790 | 52.293 | 93.75 ± 0.58 | 228 | | `pigz -p 1 -9 < ./data.txt` | 53.031 ± 0.071 | 52.951 | 53.085 | 95.39 ± 0.29 | 229 | | `crabz -p 2 -c 9 < ./data.txt` | 26.287 ± 0.047 | 26.249 | 26.339 | 47.28 ± 0.15 | 230 | | `pigz -p 2 -9 < ./data.txt` | 26.409 ± 0.238 | 26.190 | 26.662 | 47.50 ± 0.45 | 231 | | `crabz -p 4 -c 9 < ./data.txt` | 13.373 ± 0.051 | 13.317 | 13.419 | 24.05 ± 0.11 | 232 | | `pigz -p 4 -9 < ./data.txt` | 13.414 ± 0.035 | 13.383 | 13.451 | 24.13 ± 0.09 | 233 | | `crabz -p 8 -c 9 < ./data.txt` | 6.733 ± 0.003 | 6.731 | 6.736 | 12.11 ± 0.03 | 234 | | `pigz -p 8 -9 < ./data.txt` | 6.763 ± 0.004 | 6.761 | 6.767 | 12.16 ± 0.03 | 235 | | `crabz -p 16 -c 9 < ./data.txt` | 3.487 ± 0.034 | 3.450 | 3.517 | 6.27 ± 0.06 | 236 | | `pigz -p 16 -9 < ./data.txt` | 3.459 ± 0.021 | 3.434 | 3.473 | 6.22 ± 0.04 | 237 | | `crabz -p 32 -c 9 < ./data.txt` | 2.088 ± 0.008 | 2.081 | 2.097 | 3.76 ± 0.02 | 238 | | `pigz -p 32 -9 < ./data.txt` | 2.107 ± 0.023 | 2.090 | 2.133 | 3.79 ± 0.04 | 239 | 240 | 241 | #### Decompression 242 | 243 | ### Flate2 rust backend 244 | 245 | #### Compression 246 | 247 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 248 | | :------------------------------ | -------------: | ------: | ------: | -----------: | 249 | | `crabz -p 1 -c 3 < ./data.txt` | 10.167 ± 0.164 | 10.050 | 
10.355 | 18.57 ± 0.33 | 250 | | `pigz -p 1 -3 < ./data.txt` | 11.338 ± 0.071 | 11.292 | 11.420 | 20.71 ± 0.21 | 251 | | `crabz -p 2 -c 3 < ./data.txt` | 4.912 ± 0.013 | 4.898 | 4.920 | 8.97 ± 0.08 | 252 | | `pigz -p 2 -3 < ./data.txt` | 5.876 ± 0.047 | 5.826 | 5.919 | 10.73 ± 0.12 | 253 | | `crabz -p 4 -c 3 < ./data.txt` | 2.463 ± 0.018 | 2.447 | 2.482 | 4.50 ± 0.05 | 254 | | `pigz -p 4 -3 < ./data.txt` | 2.967 ± 0.008 | 2.958 | 2.972 | 5.42 ± 0.05 | 255 | | `crabz -p 8 -c 3 < ./data.txt` | 1.255 ± 0.005 | 1.250 | 1.261 | 2.29 ± 0.02 | 256 | | `pigz -p 8 -3 < ./data.txt` | 1.509 ± 0.002 | 1.507 | 1.511 | 2.76 ± 0.02 | 257 | | `crabz -p 16 -c 3 < ./data.txt` | 0.705 ± 0.030 | 0.673 | 0.731 | 1.29 ± 0.05 | 258 | | `pigz -p 16 -3 < ./data.txt` | 0.780 ± 0.015 | 0.768 | 0.797 | 1.42 ± 0.03 | 259 | | `crabz -p 32 -c 3 < ./data.txt` | 0.547 ± 0.004 | 0.544 | 0.552 | 1.00 | 260 | | `pigz -p 32 -3 < ./data.txt` | 0.755 ± 0.025 | 0.726 | 0.771 | 1.38 ± 0.05 | 261 | | `crabz -p 1 -c 6 < ./data.txt` | 27.064 ± 0.288 | 26.863 | 27.394 | 49.44 ± 0.66 | 262 | | `pigz -p 1 -6 < ./data.txt` | 27.034 ± 0.090 | 26.938 | 27.117 | 49.38 ± 0.43 | 263 | | `crabz -p 2 -c 6 < ./data.txt` | 12.400 ± 0.083 | 12.321 | 12.487 | 22.65 ± 0.24 | 264 | | `pigz -p 2 -6 < ./data.txt` | 13.619 ± 0.074 | 13.558 | 13.702 | 24.88 ± 0.24 | 265 | | `crabz -p 4 -c 6 < ./data.txt` | 6.279 ± 0.023 | 6.263 | 6.305 | 11.47 ± 0.10 | 266 | | `pigz -p 4 -6 < ./data.txt` | 6.879 ± 0.020 | 6.867 | 6.901 | 12.57 ± 0.11 | 267 | | `crabz -p 8 -c 6 < ./data.txt` | 3.189 ± 0.010 | 3.178 | 3.198 | 5.83 ± 0.05 | 268 | | `pigz -p 8 -6 < ./data.txt` | 3.477 ± 0.007 | 3.470 | 3.483 | 6.35 ± 0.05 | 269 | | `crabz -p 16 -c 6 < ./data.txt` | 1.756 ± 0.015 | 1.740 | 1.771 | 3.21 ± 0.04 | 270 | | `pigz -p 16 -6 < ./data.txt` | 1.799 ± 0.024 | 1.779 | 1.827 | 3.29 ± 0.05 | 271 | | `crabz -p 32 -c 6 < ./data.txt` | 1.192 ± 0.011 | 1.183 | 1.205 | 2.18 ± 0.03 | 272 | | `pigz -p 32 -6 < ./data.txt` | 1.196 ± 0.016 | 1.183 | 1.214 | 
2.19 ± 0.03 | 273 | | `crabz -p 1 -c 9 < ./data.txt` | 44.907 ± 0.283 | 44.585 | 45.116 | 82.03 ± 0.84 | 274 | | `pigz -p 1 -9 < ./data.txt` | 53.109 ± 1.049 | 52.373 | 54.311 | 97.02 ± 2.07 | 275 | | `crabz -p 2 -c 9 < ./data.txt` | 19.977 ± 0.159 | 19.819 | 20.136 | 36.49 ± 0.41 | 276 | | `pigz -p 2 -9 < ./data.txt` | 26.562 ± 0.134 | 26.407 | 26.643 | 48.52 ± 0.46 | 277 | | `crabz -p 4 -c 9 < ./data.txt` | 10.397 ± 0.484 | 10.070 | 10.953 | 18.99 ± 0.90 | 278 | | `pigz -p 4 -9 < ./data.txt` | 13.346 ± 0.040 | 13.300 | 13.372 | 24.38 ± 0.21 | 279 | | `crabz -p 8 -c 9 < ./data.txt` | 5.100 ± 0.021 | 5.076 | 5.114 | 9.32 ± 0.08 | 280 | | `pigz -p 8 -9 < ./data.txt` | 6.754 ± 0.016 | 6.736 | 6.767 | 12.34 ± 0.10 | 281 | | `crabz -p 16 -c 9 < ./data.txt` | 2.716 ± 0.014 | 2.708 | 2.732 | 4.96 ± 0.05 | 282 | | `pigz -p 16 -9 < ./data.txt` | 3.444 ± 0.038 | 3.420 | 3.487 | 6.29 ± 0.09 | 283 | | `crabz -p 32 -c 9 < ./data.txt` | 1.747 ± 0.009 | 1.740 | 1.758 | 3.19 ± 0.03 | 284 | | `pigz -p 32 -9 < ./data.txt` | 2.086 ± 0.008 | 2.077 | 2.093 | 3.81 ± 0.03 | 285 | 286 | 287 | #### Decompression 288 | 289 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 290 | | :--------------------------- | ------------: | ------: | ------: | ----------: | 291 | | `crabz -d < ./data.3.txt.gz` | 1.599 ± 0.014 | 1.573 | 1.615 | 1.00 | 292 | | `pigz -d < ./data.3.txt.gz` | 1.696 ± 0.020 | 1.654 | 1.725 | 1.06 ± 0.02 | 293 | | `crabz -d < ./data.6.txt.gz` | 1.615 ± 0.012 | 1.586 | 1.626 | 1.01 ± 0.01 | 294 | | `pigz -d < ./data.6.txt.gz` | 1.760 ± 0.030 | 1.687 | 1.797 | 1.10 ± 0.02 | 295 | | `crabz -d < ./data.9.txt.gz` | 1.613 ± 0.014 | 1.596 | 1.641 | 1.01 ± 0.01 | 296 | | `pigz -d < ./data.9.txt.gz` | 1.767 ± 0.012 | 1.748 | 1.787 | 1.11 ± 0.01 | 297 | 298 | ### Block Formats with libdeflater 299 | 300 | #### Decompression 301 | 302 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 303 | | :----------------------------------------------------- | ------------: | ------: | 
------: | ----------: | 304 | | `crabz -p 1 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 1.221 ± 0.164 | 1.073 | 1.397 | 2.32 ± 0.31 | 305 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.415 ± 0.063 | 2.347 | 2.472 | 4.58 ± 0.14 | 306 | | `crabz -p 1 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 1.256 ± 0.063 | 1.200 | 1.325 | 2.38 ± 0.13 | 307 | | `pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.513 ± 0.052 | 2.467 | 2.569 | 4.77 ± 0.13 | 308 | | `crabz -p 1 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 1.147 ± 0.065 | 1.094 | 1.219 | 2.18 ± 0.13 | 309 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.394 ± 0.118 | 2.262 | 2.488 | 4.54 ± 0.24 | 310 | | `crabz -p 1 -d -f mgzip ./bdata.12.txt.gz > data.txt` | 1.165 ± 0.074 | 1.106 | 1.248 | 2.21 ± 0.15 | 311 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.457 ± 0.067 | 2.408 | 2.534 | 4.66 ± 0.15 | 312 | | `crabz -p 2 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 0.634 ± 0.008 | 0.628 | 0.642 | 1.20 ± 0.03 | 313 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.379 ± 0.012 | 2.368 | 2.391 | 4.51 ± 0.08 | 314 | | `crabz -p 2 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 0.645 ± 0.015 | 0.629 | 0.658 | 1.22 ± 0.03 | 315 | | `pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.438 ± 0.073 | 2.356 | 2.497 | 4.62 ± 0.16 | 316 | | `crabz -p 2 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 0.659 ± 0.015 | 0.644 | 0.674 | 1.25 ± 0.04 | 317 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.451 ± 0.075 | 2.400 | 2.538 | 4.65 ± 0.16 | 318 | | `crabz -p 2 -d -f mgzip ./bdata.12.txt.gz > data.txt` | 0.656 ± 0.015 | 0.647 | 0.673 | 1.24 ± 0.04 | 319 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.450 ± 0.045 | 2.412 | 2.500 | 4.65 ± 0.12 | 320 | | `crabz -p 4 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 0.577 ± 0.024 | 0.554 | 0.603 | 1.10 ± 0.05 | 321 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.459 ± 0.052 | 2.420 | 2.518 | 4.66 ± 0.13 | 322 | | `crabz -p 4 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 0.559 ± 0.024 | 0.531 | 0.576 | 1.06 ± 0.05 | 323 | | 
`pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.538 ± 0.044 | 2.502 | 2.587 | 4.81 ± 0.12 | 324 | | `crabz -p 4 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 0.552 ± 0.011 | 0.539 | 0.560 | 1.05 ± 0.03 | 325 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.402 ± 0.018 | 2.385 | 2.420 | 4.56 ± 0.08 | 326 | | `crabz -p 4 -d -f mgzip ./bdata.12.txt.gz > data.txt` | 0.592 ± 0.040 | 0.546 | 0.616 | 1.12 ± 0.08 | 327 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.525 ± 0.038 | 2.484 | 2.558 | 4.79 ± 0.11 | 328 | | `crabz -p 8 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 0.563 ± 0.013 | 0.548 | 0.571 | 1.07 ± 0.03 | 329 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.490 ± 0.126 | 2.369 | 2.621 | 4.72 ± 0.25 | 330 | | `crabz -p 8 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 0.552 ± 0.018 | 0.533 | 0.569 | 1.05 ± 0.04 | 331 | | `pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.531 ± 0.115 | 2.417 | 2.647 | 4.80 ± 0.23 | 332 | | `crabz -p 8 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 0.603 ± 0.029 | 0.583 | 0.636 | 1.14 ± 0.06 | 333 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.483 ± 0.042 | 2.435 | 2.515 | 4.71 ± 0.11 | 334 | | `crabz -p 8 -d -f mgzip ./bdata.12.txt.gz > data.txt` | 0.527 ± 0.009 | 0.519 | 0.537 | 1.00 | 335 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.524 ± 0.093 | 2.417 | 2.583 | 4.79 ± 0.19 | 336 | | `crabz -p 16 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 0.603 ± 0.058 | 0.551 | 0.665 | 1.14 ± 0.11 | 337 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.392 ± 0.007 | 2.384 | 2.397 | 4.54 ± 0.08 | 338 | | `crabz -p 16 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 0.611 ± 0.065 | 0.565 | 0.686 | 1.16 ± 0.13 | 339 | | `pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.593 ± 0.148 | 2.427 | 2.712 | 4.92 ± 0.29 | 340 | | `crabz -p 16 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 0.564 ± 0.027 | 0.541 | 0.594 | 1.07 ± 0.05 | 341 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.426 ± 0.023 | 2.404 | 2.450 | 4.60 ± 0.09 | 342 | | `crabz -p 16 -d -f mgzip ./bdata.12.txt.gz > 
data.txt` | 0.601 ± 0.020 | 0.582 | 0.623 | 1.14 ± 0.04 | 343 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.528 ± 0.022 | 2.507 | 2.550 | 4.80 ± 0.09 | 344 | | `crabz -p 32 -d -f mgzip ./bdata.3.txt.gz > data.txt` | 0.595 ± 0.019 | 0.577 | 0.614 | 1.13 ± 0.04 | 345 | | `pigz -d -c ./bdata.3.txt.gz > data.txt` | 2.544 ± 0.107 | 2.422 | 2.621 | 4.83 ± 0.22 | 346 | | `crabz -p 32 -d -f mgzip ./bdata.6.txt.gz > data.txt` | 0.601 ± 0.021 | 0.586 | 0.626 | 1.14 ± 0.05 | 347 | | `pigz -d -c ./bdata.6.txt.gz > data.txt` | 2.519 ± 0.114 | 2.435 | 2.649 | 4.78 ± 0.23 | 348 | | `crabz -p 32 -d -f mgzip ./bdata.9.txt.gz > data.txt` | 0.565 ± 0.023 | 0.539 | 0.579 | 1.07 ± 0.05 | 349 | | `pigz -d -c ./bdata.9.txt.gz > data.txt` | 2.487 ± 0.064 | 2.415 | 2.540 | 4.72 ± 0.15 | 350 | | `crabz -p 32 -d -f mgzip ./bdata.12.txt.gz > data.txt` | 0.557 ± 0.013 | 0.548 | 0.571 | 1.06 ± 0.03 | 351 | | `pigz -d -c ./bdata.12.txt.gz > data.txt` | 2.505 ± 0.105 | 2.442 | 2.626 | 4.75 ± 0.22 | 352 | 353 | 354 | #### `crabz`, `pigz`, and `bgzip` 355 | 356 | These benchmarks were run on the `all_train.csv` data found [here](https://archive.ics.uci.edu/ml/machine-learning-databases/00347/all_train.csv.gz) 357 | 358 | #### Compression 359 | 360 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 361 | | :------------------------------------------------------------- | --------------: | ------: | ------: | -----------: | 362 | | `crabz -p 2 -P 0 -l 2 -f bgzf ./data.txt > ./data.out.txt.gz` | 15.837 ± 0.137 | 15.688 | 15.959 | 5.52 ± 0.13 | 363 | | `bgzip -c -@ 2 -l 2 ./data.txt > ./data.out.txt.gz` | 19.471 ± 0.178 | 19.268 | 19.602 | 6.78 ± 0.16 | 364 | | `crabz -p 2 -P 0 -l 2 -f gzip ./data.txt > ./data.out.txt.gz` | 19.723 ± 0.632 | 19.285 | 20.448 | 6.87 ± 0.26 | 365 | | `pigz -c -p 2 -2 ./data.txt > ./data.out.txt.gz` | 32.249 ± 0.024 | 32.226 | 32.274 | 11.24 ± 0.24 | 366 | | `crabz -p 4 -P 0 -l 2 -f bgzf ./data.txt > ./data.out.txt.gz` | 8.601 ± 0.538 | 8.040 | 9.113 | 3.00 ± 0.20 | 
367 | | `bgzip -c -@ 4 -l 2 ./data.txt > ./data.out.txt.gz` | 10.953 ± 0.033 | 10.929 | 10.990 | 3.82 ± 0.08 | 368 | | `crabz -p 4 -P 0 -l 2 -f gzip ./data.txt > ./data.out.txt.gz` | 10.887 ± 0.584 | 10.236 | 11.364 | 3.79 ± 0.22 | 369 | | `pigz -c -p 4 -2 ./data.txt > ./data.out.txt.gz` | 16.493 ± 0.323 | 16.257 | 16.861 | 5.75 ± 0.17 | 370 | | `crabz -p 8 -P 0 -l 2 -f bgzf ./data.txt > ./data.out.txt.gz` | 5.206 ± 0.372 | 4.780 | 5.464 | 1.81 ± 0.14 | 371 | | `bgzip -c -@ 8 -l 2 ./data.txt > ./data.out.txt.gz` | 6.920 ± 0.033 | 6.893 | 6.957 | 2.41 ± 0.05 | 372 | | `crabz -p 8 -P 0 -l 2 -f gzip ./data.txt > ./data.out.txt.gz` | 5.893 ± 0.135 | 5.777 | 6.041 | 2.05 ± 0.06 | 373 | | `pigz -c -p 8 -2 ./data.txt > ./data.out.txt.gz` | 8.974 ± 0.467 | 8.553 | 9.477 | 3.13 ± 0.18 | 374 | | `crabz -p 16 -P 0 -l 2 -f bgzf ./data.txt > ./data.out.txt.gz` | 2.870 ± 0.061 | 2.816 | 2.936 | 1.00 | 375 | | `bgzip -c -@ 16 -l 2 ./data.txt > ./data.out.txt.gz` | 5.124 ± 0.107 | 5.040 | 5.244 | 1.79 ± 0.05 | 376 | | `crabz -p 16 -P 0 -l 2 -f gzip ./data.txt > ./data.out.txt.gz` | 4.250 ± 0.323 | 3.933 | 4.579 | 1.48 ± 0.12 | 377 | | `pigz -c -p 16 -2 ./data.txt > ./data.out.txt.gz` | 4.767 ± 0.223 | 4.513 | 4.933 | 1.66 ± 0.09 | 378 | | `crabz -p 32 -P 0 -l 2 -f bgzf ./data.txt > ./data.out.txt.gz` | 3.669 ± 0.303 | 3.320 | 3.865 | 1.28 ± 0.11 | 379 | | `bgzip -c -@ 32 -l 2 ./data.txt > ./data.out.txt.gz` | 4.676 ± 0.038 | 4.632 | 4.701 | 1.63 ± 0.04 | 380 | | `crabz -p 32 -P 0 -l 2 -f gzip ./data.txt > ./data.out.txt.gz` | 4.324 ± 0.246 | 4.143 | 4.605 | 1.51 ± 0.09 | 381 | | `pigz -c -p 32 -2 ./data.txt > ./data.out.txt.gz` | 5.854 ± 0.070 | 5.795 | 5.931 | 2.04 ± 0.05 | 382 | | `crabz -p 2 -P 0 -l 6 -f bgzf ./data.txt > ./data.out.txt.gz` | 27.696 ± 0.147 | 27.593 | 27.864 | 9.65 ± 0.21 | 383 | | `bgzip -c -@ 2 -l 6 ./data.txt > ./data.out.txt.gz` | 30.961 ± 0.446 | 30.446 | 31.231 | 10.79 ± 0.28 | 384 | | `crabz -p 2 -P 0 -l 6 -f gzip ./data.txt > ./data.out.txt.gz` | 36.229 
± 0.175 | 36.092 | 36.427 | 12.62 ± 0.27 | 385 | | `pigz -c -p 2 -6 ./data.txt > ./data.out.txt.gz` | 97.175 ± 0.571 | 96.743 | 97.823 | 33.86 ± 0.74 | 386 | | `crabz -p 4 -P 0 -l 6 -f bgzf ./data.txt > ./data.out.txt.gz` | 14.802 ± 0.436 | 14.316 | 15.159 | 5.16 ± 0.19 | 387 | | `bgzip -c -@ 4 -l 6 ./data.txt > ./data.out.txt.gz` | 16.927 ± 0.130 | 16.789 | 17.048 | 5.90 ± 0.13 | 388 | | `crabz -p 4 -P 0 -l 6 -f gzip ./data.txt > ./data.out.txt.gz` | 19.192 ± 0.675 | 18.629 | 19.940 | 6.69 ± 0.27 | 389 | | `pigz -c -p 4 -6 ./data.txt > ./data.out.txt.gz` | 49.305 ± 0.114 | 49.203 | 49.429 | 17.18 ± 0.37 | 390 | | `crabz -p 8 -P 0 -l 6 -f bgzf ./data.txt > ./data.out.txt.gz` | 7.833 ± 0.065 | 7.784 | 7.907 | 2.73 ± 0.06 | 391 | | `bgzip -c -@ 8 -l 6 ./data.txt > ./data.out.txt.gz` | 9.858 ± 0.105 | 9.739 | 9.939 | 3.43 ± 0.08 | 392 | | `crabz -p 8 -P 0 -l 6 -f gzip ./data.txt > ./data.out.txt.gz` | 10.417 ± 0.979 | 9.626 | 11.511 | 3.63 ± 0.35 | 393 | | `pigz -c -p 8 -6 ./data.txt > ./data.out.txt.gz` | 25.276 ± 0.170 | 25.083 | 25.404 | 8.81 ± 0.20 | 394 | | `crabz -p 16 -P 0 -l 6 -f bgzf ./data.txt > ./data.out.txt.gz` | 4.704 ± 0.321 | 4.337 | 4.937 | 1.64 ± 0.12 | 395 | | `bgzip -c -@ 16 -l 6 ./data.txt > ./data.out.txt.gz` | 6.565 ± 0.155 | 6.429 | 6.734 | 2.29 ± 0.07 | 396 | | `crabz -p 16 -P 0 -l 6 -f gzip ./data.txt > ./data.out.txt.gz` | 5.722 ± 0.320 | 5.530 | 6.092 | 1.99 ± 0.12 | 397 | | `pigz -c -p 16 -6 ./data.txt > ./data.out.txt.gz` | 13.673 ± 0.129 | 13.525 | 13.762 | 4.76 ± 0.11 | 398 | | `crabz -p 32 -P 0 -l 6 -f bgzf ./data.txt > ./data.out.txt.gz` | 4.202 ± 0.213 | 3.957 | 4.328 | 1.46 ± 0.08 | 399 | | `bgzip -c -@ 32 -l 6 ./data.txt > ./data.out.txt.gz` | 5.538 ± 0.135 | 5.395 | 5.663 | 1.93 ± 0.06 | 400 | | `crabz -p 32 -P 0 -l 6 -f gzip ./data.txt > ./data.out.txt.gz` | 5.488 ± 0.064 | 5.423 | 5.550 | 1.91 ± 0.05 | 401 | | `pigz -c -p 32 -6 ./data.txt > ./data.out.txt.gz` | 9.079 ± 0.286 | 8.808 | 9.379 | 3.16 ± 0.12 | 402 | | `crabz -p 2 -P 
0 -l 9 -f bgzf ./data.txt > ./data.out.txt.gz` | 162.875 ± 0.100 | 162.778 | 162.977 | 56.75 ± 1.20 | 403 | | `bgzip -c -@ 2 -l 9 ./data.txt > ./data.out.txt.gz` | 172.428 ± 0.242 | 172.207 | 172.687 | 60.08 ± 1.27 | 404 | | `crabz -p 2 -P 0 -l 9 -f gzip ./data.txt > ./data.out.txt.gz` | 139.245 ± 0.270 | 138.974 | 139.514 | 48.52 ± 1.03 | 405 | | `pigz -c -p 2 -9 ./data.txt > ./data.out.txt.gz` | 209.645 ± 0.058 | 209.580 | 209.691 | 73.05 ± 1.55 | 406 | | `crabz -p 4 -P 0 -l 9 -f bgzf ./data.txt > ./data.out.txt.gz` | 84.624 ± 0.185 | 84.414 | 84.762 | 29.49 ± 0.63 | 407 | | `bgzip -c -@ 4 -l 9 ./data.txt > ./data.out.txt.gz` | 87.228 ± 0.232 | 87.053 | 87.492 | 30.39 ± 0.65 | 408 | | `crabz -p 4 -P 0 -l 9 -f gzip ./data.txt > ./data.out.txt.gz` | 72.339 ± 0.166 | 72.187 | 72.517 | 25.21 ± 0.54 | 409 | | `pigz -c -p 4 -9 ./data.txt > ./data.out.txt.gz` | 106.579 ± 0.236 | 106.307 | 106.731 | 37.14 ± 0.79 | 410 | | `crabz -p 8 -P 0 -l 9 -f bgzf ./data.txt > ./data.out.txt.gz` | 42.988 ± 0.130 | 42.905 | 43.138 | 14.98 ± 0.32 | 411 | | `bgzip -c -@ 8 -l 9 ./data.txt > ./data.out.txt.gz` | 44.550 ± 0.097 | 44.449 | 44.642 | 15.52 ± 0.33 | 412 | | `crabz -p 8 -P 0 -l 9 -f gzip ./data.txt > ./data.out.txt.gz` | 36.555 ± 0.030 | 36.521 | 36.579 | 12.74 ± 0.27 | 413 | | `pigz -c -p 8 -9 ./data.txt > ./data.out.txt.gz` | 54.047 ± 0.016 | 54.030 | 54.062 | 18.83 ± 0.40 | 414 | | `crabz -p 16 -P 0 -l 9 -f bgzf ./data.txt > ./data.out.txt.gz` | 22.391 ± 0.234 | 22.154 | 22.623 | 7.80 ± 0.18 | 415 | | `bgzip -c -@ 16 -l 9 ./data.txt > ./data.out.txt.gz` | 24.041 ± 0.237 | 23.813 | 24.286 | 8.38 ± 0.20 | 416 | | `crabz -p 16 -P 0 -l 9 -f gzip ./data.txt > ./data.out.txt.gz` | 19.285 ± 0.125 | 19.141 | 19.363 | 6.72 ± 0.15 | 417 | | `pigz -c -p 16 -9 ./data.txt > ./data.out.txt.gz` | 27.645 ± 0.078 | 27.579 | 27.731 | 9.63 ± 0.21 | 418 | | `crabz -p 32 -P 0 -l 9 -f bgzf ./data.txt > ./data.out.txt.gz` | 15.148 ± 0.138 | 14.992 | 15.252 | 5.28 ± 0.12 | 419 | | `bgzip -c -@ 32 
-l 9 ./data.txt > ./data.out.txt.gz` | 16.091 ± 0.193 | 15.874 | 16.243 | 5.61 ± 0.14 | 420 | | `crabz -p 32 -P 0 -l 9 -f gzip ./data.txt > ./data.out.txt.gz` | 11.832 ± 0.168 | 11.637 | 11.930 | 4.12 ± 0.11 | 421 | | `pigz -c -p 32 -9 ./data.txt > ./data.out.txt.gz` | 16.912 ± 0.095 | 16.804 | 16.982 | 5.89 ± 0.13 | 422 | 423 | #### Decompression 424 | 425 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 426 | | :------------------------------------------------------ | ------------: | ------: | ------: | ----------: | 427 | | `crabz -d -p 4 -f bgzf ./data.txt.gz > ./data.out.txt` | 5.941 ± 0.172 | 5.745 | 6.070 | 1.11 ± 0.09 | 428 | | `bgzip -d -c -@ 4 ./data.txt.gz > ./data.out.txt` | 5.357 ± 0.407 | 4.925 | 5.734 | 1.00 | 429 | | `crabz -d -p 8 -f bgzf ./data.txt.gz > ./data.out.txt` | 5.569 ± 0.496 | 5.023 | 5.990 | 1.04 ± 0.12 | 430 | | `bgzip -d -c -@ 8 ./data.txt.gz > ./data.out.txt` | 5.867 ± 0.252 | 5.682 | 6.154 | 1.10 ± 0.10 | 431 | | `crabz -d -p 16 -f bgzf ./data.txt.gz > ./data.out.txt` | 5.663 ± 0.240 | 5.506 | 5.939 | 1.06 ± 0.09 | 432 | | `bgzip -d -c -@ 16 ./data.txt.gz > ./data.out.txt` | 5.534 ± 0.124 | 5.416 | 5.663 | 1.03 ± 0.08 | 433 | 434 | ## TODOs 435 | 436 | - Add some form of auto format detection, even just by file extension 437 | 438 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. 
We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <https://unlicense.org>

--------------------------------------------------------------------------------
/bench2.sh:
--------------------------------------------------------------------------------
#!/usr/bin/bash
set -eo pipefail

test_data="$1"
cp "${test_data}" ./data.txt

crabz -f bgzf -l6 ./data.txt > ./data.txt.gz
hyperfine \
    --warmup 2 \
    --runs 3 \
    --export-markdown decompression.md \
    --parameter-list num_threads 0,4,8,16 \
    './target/release/crabz -d -p {num_threads} -f bgzf ./data.txt.gz > ./data.out.txt' \
    'bgzip -d -c -@ {num_threads} ./data.txt.gz > ./data.out.txt'
hyperfine \
    --warmup 2 \
    --runs 3 \
    --export-markdown compression.md \
    --parameter-list num_threads 2,4,8,16,32 \
    --parameter-list comp_level 2,6,9 \
    './target/release/crabz -p {num_threads} -P 0 -l {comp_level} -f bgzf ./data.txt > ./data.out.txt.gz' \
    'bgzip -c -@ {num_threads} -l {comp_level} ./data.txt > ./data.out.txt.gz' \
    './target/release/crabz -p {num_threads} -P 0 -l {comp_level} -f gzip ./data.txt > ./data.out.txt.gz' \
    'pigz -c -p {num_threads} -{comp_level} ./data.txt > ./data.out.txt.gz'

--------------------------------------------------------------------------------
/benchmark.sh:
--------------------------------------------------------------------------------
#!/usr/bin/bash
# Benchmark crabz against pigz for each deflate backend.
#
# Usage: ./benchmark.sh <test-data-file>
#
# The test data is copied to ./data.txt, compressed once with crabz (mgzip) and
# gzip at several levels, and then hyperfine is run for every backend feature
# set. Results are exported as compression_<backend>.md and
# decompression_<backend>.md in the current directory.
set -eo pipefail

# Fail early with a usage message instead of letting `cp ""` produce a
# confusing error when the argument is missing or is not a file.
if [ "$#" -lt 1 ] || [ ! -f "$1" ]; then
    echo "usage: $0 <test-data-file>" >&2
    exit 1
fi

test_data="$1"
cp "${test_data}" ./data.txt

cargo clean
cargo build --release

# Block-compressed inputs; only the commented-out benchmarks below read these.
./target/release/crabz -f mgzip -p 32 -l 3 ./data.txt > ./bdata.3.txt.gz
./target/release/crabz -f mgzip -p 32 -l 6 ./data.txt > ./bdata.6.txt.gz
./target/release/crabz -f mgzip -p 32 -l 9 ./data.txt > ./bdata.9.txt.gz
./target/release/crabz -f mgzip -p 32 -l 12 ./data.txt > ./bdata.12.txt.gz

# Plain gzip inputs for the decompression benchmark in the backend loop.
gzip -3 -c ./data.txt > ./data.3.txt.gz
gzip -6 -c ./data.txt > ./data.6.txt.gz
gzip -9 -c ./data.txt > ./data.9.txt.gz

# hyperfine \
#     --warmup 3 \
#     --runs 3 \
#     --export-markdown "block_compression.md" \
#     --parameter-list num_threads 1,2,4,8,16,32 \
#     --parameter-list comp_level 3,6,9 \
#     './target/release/crabz -p {num_threads} -f bgzf -l {comp_level} ./data.txt > ./data.out.txt.gz ' \
#     'bgzip -f -c -l {comp_level} -@ {num_threads} ./data.txt > ./data.out.txt.gz'

# hyperfine \
#     --warmup 3 \
#     --runs 3 \
#     --export-markdown "block_decompression.md" \
#     --parameter-list comp_level 3,6,9 \
#     --parameter-list num_threads 1,2,4,8,16,32 \
#     './target/release/crabz -p {num_threads} -d -f bgzf ./bdata.{comp_level}.txt.gz > bdata.txt' \
#     'bgzip -d -@ {num_threads} -c ./bdata.{comp_level}.txt.gz > bdata.txt' \
#     'pigz -d -c ./bdata.{comp_level}.txt.gz > bdata.txt'

# hyperfine \
#     --warmup 3 \
#     --runs 3 \
#     --export-markdown "decompression_blocks_default.md" \
#     --parameter-list comp_level 3,6,9,12 \
#     --parameter-list num_threads 1,2,4,8,16,32 \
#     './target/release/crabz -p {num_threads} -d -f mgzip ./bdata.{comp_level}.txt.gz > data.txt' \
#     'pigz -d -c ./bdata.{comp_level}.txt.gz > data.txt'

# Compression Tests
# Each entry is a cargo feature list selecting one deflate backend; crabz is
# rebuilt from scratch for every backend before it is measured.
for backend in "deflate_zlib_ng,libdeflate,snap_default" "deflate_zlib,libdeflate,snap_default" "deflate_rust,libdeflate,snap_default"; do
    cargo clean
    cargo build --release --no-default-features --features "$backend"

    hyperfine \
        --warmup 3 \
        --runs 3 \
        --export-markdown "compression_${backend}.md" \
        --parameter-list num_threads 1,2,4,8,16,32 \
        --parameter-list comp_level 3,6,9 \
        './target/release/crabz -p {num_threads} -l {comp_level} -f gzip ./data.txt > ./data.out.txt.gz' \
        './target/release/crabz -p {num_threads} -l {comp_level} -f mgzip ./data.txt > ./data.out.txt.gz' \
        'pigz -c -p {num_threads} -{comp_level} ./data.txt > ./data.out.txt.gz'

    hyperfine \
        --warmup 3 \
        --runs 3 \
        --export-markdown "decompression_${backend}.md" \
        --parameter-list comp_level 3,6,9 \
        './target/release/crabz -d -f gzip ./data.{comp_level}.txt.gz > data.txt' \
        'pigz -d -c ./data.{comp_level}.txt.gz > data.txt'


done

# Remove every temporary this script created, including the bdata.* inputs
# that the original `rm data*` left behind.
rm -f ./data* ./bdata*

--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
[toolchain]
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
3 | # https://rust-lang.github.io/rustup/concepts/profiles.html 4 | profile = "default" 5 | channel = "1.66.0" 6 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; 3 | 4 | use anyhow::{bail, Error, Result}; 5 | use env_logger::Env; 6 | use flate2::read::MultiGzDecoder; 7 | use flate2::write::DeflateDecoder; 8 | use git_version::git_version; 9 | use gzp::deflate::{Bgzf, Gzip, Mgzip, RawDeflate}; 10 | use gzp::par::compress::Compression; 11 | use gzp::par::decompress::ParDecompressBuilder; 12 | use gzp::{BgzfSyncReader, MgzipSyncReader}; 13 | use gzp::{ZBuilder, ZWriter}; 14 | use lazy_static::lazy_static; 15 | use log::info; 16 | use std::collections::HashSet; 17 | use std::fs::File; 18 | use std::io::{self, BufReader, BufWriter, Read, Write}; 19 | use std::path::PathBuf; 20 | use std::process::exit; 21 | use structopt::{clap::AppSettings::ColoredHelp, StructOpt}; 22 | use strum::{EnumString, EnumVariantNames, VariantNames}; 23 | 24 | #[cfg(feature = "any_zlib")] 25 | use flate2::write::ZlibDecoder; 26 | #[cfg(feature = "any_zlib")] 27 | use gzp::deflate::Zlib; 28 | 29 | #[cfg(feature = "snappy")] 30 | use gzp::snap::Snap; 31 | #[cfg(feature = "snappy")] 32 | use snap::read::FrameDecoder; 33 | 34 | const BUFFERSIZE: usize = 64 * 1024; 35 | 36 | macro_rules! string_set { 37 | ( $( $x:expr ),* ) => { // Match zero or more comma delimited items 38 | { 39 | let mut temp_set = HashSet::new(); // Create a mutable HashSet 40 | $( 41 | temp_set.insert(String::from($x)); // Insert each item matched into the HashSet 42 | )* 43 | temp_set // Return the populated HashSet 44 | } 45 | }; 46 | } 47 | 48 | lazy_static! 
{ 49 | /// Return the number of cpus as an &str 50 | pub static ref NUM_CPU: String = num_cpus::get().to_string(); 51 | } 52 | 53 | pub const VERSION: &str = git_version!( 54 | cargo_prefix = "cargo:", 55 | prefix = "git:", 56 | // Note that on the CLI, the v* needs to be in single quotes 57 | // When passed here though there seems to be some magic quoting that happens. 58 | args = ["--always", "--dirty=-modified", "--match=v*"] 59 | ); 60 | 61 | /// Get a bufferd input reader from stdin or a file 62 | fn get_input(path: Option) -> Result> { 63 | let reader: Box = match path { 64 | Some(path) => { 65 | if path.as_os_str() == "-" { 66 | Box::new(BufReader::with_capacity(BUFFERSIZE, io::stdin())) 67 | } else { 68 | Box::new(BufReader::with_capacity(BUFFERSIZE, File::open(path)?)) 69 | } 70 | } 71 | None => Box::new(BufReader::with_capacity(BUFFERSIZE, io::stdin())), 72 | }; 73 | Ok(reader) 74 | } 75 | 76 | /// Get a buffered output writer from stdout or a file. 77 | /// 78 | /// If input is_some and in_place is true and output is None, figure out the inplace name 79 | #[allow(clippy::unnecessary_unwrap)] 80 | fn get_output( 81 | path: Option, 82 | input_file: Option, 83 | in_place: bool, 84 | is_decompress: bool, 85 | format: Format, 86 | ) -> Result> { 87 | let writer: Box = match path { 88 | Some(path) => { 89 | if path.as_os_str() == "-" { 90 | Box::new(BufWriter::with_capacity(BUFFERSIZE, io::stdout())) 91 | } else { 92 | Box::new(BufWriter::with_capacity(BUFFERSIZE, File::create(path)?)) 93 | } 94 | } 95 | None => { 96 | // Create a file 97 | if in_place && input_file.is_some() { 98 | let input_file = input_file.unwrap(); 99 | let (ext, allowed) = format.get_extension(); 100 | if is_decompress { 101 | if let Some(found_ext) = input_file.extension().map(|x| x.to_string_lossy()) { 102 | if allowed.contains(&found_ext.to_string()) { 103 | let input_file_str = input_file.to_string_lossy(); 104 | let stripped = input_file_str 105 | .strip_suffix(&format!(".{}", 
found_ext)) 106 | .unwrap(); 107 | Box::new(BufWriter::with_capacity( 108 | BUFFERSIZE, 109 | File::create(stripped)?, 110 | )) 111 | } else { 112 | bail!( 113 | "Extension on {:?} does not match expected of {:?}", 114 | input_file, 115 | ext 116 | ) 117 | } 118 | } else { 119 | bail!( 120 | "No extension on {:?}, does not match expected of {:?}", 121 | input_file, 122 | ext 123 | ) 124 | } 125 | } else { 126 | let out = format!("{}.{}", input_file.to_string_lossy(), ext); 127 | Box::new(BufWriter::with_capacity(BUFFERSIZE, File::create(out)?)) 128 | } 129 | } else { 130 | Box::new(BufWriter::with_capacity(BUFFERSIZE, io::stdout())) 131 | } 132 | } 133 | }; 134 | Ok(writer) 135 | } 136 | 137 | /// Check if err is a broken pipe. 138 | #[inline] 139 | fn is_broken_pipe(err: &Error) -> bool { 140 | if let Some(io_err) = err.root_cause().downcast_ref::() { 141 | if io_err.kind() == io::ErrorKind::BrokenPipe { 142 | return true; 143 | } 144 | } 145 | false 146 | } 147 | 148 | #[derive(EnumString, EnumVariantNames, strum::Display, Debug, Copy, Clone)] 149 | #[strum(serialize_all = "kebab_case")] 150 | enum Format { 151 | #[strum(serialize = "gzip", serialize = "gz")] 152 | Gzip, 153 | // TODO: is bgz valid? 
154 | #[strum(serialize = "bgzf", serialize = "bgz")] 155 | Bgzf, 156 | #[strum(serialize = "mgzip", serialize = "mgz")] 157 | Mgzip, 158 | #[cfg(feature = "any_zlib")] 159 | #[strum(serialize = "zlib", serialize = "zz")] 160 | Zlib, 161 | #[strum(serialize = "deflate")] 162 | RawDeflate, 163 | #[cfg(feature = "snappy")] 164 | #[strum(serialize = "snap", serialize = "sz")] 165 | Snap, 166 | } 167 | 168 | impl Format { 169 | /// Create a compressor writer matching the selected format 170 | fn create_compressor( 171 | &self, 172 | writer: W, 173 | num_threads: usize, 174 | compression_level: u32, 175 | pin_at: Option, 176 | ) -> Box 177 | where 178 | W: Write + Send + 'static, 179 | { 180 | match self { 181 | Format::Gzip => ZBuilder::::new() 182 | .num_threads(num_threads) 183 | .compression_level(Compression::new(compression_level)) 184 | .pin_threads(pin_at) 185 | .from_writer(writer), 186 | Format::Bgzf => ZBuilder::::new() 187 | .num_threads(num_threads) 188 | .compression_level(Compression::new(compression_level)) 189 | .pin_threads(pin_at) 190 | .from_writer(writer), 191 | Format::Mgzip => ZBuilder::::new() 192 | .num_threads(num_threads) 193 | .compression_level(Compression::new(compression_level)) 194 | .pin_threads(pin_at) 195 | .from_writer(writer), 196 | #[cfg(feature = "any_zlib")] 197 | Format::Zlib => ZBuilder::::new() 198 | .num_threads(num_threads) 199 | .compression_level(Compression::new(compression_level)) 200 | .pin_threads(pin_at) 201 | .from_writer(writer), 202 | Format::RawDeflate => ZBuilder::::new() 203 | .num_threads(num_threads) 204 | .compression_level(Compression::new(compression_level)) 205 | .pin_threads(pin_at) 206 | .from_writer(writer), 207 | #[cfg(feature = "snappy")] 208 | Format::Snap => ZBuilder::::new() 209 | .num_threads(num_threads) 210 | .compression_level(Compression::new(compression_level)) 211 | .pin_threads(pin_at) 212 | .from_writer(writer), 213 | } 214 | } 215 | 216 | fn get_highest_allowed_compression_level(&self) -> 
u32 { 217 | match self { 218 | Format::Gzip => 9, 219 | #[cfg(feature = "libdeflate")] 220 | Format::Bgzf => 12, 221 | #[cfg(not(feature = "libdeflate"))] 222 | Format::Bgzf => 9, 223 | #[cfg(feature = "libdeflate")] 224 | Format::Mgzip => 12, 225 | #[cfg(not(feature = "libdeflate"))] 226 | Format::Mgzip => 9, 227 | #[cfg(feature = "any_zlib")] 228 | Format::Zlib => 9, 229 | Format::RawDeflate => 9, 230 | // compression level is ignored 231 | #[cfg(feature = "snappy")] 232 | Format::Snap => u32::MAX, 233 | } 234 | } 235 | 236 | fn get_lowest_allowed_compression_level(&self) -> u32 { 237 | match self { 238 | Format::Gzip => 0, 239 | #[cfg(feature = "libdeflate")] 240 | Format::Bgzf => 1, 241 | #[cfg(not(feature = "libdeflate"))] 242 | Format::Bgzf => 0, 243 | #[cfg(feature = "libdeflate")] 244 | Format::Mgzip => 1, 245 | #[cfg(not(feature = "libdeflate"))] 246 | Format::Mgzip => 0, 247 | #[cfg(feature = "any_zlib")] 248 | Format::Zlib => 0, 249 | Format::RawDeflate => 0, 250 | // compression level is ignored 251 | #[cfg(feature = "snappy")] 252 | Format::Snap => 0, 253 | } 254 | } 255 | 256 | fn get_extension(&self) -> (&'static str, HashSet) { 257 | match self { 258 | Format::Gzip => ("gz", string_set!["gz"]), 259 | Format::Bgzf => ("gz", string_set!["gz", "bgz"]), 260 | Format::Mgzip => ("gz", string_set!["gz", "mgz"]), 261 | #[cfg(feature = "any_zlib")] 262 | Format::Zlib => ("zz", string_set!["zz", "z", "gz"]), 263 | Format::RawDeflate => ("deflate", string_set!["deflate", "zz", "z"]), 264 | #[cfg(feature = "snappy")] 265 | Format::Snap => ("sz", string_set!["sz", "snappy"]), 266 | } 267 | } 268 | } 269 | 270 | /// Compress and decompress files. 
271 | #[derive(StructOpt, Debug)] 272 | #[structopt(name = "crabz", author, global_setting(ColoredHelp), version = VERSION)] 273 | struct Opts { 274 | /// Output path to write to, empty or "-" to write to stdout 275 | #[structopt(short, long)] 276 | output: Option, 277 | 278 | /// Perform the compression / decompression in place. 279 | /// 280 | /// **NOTE** this will remove the input file at completion. 281 | #[structopt(short = "I", long)] 282 | in_place: bool, 283 | 284 | /// Input file to read from, empty or "-" to read from stdin 285 | #[structopt(name = "FILE", parse(from_os_str))] 286 | file: Option, 287 | 288 | /// The format to use. 289 | #[structopt(short, long, default_value = "gzip", possible_values = Format::VARIANTS)] 290 | format: Format, 291 | 292 | /// Compression level 293 | #[structopt(short = "l", long, default_value = "6")] 294 | compression_level: u32, 295 | 296 | /// Number of compression threads to use, or if decompressing a format that allow for multi-threaded 297 | /// decompression, the number to use. Note that > 4 threads for decompression doesn't seem to help. 298 | #[structopt(short = "p", long, default_value = NUM_CPU.as_str())] 299 | compression_threads: usize, 300 | 301 | /// Flag to switch to decompressing inputs. Note: this flag may change in future releases 302 | #[structopt(short, long)] 303 | decompress: bool, 304 | 305 | /// Specify the physical core to pin threads at. 306 | /// 307 | /// This can provide a significant performance improvement, but has the downside of possibly conflicting 308 | /// with other pinned cores. If you are running multiple instances of `crabz` at once you can manually 309 | /// space out the pinned cores. 
310 | /// 311 | /// # Example 312 | /// - Instance 1 has `-p 4 -P 0` set indicating that it will use 4 cores pinned at 0, 1, 2, 3 313 | /// - Instance 2 has `-p 4 -P 4` set indicating that it will use 4 cores pinned at 4, 5, 6, 7 314 | #[structopt(short = "P", long)] 315 | pin_at: Option, 316 | 317 | /// Suppress non-error messages 318 | #[structopt(short = "Q", long)] 319 | quiet: bool, 320 | } 321 | 322 | fn main() -> Result<()> { 323 | let opts = setup(); 324 | if opts.compression_level > opts.format.get_highest_allowed_compression_level() 325 | || opts.compression_level < opts.format.get_lowest_allowed_compression_level() 326 | { 327 | return Err(Error::msg("Invalid compression level")); 328 | } 329 | 330 | if opts.decompress { 331 | if let Err(err) = run_decompress( 332 | get_input(opts.file.clone())?, 333 | get_output( 334 | opts.output, 335 | opts.file.clone(), 336 | opts.in_place, 337 | opts.decompress, 338 | opts.format, 339 | )?, 340 | opts.format, 341 | opts.compression_threads, 342 | opts.pin_at, 343 | ) { 344 | if is_broken_pipe(&err) { 345 | exit(0) 346 | } 347 | return Err(err); 348 | } 349 | } else if let Err(err) = run_compress( 350 | get_input(opts.file.clone())?, 351 | get_output( 352 | opts.output, 353 | opts.file.clone(), 354 | opts.in_place, 355 | opts.decompress, 356 | opts.format, 357 | )?, 358 | opts.format, 359 | opts.compression_level, 360 | opts.compression_threads, 361 | opts.pin_at, 362 | ) { 363 | if is_broken_pipe(&err) { 364 | exit(0) 365 | } 366 | return Err(err); 367 | } 368 | 369 | // Remove input file 370 | if opts.in_place { 371 | if let Some(file) = opts.file { 372 | std::fs::remove_file(file)?; 373 | } 374 | } 375 | 376 | Ok(()) 377 | } 378 | 379 | /// Run the compression program, returning any found errors 380 | fn run_compress( 381 | mut input: R, 382 | output: W, 383 | format: Format, 384 | compression_level: u32, 385 | num_threads: usize, 386 | pin_at: Option, 387 | ) -> Result<()> 388 | where 389 | R: Read, 390 | W: 
Write + Send + 'static, 391 | { 392 | info!( 393 | "Compressing ({}) with {} threads at compression level {}.", 394 | format.to_string(), 395 | num_threads, 396 | compression_level 397 | ); 398 | let mut writer = format.create_compressor(output, num_threads, compression_level, pin_at); 399 | io::copy(&mut input, &mut writer)?; 400 | writer.finish()?; 401 | Ok(()) 402 | } 403 | 404 | /// Run the compression program, returning any found errors 405 | fn run_decompress( 406 | mut input: R, 407 | mut output: W, 408 | format: Format, 409 | num_threads: usize, 410 | pin_at: Option, 411 | ) -> Result<()> 412 | where 413 | R: Read + Send + 'static, 414 | W: Write + Send + 'static, 415 | { 416 | info!( 417 | "Decompressing ({}) with {} threads available.", 418 | format.to_string(), 419 | num_threads 420 | ); 421 | 422 | match format { 423 | Format::Gzip => { 424 | let mut reader = MultiGzDecoder::new(input); 425 | io::copy(&mut reader, &mut output)?; 426 | output.flush()?; 427 | } 428 | Format::Bgzf => { 429 | if num_threads == 0 { 430 | let mut reader = BgzfSyncReader::new(input); 431 | io::copy(&mut reader, &mut output)?; 432 | output.flush()?; 433 | } else { 434 | let mut reader = ParDecompressBuilder::::new() 435 | .num_threads(num_threads) 436 | .unwrap() 437 | .pin_threads(pin_at) 438 | .from_reader(input); 439 | io::copy(&mut reader, &mut output)?; 440 | output.flush()?; 441 | reader.finish()?; 442 | }; 443 | } 444 | Format::Mgzip => { 445 | if num_threads == 0 { 446 | let mut reader = MgzipSyncReader::new(input); 447 | io::copy(&mut reader, &mut output)?; 448 | output.flush()?; 449 | } else { 450 | let mut reader = ParDecompressBuilder::::new() 451 | .num_threads(num_threads) 452 | .unwrap() 453 | .pin_threads(pin_at) 454 | .from_reader(input); 455 | io::copy(&mut reader, &mut output)?; 456 | output.flush()?; 457 | reader.finish()?; 458 | }; 459 | } 460 | #[cfg(feature = "any_zlib")] 461 | Format::Zlib => { 462 | let mut writer = ZlibDecoder::new(output); 463 | 
io::copy(&mut input, &mut writer)?; 464 | writer.finish()?; 465 | } 466 | Format::RawDeflate => { 467 | let mut writer = DeflateDecoder::new(output); 468 | io::copy(&mut input, &mut writer)?; 469 | writer.finish()?; 470 | } 471 | #[cfg(feature = "snappy")] 472 | Format::Snap => { 473 | let mut reader = FrameDecoder::new(input); 474 | io::copy(&mut reader, &mut output)?; 475 | output.flush()?; 476 | } 477 | } 478 | 479 | Ok(()) 480 | } 481 | /// Parse args and set up logging / tracing 482 | fn setup() -> Opts { 483 | let opts = Opts::from_args(); 484 | 485 | if opts.quiet { 486 | std::env::set_var("RUST_LOG", "error"); 487 | } else if std::env::var("RUST_LOG").is_err() { 488 | std::env::set_var("RUST_LOG", "info"); 489 | } 490 | env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); 491 | 492 | opts 493 | } 494 | --------------------------------------------------------------------------------