├── .github ├── dependabot.yml └── workflows │ ├── benchmark.yaml │ ├── pages.yaml │ ├── sync-images.yaml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Cargo.lock ├── Cargo.toml ├── README.md ├── benchmark └── sources.yaml ├── build.rs ├── docs ├── about.md └── preview.gif ├── rustfmt.toml ├── src ├── compression.rs ├── index.rs ├── input │ ├── layers.rs │ ├── local_image.rs │ ├── mod.rs │ └── remote_image.rs ├── io_utils.rs ├── layer_combiner.rs ├── location.rs ├── main.rs ├── output_image │ ├── image.rs │ ├── layers.rs │ ├── mod.rs │ └── stats.rs ├── platform_matcher.rs ├── progress.rs └── test_utils.rs └── web ├── .gitignore ├── .prettierrc.mjs ├── README.md ├── astro.config.mjs ├── package-lock.json ├── package.json ├── public └── favicon.svg ├── src ├── benchmark_parser.ts ├── components │ ├── BenchmarkChart.astro │ ├── Card.astro │ ├── Header.astro │ ├── ReactBenchmarkChart.tsx │ ├── ReactLayerChart.tsx │ ├── Terminal.astro │ └── pull-component.tsx ├── env.d.ts ├── github_client.ts ├── layouts │ ├── Layout.astro │ └── base.css ├── manifest_parser.ts ├── media │ └── logo.png ├── pages │ ├── benchmarks.astro │ └── index.astro └── utils.ts ├── tailwind.config.mjs └── tsconfig.json /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 2 | 3 | version: 2 4 | updates: 5 | - package-ecosystem: "cargo" 6 | directory: "/" 7 | schedule: 8 | interval: "daily" 9 | groups: 10 | dependencies: 11 | patterns: 12 | - "*" 13 | - package-ecosystem: "github-actions" 14 | directory: "/" 15 | schedule: 16 | interval: "daily" 17 | groups: 18 | dependencies: 19 | patterns: 20 | - "*" 21 | - package-ecosystem: "npm" 22 | directory: "web/" 23 | schedule: 24 | interval: "daily" 25 | groups: 26 | dependencies: 27 | patterns: 28 | - "*" 29 | -------------------------------------------------------------------------------- /.github/workflows/benchmark.yaml: -------------------------------------------------------------------------------- 1 | name: Benchmark 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * 0' 6 | workflow_dispatch: 7 | 8 | jobs: 9 | matrix: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | destinations: ${{ steps.matrix.outputs.result }} 13 | sizes: ${{ steps.sizes.outputs.result }} 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: matrix all 17 | uses: mikefarah/yq@v4.45.1 18 | id: matrix 19 | with: 20 | cmd: yq -o=json '.upstream_images' 'benchmark/sources.yaml' 21 | - name: sizes 22 | uses: mikefarah/yq@v4.45.1 23 | id: sizes 24 | with: 25 | cmd: yq -r -o=json '.sizes | join(",")' 'benchmark/sources.yaml' 26 | 27 | benchmark: 28 | runs-on: ubuntu-latest 29 | 30 | needs: [ matrix ] 31 | 32 | permissions: 33 | contents: read 34 | packages: read 35 | id-token: write 36 | 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | include: ${{ fromJSON(needs.matrix.outputs.destinations) }} 41 | 42 | concurrency: 43 | group: "benchmark-${{ matrix.destination }}" 44 | cancel-in-progress: false 45 | 46 | env: 47 | RUNS: 15 48 | BENCHMARK_DIR: "${{ matrix.disk && '/tmp/image' || '/dev/shm/image' }}" 49 | HYPERFINE_FLAGS: "--ignore-failure" 50 | # HYPERFINE_FLAGS: "${{ matrix.ignore-failure && '--ignore-failure' || '' }}" 51 | 52 | steps: 53 | - name: Disk space before 54 | run: df -h 55 | - name: Maximize build space 56 | shell: bash 57 | run: | 58 | sudo rm -rf /usr/share/dotnet/* & 59 | sudo rm -rf /usr/local/lib/android/* & 60 | 
- name: Disk space after 61 | run: df -h 62 | 63 | - uses: actions/checkout@v4 64 | - name: install skopeo 65 | uses: jaxxstorm/action-install-gh-release@v1 66 | with: 67 | repo: lework/skopeo-binary 68 | tag: v1.16.1 69 | cache: true 70 | extension-matching: disable 71 | rename-to: skopeo 72 | chmod: 0755 73 | 74 | - name: skopeo version 75 | run: skopeo --version 76 | 77 | - name: "Skopeo authenticate ghcr.io" 78 | run: skopeo login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io 79 | 80 | - run: wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.16.1/hyperfine_1.16.1_amd64.deb 81 | - run: sudo dpkg -i hyperfine_1.16.1_amd64.deb 82 | - run: hyperfine --version 83 | 84 | - name: Run Benchmark 85 | run: >- 86 | hyperfine "skopeo copy --image-parallel-copies=10 docker://${{ env.tag_prefix }}:{image}-{type} oci:${{ env.BENCHMARK_DIR }}" \ 87 | --warmup 1 \ 88 | -L image ${{ matrix.destination }} \ 89 | -L type original,${{ needs.matrix.outputs.sizes }} \ 90 | --prepare "rm -rf ${{ env.BENCHMARK_DIR }}/" \ 91 | --cleanup "rm -rf ${{ env.BENCHMARK_DIR }}/" \ 92 | ${{ env.HYPERFINE_FLAGS }} \ 93 | --runs="${{ env.RUNS }}" --export-json=results-${{ matrix.destination }}.json --shell=none 94 | env: 95 | tag_prefix: "ghcr.io/${{ github.repository }}/demo" 96 | 97 | - uses: actions/upload-artifact@v4 98 | with: 99 | name: benchmark-results-${{ matrix.destination }} 100 | path: results-${{ matrix.destination }}.json 101 | retention-days: 1 102 | if-no-files-found: error 103 | 104 | create-artifact: 105 | runs-on: ubuntu-latest 106 | needs: 107 | - benchmark 108 | steps: 109 | - uses: actions/download-artifact@v4 110 | with: 111 | merge-multiple: true 112 | - run: | 113 | cat results-*.json | jq ".results[]" | jq -s "{results: .}" > results.json 114 | - uses: actions/upload-artifact@v4 115 | with: 116 | name: benchmark-results 117 | path: results.json 118 | retention-days: 5 119 | if-no-files-found: error 120 | -------------------------------------------------------------------------------- /.github/workflows/pages.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy static content to Pages 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | actions: read 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | deploy: 21 | environment: 22 | name: github-pages 23 | url: ${{ steps.deployment.outputs.page_url }} 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v4 28 | 29 | - uses: actions/setup-node@v4 30 | with: 31 | cache: npm 32 | cache-dependency-path: web/package-lock.json 33 | 34 | - run: npm install 35 | working-directory: web/ 36 | 37 | - name: Setup Pages 38 | id: pages 39 | uses: actions/configure-pages@v5 40 | 41 | - run: npm run build 42 | working-directory: web/ 43 | env: 44 | SITE_ORIGIN: ${{ steps.pages.outputs.origin }} 45 | SITE_PREFIX: ${{ steps.pages.outputs.base_path }} 46 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 47 | 48 | - name: Upload artifact 49 | uses: actions/upload-pages-artifact@v3 50 | with: 51 | path: 'web/dist/' 52 | - name: Deploy to GitHub Pages 53 | id: deployment 54 | uses: actions/deploy-pages@v4 55 | -------------------------------------------------------------------------------- 
/.github/workflows/sync-images.yaml: -------------------------------------------------------------------------------- 1 | name: Sync Upstream Images 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 0 * * *' 7 | 8 | jobs: 9 | matrix: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | upstream_images: ${{ steps.matrix.outputs.result }} 13 | destination_images: ${{ steps.destinations.outputs.result }} 14 | sizes: ${{ steps.sizes.outputs.result }} 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: matrix all 18 | uses: mikefarah/yq@v4.45.1 19 | id: matrix 20 | with: 21 | cmd: yq -o=json '.upstream_images' 'benchmark/sources.yaml' 22 | - name: matrix destinations 23 | uses: mikefarah/yq@v4.45.1 24 | id: destinations 25 | with: 26 | cmd: yq -o=json '[.upstream_images[].destination]' 'benchmark/sources.yaml' 27 | - name: matrix sizes 28 | uses: mikefarah/yq@v4.45.1 29 | id: sizes 30 | with: 31 | cmd: yq -o=json '.sizes' 'benchmark/sources.yaml' 32 | 33 | build: 34 | runs-on: ubuntu-latest 35 | 36 | env: 37 | build-profile: 'lto' 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | 42 | - uses: actions-rust-lang/setup-rust-toolchain@v1 43 | with: 44 | cache-on-failure: 'false' 45 | rustflags: '-C target-cpu=native' 46 | 47 | - name: Run sccache-cache 48 | uses: mozilla-actions/sccache-action@v0.0.7 49 | 50 | - name: Build 51 | run: cargo build --profile=${{ env.build-profile }} -F perf 52 | env: 53 | SCCACHE_GHA_ENABLED: "true" 54 | RUSTC_WRAPPER: "sccache" 55 | 56 | - name: Size 57 | run: du -hs target/${{ env.build-profile }}/docker-repack 58 | 59 | - uses: actions/upload-artifact@v4 60 | with: 61 | name: cli 62 | path: target/${{ env.build-profile }}/docker-repack* 63 | retention-days: 1 64 | if-no-files-found: error 65 | 66 | sync-original-images: 67 | runs-on: ubuntu-latest 68 | needs: [ matrix ] 69 | 70 | permissions: 71 | contents: read 72 | packages: write 73 | attestations: write 74 | id-token: write 75 | 76 | strategy: 77 | fail-fast: false 78 | matrix: 79 | include: ${{ fromJSON(needs.matrix.outputs.upstream_images) }} 80 | 81 | concurrency: 82 | group: "sync-${{ matrix.upstream_image }}" 83 | cancel-in-progress: false 84 | 85 | steps: 86 | - name: install skopeo 87 | uses: jaxxstorm/action-install-gh-release@v1 88 | with: 89 | repo: lework/skopeo-binary 90 | tag: v1.16.1 91 | cache: true 92 | extension-matching: disable 93 | rename-to: skopeo 94 | chmod: 0755 95 | 96 | - name: "Skopeo authenticate ghcr.io" 97 | run: skopeo login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io 98 | 99 | - name: "Skopeo authenticate dockerhub" 100 | run: skopeo login --username ${{ secrets.DOCKER_HUB_USER }} --password ${{ secrets.DOCKER_HUB_PAT }} docker.io 101 | 102 | - name: Download image 103 | run: skopeo copy --all --image-parallel-copies=40 --retry-times=3 "${{ env.source }}" "${{ env.dest }}" 104 | env: 105 | source: docker://${{ matrix.upstream_image }} 106 | dest: oci:image 107 | 108 | - name: Upload image - ghcri 109 | run: skopeo copy --all --retry-times=3 --image-parallel-copies=1 "${{ env.source }}" "${{ env.dest }}" 110 | env: 111 | source: oci:image 112 | dest: docker://ghcr.io/${{ github.repository }}/demo:${{ matrix.destination }}-original 113 | 114 | - name: Upload image - dockerhub 115 | run: skopeo copy --all --retry-times=3 --image-parallel-copies=1 "${{ env.source }}" "${{ env.dest }}" 116 | shell: bash 117 | env: 118 | source: oci:image 119 | dest: docker://docker.io/orfal/docker-repack-demo:${{ matrix.destination }}-original 120 | 
121 | - run: skopeo inspect oci:image > manifest-${{ matrix.destination }}-original.json 122 | 123 | - uses: actions/upload-artifact@v4 124 | with: 125 | name: manifest-${{ matrix.destination }}-original 126 | path: manifest-${{ matrix.destination }}-original.json 127 | retention-days: 1 128 | if-no-files-found: error 129 | 130 | repack-image: 131 | runs-on: ubuntu-latest 132 | needs: [ sync-original-images, matrix, build ] 133 | permissions: 134 | contents: read 135 | packages: write 136 | attestations: write 137 | id-token: write 138 | 139 | strategy: 140 | fail-fast: false 141 | matrix: 142 | image: ${{ fromJSON(needs.matrix.outputs.destination_images) }} 143 | target_size: ${{ fromJSON(needs.matrix.outputs.sizes) }} 144 | 145 | concurrency: 146 | group: "push-${{ matrix.image }}-${{ matrix.target_size }}" 147 | cancel-in-progress: false 148 | 149 | steps: 150 | - uses: actions/download-artifact@v4 151 | with: 152 | name: cli 153 | 154 | - run: chmod +x docker-repack 155 | - run: ./docker-repack --version 156 | 157 | - name: Disk space before 158 | run: df -h 159 | - name: Maximize build space 160 | shell: bash 161 | run: | 162 | sudo rm -rf /usr/share/dotnet/* & 163 | sudo rm -rf /usr/local/lib/android/* & 164 | sudo rm -rf /opt/ghc/* & 165 | sudo rm -rf /opt/hostedtoolcache/CodeQL/* & 166 | sudo docker image prune --all --force & 167 | - name: Disk space after 168 | run: df -h 169 | 170 | - name: install skopeo 171 | uses: jaxxstorm/action-install-gh-release@v1 172 | with: 173 | repo: lework/skopeo-binary 174 | tag: v1.16.1 175 | cache: true 176 | extension-matching: disable 177 | rename-to: skopeo 178 | chmod: 0755 179 | 180 | - name: "Skopeo authenticate ghcr.io" 181 | run: skopeo login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io 182 | 183 | - name: "Skopeo authenticate dockerhub" 184 | run: skopeo login --username ${{ secrets.DOCKER_HUB_USER }} --password ${{ secrets.DOCKER_HUB_PAT }} docker.io 185 | 186 | - name: Download image - ghcr 187 | run: skopeo copy --all --image-parallel-copies=40 --retry-times=3 "${{ env.source }}" "${{ env.dest }}" 188 | shell: bash 189 | env: 190 | source: "docker://ghcr.io/${{ github.repository }}/demo:${{ matrix.image }}-original" 191 | dest: "oci:image" 192 | 193 | - name: Repack 194 | id: repack 195 | run: | 196 | ./docker-repack "oci://image" "oci://${{ env.output_dir }}" --target-size="${{ matrix.target_size }}" --platform="linux/{amd64,arm64}" --concurrency=4 197 | echo "output-dir=${{ env.output_dir }}" >> $GITHUB_OUTPUT 198 | shell: bash 199 | env: 200 | output_dir: "oci/${{ matrix.target_size }}/" 201 | 202 | - name: Upload image - ghcr 203 | run: skopeo copy --all --image-parallel-copies=40 --retry-times=3 "${{ env.source }}" "${{ env.dest }}" 204 | shell: bash 205 | env: 206 | source: 'oci:${{ steps.repack.outputs.output-dir }}' 207 | dest: docker://ghcr.io/${{ github.repository }}/demo:${{ matrix.image }}-${{ matrix.target_size }} 208 | 209 | - name: Upload image - dockerhub 210 | run: skopeo copy --all --image-parallel-copies=40 --retry-times=3 "${{ env.source }}" "${{ env.dest }}" 211 | shell: bash 212 | env: 213 | source: 'oci:${{ steps.repack.outputs.output-dir }}' 214 | dest: docker://docker.io/orfal/docker-repack-demo:${{ matrix.image }}-${{ matrix.target_size }} 215 | 216 | - run: skopeo inspect oci:${{ steps.repack.outputs.output-dir }} > manifest-${{ matrix.image }}-${{ matrix.target_size }}.json 217 | 218 | - uses: actions/upload-artifact@v4 219 | with: 220 | name: manifest-${{ matrix.image 
}}-${{ matrix.target_size }} 221 | path: manifest-${{ matrix.image }}-${{ matrix.target_size }}.json 222 | retention-days: 1 223 | if-no-files-found: error 224 | 225 | create-artifact: 226 | runs-on: ubuntu-latest 227 | needs: 228 | - repack-image 229 | - sync-original-images 230 | steps: 231 | - uses: actions/download-artifact@v4 232 | with: 233 | merge-multiple: true 234 | - uses: actions/upload-artifact@v4 235 | with: 236 | name: image-manifests 237 | path: manifest-*.json 238 | retention-days: 5 239 | if-no-files-found: error 240 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - v* 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | inputs: 10 | publish-tag: 11 | type: string 12 | required: false 13 | description: "Publish a tag" 14 | 15 | name: CI 16 | 17 | jobs: 18 | build_and_test: 19 | name: Rust project 20 | runs-on: ${{ matrix.os }} 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | os: [ ubuntu-latest, macos-latest, windows-latest ] 25 | include: 26 | - os: ubuntu-latest 27 | bin: docker-repack 28 | name: docker-repack-Linux-x86_64.tar.gz 29 | - os: macOS-latest 30 | bin: docker-repack 31 | name: docker-repack-Darwin-x86_64.tar.gz 32 | - os: windows-latest 33 | bin: docker-repack.exe 34 | name: docker-repack-Windows-x86_64.zip 35 | env: 36 | RUST_BACKTRACE: "1" 37 | SCCACHE_GHA_ENABLED: "true" 38 | RUSTC_WRAPPER: "sccache" 39 | 40 | release_profile: "lto" 41 | 42 | steps: 43 | - uses: actions/checkout@v4 44 | with: 45 | ref: ${{ inputs.publish-tag || null }} 46 | 47 | - name: Run sccache-cache 48 | uses: mozilla-actions/sccache-action@v0.0.7 49 | 50 | - uses: actions-rust-lang/setup-rust-toolchain@v1 51 | with: 52 | cache-on-failure: 'false' 53 | 54 | - name: Build 55 | run: cargo build --all-targets 56 | 57 | - name: Test 58 | run: cargo test 59 | 60 | - name: Integration test 61 | run: cargo run -- "docker://python:3.11-slim" oci://test-image/ --target-size="500MB" --compression-level=1 --platform=linux/arm64 62 | 63 | - name: Build release 64 | if: startsWith(github.ref, 'refs/tags/') || inputs.publish-tag 65 | run: cargo build --profile=${{env.release_profile}} 66 | - name: Package 67 | if: startsWith(github.ref, 'refs/tags/') || inputs.publish-tag 68 | shell: bash 69 | run: | 70 | strip target/${{env.release_profile}}/${{ matrix.bin }} 71 | cd target/${{env.release_profile}} 72 | if [[ "${{ matrix.os }}" == "windows-latest" ]] 73 | then 74 | 7z a ../../${{ matrix.name }} ${{ matrix.bin }} 75 | else 76 | tar czvf ../../${{ matrix.name }} ${{ matrix.bin }} 77 | fi 78 | cd - 79 | - name: Archive binaries 80 | uses: actions/upload-artifact@v4 81 | if: startsWith(github.ref, 'refs/tags/') || inputs.publish-tag 82 | with: 83 | name: build-${{ matrix.name }} 84 | path: ${{ matrix.name }} 85 | 86 | create_release: 87 | name: Release 88 | runs-on: ubuntu-latest 89 | if: startsWith(github.ref, 'refs/tags/') || inputs.publish-tag 90 | needs: [ build_and_test ] 91 | permissions: 92 | contents: write 93 | steps: 94 | - name: Checkout sources 95 | uses: actions/checkout@v4 96 | with: 97 | ref: ${{ inputs.publish-tag || null }} 98 | - uses: actions/download-artifact@v4 99 | - name: Publish 100 | uses: softprops/action-gh-release@v2 101 | with: 102 | tag_name: "${{ inputs.publish-tag }}" 103 | draft: false 104 | files: | 105 | **/*.tar.gz 106 | **/*.zip 107 | 108 | checks: 109 | name: Checks 
110 | runs-on: ubuntu-latest 111 | env: 112 | RUST_BACKTRACE: "1" 113 | SCCACHE_GHA_ENABLED: "true" 114 | RUSTC_WRAPPER: "sccache" 115 | steps: 116 | - name: Checkout sources 117 | uses: actions/checkout@v4 118 | with: 119 | ref: ${{ inputs.publish-tag || null }} 120 | 121 | - name: Run sccache-cache 122 | uses: mozilla-actions/sccache-action@v0.0.7 123 | 124 | - uses: actions-rust-lang/setup-rust-toolchain@v1 125 | with: 126 | cache-on-failure: 'false' 127 | 128 | - name: Run cargo fmt 129 | if: success() || failure() 130 | run: cargo fmt --all -- --check 131 | 132 | - name: Run cargo check 133 | if: success() || failure() 134 | run: cargo check 135 | 136 | - if: success() || failure() 137 | run: cargo clippy --all-targets --all-features -- -D warnings 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | oci/ 3 | *.txt 4 | .idea/ 5 | compress.log 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/doublify/pre-commit-rust 3 | rev: v1.0 4 | hooks: 5 | - id: fmt 6 | - id: cargo-check -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "docker-repack" 3 | version = "0.5.0" 4 | edition = "2021" 5 | build = "build.rs" 6 | description = "Repack Docker images for faster pull speeds" 7 | license = "MIT" 8 | repository = "https://github.com/orf/docker-repack/" 9 | 10 | [dependencies] 11 | tracing = { version = "0.1.41", features = ["release_max_level_debug", "max_level_trace"] } 12 | tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } 13 | tracing-indicatif = "0.3.6" 14 | memmap2 = "0.9.4" 15 | strum = { version = "0.26.3", features = ["derive"] } 16 | memchr = { version = "2.7.4", default-features = false } 17 | anyhow = "1.0.94" 18 | zstd = { version = "0.13.2" } 19 | tar = { version = "0.4.43", default-features = false } 20 | itertools = "0.14.0" 21 | flate2 = "1.0.35" 22 | byte-unit = { version = "5.1.6" } 23 | sha2 = { version = "0.10.8" } 24 | oci-spec = { version = "0.7.1", default-features = false, features = ["image"] } 25 | serde_json = { version = "1.0.133", default-features = false } 26 | const-hex = { version = "1.14.0", default-features = false } 27 | chrono = { version = "0.4.39", default-features = false, features = ["now"] } 28 | serde = "1.0.216" 29 | rayon = "1.10.0" 30 | indicatif = "0.17.9" 31 | oci-client = "0.14.0" 32 | tokio = { version = "1.42.0", features = ["full"] } 33 | docker_credential = "1.3.1" 34 | tokio-util = { version = "0.7.13", features = ["full"] } 35 | clap = { version = "4.5.27", features = ["derive"] } 36 | shadow-rs = "0.38.0" 37 | rand = {version = "0.8.5", default-features = false, features = ["small_rng", "getrandom", "min_const_gen"]} 38 | globset = { version = "0.4.15", default-features = false } 39 | 40 | [features] 41 | default = ["perf", "zstd-experimental"] 42 | perf = ["tracing/release_max_level_info", "flate2/zlib-ng", "zstd/arrays"] 43 | zstd-experimental = ["zstd/experimental"] 44 | 45 | [target.'cfg(all(unix, any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))'.dependencies] 46 | sha2 = { version = "0.10.8", features = ["asm"] } 47 | 
48 | 49 | [profile.lto] 50 | inherits = "release" 51 | lto = "thin" 52 | codegen-units = 1 53 | panic = "abort" 54 | 55 | [profile.flamegraph] 56 | inherits = "lto" 57 | debug = true 58 | 59 | [dev-dependencies] 60 | test-log = { version = "0.2.16", features = ["color", "trace"] } 61 | 62 | [build-dependencies] 63 | shadow-rs = "0.38.0" 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-repack - Speed up Docker image pulls 2 | 3 | This tool repacks a Docker image into a smaller, more efficient version that makes it significantly faster to pull. It 4 | does this by using a few [different techniques](docs/about.md) such as removing redundant data and improving compression 5 | ratios. Some examples of improvements: 6 | 7 | | Image | Before | After | Reduction | 8 | |---------------------------------------------------------------------------------|------------------------------|-------------------|---------------------------| 9 | | [google/deepsomatic](https://registry.hub.docker.com/r/google/deepsomatic/tags) | 16.4GB, 2 minutes 14 seconds | 4.8GB, 16 seconds | 3.4x smaller, 8.2x faster | 10 | | [google/deepvariant](https://hub.docker.com/r/google/deepvariant/tags) | 12.7GB, 1 minute, 2 seconds | 3GB, 10 seconds | 6.3x faster, 4.2x smaller | 11 | | [google/cloud-sdk](https://registry.hub.docker.com/r/google/cloud-sdk/tags) | 1.1GB, 5 seconds | 187MB, 1 second | 3.8x faster, 6.1x smaller | 12 | | [mathworks/matlab](https://hub.docker.com/r/mathworks/matlab/tags) | 2GB, 5 seconds | 230.7MB, 1 second | 6.5x faster, 9x smaller | 13 | | [tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/tags) | 3.5GB, 11 seconds | 2GB, 3 seconds | 3.3x faster, 1.8x smaller | 14 | 15 | See the [benchmarks page](https://orf.github.io/docker-repack/benchmarks/) for a full comparison of pulling times across 16 | many different images. 17 | 18 | ![](./docs/preview.gif) 19 | 20 | ## Usage 21 | 22 | ```bash 23 | $ docker-repack docker://alpine:latest oci://directory/ --target-size=50MB 24 | ``` 25 | 26 | Full arguments: 27 | 28 | ```bash 29 | Usage: docker-repack [OPTIONS] --target-size 30 | 31 | Arguments: 32 | Source image. e.g. 
`python:3.11`, `tensorflow/tensorflow:latest` or `oci://local/image/path` 33 | Location to save image, e.g oci://directory/path/ 34 | 35 | Options: 36 | -t, --target-size Target size for layers 37 | --concurrency 38 | --keep-temp-files 39 | --compression-level [default: 14] 40 | --platform [default: linux/*] 41 | -h, --help Print help 42 | -V, --version Print version 43 | ``` 44 | 45 | ## Installation 46 | 47 | ### Pre-compiled binaries 48 | 49 | Download a release [from the releases page](https://github.com/orf/docker-repack/releases) 50 | 51 | ### Cargo 52 | 53 | ```bash 54 | cargo install docker-repack 55 | ```` 56 | 57 | -------------------------------------------------------------------------------- /benchmark/sources.yaml: -------------------------------------------------------------------------------- 1 | sizes: 2 | - '50MB' 3 | - '100MB' 4 | - '200MB' 5 | - '500MB' 6 | 7 | upstream_images: 8 | - upstream_image: "tensorflow/tensorflow:2.17.0-gpu" 9 | destination: "tensorflow-2.17.0-gpu" 10 | - upstream_image: "pytorch/pytorch:2.4.1-cuda11.8-cudnn9-devel" 11 | destination: "pytorch-2.4.1-cuda11.8-cudnn9-devel" 12 | - upstream_image: "nvidia/cuda:12.5.1-cudnn-devel-ubuntu20.04" 13 | destination: "nvidia-cuda-12.5.1-cudnn-devel-ubuntu20.04" 14 | - upstream_image: "neo4j:enterprise" 15 | destination: "neo4j-enterprise" 16 | - upstream_image: "elasticsearch:8.15.0" 17 | destination: "elasticsearch-8.15.0" 18 | - upstream_image: "spark:python3-java17" 19 | destination: "spark-python3-java17" 20 | - upstream_image: "storm:2.6" 21 | destination: "storm-2.6" 22 | - upstream_image: "flink:1.20-java8" 23 | destination: "flink-1.20-java8" 24 | - upstream_image: "bitnami/airflow-worker:2.10.1" 25 | destination: "bitnami-airflow-worker-2.10.1" 26 | - upstream_image: "mathworks/matlab:r2022a" 27 | destination: "mathworks-matlab-r2022a" 28 | - upstream_image: "google/cloud-sdk:491.0.0" 29 | destination: "google-cloud-sdk-491.0.0" 30 | - upstream_image: "google/deepvariant:b350512297c12-gpu" 31 | destination: "google-deepvariant-gpu" 32 | disk: true 33 | - upstream_image: "google/deepsomatic:1.7.0-gpu" 34 | destination: "google-deepsomatic-1.7.0-gpu" 35 | disk: true 36 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | shadow_rs::ShadowBuilder::builder().build().unwrap(); 3 | } 4 | -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- 1 | # Repacking Docker images 2 | 3 | Docker repack applies several techniques to optimize Docker images: 4 | 5 | - [Removing redundant files](#removing-redundant-data) 6 | - [Compressing duplicate data](#compressing-duplicate-data) 7 | - [Move small files and directories into the first layer](#move-small-files-and-directories-into-the-first-layer) 8 | - [Compressing with zstd](#compressing-with-zstd) 9 | 10 | ## Removing redundant data 11 | 12 | All files and directories that are not present in the final image due to deletions in previous layers or being 13 | overwritten are removed during repacking. 14 | 15 | ## Compressing duplicate data 16 | 17 | All files are hashed when parsing the image. Files that contain duplicate data are stored in the same layer, ensuring 18 | that `zstd` can optimally compress the data to further reduce layer sizes. 
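For illustration, a minimal sketch of this grouping step (the `group_by_hash` helper and the example paths are illustrative, not the repository's actual implementation):

```rust
use sha2::{Digest, Sha256};
use std::collections::HashMap;

/// Bucket file contents by SHA-256 digest so that identical data is
/// emitted into the same output layer, where zstd compresses the repeats away.
fn group_by_hash<'a>(files: &[(&'a str, &'a [u8])]) -> HashMap<[u8; 32], Vec<&'a str>> {
    let mut groups: HashMap<[u8; 32], Vec<&'a str>> = HashMap::new();
    for &(path, content) in files {
        let digest: [u8; 32] = Sha256::digest(content).into();
        groups.entry(digest).or_default().push(path);
    }
    groups
}

fn main() {
    let files = [
        ("usr/lib/libfoo.so", b"identical bytes".as_slice()),
        ("opt/vendored/libfoo.so", b"identical bytes".as_slice()),
        ("etc/app.conf", b"unique bytes".as_slice()),
    ];
    // The two identical files share one digest, leaving two groups.
    assert_eq!(group_by_hash(&files).len(), 2);
}
```

Placing identical contents adjacent in a single layer means the compressor only has to store the repeated data once within that layer.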
19 | 20 | ## Move small files and directories into the first layer 21 | 22 | All "small files" and directories are moved into the first layer of the image. This means that it downloads fastest, 23 | which allows Docker to begin extracting the huge number of entries within the layer and setting up the filesystem. 24 | 25 | ## Compressing with zstd 26 | 27 | `zstd` is used to compress the layers, which gives a very large reduction in size compared to `gzip` -------------------------------------------------------------------------------- /docs/preview.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orf/docker-repack/5b5fef6a3298bd6697125056e820c2da81eb1856/docs/preview.gif -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 -------------------------------------------------------------------------------- /src/compression.rs: -------------------------------------------------------------------------------- 1 | use anyhow::anyhow; 2 | use flate2::read::GzDecoder; 3 | use flate2::write::GzEncoder; 4 | use flate2::Compression as GzipCompression; 5 | use std::io::{BufReader, BufWriter, Read, Write}; 6 | use zstd::{Decoder, Encoder}; 7 | 8 | #[derive(Debug, Clone, Copy, strum::Display, Eq, PartialEq)] 9 | pub enum Compression { 10 | Raw, 11 | Gzip, 12 | Zstd, 13 | } 14 | 15 | impl Compression { 16 | pub fn new_reader(self, file: T) -> anyhow::Result> { 17 | CompressedReader::new(self, file) 18 | } 19 | 20 | pub fn new_writer( 21 | self, 22 | file: T, 23 | level: i32, 24 | ) -> anyhow::Result> { 25 | CompressedWriter::new(self, file, level) 26 | } 27 | } 28 | 29 | pub enum CompressedWriter<'a, T: Write + Sync + Send> { 30 | Raw(T), 31 | Gzip(BufWriter>), 32 | Zstd(BufWriter>), 33 | } 34 | 35 | const DEFAULT_COMPRESSION_BUF_SIZE: usize = 1024 * 1024 * 25; // 25 mb 36 | 37 | impl<'a, T: Write + Sync + Send> CompressedWriter<'a, T> { 38 | fn new(type_: Compression, file: T, level: i32) -> anyhow::Result> { 39 | match type_ { 40 | Compression::Raw => Ok(Self::Raw(file)), 41 | Compression::Gzip => Ok(Self::Gzip(BufWriter::with_capacity( 42 | DEFAULT_COMPRESSION_BUF_SIZE, 43 | GzEncoder::new(file, GzipCompression::new(level as u32)), 44 | ))), 45 | Compression::Zstd => { 46 | let encoder = Encoder::new(file, level)?; 47 | Ok(Self::Zstd(BufWriter::with_capacity( 48 | DEFAULT_COMPRESSION_BUF_SIZE, 49 | encoder, 50 | ))) 51 | } 52 | } 53 | } 54 | 55 | pub fn tune_for_output_size(&mut self, size: u64) -> anyhow::Result<()> { 56 | if let CompressedWriter::Zstd(encoder) = self { 57 | let encoder = encoder.get_mut(); 58 | encoder.set_pledged_src_size(Some(size))?; 59 | encoder.include_contentsize(true)?; 60 | encoder.include_checksum(false)?; 61 | encoder.long_distance_matching(true)?; 62 | } 63 | Ok(()) 64 | } 65 | 66 | #[inline(always)] 67 | pub fn finish(self) -> anyhow::Result<()> { 68 | self.into_inner()?; 69 | Ok(()) 70 | } 71 | 72 | #[inline(always)] 73 | pub fn into_inner(self) -> anyhow::Result { 74 | match self { 75 | CompressedWriter::Raw(f) => Ok(f), 76 | CompressedWriter::Gzip(f) => { 77 | let inner = f.into_inner().map_err(|e| anyhow!("IntoInnerError {e}"))?; 78 | inner.finish().map_err(Into::into) 79 | } 80 | CompressedWriter::Zstd(f) => { 81 | let inner = f.into_inner().map_err(|e| anyhow!("IntoInnerError {e}"))?; 82 | inner.finish().map_err(Into::into) 83 | } 84 | } 85 | } 86 | } 87 | 
88 | impl Write for CompressedWriter<'_, T> { 89 | #[inline(always)] 90 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 91 | match self { 92 | CompressedWriter::Raw(f) => f.write(buf), 93 | CompressedWriter::Gzip(f) => f.write(buf), 94 | CompressedWriter::Zstd(f) => f.write(buf), 95 | } 96 | } 97 | 98 | #[inline(always)] 99 | fn flush(&mut self) -> std::io::Result<()> { 100 | match self { 101 | CompressedWriter::Raw(f) => f.flush(), 102 | CompressedWriter::Gzip(f) => f.flush(), 103 | CompressedWriter::Zstd(f) => f.flush(), 104 | } 105 | } 106 | } 107 | 108 | pub enum CompressedReader<'a, T: Read> { 109 | Raw(T), 110 | Gzip(GzDecoder), 111 | Zstd(Decoder<'a, BufReader>), 112 | } 113 | 114 | impl<'a, T: Read> CompressedReader<'a, T> { 115 | #[inline(always)] 116 | fn new(type_: Compression, file: T) -> anyhow::Result> { 117 | match type_ { 118 | Compression::Raw => Ok(Self::Raw(file)), 119 | Compression::Gzip => Ok(Self::Gzip(GzDecoder::new(file))), 120 | Compression::Zstd => Ok(Self::Zstd(Decoder::new(file)?)), 121 | } 122 | } 123 | } 124 | 125 | impl Read for CompressedReader<'_, T> { 126 | #[inline(always)] 127 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 128 | match self { 129 | CompressedReader::Raw(f) => f.read(buf), 130 | CompressedReader::Gzip(f) => f.read(buf), 131 | CompressedReader::Zstd(f) => f.read(buf), 132 | } 133 | } 134 | } 135 | 136 | #[cfg(test)] 137 | mod tests { 138 | use crate::compression::Compression; 139 | use flate2::write::GzEncoder; 140 | use std::io::{Read, Write}; 141 | 142 | const CONTENT: &[u8] = b"hello world"; 143 | 144 | #[test] 145 | fn raw_read() { 146 | let mut reader = Compression::Raw.new_reader(CONTENT).unwrap(); 147 | let mut output = vec![]; 148 | std::io::copy(&mut reader, &mut output).unwrap(); 149 | assert_eq!(output, CONTENT); 150 | } 151 | 152 | #[test] 153 | fn gzip_read() { 154 | let mut content = GzEncoder::new(Vec::new(), flate2::Compression::default()); 155 | content.write_all(CONTENT).unwrap(); 156 | content.flush().unwrap(); 157 | let compressed_content = content.finish().unwrap(); 158 | let mut reader = Compression::Gzip.new_reader(compressed_content.as_slice()).unwrap(); 159 | let mut output = vec![]; 160 | std::io::copy(&mut reader, &mut output).unwrap(); 161 | assert_eq!(output, CONTENT); 162 | } 163 | 164 | #[test] 165 | fn zstd_read() { 166 | let content = zstd::encode_all(CONTENT, 1).unwrap(); 167 | let mut reader = Compression::Zstd.new_reader(content.as_slice()).unwrap(); 168 | let mut output = vec![]; 169 | std::io::copy(&mut reader, &mut output).unwrap(); 170 | assert_eq!(output, CONTENT); 171 | } 172 | 173 | #[test] 174 | fn raw_write() { 175 | let mut writer = Compression::Raw.new_writer(vec![], 0).unwrap(); 176 | writer.write_all(CONTENT).unwrap(); 177 | let output = writer.into_inner().unwrap(); 178 | assert_eq!(output, CONTENT); 179 | } 180 | #[test] 181 | fn gzip_write() { 182 | let mut writer = Compression::Gzip.new_writer(vec![], 1).unwrap(); 183 | writer.write_all(CONTENT).unwrap(); 184 | let compressed = writer.into_inner().unwrap(); 185 | let mut s = vec![]; 186 | flate2::read::GzDecoder::new(compressed.as_slice()) 187 | .read_to_end(&mut s) 188 | .unwrap(); 189 | assert_eq!(s, CONTENT); 190 | } 191 | 192 | #[test] 193 | fn zstd_write() { 194 | let mut writer = Compression::Zstd.new_writer(vec![], 1).unwrap(); 195 | writer.write_all(CONTENT).unwrap(); 196 | let compressed = writer.into_inner().unwrap(); 197 | let s = zstd::decode_all(compressed.as_slice()).unwrap(); 198 | assert_eq!(s, CONTENT); 
199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/index.rs: -------------------------------------------------------------------------------- 1 | use memmap2::Mmap; 2 | use sha2::Digest; 3 | use std::fs::File; 4 | use std::io::Cursor; 5 | use std::path::{Path, PathBuf}; 6 | use tar::{Archive, Header}; 7 | use zstd::bulk::Compressor; 8 | use zstd::zstd_safe; 9 | 10 | use anyhow::Context; 11 | #[cfg(test)] 12 | use std::collections::HashMap; 13 | use std::fmt::Debug; 14 | 15 | const EMPTY_SHA: [u8; 32] = [ 16 | 227, 176, 196, 66, 152, 252, 28, 20, 154, 251, 244, 200, 153, 111, 185, 36, 39, 174, 65, 228, 100, 155, 147, 76, 17 | 164, 149, 153, 27, 120, 82, 184, 85, 18 | ]; 19 | 20 | pub struct ImageItems> { 21 | data: T, 22 | pub total_items: usize, 23 | } 24 | 25 | impl ImageItems { 26 | pub fn from_file(path: impl AsRef + Debug, total_items: usize) -> anyhow::Result> { 27 | let combined_input_file = File::options() 28 | .read(true) 29 | .open(&path) 30 | .with_context(|| format!("Loading image items from file {path:?}"))?; 31 | let data = unsafe { memmap2::MmapOptions::new().map(&combined_input_file) }?; 32 | assert_ne!(data.len(), 0); 33 | 34 | Ok(ImageItems { total_items, data }) 35 | } 36 | } 37 | 38 | impl> ImageItems { 39 | #[cfg(test)] 40 | pub fn from_data(data: T, total_items: usize) -> ImageItems { 41 | assert_ne!(data.as_ref().len(), 0); 42 | ImageItems { total_items, data } 43 | } 44 | pub fn get_image_content(&self) -> anyhow::Result> { 45 | let data = self.data.as_ref(); 46 | let seek = Cursor::new(data); 47 | let mut archive = Archive::new(seek); 48 | 49 | let mut items = Vec::with_capacity(self.total_items); 50 | 51 | for entry in archive.entries_with_seek()? { 52 | let entry = entry?; 53 | let start = entry.raw_file_position() as usize; 54 | let end = start + entry.size() as usize; 55 | let content = &data[start..end]; 56 | debug_assert_eq!(content.len(), entry.size() as usize); 57 | let path = entry.path()?.to_path_buf(); 58 | let header = entry.header().clone(); 59 | items.push((path, header, content)); 60 | } 61 | 62 | debug_assert_eq!(items.len(), self.total_items); 63 | Ok(items) 64 | } 65 | } 66 | 67 | #[derive(Debug)] 68 | pub struct ImageItem<'a> { 69 | pub path: PathBuf, 70 | pub header: Header, 71 | pub content: &'a [u8], 72 | pub hash: [u8; 32], 73 | pub compressed_size: u64, 74 | pub raw_size: u64, 75 | } 76 | 77 | impl<'a> ImageItem<'a> { 78 | pub fn create_compressor(compression_level: i32) -> anyhow::Result> { 79 | let mut compressor = Compressor::new(compression_level)?; 80 | compressor.set_parameter(zstd_safe::CParameter::ChecksumFlag(false))?; 81 | compressor.set_parameter(zstd_safe::CParameter::ContentSizeFlag(false))?; 82 | #[cfg(feature = "zstd-experimental")] 83 | compressor.set_parameter(zstd_safe::CParameter::Format(zstd_safe::FrameFormat::Magicless))?; 84 | Ok(compressor) 85 | } 86 | 87 | pub fn from_path_and_header( 88 | path: PathBuf, 89 | header: Header, 90 | content: &'a [u8], 91 | compressor: &mut Compressor, 92 | ) -> anyhow::Result { 93 | let raw_size = content.len() as u64; 94 | let (compressed_size, hash) = if content.is_empty() { 95 | (0, EMPTY_SHA) 96 | } else { 97 | let compressed = compressor.compress(content)?; 98 | #[cfg(feature = "zstd-experimental")] 99 | let header_size = 100 | unsafe { zstd_safe::zstd_sys::ZSTD_frameHeaderSize(compressed.as_ptr() as *const _, compressed.len()) }; 101 | #[cfg(not(feature = "zstd-experimental"))] 102 | let header_size = 4; 103 | let 
compressed_size = (compressed.len() - header_size) as u64; 104 | let hash = sha2::Sha256::digest(content).into(); 105 | (compressed_size, hash) 106 | }; 107 | 108 | Ok(Self { 109 | path, 110 | header, 111 | content, 112 | hash, 113 | compressed_size, 114 | raw_size, 115 | }) 116 | } 117 | 118 | #[cfg(test)] 119 | pub fn items_from_data( 120 | items: Vec<(PathBuf, Header, &[u8])>, 121 | compression_level: i32, 122 | ) -> anyhow::Result> { 123 | let mut compressor = ImageItem::create_compressor(compression_level)?; 124 | let mut image_items = Vec::with_capacity(items.len()); 125 | for (path, header, content) in items { 126 | let item = ImageItem::from_path_and_header(path, header, content, &mut compressor)?; 127 | image_items.push((item.path.clone(), item)); 128 | } 129 | Ok(image_items.into_iter().collect()) 130 | } 131 | } 132 | 133 | #[cfg(test)] 134 | mod tests { 135 | use super::*; 136 | use crate::test_utils::{add_dir, add_file, setup_tar}; 137 | use std::path::Path; 138 | 139 | #[test] 140 | fn test_from_vec() { 141 | let mut tar_1 = setup_tar(); 142 | add_dir(&mut tar_1, "test/"); 143 | add_file(&mut tar_1, "test/foo.txt", b"hello world"); 144 | add_file(&mut tar_1, "test/foo2.txt", b"hello world 2"); 145 | let data = tar_1.into_inner().unwrap(); 146 | 147 | let items = ImageItems::from_data(data, 3); 148 | let content = items.get_image_content().unwrap(); 149 | let items = ImageItem::items_from_data(content, 1).unwrap(); 150 | assert_eq!(items.len(), 3); 151 | 152 | assert_eq!( 153 | items[Path::new("test/foo.txt")].hash.to_vec(), 154 | const_hex::const_decode_to_array::<32>(b"b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9") 155 | .unwrap() 156 | ); 157 | assert_eq!( 158 | items[Path::new("test/foo2.txt")].hash.to_vec(), 159 | const_hex::const_decode_to_array::<32>(b"ed12932f3ef94c0792fbc55263968006e867e522cf9faa88274340a2671d4441") 160 | .unwrap() 161 | ) 162 | } 163 | 164 | #[test] 165 | fn test_compressed_size() { 166 | let mut tar_1 = setup_tar(); 167 | add_file(&mut tar_1, "foo.txt", b"hihi"); 168 | let data = tar_1.into_inner().unwrap(); 169 | let items = ImageItems::from_data(data, 1); 170 | let content = items.get_image_content().unwrap(); 171 | let items = ImageItem::items_from_data(content, 1).unwrap(); 172 | let item = &items[&PathBuf::from("foo.txt")]; 173 | assert_eq!(item.compressed_size, 3); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/input/layers.rs: -------------------------------------------------------------------------------- 1 | use oci_spec::image::Digest; 2 | use std::fmt::{Debug, Display, Formatter}; 3 | use std::io::Read; 4 | use tar::{Archive, Entry}; 5 | 6 | pub struct InputLayer { 7 | pub name: Digest, 8 | archive: Archive, 9 | } 10 | 11 | impl InputLayer { 12 | pub fn new(name: Digest, reader: T) -> anyhow::Result> { 13 | let archive = Archive::new(reader); 14 | Ok(Self { name, archive }) 15 | } 16 | 17 | pub fn entries(&mut self) -> anyhow::Result>>> { 18 | Ok(self.archive.entries()?) 
19 | } 20 | } 21 | 22 | impl Display for InputLayer { 23 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 24 | f.write_fmt(format_args!("{}", self.name)) 25 | } 26 | } 27 | 28 | impl Debug for InputLayer { 29 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 30 | f.write_fmt(format_args!("{}", self.name)) 31 | } 32 | } 33 | 34 | #[cfg(test)] 35 | mod tests { 36 | use super::*; 37 | use crate::compression::Compression; 38 | use crate::test_utils::{add_dir, add_file, setup_tar}; 39 | use std::str::FromStr; 40 | 41 | #[test] 42 | fn input_layer_entries() { 43 | let mut tar_1 = setup_tar(); 44 | add_dir(&mut tar_1, "test/"); 45 | add_file(&mut tar_1, "test/file.txt", b"hello world"); 46 | let vec = tar_1.into_inner().unwrap(); 47 | 48 | let compressed_reader = Compression::Raw.new_reader(vec.as_slice()).unwrap(); 49 | let mut input_layer = InputLayer::new( 50 | Digest::from_str("sha256:0d90d93a5cab3fd2879040420c7b7e4958aee8997fef78e9a5dd80cb01f3bd9c").unwrap(), 51 | compressed_reader, 52 | ) 53 | .unwrap(); 54 | 55 | assert_eq!( 56 | input_layer.to_string(), 57 | "sha256:0d90d93a5cab3fd2879040420c7b7e4958aee8997fef78e9a5dd80cb01f3bd9c" 58 | ); 59 | assert_eq!(input_layer.entries().unwrap().count(), 2); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/input/local_image.rs: -------------------------------------------------------------------------------- 1 | use crate::input::layers::InputLayer; 2 | use crate::input::InputImage; 3 | use crate::platform_matcher::PlatformMatcher; 4 | use crate::progress; 5 | use anyhow::{bail, Context}; 6 | use oci_spec::image::{Descriptor, Digest, ImageConfiguration, ImageIndex, ImageManifest, MediaType}; 7 | use std::fmt::{Debug, Display, Formatter}; 8 | use std::fs::File; 9 | use std::hash::{Hash, Hasher}; 10 | use std::io::Read; 11 | use std::path::{Path, PathBuf}; 12 | use tracing::{debug, instrument, warn}; 13 | 14 | pub struct LocalOciImage { 15 | blob_directory: PathBuf, 16 | manifest: ImageManifest, 17 | image_config: ImageConfiguration, 18 | } 19 | 20 | impl PartialEq for LocalOciImage { 21 | fn eq(&self, other: &Self) -> bool { 22 | self.image_digest() == other.image_digest() 23 | } 24 | } 25 | 26 | impl Eq for LocalOciImage {} 27 | 28 | impl Hash for LocalOciImage { 29 | fn hash(&self, state: &mut H) { 30 | let digest = self.image_digest(); 31 | digest.digest().hash(state); 32 | digest.algorithm().as_ref().hash(state); 33 | } 34 | } 35 | 36 | impl Debug for LocalOciImage { 37 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 38 | f.write_str(self.manifest.config().digest().digest()) 39 | } 40 | } 41 | 42 | impl Display for LocalOciImage { 43 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 44 | self.platform().fmt(f) 45 | } 46 | } 47 | 48 | fn get_blob_path(blob_directory: &Path, descriptor: &Descriptor) -> PathBuf { 49 | let digest = descriptor.digest(); 50 | blob_directory.join(digest.digest()) 51 | } 52 | 53 | fn read_blob_image_manifest(blob_directory: &Path, descriptor: &Descriptor) -> anyhow::Result { 54 | let digest_path = get_blob_path(blob_directory, descriptor); 55 | ImageManifest::from_file(&digest_path).with_context(|| format!("Error reading image manifest from {digest_path:?}")) 56 | } 57 | 58 | fn read_blob_image_index(blob_directory: &Path, descriptor: &Descriptor) -> anyhow::Result { 59 | let digest_path = get_blob_path(blob_directory, descriptor); 60 | ImageIndex::from_file(&digest_path).with_context(|| format!("Error reading image 
index from {digest_path:?}")) 61 | } 62 | 63 | impl LocalOciImage { 64 | #[instrument(name = "load_images")] 65 | pub fn from_oci_directory( 66 | directory: impl AsRef + Debug, 67 | platform_matcher: &PlatformMatcher, 68 | ) -> anyhow::Result> { 69 | let directory = directory.as_ref(); 70 | let blob_directory = directory.join("blobs").join("sha256"); 71 | 72 | let index_path = directory.join("index.json"); 73 | let manifest_path = directory.join("manifest.json"); 74 | 75 | if index_path.exists() { 76 | debug!("Reading index from {index_path:?}"); 77 | let index = ImageIndex::from_file(&index_path) 78 | .with_context(|| format!("Error reading index from {index_path:?}"))?; 79 | let mut images = vec![]; 80 | let manifest_iterator = progress::progress_iter("Reading Manifests", index.manifests().iter()); 81 | for manifest_descriptor in manifest_iterator { 82 | if !platform_matcher.matches_oci_spec_platform(manifest_descriptor.platform().as_ref()) { 83 | continue; 84 | } 85 | match manifest_descriptor.media_type() { 86 | MediaType::ImageManifest => { 87 | debug!("Reading image manifest from {}", manifest_descriptor.digest()); 88 | let manifest = read_blob_image_manifest(&blob_directory, manifest_descriptor) 89 | .context("Reading manifest")?; 90 | let img = Self::from_image_manifest(manifest, blob_directory.clone()) 91 | .context("Constructing LocalOciImage")?; 92 | images.push(img); 93 | } 94 | MediaType::ImageIndex => { 95 | debug!("Reading image index from {}", manifest_descriptor.digest()); 96 | let index = 97 | read_blob_image_index(&blob_directory, manifest_descriptor).context("Reading index")?; 98 | images.extend( 99 | Self::from_image_index(index, blob_directory.clone(), platform_matcher) 100 | .context("Parsing image index")?, 101 | ); 102 | } 103 | _ => { 104 | warn!("Skipping unknown media type {}", manifest_descriptor.media_type()); 105 | } 106 | } 107 | } 108 | Ok(images) 109 | } else if manifest_path.exists() { 110 | debug!("Reading manifest from {manifest_path:?}"); 111 | let manifest = ImageManifest::from_file(&manifest_path) 112 | .with_context(|| format!("Error reading manifest from {manifest_path:?}"))?; 113 | let img = Self::from_image_manifest(manifest, blob_directory).context("Constructing LocalOciImage")?; 114 | Ok(vec![img]) 115 | } else { 116 | bail!("No manifest or index found in {directory:?}"); 117 | } 118 | } 119 | 120 | fn from_image_index( 121 | index: ImageIndex, 122 | blob_directory: PathBuf, 123 | platform_matcher: &PlatformMatcher, 124 | ) -> anyhow::Result> { 125 | let mut images = vec![]; 126 | for manifest_descriptor in index.manifests() { 127 | if !platform_matcher.matches_oci_spec_platform(manifest_descriptor.platform().as_ref()) { 128 | continue; 129 | } 130 | let manifest = read_blob_image_manifest(&blob_directory, manifest_descriptor)?; 131 | let img = Self::from_image_manifest(manifest, blob_directory.clone()) 132 | .with_context(|| format!("Constructing LocalOciImage for {}", manifest_descriptor.digest()))?; 133 | images.push(img); 134 | } 135 | Ok(images) 136 | } 137 | 138 | fn from_image_manifest(manifest: ImageManifest, blob_directory: PathBuf) -> anyhow::Result { 139 | let config_descriptor = manifest.config(); 140 | let config_digest = config_descriptor.digest(); 141 | let config_path = blob_directory.join(config_digest.digest()); 142 | let image_config = ImageConfiguration::from_file(&config_path) 143 | .with_context(|| format!("Error reading image configuration from {config_path:?}"))?; 144 | Ok(Self { 145 | blob_directory, 146 | manifest, 147 
| image_config, 148 | }) 149 | } 150 | } 151 | 152 | impl InputImage for LocalOciImage { 153 | fn image_digest(&self) -> Digest { 154 | let digest = self.manifest.config().digest(); 155 | digest.clone() 156 | } 157 | 158 | fn layers_from_manifest( 159 | &self, 160 | ) -> anyhow::Result>>> { 161 | Ok(self.layers_with_compression()?.map(|(compression, digest)| { 162 | let path = self.blob_directory.join(digest.digest()); 163 | let file = File::open(&path).with_context(|| format!("Error reading input layer from {path:?}"))?; 164 | let reader = compression.new_reader(file)?; 165 | InputLayer::new(digest, reader) 166 | })) 167 | } 168 | 169 | fn config(&self) -> &ImageConfiguration { 170 | &self.image_config 171 | } 172 | 173 | fn layers(&self) -> anyhow::Result> { 174 | Ok(self 175 | .manifest 176 | .layers() 177 | .iter() 178 | .map(|d| { 179 | let stripped_digest = d.digest(); 180 | (d.media_type().clone(), stripped_digest.clone()) 181 | }) 182 | .collect()) 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/input/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::compression::Compression; 2 | use crate::input::layers::InputLayer; 3 | use itertools::Itertools; 4 | use oci_client::manifest::{ 5 | IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE, IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE, IMAGE_LAYER_GZIP_MEDIA_TYPE, 6 | IMAGE_LAYER_MEDIA_TYPE, IMAGE_LAYER_NONDISTRIBUTABLE_GZIP_MEDIA_TYPE, IMAGE_LAYER_NONDISTRIBUTABLE_MEDIA_TYPE, 7 | }; 8 | use oci_spec::image::{Digest, ImageConfiguration, MediaType}; 9 | use std::fmt::{Display, Formatter, Write}; 10 | use std::hash::Hash; 11 | use std::io::Read; 12 | 13 | pub mod layers; 14 | pub mod local_image; 15 | pub mod remote_image; 16 | 17 | const IMAGE_DOCKER_LAYER_ZSTD_MEDIA_TYPE: &str = "application/vnd.docker.image.rootfs.diff.tar.zstd"; 18 | const IMAGE_LAYER_ZSTD_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar+zstd"; 19 | const IMAGE_LAYER_NONDISTRIBUTABLE_ZSTD_MEDIA_TYPE: &str = 20 | "application/vnd.oci.image.layer.nondistributable.v1.tar+zstd"; 21 | 22 | pub fn get_layer_media_type(value: &str) -> Option { 23 | match value { 24 | IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE | IMAGE_LAYER_MEDIA_TYPE | IMAGE_LAYER_NONDISTRIBUTABLE_MEDIA_TYPE => { 25 | Some(MediaType::ImageLayer) 26 | } 27 | IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE 28 | | IMAGE_LAYER_GZIP_MEDIA_TYPE 29 | | IMAGE_LAYER_NONDISTRIBUTABLE_GZIP_MEDIA_TYPE => Some(MediaType::ImageLayerGzip), 30 | IMAGE_DOCKER_LAYER_ZSTD_MEDIA_TYPE 31 | | IMAGE_LAYER_ZSTD_MEDIA_TYPE 32 | | IMAGE_LAYER_NONDISTRIBUTABLE_ZSTD_MEDIA_TYPE => Some(MediaType::ImageLayerZstd), 33 | _ => None, 34 | } 35 | } 36 | 37 | #[derive(Clone, Eq, PartialEq)] 38 | pub struct Platform { 39 | config: ImageConfiguration, 40 | } 41 | 42 | impl Platform { 43 | pub fn file_key(&self) -> anyhow::Result { 44 | let mut f = String::new(); 45 | f.write_fmt(format_args!("{}-{}", self.config.os(), self.config.architecture()))?; 46 | if let Some(variant) = self.config.variant() { 47 | f.write_fmt(format_args!("-{}", variant))?; 48 | } 49 | if let Some(os_version) = self.config.os_version() { 50 | f.write_fmt(format_args!("-{}", os_version))?; 51 | } 52 | if let Some(os_features) = self.config.os_features() { 53 | for feature in os_features { 54 | f.write_fmt(format_args!("-{}", feature))?; 55 | } 56 | } 57 | Ok(f) 58 | } 59 | } 60 | 61 | impl Display for Platform { 62 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 63 | write!(f, "{}/{}", 
self.config.os(), self.config.architecture())?; 64 | if let Some(variant) = self.config.variant() { 65 | write!(f, "/{}", variant)?; 66 | } 67 | Ok(()) 68 | } 69 | } 70 | 71 | pub trait InputImage: Display + Sized + Send + Sync + Hash + Eq + PartialEq { 72 | fn image_digest(&self) -> Digest; 73 | 74 | fn platform(&self) -> Platform { 75 | let config = self.config().clone(); 76 | Platform { config } 77 | } 78 | 79 | fn layers_from_manifest( 80 | &self, 81 | ) -> anyhow::Result>>>; 82 | 83 | fn config(&self) -> &ImageConfiguration; 84 | 85 | fn layers(&self) -> anyhow::Result>; 86 | 87 | fn layers_with_compression(&self) -> anyhow::Result> { 88 | let iterator = self 89 | .layers()? 90 | .into_iter() 91 | .filter_map(|(media_type, digest)| match media_type { 92 | MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => Some((Compression::Raw, digest)), 93 | MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => { 94 | Some((Compression::Gzip, digest)) 95 | } 96 | MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => { 97 | Some((Compression::Zstd, digest)) 98 | } 99 | _ => None, 100 | }) 101 | .rev(); 102 | Ok(iterator.collect_vec().into_iter()) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/input/remote_image.rs: -------------------------------------------------------------------------------- 1 | use crate::input::layers::InputLayer; 2 | use crate::input::{get_layer_media_type, InputImage}; 3 | use crate::platform_matcher::PlatformMatcher; 4 | use crate::progress; 5 | use anyhow::Context; 6 | use docker_credential::{CredentialRetrievalError, DockerCredential}; 7 | use itertools::Itertools; 8 | use oci_client::manifest::{OciImageManifest, OciManifest, IMAGE_MANIFEST_MEDIA_TYPE, OCI_IMAGE_MEDIA_TYPE}; 9 | use oci_client::secrets::RegistryAuth; 10 | use oci_client::{Client, Reference}; 11 | use oci_spec::image::{Digest, ImageConfiguration, MediaType}; 12 | use std::fmt::{Debug, Display, Formatter}; 13 | use std::hash::{Hash, Hasher}; 14 | use std::io::Read; 15 | use std::str::FromStr; 16 | use tokio::io::BufReader; 17 | use tokio::runtime::Handle; 18 | use tokio_util::io::SyncIoBridge; 19 | use tracing::{debug, instrument, trace, warn}; 20 | 21 | #[instrument(skip_all, fields(reference = %reference))] 22 | fn build_auth(reference: &Reference) -> RegistryAuth { 23 | let server = reference 24 | .resolve_registry() 25 | .strip_suffix('/') 26 | .unwrap_or_else(|| reference.resolve_registry()); 27 | 28 | let auth_results = [ 29 | ("docker", docker_credential::get_credential(server)), 30 | ("podman", docker_credential::get_podman_credential(server)), 31 | ]; 32 | 33 | for (name, cred_result) in auth_results.into_iter() { 34 | match cred_result { 35 | Err(e) => match e { 36 | CredentialRetrievalError::HelperFailure { stdout, stderr } => { 37 | let base_message = 38 | "Credential helper returned non-zero response code, falling back to anonymous auth"; 39 | if !stderr.is_empty() || !stdout.is_empty() { 40 | let extra = [stdout.trim(), stderr.trim()].join(" - "); 41 | warn!("{name}: {base_message}: stdout/stderr = {extra}"); 42 | } else { 43 | warn!("{name}: {base_message}"); 44 | }; 45 | } 46 | e => { 47 | debug!("{name}: {e}"); 48 | } 49 | }, 50 | Ok(DockerCredential::UsernamePassword(username, password)) => { 51 | debug!("{name}: Found docker credentials"); 52 | return RegistryAuth::Basic(username, password); 53 | } 54 | Ok(DockerCredential::IdentityToken(_)) => { 55 | warn!("{name}: Cannot use 
contents of docker config, identity token not supported."); 56 | } 57 | }; 58 | } 59 | debug!("No credentials found, using anonymous auth"); 60 | RegistryAuth::Anonymous 61 | } 62 | 63 | pub struct RemoteImage { 64 | client: Client, 65 | reference: Reference, 66 | layers: Vec<(MediaType, Digest)>, 67 | config_digest: Digest, 68 | image_config: ImageConfiguration, 69 | handle: Handle, 70 | } 71 | 72 | impl PartialEq for RemoteImage { 73 | fn eq(&self, other: &Self) -> bool { 74 | self.image_digest() == other.image_digest() 75 | } 76 | } 77 | 78 | impl Eq for RemoteImage {} 79 | 80 | impl Hash for RemoteImage { 81 | fn hash(&self, state: &mut H) { 82 | let digest = self.image_digest(); 83 | digest.digest().hash(state); 84 | digest.algorithm().as_ref().hash(state); 85 | } 86 | } 87 | 88 | impl Debug for RemoteImage { 89 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 90 | f.debug_struct("RemoteImage") 91 | .field("reference", &self.reference) 92 | .field("layers", &self.layers) 93 | .field("image_config", &self.image_config) 94 | .finish() 95 | } 96 | } 97 | 98 | impl Display for RemoteImage { 99 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 100 | f.write_fmt(format_args!( 101 | "{} {} {}", 102 | self.reference, 103 | self.image_config.os(), 104 | self.image_config.architecture() 105 | )) 106 | } 107 | } 108 | 109 | impl RemoteImage { 110 | #[instrument(name = "load_images", skip_all, fields(image = %reference))] 111 | pub fn create_remote_images( 112 | handle: &Handle, 113 | reference: Reference, 114 | platform: &PlatformMatcher, 115 | ) -> anyhow::Result> { 116 | handle.block_on(Self::from_list_async(reference, platform)) 117 | } 118 | 119 | async fn from_list_async(reference: Reference, platform_matcher: &PlatformMatcher) -> anyhow::Result> { 120 | let auth = build_auth(&reference); 121 | let client = Client::new(Default::default()); 122 | debug!("Fetching manifest list for {}", reference); 123 | let (manifest_content, _) = client 124 | .pull_manifest(&reference, &auth) 125 | .await 126 | .context("Fetch manifest list")?; 127 | match manifest_content { 128 | OciManifest::Image(image) => { 129 | debug!("Found single image manifest"); 130 | let img = Self::from_image_manifest(reference, image, client).await?; 131 | Ok(vec![img]) 132 | } 133 | OciManifest::ImageIndex(index) => { 134 | debug!("Found image index"); 135 | let iterator = progress::progress_iter("Reading Manifests", index.manifests.into_iter()); 136 | let manifests = iterator 137 | .filter(|entry| platform_matcher.matches_oci_client_platform(entry.platform.as_ref())) 138 | .filter_map(|entry| { 139 | let media_type = entry.media_type.as_str(); 140 | trace!("Checking entry media type ({media_type}) {:?}", entry); 141 | match media_type { 142 | OCI_IMAGE_MEDIA_TYPE | IMAGE_MANIFEST_MEDIA_TYPE => { 143 | trace!("Found image manifest"); 144 | Some(reference.clone_with_digest(entry.digest)) 145 | } 146 | _ => { 147 | trace!("Skipped"); 148 | None 149 | } 150 | } 151 | }); 152 | let mut images = vec![]; 153 | for manifest in manifests { 154 | { 155 | let img = Self::from_image_reference(manifest, client.clone(), auth.clone()).await?; 156 | images.push(img); 157 | // Super hacky, but we need to sleep here to avoid rate limiting. 
158 | tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; 159 | } 160 | } 161 | debug!("Found {} images", images.len()); 162 | Ok(images) 163 | } 164 | } 165 | } 166 | 167 | async fn from_image_reference(reference: Reference, client: Client, auth: RegistryAuth) -> anyhow::Result { 168 | debug!("Fetching manifest for {}", reference); 169 | let (manifest_content, _) = client 170 | .pull_manifest_raw(&reference, &auth, &[OCI_IMAGE_MEDIA_TYPE]) 171 | .await 172 | .with_context(|| format!("Fetching manifest {reference}"))?; 173 | let manifest: OciImageManifest = serde_json::from_slice(&manifest_content).context("Parse ImageManifest")?; 174 | trace!("Manifest parsed for {}: {:#?}", reference, manifest); 175 | Self::from_image_manifest(reference, manifest, client) 176 | .await 177 | .context("from_image_manifest") 178 | } 179 | 180 | async fn from_image_manifest( 181 | reference: Reference, 182 | manifest: OciImageManifest, 183 | client: Client, 184 | ) -> anyhow::Result { 185 | let mut config_data = vec![]; 186 | let config_digest = Digest::from_str(&manifest.config.digest)?; 187 | debug!("Fetching config for {}", config_digest); 188 | client 189 | .pull_blob(&reference, &manifest.config, &mut config_data) 190 | .await 191 | .with_context(|| format!("Fetch config {}", manifest.config))?; 192 | let image_config = ImageConfiguration::from_reader(&config_data[..]).context("Parse ImageConfiguration")?; 193 | 194 | let layers = manifest 195 | .layers 196 | .into_iter() 197 | .map(|v| { 198 | let media_type = v.media_type.as_str(); 199 | if let Some(parsed_media_type) = get_layer_media_type(media_type) { 200 | trace!("Found layer descriptor: {:?}", v); 201 | let digest = Digest::from_str(&v.digest)?; 202 | Ok(Some((parsed_media_type, digest))) 203 | } else { 204 | trace!("Skipping descriptor: {:?}", v); 205 | Ok(None) 206 | } 207 | }) 208 | .filter_map_ok(|r| r) 209 | .collect::>>(); 210 | let handle = Handle::current(); 211 | Ok(Self { 212 | client, 213 | reference, 214 | layers: layers?, 215 | image_config, 216 | handle, 217 | config_digest, 218 | }) 219 | } 220 | } 221 | 222 | impl InputImage for RemoteImage { 223 | fn image_digest(&self) -> Digest { 224 | self.config_digest.clone() 225 | } 226 | 227 | fn layers_from_manifest( 228 | &self, 229 | ) -> anyhow::Result>>> { 230 | Ok(self.layers_with_compression()?.map(|(compression, digest)| { 231 | debug!("Fetching blob stream for {}", digest); 232 | let res = self.handle.block_on( 233 | self.client 234 | .pull_blob_stream(&self.reference, digest.to_string().as_str()), 235 | )?; 236 | 237 | let reader = tokio_util::io::StreamReader::new(res); 238 | let reader = BufReader::with_capacity(5 * 1024 * 1024, reader); 239 | let bridge = SyncIoBridge::new_with_handle(reader, self.handle.clone()); 240 | let reader = compression.new_reader(bridge)?; 241 | InputLayer::new(digest, reader) 242 | })) 243 | } 244 | 245 | fn config(&self) -> &ImageConfiguration { 246 | &self.image_config 247 | } 248 | 249 | fn layers(&self) -> anyhow::Result> { 250 | Ok(self.layers.clone()) 251 | } 252 | } 253 | 254 | #[cfg(test)] 255 | mod test { 256 | use super::*; 257 | 258 | #[test_log::test] 259 | fn test_remote_image() { 260 | let runtime = tokio::runtime::Runtime::new().unwrap(); 261 | let reference = "alpine:3.20".parse().unwrap(); 262 | let matcher = crate::platform_matcher::PlatformMatcher::match_all(); 263 | let images = RemoteImage::create_remote_images(runtime.handle(), reference, &matcher).unwrap(); 264 | assert_ne!(images.len(), 0); 265 | for image in 
images { 266 | let layers = image.layers().unwrap(); 267 | assert_ne!(layers, vec![], "No layers found"); 268 | 269 | let compression = image.layers_with_compression().unwrap().collect_vec(); 270 | assert_ne!(compression, vec![], "No compression found"); 271 | 272 | let mut count = 0; 273 | for layer in image.layers_from_manifest().unwrap() { 274 | let mut input_layer = layer.unwrap(); 275 | let entries = input_layer.entries().unwrap().count(); 276 | assert_ne!(entries, 0); 277 | count += 1; 278 | } 279 | assert_eq!(count, layers.len()); 280 | assert_eq!(count, compression.len()); 281 | } 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /src/io_utils.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | 3 | pub struct WriteCounter { 4 | count: u64, 5 | } 6 | 7 | impl WriteCounter { 8 | pub fn new() -> Self { 9 | Self { count: 0 } 10 | } 11 | 12 | pub fn written_bytes(&self) -> u64 { 13 | self.count 14 | } 15 | } 16 | 17 | impl Write for WriteCounter { 18 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 19 | self.count += buf.len() as u64; 20 | Ok(buf.len()) 21 | } 22 | 23 | fn flush(&mut self) -> std::io::Result<()> { 24 | Ok(()) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/layer_combiner.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use crate::input::layers::InputLayer; 3 | use memchr::memmem; 4 | use std::collections::HashSet; 5 | use std::io::{Read, Write}; 6 | use tar::{Builder, Entry}; 7 | use zstd::zstd_safe::WriteBuf; 8 | 9 | const WHITEOUT_OPAQUE: &[u8] = b".wh..wh..opq"; 10 | const WHITEOUT_PREFIX: &[u8] = b".wh."; 11 | 12 | pub struct LayerCombiner { 13 | archive: Builder, 14 | items: HashSet>, 15 | whiteout_directories: Vec>, 16 | whiteout_files: HashSet>, 17 | } 18 | 19 | impl LayerCombiner { 20 | pub fn new(output: T) -> Self { 21 | let archive = Builder::new(output); 22 | Self { 23 | archive, 24 | items: HashSet::new(), 25 | whiteout_directories: Vec::new(), 26 | whiteout_files: HashSet::new(), 27 | } 28 | } 29 | 30 | fn add_entry(&mut self, entry: Entry) -> anyhow::Result<()> { 31 | let entry_path = entry.path_bytes().to_vec(); 32 | if entry_path.ends_with(WHITEOUT_OPAQUE) { 33 | let directory = &entry_path[..entry_path.len() - WHITEOUT_OPAQUE.len()]; 34 | if !self.whiteout_directories.iter().any(|v| v == directory) { 35 | self.whiteout_directories.push(directory.to_vec()); 36 | } 37 | } else if let Some(whiteout) = memmem::rfind(&entry_path, WHITEOUT_PREFIX) { 38 | let whiteout_file_name = &entry_path[whiteout + WHITEOUT_PREFIX.len()..]; 39 | let whiteout_directory = &entry_path[..whiteout]; 40 | let whiteout_path = [whiteout_directory, whiteout_file_name].concat(); 41 | self.whiteout_files.insert(whiteout_path); 42 | } else { 43 | self.archive.append(&entry.header().clone(), entry)?; 44 | self.items.insert(entry_path); 45 | } 46 | Ok(()) 47 | } 48 | 49 | #[inline(always)] 50 | fn should_add_path(&mut self, path: &[u8]) -> bool { 51 | let in_whiteout_files = self.whiteout_files.contains(path); 52 | let in_items = self.items.contains(path); 53 | let in_whiteout_directories = self.whiteout_directories.iter().any(|dir| path.starts_with(dir)); 54 | 55 | !in_whiteout_files && !in_items && !in_whiteout_directories 56 | } 57 | 58 | #[cfg(test)] 59 | pub fn merge_layer(&mut self, mut layer: InputLayer) -> anyhow::Result<()> { 60 | 
self.merge_entries(layer.entries()?) 61 | } 62 | 63 | pub fn merge_entries<'a>( 64 | &mut self, 65 | entries: impl Iterator>>, 66 | ) -> anyhow::Result<()> { 67 | for entry in entries { 68 | let entry = entry?; 69 | let entry_path = entry.path_bytes(); 70 | let path = entry_path.as_slice(); 71 | 72 | if self.should_add_path(path) { 73 | self.add_entry(entry)? 74 | } 75 | } 76 | Ok(()) 77 | } 78 | 79 | pub fn finish(mut self) -> anyhow::Result { 80 | self.archive.finish()?; 81 | Ok(self.items.len()) 82 | } 83 | 84 | #[cfg(test)] 85 | fn into_inner(self) -> anyhow::Result<(T, usize)> { 86 | Ok((self.archive.into_inner()?, self.items.len())) 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | mod tests { 92 | use super::*; 93 | use crate::compression::Compression; 94 | use crate::test_utils::{add_dir, add_file, build_layer, read_tar_entries_content, setup_tar}; 95 | use oci_spec::image::Digest; 96 | use std::path::Path; 97 | use std::str::FromStr; 98 | 99 | fn make_input_layer(builder: Builder>) -> InputLayer { 100 | let finished = builder.into_inner().unwrap(); 101 | assert_ne!(finished.len(), 0); 102 | let reader = std::io::Cursor::new(finished); 103 | let compressed_reader = Compression::Raw.new_reader(reader).unwrap(); 104 | InputLayer::new( 105 | Digest::from_str("sha256:0d90d93a5cab3fd2879040420c7b7e4958aee8997fef78e9a5dd80cb01f3bd9c").unwrap(), 106 | compressed_reader, 107 | ) 108 | .unwrap() 109 | } 110 | 111 | #[test] 112 | fn test_file_whiteout() { 113 | let mut tar_1 = setup_tar(); 114 | add_dir(&mut tar_1, "test/"); 115 | add_file(&mut tar_1, "test/.wh.foo.txt", b""); 116 | let input_layer_1 = make_input_layer(tar_1); 117 | 118 | let mut tar_2 = setup_tar(); 119 | add_dir(&mut tar_2, "test/"); 120 | add_file(&mut tar_2, "test/foo.txt", b"hello world"); 121 | let input_layer_2 = make_input_layer(tar_2); 122 | 123 | let mut combiner = LayerCombiner::new(vec![]); 124 | combiner.merge_layer(input_layer_1).unwrap(); 125 | combiner.merge_layer(input_layer_2).unwrap(); 126 | 127 | assert_eq!(combiner.whiteout_files, HashSet::from([b"test/foo.txt".to_vec()])); 128 | assert_eq!(combiner.items.len(), 1); 129 | } 130 | 131 | #[test] 132 | fn test_opaque_whiteout() { 133 | let mut tar_1 = setup_tar(); 134 | add_dir(&mut tar_1, "test/"); 135 | add_file(&mut tar_1, "test/.wh..wh..opq", b""); 136 | let input_layer_1 = make_input_layer(tar_1); 137 | 138 | let mut tar_2 = setup_tar(); 139 | add_dir(&mut tar_2, "test/"); 140 | add_file(&mut tar_2, "test/new-file.txt", b"hello world"); 141 | add_file(&mut tar_2, "test/foo.txt", b"hello world"); 142 | let input_layer_2 = make_input_layer(tar_2); 143 | 144 | let mut combiner = LayerCombiner::new(vec![]); 145 | combiner.merge_layer(input_layer_1).unwrap(); 146 | combiner.merge_layer(input_layer_2).unwrap(); 147 | 148 | assert_eq!(combiner.whiteout_directories, [b"test/".to_vec()]); 149 | assert_eq!(combiner.items.len(), 1); 150 | } 151 | 152 | #[test] 153 | fn test_multiple_layers() { 154 | let layer_1 = build_layer() 155 | .with_files(&[ 156 | ("one.txt", b"content1"), 157 | ("two.txt", b"content2"), 158 | ("three.txt", b"content3"), 159 | ("four.txt", b"content4"), 160 | ]) 161 | .build(); 162 | 163 | let layer_2 = build_layer() 164 | .with_files(&[ 165 | ("five.txt", b"content5"), 166 | ("six.txt", b"content6"), 167 | ("seven.txt", b"content7"), 168 | ("eight.txt", b"content8"), 169 | ]) 170 | .build(); 171 | 172 | let layer_3 = build_layer() 173 | .with_files(&[ 174 | ("one.txt", b"new content 1"), 175 | ("five.txt", b"new content 2"), 176 | ("nine.txt", 
b"new content 3"), 177 | ]) 178 | .build(); 179 | 180 | let mut output = vec![]; 181 | let mut combiner = LayerCombiner::new(&mut output); 182 | combiner.merge_layer(layer_3).unwrap(); 183 | combiner.merge_layer(layer_2).unwrap(); 184 | combiner.merge_layer(layer_1).unwrap(); 185 | let (data, total) = combiner.into_inner().unwrap(); 186 | assert_eq!(total, 9); 187 | let entries = read_tar_entries_content(data); 188 | assert_eq!(entries.len(), 9); 189 | assert_eq!(entries[Path::new("one.txt")], b"new content 1"); 190 | assert_eq!(entries[Path::new("five.txt")], b"new content 2"); 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/location.rs: -------------------------------------------------------------------------------- 1 | use oci_client::Reference; 2 | use std::fmt::Display; 3 | use std::path::PathBuf; 4 | use std::str::FromStr; 5 | 6 | #[derive(Debug, Clone)] 7 | pub enum Location { 8 | Oci(PathBuf), 9 | Docker(Reference), 10 | } 11 | 12 | impl Display for Location { 13 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 14 | match self { 15 | Location::Oci(path) => write!(f, "oci://{}", path.display()), 16 | Location::Docker(reference) => write!(f, "docker://{}", reference), 17 | } 18 | } 19 | } 20 | 21 | impl FromStr for Location { 22 | type Err = anyhow::Error; 23 | 24 | fn from_str(value: &str) -> Result { 25 | match value.split_once("://") { 26 | None => Ok(Self::Docker(value.parse()?)), 27 | Some(("oci", path)) => Ok(Self::Oci(path.into())), 28 | Some(("docker", reference)) => Ok(Self::Docker(reference.parse()?)), 29 | Some((prefix, _)) => Err(anyhow::anyhow!("Invalid image type {prefix}")), 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use crate::index::{ImageItem, ImageItems}; 2 | use crate::input::remote_image::RemoteImage; 3 | use crate::layer_combiner::LayerCombiner; 4 | use anyhow::{bail, Context}; 5 | use byte_unit::Byte; 6 | use clap::Parser; 7 | use globset::Glob; 8 | use input::InputImage; 9 | use itertools::Itertools; 10 | use memmap2::Mmap; 11 | use oci_spec::image::Sha256Digest; 12 | use output_image::image::OutputImageWriter; 13 | use output_image::layers::OutputLayers; 14 | use rand::prelude::*; 15 | use rayon::prelude::*; 16 | use std::collections::HashMap; 17 | use std::fmt::Debug; 18 | use std::fs::File; 19 | use std::path::Path; 20 | use tracing::{info, info_span, instrument, Level}; 21 | use tracing_indicatif::IndicatifLayer; 22 | use tracing_subscriber::layer::SubscriberExt; 23 | use tracing_subscriber::util::SubscriberInitExt; 24 | 25 | mod compression; 26 | mod index; 27 | mod input; 28 | mod io_utils; 29 | mod layer_combiner; 30 | pub mod location; 31 | mod output_image; 32 | mod platform_matcher; 33 | mod progress; 34 | #[cfg(test)] 35 | mod test_utils; 36 | 37 | use crate::input::local_image::LocalOciImage; 38 | use crate::platform_matcher::PlatformMatcher; 39 | use crate::progress::{display_bytes, progress_parallel_collect}; 40 | use location::Location; 41 | use output_image::stats::WrittenImageStats; 42 | use shadow_rs::shadow; 43 | use tracing_subscriber::filter::Directive; 44 | use tracing_subscriber::EnvFilter; 45 | 46 | shadow!(build); 47 | 48 | #[derive(Parser, Debug)] 49 | #[clap(version = build::CLAP_LONG_VERSION)] 50 | struct Args { 51 | /// Source image. e.g. 
`python:3.11`, `tensorflow/tensorflow:latest` or `oci://local/image/path` 52 | source: Location, 53 | /// Location to save image, e.g oci://directory/path/ 54 | output_dir: Location, 55 | /// Target size for layers 56 | #[arg(long, short)] 57 | target_size: Byte, 58 | 59 | #[arg(long)] 60 | concurrency: Option, 61 | 62 | #[arg(long)] 63 | keep_temp_files: bool, 64 | 65 | #[arg(long, default_value = "14")] 66 | compression_level: i32, 67 | 68 | #[arg(long, default_value = "linux/*")] 69 | platform: Glob, 70 | } 71 | 72 | pub fn main() -> anyhow::Result<()> { 73 | let indicatif_layer = IndicatifLayer::new().with_max_progress_bars(14, None); 74 | let env_builder = EnvFilter::builder() 75 | .with_default_directive(Directive::from(Level::INFO)) 76 | .from_env()?; 77 | tracing_subscriber::registry() 78 | .with( 79 | tracing_subscriber::fmt::layer() 80 | .compact() 81 | .with_thread_names(true) 82 | .with_writer(indicatif_layer.get_stderr_writer()), 83 | ) 84 | .with(indicatif_layer) 85 | .with(env_builder) 86 | .init(); 87 | let args = Args::parse(); 88 | 89 | let output_dir = match args.output_dir { 90 | Location::Oci(path) => path, 91 | Location::Docker(_) => { 92 | bail!("Docker registry output is not currently supported") 93 | } 94 | }; 95 | 96 | let temp_dir = output_dir.join("temp"); 97 | let target_size = args.target_size; 98 | 99 | let output_image = 100 | OutputImageWriter::new(output_dir.to_path_buf(), temp_dir.clone()).context("Construct OutputImageWriter")?; 101 | 102 | rayon::ThreadPoolBuilder::new() 103 | .thread_name(|i| format!("thread-{}", i)) 104 | .num_threads(args.concurrency.unwrap_or_default()) 105 | .build_global()?; 106 | info!("Using {} threads", rayon::current_num_threads()); 107 | let platform_matcher = PlatformMatcher::from_glob(args.platform)?; 108 | 109 | let results = match args.source { 110 | Location::Oci(path) => { 111 | info!("Reading images from OCI directory: {}", path.display()); 112 | let images = LocalOciImage::from_oci_directory(path, &platform_matcher)?; 113 | handle_input_images(images, &temp_dir, &output_image, target_size, args.compression_level)? 114 | } 115 | Location::Docker(reference) => { 116 | info!("Reading images registry: {}", reference); 117 | let runtime = tokio::runtime::Runtime::new()?; 118 | let images = RemoteImage::create_remote_images(runtime.handle(), reference, &platform_matcher)?; 119 | handle_input_images(images, &temp_dir, &output_image, target_size, args.compression_level)? 
120 | } 121 | }; 122 | 123 | if !args.keep_temp_files { 124 | std::fs::remove_dir_all(&temp_dir)?; 125 | } 126 | 127 | info!("Wrote {} images to {}:", results.len(), output_dir.display()); 128 | for (_, _, written_image) in &results { 129 | let total_size = written_image.layers.iter().map(|l| l.compressed_file_size).sum::(); 130 | info!( 131 | "Written image {} - {:#.1}:", 132 | written_image.platform, 133 | display_bytes(total_size) 134 | ); 135 | for layer in &written_image.layers { 136 | info!(" - {}", layer); 137 | } 138 | } 139 | 140 | let manifests = results 141 | .into_iter() 142 | .map(|(size, hash, stats)| (size, hash.clone(), stats)) 143 | .sorted_by_key(|(_, _, stats)| stats.platform.to_string()) 144 | .collect::>(); 145 | 146 | output_image.write_image_index(&manifests)?; 147 | info!("Completed"); 148 | Ok(()) 149 | } 150 | 151 | fn handle_input_images( 152 | images: Vec, 153 | temp_dir: &Path, 154 | output_image: &OutputImageWriter, 155 | target_size: Byte, 156 | compression_level: i32, 157 | ) -> anyhow::Result> { 158 | info!("Found {} images", images.len()); 159 | for image in &images { 160 | info!(" - {} - digest: {}", image.platform(), image.image_digest()); 161 | } 162 | 163 | let images = progress_parallel_collect::, _>( 164 | "Loading and merging", 165 | images.into_par_iter().map(|input_image| { 166 | let image_digest = input_image.image_digest(); 167 | let platform_key = input_image.platform().file_key()?; 168 | let combined_path = temp_dir.join(format!("combined-{platform_key}-{image_digest}.tar")); 169 | let image_items = load_and_merge_image(&input_image, &combined_path)?; 170 | Ok((input_image, image_items)) 171 | }), 172 | )?; 173 | info!( 174 | "Loaded and merged {} images - {} items in total", 175 | images.len(), 176 | images.iter().map(|(_, v)| v.total_items).sum::() 177 | ); 178 | let images_with_content = images 179 | .iter() 180 | .map(|(input_image, image_items)| { 181 | let image_content = image_items.get_image_content()?; 182 | Ok((input_image, image_content)) 183 | }) 184 | .collect::>>()?; 185 | 186 | let all_image_items = images_with_content 187 | .into_iter() 188 | .flat_map(|(input_image, items)| items.into_iter().map(move |v| (input_image, v))) 189 | .collect::>(); 190 | 191 | info!( 192 | "Read {} files from images, hashing and compressing files", 193 | all_image_items.len() 194 | ); 195 | 196 | let hashed_items = progress_parallel_collect::, _>( 197 | "Hashing and compressing", 198 | all_image_items.into_par_iter().map_init( 199 | || ImageItem::create_compressor(compression_level).unwrap(), 200 | |compressor, (input_image, (path, header, content))| { 201 | let item = 202 | ImageItem::from_path_and_header(path, header, content, compressor).map(|v| (v.path.clone(), v))?; 203 | Ok((input_image, item)) 204 | }, 205 | ), 206 | )?; 207 | let file_count = hashed_items.iter().filter(|(_, (_, item))| item.raw_size > 0).count(); 208 | let unique_file_count = hashed_items 209 | .iter() 210 | .filter_map(|(_, (_, item))| if item.raw_size > 0 { Some(item.hash) } else { None }) 211 | .unique() 212 | .count(); 213 | info!( 214 | "Hashed {} items from images - {} non-empty files, {} unique files, {} duplicate files", 215 | hashed_items.len(), 216 | file_count, 217 | unique_file_count, 218 | file_count - unique_file_count 219 | ); 220 | 221 | let all_image_items: Vec<(_, HashMap<_, _>)> = hashed_items 222 | .into_iter() 223 | .into_group_map() 224 | .into_iter() 225 | .map(|(input_image, items)| { 226 | let items = items.into_iter().collect(); 227 | (input_image, 
items) 228 | }) 229 | .collect(); 230 | let total_item_count: usize = all_image_items.iter().map(|(_, map)| map.len()).sum(); 231 | info!("Packing {} files into layers", total_item_count); 232 | let output_layers = all_image_items 233 | .iter() 234 | .map(|(input_image, items)| { 235 | let output_layer = OutputLayers::pack_items(items, 4096, target_size.as_u64()) 236 | .with_context(|| format!("Packing layers for {}", input_image))?; 237 | Ok((input_image, output_layer)) 238 | }) 239 | .collect::>>()?; 240 | 241 | let mut flattened_layers = output_layers 242 | .iter() 243 | .flat_map(|(image, layers)| layers.all_layers().iter().map(move |layer| (image, layer))) 244 | .collect::>(); 245 | 246 | // Shuffle the layers to avoid any bias in the order of the layers 247 | // We re-sort the layers in write_oci_image 248 | let mut small_rng = SmallRng::from_entropy(); 249 | flattened_layers.shuffle(&mut small_rng); 250 | 251 | info!("Produced {} total layers", flattened_layers.len()); 252 | 253 | let written_layers = progress_parallel_collect::, _>( 254 | "Writing Layers", 255 | flattened_layers.into_par_iter().map(|(image, layer)| { 256 | let raw_size = display_bytes(layer.raw_size()); 257 | let span = info_span!( 258 | "write_layer", 259 | items = layer.len(), 260 | raw_size = format_args!("{:#.1}", raw_size) 261 | ); 262 | let result = span.in_scope(|| { 263 | output_image 264 | .write_layer(layer, compression_level, image.image_digest()) 265 | .with_context(|| format!("Write layer {layer}")) 266 | })?; 267 | Ok((image, result)) 268 | }), 269 | )?; 270 | info!( 271 | "Wrote {} layers, writing config and finalizing image:", 272 | written_layers.len() 273 | ); 274 | let written_layers_map = written_layers.into_iter().into_group_map(); 275 | written_layers_map 276 | .into_iter() 277 | .map(|(image, layers)| { 278 | output_image 279 | .write_oci_image(image.config().clone(), layers, image.platform()) 280 | .context("Write Image") 281 | }) 282 | .collect::>>() 283 | } 284 | 285 | #[instrument(skip_all, fields(image = %input_image))] 286 | fn load_and_merge_image(input_image: &impl InputImage, combined_path: &Path) -> anyhow::Result> { 287 | let combined_output_file = File::options() 288 | .create(true) 289 | .truncate(true) 290 | .write(true) 291 | .open(combined_path) 292 | .with_context(|| format!("Opening file {combined_path:?}"))?; 293 | let mut combiner = LayerCombiner::new(combined_output_file); 294 | let layer_iterator = input_image.layers_from_manifest()?; 295 | for input_layer in progress::progress_iter("Merging Layers", layer_iterator) { 296 | let mut input_layer = input_layer?; 297 | let entries = progress::spinner_iter("Merging Entries", input_layer.entries()?); 298 | combiner.merge_entries(entries)?; 299 | } 300 | 301 | let total_items = combiner.finish()?; 302 | ImageItems::from_file(combined_path, total_items) 303 | } 304 | 305 | #[cfg(test)] 306 | mod tests { 307 | use super::*; 308 | use crate::layer_combiner::LayerCombiner; 309 | 310 | use crate::test_utils::build_layer; 311 | 312 | #[test] 313 | fn test_multiple_layers() { 314 | let layer_1 = build_layer() 315 | .with_files(&[ 316 | ("one.txt", b"content1"), 317 | ("two.txt", b"content2"), 318 | ("three.txt", b"content3"), 319 | ("four.txt", b"content4"), 320 | ]) 321 | .build(); 322 | 323 | let layer_2 = build_layer() 324 | .with_files(&[ 325 | ("five.txt", b"content5"), 326 | ("six.txt", b"content6"), 327 | ("seven.txt", b"content7"), 328 | ("eight.txt", b"content8"), 329 | ]) 330 | .build(); 331 | 332 | let layer_3 = 
build_layer() 333 | .with_files(&[ 334 | ("one.txt", b"new content 1"), 335 | ("five.txt", b"new content 2"), 336 | ("nine.txt", b"new content 3"), 337 | ]) 338 | .build(); 339 | 340 | let mut data = vec![]; 341 | let mut combiner = LayerCombiner::new(&mut data); 342 | combiner.merge_layer(layer_3).unwrap(); 343 | combiner.merge_layer(layer_2).unwrap(); 344 | combiner.merge_layer(layer_1).unwrap(); 345 | let total_items = combiner.finish().unwrap(); 346 | assert_eq!(total_items, 9); 347 | 348 | let items = ImageItems::from_data(data, 9); 349 | let content = items.get_image_content().unwrap(); 350 | let image_items = ImageItem::items_from_data(content, 1).unwrap(); 351 | assert_eq!(image_items.len(), 9); 352 | let layers = OutputLayers::pack_items(&image_items, 4096, 1024 * 1024 * 250).unwrap(); 353 | assert_eq!(layers.len(), 1); 354 | } 355 | } 356 | -------------------------------------------------------------------------------- /src/output_image/image.rs: -------------------------------------------------------------------------------- 1 | use crate::compression::Compression; 2 | use crate::input::Platform; 3 | use crate::io_utils::WriteCounter; 4 | use crate::output_image::layers::OutputLayer; 5 | use crate::output_image::stats::WrittenImageStats; 6 | use anyhow::Context; 7 | use itertools::Itertools; 8 | use oci_spec::image::{ 9 | Descriptor, HistoryBuilder, ImageConfiguration, ImageIndexBuilder, ImageManifestBuilder, MediaType, Sha256Digest, 10 | }; 11 | use serde::Serialize; 12 | use sha2::Digest; 13 | use std::fmt::{Debug, Display}; 14 | use std::fs::File; 15 | use std::io::{BufReader, BufWriter, Read}; 16 | use std::path::{Path, PathBuf}; 17 | use std::str::FromStr; 18 | use tracing::debug; 19 | 20 | pub struct WrittenLayer<'a> { 21 | pub layer: &'a OutputLayer<'a>, 22 | pub compressed_file_size: u64, 23 | pub raw_content_hash: String, 24 | pub compressed_content_hash: Sha256Digest, 25 | } 26 | 27 | pub struct OutputImageWriter { 28 | output_dir: PathBuf, 29 | blobs_dir: PathBuf, 30 | temp_dir: PathBuf, 31 | } 32 | 33 | impl Display for OutputImageWriter { 34 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 35 | write!(f, "OutputImage {}", self.output_dir.display()) 36 | } 37 | } 38 | 39 | impl OutputImageWriter { 40 | pub fn new(output_dir: PathBuf, temp_dir: PathBuf) -> anyhow::Result { 41 | let blobs_dir = output_dir.join("blobs").join("sha256"); 42 | std::fs::create_dir_all(&blobs_dir).with_context(|| format!("Creating blobs directory {blobs_dir:?}"))?; 43 | std::fs::create_dir_all(&temp_dir).with_context(|| format!("Creating temp directory {temp_dir:?}"))?; 44 | Ok(Self { 45 | output_dir, 46 | blobs_dir, 47 | temp_dir, 48 | }) 49 | } 50 | 51 | // #[instrument(skip_all, fields(self = %self, layers = %layers))] 52 | pub fn write_oci_image( 53 | &self, 54 | config: ImageConfiguration, 55 | mut written_layers: Vec, 56 | platform: Platform, 57 | ) -> anyhow::Result<(u64, Sha256Digest, WrittenImageStats)> { 58 | written_layers.sort_by_key(|l| (l.layer.type_, l.compressed_file_size)); 59 | let (config_size, config_hash) = self.write_config(&config, &written_layers).context("Write config")?; 60 | self.build_manifest(config_size, config_hash, &written_layers, platform) 61 | .context("Build manifest") 62 | } 63 | 64 | pub fn write_image_index(self, manifests: &[(u64, Sha256Digest, WrittenImageStats)]) -> anyhow::Result<()> { 65 | let description = manifests.iter().map(|(_, _, stats)| stats.description()).join(" / "); 66 | 67 | // All of our manifests should be added 
to a single index, which is stored as a blob. 68 | let index = manifests 69 | .iter() 70 | .map(|(size, hash, _)| Descriptor::new(MediaType::ImageManifest, *size, hash.clone())) 71 | .collect_vec(); 72 | let image_index = ImageIndexBuilder::default() 73 | .schema_version(2u32) 74 | .media_type(MediaType::ImageIndex) 75 | .annotations([("org.opencontainers.image.description".to_string(), description.clone())]) 76 | .manifests(index) 77 | .build() 78 | .context("ImageIndexBuilder Build")?; 79 | let (index_size, index_hash) = self.add_json_to_blobs(&image_index).context("Write index to blobs")?; 80 | 81 | // Now write a single index, that points to our single sub-index. 82 | let oci_index = ImageIndexBuilder::default() 83 | .schema_version(2u32) 84 | .media_type(MediaType::ImageIndex) 85 | .annotations([("org.opencontainers.image.description".to_string(), description.clone())]) 86 | .manifests(&[Descriptor::new(MediaType::ImageIndex, index_size, index_hash)]) 87 | .build() 88 | .context("ImageIndexBuilder Build")?; 89 | 90 | oci_index.to_file_pretty(self.output_dir.join("index.json"))?; 91 | 92 | std::fs::write(self.output_dir.join("oci-layout"), "{\"imageLayoutVersion\":\"1.0.0\"}")?; 93 | Ok(()) 94 | } 95 | 96 | fn build_manifest( 97 | &self, 98 | config_size: u64, 99 | config_hash: Sha256Digest, 100 | written_layers: &[WrittenLayer], 101 | platform: Platform, 102 | ) -> anyhow::Result<(u64, Sha256Digest, WrittenImageStats)> { 103 | let config_descriptor = Descriptor::new(MediaType::ImageConfig, config_size, config_hash); 104 | let layer_descriptors = written_layers 105 | .iter() 106 | .map(|l| { 107 | Descriptor::new( 108 | MediaType::ImageLayerZstd, 109 | l.compressed_file_size, 110 | l.compressed_content_hash.clone(), 111 | ) 112 | }) 113 | .collect_vec(); 114 | 115 | let stats = WrittenImageStats::new(written_layers, platform); 116 | 117 | let manifest = ImageManifestBuilder::default() 118 | .schema_version(2u32) 119 | .annotations([("org.opencontainers.image.description".to_string(), stats.description())]) 120 | .media_type(MediaType::ImageManifest) 121 | .config(config_descriptor) 122 | .layers(layer_descriptors) 123 | .build() 124 | .context("ImageManifestBuilder Build")?; 125 | let (manifest_size, manifest_hash) = self.add_json_to_blobs(&manifest).context("Write manifest to blobs")?; 126 | Ok((manifest_size, manifest_hash, stats)) 127 | } 128 | 129 | fn write_config( 130 | &self, 131 | config: &ImageConfiguration, 132 | layers: &[WrittenLayer], 133 | ) -> anyhow::Result<(u64, Sha256Digest)> { 134 | let created_at = chrono::Utc::now().to_rfc3339(); 135 | let diff_ids = layers 136 | .iter() 137 | .map(|l| format!("sha256:{}", l.raw_content_hash)) 138 | .collect_vec(); 139 | let history: Result, _> = layers 140 | .iter() 141 | .map(|l| { 142 | HistoryBuilder::default() 143 | .author("docker-repack") 144 | .created_by(l.layer.to_string()) 145 | .created(config.created().as_ref().unwrap_or(&created_at)) 146 | .empty_layer(false) 147 | .build() 148 | .with_context(|| format!("HistoryBuilder Build for layer {}", l.layer)) 149 | }) 150 | .collect(); 151 | 152 | let mut config = config.clone(); 153 | let root_fs = config.rootfs_mut(); 154 | root_fs.set_diff_ids(diff_ids); 155 | config.set_history(history?); 156 | 157 | self.add_json_to_blobs(&config).context("Write config to blobs") 158 | } 159 | 160 | pub fn write_layer<'a>( 161 | &'a self, 162 | layer: &'a OutputLayer, 163 | compression_level: i32, 164 | image_digest: oci_spec::image::Digest, 165 | ) -> anyhow::Result> { 166 | let mut 
hasher = sha2::Sha256::new(); 167 | layer 168 | .to_writer_with_progress("Hashing raw layer", &mut hasher) 169 | .context("Hashing with to_writer")?; 170 | let digest: [u8; 32] = hasher.finalize().into(); 171 | let raw_content_buffer: const_hex::Buffer<32> = const_hex::const_encode(&digest); 172 | let raw_content_hash = raw_content_buffer.as_str().to_string(); 173 | 174 | let mut counter = WriteCounter::new(); 175 | let writer = layer.to_writer(&mut counter).context("Write Counter")?; 176 | let raw_file_size = writer.written_bytes(); 177 | 178 | let layer_path = self.temp_dir.join(format!( 179 | "layer-{raw_content_hash}-for-{}-{}.tar.zst", 180 | image_digest.algorithm(), 181 | image_digest.digest() 182 | )); 183 | let layer_file = File::options() 184 | .create(true) 185 | .truncate(true) 186 | .write(true) 187 | .open(&layer_path) 188 | .with_context(|| format!("Creating temp file {layer_path:?}"))?; 189 | let mut out = Compression::Zstd 190 | .new_writer(BufWriter::new(layer_file), compression_level) 191 | .context("Constructing CompressedWriter")?; 192 | out.tune_for_output_size(raw_file_size)?; 193 | layer 194 | .to_writer_with_progress("Compressing layer", &mut out) 195 | .context("to_writer")?; 196 | out.finish().context("Finishing compression")?; 197 | 198 | debug!("Layer compressed to {:?}", layer_path); 199 | let (compressed_file_size, compressed_content_hash) = 200 | self.add_path_to_blobs(&layer_path).context("Adding layer to blobs")?; 201 | Ok(WrittenLayer { 202 | layer, 203 | raw_content_hash, 204 | compressed_content_hash, 205 | compressed_file_size, 206 | }) 207 | } 208 | 209 | fn add_json_to_blobs(&self, item: impl Serialize) -> anyhow::Result<(u64, Sha256Digest)> { 210 | let value = serde_json::to_string_pretty(&item)?; 211 | let (size, hash) = hash_reader(value.as_bytes())?; 212 | let path = self.blobs_dir.join(hash.digest()); 213 | std::fs::write(&path, value)?; 214 | Ok((size, hash)) 215 | } 216 | 217 | fn add_path_to_blobs(&self, input_path: impl AsRef + Debug) -> anyhow::Result<(u64, Sha256Digest)> { 218 | let (size, hash) = hash_file(&input_path).context("Hashing file")?; 219 | let path = self.blobs_dir.join(hash.digest()); 220 | std::fs::rename(&input_path, &path).with_context(|| format!("Renaming {input_path:?} to {path:?}"))?; 221 | Ok((size, hash)) 222 | } 223 | } 224 | 225 | fn hash_reader(mut content: impl Read) -> anyhow::Result<(u64, Sha256Digest)> { 226 | let mut hasher = sha2::Sha256::new(); 227 | let compressed_file_size = std::io::copy(&mut content, &mut hasher).context("Copying bytes")?; 228 | let digest: [u8; 32] = hasher.finalize().into(); 229 | let compressed_content_hash: const_hex::Buffer<32> = const_hex::const_encode(&digest); 230 | Ok(( 231 | compressed_file_size, 232 | Sha256Digest::from_str(compressed_content_hash.as_str())?, 233 | )) 234 | } 235 | 236 | fn hash_file(path: impl AsRef + Debug) -> anyhow::Result<(u64, Sha256Digest)> { 237 | let layer_file = File::options() 238 | .read(true) 239 | .open(&path) 240 | .with_context(|| format!("Opening {path:?} for reading"))?; 241 | hash_reader(BufReader::new(layer_file)).with_context(|| format!("Hashing {path:?}")) 242 | } 243 | -------------------------------------------------------------------------------- /src/output_image/layers.rs: -------------------------------------------------------------------------------- 1 | use crate::index::ImageItem; 2 | use anyhow::bail; 3 | use itertools::Itertools; 4 | use std::cmp::PartialEq; 5 | use std::collections::HashMap; 6 | use std::path::PathBuf; 7 | use 
tar::{Builder, EntryType}; 8 | 9 | use crate::progress::{display_bytes, progress_iter}; 10 | #[cfg(test)] 11 | use std::collections::HashSet; 12 | use std::fmt::{Debug, Display, Formatter}; 13 | use std::io::Write; 14 | use tracing::instrument; 15 | 16 | #[derive(Debug, Eq, PartialEq, Copy, Clone, Ord, PartialOrd, strum::Display)] 17 | pub enum LayerType { 18 | Small, 19 | Standard, 20 | Supersized, 21 | } 22 | 23 | #[derive(Debug)] 24 | pub struct OutputLayer<'a> { 25 | pub type_: LayerType, 26 | items: Vec<&'a ImageItem<'a>>, 27 | } 28 | 29 | impl Display for OutputLayer<'_> { 30 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 31 | write!( 32 | f, 33 | "{} items={} size={:#.1} compressed={:#.1}", 34 | self.type_, 35 | self.items.len(), 36 | display_bytes(self.raw_size()), 37 | display_bytes(self.compressed_size()), 38 | ) 39 | } 40 | } 41 | 42 | impl<'a> OutputLayer<'a> { 43 | pub fn from_items( 44 | type_: LayerType, 45 | items: &[&'a ImageItem<'a>], 46 | hardlink_map: &HashMap>, 47 | duplicate_map: &HashMap<[u8; 32], Vec<&&'a ImageItem>>, 48 | ) -> Self { 49 | let mut layer = OutputLayer { type_, items: vec![] }; 50 | for item in items { 51 | layer.add_item(item, hardlink_map, duplicate_map); 52 | } 53 | layer 54 | } 55 | 56 | #[inline] 57 | pub fn add_item( 58 | &mut self, 59 | item: &'a ImageItem<'a>, 60 | hardlink_map: &HashMap>, 61 | duplicate_map: &HashMap<[u8; 32], Vec<&&'a ImageItem>>, 62 | ) { 63 | self.items.push(item); 64 | if let Some(items) = hardlink_map.get(&item.path) { 65 | self.items.extend(items); 66 | } 67 | if let Some(duplicates) = duplicate_map.get(&item.hash) { 68 | self.items 69 | .extend(duplicates.iter().filter(|dup| dup.path != item.path).map(|&&item| item)); 70 | } 71 | } 72 | 73 | pub fn compressed_size(&self) -> u64 { 74 | self.items.iter().map(|item| item.compressed_size).sum() 75 | } 76 | 77 | pub fn raw_size(&self) -> u64 { 78 | self.items.iter().map(|item| item.raw_size).sum() 79 | } 80 | 81 | pub fn len(&self) -> usize { 82 | self.items.len() 83 | } 84 | 85 | #[inline(always)] 86 | fn to_writer_from_iterable( 87 | &self, 88 | out: &'a mut T, 89 | items: impl Iterator>, 90 | ) -> anyhow::Result<&'a mut T> { 91 | let mut archive = Builder::new(out); 92 | for item in items { 93 | if item.content.is_empty() { 94 | archive.append(&item.header, std::io::empty())?; 95 | } else { 96 | archive.append(&item.header, item.content)?; 97 | } 98 | } 99 | Ok(archive.into_inner()?) 
100 | } 101 | 102 | #[inline(always)] 103 | pub fn to_writer_with_progress( 104 | &'a self, 105 | name: &'static str, 106 | out: &'a mut T, 107 | ) -> anyhow::Result<&'a mut T> { 108 | self.to_writer_from_iterable(out, progress_iter(name, self.items.iter())) 109 | } 110 | 111 | #[inline(always)] 112 | pub fn to_writer(&'a self, out: &'a mut T) -> anyhow::Result<&'a mut T> { 113 | self.to_writer_from_iterable(out, self.items.iter()) 114 | } 115 | 116 | #[cfg(test)] 117 | pub fn paths(&self) -> Vec<&std::path::Path> { 118 | self.items.iter().map(|item| item.path.as_path()).collect_vec() 119 | } 120 | } 121 | 122 | pub struct OutputLayers<'a> { 123 | layers: Vec>, 124 | } 125 | 126 | impl Display for OutputLayers<'_> { 127 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 128 | let compressed_size = self 129 | .all_layers() 130 | .iter() 131 | .map(|layer| layer.compressed_size()) 132 | .sum::(); 133 | let raw_size = self.all_layers().iter().map(|layer| layer.raw_size()).sum::(); 134 | f.write_fmt(format_args!( 135 | "size={} raw={:#.1} compressed={:#.1}", 136 | self.len(), 137 | display_bytes(raw_size), 138 | display_bytes(compressed_size) 139 | )) 140 | } 141 | } 142 | 143 | impl<'a> OutputLayers<'a> { 144 | #[instrument(name = "packing files", skip_all)] 145 | pub fn pack_items( 146 | items_map: &'a HashMap, 147 | small_items_threshold: u64, 148 | target_size: u64, 149 | ) -> anyhow::Result> { 150 | let (hardlink_items, mut items): (Vec<_>, Vec<_>) = items_map 151 | .values() 152 | .partition(|item| item.header.entry_type() == EntryType::Link); 153 | 154 | let mut hardlink_map: HashMap> = HashMap::new(); 155 | for item in hardlink_items { 156 | if let Some(link_name) = item.header.link_name()? { 157 | hardlink_map.entry(link_name.to_path_buf()).or_default().push(item); 158 | } else { 159 | bail!("Link item without link name: {}", item.path.display()); 160 | } 161 | } 162 | 163 | items.sort_by(|e1, e2| e1.path.cmp(&e2.path)); 164 | 165 | let (small_items, standard_items): (Vec<_>, Vec<_>) = items.into_iter().partition(|item| { 166 | (item.raw_size <= small_items_threshold || item.compressed_size <= small_items_threshold) 167 | && matches!( 168 | item.header.entry_type(), 169 | EntryType::Regular | EntryType::Symlink | EntryType::Directory 170 | ) 171 | }); 172 | 173 | let (standard_items, extra_large_items): (Vec<_>, Vec<_>) = standard_items 174 | .into_iter() 175 | .partition(|item| item.compressed_size <= target_size); 176 | 177 | let files_by_hash = standard_items.iter().into_group_map_by(|v| v.hash); 178 | let small_layer = OutputLayer::from_items(LayerType::Small, &small_items, &hardlink_map, &files_by_hash); 179 | 180 | let unique_files_by_hash = standard_items.iter().unique_by(|v| v.hash).copied().collect_vec(); 181 | 182 | let mut layers: Vec = Vec::with_capacity(14); 183 | 'outer: for item in unique_files_by_hash { 184 | for layer in layers.iter_mut() { 185 | if layer.compressed_size() + item.compressed_size <= target_size { 186 | layer.add_item(item, &hardlink_map, &files_by_hash); 187 | continue 'outer; 188 | } 189 | } 190 | layers.push(OutputLayer::from_items( 191 | LayerType::Standard, 192 | &[item], 193 | &hardlink_map, 194 | &files_by_hash, 195 | )) 196 | } 197 | layers.push(small_layer); 198 | for item in extra_large_items { 199 | layers.push(OutputLayer::from_items( 200 | LayerType::Supersized, 201 | &[item], 202 | &hardlink_map, 203 | &files_by_hash, 204 | )) 205 | } 206 | 207 | Ok(OutputLayers { layers }) 208 | } 209 | 210 | pub fn all_layers(&self) -> 
&[OutputLayer<'a>] { 211 | self.layers.as_slice() 212 | } 213 | 214 | pub fn len(&self) -> usize { 215 | self.layers.len() 216 | } 217 | 218 | #[cfg(test)] 219 | pub fn layers_by_type(&self, type_: LayerType) -> impl Iterator> { 220 | self.all_layers().iter().filter(move |layer| layer.type_ == type_) 221 | } 222 | 223 | #[cfg(test)] 224 | pub fn small_layers(&self) -> Vec<&OutputLayer<'a>> { 225 | self.layers_by_type(LayerType::Small).collect_vec() 226 | } 227 | 228 | #[cfg(test)] 229 | pub fn supersized_layers(&self) -> Vec<&OutputLayer<'a>> { 230 | self.layers_by_type(LayerType::Supersized).collect_vec() 231 | } 232 | 233 | #[cfg(test)] 234 | fn layer_set(&self) -> HashSet<&std::path::Path> { 235 | self.layers.iter().flat_map(|layer| layer.paths()).collect() 236 | } 237 | } 238 | 239 | #[cfg(test)] 240 | mod tests { 241 | use super::*; 242 | use crate::index::ImageItems; 243 | 244 | use crate::test_utils::{add_dir, add_file, add_hardlink, compare_paths, setup_tar}; 245 | 246 | #[test] 247 | fn test_pack_items_works() { 248 | let mut tar_1 = setup_tar(); 249 | add_dir(&mut tar_1, "test/"); 250 | add_file(&mut tar_1, "test/small.txt", b"small"); 251 | add_file(&mut tar_1, "test/large.txt", b"larger content value"); 252 | let data = tar_1.into_inner().unwrap(); 253 | 254 | let items = ImageItems::from_data(data, 3); 255 | let content = items.get_image_content().unwrap(); 256 | 257 | let items = ImageItem::items_from_data(content, 1).unwrap(); 258 | 259 | let packed = OutputLayers::pack_items(&items, 100, 10).unwrap(); 260 | compare_paths( 261 | packed.small_layers()[0].paths(), 262 | vec!["test/", "test/small.txt", "test/large.txt"], 263 | ); 264 | 265 | let packed = OutputLayers::pack_items(&items, 1, 10).unwrap(); 266 | compare_paths(packed.small_layers()[0].paths(), vec!["test/"]); 267 | } 268 | 269 | #[test] 270 | fn test_pack_items_simple_hardlinks() { 271 | let mut tar_1 = setup_tar(); 272 | add_dir(&mut tar_1, "test/"); 273 | add_file(&mut tar_1, "test/small.txt", b"small"); 274 | add_hardlink(&mut tar_1, "test/small-link.txt", "test/small.txt"); 275 | let data = tar_1.into_inner().unwrap(); 276 | let items = ImageItems::from_data(data, 3); 277 | let content = items.get_image_content().unwrap(); 278 | let items = ImageItem::items_from_data(content, 1).unwrap(); 279 | 280 | let packed = OutputLayers::pack_items(&items, 5, 10).unwrap(); 281 | compare_paths( 282 | packed.layer_set().iter().collect_vec(), 283 | vec!["test/", "test/small.txt", "test/small-link.txt"], 284 | ); 285 | compare_paths( 286 | packed.small_layers()[0].paths(), 287 | vec!["test/", "test/small.txt", "test/small-link.txt"], 288 | ); 289 | 290 | let packed = OutputLayers::pack_items(&items, 2, 10).unwrap(); 291 | compare_paths(packed.small_layers()[0].paths(), vec!["test/"]); 292 | } 293 | 294 | #[test] 295 | fn test_pack_duplicate_items() { 296 | let mut tar_1 = setup_tar(); 297 | add_file(&mut tar_1, "one.txt", b"content1"); 298 | add_file(&mut tar_1, "two.txt", b"content1"); 299 | add_file(&mut tar_1, "three.txt", b"content2"); 300 | let data = tar_1.into_inner().unwrap(); 301 | 302 | let items = ImageItems::from_data(data, 3); 303 | let content = items.get_image_content().unwrap(); 304 | 305 | let items = ImageItem::items_from_data(content, 1).unwrap(); 306 | 307 | let target_size = items[&PathBuf::from("one.txt")].compressed_size; 308 | 309 | let packed = OutputLayers::pack_items(&items, 1, target_size).unwrap(); 310 | compare_paths( 311 | packed.layer_set().iter().collect_vec(), 312 | vec!["two.txt", "one.txt", 
"three.txt"], 313 | ); 314 | compare_paths(packed.small_layers()[0].paths(), vec![]); 315 | compare_paths(packed.layers[0].paths(), vec!["one.txt", "two.txt"]); 316 | compare_paths(packed.layers[1].paths(), vec!["three.txt"]); 317 | } 318 | 319 | #[test] 320 | fn test_pack_large_items() { 321 | let mut tar_1 = setup_tar(); 322 | add_file(&mut tar_1, "one.txt", b"content1"); 323 | add_file(&mut tar_1, "two.txt", b"content1234567890"); 324 | let data = tar_1.into_inner().unwrap(); 325 | 326 | let items = ImageItems::from_data(data, 2); 327 | let content = items.get_image_content().unwrap(); 328 | let items = ImageItem::items_from_data(content, 1).unwrap(); 329 | 330 | let target_size = items[&PathBuf::from("one.txt")].compressed_size; 331 | 332 | let packed = OutputLayers::pack_items(&items, 1, target_size).unwrap(); 333 | compare_paths(packed.layer_set().iter().collect_vec(), vec!["two.txt", "one.txt"]); 334 | compare_paths(packed.small_layers()[0].paths(), vec![]); 335 | compare_paths(packed.layers[0].paths(), vec!["one.txt"]); 336 | compare_paths(packed.supersized_layers()[0].paths(), vec!["two.txt"]); 337 | } 338 | } 339 | -------------------------------------------------------------------------------- /src/output_image/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod image; 2 | pub mod layers; 3 | pub mod stats; 4 | -------------------------------------------------------------------------------- /src/output_image/stats.rs: -------------------------------------------------------------------------------- 1 | use crate::input::Platform; 2 | use crate::output_image::image::WrittenLayer; 3 | use crate::output_image::layers::LayerType; 4 | use crate::progress::display_bytes; 5 | use std::fmt::Display; 6 | 7 | pub struct WrittenLayerStats { 8 | pub type_: LayerType, 9 | pub compressed_file_size: u64, 10 | pub raw_file_size: u64, 11 | pub item_count: usize, 12 | } 13 | 14 | impl WrittenLayerStats { 15 | pub fn from_written_layer(layer: &WrittenLayer) -> Self { 16 | Self { 17 | type_: layer.layer.type_, 18 | compressed_file_size: layer.compressed_file_size, 19 | raw_file_size: layer.layer.raw_size(), 20 | item_count: layer.layer.len(), 21 | } 22 | } 23 | } 24 | 25 | impl Display for WrittenLayerStats { 26 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 27 | write!( 28 | f, 29 | "Type: {}, Size: {:#.1}, Uncompressed Size: {:#.1}, File Count: {}", 30 | self.type_, 31 | display_bytes(self.compressed_file_size), 32 | display_bytes(self.raw_file_size), 33 | self.item_count 34 | ) 35 | } 36 | } 37 | 38 | pub struct WrittenImageStats { 39 | pub layers: Vec, 40 | pub platform: Platform, 41 | } 42 | 43 | impl WrittenImageStats { 44 | pub fn new(layers: &[WrittenLayer], platform: Platform) -> Self { 45 | Self { 46 | platform, 47 | layers: layers.iter().map(WrittenLayerStats::from_written_layer).collect(), 48 | } 49 | } 50 | 51 | pub fn description(&self) -> String { 52 | let total_raw_size = self.layers.iter().map(|l| l.raw_file_size).sum::(); 53 | let total_compressed_size = self.layers.iter().map(|l| l.compressed_file_size).sum::(); 54 | format!( 55 | "{}: layers={} compressed={:#.1} raw={:#.1}", 56 | self.platform, 57 | self.layers.len(), 58 | display_bytes(total_raw_size), 59 | display_bytes(total_compressed_size) 60 | ) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/platform_matcher.rs: -------------------------------------------------------------------------------- 1 | 
use globset::{Glob, GlobBuilder, GlobMatcher}; 2 | use oci_client::manifest::Platform as OciClientPlatform; 3 | use oci_spec::image::Platform; 4 | use std::fmt::{Display, Formatter}; 5 | use tracing::{debug, instrument}; 6 | 7 | #[derive(Debug)] 8 | pub struct PlatformMatcher { 9 | glob: GlobMatcher, 10 | exclude: GlobMatcher, 11 | } 12 | 13 | impl Display for PlatformMatcher { 14 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 15 | f.write_str("PlatformMatcher: ")?; 16 | f.write_str(self.glob.glob().glob())?; 17 | f.write_str(" (exclude: ")?; 18 | f.write_str(self.exclude.glob().glob())?; 19 | f.write_str(")") 20 | } 21 | } 22 | 23 | impl PlatformMatcher { 24 | pub fn from_glob(glob: Glob) -> anyhow::Result { 25 | let glob_pattern = glob.glob(); 26 | debug!("Creating platform matcher for glob pattern: {}", glob_pattern); 27 | 28 | let glob = GlobBuilder::new(glob_pattern) 29 | .case_insensitive(true) 30 | .literal_separator(false) 31 | .build()?; 32 | let exclude = GlobBuilder::new("unknown/*") 33 | .case_insensitive(true) 34 | .literal_separator(false) 35 | .build()?; 36 | 37 | let exclude = exclude.compile_matcher(); 38 | let glob = glob.compile_matcher(); 39 | Ok(Self { glob, exclude }) 40 | } 41 | 42 | #[cfg(test)] 43 | pub fn match_all() -> Self { 44 | let glob = Glob::new("*").unwrap(); 45 | Self::from_glob(glob).unwrap() 46 | } 47 | 48 | #[instrument(level = "debug")] 49 | pub fn matches_str(&self, os: &str, arch: &str, variant: &str) -> bool { 50 | let os = os.to_ascii_lowercase(); 51 | let arch = arch.to_ascii_lowercase(); 52 | let variant = variant.to_ascii_lowercase(); 53 | 54 | let os_arch = format!("{}/{}", os, arch); 55 | let os_arch_variant = format!("{}/{}/{}", os, arch, variant); 56 | 57 | if self.exclude.is_match(&os_arch_variant) || self.exclude.is_match(&os_arch) { 58 | debug!("Platform is excluded"); 59 | return false; 60 | } 61 | 62 | let result = self.glob.is_match(os_arch) || self.glob.is_match(&os_arch_variant); 63 | if result { 64 | debug!("Platform matched"); 65 | } else { 66 | debug!("Platform does not match"); 67 | } 68 | result 69 | } 70 | 71 | pub fn matches_oci_spec_platform(&self, platform: Option<&Platform>) -> bool { 72 | match platform { 73 | Some(platform) => { 74 | let os = platform.os(); 75 | let arch = platform.architecture(); 76 | let variant = platform.variant().as_ref().map(|s| s.as_str()).unwrap_or("unknown"); 77 | self.matches_str(&os.to_string(), &arch.to_string(), variant) 78 | } 79 | None => { 80 | // If no platform is specified, we assume it matches 81 | true 82 | } 83 | } 84 | } 85 | 86 | pub fn matches_oci_client_platform(&self, platform: Option<&OciClientPlatform>) -> bool { 87 | match platform { 88 | Some(platform) => { 89 | let os = &platform.os; 90 | let arch = &platform.architecture; 91 | let variant = platform.variant.as_deref().unwrap_or("unknown"); 92 | self.matches_str(os, arch, variant) 93 | } 94 | None => { 95 | // If no platform is specified, we assume it matches 96 | true 97 | } 98 | } 99 | } 100 | } 101 | 102 | #[cfg(test)] 103 | mod tests { 104 | use super::*; 105 | use oci_spec::image::PlatformBuilder; 106 | #[test] 107 | fn test_matcher() { 108 | for pattern in ["linux/*", "linux/amd64"] { 109 | let matcher = PlatformMatcher::from_glob(Glob::new(pattern).unwrap()).unwrap(); 110 | assert!(matcher.matches_str("linux", "amd64", "unknown")); 111 | assert!(!matcher.matches_str("windows", "amd64", "unknown")); 112 | assert!(!matcher.matches_str("unknown", "unknown", "unknown")); 113 | } 114 | } 115 | 116 | #[test] 
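// Note that matching is case-insensitive: `from_glob` compiles both the user
// pattern and the built-in `unknown/*` exclusion with `case_insensitive(true)`,
// so "Linux/Amd64" matches "linux/amd64". The test below exercises exactly that.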
117 | fn test_matcher_insensitive() { 118 | let matcher = PlatformMatcher::from_glob(Glob::new("linux/*").unwrap()).unwrap(); 119 | assert!(matcher.matches_str("Linux", "amd64", "unknown")); 120 | let matcher = PlatformMatcher::from_glob(Glob::new("linux/amd64").unwrap()).unwrap(); 121 | assert!(matcher.matches_str("Linux", "Amd64", "unknown")); 122 | } 123 | 124 | #[test] 125 | fn test_oci_client_matcher() { 126 | let platform = OciClientPlatform { 127 | os: "Linux".to_string(), 128 | architecture: "arm64".to_string(), 129 | os_version: None, 130 | os_features: None, 131 | variant: Some("v8".to_string()), 132 | features: None, 133 | }; 134 | 135 | let platform_unknown = OciClientPlatform { 136 | os: "Unknown".to_string(), 137 | architecture: "Unknown".to_string(), 138 | os_version: None, 139 | os_features: None, 140 | variant: Some("Unknown".to_string()), 141 | features: None, 142 | }; 143 | 144 | for pattern in ["linux/*", "linux/arm64", "linux/arm64/v8", "*/arm64/v8"] { 145 | let matcher = PlatformMatcher::from_glob(Glob::new(pattern).unwrap()).unwrap(); 146 | assert!(matcher.matches_oci_client_platform(Some(&platform))); 147 | assert!(!matcher.matches_oci_client_platform(Some(&platform_unknown))); 148 | assert!(matcher.matches_oci_client_platform(None)); 149 | } 150 | } 151 | 152 | #[test] 153 | fn test_oci_spec_platform() { 154 | let platform = PlatformBuilder::default() 155 | .os("linux") 156 | .architecture("arm64") 157 | .variant("v8") 158 | .build() 159 | .unwrap(); 160 | let platform_unknown = PlatformBuilder::default() 161 | .os("unknown") 162 | .architecture("unknown") 163 | .variant("unknown") 164 | .build() 165 | .unwrap(); 166 | 167 | for pattern in ["linux/*", "linux/arm64", "linux/arm64/v8", "*/arm64/v8"] { 168 | let matcher = PlatformMatcher::from_glob(Glob::new(pattern).unwrap()).unwrap(); 169 | assert!(matcher.matches_oci_spec_platform(Some(&platform))); 170 | assert!(!matcher.matches_oci_spec_platform(Some(&platform_unknown))); 171 | assert!(matcher.matches_oci_spec_platform(None)); 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/progress.rs: -------------------------------------------------------------------------------- 1 | use byte_unit::{AdjustedByte, Byte, UnitType}; 2 | use itertools::{Itertools, Position}; 3 | use rayon::iter::{FromParallelIterator, IndexedParallelIterator, ParallelIterator}; 4 | use std::io::{stderr, IsTerminal}; 5 | use std::time::Instant; 6 | use tracing::{info, info_span, Span}; 7 | use tracing_indicatif::span_ext::IndicatifSpanExt; 8 | 9 | pub fn display_bytes(size: u64) -> AdjustedByte { 10 | Byte::from(size).get_appropriate_unit(UnitType::Both) 11 | } 12 | 13 | const PBAR_TEMPLATE: &str = "{span_child_prefix} {msg} {percent}% {wide_bar} {per_sec} [{human_pos}/{human_len}]"; 14 | 15 | const SPINNER_TEMPLATE: &str = "{span_child_prefix} {spinner} {msg} {human_pos} - {per_sec}"; 16 | 17 | fn setup_span_bar(span: &Span, size: usize, message: &'static str) -> Span { 18 | span.pb_set_message(message); 19 | span.pb_set_style(&indicatif::ProgressStyle::default_bar().template(PBAR_TEMPLATE).unwrap()); 20 | span.pb_set_length(size as u64); 21 | Span::current() 22 | } 23 | 24 | fn setup_span_spinner(span: &Span, message: &'static str) -> Span { 25 | span.pb_set_message(message); 26 | span.pb_set_style( 27 | &indicatif::ProgressStyle::default_spinner() 28 | .template(SPINNER_TEMPLATE) 29 | .unwrap(), 30 | ); 31 | Span::current() 32 | } 33 | 34 | pub fn progress_parallel_collect, T: 
Send>( 35 | message: &'static str, 36 | iterator: impl IndexedParallelIterator>, 37 | ) -> anyhow::Result { 38 | let total = iterator.len(); 39 | let span = info_span!("task", items = total); 40 | let entered = span.enter(); 41 | let span = setup_span_bar(&span, total, message); 42 | let is_term = stderr().is_terminal(); 43 | 44 | if is_term { 45 | iterator 46 | .inspect(move |_| { 47 | span.pb_inc(1); 48 | let _ = entered; 49 | }) 50 | .collect() 51 | } else { 52 | let start = Instant::now(); 53 | let res = iterator.collect(); 54 | info!("{message} completed in {:#.1?}", start.elapsed()); 55 | let _ = entered; 56 | res 57 | } 58 | } 59 | 60 | pub fn progress_iter( 61 | message: &'static str, 62 | iterator: impl ExactSizeIterator, 63 | ) -> impl ExactSizeIterator { 64 | let total = iterator.len(); 65 | let span = info_span!("task", items = total); 66 | let entered = span.enter(); 67 | let span = setup_span_bar(&span, total, message); 68 | let is_term = stderr().is_terminal(); 69 | let start = Instant::now(); 70 | 71 | iterator.with_position().map(move |(pos, v)| { 72 | if is_term { 73 | span.pb_inc(1); 74 | } else if pos == Position::Last { 75 | info!("{message} completed in {:#.1?}", start.elapsed()); 76 | } 77 | let _ = entered; 78 | v 79 | }) 80 | } 81 | 82 | pub fn spinner_iter(message: &'static str, iterator: impl Iterator) -> impl Iterator { 83 | let span = info_span!("task"); 84 | let entered = span.enter(); 85 | let span = setup_span_spinner(&span, message); 86 | 87 | iterator.inspect(move |_| { 88 | span.pb_inc(1); 89 | let _ = entered; 90 | }) 91 | } 92 | -------------------------------------------------------------------------------- /src/test_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::input::layers::InputLayer; 2 | use oci_spec::image::Digest; 3 | use std::collections::{HashMap, HashSet}; 4 | use std::io::{Cursor, Read, Write}; 5 | use std::path::{Path, PathBuf}; 6 | use std::str::FromStr; 7 | use tar::{Builder, EntryType, Header}; 8 | 9 | #[derive(Default)] 10 | pub struct LayerBuilder { 11 | files: Vec<(PathBuf, Vec)>, 12 | hardlinks: Vec<(PathBuf, PathBuf)>, 13 | symlinks: Vec<(PathBuf, PathBuf)>, 14 | directories: Vec, 15 | } 16 | 17 | impl LayerBuilder { 18 | pub fn with_files(mut self, files: &[(impl AsRef, &[u8])]) -> Self { 19 | self.files 20 | .extend(files.iter().map(|(p, d)| (p.as_ref().to_path_buf(), d.to_vec()))); 21 | self 22 | } 23 | 24 | #[allow(dead_code)] 25 | pub fn with_symlinks(mut self, symlinks: &[(impl AsRef, impl AsRef)]) -> Self { 26 | self.symlinks.extend( 27 | symlinks 28 | .iter() 29 | .map(|(p, d)| (p.as_ref().to_path_buf(), d.as_ref().to_path_buf())), 30 | ); 31 | self 32 | } 33 | 34 | #[allow(dead_code)] 35 | pub fn with_hardlinks(mut self, hardlinks: &[(impl AsRef, impl AsRef)]) -> Self { 36 | self.hardlinks.extend( 37 | hardlinks 38 | .iter() 39 | .map(|(p, d)| (p.as_ref().to_path_buf(), d.as_ref().to_path_buf())), 40 | ); 41 | self 42 | } 43 | 44 | #[allow(dead_code)] 45 | pub fn with_directories(mut self, directories: &[impl AsRef]) -> Self { 46 | self.directories 47 | .extend(directories.iter().map(|p| p.as_ref().to_path_buf())); 48 | self 49 | } 50 | 51 | pub fn build(self) -> InputLayer { 52 | let content = self.build_raw(); 53 | InputLayer::new( 54 | Digest::from_str("sha256:0d90d93a5cab3fd2879040420c7b7e4958aee8997fef78e9a5dd80cb01f3bd9c").unwrap(), 55 | Cursor::new(content), 56 | ) 57 | .unwrap() 58 | } 59 | 60 | pub fn build_raw(self) -> Vec { 61 | let mut builder = 
setup_tar(); 62 | for directory in self.directories { 63 | add_dir(&mut builder, directory); 64 | } 65 | for (path, content) in self.files { 66 | add_file(&mut builder, path, &content); 67 | } 68 | for (path, to_path) in self.hardlinks { 69 | add_hardlink(&mut builder, path, to_path); 70 | } 71 | for (path, to_path) in self.symlinks { 72 | add_symlink(&mut builder, path, to_path); 73 | } 74 | 75 | builder.into_inner().unwrap() 76 | } 77 | } 78 | 79 | pub fn read_tar_entries(content: &[u8]) -> Vec<(Header, Vec)> { 80 | let mut archive = tar::Archive::new(content); 81 | archive 82 | .entries() 83 | .unwrap() 84 | .map(|x| { 85 | let mut entry = x.unwrap(); 86 | let header = entry.header().clone(); 87 | let mut content = vec![]; 88 | entry.read_to_end(&mut content).unwrap(); 89 | (header, content) 90 | }) 91 | .collect() 92 | } 93 | 94 | pub fn read_tar_entries_content(content: &[u8]) -> HashMap> { 95 | let entries = read_tar_entries(content); 96 | entries 97 | .into_iter() 98 | .map(|(header, content)| { 99 | let path = header.path().unwrap().to_path_buf(); 100 | (path, content) 101 | }) 102 | .collect() 103 | } 104 | 105 | pub fn build_layer() -> LayerBuilder { 106 | LayerBuilder::default() 107 | } 108 | 109 | pub fn setup_tar() -> Builder> { 110 | Builder::new(vec![]) 111 | } 112 | 113 | pub fn new_header(type_: EntryType, path: impl AsRef) -> Header { 114 | let mut header = Header::new_gnu(); 115 | header.set_entry_type(type_); 116 | header.set_path(path).unwrap(); 117 | header 118 | } 119 | 120 | pub fn add_dir(builder: &mut Builder, path: impl AsRef) { 121 | let mut header = new_header(EntryType::Directory, path); 122 | header.set_size(0); 123 | header.set_cksum(); 124 | builder.append(&header, &mut std::io::empty()).unwrap(); 125 | } 126 | 127 | pub fn add_file(builder: &mut Builder, path: impl AsRef, content: &[u8]) { 128 | let mut header = new_header(EntryType::Regular, &path); 129 | header.set_size(content.len() as u64); 130 | builder.append_data(&mut header, &path, content).unwrap(); 131 | } 132 | 133 | pub fn add_symlink(builder: &mut Builder, path: impl AsRef, to_path: impl AsRef) { 134 | let mut header = new_header(EntryType::Symlink, &path); 135 | header.set_size(0); 136 | builder.append_link(&mut header, path, &to_path).unwrap(); 137 | } 138 | 139 | pub fn add_hardlink(builder: &mut Builder, path: impl AsRef, to_path: impl AsRef) { 140 | let mut header = new_header(EntryType::Link, &path); 141 | header.set_size(0); 142 | builder.append_link(&mut header, path, &to_path).unwrap(); 143 | } 144 | 145 | pub fn compare_paths(paths: Vec>, expected: Vec<&str>) { 146 | let paths: HashSet<_> = paths.iter().map(|v| v.as_ref()).collect(); 147 | let expected: HashSet<_> = expected.iter().map(|v| v.as_ref()).collect(); 148 | assert_eq!(paths, expected); 149 | } 150 | -------------------------------------------------------------------------------- /web/.gitignore: -------------------------------------------------------------------------------- 1 | # build output 2 | dist/ 3 | 4 | # generated types 5 | .astro/ 6 | 7 | # dependencies 8 | node_modules/ 9 | 10 | # logs 11 | npm-debug.log* 12 | yarn-debug.log* 13 | yarn-error.log* 14 | pnpm-debug.log* 15 | 16 | # environment variables 17 | .env 18 | .env.production 19 | 20 | # macOS-specific files 21 | .DS_Store 22 | 23 | # jetbrains setting folder 24 | .idea/ 25 | src/data/* -------------------------------------------------------------------------------- /web/.prettierrc.mjs: 
-------------------------------------------------------------------------------- 1 | // .prettierrc.mjs 2 | /** @type {import("prettier").Config} */ 3 | export default { 4 | plugins: ["prettier-plugin-astro"], 5 | overrides: [ 6 | { 7 | files: "*.astro", 8 | options: { 9 | parser: "astro", 10 | }, 11 | }, 12 | ], 13 | }; 14 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | # Astro Starter Kit: Basics 2 | 3 | ```sh 4 | npm create astro@latest -- --template basics 5 | ``` 6 | 7 | [![Open in StackBlitz](https://developer.stackblitz.com/img/open_in_stackblitz.svg)](https://stackblitz.com/github/withastro/astro/tree/latest/examples/basics) 8 | [![Open with CodeSandbox](https://assets.codesandbox.io/github/button-edit-lime.svg)](https://codesandbox.io/p/sandbox/github/withastro/astro/tree/latest/examples/basics) 9 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/withastro/astro?devcontainer_path=.devcontainer/basics/devcontainer.json) 10 | 11 | > 🧑‍🚀 **Seasoned astronaut?** Delete this file. Have fun! 12 | 13 | ![just-the-basics](https://github.com/withastro/astro/assets/2244813/a0a5533c-a856-4198-8470-2d67b1d7c554) 14 | 15 | ## 🚀 Project Structure 16 | 17 | Inside of your Astro project, you'll see the following folders and files: 18 | 19 | ```text 20 | / 21 | ├── public/ 22 | │ └── favicon.svg 23 | ├── src/ 24 | │ ├── components/ 25 | │ │ └── Card.astro 26 | │ ├── layouts/ 27 | │ │ └── Layout.astro 28 | │ └── pages/ 29 | │ └── index.astro 30 | └── package.json 31 | ``` 32 | 33 | Astro looks for `.astro` or `.md` files in the `src/pages/` directory. Each page is exposed as a route based on its file name. 34 | 35 | There's nothing special about `src/components/`, but that's where we like to put any Astro/React/Vue/Svelte/Preact components. 36 | 37 | Any static assets, like images, can be placed in the `public/` directory. 38 | 39 | ## 🧞 Commands 40 | 41 | All commands are run from the root of the project, from a terminal: 42 | 43 | | Command | Action | 44 | | :------------------------ | :----------------------------------------------- | 45 | | `npm install` | Installs dependencies | 46 | | `npm run dev` | Starts local dev server at `localhost:4321` | 47 | | `npm run build` | Build your production site to `./dist/` | 48 | | `npm run preview` | Preview your build locally, before deploying | 49 | | `npm run astro ...` | Run CLI commands like `astro add`, `astro check` | 50 | | `npm run astro -- --help` | Get help using the Astro CLI | 51 | 52 | ## 👀 Want to learn more? 53 | 54 | Feel free to check [our documentation](https://docs.astro.build) or jump into our [Discord server](https://astro.build/chat). 
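A concrete illustration of the file-based routing described above, using the pages that actually exist in this repository's `web/src/pages/` directory (a sketch only — `routeFor` is a hypothetical helper, and the public URL is additionally prefixed by whatever `base` is configured in `astro.config.mjs`):

```ts
// Hypothetical helper mirroring Astro's convention: a page's route is its path
// under src/pages/ with the extension dropped, and "index" collapsing to "/".
const routeFor = (pageFile: string): string =>
  "/" +
  pageFile
    .replace(/^src\/pages\//, "")
    .replace(/\.(astro|md)$/, "")
    .replace(/(^|\/)index$/, "");

routeFor("src/pages/index.astro");      // "/"
routeFor("src/pages/benchmarks.astro"); // "/benchmarks"
```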
55 | -------------------------------------------------------------------------------- /web/astro.config.mjs: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import { defineConfig } from "astro/config"; 3 | 4 | import tailwind from "@astrojs/tailwind"; 5 | import ViteYaml from "@modyfi/vite-plugin-yaml"; 6 | import react from "@astrojs/react"; 7 | 8 | export default defineConfig({ 9 | site: process.env.SITE_ORIGIN, 10 | base: process.env.SITE_PREFIX, 11 | integrations: [tailwind(), react()], 12 | vite: { 13 | plugins: [ViteYaml()], 14 | }, 15 | }); 16 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "type": "module", 4 | "version": "0.0.1", 5 | "scripts": { 6 | "dev": "astro dev", 7 | "start": "astro dev", 8 | "build": "astro check && astro build", 9 | "preview": "astro preview", 10 | "astro": "astro" 11 | }, 12 | "dependencies": { 13 | "@astrojs/check": "^0.9.3", 14 | "@astrojs/react": "^3.6.2", 15 | "@astrojs/tailwind": "^5.1.0", 16 | "@emotion/react": "^11.13.3", 17 | "@emotion/styled": "^11.13.0", 18 | "@fontsource/roboto": "^5.0.15", 19 | "@mui/material": "^6.0.2", 20 | "@mui/x-charts": "^7.16.0", 21 | "@octokit/rest": "^21.0.2", 22 | "@types/node": "^22.5.4", 23 | "@types/react": "^18.3.5", 24 | "@types/react-dom": "^18.3.0", 25 | "adm-zip": "^0.5.16", 26 | "astro": "^4.15.4", 27 | "axios": "^1.7.7", 28 | "humanize-duration": "^3.32.1", 29 | "lodash.groupby": "^4.6.0", 30 | "react": "^18.3.1", 31 | "react-dom": "^18.3.1", 32 | "tailwindcss": "^3.4.10", 33 | "typescript": "^5.5.4" 34 | }, 35 | "devDependencies": { 36 | "@modyfi/vite-plugin-yaml": "^1.1.0", 37 | "@types/adm-zip": "^0.5.5", 38 | "@types/lodash.groupby": "^4.6.9", 39 | "prettier": "^3.3.3", 40 | "prettier-plugin-astro": "^0.14.1" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /web/public/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | -------------------------------------------------------------------------------- /web/src/benchmark_parser.ts: -------------------------------------------------------------------------------- 1 | import groupBy from "lodash.groupby"; 2 | import axios from "axios"; 3 | import AdmZip from "adm-zip"; 4 | // @ts-ignore 5 | import sources from "../../benchmark/sources.yaml"; 6 | import { getArtifact, githubClient } from "./github_client.ts"; 7 | import { getManifests, type Layer } from "./manifest_parser.ts"; 8 | 9 | const destinationToUpstream = Object.fromEntries( 10 | // @ts-ignore 11 | sources.upstream_images.map(({ upstream_image, destination }) => [ 12 | destination, 13 | upstream_image, 14 | ]), 15 | ); 16 | 17 | export interface BenchmarkImageTime { 18 | image: string; 19 | // type: "original" | "zstd" | "25MB" | "50MB" | "100MB" | "200MB"; 20 | type: string; 21 | time: number; 22 | total_size: number; 23 | layers: Layer[]; 24 | } 25 | 26 | export interface BenchmarkImage { 27 | name: string; 28 | repo: string; 29 | imageName: string; 30 | name_slug: string; 31 | times_faster: number; 32 | times_smaller: number; 33 | fastest_type: string; 34 | times: BenchmarkImageTime[]; 35 | } 36 | 37 | export interface BenchmarkData { 38 | images: BenchmarkImage[]; 39 | } 40 | 41 | export async function parseBenchmarkData(): Promise { 42 | const manifests = await 
getManifests(); 43 | 44 | const zip = await getArtifact("benchmark-results"); 45 | const results = zip.getEntry("results.json"); 46 | if (results == null) { 47 | throw new Error("results.json not found in zip"); 48 | } 49 | const benchmark_data: any = JSON.parse(results.getData().toString("utf-8")!); 50 | 51 | const image_times: BenchmarkImageTime[] = benchmark_data.results 52 | .map( 53 | (res: { parameters: { image: string; type: string }; mean: number }) => { 54 | const manifestKey = `${res.parameters.image}-${res.parameters.type}`; 55 | const manifest = manifests[manifestKey]; 56 | if (manifest === undefined) { 57 | return null; 58 | } 59 | return { 60 | image: res.parameters.image, 61 | type: res.parameters.type, 62 | time: res.mean, 63 | total_size: manifest.reduce((acc, layer) => acc + layer.size, 0), 64 | layers: manifest, 65 | }; 66 | }, 67 | ) 68 | .filter((x: BenchmarkImageTime | null) => x !== null); 69 | const mapped = groupBy(image_times, (time) => time.image); 70 | const parsed: BenchmarkImage[] = Object.entries(mapped) 71 | .map(([image, times]) => { 72 | if (times === undefined) { 73 | throw new Error("times is undefined"); 74 | } 75 | 76 | const sorted_by_speed = times 77 | .filter((time) => time.type !== "original") 78 | .sort((a, b) => a.time - b.time); 79 | 80 | const fastest = sorted_by_speed[0]; 81 | const original = times.find((time) => time.type === "original")!; 82 | 83 | const percentage_faster = Number( 84 | (original.time / fastest.time).toFixed(1), 85 | ); 86 | 87 | const percentage_smaller = Number( 88 | (original.total_size / fastest.total_size).toFixed(1), 89 | ); 90 | 91 | const sorted_times = [ 92 | original, 93 | ...times.filter((time) => time.type !== "original"), 94 | ]; 95 | 96 | if (destinationToUpstream[image] === undefined) { 97 | return null; 98 | } 99 | 100 | const name = destinationToUpstream[image]; 101 | const [repo, imageName] = name.split(":"); 102 | return { 103 | name, 104 | repo, 105 | imageName, 106 | name_slug: image.replaceAll(".", "-"), 107 | times: sorted_times, 108 | fastest_type: fastest.type, 109 | times_smaller: percentage_smaller, 110 | times_faster: percentage_faster, 111 | }; 112 | }) 113 | .filter((x) => x !== null); 114 | return { 115 | images: parsed, 116 | }; 117 | } 118 | -------------------------------------------------------------------------------- /web/src/components/BenchmarkChart.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import ReactBenchmarkChart from "./ReactBenchmarkChart"; 3 | import { type BenchmarkImage } from "../benchmark_parser"; 4 | import { formatDuration } from "../utils.ts"; 5 | import LayerChart from "./ReactLayerChart"; 6 | import { humanFileSize } from "../utils"; 7 | 8 | interface Props { 9 | image: BenchmarkImage; 10 | } 11 | 12 | const { image } = Astro.props; 13 | 14 | const fastest_image = image.times.reduce((acc, time) => { 15 | if (time.time < acc.time) { 16 | return time; 17 | } 18 | return acc; 19 | }, image.times[0]); 20 | const original = image.times.find((time) => time.type === "original")!; 21 | if (fastest_image.layers == undefined) { 22 | throw new Error(`No fastest image found for ${JSON.stringify(image)}`); 23 | } 24 | --- 25 | 26 |

27 | … 64 | [template markup stripped in this dump — only the text content of these lines survives:]
  {image.name}
  Original: {original.layers.length} layers, {humanFileSize(original.total_size)}, {formatDuration(original.time)}
  Repacked: {fastest_image.layers.length} layers, {humanFileSize(fastest_image.total_size)}, {formatDuration(fastest_image.time)}
  Reduction: {image.times_faster}x faster, {image.times_smaller}x smaller
  Pull time
  Image Size
65 | 79 | -------------------------------------------------------------------------------- /web/src/components/Card.astro: -------------------------------------------------------------------------------- 1 | --- 2 | interface Props { 3 | title: string; 4 | body: string; 5 | href: string; 6 | } 7 | 8 | const { href, title, body } = Astro.props; 9 | --- 10 | 11 | 22 | 62 | -------------------------------------------------------------------------------- /web/src/components/Header.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import { Image } from "astro:assets"; 3 | import logo from "../media/logo.png"; 4 | 5 | let base_url = import.meta.env.BASE_URL; 6 | if (!base_url.endsWith("/")) { 7 | base_url += "/"; 8 | } 9 | 10 | export const SITE_NAME = "Docker Repack"; 11 | --- 12 | 13 | 43 | -------------------------------------------------------------------------------- /web/src/components/ReactBenchmarkChart.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import { BarChart } from "@mui/x-charts/BarChart"; 3 | import type { 4 | BenchmarkImage, 5 | BenchmarkImageTime, 6 | } from "../benchmark_parser.ts"; 7 | 8 | import "@fontsource/roboto/300.css"; 9 | import "@fontsource/roboto/400.css"; 10 | import "@fontsource/roboto/500.css"; 11 | import "@fontsource/roboto/700.css"; 12 | 13 | import { useState } from "react"; 14 | import { formatDuration, humanFileSize } from "../utils.ts"; 15 | 16 | export interface BenchmarkChartProps { 17 | image: BenchmarkImage; 18 | prop: "time" | "total_size"; 19 | } 20 | 21 | export default function ReactBenchmarkChart(props: BenchmarkChartProps) { 22 | const { image, prop } = props; 23 | 24 | // const [showAll, setShowAll] = useState(false); 25 | 26 | const allImageTypes = new Set(image.times.map((time) => time.type)); 27 | const data = []; 28 | const values = Object.fromEntries( 29 | image.times.map((time: BenchmarkImageTime) => [time.type, time[prop]]), 30 | ); 31 | data.push({ image: image.name, ...values }); 32 | 33 | // if (!showAll) { 34 | allImageTypes.clear(); 35 | allImageTypes.add("original"); 36 | allImageTypes.add(image.fastest_type); 37 | // } 38 | 39 | const formatter = prop === "time" ? formatDuration : humanFileSize; 40 | 41 | const series = [...allImageTypes].map((key) => ({ 42 | dataKey: key, 43 | label: key, 44 | valueFormatter: (value: number | null) => { 45 | if (value === null) { 46 | return "N/A"; 47 | } 48 | return formatter(value, true); 49 | }, 50 | })); 51 | 52 | return ( 53 |
54 | … 72 | [JSX tags stripped in this dump. What survives of the returned markup is a commented-out block and the tail of the chart element's props:]
  formatter(value, true) },
  ]}
  xAxis={[{ scaleType: "band", dataKey: "image" }]}
  series={series}
  />
73 | ); 74 | } 75 | -------------------------------------------------------------------------------- /web/src/components/ReactLayerChart.tsx: -------------------------------------------------------------------------------- 1 | import { BarChart } from "@mui/x-charts/BarChart"; 2 | import type { Layer } from "../manifest_parser.ts"; 3 | import { humanFileSize } from "../utils.ts"; 4 | 5 | const valueFormatter = (value: number | null) => humanFileSize(value!); 6 | 7 | const chartSetting = { 8 | xAxis: [ 9 | { 10 | label: "Size", 11 | valueFormatter, 12 | }, 13 | ], 14 | }; 15 | 16 | export default function LayerChart({ layers }: { layers: Layer[] }) { 17 | const dataset = layers.map((layer, idx) => ({ 18 | size: layer.size, 19 | layer: `#${idx + 1}`, 20 | })); 21 | return ( 22 | 30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /web/src/components/Terminal.astro: -------------------------------------------------------------------------------- 1 | --- 2 | interface Props { 3 | command: string; 4 | } 5 | const { command } = Astro.props; 6 | --- 7 | 8 |
9 | … 30 | [template markup stripped in this dump. The surviving text of the terminal body is the conditional prompt line:]
  {
    command && (
      $ {command}
    )
  }
31 | -------------------------------------------------------------------------------- /web/src/components/pull-component.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import { humanFileSize } from "../utils.ts"; 3 | 4 | export interface Layer { 5 | bytes: number; 6 | digest: string; 7 | } 8 | 9 | interface LayerPullState { 10 | layer: Layer; 11 | bytes_done: number; 12 | // percent_done: number; 13 | done: boolean; 14 | } 15 | 16 | function updatePullState( 17 | bytes_per_tick: number, 18 | layer_state: LayerPullState, 19 | ): LayerPullState { 20 | const total_bytes = layer_state.layer.bytes; 21 | if (total_bytes <= layer_state.bytes_done) { 22 | layer_state.done = true; 23 | return layer_state; 24 | } 25 | // vary bytes_per_tick by up to 25% to simulate network jitter 26 | bytes_per_tick *= 0.7 + Math.random() * 0.3; 27 | const new_bytes_done = layer_state.bytes_done + bytes_per_tick; 28 | let done = false; 29 | if (new_bytes_done >= layer_state.layer.bytes) { 30 | done = true; 31 | } 32 | // const percent_done = (new_bytes_done / total_bytes) * 100; 33 | return { 34 | layer: layer_state.layer, 35 | bytes_done: new_bytes_done, 36 | // percent_done, 37 | done, 38 | }; 39 | } 40 | 41 | // 3.11: Pulling from library/python 42 | // 56c9b9253ff9: Downloading 6.589MB/49.59MB 43 | // 364d19f59f69: Downloading 4.668MB/23.59MB 44 | // 843b1d832182: Downloading 8.043MB/64MB 45 | // a348c2a8d946: Waiting 46 | // dd681ddda6db: Waiting 47 | // 2fa7159a8e74: Waiting 48 | // 2d3256a435e2: Waiting 49 | // 8d76c12bea0d: Waiting 50 | 51 | function LayerPullComponent({ pull }: { pull: LayerPullState }) { 52 | const prefix = pull.layer.digest.slice(0, 12); 53 | if (pull.done) { 54 | return ( 55 | <> 56 | {prefix}: Pull complete{"\n"} 57 | 58 | ); 59 | } 60 | return ( 61 | <> 62 | {prefix}: Downloading {humanFileSize(pull.bytes_done).padStart(5)}/ 63 | {humanFileSize(pull.layer.bytes)} 64 | {"\n"} 65 | 66 | ); 67 | } 68 | 69 | export default function PullComponent({ 70 | layers, 71 | bandwidth, 72 | max_downloads, 73 | }: { 74 | layers: Layer[]; 75 | bandwidth: number; 76 | max_downloads: number; 77 | }) { 78 | const initialState = layers.map((layer) => ({ 79 | layer, 80 | bytes_done: 0, 81 | done: false, 82 | })); 83 | const [state, setState] = useState(initialState); 84 | const [startTime, setStartTime] = useState(Date.now()); 85 | 86 | const elapsed = Date.now() - startTime; 87 | 88 | useEffect(() => { 89 | if (state.every((layer_state) => layer_state.done)) { 90 | return; 91 | } 92 | const interval = setInterval(() => { 93 | let downloadCount = 0; 94 | const in_progress_count = state.filter((layer) => !layer.done).length; 95 | const bytes_per_layer = bandwidth / in_progress_count; 96 | const newState = state.map((layer) => { 97 | if (!layer.done && downloadCount < max_downloads) { 98 | downloadCount += 1; 99 | return updatePullState(bytes_per_layer, layer); 100 | } 101 | return layer; 102 | }); 103 | setState(newState); 104 | }, 100); 105 | 106 | // Clean up the interval on component unmount 107 | return () => clearInterval(interval); 108 | }, [state, bandwidth, max_downloads]); 109 | 110 | return ( 111 | <> 112 |
113 | … 118 | [JSX tags stripped in this dump. The surviving fragments of the preformatted output block are shown below; the element rendered inside the map (presumably one LayerPullComponent per layer) was lost:]
        {state.map((layer, i) => (
        ))}
        {"\n"}
        Total Time: {(elapsed / 1000).toFixed(1)} seconds{"\n"}
119 | 120 | ); 121 | } 122 | -------------------------------------------------------------------------------- /web/src/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /web/src/github_client.ts: -------------------------------------------------------------------------------- 1 | import { Octokit } from "@octokit/rest"; 2 | import axios from "axios"; 3 | import AdmZip from "adm-zip"; 4 | 5 | export const githubClient = new Octokit({ auth: process.env.GITHUB_TOKEN }); 6 | 7 | export async function getArtifact(name: string): Promise { 8 | const owner = "orf"; 9 | const repo = "docker-repack"; 10 | const resp = await githubClient.actions.listArtifactsForRepo({ 11 | owner, 12 | repo, 13 | name, 14 | }); 15 | const artifact = resp.data.artifacts[0]; 16 | const artifact_response = await githubClient.actions.downloadArtifact({ 17 | owner, 18 | repo, 19 | artifact_id: artifact.id, 20 | archive_format: "zip", 21 | }); 22 | const artifact_data = await axios.get(artifact_response.url, { 23 | responseType: "arraybuffer", 24 | }); 25 | const zipfile: Buffer = Buffer.from(artifact_data.data); 26 | return new AdmZip(zipfile); 27 | } 28 | -------------------------------------------------------------------------------- /web/src/layouts/Layout.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import Header from "../components/Header.astro"; 3 | import { getImage } from "astro:assets"; 4 | import logo from "../media/logo.png"; 5 | 6 | interface Props { 7 | title: string; 8 | } 9 | 10 | const { title } = Astro.props; 11 | 12 | const logoImage = await getImage({ 13 | src: logo, 14 | format: "webp", 15 | width: 50, 16 | quality: "max", 17 | }); 18 | --- 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | {title} 28 | 29 | 30 |
31 |
32 | 33 |
34 | 35 | 36 | -------------------------------------------------------------------------------- /web/src/layouts/base.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orf/docker-repack/5b5fef6a3298bd6697125056e820c2da81eb1856/web/src/layouts/base.css -------------------------------------------------------------------------------- /web/src/manifest_parser.ts: -------------------------------------------------------------------------------- 1 | import { getArtifact } from "./github_client.ts"; 2 | 3 | export interface Layer { 4 | digest: string; 5 | size: number; 6 | } 7 | 8 | export async function getManifests(): Promise<{ [key: string]: Layer[] }> { 9 | const zip = await getArtifact("image-manifests"); 10 | const entries = zip.getEntries().map((entry) => { 11 | const parsed = JSON.parse(entry.getData().toString()); 12 | const layers: Layer[] = parsed.LayersData.map( 13 | (layer: { Digest: string; Size: number }) => { 14 | return { 15 | digest: layer.Digest, 16 | size: layer.Size, 17 | }; 18 | }, 19 | ); 20 | const name = entry.entryName.replace("manifest-", "").replace(".json", ""); 21 | return [name, layers]; 22 | }); 23 | return Object.fromEntries(entries); 24 | } 25 | -------------------------------------------------------------------------------- /web/src/media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orf/docker-repack/5b5fef6a3298bd6697125056e820c2da81eb1856/web/src/media/logo.png -------------------------------------------------------------------------------- /web/src/pages/benchmarks.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import Layout from "../layouts/Layout.astro"; 3 | import { parseBenchmarkData } from "../benchmark_parser"; 4 | import BenchmarkChart from "../components/BenchmarkChart.astro"; 5 | const fullData = await parseBenchmarkData(); 6 | const dataset = fullData.images.sort((a, b) => b.times_faster - a.times_faster); 7 | --- 8 | 9 | 10 |

Image Benchmarks
11 | … 29 | [markup stripped in this dump. Besides the "Image Benchmarks" heading above, the only surviving fragment of the page body is the chart loop — the element rendered for each image (presumably the imported BenchmarkChart component) was lost:]
  {dataset.map((image) => )}
30 | -------------------------------------------------------------------------------- /web/src/pages/index.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import Layout from "../layouts/Layout.astro"; 3 | import Terminal from "../components/Terminal.astro"; 4 | import PullComponent, { type Layer } from "../components/pull-component"; 5 | import crypto from "crypto"; 6 | 7 | function sha256Hash(message: string) { 8 | return crypto.createHash("sha256").update(message).digest("hex"); 9 | } 10 | 11 | function make_layer(idx: number, size_mb: number): Layer { 12 | return { bytes: size_mb * 1024 * 1024, digest: sha256Hash(idx.toString()) }; 13 | } 14 | 15 | const slowLayers = [ 16 | make_layer(1, 10), 17 | make_layer(2, 1), 18 | make_layer(3, 510), 19 | make_layer(4, 100), 20 | make_layer(5, 50), 21 | ]; 22 | 23 | const fastLayers = [ 24 | make_layer(1, 50), 25 | make_layer(2, 70), 26 | make_layer(3, 80), 27 | make_layer(4, 70), 28 | make_layer(5, 85), 29 | ]; 30 | 31 | const bandwidth = 1024 * 1024 * 10; 32 | const maxDownloads = 5; 33 | --- 34 | 35 | 36 |

Docker Repack
Docker repack rewrites Docker images to optimize for faster pulls.
It turns this:
37 | … 53 | [markup stripped in this dump; this block presumably held the "slow" demo — a Terminal wrapping a PullComponent fed with slowLayers — but the element tags were lost]
Into this:
54 | … 64 | [markup stripped; presumably the matching "fast" demo using fastLayers]
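Rough numbers behind the two demos above (an editor's back-of-the-envelope sketch using the constants from this page's frontmatter; the real animation also applies a random 0.7–1.0× jitter per tick, so the on-screen totals run a bit longer and vary between reloads):

```ts
// Layer sizes (MiB) from slowLayers / fastLayers above.
const slow = [10, 1, 510, 100, 50]; // 671 MiB total, dominated by one 510 MiB layer
const fast = [50, 70, 80, 70, 85];  // 355 MiB total, spread across similar-sized layers

const totalMiB = (xs: number[]) => xs.reduce((a, b) => a + b, 0);

// The simulation moves `bandwidth` = 10 MiB per 100 ms tick, shared across all
// unfinished layers, so ignoring jitter the minimum pull time in seconds is:
const seconds = (xs: number[]) => totalMiB(xs) / 10 / 10;

console.log(seconds(slow)); // 6.71 → roughly 7 s before jitter
console.log(seconds(fast)); // 3.55 → roughly 3.5 s before jitter
```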
65 | -------------------------------------------------------------------------------- /web/src/utils.ts: -------------------------------------------------------------------------------- 1 | // @ts-ignore 2 | import humanizeDuration from "humanize-duration"; 3 | 4 | const shortEnglishHumanizer = humanizeDuration.humanizer({ 5 | language: "shortEn", 6 | languages: { 7 | shortEn: { 8 | y: () => "y", 9 | mo: () => "mo", 10 | w: () => "w", 11 | d: () => "d", 12 | h: () => "h", 13 | m: () => "m", 14 | s: () => "s", 15 | ms: () => "ms", 16 | }, 17 | }, 18 | }); 19 | 20 | export function formatDuration( 21 | seconds: number, 22 | short: boolean = false, 23 | ): string { 24 | const args = { units: ["m", "s"], round: true }; 25 | if (short) { 26 | return shortEnglishHumanizer(seconds * 1000, args); 27 | } 28 | return humanizeDuration(seconds * 1000, args); 29 | } 30 | 31 | export function humanFileSize(size: number): string { 32 | const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024)); 33 | return ( 34 | +(size / Math.pow(1024, i)).toFixed(2) + ["B", "kB", "MB", "GB", "TB"][i] 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /web/tailwind.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ["./src/**/*.{astro,html,js,jsx,md,mdx,svelte,ts,tsx,vue}"], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [], 8 | }; 9 | -------------------------------------------------------------------------------- /web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "astro/tsconfigs/strict", 3 | "compilerOptions": { 4 | "jsx": "react-jsx", 5 | "jsxImportSource": "react" 6 | } 7 | } 8 | --------------------------------------------------------------------------------
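For reference, a quick sketch of what the helpers in `web/src/utils.ts` above return (values worked out from the visible implementation; the exact `formatDuration` strings come from the humanize-duration package, so treat those as approximate):

```ts
import { formatDuration, humanFileSize } from "./src/utils"; // path relative to web/

// humanFileSize picks a unit via floor(log1024(size)) and trims to two decimals.
humanFileSize(0);           // "0B"
humanFileSize(1536);        // "1.5kB"  (1536 / 1024)
humanFileSize(536_870_912); // "512MB"  (512 * 1024^2 bytes)

// formatDuration takes seconds and reports minutes + seconds, rounded;
// short = true uses the abbreviated unit names defined at the top of the file.
formatDuration(95);       // e.g. "1 minute, 35 seconds"
formatDuration(95, true); // e.g. "1 m, 35 s"
```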