├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── Setup.hs ├── app └── Main.hs ├── package.yaml ├── src └── Docker │ ├── Cacher.hs │ └── Cacher │ ├── Inspect.hs │ └── Internal.hs └── stack.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | dist-* 2 | Dockerfile 3 | docker-build-cacher.cabal 4 | 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: c 3 | compiler: gcc 4 | cache: 5 | directories: 6 | - "$HOME/.stack" 7 | timeout: 300 8 | git: 9 | depth: 100 10 | matrix: 11 | include: 12 | - dist: xenial 13 | before_deploy: 14 | - ldd $(which docker-build-cacher) || true 15 | - curl -sSL https://github.com/upx/upx/releases/download/v3.94/upx-3.94-amd64_linux.tar.xz 16 | | tar -x --xz --strip-components=1 -C ~/.local/bin upx-3.94-amd64_linux/upx 17 | - upx -q --best --ultra-brute "./releases/${BINARY_NAME}" 18 | - os: osx 19 | before_deploy: 20 | - otool -L $(which docker-build-cacher) || true 21 | - brew update > /dev/null 22 | - brew install upx 23 | - upx -q --best --ultra-brute "./releases/${BINARY_NAME}" 24 | before_install: 25 | - export BINARY_NAME="docker-build-cacher-$(uname -s)-$(uname -m)" 26 | - mkdir -p ~/.local/bin 27 | - | 28 | if [[ "${TRAVIS_OS_NAME}" = "osx" ]] 29 | then 30 | travis_retry curl -sSL https://www.stackage.org/stack/${TRAVIS_OS_NAME}-x86_64 \ 31 | | tar xz --strip-components=1 -C ~/.local/bin --include '*/stack' 32 | else 33 | travis_retry curl -sSL https://www.stackage.org/stack/${TRAVIS_OS_NAME}-x86_64 \ 34 | | tar xz --strip-components=1 -C ~/.local/bin --wildcards '*/stack' 35 | fi 36 | install: 37 | - travis_retry stack --no-terminal --install-ghc test --only-dependencies 38 | - stack --no-terminal install --ghc-options='-fPIC' 39 | script: 40 | - docker-build-cacher -h 41 | after_success: 42 | - mkdir -p ./releases/ 43 | - cp "$(which docker-build-cacher)" "./releases/${BINARY_NAME}" 44 | deploy: 45 | provider: releases 46 | draft: true 47 | skip_cleanup: true 48 | api_key: 49 | secure: "3tpE+jf1r8uebkitLCxpLFwZfFtPdb/zsEGtqu56oD9LjIE7nEyEeOXUaE8c2KSrg+pWhhTOvShlVRHPXUJAO86mowLnnjEnyM/kIWt4tb89jYmpr+EwLF6qG6AxMk19lImlO2156e0yvl2k4p3NR0Y/CimEyPiyxeRT02cF6TJdP1weOX+6o9cXOG5DZowX+FbOq6WwkQvW3uiPQ0eJkknrirGbbMY4fi3Jgx/Uy/aq5isgMETql53xvd8hoSmoD3p99YV4jc6iF+DXIjEn+tyEEdi/OFg9gNT+R+vYvJ8/WSGpuMyw1UeI7ErGTa6M4kn9/eY5N3abrUNKgs8tD4ZH4EzLym12/dOD3YAcPjD6dqM2cvEUGUvHG9vCEj/012QzeYUoIlgaPCOg0FVTNtpG2OzrQVAh/7xeLz6bPAkKmng2ZAtkHreuwBKqZ4CG9figs7iWYn+Of6iePPGLRoUuvJ5ys749q7Ir9sFwiZF5BYc7DA+DPbqXir0uFBpXikBMQ+cgPsF/z0kO86aB0IcBDiOCtfTxZ+xbIcWq2ABM40NPlp8D3PRAg65hKvO2cjcoD97WxLfA5rcUrsW+0O4JKl/zJzACoGY53GzFMSQwLP6NCZKlE73OFfQupbP3Ihxt/c3r11zdPytM3wZYEE8tQia3lbp7KcMpezbTNRw=" 50 | file: "./releases/${BINARY_NAME}" 51 | on: 52 | tags: true 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, SeatGeek 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: publish clean 2 | 3 | DOCKER_LINUX_IMAGE="fpco/stack-build:lts-11.13" 4 | API_HOST=https://api.github.com 5 | UPLOAD_HOST=https://uploads.github.com 6 | DASH_VERSION=$(shell echo $(VERSION) | sed -e s/\\./-/g) 7 | 8 | ifdef GITHUB_TOKEN 9 | AUTH=-H 'Authorization: token $(GITHUB_TOKEN)' 10 | endif 11 | 12 | 13 | # Utility target for checking required parameters 14 | guard-%: 15 | @if [ "$($*)" = '' ]; then \ 16 | echo "Missing required $* variable."; \ 17 | exit 1; \ 18 | fi; 19 | 20 | dist-linux/docker-build-cacher: 21 | mkdir -p dist-linux 22 | stack --docker --docker-auto-pull --docker-image $(DOCKER_LINUX_IMAGE) install --local-bin-path dist-linux 23 | upx --best dist-linux/docker-build-cacher 24 | 25 | dist-macos/docker-build-cacher: 26 | mkdir -p dist-macos 27 | stack install --local-bin-path dist-macos 28 | upx --best dist-macos/docker-build-cacher 29 | 30 | release.json: dist-linux/docker-build-cacher dist-macos/docker-build-cacher 31 | @echo "Creating draft release for $(VERSION)" 32 | @curl $(AUTH) -XPOST $(API_HOST)/repos/seatgeek/docker-build-cacher/releases -d '{ \ 33 | "tag_name": "$(VERSION)", \ 34 | "name": "Docker build cacher $(VERSION)", \ 35 | "draft": false, \ 36 | "prerelease": false \ 37 | }' > release.json 38 | @echo "Uploading binaries to github" 39 | 40 | publish: guard-VERSION guard-GITHUB_TOKEN release.json 41 | $(eval RELEASE_ID := $(shell cat release.json | jq .id)) 42 | @sleep 1 43 | @echo "Uploading the Linux docker-build-cacher" 44 | @curl $(AUTH) -XPOST \ 45 | $(UPLOAD_HOST)/repos/seatgeek/docker-build-cacher/releases/$(RELEASE_ID)/assets?name=docker-build-cacher-linux \ 46 | -H "Accept: application/vnd.github.manifold-preview" \ 47 | -H 'Content-Type: application/octet-stream' \ 48 | --data-binary '@dist-linux/docker-build-cacher' > /dev/null 49 | @echo "Uploading the MacOS binary" 50 | @curl $(AUTH) -XPOST \ 51 | $(UPLOAD_HOST)/repos/seatgeek/docker-build-cacher/releases/$(RELEASE_ID)/assets?name=docker-build-cacher-macos \ 52 | -H "Accept: application/vnd.github.manifold-preview" \ 53 | -H 'Content-Type: application/octet-stream' \ 54 | 
	  --data-binary '@dist-macos/docker-build-cacher' > /dev/null
	@echo Release done, you can go to:
	@cat release.json | jq .html_url


clean:
	rm -rf dist-*
	rm -f release.json
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Docker Build Cacher

This tool is intended to speed up multi-stage Dockerfile build times by caching the results of each of the
stages separately.

## Why?

[Multi-stage Dockerfile](https://docs.docker.com/engine/userguide/eng-image/multistage-build/) builds are great,
but they still miss a key feature: it is not possible to carry the statically generated cache files from one
build to the next once the source files in your project change. Here's an example that illustrates the issue:

Imagine you create a generic Dockerfile for building node projects:

```Dockerfile
FROM nodejs

RUN apt-get install nodejs yarn

WORKDIR /app

# Whenever this image is used execute these triggers
ONBUILD ADD package.json yarn.lock .
ONBUILD RUN yarn
ONBUILD RUN yarn run dist
```

And then you call

```bash
docker build -t nodejs-build .
```

So now you can use the `nodejs-build` image in other builds, like this:

```Dockerfile
# Automatically build yarn dependencies
FROM nodejs-build as nodedeps

# Build the final container image
FROM scratch

# Copy the generated app.js from yarn run dist
COPY --from=nodedeps /app/app.js .
...
```

So far so good: we have built a pretty lean docker image that discards the whole `node_modules`
folder and only keeps the final artifact, for example a bundled reactjs application.

It's also very fast to build, since each of the steps in the Dockerfile is cached, as long as
none of the files changed.

But that's also where the problem is: whenever the `package.json` or `yarn.lock` files change, docker
will throw away all the files in `node_modules` and all the cached yarn packages, and will start from
scratch downloading, linking and building every single dependency.

That's far from ideal. What if we could change the process so that changes to those files
do not bust the yarn cache? It turns out that we can!

## Enter docker-build-cacher

This utility overcomes the problem by providing a way to build the Dockerfile and then cache the
intermediate stages. On subsequent builds, it will make sure that the static cache files generated
during previous builds are also present.

The effect it has should be obvious: your builds will be consistently fast, at the cost of more disk space.

## Installation

There are binaries provided for `linux-x86_64` and MacOS, check
[the releases page](https://github.com/seatgeek/docker-build-cacher/releases) for downloads.

## How It Works

This works by parsing the Dockerfile and extracting the `COPY` or `ADD` instructions nested inside `ONBUILD` for each of
the stages found in the file.

It will compare the source files present in those `COPY` or `ADD` instructions to check for changes. If it detects changes,
it rewrites your Dockerfile on the fly so that the `FROM` directives in each of the stages use the locally cached images instead
of the original base image.

The effect this `FROM` swap has is that the disk state for each stage is preserved between builds.
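As an illustration, here is roughly what the rewrite looks like for the `nodedeps` stage from the example above, assuming `APP_NAME=fancyapp` and `GIT_BRANCH=master` (the cached image name below is illustrative; it is derived from those two variables and the stage's base image):

```Dockerfile
# What you wrote in your Dockerfile
FROM nodejs-build as nodedeps

# What docker-build-cacher builds instead, once a cached image for that stage exists
FROM fancyapp__branch__master__stage__nodejs-build-build:latest as nodedeps
```

Only the intermediate stages are swapped; the final `FROM` of the Dockerfile is left untouched.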
## Usage

`docker-build-cacher` requires the following environment variables to be present in order to correctly build
your Dockerfile:

* `APP_NAME`: The name of the application you are trying to build. Usually this is just the folder name you are in.
* `GIT_BRANCH`: The name of the git branch you are building. Used to "namespace" cache results.
* `DOCKER_TAG`: The tool will run `docker build -t $DOCKER_TAG .` at some point; use this to set the image tag you want at the end.

This utility has two modes, `Build` and `Cache`. Both modes should be invoked for the cache to work:

```bash
# APP_NAME is passed as a build argument, you can use it as an env var in your Dockerfile
export APP_NAME=fancyapp

# GIT_BRANCH is used as part of the name for the resulting cached images
export GIT_BRANCH=master

# DOCKER_TAG corresponds to the -t argument in docker build, that will be the resulting image name
export DOCKER_TAG=fancyapp:latest

docker-build-cacher build # This will build the Dockerfile
docker-build-cacher cache # This will cache each of the stage results separately
```

Additionally, `docker-build-cacher` accepts the `DOCKERFILE` env variable in case the file is not present in the
current directory:

```bash
DOCKERFILE=buildfiles/Dockerfile docker-build-cacher build
```

At the end of the process you can call `docker images` and see that it has created `fancyapp:latest`, and if you are using
multi-stage builds, it should have created an image tag for each of the stages in your Dockerfile.
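The cached stage images are named after `APP_NAME`, `GIT_BRANCH` and the base image of each stage (with `/` and `:` replaced by `-`). With the values above and the `nodejs-build` stage from the earlier example, the listing would contain entries roughly like these (illustrative names):

```
fancyapp:latest
fancyapp__branch__master__stage__nodejs-build
fancyapp__branch__master__stage__nodejs-build-build
```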
### Fallback Cache Keys

As mentioned before, the `GIT_BRANCH` env variable is used as part of the name for the generated cached image, which means that
the generated cache is scoped to that name. This is done so you can keep different caches in which to experiment with wildly
different requirements and libraries in the Dockerfile.

This has the unfortunate side effect that building other branches will require building the cache from scratch. In order to solve this
you can use the `FALLBACK_BRANCH` environment variable like this:

```bash
export APP_NAME=fancyapp
export GIT_BRANCH=my-feature
export FALLBACK_BRANCH=master
export DOCKER_TAG=fancyapp:latest

docker-build-cacher build
docker-build-cacher cache
```

The above will make the cached image for the `my-feature` branch be based on the one from the `master` branch.

### Caching Intermediate Images

In some circumstances, you may want to execute additional instructions after
including the base builder image. For instance, building an executable or
bundle using all the dependencies already downloaded:

```Dockerfile
# Automatically build haskell stack dependencies
FROM haskell-stack as builder

COPY . .
RUN stack install

# Build the final container image
FROM scratch

COPY --from=builder /root/.local/bin/my-app .
```

This very typical example has a shortcoming: each time we do `COPY . .` we
also invalidate the compilation artifacts created by `stack install`, that
is, we lose the benefits of incremental compilation.

If you want to keep incremental compilation, or any files generated in between
the builder image and the final `FROM`, you can label the intermediate image so
that `docker-build-cacher` will include that into the cached artifacts:

```Dockerfile
# Automatically build haskell stack dependencies
FROM haskell-stack as builder

# Instructs the cacher to also copy the files generated in this stage
LABEL cache_instructions=cache

COPY . .
RUN stack install

# Build the final container image
FROM scratch

COPY --from=builder /root/.local/bin/my-app .
```

**Warning:**

The files copied in `COPY . .` will also be cached! This not only increases the
cache size, but also has a potentially dangerous inconvenience:

Any files you delete from one build to the next will be restored again by the
cacher. For example, if you delete a file in your source tree because you
don't use it anymore or you did a refactoring, it will pop up again in the build!

This may be a problem for compilers or build tools that scan all the files in
the folder, like the Go compiler. If you are certain that keeping old files
around is not a problem, then it is safe to use this feature. The Haskell
compiler, for instance, does not care at all about extra cruft in the folder.

## Passing extra arguments to docker build

It is possible to pass extra arguments and flags to the `docker build` step by providing the environment variable `DOCKER_BUILD_OPTIONS` as
shown below:

```bash
DOCKER_BUILD_OPTIONS="--build-arg foo=bar --quiet" docker-build-cacher build
```

## Building from source

Dependencies:

- [Haskell stack](https://docs.haskellstack.org/en/stable/README/#how-to-install)

Install the `stack` tool from the link above. Then `cd` to the root folder of this repo and execute:

```sh
stack setup
stack install
```

The first build will take *a lot* of time. Don't worry, you only need to pay this price once.
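The Makefile at the root of this repo wraps the same steps for producing the compressed release binaries; the `dist-linux` target builds inside a Docker container, and both targets run `upx` on the resulting binary:

```sh
make dist-macos/docker-build-cacher
make dist-linux/docker-build-cacher
```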
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
import Distribution.Simple
main = defaultMain
--------------------------------------------------------------------------------
/app/Main.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}

module Main where

import Data.Maybe ( fromMaybe )
import qualified Data.Text as Text
import Data.Text ( Text )
import Language.Docker
import Prelude hiding ( FilePath )
import Text.ParserCombinators.ReadP
import Text.Read
import Turtle

import qualified Docker.Cacher
import qualified Docker.Cacher.Internal


-- | This script has 2 modes. One for building the Dockerfile, and another for caching its stages
data Mode
  = Build
  | Cache
  deriving (Show)

instance Read Mode where
  readPrec =
    Text.Read.choice $
    strValMap
      [ ("Build", Build) -- Accept both casings
      , ("build", Build)
      , ("Cache", Cache)
      , ("cache", Cache)
      ]
    where
      strValMap = map (\(flag, result) -> lift $ string flag >> return result)

data Args = Args
  { mode :: Mode
  , noCacheStages :: Bool
  , noBuildCache :: Bool
  }


-- | Describes the arguments this script takes from the command line
parser :: Parser Args
parser =
  Args -- Pass the parsed arguments into the Args data container
    <$> argRead "mode" "Whether to build or to cache (options: build | cache)"
    <*> switch
          "no-cache-stages"
          's'
          "Each of the FROM instructions will be cached in separate images if this flag is not set"
    <*> switch "no-cache-build"
               'n'
               "Skip the internal docker cache when building the image"


main :: IO ()
main = do
  Args { mode, noCacheStages, noBuildCache } <- options -- Parse the CLI arguments as a Mode
    "Builds a docker file and caches its stages"
    parser

  app <- Docker.Cacher.App <$> needEnv "APP_NAME" -- Get the APP environment variable and then wrap it in App
  branch <- Docker.Cacher.Branch <$> needEnv "GIT_BRANCH"
  fallbackBranch <- fmap Docker.Cacher.Branch <$> need "FALLBACK_BRANCH"

  maybeFile <- do
    file <- need "DOCKERFILE" -- Get the dockerfile path if any
    return (fmap fromText file) -- And transform it to a FilePath

  -- if DOCKERFILE is not present, we assume it is in the current directory
  currentDirectory <- pwd
  let Just file = maybeFile <|> Just (currentDirectory </> "Dockerfile")

  -- Now we try to parse the dockerfile
  dockerFile <- parseFile (Text.unpack (format fp file)) -- Convert the dockerfile to an AST
  case dockerFile of
    Left message ->
      error ("There was an error parsing the docker file: " <> show message)

    Right ast -> case mode of
      Cache -> if noCacheStages
        then echo "Skipping...
I was told not to cache any stages separetely" 85 | else sh (Docker.Cacher.cacheBuild app branch fallbackBranch ast) 86 | 87 | Build -> do 88 | name <- Docker.Cacher.Internal.ImageName <$> needEnv "DOCKER_TAG" 89 | buildOPtions <- do 90 | opts <- need "DOCKER_BUILD_OPTIONS" 91 | if noBuildCache 92 | then 93 | return 94 | $ Just 95 | ( opts 96 | & fromMaybe "" 97 | & (<> " --no-cache") -- Append the docker --no-cache option 98 | & Docker.Cacher.BuildOptions 99 | ) 100 | else return (fmap Docker.Cacher.BuildOptions opts) 101 | 102 | if noCacheStages 103 | then sh (Docker.Cacher.build app name buildOPtions ast) 104 | else sh 105 | (Docker.Cacher.buildFromCache app 106 | branch 107 | fallbackBranch 108 | name 109 | buildOPtions 110 | ast 111 | ) 112 | 113 | 114 | needEnv :: MonadIO m => Text -> m Text 115 | needEnv varName = do 116 | value <- need varName 117 | case value of 118 | Nothing -> error 119 | ("I was expecting the " <> show varName <> " env var to be present.") 120 | Just val -> return val 121 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: docker-build-cacher 2 | version: '2.1.1' 3 | synopsis: Builds a docker image and caches all of its intermediate stages 4 | description: A CLI tool to speed up multi-stage docker file builds by caching intermediate 5 | category: Operations 6 | author: 7 | - José Lorenzo Rodríguez 8 | maintainer: 9 | - lorenzo@seatgeek.com 10 | copyright: 11 | - Seatgeek, Copyright (c) 2017 12 | license: BSD3 13 | github: seatgeek/docker-build-cacher 14 | extra-source-files: 15 | - README.md 16 | 17 | dependencies: 18 | - base >=4.9.1.0 && <5 19 | - turtle 20 | - language-docker >= 8.0.2 && < 9 21 | - containers 22 | - foldl 23 | - text 24 | - system-filepath 25 | - aeson 26 | 27 | library: 28 | source-dirs: src 29 | ghc-options: 30 | - -Wall 31 | - -fno-warn-unused-do-bind 32 | 33 | executables: 34 | docker-build-cacher: 35 | main: Main.hs 36 | source-dirs: app 37 | dependencies: 38 | - docker-build-cacher 39 | ghc-options: 40 | - -threaded 41 | - -rtsopts 42 | - -with-rtsopts=-N 43 | - -Wall 44 | - -fno-warn-unused-do-bind 45 | when: 46 | # OS X does not support static build https://developer.apple.com/library/content/qa/qa1118 47 | - condition: '!(os(osx))' 48 | ld-options: 49 | - -static 50 | - -pthread 51 | -------------------------------------------------------------------------------- /src/Docker/Cacher.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE RecordWildCards #-} 3 | {-# LANGUAGE NamedFieldPuns #-} 4 | 5 | module Docker.Cacher where 6 | 7 | import qualified Control.Foldl as Fold 8 | import Control.Monad ( guard 9 | , when 10 | ) 11 | import Data.Either ( rights ) 12 | import qualified Data.Map.Strict as Map 13 | import Data.Maybe ( fromMaybe 14 | , mapMaybe 15 | ) 16 | import qualified Data.Text as Text 17 | import Data.Text ( Text ) 18 | import qualified Data.Text.Lazy as LT 19 | import Language.Docker 20 | import Language.Docker.Syntax ( Tag(..) ) 21 | import Prelude hiding ( FilePath ) 22 | import Turtle 23 | 24 | import qualified Docker.Cacher.Inspect 25 | import Docker.Cacher.Inspect ( ImageConfig(..) 26 | , StageCache(..) 
27 | ) 28 | import Docker.Cacher.Internal 29 | import qualified Data.List.NonEmpty 30 | import qualified Data.Aeson.Text 31 | import qualified Data.Coerce 32 | 33 | {- Glossary: 34 | - InstructionPos: in the AST for a docker file, each of the lines are described with the type InstructionPos 35 | - Dockerfile: A list of InstructionPos 36 | - stage: Each of the FROM instructions in a Dockerfile 37 | - cache buster: Files inside a docker image that can be compared with files locally under the same path 38 | -} 39 | newtype App = 40 | App Text 41 | deriving (Show) 42 | 43 | newtype Branch = 44 | Branch Text 45 | deriving (Show) 46 | 47 | newtype BuildOptions = 48 | BuildOptions Text 49 | deriving (Show) 50 | 51 | newtype CacheLabels = 52 | CacheLabels [(Text, Text)] 53 | deriving (Show) 54 | 55 | 56 | -- | Builds the provided Dockerfile. If it is a multi-stage build, check if those stages are already cached 57 | -- and change the dockerfile to take advantage of that. 58 | buildFromCache 59 | :: App 60 | -> Branch 61 | -> Maybe Branch 62 | -> ImageName SourceImage 63 | -> Maybe BuildOptions 64 | -> Dockerfile 65 | -> Shell () 66 | buildFromCache app branch fallbackBranch imageName buildOptions ast = do 67 | -- Inspect the dockerfile and return the stages that got their cache invalidated. We need 68 | -- them to rewrite the docker file and replace the stages with the ones we have in local cache. 69 | changedStages <- getChangedStages app branch fallbackBranch ast 70 | 71 | -- We replace the busted stages with cached primed ones 72 | let cachedStages = replaceStages 73 | (mapMaybe Docker.Cacher.Inspect.alreadyCached changedStages) 74 | ast 75 | 76 | build app imageName buildOptions cachedStages 77 | 78 | 79 | build 80 | :: App 81 | -> ImageName SourceImage 82 | -> Maybe BuildOptions 83 | -> Dockerfile 84 | -> Shell () 85 | build app imageName buildOptions ast = do 86 | echo "I'll start building now the main Dockerfile" 87 | 88 | -- Build the main docker file which may already have been rewritten to use the 89 | -- cached stages. 90 | status <- buildDockerfile app imageName buildOptions ast 91 | 92 | case status of 93 | ExitSuccess -> 94 | echo 95 | "I built the main dockerfile without a problem. Now call this same script with the `Cache` mode" 96 | ExitFailure _ -> die "Boo, I could not build the project" 97 | 98 | 99 | -- | One a dockefile is built, we can extract each of the stages separately and then tag them, so the cache 100 | -- can be retreived at a later point. 101 | cacheBuild :: App -> Branch -> Maybe Branch -> Dockerfile -> Shell () 102 | cacheBuild app branch fallbackBranch ast = do 103 | -- Compare the current dockerfile with whatever we have in the cache. If there are any chages, 104 | -- then we will need to rebuild the cache for each of the changed stages. 
105 | inspectedStages <- getChangedStages app branch fallbackBranch ast 106 | 107 | let stagesToBuildFresh = [ stage | NotCached stage <- inspectedStages ] 108 | let stagesToReBuild = 109 | [ (uncached, stage) 110 | | CacheInvalidated uncached stage <- inspectedStages 111 | ] 112 | let stagesToBuildFromFallback = 113 | [ (uncached, cached) 114 | | FallbackCache uncached (Cached cached _) <- inspectedStages 115 | ] 116 | 117 | when (stagesToBuildFresh /= []) $ do 118 | echo "--> Let's build the cache for the first time" 119 | mapM_ (buildAssetStage app) stagesToBuildFresh -- Build cached images for the first time 120 | 121 | when (stagesToBuildFromFallback /= []) $ do 122 | echo "--> Let's build the cache for the first time using a fallback" 123 | mapM_ (uncurry (reBuildFromFallback app)) stagesToBuildFromFallback 124 | 125 | when (stagesToReBuild /= []) $ do 126 | echo "--> Let's re-build the cache for stages that changed" 127 | mapM_ (uncurry (reBuildAssetStage app)) stagesToReBuild -- Build each of the stages so they can be reused later 128 | 129 | 130 | -- | Returns a list of stages which needs to either be built separately or that did not have their cached busted 131 | -- by the introduction of new code. 132 | getChangedStages 133 | :: App -> Branch -> Maybe Branch -> Dockerfile -> Shell [StageCache] 134 | getChangedStages app branch fallbackBranch ast = do 135 | let 136 | -- Filter out the main FROM at the end and only 137 | -- keep the contents of the file before that instruction. 138 | assetStages = init (getStages ast) 139 | 140 | -- For each of the found stages, before the main 141 | -- FROM instruction, convert them to Stage records 142 | stages = mapMaybe (toStage app branch fallbackBranch) assetStages 143 | 144 | when (length assetStages > length stages) showAliasesWarning 145 | fold 146 | ( 147 | -- Find the stages that we already have in local cache 148 | Docker.Cacher.Inspect.getAlreadyCached stages 149 | -- Determine whether or not the cache was invalidated 150 | >>= uncurry Docker.Cacher.Inspect.shouldBustCache 151 | ) 152 | Fold.list 153 | where 154 | showAliasesWarning = do 155 | echo "::::::WARNING::::::" 156 | echo 157 | "I found some FROM directives in the dockerfile that did not have an `as` alias" 158 | echo 159 | "I'm not smart enough to build multi-stage docker files without aliases." 160 | echo "While this is safe to do, you will get no cache benefits" 161 | echo "" 162 | echo 163 | "Please always write your FROM directives as `FROM image:tag as myalias`" 164 | 165 | 166 | -- | The goal is to create a temporary dockefile in this same folder with the contents 167 | -- if the stage variable, call docker build with the generated file and tag the image 168 | -- so we can find it later. 
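--
-- For a stage like `FROM nodejs-build as nodedeps`, the temporary Dockerfile generated here
-- looks roughly like this (illustrative names, assuming the stage does not carry the
-- `cache_instructions=cache` label):
--
--   FROM nodejs-build AS nodedeps
--   LABEL cached_image="nodejs-build:latest"
--
-- The resulting image is then rebuilt with the original ONBUILD instructions appended and
-- tagged with the "-build" suffix, so it can be found again on the next run.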
169 | buildAssetStage :: App -> Stage SourceImage -> Shell () 170 | buildAssetStage app Stage {..} = do 171 | printLn 172 | ("\n--> Building asset stage " % s % " at line " % d % " for the first time" 173 | ) 174 | stageName 175 | stagePos 176 | let 177 | fromInstruction = filter isFrom directives 178 | sourceImage = ImageName (extractFullName fromInstruction) 179 | cacheEverything = canCacheDirectives directives 180 | embeddedFiles = 181 | if cacheEverything then extractCopiedFiles directives else [] 182 | 183 | cacheLabels = buildCacheLabels stageName stageTag embeddedFiles 184 | 185 | newDockerfile = toDockerfile $ do 186 | if cacheEverything then embed directives else embed fromInstruction 187 | label (Data.Coerce.coerce cacheLabels) 188 | 189 | doStageBuild app 190 | sourceImage 191 | stageImageName 192 | buildImageName 193 | cacheLabels 194 | newDockerfile 195 | where 196 | extractFullName (instr : _) = extractFromInstr (instruction instr) 197 | extractFullName _ = "" 198 | 199 | extractFromInstr (From BaseImage { image, tag }) = 200 | prettyImage image <> prettyTag tag 201 | extractFromInstr _ = "" 202 | 203 | prettyImage (Image Nothing img) = img 204 | prettyImage (Image (Just (Registry reg)) img) = reg <> "/" <> img 205 | 206 | prettyTag = maybe "" (\(Tag t) -> ":" <> t) 207 | 208 | 209 | -- | The goal is to create a temporary dockefile in this same folder with the contents 210 | -- if the stage variable, call docker build with the generated file and tag the image 211 | -- so we can find it later. 212 | reBuildAssetStage :: App -> Stage SourceImage -> Stage CachedImage -> Shell () 213 | reBuildAssetStage app uncached cached = do 214 | printLn ("\n--> Rebuilding asset stage " % s % " at line " % d) 215 | (stageName cached) 216 | (stagePos cached) 217 | let embeddedFiles = if canCacheDirectives (directives uncached) 218 | then extractCopiedFiles (directives uncached) 219 | else [] 220 | cacheLabels = buildCacheLabels (stageName uncached) 221 | (stageTag uncached) 222 | embeddedFiles 223 | let ImageName t = stageImageName cached 224 | newDockerfile = cacheableDockerFile t (directives uncached) cacheLabels 225 | doStageBuild app 226 | (buildImageName cached) -- The source image is the one having the ONBUILD lines 227 | (stageImageName cached) 228 | (buildImageName cached) 229 | cacheLabels 230 | newDockerfile 231 | 232 | 233 | reBuildFromFallback :: App -> Stage SourceImage -> Stage CachedImage -> Shell () 234 | reBuildFromFallback app uncached cached = do 235 | let embeddedFiles = extractCopiedFiles (directives uncached) 236 | cacheLabels = 237 | buildCacheLabels (stageName uncached) (stageTag uncached) embeddedFiles 238 | let sourceImage@(ImageName t) = buildImageName cached 239 | newDockerfile = cacheableDockerFile t (directives uncached) cacheLabels 240 | doStageBuild app 241 | sourceImage 242 | (stageImageName uncached) 243 | (buildImageName uncached) 244 | cacheLabels 245 | newDockerfile 246 | 247 | 248 | doStageBuild 249 | :: App 250 | -> ImageName source -- ^ This is the image potentially containing the ONBUILD lines, this image needs to exist 251 | -> ImageName intermediate -- ^ This is the image name to build as intermediate with no ONBUILD 252 | -> ImageName target -- ^ This is the final image name to build, after appending the ONBUILD lines to intermediate 253 | -> CacheLabels 254 | -> Dockerfile 255 | -> Shell () 256 | doStageBuild app sourceImageName intermediateImage targetImageName cacheLabels directives 257 | = do 258 | -- Only build the FROM 259 | status <- 
buildDockerfile app intermediateImage Nothing directives 260 | 261 | -- Break if previous command failed 262 | guard (status == ExitSuccess) 263 | 264 | ImageConfig _ onBuildLines _ <- Docker.Cacher.Inspect.imageConfig 265 | sourceImageName 266 | 267 | -- Append the ONBUILD lines to the new file 268 | newDockerfile <- createDockerfile intermediateImage cacheLabels onBuildLines 269 | 270 | -- Now build it 271 | finalStatus <- buildDockerfile app targetImageName Nothing newDockerfile 272 | 273 | -- Stop here if previous command failed 274 | guard (finalStatus == ExitSuccess) 275 | echo "" 276 | echo 277 | "--> I have tagged a cache container that I can use next time to speed builds!" 278 | 279 | 280 | -- | Simply call docker build for the passed arguments 281 | buildDockerfile 282 | :: App -> ImageName a -> Maybe BuildOptions -> Dockerfile -> Shell ExitCode 283 | buildDockerfile (App app) (ImageName imageName) buildOPtions directives = do 284 | currentDirectory <- pwd 285 | tmpFile <- mktempfile currentDirectory "Dockerfile." 286 | let BuildOptions opts = fromMaybe (BuildOptions "") buildOPtions 287 | let allBuildOptions = 288 | [ "build" 289 | , "--build-arg" 290 | , "APP_NAME=" <> app 291 | , "-f" 292 | , format fp tmpFile 293 | , "-t" 294 | , imageName 295 | , "." 296 | ] 297 | <> [opts] 298 | 299 | -- Put the Dockerfile contents in the tmp file 300 | liftIO (writeTextFile tmpFile (LT.toStrict (prettyPrint directives))) 301 | 302 | -- Build the generated dockerfile 303 | shell ("docker " <> Text.intercalate " " allBuildOptions) empty 304 | 305 | 306 | -- | Given a list of instructions, build a dockerfile where the imageName is the FROM for the file and 307 | -- the list of instructions are wrapped with ONBUILD 308 | createDockerfile :: ImageName a -> CacheLabels -> [Text] -> Shell Dockerfile 309 | createDockerfile (ImageName imageName) (CacheLabels cacheLabels) onBuildLines = 310 | do 311 | let eitherDirectives = map parseText onBuildLines 312 | validDirectives = rights eitherDirectives -- Just in case, filter out bad directives 313 | file = toDockerfile $ do 314 | from $ toImage imageName `tagged` "latest" 315 | label cacheLabels 316 | -- Append each of the ONBUILD instructions 317 | mapM (onBuildRaw . 
toInstruction) validDirectives 318 | return file 319 | where 320 | toInstruction [InstructionPos inst _ _] = inst 321 | toInstruction _ = error "This is not possible" 322 | 323 | 324 | -- 325 | -- | Returns a list of directives grouped by the appeareance of the FROM directive 326 | -- This will return the group of all stages found in the Dockerfile 327 | getStages :: Dockerfile -> [Dockerfile] 328 | getStages ast = filter startsWithFROM (group ast []) 329 | where 330 | group [] acc = reverse acc -- End of recursion 331 | group (directive@(InstructionPos (From _) _ _) : rest) acc = 332 | group rest ([directive] : acc) -- Append a new group 333 | group (directive : rest) [] = group rest [[directive]] -- Create a new group 334 | group (directive : rest) (current : prev) = 335 | group rest ((current ++ [directive]) : prev) -- Continue the currently open group 336 | 337 | -- 338 | -- | Returns true if the first element in the list is a FROM directive 339 | startsWithFROM (InstructionPos (From _) _ _ : _) = True 340 | startsWithFROM _ = False 341 | 342 | 343 | -- | Converts a list of instructions into a Stage record 344 | toStage :: App -> Branch -> Maybe Branch -> Dockerfile -> Maybe (Stage a) 345 | toStage (App app) branch fallback directives = do 346 | (stageName, stageTag, stagePos, stageAlias) <- extractInfo directives -- If getStageInfo returns Nothing, skip the rest 347 | let newImageName (Branch branchName) = 348 | app <> "__branch__" <> branchName <> "__stage__" <> sanitize stageName 349 | stageImageName = ImageName (newImageName branch) 350 | buildImageName = ImageName (newImageName branch <> "-build") 351 | stageFallbackImage = 352 | fmap (\br -> ImageName (newImageName br <> "-build")) fallback 353 | return Stage { .. } 354 | where 355 | extractInfo :: Dockerfile -> Maybe (Text, Text, Linenumber, Text) 356 | extractInfo (InstructionPos { instruction, lineNumber } : _) = 357 | getStageInfo instruction lineNumber 358 | extractInfo _ = Nothing 359 | 360 | getStageInfo 361 | :: Instruction Text -> Linenumber -> Maybe (Text, Text, Linenumber, Text) 362 | getStageInfo (From BaseImage { image, tag = Just (Tag tag), alias = Just (ImageAlias alias) }) pos 363 | = Just (imageName image, tag, pos, alias) 364 | getStageInfo (From BaseImage { image, tag = Nothing, alias = Just (ImageAlias alias) }) pos 365 | = Just (imageName image, "latest", pos, alias) 366 | getStageInfo _ _ = Nothing 367 | 368 | -- 369 | -- | Makes a string safe to use it as a file name 370 | sanitize = Text.replace "/" "-" . Text.replace ":" "-" 371 | 372 | 373 | -- | Given a list of stages and the AST for a Dockerfile, replace all the FROM instructions 374 | -- with their corresponding images as described in the Stage record. 375 | replaceStages :: [Stage CachedImage] -> Dockerfile -> Dockerfile 376 | replaceStages stages = fmap 377 | (\InstructionPos {..} -> 378 | InstructionPos { instruction = replaceStage instruction, .. 
} 379 | ) 380 | where 381 | stagesMap = Map.fromList (map createStagePairs stages) 382 | 383 | createStagePairs stage@Stage {..} = (stageAlias, stage) 384 | 385 | -- 386 | -- | Find whehter or not we have extracted a stage with the same alias 387 | -- If we did, then replace the FROM directive with our own version 388 | replaceStage directive@(From BaseImage { alias = Just (ImageAlias imageAlias) }) 389 | = replaceKnownAlias directive imageAlias 390 | replaceStage directive = directive 391 | 392 | replaceKnownAlias directive imageAlias = 393 | case Map.lookup imageAlias stagesMap of 394 | Nothing -> directive 395 | Just Stage { buildImageName, stageAlias } -> 396 | let ImageName t = buildImageName 397 | in From 398 | (BaseImage { image = toImage t 399 | , tag = Just "latest" 400 | , alias = formatAlias stageAlias 401 | , digest = Nothing 402 | , platform = Nothing 403 | } 404 | ) 405 | 406 | formatAlias = Just . fromString . Text.unpack 407 | 408 | 409 | -- | Finds all COPY and ADD instructions in the dockerfile and returns 410 | -- a concatenated list of all the source paths collected 411 | extractCopiedFiles :: Dockerfile -> [(SourcePath, TargetPath)] 412 | extractCopiedFiles = concatMap (extractFiles . instruction) 413 | where 414 | extractFiles (Copy CopyArgs { sourcePaths, sourceFlag = NoSource, targetPath }) 415 | = zip (Data.List.NonEmpty.toList sourcePaths) (repeat targetPath) 416 | extractFiles (Copy CopyArgs { sourceFlag = _ }) = [] 417 | extractFiles (Add AddArgs { sourcePaths, targetPath }) = 418 | zip (Data.List.NonEmpty.toList sourcePaths) (repeat targetPath) 419 | extractFiles _ = [] 420 | 421 | 422 | buildCacheLabels :: Text -> Text -> [(SourcePath, TargetPath)] -> CacheLabels 423 | buildCacheLabels imageName imageTag files = 424 | CacheLabels $ ("cached_image", imageName <> ":" <> imageTag) : case files of 425 | [] -> [] 426 | _ -> [("cached_files", encodedFiles)] 427 | where 428 | encodedFiles = LT.toStrict (Data.Aeson.Text.encodeToLazyText plainTextList) 429 | 430 | plainTextList :: [(Text, Text)] 431 | plainTextList = Data.Coerce.coerce files 432 | 433 | 434 | canCacheDirectives :: Dockerfile -> Bool 435 | canCacheDirectives df = not (null cacheLabels) 436 | where 437 | cacheLabels = 438 | [ True 439 | | Label pairs <- map instruction df 440 | , (key, val) <- pairs 441 | , key == "cache_instructions" 442 | , val == "cache" 443 | ] 444 | 445 | 446 | cacheableDirectives :: Dockerfile -> Dockerfile 447 | cacheableDirectives df = if canCacheDirectives df 448 | then filter (not . isFrom) . filter (not . isOnBuild) $ df 449 | else [] 450 | 451 | 452 | cacheableDockerFile :: Text -> Dockerfile -> CacheLabels -> Dockerfile 453 | cacheableDockerFile t directives (CacheLabels cacheLabels) = toDockerfile $ do 454 | -- Use the cached image as base for the new one 455 | from (toImage t `tagged` "latest") 456 | -- But we want the contents of the original one 457 | -- without the ONBUILD 458 | embed (cacheableDirectives directives) 459 | label cacheLabels 460 | 461 | 462 | isFrom :: InstructionPos args -> Bool 463 | isFrom (InstructionPos From{} _ _) = True 464 | isFrom _ = False 465 | 466 | 467 | isOnBuild :: InstructionPos args -> Bool 468 | isOnBuild (InstructionPos OnBuild{} _ _) = True 469 | isOnBuild _ = False 470 | 471 | 472 | toImage :: Text -> Image 473 | toImage = fromString . 
Text.unpack 474 | -------------------------------------------------------------------------------- /src/Docker/Cacher/Inspect.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE RecordWildCards #-} 3 | {-# LANGUAGE NamedFieldPuns #-} 4 | 5 | module Docker.Cacher.Inspect where 6 | 7 | import qualified Control.Foldl as Fold 8 | import qualified Data.Aeson as Aeson 9 | import Data.Aeson ( (.:) ) 10 | import qualified Data.Coerce 11 | import Data.Either ( rights ) 12 | import Data.List.NonEmpty ( toList ) 13 | import Data.Maybe ( fromMaybe 14 | , isJust 15 | , mapMaybe 16 | ) 17 | import qualified Data.Map as Map 18 | import qualified Data.Text as Text 19 | import Data.Text ( Text ) 20 | import qualified Data.Text.Encoding 21 | import Language.Docker hiding ( workdir ) 22 | import Language.Docker.Syntax ( Tag(..) ) 23 | import Prelude hiding ( FilePath ) 24 | import Turtle 25 | 26 | import Docker.Cacher.Internal 27 | 28 | -- | This represents the image config as stored on disk by the docker daemon. We only care here 29 | -- about the .Config.WorkingDir and .Config.OnBuild properties 30 | data ImageConfig = ImageConfig 31 | { workingDir :: !Text 32 | , onBuildInstructions :: ![Text] 33 | , storedLabels :: !(Map.Map Text Text) 34 | } deriving (Show, Eq) 35 | 36 | data StageCache 37 | = NotCached (Stage SourceImage) -- When the image has not yet been built separetely 38 | | FallbackCache (Stage SourceImage) -- When the cache was not present but we looked at a falback key 39 | StageCache -- The fallback cache 40 | | Cached { stage :: Stage CachedImage -- When the image was built and tagged as a separate image 41 | , cacheBusters :: [(SourcePath, TargetPath)] -- List of files that are able to bust the cache 42 | } 43 | | CacheInvalidated (Stage SourceImage) (Stage CachedImage) 44 | deriving (Show) 45 | 46 | instance Aeson.FromJSON ImageConfig where 47 | parseJSON = 48 | Aeson.withObject "ImageConfig" $ \ic -> do 49 | workingDir <- ic .: "WorkingDir" 50 | onBuildInstructions <- fromMaybe [] <$> ic .: "OnBuild" 51 | storedLabels <- fromMaybe Map.empty <$> ic .: "Labels" 52 | return $ ImageConfig {..} 53 | 54 | 55 | -- | Calls docker inspect for the given image name and returns the config 56 | imageConfig :: ImageName a -> Shell ImageConfig 57 | imageConfig (ImageName name) = do 58 | printLn ("----> Inspecting the config for the docker image: " % s) name 59 | out <- fmap lineToText <$> fold fetchConfig Fold.list -- Buffer all the output in the out variable 60 | 61 | case decodeJSON out of 62 | Left decodeErr -> do 63 | error 64 | $ "----> Could not decode the response of docker inspect: " 65 | ++ decodeErr 66 | return (ImageConfig "" [] Map.empty) 67 | 68 | Right ic -> return ic 69 | where 70 | fetchConfig = 71 | inproc "docker" ["inspect", "--format", "{{.Config | json}}", name] empty 72 | decodeJSON = 73 | Aeson.eitherDecodeStrict . Data.Text.Encoding.encodeUtf8 . Text.unlines 74 | 75 | 76 | -- | Check whether or not the imageName exists for each of the passed stages 77 | -- and return only those that already exist. 
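-- Each stage is returned paired with its StageCache status, which 'shouldBustCache'
-- later refines by comparing the cache buster files against the local ones.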
78 | getAlreadyCached :: [Stage SourceImage] -> Shell (Stage SourceImage, StageCache) 79 | getAlreadyCached stages = do 80 | echo 81 | "--> I'm checking whether or not the stage exists as a docker image already" 82 | 83 | stage@Stage { buildImageName, stageFallbackImage } <- select stages -- foreach stages as stage 84 | exists <- cacheKeyExists stage buildImageName 85 | 86 | if exists 87 | then do 88 | echo 89 | "------> It already exists, so I will then check if the cache files changed" 90 | inspected <- inspectCache stage 91 | return (stage, inspected) 92 | else do 93 | echo "------> It does not exist, so I will need to build it myself later" 94 | maybe (return (stage, NotCached stage)) 95 | (getFallbackCache stage) 96 | stageFallbackImage 97 | where 98 | cacheKeyExists stage (ImageName imageName) = do 99 | printLn ("----> Looking for image " % s) imageName 100 | 101 | existent <- fold 102 | (inproc "docker" 103 | ["image", "ls", imageName, "--format", "{{.Repository}}"] 104 | empty 105 | ) 106 | Fold.list -- Get the output of the command as a list of lines 107 | 108 | if existent == mempty 109 | then return False 110 | -- For the cache to be valid, we need to make sure that the stored image is based on the same 111 | -- base image and tag. Otherwise we will need to rebuild the cache anyway 112 | else imageAndTagMatches (ImageName (stageName stage)) 113 | (Tag (stageTag stage)) 114 | imageName 115 | -- 116 | -- 117 | getFallbackCache stage fallbackName = do 118 | exists <- cacheKeyExists stage fallbackName 119 | 120 | if exists 121 | then do 122 | echo 123 | "------> The fallback image exists, using it to build the initial cache" 124 | cachedStage <- inspectCache 125 | (stage { stageImageName = fallbackName 126 | , buildImageName = fallbackName 127 | } 128 | ) 129 | return (stage, FallbackCache stage cachedStage) 130 | else do 131 | echo "------> There is not fallback cache image" 132 | return (stage, NotCached stage) 133 | 134 | 135 | -- | Here check each of the cache buster from the image and compare them with those we have locally, 136 | -- if the files do not match, then we return the stage back as a result, otherwise return Nothing. 
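-- (Concretely: it yields CacheInvalidated when any cache buster file is missing locally or
-- differs from the copy stored in the image, and returns the original Cached value otherwise.)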
shouldBustCache :: Stage SourceImage -> StageCache -> Shell StageCache
shouldBustCache sourceStage cached@Cached {..} = do
  printLn ("----> Checking cache buster files for stage " % s) (stageName stage)
  result <- withContainer (buildImageName stage) checkFiles -- Create a container to inspect the files
  case result of
    Left _ -> do
      err
        "----> Could not create the container, impossible to inspect the cache"
      return $ NotCached sourceStage
    Right stCache -> return stCache
  where
    checkFiles containerId = do
      hasChanged <- fold
        (mfilter isJust (checkFileChanged containerId cacheBusters))
        Fold.head
      -- ^ Get the cache buster files that have changed since last time

      -- The following is executed for each of the files found
      if isJust hasChanged
        then do
          printLn ("----> The stage " % s % " changed") (stageName stage)
          return (CacheInvalidated sourceStage stage)
        else do
          printLn ("----> The stage " % s % " did not change") (stageName stage)
          return cached

    -- |
    checkFileChanged containerId files = do
      (SourcePath src, TargetPath dest) <- select files
      let file = fromText src
      exists <- testpath file
      if not exists
        then return (Just file)
        else do
          isDir <- testdir file

          if isDir
            then do
              printLn
                ("------>'" % fp % "' is a directory, assuming files inside it changed")
                file
              return $ Just file
            else do
              let targetDir = fromText dest
              printLn ("------> Checking file '" % fp % "' in directory " % fp)
                      file
                      targetDir
              currentDirectory <- pwd
              tempFile <- mktempfile currentDirectory "comp"
              let targetFile = targetDir </> file
              status <- proc
                "docker"
                [ "cp"
                , format (s % ":" % fp) containerId targetFile
                , format fp tempFile
                ]
                empty

              guard (status == ExitSuccess)

              local <- liftIO (readTextFile file)
              remote <- liftIO (readTextFile tempFile)
              if local == remote then return Nothing else return (Just file)

-- In any other case return the same inspected stage
shouldBustCache _ c@NotCached{} = return c
shouldBustCache _ c@CacheInvalidated{} = return c
shouldBustCache _ c@FallbackCache{} = return c


-- | This will inspect how an image was built and extract the ONBUILD directives. If any of those
-- instructions are copying or adding files to the build, they are considered "cache busters".
inspectCache :: Stage SourceImage -> Shell StageCache
inspectCache sourceStage@Stage {..} = do
  ImageConfig workdir onBuildLines foundLabels <- imageConfig buildImageName
  let parsedDirectivesWithErrors = fmap parseText onBuildLines -- Parse each of the lines
      parsedDirectives = (getFirst . rights) parsedDirectivesWithErrors -- We only keep the good lines
      workPath = fromText workdir
      onBuildBusters = extractCachePaths workPath parsedDirectives
      cacheBusters = onBuildBusters ++ bustersFromLabels workPath foundLabels
  return $ Cached (toCachedStage sourceStage) cacheBusters
  where
    extractCachePaths workdir dir =
      let filePairs = concat (mapMaybe doExtract dir) -- Get the (source, target) pairs of files copied
      in  fmap (prependWorkdir workdir) filePairs -- Some target paths need to have the WORKDIR prepended

    --
    -- | Prepend a given target dir to the target path
    prependWorkdir workdir (source, TargetPath target) =
      let dest = fromText target
          prependedDest = format fp (collapse (workdir </> dest))
      in  if relative dest -- If the target path is relative, we need to prepend the workdir
            then (source, TargetPath prependedDest) -- Remove the ./ prefix and prepend workdir
            else (source, TargetPath target)

    --
    -- | COPY allows multiple paths in the same line, we need to convert each to a separate path
    doExtract (InstructionPos (Copy CopyArgs { sourcePaths, targetPath }) _ _) =
      Just (zip (toList sourcePaths) (repeat targetPath))
    --
    -- | This case is simpler, we only need to convert the source and target from ADD
    doExtract (InstructionPos (Add AddArgs { sourcePaths, targetPath }) _ _) =
      Just (zip (toList sourcePaths) (repeat targetPath))
    doExtract _ = Nothing

    getFirst (first : _) = first
    getFirst [] = []

    bustersFromLabels workdir labelList =
      case Map.lookup "cached_files" labelList of
        Nothing -> []
        Just fs ->
          case
              Aeson.decodeStrict . Data.Text.Encoding.encodeUtf8 $ fs :: Maybe
                [(Text, Text)]
            of
              Nothing -> []
              Just busters ->
                fmap (prependWorkdir workdir) (Data.Coerce.coerce busters)


toCachedStage :: Stage SourceImage -> Stage CachedImage
toCachedStage Stage {..} =
  let stage = Stage {..}
      ImageName sImageName = stageImageName
      ImageName bImageName = buildImageName
  in  stage { stageImageName = ImageName sImageName
            , stageFallbackImage = Nothing
            , buildImageName = ImageName bImageName
            }


-- | Extracts the label from the cached image passed in the last argument and checks
-- if it matches the passed image name and tag name. This is used to avoid using a
-- cached image that was built using a different base image
imageAndTagMatches :: ImageName Text -> Tag -> Text -> Shell Bool
imageAndTagMatches (ImageName imageName) (Tag tagName) cachedImage = do
  printLn ("------> Checking the stored cached key in a label for " % s)
          cachedImage
  value <- fold getCacheLabel Fold.head -- Get the only line
  let expected = unsafeTextToLine (imageName <> ":" <> tagName)
  return (Just expected == value)
  where
    getCacheLabel = inproc
      "docker"
      [ "inspect"
      , "--format"
      , "{{ index .Config.Labels \"cached_image\"}}"
      , cachedImage
      ]
      empty


alreadyCached :: StageCache -> Maybe (Stage CachedImage)
-- We want to replace stages where the cache
-- was invalidated by any file changes.
alreadyCached (CacheInvalidated _ stage) = Just stage

-- Likewise, once we have a cached stage, we need to keep using it
-- in successive builds, so the cache is not invalidated again.
alreadyCached (Cached stage _) = Just stage

-- Finally, if we are using a fallback, we apply
-- the same rules as above for the fallback key
alreadyCached (FallbackCache _ (CacheInvalidated _ stage)) = Just stage

alreadyCached (FallbackCache _ (Cached stage _)) = Just stage

alreadyCached _ = Nothing


-- | Creates a container from a stage and passes the container id to the
-- given shell as an argument
withContainer :: ImageName a -> (Text -> Shell b) -> Shell (Either Line b)
withContainer (ImageName imageName) action = do
  result <- inprocWithErr "docker" ["create", imageName, "sh"] empty
  case result of
    Left errorMsg -> return (Left errorMsg)
    Right containerId -> do
      res <- fold (action (format l containerId)) Fold.list
      _ <- removeContainer containerId -- Ignore the return code of this command
      select (map Right res) -- yield each result as a separate line
  where
    removeContainer containerId =
      proc "docker" ["rm", format l containerId] empty
--------------------------------------------------------------------------------
/src/Docker/Cacher/Internal.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}

module Docker.Cacher.Internal where

import Data.Text ( Text )
import Language.Docker
import Turtle

newtype ImageName a =
  ImageName Text
  deriving (Show, Eq)

data SourceImage =
  SourceImage

data CachedImage =
  CachedImage

-- | A Stage is one of the FROM directives in a dockerfile
--
data Stage a = Stage
  { stageName :: !Text -- ^ The image name, for example ubuntu in "ubuntu:16"
  , stageTag :: !Text -- ^ The image tag, for example latest in "python:latest"
  , stageImageName :: !(ImageName a) -- ^ The name of the docker image to generate a separate cache for
                                     -- this is pretty much the "template" image to use for 'buildImageName'
                                     --
  , stageFallbackImage :: !(Maybe (ImageName a)) -- ^ If the stageImageName does not exist, then we can try building
                                                 -- from another template, usually the one from the master branch
                                                 --
  , stagePos :: !Linenumber -- ^ Where in the docker file this line is found
  , stageAlias :: !Text -- ^ The alias in the FROM instruction, for example "builder" in "FROM ubuntu:16.10 AS builder"
  , buildImageName :: !(ImageName a) -- ^ The resulting image name, after caching all assets
  , directives :: !Dockerfile -- ^ Dockerfile is an alias for [InstructionPos]
  } deriving (Show, Eq)


printLn :: MonadIO io => Format (io ()) r -> r
printLn message = printf (message % "\n")
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # http://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # resolver: ghcjs-0.1.0_ghc-7.10.2 15 | # resolver: 16 | # name: custom-snapshot 17 | # location: "./custom-snapshot.yaml" 18 | resolver: lts-13.14 19 | 20 | # User packages to be built. 21 | # Various formats can be used as shown in the example below. 22 | # 23 | packages: 24 | - '.' 25 | 26 | # Dependency packages to be pulled from upstream that are not in the resolver 27 | # (e.g., acme-missiles-0.3) 28 | extra-deps: [] 29 | 30 | # Override default flag values for local packages and extra-deps 31 | flags: {} 32 | 33 | # Extra package databases containing global packages 34 | extra-package-dbs: [] 35 | 36 | # Control whether we use the GHC we find on the path 37 | # system-ghc: true 38 | # 39 | # Require a specific version of stack, using version ranges 40 | # require-stack-version: -any # Default 41 | # require-stack-version: ">=1.3" 42 | # 43 | # Override the architecture used by stack, especially useful on Windows 44 | # arch: i386 45 | # arch: x86_64 46 | # 47 | # Extra directories used by stack for building 48 | # extra-include-dirs: [/path/to/dir] 49 | # extra-lib-dirs: [/path/to/dir] 50 | # 51 | # Allow a newer minor version of GHC than the snapshot specifies 52 | # compiler-check: newer-minor 53 | --------------------------------------------------------------------------------