├── .ghci ├── .ghcid ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .hlint.yaml ├── CHANGES.md ├── LICENSE ├── README.md ├── build.cabal ├── hie.yaml ├── papers ├── icfp │ ├── .gitignore │ ├── .vscode │ │ └── settings.json │ ├── 1-intro.tex │ ├── 2-background.tex │ ├── 3-abstractions.tex │ ├── 4-build.tex │ ├── 5-implementations.tex │ ├── 6-engineering.tex │ ├── 7-related.tex │ ├── 8-conclusions.tex │ ├── 9-appendix.tex │ ├── ACM-Reference-Format.bst │ ├── Dockerfile │ ├── Makefile │ ├── acmart.cls │ ├── comment.cut │ ├── fig │ │ ├── bazel-example-build.pdf │ │ ├── bazel-example-build.svg │ │ ├── bazel-example-checkout.pdf │ │ ├── bazel-example-checkout.svg │ │ ├── bazel-example-rebuild.pdf │ │ ├── bazel-example-rebuild.svg │ │ ├── frankenbuild-example-build.pdf │ │ ├── frankenbuild-example-build.svg │ │ ├── frankenbuild-example-clean.pdf │ │ ├── frankenbuild-example-clean.svg │ │ ├── frankenbuild-example-rebuild.pdf │ │ ├── frankenbuild-example-rebuild.svg │ │ ├── make-example-full.pdf │ │ ├── make-example-full.svg │ │ ├── make-example-partial.pdf │ │ ├── make-example-partial.svg │ │ ├── make-example.pdf │ │ ├── make-example.svg │ │ ├── shake-example-cutoff.pdf │ │ ├── shake-example-cutoff.svg │ │ ├── shake-example-rebuild.pdf │ │ ├── shake-example-rebuild.svg │ │ ├── shake-example.pdf │ │ └── shake-example.svg │ ├── final-changes.md │ ├── main.tex │ ├── notes.md │ ├── refs.bib │ └── response.md └── jfp │ ├── .gitignore │ ├── .vscode │ └── settings.json │ ├── 1-intro.tex │ ├── 10-conclusions.tex │ ├── 11-appendix.tex │ ├── 2-background.tex │ ├── 3-abstractions.tex │ ├── 4-schedulers.tex │ ├── 5-rebuilders.tex │ ├── 6-implementations.tex │ ├── 7-experience.tex │ ├── 8-engineering.tex │ ├── 9-related.tex │ ├── Makefile │ ├── amsfonts.sty │ ├── bm.sty │ ├── fig │ ├── bazel-example-build.pdf │ ├── bazel-example-build.svg │ ├── bazel-example-checkout.pdf │ ├── bazel-example-checkout.svg │ ├── bazel-example-rebuild.pdf │ ├── bazel-example-rebuild.svg │ ├── 
frankenbuild-example-build.pdf │ ├── frankenbuild-example-build.svg │ ├── frankenbuild-example-clean.pdf │ ├── frankenbuild-example-clean.svg │ ├── frankenbuild-example-rebuild.pdf │ ├── frankenbuild-example-rebuild.svg │ ├── make-example-full.pdf │ ├── make-example-full.svg │ ├── make-example-partial.pdf │ ├── make-example-partial.svg │ ├── make-example.pdf │ ├── make-example.svg │ ├── shake-example-cutoff.pdf │ ├── shake-example-cutoff.svg │ ├── shake-example-rebuild.pdf │ ├── shake-example-rebuild.svg │ ├── shake-example.pdf │ ├── shake-example.svg │ ├── step-example-step1.pdf │ ├── step-example-step1.svg │ ├── step-example-step2.pdf │ ├── step-example-step2.svg │ ├── step-example-step3.pdf │ └── step-example-step3.svg │ ├── jfp.bst │ ├── jfp1.cls │ ├── jfp2egui.tex │ ├── main.tex │ ├── mathptmx.sty │ ├── refs.bib │ └── todo.md ├── src ├── Build.hs └── Build │ ├── Multi.hs │ ├── Rebuilder.hs │ ├── Scheduler.hs │ ├── SelfTracking.hs │ ├── SelfTracking │ └── Typed.hs │ ├── Store.hs │ ├── System.hs │ ├── Task.hs │ ├── Task │ ├── Applicative.hs │ ├── Free.hs │ ├── Functor.hs │ ├── Monad.hs │ ├── MonadPlus.hs │ ├── Opaque.hs │ └── Typed.hs │ ├── Trace.hs │ └── Utilities.hs ├── stack.yaml └── test ├── Examples.hs ├── Main.hs └── Spreadsheet.hs /.ghci: -------------------------------------------------------------------------------- 1 | :set -Wall -fno-warn-name-shadowing -Wcompat 2 | :set -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints 3 | :set -Wunused-binds -Wunused-imports -Worphans 4 | 5 | :set -isrc 6 | :set -itest 7 | 8 | :set prompt "\x03BB> " 9 | :set prompt-cont "\x03BB| " 10 | -------------------------------------------------------------------------------- /.ghcid: -------------------------------------------------------------------------------- 1 | -T main 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
-------------------------------------------------------------------------------- 1 | # Adapted from: https://github.com/ndmitchell/hlint/blob/master/.github/workflows/ci.yml 2 | name: ci 3 | 4 | on: 5 | push: 6 | pull_request: 7 | schedule: 8 | - cron: '0 3 * * 6' # 3am Saturday 9 | workflow_dispatch: 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest] 19 | ghc: ['9.8.2', '9.6.3', '9.4.7', '9.2.8'] 20 | include: 21 | - os: windows-latest 22 | # Testing on MacOS is disabled until GitHub actions support 'allow-failure' 23 | # - os: macOS-latest 24 | 25 | steps: 26 | - run: git config --global core.autocrlf false 27 | - uses: actions/checkout@v3 28 | - uses: haskell-actions/setup@v2 29 | id: setup-haskell 30 | with: 31 | ghc-version: ${{ matrix.ghc }} 32 | - name: Get GHC libdir # exposes steps.get-ghc-libdir.outputs.libdir, used in the cache key below 33 | id: get-ghc-libdir 34 | run: | 35 | echo "libdir=$(ghc --print-libdir)" >> "$GITHUB_OUTPUT" 36 | shell: bash 37 | - run: cabal v2-freeze --enable-tests 38 | - uses: actions/cache@v2 39 | with: 40 | path: ${{ steps.setup-haskell.outputs.cabal-store }} 41 | key: ${{ runner.os }}-${{ matrix.ghc }}-${{ steps.get-ghc-libdir.outputs.libdir }}-${{ hashFiles('cabal.project.freeze') }} 42 | - uses: snowleopard/neil@master 43 | with: 44 | github-user: snowleopard 45 | hlint-arguments: src 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | cabal.project.local~ 21 | .HTF/ 22 | ghcid.txt 23 | stack.yaml.lock 24 | -------------------------------------------------------------------------------- /.hlint.yaml: 
-------------------------------------------------------------------------------- 1 | - ignore: {name: Use unless} 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Change log 2 | 3 | ## 1.0 4 | 5 | The version published in ICFP 2018 paper "Build Systems à la Carte". 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2024 Andrey Mokhov, Neil Mitchell, Simon Peyton Jones 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build Systems à la Carte 2 | 3 | [![Hackage version](https://img.shields.io/hackage/v/build.svg?label=Hackage)](https://hackage.haskell.org/package/build) [![Build status](https://img.shields.io/github/actions/workflow/status/snowleopard/build/ci.yml?branch=main)](https://github.com/snowleopard/build/actions) 4 | 5 | This project provides an executable framework for developing and comparing build systems, viewing them as 6 | related points in a landscape rather than as isolated phenomena. The code derives from the ICFP 2018 paper 7 | ["Build Systems à la Carte"](https://dl.acm.org/citation.cfm?id=3236774) 8 | ([PDF](https://dl.acm.org/ft_gateway.cfm?id=3236774)). 9 | 10 | ## Getting Started 11 | 12 | You may want to: 13 | 14 | * Run `stack test` to execute all the provided build systems on a very simple example. 15 | * Look at the [HTML documentation](https://hackage.haskell.org/package/build) of all modules for the last release, 16 | or generate it yourself using `stack haddock`. 17 | * Read the code, particularly [Build.System](src/Build/System.hs), which is the concrete implementation of 18 | all build systems. 19 | 20 | ## Related Material 21 | 22 | * Blog post [on the motivation behind the project](https://blogs.ncl.ac.uk/andreymokhov/cloud-and-dynamic-builds/). 23 | * Blog post [on the task abstraction](https://blogs.ncl.ac.uk/andreymokhov/the-task-abstraction/). 24 | * Blog post [reviewing how the paper was written](https://neilmitchell.blogspot.com/2018/07/inside-paper-build-systems-la-carte.html). 25 | * Talk [by Neil on this paper and Shake](https://ndmitchell.com/#shake_18_may_2018). 26 | * A talk on build systems 27 | [by Andrey](https://github.com/snowleopard/build/releases/download/icfp-final/build-systems-slides-andrey.pdf). 
28 | -------------------------------------------------------------------------------- /build.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.2 2 | name: build 3 | version: 1.1 4 | synopsis: Build Systems à la Carte 5 | homepage: https://github.com/snowleopard/build 6 | bug-reports: https://github.com/snowleopard/build/issues 7 | license: MIT 8 | license-file: LICENSE 9 | author: Andrey Mokhov, Neil Mitchell, Simon Peyton Jones 10 | maintainer: Andrey Mokhov , github: @snowleopard 11 | copyright: Andrey Mokhov, Neil Mitchell, Simon Peyton Jones, 2018-2024 12 | category: Algorithms, Data Structures 13 | build-type: Simple 14 | description: A library for experimenting with build systems and incremental 15 | computation frameworks, based on the ideas presented in the ICFP 16 | 2018 paper "Build Systems à la Carte". 17 | tested-with: GHC==9.8.2, GHC==9.6.3, GHC==9.4.7, GHC==9.2.8 18 | 19 | extra-doc-files: 20 | CHANGES.md 21 | README.md 22 | 23 | source-repository head 24 | type: git 25 | location: https://github.com/snowleopard/build.git 26 | 27 | library 28 | hs-source-dirs: src 29 | exposed-modules: Build, 30 | Build.Multi, 31 | Build.Rebuilder, 32 | Build.SelfTracking, 33 | Build.SelfTracking.Typed, 34 | Build.Scheduler, 35 | Build.Store, 36 | Build.Task, 37 | Build.Task.Applicative, 38 | Build.Task.Free, 39 | Build.Task.Functor, 40 | Build.Task.Monad, 41 | Build.Task.MonadPlus, 42 | Build.Task.Opaque, 43 | Build.Task.Typed, 44 | Build.Trace, 45 | Build.System 46 | other-modules: Build.Utilities 47 | build-depends: algebraic-graphs >= 0.5 && < 0.8, 48 | base >= 4.7 && < 5, 49 | containers >= 0.6 && < 0.7, 50 | extra >= 1.5.3 && < 1.8, 51 | filepath >= 1.4.1.0 && < 1.5, 52 | mtl >= 2.2.1 && < 2.4, 53 | random >= 1.1 && < 1.3, 54 | transformers >= 0.5.2.0 && < 0.7 55 | default-language: Haskell2010 56 | ghc-options: -Wall 57 | -fno-warn-name-shadowing 58 | -Wcompat 59 | -Wincomplete-record-updates 60 | 
-Wincomplete-uni-patterns 61 | -Wredundant-constraints 62 | 63 | test-suite test 64 | hs-source-dirs: test 65 | type: exitcode-stdio-1.0 66 | main-is: Main.hs 67 | other-modules: Examples 68 | Spreadsheet 69 | build-depends: build, 70 | base >= 4.7 && < 5, 71 | containers >= 0.6 && < 0.7, 72 | extra >= 1.5.3 && < 1.8, 73 | mtl >= 2.2.1 && < 2.4, 74 | transformers >= 0.5.2.0 && < 0.7 75 | default-language: Haskell2010 76 | ghc-options: -Wall 77 | -fno-warn-name-shadowing 78 | -Wcompat 79 | -Wincomplete-record-updates 80 | -Wincomplete-uni-patterns 81 | -Wredundant-constraints 82 | -------------------------------------------------------------------------------- /hie.yaml: -------------------------------------------------------------------------------- 1 | cradle: 2 | stack: 3 | -------------------------------------------------------------------------------- /papers/icfp/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 
18 | # *.ps 19 | # *.eps 20 | main.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # thmtools 145 | *.loe 146 | 147 | # TikZ & PGF 148 | *.dpth 149 | *.md5 150 | *.auxlock 
151 | 152 | # todonotes 153 | *.tdo 154 | 155 | # xindy 156 | *.xdy 157 | 158 | # xypic precompiled matrices 159 | *.xyc 160 | 161 | # endfloat 162 | *.ttt 163 | *.fff 164 | 165 | # Latexian 166 | TSWLatexianTemp* 167 | 168 | ## Editors: 169 | # WinEdt 170 | *.bak 171 | *.sav 172 | 173 | # Texpad 174 | .texpadtmp 175 | 176 | # Kile 177 | *.backup 178 | 179 | # KBibTeX 180 | *~[0-9]* 181 | -------------------------------------------------------------------------------- /papers/icfp/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "latex-workshop.latex.toolchain": [ 3 | { 4 | "command": "texify", 5 | "args": [ 6 | "--synctex", 7 | "--pdf", 8 | "--tex-option=\"-shell-escape\"", 9 | "--tex-option=\"-interaction=nonstopmode\"", 10 | "--tex-option=\"-file-line-error\"", 11 | "%DOC%.tex" 12 | ] 13 | } 14 | ], 15 | "cSpell.words": [ 16 | "Elem", 17 | "Ericson", 18 | "Hashable", 19 | "PACMPL", 20 | "SIGPLAN", 21 | "acmart", 22 | "bazel", 23 | "etre", 24 | "fmap", 25 | "mempty", 26 | "newtype", 27 | "poss", 28 | "racey", 29 | "sigplanproc", 30 | "sprsh", 31 | "topmatter", 32 | "uncurry" 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /papers/icfp/1-intro.tex: -------------------------------------------------------------------------------- 1 | \section{Introduction}\label{sec-intro} 2 | 3 | Build systems (such as \Make) are big, complicated, and used by every 4 | software developer on the planet. But they are a sadly unloved part 5 | of the software ecosystem, very much a means to an end, and seldom the 6 | focus of attention. 7 | % Rarely do people ask questions like ``What does it mean for my build 8 | % system to be correct?'' or ``What are the trade-offs between different 9 | % approaches?''. 
10 | For years \Make dominated, but more recently the challenges of scale have driven 11 | large software firms like Microsoft, Facebook and Google to develop their own 12 | build systems, exploring new points in the design space. These complex build 13 | systems use subtle algorithms, but they are often hidden away, and not the 14 | object of study. 15 | 16 | In this paper we offer a general framework in which to understand and compare 17 | build systems, in a way that is both abstract (omitting incidental detail) 18 | and yet precise (implemented as Haskell code). Specifically we make these 19 | contributions: 20 | \begin{itemize} 21 | \item Build systems vary on many axes, including: static vs dynamic 22 | dependencies; local vs cloud; deterministic vs non-deterministic build tasks; 23 | support for early cutoff; self-tracking build systems; and the type of 24 | persistent build information. In~\S\ref{sec-background} we identify some key 25 | properties, illustrated by four carefully-chosen build systems. 26 | 27 | \item We describe some simple but novel abstractions that 28 | crisply encapsulate what a build system is (\S\ref{sec-abstractions}), 29 | allowing us, for example, to speak about what it means for a build system to be correct. 30 | 31 | \item We identify two key design choices that are typically deeply wired into 32 | any build system: \emph{the order in which tasks are 33 | built}~(\S\ref{sec-dependency-orderings}) and \emph{whether or not a 34 | task is (re-)built}~(\S\ref{sec-out-of-date}). These choices turn out to 35 | be orthogonal, which leads us to a new classification of the design 36 | space~(\S\ref{sec-design-space}). 
37 | 38 | \item We show that we can instantiate our abstractions to describe the essence 39 | of a variety of different real-life build systems, including \Make, \Shake, 40 | \Bazel, \CloudBuild, \Buck, \Nix, and \Excel\footnote{\Excel appears very 41 | different to the others but, seen through the lens of this paper, it is very 42 | close indeed.}, each by the composition of the two design choices 43 | (\S\ref{sec-implementations}). Doing this modelling in a single setting 44 | allows the differences and similarities between these huge systems to be 45 | brought out clearly\footnote{All our models are executable and are available on 46 | Hackage: \url{https://hackage.haskell.org/package/build-1.0}.}. 47 | 48 | \item Moreover, we can readily remix the ingredients to design new build systems 49 | with desired properties, for example, to combine the advantages of \Shake and 50 | \Bazel (\S\ref{sec-implementation-cloud}). 51 | 52 | \end{itemize} 53 | In short, instead of seeing build systems as unrelated 54 | points in space, we now see them as locations in a connected landscape, 55 | leading to a better understanding of what they do and how they compare, 56 | and suggesting exploration of other (as yet unoccupied points) in the 57 | landscape. 58 | We discuss engineering aspects in \S\ref{sec-engineering}, and related 59 | work in \S\ref{sec-related}. 60 | 61 | % Papers about ``frameworks'' are often fuzzy. This one is not: all our 62 | % abstractions are defined in Haskell, and we have (freely-available) 63 | % executable models of all the build systems we describe. 64 | -------------------------------------------------------------------------------- /papers/icfp/7-related.tex: -------------------------------------------------------------------------------- 1 | \section{Related work}\label{sec-related} 2 | 3 | While there is research on individual build systems, there has been little 4 | research to date comparing different build systems. 
In~\S\ref{sec-background} we 5 | covered several important build systems~--~in this section we relate a few 6 | other build systems to our abstractions, and discuss other work where similar 7 | abstractions~arise. 8 | 9 | \subsection{Other Build Systems}\label{sec-related-build} 10 | 11 | Most build systems, when viewed at the level of abstraction used in this paper, can be captured with minor 12 | variations on the code presented in \S\ref{sec-implementations}. Below we list 13 | some notable examples: 14 | 15 | \begin{itemize} 16 | \item \Dune~\cite{dune} is a build system designed for OCaml/Reason projects. 17 | Its distinguishing feature is that it uses 18 | \emph{arrows}~\cite{hughes2000generalising} rather than monads to model 19 | dynamic dependencies, which simplifies static dependency approximation. 20 | 21 | \item \Ninja~\cite{ninja} combines the \hs{topological} scheduler of \Make with 22 | the verifying traces of \Shake~--~our associated implementation provides such a 23 | combination. \Ninja~is also capable of modelling build rules that produce 24 | multiple results, a limited form of polymorphism~(\S\ref{sec-polymorphism}). 25 | 26 | \item \Nix~\cite{dolstra2004nix} has coarse-grained dependencies, with precise 27 | hashing of dependencies and downloading of precomputed build products. We 28 | provided a model of \Nix in \S\ref{sec-implementation-cloud}, although it is 29 | worth noting that \Nix is not primarily intended as a build system, and the 30 | coarse-grained nature (packages, not individual files) makes it targeted to a 31 | different purpose. 32 | 33 | \item \Pluto~\cite{erdweg2015pluto} is based on a similar model to \Shake, but 34 | additionally allows cyclic build rules combined with a user-specific resolution 35 | strategy. Often such a strategy can be unfolded into the user rules without loss 36 | of precision, but a fully general resolution handler extends the \hs{Task} 37 | abstraction with additional features. 
38 | 39 | \item \Redo~\cite{redo-idea}\cite{grosskurth2007redo}\cite{redo} almost exactly 40 | matches \Shake at the level of detail given here, differing only on aspects like 41 | polymorphic dependencies~\S\ref{sec-polymorphism}. 42 | 43 | \item \Tup~\cite{tup} functions much like \Make, but with a refined dirty-bit 44 | implementation that watches the file system for changes and can thus avoid 45 | rechecking the entire graph. \Tup also automatically deletes stale results. 46 | \end{itemize} 47 | 48 | The one build system we are aware of that cannot be modelled in our framework is 49 | \Fabricate by \citet{fabricate}. In \Fabricate a build system is a script that is 50 | run in-order, in the spirit of: 51 | % I'm not sure the footnote adds much, let's save space? 52 | % \footnote{\Fabricate requires scripts to be 53 | % written in Python, but those details are not fundamental to what makes 54 | % \Fabricate special.} 55 | 56 | \begin{minted}[xleftmargin=10pt]{bash} 57 | gcc -c util.c 58 | gcc -c main.c 59 | gcc util.o main.o -o main.exe 60 | \end{minted} 61 | 62 | \noindent 63 | To achieve minimality, each separate command is traced at the OS-level, allowing 64 | \Fabricate to record a trace entry stating that \cmd{gcc -c util.c} reads from 65 | \cmd{util.c}. In future runs \Fabricate runs the script from start to finish, 66 | skipping any commands where no inputs have changed. The key difference from our 67 | \hs{Tasks} abstraction is that instead of supplying a mapping from outputs to 68 | tasks, \Fabricate supplies a list of statements, in an order, without declaring 69 | what each line produces. There is no need to schedule the statements, and not 70 | enough information to do so. 
71 | 72 | Taking our abstraction, it is possible to encode \Fabricate assuming that 73 | commands like \cmd{gcc -c util.c} are keys, there is a linear dependency between 74 | each successive key, and that the OS-level tracing can be lifted back as a 75 | monadic \hs{Task} function\footnote{\Shake provides support for 76 | \Fabricate{}-like build systems~--~see \cmd{Development.Shake.Forward} 77 | in the \Shake library.}. However, in our pure model the mapping is not perfect 78 | as \cmd{gcc} writes to arbitrary files whose locations are not known in advance. 79 | 80 | \subsection{Self-adjusting Computation} 81 | 82 | While not typically considered build systems, self-adjusting computation is a 83 | well studied area, and in particular the contrast between different formulations 84 | has been thoroughly investigated, e.g. see~\citet{acar2007selfadjusting}. 85 | Self-adjusting computations can automatically adjust to an external change 86 | to their inputs. A classic example is a self-adjusting sorting algorithm, which 87 | can efficiently (in $O(\log{n})$ time where $n$ is the length of the input) 88 | recalculate the result given an incremental change of the input. While very 89 | close to build systems in spirit, self-adjusting computations are mostly used 90 | for in-memory computation and rely on the ability to dynamically allocate new 91 | keys in the store for sharing intermediate computations~--~an intriguing feature 92 | rarely seen in build systems (\Shake's oracles~\S\ref{sec-polymorphism} can be 93 | used to model this feature to a limited degree). 94 | 95 | A lot of research has been dedicated to finding efficient data structures and 96 | algorithms for self-adjusting computations (with a few open-source 97 | implementations, e.g. \Incremental by~\citet{incremental}). We plan to 98 | investigate how these insights can be utilised by build systems as future work. 
99 | 100 | \subsection{Memoization}\label{sec-related-memo} 101 | 102 | \emph{Memoization} is a classic optimisation technique for storing values of a 103 | function instead of recomputing them each time the function is called. Minimal 104 | build systems (see Definition~\ref{def-minimal}) certainly perform 105 | memoization: they \emph{store values instead of recomputing them each time}. 106 | Memoization can therefore be reduced to a minimal build system (as we 107 | demonstrate below), but not vice versa, since minimal build systems solve a more 108 | complex optimisation problem. 109 | 110 | As a simple example of using a build system for memoization, we solve a textbook 111 | dynamic programming problem~--~Levenshtein's \emph{edit 112 | distance}~\cite{levenshtein1966binary}: given two input strings $a$ and 113 | $b$, find the shortest series of edit operations that transforms $a$ 114 | to $b$. The edit operations are typically \emph{inserting}, \emph{deleting} or 115 | \emph{replacing} a symbol. The dynamic programming solution of this problem is 116 | so widely known (e.g., see~\cite{cormen2001introduction}) that we provide its 117 | encoding in our \hs{Tasks} abstraction without further explanation. We address 118 | elements of strings $a_i$ and $b_i$ by keys \hs{A}~$i$ and \hs{B}~$i$, 119 | respectively, while the cost of a subproblem $c_{ij}$ is identified by 120 | \hs{C}~$i$~$j$. 
121 | 122 | \vspace{0.5mm} 123 | \begin{minted}[xleftmargin=10pt, fontsize=\small]{haskell} 124 | data Key = A Int | B Int | C Int Int deriving Eq 125 | \end{minted} 126 | \vspace{0mm} 127 | \begin{minted}[xleftmargin=10pt, fontsize=\small]{haskell} 128 | editDistance :: Tasks Monad Key Int 129 | editDistance (C i 0) = Just $ Task $ const $ pure i 130 | editDistance (C 0 j) = Just $ Task $ const $ pure j 131 | editDistance (C i j) = Just $ Task $ \fetch -> do 132 | ai <- fetch (A i) 133 | bj <- fetch (B j) 134 | if ai == bj 135 | then fetch (C (i - 1) (j - 1)) 136 | else do insert <- fetch (C i (j - 1)) 137 | delete <- fetch (C (i - 1) j ) 138 | replace <- fetch (C (i - 1) (j - 1)) 139 | return (1 + minimum [insert, delete, replace]) 140 | editDistance _ = Nothing 141 | \end{minted} 142 | \vspace{0.5mm} 143 | 144 | \noindent 145 | When asked to build \hs{C}~$n$~$m$, a minimal build system will calculate the 146 | result using memoization. Furthermore, when an input symbol $a_i$ is changed, 147 | only necessary, incremental recomputation will be performed~--~an optimisation 148 | that cannot be achieved just with memoization. 149 | 150 | Self-adjusting computation, memoization and build systems are inherently related 151 | topics, which poses the question of whether there is an underlying common 152 | abstraction waiting to be discovered. 153 | -------------------------------------------------------------------------------- /papers/icfp/8-conclusions.tex: -------------------------------------------------------------------------------- 1 | \section{Conclusions}\label{sec-conclusions} 2 | 3 | We have investigated multiple build systems, showing how their properties are consequences of two implementation choices: what order you build in and how you decide whether to rebuild. By first decomposing the pieces, we show how to recompose the pieces to find new points in the design space. 
In particular, a simple recombination leads to a design for a monadic suspending cloud build system. Armed with that blueprint we hope to actually implement such a system as future work. 4 | -------------------------------------------------------------------------------- /papers/icfp/9-appendix.tex: -------------------------------------------------------------------------------- 1 | \clearpage 2 | \section{Appendix}\label{sec-appendix} 3 | 4 | \subsection{Compute transformers}\label{sec-appendix-transformers} 5 | 6 | In this section we clarify some of the compute transformers used in this paper. 7 | 8 | \hs{execute} uses the transformation based on the \hs{Identity} monad, feeding 9 | \hs{fetch k = pure (store k)} to the compute: 10 | 11 | \begin{minted}[xleftmargin=10pt]{haskell} 12 | execute :: Compute Monad k v -> (k -> v) -> k -> Maybe v 13 | execute compute store = fmap runIdentity . compute (pure . store) 14 | \end{minted} 15 | \vspace{1mm} 16 | \begin{minted}[xleftmargin=10pt]{haskell} 17 | newtype Identity a = Identity { runIdentity :: a } 18 | \end{minted} 19 | \vspace{1mm} 20 | \begin{minted}[xleftmargin=10pt]{haskell} 21 | instance Functor Identity where 22 | fmap f (Identity a) = Identity (f a) 23 | \end{minted} 24 | \vspace{1mm} 25 | \begin{minted}[xleftmargin=10pt]{haskell} 26 | instance Applicative Identity where 27 | pure a = Identity a 28 | Identity f <*> Identity a = Identity (f a) 29 | \end{minted} 30 | \vspace{1mm} 31 | \begin{minted}[xleftmargin=10pt]{haskell} 32 | instance Monad Identity where 33 | Identity a >>= f = f a 34 | \end{minted} 35 | \vspace{1mm} 36 | 37 | \todo{AM}{Explain \hs{track}.} 38 | 39 | 40 | 41 | Here is a draft implementation of \hs{inputs} used in the definition of 42 | build system correctness in \S\ref{sec-build-correctness}: 43 | 44 | \begin{minted}[xleftmargin=10pt]{haskell} 45 | inputs :: Eq k => Task Monad k v -> Store i k v -> k -> [k] 46 | inputs task store key = filter (isInput task) (closure deps key) 47 | where 
48 | deps k = maybe [] snd (track task (\k -> getValue k store) k) 49 | 50 | closure :: Eq a => (a -> [a]) -> a -> [a] -- Standard graph transitive closure 51 | 52 | data Proxy a = Proxy 53 | 54 | isInput :: Task Monad k v -> k -> Bool 55 | isInput task = isNothing . task (const Proxy) 56 | \end{minted} 57 | 58 | \subsection{Compute examples}\label{sec-appendix-compute-examples} 59 | 60 | \todo{AM}{Add some explanatory text.} 61 | 62 | The \emph{Collatz sequence} $C_i$ is defined as follows: 63 | 64 | \[ 65 | C_{i} = {\begin{cases}~n&{\text{for }}i=0\\~f(C_{i-1})&{\text{otherwise}},\end{cases}}\hspace{12pt}\text{where}\hspace{12pt}f(k)={\begin{cases}~k/2&{\text{if }}k\text{ is even}\\~3k+1&{\text{otherwise}}\end{cases}} 66 | \vspace{2mm} 67 | \] 68 | \noindent 69 | and $n$ is a positive integer parameter. The famous \emph{Collatz conjecture} 70 | states that the Collatz sequence eventually reaches 1 for all possible values of 71 | $n$. For example, if $n=6$, we reach 1 in eight steps: 72 | $(6, 3, 10, 5, 16, 8, 4, 2, 1, \dots)$, after which the sequence loops forever: 73 | $(4, 2, 1, 4, 2, 1, \dots)$. 74 | 75 | We can express the computation of values in the Collatz sequence as a functorial 76 | compute: 77 | 78 | \begin{minted}[xleftmargin=10pt]{haskell} 79 | data Collatz = Collatz Int 80 | 81 | collatz :: Compute Functor Collatz Int 82 | collatz get (Collatz k) | k <= 0 = Nothing 83 | | otherwise = Just $ f <$> get (Collatz (k - 1)) 84 | where 85 | f n | even n = n `div` 2 86 | | otherwise = 3 * n + 1 87 | \end{minted} 88 | 89 | ... 90 | 91 | The \emph{generalised Fibonacci sequence} $F_i$ is defined as follows: 92 | 93 | \[ 94 | F_{i} = {\begin{cases}~n&{\text{for }}i=0\\~m&{\text{for }}i=1\\~F_{i-1}+F_{i-2}&{\text{otherwise}}\end{cases}} 95 | \vspace{2mm} 96 | \] 97 | \noindent 98 | where $n$ and $m$ are integer parameters. 
By setting $n=0$ and $m=1$ we obtain 99 | the famous \emph{Fibonacci sequence}: $(0, 1, 1, 2, 3, 5, 8, 13, \dots)$, and if 100 | $n=2$ and $m=1$, the result is the \emph{Lucas sequence}: 101 | $(2, 1, 3, 4, 7, 11, 18, 29, \dots)$. 102 | 103 | We can express the computation of values in the generalised Fibonacci sequence 104 | as an applicative compute: 105 | 106 | \begin{minted}[xleftmargin=10pt]{haskell} 107 | data Fibonacci = Fibonacci Int 108 | 109 | fibonacci :: Compute Applicative Fibonacci Int 110 | fibonacci get (Fibonacci k) | k <= 1 = Nothing 111 | | otherwise = Just $ (+) <$> get (Fibonacci (k - 1)) 112 | <*> get (Fibonacci (k - 2)) 113 | \end{minted} 114 | 115 | ... 116 | 117 | The \emph{Ackermann function} $A(m, n)$ is defined as follows: 118 | 119 | \[ 120 | A(m, n) = {\begin{cases}~n+1&{\text{for }}m=0\\~A(m-1, 1)&{\text{for }}n=0\\~A(m-1,A(m,n-1))&{\text{otherwise}}\end{cases}} 121 | \vspace{2mm} 122 | \] 123 | \noindent 124 | We can express the computation of the Ackermann function as a monadic compute: 125 | 126 | \begin{minted}[xleftmargin=10pt]{haskell} 127 | data Ackermann = Ackermann Int Int 128 | 129 | ackermann :: Compute Monad Ackermann Int 130 | ackermann get (Ackermann m n) 131 | | m < 0 || n < 0 = Nothing 132 | | m == 0 = Just $ return (n + 1) 133 | | n == 0 = Just $ get (Ackermann (m - 1) 1) 134 | | otherwise = Just $ do 135 | index <- get (Ackermann m (n - 1)) 136 | get (Ackermann (m - 1) index) 137 | \end{minted} -------------------------------------------------------------------------------- /papers/icfp/Dockerfile: -------------------------------------------------------------------------------- 1 | # Docker image prepared for ICFP'18 Artifact Evaluation. 
2 | # 3 | # To build the image, run the following command in the directory containing 4 | # this Dockerfile: `docker build -t snowleopard/build .` 5 | # 6 | # To run a container interactively: 7 | # `docker run -it snowleopard/build` 8 | # 9 | FROM fpco/stack-build:lts-11.8 10 | MAINTAINER Andrey Mokhov 11 | RUN wget -O build-systems.zip https://github.com/snowleopard/build/archive/dbbe1322962cab3f523f61b7a6b3be57533cec44.zip 12 | RUN unzip build-systems.zip 13 | WORKDIR /build-dbbe1322962cab3f523f61b7a6b3be57533cec44 14 | RUN stack build && stack test 15 | RUN exit 16 | -------------------------------------------------------------------------------- /papers/icfp/Makefile: -------------------------------------------------------------------------------- 1 | main.pdf: main.tex 1-intro.tex 2-background.tex 3-abstractions.tex 4-build.tex 5-implementations.tex 6-engineering.tex 7-related.tex 8-conclusions.tex 9-appendix.tex 2 | pdflatex -shell-escape main.tex 3 | -------------------------------------------------------------------------------- /papers/icfp/comment.cut: -------------------------------------------------------------------------------- 1 | %% contents suppressed with 'anonymous' 2 | %% Commands \grantsponsor{}{}{} and 3 | %% \grantnum[]{}{} should be used to 4 | %% acknowledge financial support and will be used by metadata 5 | %% extraction tools. 6 | Thanks to anonymous reviewers and everyone else who provided us with feedback 7 | on earlier drafts: Ulf Adams, Arseniy Alekseyev, Dan Bentley, Martin 8 | Br\"{u}stel, Ulan Degenbaev, Jeremie Dimino, Andrew Fitzgibbon, Georgy 9 | Lukyanov, Simon Marlow, Evan Martin, Yaron Minsky, Guillaume Maudoux, Philip 10 | Patsch, Michael Peyton Jones, Andrew Phillips, Fran\c{c}ois Pottier, Rohit 11 | Ramesh, Irakli Safareli, Zhen Zhang. 12 | Your contributions were incredibly valuable. 
13 | 14 | Andrey Mokhov is funded by a Royal Society Industry Fellowship \cmd{IF160117} 15 | on the topic ``Towards Cloud Build Systems with Dynamic Dependency Graphs''. 16 | -------------------------------------------------------------------------------- /papers/icfp/fig/bazel-example-build.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/bazel-example-build.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/bazel-example-checkout.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/bazel-example-checkout.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/bazel-example-checkout.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 29 | 35 | 36 | 44 | 50 | 51 | 59 | 65 | 66 | 74 | 80 | 81 | 89 | 95 | 96 | 97 | 119 | 121 | 122 | 124 | image/svg+xml 125 | 127 | 128 | 129 | 130 | 131 | 136 | 145 | util.c 156 | 165 | util.h 176 | 185 | main.c 196 | 202 | 1 213 | 219 | 2 230 | 236 | 3 247 | 248 | 249 | -------------------------------------------------------------------------------- /papers/icfp/fig/bazel-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/bazel-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/frankenbuild-example-build.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/frankenbuild-example-build.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/frankenbuild-example-clean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/frankenbuild-example-clean.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/frankenbuild-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/frankenbuild-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/make-example-full.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/make-example-full.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/make-example-partial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/make-example-partial.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/make-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/make-example.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/shake-example-cutoff.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/shake-example-cutoff.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/shake-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/shake-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/icfp/fig/shake-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/icfp/fig/shake-example.pdf -------------------------------------------------------------------------------- /papers/icfp/final-changes.md: -------------------------------------------------------------------------------- 1 | # An overview of changes in the final version 2 | 3 | To address feedback from the reviewers as well as everyone else who reached out 4 | to us, we have made the following changes: 5 | 6 | * We substantially simplified our models of build systems (Section 5) by 7 | finding a cleaner decomposition into two main components, which are now called a 8 | 'scheduler' and a 'rebuilder'. This allowed us to provide a complete model of 9 | Excel (the submitted version omitted the implementation of the 'restarting' 10 | scheduler), and to include models of CloudBuild, Buck and Nix. 11 | 12 | * The decomposition forced us to introduce a new type `Task`, which describes a 13 | single build task. The type corresponding to the description of all build tasks 14 | is now appropriately called `Tasks` (Section 3). 
15 | 16 | * By popular demand we provided a model of Nix, a popular package manager/build 17 | system, which required us to introduce a new type of traces -- 'deep 18 | constructive traces' (Section 4). As a result, Table 2 (Build systems a la 19 | carte) was extended with a new row. 20 | 21 | * We made a lot of effort to improve text and clarity of descriptions. 22 | 23 | -------------------------------------------------------------------------------- /papers/icfp/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[acmsmall,screen]{acmart}\settopmatter{} 2 | 3 | %%% The following is specific to ICFP'18 and the paper 4 | %%% 'Build Systems à la Carte' 5 | %%% by Andrey Mokhov, Neil Mitchell, and Simon Peyton Jones. 6 | %%% 7 | \setcopyright{rightsretained} 8 | \acmPrice{} 9 | \acmDOI{10.1145/3236774} 10 | \acmYear{2018} 11 | \copyrightyear{2018} 12 | \acmJournal{PACMPL} 13 | \acmVolume{2} 14 | \acmNumber{ICFP} 15 | \acmArticle{79} 16 | \acmMonth{9} 17 | 18 | \bibliographystyle{ACM-Reference-Format} 19 | \citestyle{acmauthoryear} 20 | 21 | \usepackage{bookmark} 22 | \usepackage{booktabs} 23 | \usepackage{subcaption} 24 | \usepackage[utf8]{inputenc} 25 | \usepackage[T1]{fontenc} 26 | \usepackage{xspace} 27 | \usepackage{fancyhdr} 28 | 29 | % Haskell code snippets and useful shortcuts 30 | \usepackage{minted} 31 | \setminted[haskell]{escapeinside=@@} 32 | \newcommand{\hs}{\mintinline{haskell}} 33 | \newcommand{\cmd}[1]{\textsf{\color[rgb]{0,0,0.5} #1}} 34 | \newcommand{\teq}{\smaller $\sim$} 35 | \newcommand{\ghci}{$\lambda$>} 36 | \newcommand{\defeq}{\stackrel{\text{def}}{=}} 37 | \newcommand{\std}[1]{{\color[rgb]{0,0.3,0} #1}} 38 | \newcommand{\blk}[1]{{\color[rgb]{0,0,0} #1}} 39 | 40 | % \renewcommand{\MintedPygmentize}{path-to-pygmentize}% Questions and tasks 41 | \newcommand{\q}[2]{\textbf{\color{blue} Question #1:} #2} 42 | \newcommand{\todo}[2]{[\textbf{\color{red} #1:} #2]} 43 | 44 | % Abbreviations for 
build systems 45 | \newcommand{\Bazel}{\textsc{Bazel}\xspace} 46 | \newcommand{\Buck}{\textsc{Buck}\xspace} 47 | \newcommand{\Calc}{\textsc{Calc}\xspace} 48 | \newcommand{\Cloud}{\textsc{Cloud}\xspace} 49 | \newcommand{\CloudBuild}{\textsc{CloudBuild}\xspace} 50 | \newcommand{\Dune}{\textsc{Dune}\xspace} 51 | \newcommand{\Excel}{\textsc{Excel}\xspace} 52 | \newcommand{\Fabricate}{\textsc{Fabricate}\xspace} 53 | \newcommand{\Incremental}{\textsc{Incremental}\xspace} 54 | \newcommand{\Make}{\textsc{Make}\xspace} 55 | \newcommand{\Ninja}{\textsc{Ninja}\xspace} 56 | \newcommand{\Nix}{\textsc{Nix}\xspace} 57 | \newcommand{\Pluto}{\textsc{Pluto}\xspace} 58 | \newcommand{\Redo}{\textsc{Redo}\xspace} 59 | \newcommand{\Reflow}{\textsc{Reflow}\xspace} 60 | \newcommand{\Shake}{\textsc{Shake}\xspace} 61 | \newcommand{\Tup}{\textsc{Tup}\xspace} 62 | \newcommand{\store}{\hs{k}~\hs{->}~\hs{v}\xspace} 63 | \newcommand{\storef}{\hs{k}~\hs{->}~\hs{f}~\hs{v}\xspace} 64 | 65 | % \newcommand{\simon}[1]{} 66 | % \newcommand{\simon}[1]{SLPJ: {\color{red} \em #1} End SLPJ} 67 | 68 | \begin{document} 69 | \title[Build Systems \`a la Carte]{Build Systems \`a la Carte} 70 | 71 | %% An author may have multiple affiliations and/or emails; repeat the 72 | %% appropriate command. 73 | %% Many elements are not rendered, but should be provided for metadata 74 | %% extraction tools. 
75 | \author{Andrey Mokhov} 76 | \affiliation{ 77 | \department{School of Engineering} 78 | \institution{Newcastle University} 79 | \city{Newcastle upon Tyne} 80 | \country{United Kingdom} 81 | } 82 | \email{andrey.mokhov@ncl.ac.uk} 83 | 84 | \author{Neil Mitchell} 85 | \affiliation{ 86 | \institution{Digital Asset} 87 | \country{United Kingdom} 88 | } 89 | \email{ndmitchell@gmail.com} 90 | 91 | \author{Simon Peyton Jones} 92 | \affiliation{ 93 | \institution{Microsoft Research} 94 | \city{Cambridge} 95 | \country{United Kingdom} 96 | } 97 | \email{simonpj@microsoft.com} 98 | 99 | \begin{abstract} 100 | Build systems are awesome, terrifying -- and unloved. They are used by every 101 | developer around the world, but are rarely the object of study. In this paper we 102 | offer a systematic, and executable, framework for developing and comparing build 103 | systems, viewing them as related points in landscape rather than as isolated 104 | phenomena. By teasing apart existing build systems, we can recombine their 105 | components, allowing us to prototype new build systems with desired properties. 106 | \end{abstract} 107 | 108 | %% 2012 ACM Computing Classification System (CSS) concepts 109 | %% Generate at 'http://dl.acm.org/ccs/ccs.cfm'. 
110 | \begin{CCSXML} 111 | 112 | 113 | 10011007 114 | Software and its engineering 115 | 500 116 | 117 | 118 | 10002950 119 | Mathematics of computing 120 | 300 121 | 122 | 123 | \end{CCSXML} 124 | \ccsdesc[500]{Software and its engineering} 125 | \ccsdesc[300]{Mathematics of computing} 126 | %% End of generated code 127 | \keywords{build systems, functional programming, algorithms} 128 | 129 | \maketitle 130 | 131 | \input{1-intro} 132 | \input{2-background} 133 | \input{3-abstractions} 134 | \input{4-build} 135 | \input{5-implementations} 136 | \input{6-engineering} 137 | \input{7-related} 138 | \input{8-conclusions} 139 | 140 | %% Acknowledgements 141 | \begin{acks} 142 | %% contents suppressed with 'anonymous' 143 | %% Commands \grantsponsor{}{}{} and 144 | %% \grantnum[]{}{} should be used to 145 | %% acknowledge financial support and will be used by metadata 146 | %% extraction tools. 147 | Thanks to anonymous reviewers and everyone else who provided us with feedback 148 | on earlier drafts: Ulf Adams, Arseniy Alekseyev, Dan Bentley, Martin 149 | Br\"{u}stel, Ulan Degenbaev, Jeremie Dimino, Andrew Fitzgibbon, Georgy 150 | Lukyanov, Simon Marlow, Evan Martin, Yaron Minsky, Guillaume Maudoux, Philip 151 | Patsch, Michael Peyton Jones, Andrew Phillips, Fran\c{c}ois Pottier, Rohit 152 | Ramesh, Irakli Safareli, Zhen Zhang. 153 | Your contributions were incredibly valuable. 154 | 155 | Andrey Mokhov is funded by a Royal Society Industry Fellowship \cmd{IF160117} 156 | on the topic ``Towards Cloud Build Systems with Dynamic Dependency Graphs''. 
157 | \end{acks} 158 | 159 | \bibliography{refs} 160 | 161 | % \appendix 162 | % \input{9-appendix} 163 | 164 | \end{document} 165 | -------------------------------------------------------------------------------- /papers/icfp/notes.md: -------------------------------------------------------------------------------- 1 | # Notes and feedback 2 | 3 | ## Meeting with Simon Marlow 4 | 5 | * Make: 6 | 7 | - Expand the description of the workaround used to deal with the lack of 8 | early cutoff. Explain why we chose not to model this (easier to just 9 | implement early cutoff). 10 | 11 | * Buck: 12 | 13 | - Has `contents :: Map RuleKey v` instead of `Map (Hash v) v` where `RuleKey` 14 | is the hash of all inputs of a particular key. This relies on determinism: 15 | inputs fully determine the build outcome. If this assumption is violated, 16 | the result can be a frankenbuild (i.e. the executable will segfault). 17 | - Has no early cutoff. Impossible to support early cutoff using this approach. 18 | - Relies on applicative tasks to compute `RuleKey` from a key. How do we 19 | adapt this to dynamic dependencies? 20 | 21 | ## Graydon Hoare 22 | 23 | Nice paper! I suspect it's worth citing a couple steps upstream in redo's 24 | lineage (Dan Bernstein, Alan Grosskurth). Might also be worth putting fbuild 25 | (which works similarly) in the comparison: https://github.com/felix-lang/fbuild 26 | 27 | ## Yaron Minsky 28 | 29 | You might want to think about how something like this fits into your worldview: 30 | https://github.com/grailbio/reflow 31 | This is a build system for scientific workflows, courtesy of @marius. 32 | I'm not sure it's really anything other than a cloud build system. Is it any 33 | more distributed than Bazel? Also, from what I remember from chatting with 34 | @marius about this, one difference in Reflow is the support of streaming, which 35 | no other build system I know about supports. 
36 | 37 | (Response by @marius: I tend to think of it as somewhere between a build system 38 | and a data processing system. Because it needs to deal with large data sizes 39 | (often tens of terabytes) and long running computation, things like data 40 | locality become a big concern. Another big difference is that it admits dynamic 41 | graphs, so can express arbitrary computation e.g., you can express mapreduce and 42 | flume/spark-like computation (albeit inefficiently) in Reflow. I think another 43 | big difference between Reflow and traditional build systems is that it provides 44 | a front end language that has lazy evaluation semantics. You write a program, 45 | unreachable computation is implicitly pruned. There are no explicit 46 | declarations of dependencies.) 47 | 48 | Also, if Excel is a build system, are self-adjusting computation libraries also 49 | build systems? 50 | 51 | I tend to think that SAC is about pure computation, and build systems have a 52 | deadly embrace with external programs that you need to run as part of the 53 | computation. It's this deadly embrace with a complex effectful system that I 54 | think of as the distinguishing factor. 55 | 56 | Not a build system, but FrTime is an incremental programming language where 57 | the students are implicit rather than explicit. So there's some relation there. 58 | 59 | I think my main suggestion for improvement for the paper is a fuller treatment 60 | of SAC style approaches. If you're including Excel as a build system, it's hard 61 | to see why SAC isn't fully in scope. 62 | 63 | Another thought: another intellectual frame for what you're discussing (other 64 | than build systems) is incremental computation. That's a much wider world, and 65 | it would be good to have a clearer intellectual basis for understanding just 66 | what is the paper's narrower scope. 67 | 68 | Some of the issues you exclude as "pragmatics" are really where I would hang the 69 | fundamental distinction. 
My gut is that these pragmatics are at the crux of the 70 | distinction, and are deeper questions than you suspect. 71 | 72 | ## Dan Bentley (worked on Google Sheets's calculation engine) 73 | 74 | I'll be honest, it irked me in describing Excel as a build system. I worked on 75 | (but didn't write!) Google Sheets's calculation engine, and there's a lot not 76 | covered. E.g., inserting a column can't rewrite every cell in the store. 77 | 78 | Happy to chat more. I was surprised how much quad trees mattered for sheets 79 | computation engine. 80 | 81 | Dependencies are often over a contiguous range of cells. e.g. =SUM(A1:B23). 82 | And there are enough they start getting memory intensive. So store them as 83 | ranges, not independent deps per cell. Also helps with the insert-a-row case. 84 | 85 | ## Arseniy Alekseyev 86 | 87 | Nix is a cloud build system with monadic dependencies. 88 | 89 | ## Ulan Degenbaev 90 | 91 | How do I do configuration in this framework? Real-life example from Chromium: 92 | 93 | https://cs.chromium.org/chromium/src/build/config/v8_target_cpu.gni?rcl=c8f117ce2885070675675564dc39be7e92c6853d&l=38 94 | 95 | Andrey's response: What about the following? Here Bool is a configuration type. 96 | 97 | staticIF :: Bool -> Task Applicative String Int 98 | staticIF b fetch "B1" = Just $ if b then fetch "A1" 99 | else (+) <$> fetch "A2" <*> fetch "A3" 100 | staticIF _ _ _ = Nothing 101 | 102 | # Questions 103 | 104 | * What about Turing-completeness? Shall the build tasks be Turing-complete? 105 | 106 | # Recursion conjecture 107 | 108 | Compute Functor = tail recursion 109 | Compute Applicative = primitive recursion 110 | Compute Monad = general recursion 111 | 112 | # ApplicativeZero and MonadZero 113 | 114 | These allow introducing only failures into build systems. 115 | 116 | As an interesting build system feature, one might have a build system that 117 | performs a retry when a compute fails -- analogous to travis_retry. 
I found 118 | an example of similar functionality in Bazel: 119 | 120 | --flaky_test_attempts= multiple uses are 122 | accumulated 123 | 124 | Each test will be retried up to the specified number of times in case of any 125 | test failure. Tests that required more than one attempt to pass would be marked 126 | as 'FLAKY' in the test summary. If this option is set, it should specify an int 127 | N or the string 'default'. If it's an int, then all tests will be run up to N 128 | times. If it is not specified or its value is ' default', then only a single 129 | test attempt will be made for regular tests and three for tests marked 130 | explicitly as flaky by their rule (flaky=1 attribute). 131 | 132 | See https://docs.bazel.build/versions/master/command-line-reference.html 133 | 134 | # MonadZero, MonadPlus, MonadOr 135 | 136 | Zero -- for failure. 137 | Or -- for choosing the first success. 138 | Plus -- for picking any success, non-deterministically? 139 | 140 | See https://wiki.haskell.org/MonadPlus_reform_proposal 141 | 142 | Alternative seems to be similar to MonadOr in that it chooses the first one? 143 | 144 | # Memoization and self-adjusting computation 145 | 146 | "More specifically, we show an interesting duality between memoization and 147 | change propagation: memoization performs poorly with deep changes (where 148 | change propagation performs well) and performs well with shallow changes 149 | (where change propagation performs poorly)." 150 | 151 | From "A consistent semantics of self-adjusting computation" by Umut Acar et al. 
152 | -------------------------------------------------------------------------------- /papers/icfp/refs.bib: -------------------------------------------------------------------------------- 1 | @article{feldman1979make, 2 | title={Make—A program for maintaining computer programs}, 3 | author={Feldman, Stuart I}, 4 | journal={Software: Practice and experience}, 5 | volume={9}, 6 | number={4}, 7 | pages={255--265}, 8 | year={1979}, 9 | publisher={Wiley Online Library} 10 | } 11 | 12 | @inproceedings{mitchell2012shake, 13 | title={Shake before building: Replacing {Make} with {Haskell}}, 14 | author={Mitchell, Neil}, 15 | booktitle={ACM SIGPLAN Notices}, 16 | volume={47}, 17 | number={9}, 18 | pages={55--66}, 19 | year={2012}, 20 | organization={ACM} 21 | } 22 | 23 | @unpublished{bazel, 24 | title = {Bazel}, 25 | author = {Google}, 26 | year = {2016}, 27 | note = {\url{http://bazel.io/}} 28 | } 29 | 30 | @book{advanced_excel, 31 | title={Advanced Excel for Scientific Data Analysis}, 32 | author={De Levie, R.}, 33 | isbn={9780195152753}, 34 | lccn={2003053590}, 35 | series={Advanced Excel for Scientific Data Analysis}, 36 | year={2004}, 37 | publisher={Oxford University Press} 38 | } 39 | 40 | @unpublished{excel_recalc, 41 | title = {Excel Recalculation (MSDN documentation)}, 42 | author = {Microsoft}, 43 | year = {2011}, 44 | note = {\url{https://msdn.microsoft.com/en-us/library/office/bb687891.aspx}. 
45 | Also available in Internet Archive 46 | \url{https://web.archive.org/web/20180308150857/https://msdn.microsoft.com/en-us/library/office/bb687891.aspx}} 47 | } 48 | 49 | @inproceedings{hadrian, 50 | author = {Mokhov, Andrey and Mitchell, Neil and Peyton Jones, Simon and Marlow, Simon}, 51 | title = {{Non-recursive Make Considered Harmful: Build Systems at Scale}}, 52 | booktitle = {Proceedings of the 9th International Symposium on Haskell}, 53 | series = {Haskell 2016}, 54 | year = {2016}, 55 | pages = {170--181}, 56 | publisher = {ACM}, 57 | } 58 | 59 | @inproceedings{demers1981incremental, 60 | author = {Demers, Alan and Reps, Thomas and Teitelbaum, Tim}, 61 | title = {Incremental Evaluation for Attribute Grammars with Application to Syntax-directed Editors}, 62 | booktitle = {Proceedings of the 8th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL)}, 63 | year = {1981}, 64 | pages = {105--116}, 65 | publisher = {ACM} 66 | } 67 | 68 | @inproceedings{acar2002adaptive, 69 | author = {Acar, Umut A. and Blelloch, Guy E. 
and Harper, Robert}, 70 | title = {Adaptive Functional Programming}, 71 | booktitle = {Proceedings of the 29th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL)}, 72 | year = {2002}, 73 | pages = {247--259}, 74 | publisher = {ACM} 75 | } 76 | 77 | @inproceedings{acar2007selfadjusting, 78 | title={A consistent semantics of self-adjusting computation}, 79 | author={Acar, Umut A and Blume, Matthias and Donham, Jacob}, 80 | booktitle={European Symposium on Programming}, 81 | pages={458--474}, 82 | year={2007}, 83 | organization={Springer} 84 | } 85 | 86 | @unpublished{gvfs, 87 | title = {{Git Virtual File System}}, 88 | author = {Microsoft}, 89 | year = {2017}, 90 | note = {\url{https://www.gvfs.io/}} 91 | } 92 | 93 | @article{mcbride2008applicative, 94 | title={Applicative programming with effects}, 95 | author={McBride, Conor and Paterson, Ross}, 96 | journal={Journal of Functional Programming}, 97 | volume={18}, 98 | number={1}, 99 | pages={1--13}, 100 | year={2008}, 101 | publisher={Cambridge University Press} 102 | } 103 | 104 | @inproceedings{free-applicatives, 105 | author = {Paolo Capriotti and Ambrus Kaposi}, 106 | title = {Free applicative functors}, 107 | year = {2014}, 108 | journal = {Proceedings 5th Workshop on Mathematically Structured Functional Programming}, 109 | volume = {153}, 110 | pages = {2-30}, 111 | publisher = {Open Publishing Association} 112 | } 113 | 114 | @article{jones2007practical, 115 | title={Practical type inference for arbitrary-rank types}, 116 | author={Peyton~Jones, Simon and Vytiniotis, Dimitrios and Weirich, Stephanie and Shields, Mark}, 117 | journal={Journal of Functional Programming}, 118 | volume={17}, 119 | number={1}, 120 | pages={1--82}, 121 | year={2007}, 122 | publisher={Cambridge University Press} 123 | } 124 | 125 | @inproceedings{marlow2014haxl, 126 | title={There is no fork: An abstraction for efficient, concurrent, and concise data access}, 127 | author={Marlow, Simon and Brandy, Louis and 
Coens, Jonathan and Purdy, Jon}, 128 | booktitle={ACM SIGPLAN Notices}, 129 | volume={49}, 130 | number={9}, 131 | pages={325--337}, 132 | year={2014}, 133 | organization={ACM} 134 | } 135 | 136 | @inproceedings{esfahani2016cloudbuild, 137 | title={CloudBuild: Microsoft's distributed and caching build service}, 138 | author={Esfahani, Hamed and Fietz, Jonas and Ke, Qi and Kolomiets, Alexei and Lan, Erica and Mavrinac, Erik and Schulte, Wolfram and Sanches, Newton and Kandula, Srikanth}, 139 | booktitle={Proceedings of the 38th International Conference on Software Engineering Companion}, 140 | pages={11--20}, 141 | year={2016}, 142 | organization={ACM} 143 | } 144 | 145 | @article{erdweg2015pluto, 146 | title={A sound and optimal incremental build system with dynamic dependencies}, 147 | author={Erdweg, Sebastian and Lichter, Moritz and Weiel, Manuel}, 148 | journal={ACM SIGPLAN Notices}, 149 | volume={50}, 150 | number={10}, 151 | pages={89--106}, 152 | year={2015}, 153 | publisher={ACM} 154 | } 155 | 156 | @unpublished{shake-fixed-point, 157 | title = {{How to write fixed point build rules in Shake}}, 158 | author = {Neil Mitchell}, 159 | year = {2013}, 160 | note = {\url{https://stackoverflow.com/questions/14622169/how-to-write-fixed-point-build-rules-in-shake-e-g-latex}} 161 | } 162 | 163 | @unpublished{ninja, 164 | title = {{Ninja build system homepage}}, 165 | author = {Evan Martin}, 166 | year = {2017}, 167 | note = {\url{https://ninja-build.org/}} 168 | } 169 | 170 | @unpublished{tup, 171 | title = {{Build System Rules and Algorithms}}, 172 | author = {Mike Shal}, 173 | year = {2009}, 174 | note = {\url{http://gittup.org/tup/build_system_rules_and_algorithms.pdf/}} 175 | } 176 | 177 | @unpublished{redo-idea, 178 | title = {Rebuilding target files when source files have changed}, 179 | author = {Daniel J. 
Bernstein}, 180 | year = {2003}, 181 | note = {\url{http://cr.yp.to/redo.html}} 182 | } 183 | 184 | @unpublished{docker, 185 | title = {Docker container: A standardized unit of software}, 186 | author = {Solomon Hykes}, 187 | year = {2013}, 188 | note = {\url{https://www.docker.com/what-container}} 189 | } 190 | 191 | @mastersthesis{grosskurth2007redo, 192 | title={Purely top-down software rebuilding}, 193 | author={Grosskurth, Alan}, 194 | year={2007}, 195 | school={University of Waterloo} 196 | } 197 | 198 | @unpublished{redo, 199 | title = {{redo: a top-down software build system}}, 200 | author = {Avery Pennarun}, 201 | year = {2012}, 202 | note = {\url{https://github.com/apenwarr/redo}} 203 | } 204 | 205 | @unpublished{buck, 206 | title = {{Buck: A high-performance build tool}}, 207 | author = {Facebook}, 208 | year = {2013}, 209 | note = {\url{https://buckbuild.com/}} 210 | } 211 | 212 | @unpublished{dune, 213 | title = {{Dune: A composable build system}}, 214 | author = {{Jane~Street}}, 215 | year = {2018}, 216 | note = {\url{https://github.com/ocaml/dune}} 217 | } 218 | 219 | @article{hughes2000generalising, 220 | title={Generalising monads to arrows}, 221 | author={Hughes, John}, 222 | journal={Science of computer programming}, 223 | volume={37}, 224 | number={1-3}, 225 | pages={67--111}, 226 | year={2000}, 227 | publisher={Elsevier} 228 | } 229 | 230 | @unpublished{reflow, 231 | title = {Reflow: A system for incremental data processing in the cloud}, 232 | author = {GRAIL}, 233 | year = {2017}, 234 | note = {\url{https://github.com/grailbio/reflow}} 235 | } 236 | 237 | @unpublished{incremental, 238 | title = {Incremental: A library for incremental computations}, 239 | author = {{Jane~Street}}, 240 | year = {2015}, 241 | note = {\url{https://github.com/janestreet/incremental}} 242 | } 243 | 244 | @unpublished{pottier2009lazy, 245 | title={Lazy least fixed points in ML}, 246 | author={Pottier, Fran{\c{c}}ois}, 247 | year={2009}, 248 | note = 
{\url{http://gallium.inria.fr/~fpottier/publis/fpottier-fix.pdf}} 249 | } 250 | 251 | @phdthesis{radul2009propagation, 252 | title={Propagation Networks: A Flexible and Expressive Substrate for Computation}, 253 | author={Radul, Alexey}, 254 | year={2009}, 255 | school={MIT} 256 | } 257 | 258 | @unpublished{fabricate, 259 | title = {{Fabricate: The better build tool}}, 260 | author = {Berwyn Hoyt and Bryan Hoyt and Ben Hoyt}, 261 | year = {2009}, 262 | note = {\url{https://github.com/SimonAlfie/fabricate}} 263 | } 264 | 265 | @inproceedings{dolstra2004nix, 266 | title={Nix: A Safe and Policy-Free System for Software Deployment}, 267 | author={Dolstra, Eelco and De Jonge, Merijn and Visser, Eelco and others}, 268 | booktitle={LISA}, 269 | volume={4}, 270 | pages={79--92}, 271 | year={2004} 272 | } 273 | 274 | @article{pickering2017profunctor, 275 | title={Profunctor Optics: Modular Data Accessors}, 276 | author={Pickering, Matthew and Gibbons, Jeremy and Wu, Nicolas}, 277 | journal={The Art, Science, and Engineering of Programming}, 278 | volume={1}, 279 | issue={2}, 280 | year={2017} 281 | } 282 | 283 | @book{cormen2001introduction, 284 | title={Introduction To Algorithms}, 285 | author={Cormen, T.H. and Leiserson, C.E. and Rivest, R.L. 
and Stein, C.}, 286 | isbn={9780262032933}, 287 | year={2001}, 288 | publisher={MIT Press} 289 | } 290 | 291 | @inproceedings{liang1995monad, 292 | title={Monad transformers and modular interpreters}, 293 | author={Liang, Sheng and Hudak, Paul and Jones, Mark}, 294 | booktitle={Proceedings of the 22nd ACM SIGPLAN-SIGACT symposium on Principles of programming languages}, 295 | pages={333--343}, 296 | year={1995}, 297 | organization={ACM} 298 | } 299 | 300 | @inproceedings{levenshtein1966binary, 301 | title={Binary codes capable of correcting deletions, insertions, and reversals}, 302 | author={Levenshtein, Vladimir I}, 303 | booktitle={Soviet physics doklady}, 304 | volume={10}, 305 | number={8}, 306 | pages={707--710}, 307 | year={1966} 308 | } 309 | 310 | @inproceedings{build_maintenance, 311 | author = {Shane McIntosh and Bram Adams and Thanh Nguyen and Yasutaka Kamei and Ahmed Hassan}, 312 | title = {An empirical study of build maintenance effort}, 313 | booktitle = {Proc. ICSE '11}, 314 | year = {2011}, 315 | pages = {141--150}, 316 | publisher = {ACM Press} 317 | } 318 | 319 | @article{claessen_continuations, 320 | author={Koen Claessen}, 321 | title={A poor man's concurrency monad}, 322 | journal={Journal of Functional Programming}, 323 | volume={9}, 324 | number={3}, 325 | pages={313--323}, 326 | year={1999}, 327 | publisher={Cambridge University Press} 328 | } 329 | -------------------------------------------------------------------------------- /papers/icfp/response.md: -------------------------------------------------------------------------------- 1 | We thank the reviewers for their comments and suggestions. 2 | 3 | # Key points 4 | 5 | > **B:** The emphasis of the minimality property over correctness seems somewhat 6 | off. 7 | 8 | We agree that correctness is more important, but respectfully *disagree* that 9 | our emphasis is on minimality. 
On the contrary, a larger part of the paper is 10 | dedicated to the precise formulation of build system correctness (S3.6), 11 | including non-deterministic (S6.3) and shallow build (S6.4) cases. 12 | 13 | > **B:** The property most build system authors chase after is that an 14 | incremental build should be equivalent to a clean build, up to non-determinism 15 | 16 | Yes indeed. Our definition of correctness is stronger than "equivalent to a 17 | clean build", because it does not rely on some earlier notion of a "clean build". 18 | Presumably a "clean build" means a build in which all inputs are treated as 19 | out-of-date; and *also* that the results of that build are "correct". But what 20 | does "correct" mean? Our definition is self-contained. 21 | 22 | > **B:** How do these design decisions affect the developer experience? 23 | 24 | > **C:** the Build abstraction itself is trivial and therefore does not help 25 | us understand some interesting aspects 26 | 27 | Our abstractions are simple, but not *too* simple. They allow us to identify and 28 | study a few key (but subtle) features, while carefully abstracting away from the 29 | details (such as exploiting parallelism, caching, and so on). But for the 30 | developer experience, those engineering details are super-important, and they 31 | absorb 90%+ of the code that implements a build system. So while our 32 | abstractions allow us to study, classify and compare build systems, they 33 | abstract away too much to say much about the developer experience. 34 | 35 | # Details 36 | 37 | > **A:** I can't see how the build system could distinguish this 38 | > [static] dependency from those arising from parsing `release.txt`. 39 | 40 | Indeed, Shake does not distinguish static dependencies from dynamic: it treats 41 | them uniformly as if they all were dynamic. 42 | 43 | > **A:** Presumably you could call `dependencies` on monadic tasks? 44 | 45 | No. 
Dependencies of a monadic task cannot be determined without providing actual 46 | values; see the example in lines 576-585. 47 | 48 | > **B:** reproducible/deterministic builds 49 | 50 | A strength of our abstractions is that they allow us to discuss the trade-offs 51 | involved in requiring full determinism. We will add such a discussion in our 52 | revision. 53 | 54 | > **B**: npm, yarn, cabal, and cargo, etc? 55 | 56 | Packaging systems bring in a new collection of considerations (such as 57 | constraint-solving to find consistent package sets) which are beyond our scope 58 | here. Perhaps there is another similar paper to be written about packaging systems! 59 | 60 | > **B:** ML Compilation Manager? 61 | 62 | Many languages have built-in build systems, including ML (as you mention), 63 | Haskell's `ghc --make`, and OCamlMake. In the interest of space and breadth of 64 | impact, we chose to focus on language-independent build systems, rather than 65 | language-specific ones. 66 | 67 | > **C:** In the present form, the code of each build system is quite different 68 | from each other 69 | 70 | In fact, our framework does provide reusable 'schedulers' (`topological`, 71 | `recursive`, etc.) and defines build systems using them. One can also factor out 72 | 'rebuilders', e.g. based on verifying/constructive traces. We will address this 73 | in the revision. 74 | 75 | The following didn't fit the limit: 76 | ----------------------------------- 77 | > **A:** how does the file system metadata fit into this model? Is it persistent 78 | > build information? 79 | 80 | We consider file system metadata to be a part of persistent build information. 81 | One could refine the model by introducing a separate notion for the file system 82 | metadata, but we decided against this, since it makes the model more complex. 
83 | -------------------------------------------------------------------------------- /papers/jfp/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | main.pdf 21 | jfp2egui.pdf 22 | 23 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 24 | *.bbl 25 | *.bcf 26 | *.blg 27 | *-blx.aux 28 | *-blx.bib 29 | *.brf 30 | *.run.xml 31 | 32 | ## Build tool auxiliary files: 33 | *.fdb_latexmk 34 | *.synctex 35 | *.synctex.gz 36 | *.synctex.gz(busy) 37 | *.pdfsync 38 | 39 | ## Auxiliary and intermediate files from other packages: 40 | # algorithms 41 | *.alg 42 | *.loa 43 | 44 | # achemso 45 | acs-*.bib 46 | 47 | # amsthm 48 | *.thm 49 | 50 | # beamer 51 | *.nav 52 | *.snm 53 | *.vrb 54 | 55 | # cprotect 56 | *.cpt 57 | 58 | # fixme 59 | *.lox 60 | 61 | #(r)(e)ledmac/(r)(e)ledpar 62 | *.end 63 | *.?end 64 | *.[1-9] 65 | *.[1-9][0-9] 66 | *.[1-9][0-9][0-9] 67 | *.[1-9]R 68 | *.[1-9][0-9]R 69 | *.[1-9][0-9][0-9]R 70 | *.eledsec[1-9] 71 | *.eledsec[1-9]R 72 | *.eledsec[1-9][0-9] 73 | *.eledsec[1-9][0-9]R 74 | *.eledsec[1-9][0-9][0-9] 75 | *.eledsec[1-9][0-9][0-9]R 76 | 77 | # glossaries 78 | *.acn 79 | *.acr 80 | *.glg 81 | *.glo 82 | *.gls 83 | *.glsdefs 84 | 85 | # gnuplottex 86 | *-gnuplottex-* 87 | 88 | # hyperref 89 | *.brf 90 | 91 | # knitr 92 | *-concordance.tex 93 | # TODO Comment the next line if you want to keep your tikz graphics files 94 | *.tikz 95 | *-tikzDictionary 96 | 97 | # listings 98 | *.lol 99 | 100 | # makeidx 101 | *.idx 102 | *.ilg 103 | *.ind 104 | *.ist 105 | 106 | # minitoc 107 | *.maf 108 | *.mlf 109 | *.mlt 110 | *.mtc 111 | *.mtc[0-9] 112 | *.mtc[1-9][0-9] 113 | 114 | # minted 115 | 
_minted* 116 | *.pyg 117 | 118 | # morewrites 119 | *.mw 120 | 121 | # mylatexformat 122 | *.fmt 123 | 124 | # nomencl 125 | *.nlo 126 | 127 | # sagetex 128 | *.sagetex.sage 129 | *.sagetex.py 130 | *.sagetex.scmd 131 | 132 | # sympy 133 | *.sout 134 | *.sympy 135 | sympy-plots-for-*.tex/ 136 | 137 | # pdfcomment 138 | *.upa 139 | *.upb 140 | 141 | # pythontex 142 | *.pytxcode 143 | pythontex-files-*/ 144 | 145 | # thmtools 146 | *.loe 147 | 148 | # TikZ & PGF 149 | *.dpth 150 | *.md5 151 | *.auxlock 152 | 153 | # todonotes 154 | *.tdo 155 | 156 | # xindy 157 | *.xdy 158 | 159 | # xypic precompiled matrices 160 | *.xyc 161 | 162 | # endfloat 163 | *.ttt 164 | *.fff 165 | 166 | # Latexian 167 | TSWLatexianTemp* 168 | 169 | ## Editors: 170 | # WinEdt 171 | *.bak 172 | *.sav 173 | 174 | # Texpad 175 | .texpadtmp 176 | 177 | # Kile 178 | *.backup 179 | 180 | # KBibTeX 181 | *~[0-9]* 182 | -------------------------------------------------------------------------------- /papers/jfp/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "latex-workshop.latex.recipes": [ 3 | { 4 | "name": "builder", 5 | "tools": [ 6 | "texify" 7 | ] 8 | }, 9 | ], 10 | "latex-workshop.latex.tools": [ 11 | { 12 | "name": "texify", 13 | "command": "texify", 14 | "args": [ 15 | "--synctex", 16 | "--pdf", 17 | "--tex-option=\"-shell-escape\"", 18 | "--tex-option=\"-interaction=nonstopmode\"", 19 | "--tex-option=\"-file-line-error\"", 20 | "%DOC%.tex" 21 | ] 22 | } 23 | ], 24 | "cSpell.words": [ 25 | "Elem", 26 | "Ericson", 27 | "Hashable", 28 | "PACMPL", 29 | "SIGPLAN", 30 | "acmart", 31 | "bazel", 32 | "etre", 33 | "fmap", 34 | "mempty", 35 | "newtype", 36 | "poss", 37 | "racey", 38 | "sigplanproc", 39 | "sprsh", 40 | "topmatter", 41 | "uncurry" 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /papers/jfp/1-intro.tex: 
-------------------------------------------------------------------------------- 1 | \section{Introduction}\label{sec-intro} 2 | 3 | Build systems (such as \Make) are big, complicated, and used by every 4 | software developer on the planet. But they are a sadly unloved part 5 | of the software ecosystem, very much a means to an end, and seldom the 6 | focus of attention. 7 | % Rarely do people ask questions like ``What does it mean for my build 8 | % system to be correct?'' or ``What are the trade-offs between different 9 | % approaches?''. 10 | For years \Make dominated, but more recently the challenges of scale have driven 11 | large software firms like Microsoft, Facebook and Google to develop their own 12 | build systems, exploring new points in the design space. These complex build 13 | systems use subtle algorithms, but they are often hidden away, and not the 14 | object of study. 15 | 16 | In this paper we give a general framework in which to understand and compare 17 | build systems, in a way that is both abstract (omitting incidental detail) and 18 | yet precise (implemented as Haskell code). Specifically we make these 19 | contributions: 20 | 21 | \begin{itemize} 22 | \item Build systems vary on many axes, including: static vs dynamic 23 | dependencies; local vs cloud; deterministic vs non-deterministic build tasks; 24 | early cutoff; self-tracking build systems; and the type of persistently stored 25 | build information. In~\S\ref{sec-background} we identify some of these key 26 | properties, illustrated by four carefully-chosen build systems. 27 | 28 | \item We describe some simple but novel abstractions that 29 | crisply encapsulate what a build system is (\S\ref{sec-abstractions}), 30 | allowing us, for example, to speak about what it means for a build system to 31 | be correct. 
32 | 33 | \item We identify two key design choices that are typically deeply wired into 34 | any build system: \emph{the order in which tasks are 35 | built}~(\S\ref{sec-scheduler}) and \emph{whether or not a 36 | task is rebuilt}~(\S\ref{sec-rebuilder}). These choices turn out to 37 | be orthogonal, which leads us to a new classification of the design 38 | space~(\S\ref{sec-design-space}). 39 | 40 | \item We show that we can instantiate our abstractions to describe the essence 41 | of a variety of different real-life build systems, including \Make, \Shake, 42 | \Bazel, \Buck, \Nix, and \Excel\footnote{\Excel appears very different to the 43 | others but, seen through the lens of this paper, it is very close.}, each by the 44 | composition of the two design choices~(\S\ref{sec-implementations}). Doing this 45 | modelling in a single setting allows the differences and similarities between 46 | these huge systems to be brought out clearly\footnote{All our models are 47 | executable and are available on Hackage as \cmd{build-1.0}.}. 48 | 49 | \item Moreover, we can readily remix the ingredients to design new build systems 50 | with desired properties, for example, to combine the advantages of \Shake and 51 | \Bazel. Writing this paper gave us the insights to combine dynamic dependencies 52 | and cloud build systems in a principled way; we evaluate the result 53 | in~\S\ref{sec-experience}. 54 | 55 | \item We can use the presented abstractions to more clearly explain details from 56 | the original \Shake paper~(\S\ref{sec-step-traces}, \S\ref{sec-experience-shake}) 57 | and develop new cloud build features, which are already in use in industry and in 58 | the GHC build system~(\S\S\ref{sec-using-cloud-shake}-\ref{sec-hadrian}). 
59 | 60 | \end{itemize} 61 | 62 | In short, instead of seeing build systems as unrelated points in space, we now 63 | see them as locations in a connected landscape, leading to a better 64 | understanding of what they do and how they compare, and making it easier to 65 | explore other points in the landscape. While we steer clear of many engineering 66 | aspects of real build systems, in~\S\ref{sec-engineering} we discuss these 67 | aspects in the context of the presented abstractions. The related work is covered 68 | in~\S\ref{sec-related}. 69 | 70 | This paper is an extended version of an earlier conference 71 | paper~\cite{mokhov2018buildsystems}. The key changes compared to the earlier 72 | version are: (i) we added further clarifications and examples 73 | to~\S\ref{sec-abstractions}, in particular,~\S\ref{sec-task-examples} is 74 | entirely new; (ii)~\S\ref{sec-scheduler} and~\S\ref{sec-rebuilder} are based on 75 | the material from the conference paper but have been substantially expanded to 76 | include further details and examples, as well as completely new material such 77 | as~\S\ref{sec-step-traces}; (iii)~\S\ref{sec-experience} is completely new; (iv) 78 | \S\ref{sec-failures} and \S\S\ref{sec-polymorphism}-\ref{sec-file-watching} 79 | are almost entirely new, and \S\ref{sec-non-determinism} has been revised. 80 | The new material focuses on our experience and various important practical 81 | considerations, hence justifying the ``and Practice'' part of the paper title. 82 | -------------------------------------------------------------------------------- /papers/jfp/10-conclusions.tex: -------------------------------------------------------------------------------- 1 | \section{Conclusions}\label{sec-conclusions} 2 | 3 | We have investigated multiple build systems, showing how their properties are 4 | consequences of two implementation choices: what order you build in and how you 5 | decide whether to rebuild. 
By first decomposing the pieces, we show how to 6 | recompose the pieces to find new points in the design space. In particular, a 7 | simple recombination leads to a design for a monadic suspending cloud build 8 | system, which we have implemented and use in our day-to-day development. 9 | 10 | \section*{Acknowledgements} 11 | 12 | Thanks to anonymous reviewers and everyone else who provided us with feedback 13 | on earlier drafts: Ulf Adams, Arseniy Alekseyev, Dan Bentley, Martin 14 | Br\"{u}stel, Ulan Degenbaev, Jeremie Dimino, Andrew Fitzgibbon, Georgy 15 | Lukyanov, Simon Marlow, Evan Martin, Yaron Minsky, Guillaume Maudoux, Philip 16 | Patsch, Michael Peyton Jones, Andrew Phillips, Fran\c{c}ois Pottier, Rohit 17 | Ramesh, Irakli Safareli, Zhen Zhang. Your contributions were incredibly 18 | valuable. 19 | 20 | Andrey Mokhov's research was funded by a Royal Society Industry Fellowship 21 | \cmd{IF160117} on the topic ``Towards Cloud Build Systems with Dynamic 22 | Dependency Graphs''. 23 | -------------------------------------------------------------------------------- /papers/jfp/11-appendix.tex: -------------------------------------------------------------------------------- 1 | \clearpage 2 | \section{Appendix}\label{sec-appendix} 3 | 4 | \subsection{Compute transformers}\label{sec-appendix-transformers} 5 | 6 | In this section we clarify some of the compute transformers used in this paper. 7 | 8 | \hs{execute} uses the transformation based on the \hs{Identity} monad, feeding 9 | \hs{fetch k = pure (store k)} to the compute: 10 | 11 | \begin{minted}[xleftmargin=10pt]{haskell} 12 | execute :: Compute Monad k v -> (k -> v) -> k -> Maybe v 13 | execute compute store = fmap runIdentity . compute (pure . 
store) 14 | \end{minted} 15 | \vspace{1mm} 16 | \begin{minted}[xleftmargin=10pt]{haskell} 17 | newtype Identity a = Identity { runIdentity :: a } 18 | \end{minted} 19 | \vspace{1mm} 20 | \begin{minted}[xleftmargin=10pt]{haskell} 21 | instance Functor Identity where 22 | fmap f (Identity a) = Identity (f a) 23 | \end{minted} 24 | \vspace{1mm} 25 | \begin{minted}[xleftmargin=10pt]{haskell} 26 | instance Applicative Identity where 27 | pure a = Identity a 28 | Identity f <*> Identity a = Identity (f a) 29 | \end{minted} 30 | \vspace{1mm} 31 | \begin{minted}[xleftmargin=10pt]{haskell} 32 | instance Monad Identity where 33 | Identity a >>= f = f a 34 | \end{minted} 35 | \vspace{1mm} 36 | 37 | \todo{AM}{Explain \hs{track}.} 38 | 39 | 40 | 41 | Here is a draft implementation of \hs{inputs} used in the definition of 42 | build system correctness in \S\ref{sec-build-correctness}: 43 | 44 | \begin{minted}[xleftmargin=10pt]{haskell} 45 | inputs :: Eq k => Task Monad k v -> Store i k v -> k -> [k] 46 | inputs task store key = filter (isInput task) (closure deps key) 47 | where 48 | deps k = maybe [] snd (track task (\k -> getValue k store) k) 49 | 50 | closure :: Eq a => (a -> [a]) -> a -> [a] -- Standard graph transitive closure 51 | 52 | data Proxy a = Proxy 53 | 54 | isInput :: Task Monad k v -> k -> Bool 55 | isInput task = isNothing . task (const Proxy) 56 | \end{minted} 57 | 58 | \subsection{Compute examples}\label{sec-appendix-compute-examples} 59 | 60 | \todo{AM}{Add some explanatory text.} 61 | 62 | The \emph{Collatz sequence} $C_i$ is defined as follows: 63 | 64 | \[ 65 | C_{i} = {\begin{cases}~n&{\text{for }}i=0\\~f(C_{i-1})&{\text{otherwise}},\end{cases}}\hspace{12pt}\text{where}\hspace{12pt}f(k)={\begin{cases}~k/2&{\text{if }}k\text{ is even}\\~3k+1&{\text{otherwise}}\end{cases}} 66 | \vspace{2mm} 67 | \] 68 | \noindent 69 | and $n$ is a positive integer parameter. 
The famous \emph{Collatz conjecture} 70 | states that the Collatz sequence eventually reaches 1 for all possible values of 71 | $n$. For example, if $n=6$, we reach 1 in eight steps: 72 | $(6, 3, 10, 5, 16, 8, 4, 2, 1, \dots)$, after which the sequence loops forever: 73 | $(4, 2, 1, 4, 2, 1, \dots)$. 74 | 75 | We can express the computation of values in the Collatz sequence as a functorial 76 | compute: 77 | 78 | \begin{minted}[xleftmargin=10pt]{haskell} 79 | data Collatz = Collatz Int 80 | 81 | collatz :: Compute Functor Collatz Int 82 | collatz get (Collatz k) | k <= 0 = Nothing 83 | | otherwise = Just $ f <$> get (Collatz (k - 1)) 84 | where 85 | f n | even n = n `div` 2 86 | | otherwise = 3 * n + 1 87 | \end{minted} 88 | 89 | ... 90 | 91 | The \emph{generalised Fibonacci sequence} $F_i$ is defined as follows: 92 | 93 | \[ 94 | F_{i} = {\begin{cases}~n&{\text{for }}i=0\\~m&{\text{for }}i=1\\~F_{i-1}+F_{i-2}&{\text{otherwise}}\end{cases}} 95 | \vspace{2mm} 96 | \] 97 | \noindent 98 | where $n$ and $m$ are integer parameters. By setting $n=0$ and $m=1$ we obtain 99 | the famous \emph{Fibonacci sequence}: $(0, 1, 1, 2, 3, 5, 8, 13, \dots)$, and if 100 | $n=2$ and $m=1$, the result is the \emph{Lucas sequence}: 101 | $(2, 1, 3, 4, 7, 11, 18, 29, \dots)$. 102 | 103 | We can express the computation of values in the generalised Fibonacci sequence 104 | as an applicative compute: 105 | 106 | \begin{minted}[xleftmargin=10pt]{haskell} 107 | data Fibonacci = Fibonacci Int 108 | 109 | fibonacci :: Compute Applicative Fibonacci Int 110 | fibonacci get (Fibonacci k) | k <= 1 = Nothing 111 | | otherwise = Just $ (+) <$> get (Fibonacci (k - 1)) 112 | <*> get (Fibonacci (k - 2)) 113 | \end{minted} 114 | 115 | ... 
116 | 117 | The \emph{Ackermann function} $A(m, n)$ is defined as follows: 118 | 119 | \[ 120 | A(m, n) = {\begin{cases}~n+1&{\text{for }}m=0\\~A(m-1, 1)&{\text{for }}n=0\\~A(m-1,A(m,n-1))&{\text{otherwise}}\end{cases}} 121 | \vspace{2mm} 122 | \] 123 | \noindent 124 | We can express the computation of the Ackermann function as a monadic compute: 125 | 126 | \begin{minted}[xleftmargin=10pt]{haskell} 127 | data Ackermann = Ackermann Int Int 128 | 129 | ackermann :: Compute Monad Ackermann Int 130 | ackermann get (Ackermann m n) 131 | | m < 0 || n < 0 = Nothing 132 | | m == 0 = Just $ return (n + 1) 133 | | n == 0 = Just $ get (Ackermann (m - 1) 1) 134 | | otherwise = Just $ do 135 | index <- get (Ackermann m (n - 1)) 136 | get (Ackermann (m - 1) index) 137 | \end{minted} -------------------------------------------------------------------------------- /papers/jfp/4-schedulers.tex: -------------------------------------------------------------------------------- 1 | \section{Schedulers}\label{sec-scheduler} 2 | 3 | The focus of this paper is on a variety of implementations of 4 | \hs{Build}~\hs{c}~\hs{i}~\hs{k}~\hs{v}, given a \emph{user-supplied} 5 | implementation of \hs{Tasks}~\hs{c}~\hs{k}~\hs{v}. That is, we are going to take 6 | \hs{Tasks} as given from now on, and explore variants of \hs{Build}: first 7 | abstractly (in this section and in~\S\ref{sec-rebuilder}) and then concretely 8 | in~\S\ref{sec-implementations}. 9 | 10 | As per the definition of minimality~(\S\ref{def-minimal}), a minimal build 11 | system must \textbf{rebuild only out-of-date keys} and at most once. The only 12 | way to achieve the ``at most once'' requirement while producing a correct build 13 | result (\S\ref{sec-build-correctness}) is to \textbf{build all keys in an 14 | order that respects their dependencies}. 
15 | 16 | We have emboldened two different aspects above: the part of the build system 17 | responsible for scheduling tasks in the dependency order (a ``scheduler'') can 18 | be cleanly separated from the part responsible for deciding whether a key needs 19 | to be rebuilt (a ``rebuilder''). In this section we discuss schedulers, leaving 20 | rebuilders for \S\ref{sec-rebuilder}. 21 | 22 | Section \S\ref{sec-background} introduced three different \emph{task schedulers} 23 | that decide which tasks to execute and in what order; see the ``Scheduler'' 24 | column of Table~\ref{tab-summary} in \S\ref{sec-background-summary}. The 25 | following subsections explore the properties of the three schedulers, and 26 | possible implementations. 27 | 28 | \subsection{Topological Scheduler}\label{sec-topological} 29 | 30 | The topological scheduler pre-computes a linear order of tasks, which when 31 | followed ensures dependencies are satisfied, then executes the required tasks in 32 | that order. Computing such a linear order is straightforward -- given a task 33 | description and a target \hs{key}, first find the (acyclic) graph of the 34 | \hs{key}'s dependencies, then compute a topological order. Taking the \Make 35 | example from Fig.~\ref{fig-make}, we might compute the following order: 36 | 37 | \begin{enumerate} 38 | \item \cmd{main.o} 39 | \item \cmd{util.o} 40 | \item \cmd{main.exe} 41 | \end{enumerate} 42 | 43 | \noindent 44 | Given the dependencies, we could have equally chosen to build \cmd{util.o} 45 | first, but \cmd{main.exe} \emph{must} come last. 46 | 47 | The advantage of this scheme is simplicity -- compute an order, then execute 48 | tasks in that order. In addition, any missing keys or dependency cycles can be 49 | detected from the graph, and reported to the user before any work has commenced. 50 | 51 | The downside of this approach is that it requires the dependencies of each task 52 | in advance. 
As we saw in~\S\ref{sec-deps}, we can only extract dependencies from 53 | an applicative task, which requires the build system to choose 54 | \hs{c}~\hs{=}~\hs{Applicative}, ruling out dynamic dependencies. 55 | 56 | \subsection{Restarting Scheduler}\label{sec-restarting} 57 | 58 | To handle dynamic dependencies we cannot precompute a static order -- we must 59 | interleave running tasks and ordering tasks. One approach is just to build tasks 60 | in an arbitrary order, and if a task calls \hs{fetch} on an out-of-date key 61 | \cmd{dep}, abort the task and build \cmd{dep} instead. Returning to the example 62 | from Fig.~\ref{fig-make}, we might build the tasks as follows: 63 | 64 | \begin{enumerate} 65 | \item \cmd{main.exe} (abort because it depends on \cmd{main.o} which is out of 66 | date) 67 | \item \cmd{main.o} 68 | \item \cmd{util.o} 69 | \item \cmd{main.exe} (restart from scratch, completing successfully this time) 70 | \end{enumerate} 71 | 72 | \noindent 73 | We start with \cmd{main.exe} (an arbitrary choice), but discover it depends on 74 | \cmd{main.o}, so instead start building \cmd{main.o}. Next we choose to build 75 | \cmd{util.o} (again, arbitrarily), before finally returning to \cmd{main.exe} 76 | that now has all its dependencies available and completes successfully. 77 | 78 | This approach works, but has a number of disadvantages. Firstly, it requires a 79 | technical mechanism to abort a task, which is easy in our theoretical setting 80 | with \hs{Task} (see an implementation in~\S\ref{sec-implementation-excel}) but 81 | leads to engineering concerns in the real world. Secondly, it is not minimal in 82 | the sense that a task may start, do some meaningful work, and then abort, 83 | repeating that same work when restarted. 
84 | 85 | As a refinement, to reduce the number of aborts (often to zero) \Excel records 86 | the discovered task order in its \emph{calc chain}, and uses it as the starting 87 | point for the next build~(\S\ref{sec-background-excel}). \Bazel's restarting 88 | scheduler does not store the discovered order between build runs; instead, it 89 | stores the most recent task dependency information from which it can compute a 90 | linear order. Since this information may become outdated, \Bazel may also need 91 | to abort a task if a newly discovered dependency is out of date. 92 | 93 | \subsection{Suspending Scheduler}\label{sec-suspending} 94 | 95 | An alternative approach, utilised by the \hs{busy} build system 96 | (\S\ref{sec-general-build}) and \Shake, is to simply build dependencies when 97 | they are requested, suspending the currently running task when needed. Using the 98 | example from Fig.~\ref{fig-make}, we would build: 99 | 100 | \begin{itemize} 101 | \item \cmd{main.exe} (suspended)\\ 102 | $\hookrightarrow$ \cmd{main.o} 103 | \item \cmd{main.exe} (resumed then suspended again)\\ 104 | $\hookrightarrow$ \cmd{util.o} 105 | \item \cmd{main.exe} (completed) 106 | \end{itemize} 107 | 108 | \noindent 109 | We start building \cmd{main.exe} first as it is the required target. We soon 110 | discover a dependency on \cmd{main.o} and suspend the current task 111 | \cmd{main.exe} to build \cmd{main.o}, then resume and suspend again to build 112 | \cmd{util.o}, and finally complete the target \cmd{main.exe}. 113 | 114 | This scheduler (when combined with a suitable rebuilder) provides a minimal 115 | build system that supports dynamic dependencies. In our model, a suspending 116 | scheduler is easy to write -- it makes a function call to compute each 117 | dependency. However, a more practical implementation is likely to build multiple 118 | dependencies in parallel, which then requires a more explicit task suspension 119 | and resumption. 
To implement suspension there are two standard approaches: 120 | 121 | \begin{itemize} 122 | \item Blocking threads or processes. This approach is relatively easy, but can 123 | require significant resources, especially if a large number of tasks are 124 | suspended. In languages with cheap green threads (e.g. Haskell) the approach is 125 | more feasible, and it was the original approach taken by \Shake. 126 | \item Continuation-passing style~\cite{claessen_continuations} can allow the 127 | remainder of a task to be captured, paused, and resumed later. Continuation 128 | passing is efficient, but requires the build script to be architected to allow 129 | capturing continuations. \Shake currently uses this approach. 130 | \end{itemize} 131 | 132 | \noindent 133 | While a suspending scheduler is theoretically optimal, in practice it is better 134 | than a restarting scheduler only if the cost of avoided duplicate work 135 | outweighs the cost of suspending tasks. Note furthermore that the cost of 136 | duplicate work may often be just a fraction of the overall build cost. 137 | -------------------------------------------------------------------------------- /papers/jfp/9-related.tex: -------------------------------------------------------------------------------- 1 | \section{Related Work}\label{sec-related} 2 | 3 | While there is research on individual build systems, there has been little 4 | research to date comparing different build systems. In~\S\ref{sec-background} we 5 | covered several important build systems~--~in this section we relate a few 6 | other build systems to our abstractions, and discuss other work where similar 7 | abstractions~arise. 8 | 9 | \subsection{Other Build Systems}\label{sec-related-build} 10 | 11 | Most build systems, when viewed at the level of abstraction we discuss, can be captured with minor 12 | variations on the code presented in \S\ref{sec-implementations}. 
Below we list 13 | some notable examples: 14 | 15 | \begin{itemize} 16 | \item \Dune~\cite{dune} is a build system designed for OCaml/Reason projects. 17 | Its original implementation used \emph{arrows}~\cite{hughes2000generalising} 18 | rather than monads to model dynamic dependencies, which simplified static 19 | dependency approximation. \Dune was later redesigned to use a flavour of 20 | selective functors~\cite{mokhov_selective_2019}, making it a closer fit to our 21 | abstractions. 22 | 23 | \item \Ninja~\cite{ninja} combines the \hs{topological} scheduler of \Make with 24 | the verifying traces of \Shake~--~our associated implementation provides such a 25 | combination. \Ninja~is also capable of modelling build rules that produce 26 | multiple results, a limited form of multiple value types \S\ref{sec-polymorphism}. 27 | 28 | \item \Nix~\cite{dolstra2004nix} has coarse-grained dependencies, with precise 29 | hashing of dependencies and downloading of precomputed build products. We 30 | provided a model of \Nix in \S\ref{sec-implementation-cloud}, although it is 31 | worth noting that \Nix is not primarily intended as a build system, and the 32 | coarse grained nature (packages, not individual files) makes it targeted to a 33 | different purpose. 34 | 35 | \item \Pluto~\cite{erdweg2015pluto} is based on a similar model to \Shake, but 36 | additionally allows cyclic build rules combined with a user-specific resolution 37 | strategy. Often such a strategy can be unfolded into the user rules without loss 38 | of precision, but a fully general resolution handler extends the \hs{Task} 39 | abstraction with new features. 40 | 41 | \item \Redo~\cite{redo-idea,grosskurth2007redo,redo} almost exactly 42 | matches \Shake at the level of detail given here, differing only in aspects like 43 | rules producing multiple files~\S\ref{sec-polymorphism}. 
While \Redo predates 44 | \Shake, they were developed independently; we use \Shake as a prototypical 45 | example of a monadic build system because its implementation presents a closer 46 | mapping to our \hs{Task} abstraction. 47 | 48 | \item \Tup~\cite{tup} functions much like \Make, but with a refined dirty-bit 49 | implementation that watches the file system for changes and can thus avoid 50 | rechecking the entire graph. \Tup also automatically deletes stale results. 51 | \end{itemize} 52 | 53 | The one build system we are aware of that cannot be modelled in our framework is 54 | \Fabricate by~Hoyt~\etal~\shortcite{fabricate}. In \Fabricate a build system is 55 | a script that is run in-order, in the spirit of: 56 | 57 | \vspace{1mm} 58 | \begin{minted}[xleftmargin=10pt]{bash} 59 | gcc -c util.c 60 | gcc -c main.c 61 | gcc util.o main.o -o main.exe 62 | \end{minted} 63 | \vspace{1mm} 64 | 65 | % \noindent 66 | To achieve minimality, each separate command is traced at the OS-level, allowing 67 | \Fabricate to record a trace entry stating that \cmd{gcc -c util.c} reads from 68 | \cmd{util.c}. In future runs \Fabricate runs the script from start to finish, 69 | skipping any commands where no inputs have changed. The main difference from our 70 | \hs{Tasks} abstraction is that instead of supplying a mapping from keys to 71 | tasks, a \Fabricate script supplies a list of build statements, in a 72 | \emph{user-scheduled order}, without declaring what each statement reads or writes. 73 | 74 | Taking our abstraction, it is possible to encode \Fabricate assuming that 75 | commands like \cmd{gcc -c util.c} are keys, there is a linear dependency between 76 | each successive key, and that the OS-level tracing can be lifted back as a 77 | monadic \hs{Task} function\footnote{\Shake provides support for 78 | \Fabricate{}-like build systems~--~see \cmd{Development.Shake.Forward}.}. 
79 | However, in our pure model the mapping is not perfect as \cmd{gcc} writes to 80 | arbitrary files whose locations are not known in advance. One way of capturing 81 | arbitrary writes in our model is to switch from one callback \hs{fetch} to 82 | \emph{two callbacks}, say \hs{read} and \hs{write}, allowing us to track both 83 | reads and writes separately. 84 | 85 | \subsection{Self-adjusting Computation} 86 | 87 | While not typically considered build systems, self-adjusting computation is a 88 | well-studied area, and in particular the contrast between different formulations 89 | has been thoroughly investigated, e.g., 90 | see~Acar~\etal~\shortcite{acar2007selfadjusting}. Self-adjusting computations 91 | can automatically adjust to an external change to their inputs. A classic 92 | example is a self-adjusting sorting algorithm, which can efficiently (in 93 | $O(\log{n})$ time where $n$ is the length of the input) recalculate the result 94 | given an incremental change of the input. While very close to build systems in 95 | spirit, self-adjusting computations are mostly used for in-memory computation 96 | and rely on the ability to dynamically allocate new keys in the store for 97 | sharing intermediate computations~--~an intriguing feature rarely seen in build 98 | systems (\Shake's oracles~\S\ref{sec-polymorphism} can be used to model this 99 | feature to a limited degree). Another important optimisation that self-adjusting 100 | computation engines often support is the incremental processing of 101 | \emph{deltas}, where instead of marking a value as ``changed to 8'', one can 102 | mark it as ``changed by $+1$'', assuming it was equal to 7 before. When a delta 103 | is small, it can often be propagated to the output more efficiently than by 104 | recomputing the output value from scratch.
105 | 106 | A lot of research has been dedicated to finding efficient data structures and 107 | algorithms for self-adjusting computations, with a few open-source 108 | implementations, e.g. \Incremental by Jane Street~\shortcite{incremental}. We 109 | plan to investigate how these insights can be utilised by build systems as 110 | future work. 111 | 112 | \subsection{Memoization}\label{sec-related-memo} 113 | 114 | \emph{Memoization} is a classic optimisation technique for storing values of a 115 | function instead of recomputing them each time the function is called. Minimal 116 | build systems (\S\ref{sec-background-make}) certainly perform 117 | memoization: they \emph{store values instead of recomputing them each time}. 118 | Memoization can therefore be reduced to a minimal build system (as we 119 | demonstrate below), but not vice versa, since minimal build systems solve a more 120 | complex optimisation problem. 121 | 122 | As a simple example of using a build system for memoization, we solve a textbook 123 | dynamic programming problem~--~Levenshtein's \emph{edit 124 | distance}~\cite{levenshtein1966binary}: given two input strings $a$ and 125 | $b$, find the shortest series of edit operations that transforms $a$ 126 | to $b$. The edit operations are typically \emph{inserting}, \emph{deleting} or 127 | \emph{replacing} a symbol. The dynamic programming solution of this problem is 128 | so widely known, e.g., see~Cormen~\etal~\shortcite{cormen2001introduction}, that 129 | we provide its encoding in our \hs{Tasks} abstraction without further 130 | explanation. We address elements of strings $a_i$ and $b_i$ by keys \hs{A}~$i$ 131 | and \hs{B}~$i$, respectively, while the cost of a subproblem $c_{ij}$ is 132 | identified by \hs{C}~$i$~$j$. 
133 | 134 | \vspace{1mm} 135 | \begin{minted}[xleftmargin=10pt]{haskell} 136 | data Key = A Int | B Int | C Int Int deriving Eq 137 | \end{minted} 138 | \begin{minted}[xleftmargin=10pt]{haskell} 139 | editDistance :: Tasks Monad Key Int 140 | editDistance (C i 0) = Just $ Task $ const $ pure i 141 | editDistance (C 0 j) = Just $ Task $ const $ pure j 142 | editDistance (C i j) = Just $ Task $ \fetch -> do 143 | ai <- fetch (A i) 144 | bj <- fetch (B j) 145 | if ai == bj 146 | then fetch (C (i - 1) (j - 1)) 147 | else do 148 | insert <- fetch (C i (j - 1)) 149 | delete <- fetch (C (i - 1) j ) 150 | replace <- fetch (C (i - 1) (j - 1)) 151 | return (1 + minimum [insert, delete, replace]) 152 | editDistance _ = Nothing 153 | \end{minted} 154 | \vspace{1mm} 155 | 156 | \noindent 157 | When asked to build \hs{C}~$n$~$m$, a minimal build system will calculate the 158 | result using memoization. Furthermore, when an input $a_i$ is changed, only 159 | necessary, incremental recomputation will be performed~--~an optimisation that 160 | cannot be achieved just with memoization. 161 | 162 | Self-adjusting computation, memoization and build systems are inherently related 163 | topics, which poses the question of whether there is an underlying common 164 | abstraction waiting to be discovered. 
165 | -------------------------------------------------------------------------------- /papers/jfp/Makefile: -------------------------------------------------------------------------------- 1 | main.pdf: main.tex 1-intro.tex 2-background.tex 3-abstractions.tex 4-schedulers.tex 5-rebuilders.tex 6-implementations.tex 7-experience.tex 8-engineering.tex 9-related.tex 10-conclusions.tex 11-appendix.tex 2 | pdflatex -shell-escape main.tex 3 | -------------------------------------------------------------------------------- /papers/jfp/amsfonts.sty: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `amsfonts.sty', 3 | %% generated with the docstrip utility. 4 | %% 5 | %% The original source files were: 6 | %% 7 | %% amsfonts.dtx 8 | %% 9 | %%% ==================================================================== 10 | %%% @LaTeX-file{ 11 | %%% filename = "amsfonts.dtx", 12 | %%% version = "2.2f", 13 | %%% date = "2001/10/25", 14 | %%% time = "11:51:34 EDT", 15 | %%% checksum = "12894 459 2125 19071", 16 | %%% author = "American Mathematical Society", 17 | %%% copyright = "Copyright 2001 American Mathematical Society, 18 | %%% all rights reserved. Copying of this file is 19 | %%% authorized only if either: 20 | %%% (1) you make absolutely no changes to your copy, 21 | %%% including name; OR 22 | %%% (2) if you do make changes, you first rename it 23 | %%% to some other name.", 24 | %%% address = "American Mathematical Society, 25 | %%% Technical Support, 26 | %%% Publications Technical Group, 27 | %%% P. O. 
Box 6248, 28 | %%% Providence, RI 02940, 29 | %%% USA", 30 | %%% telephone = "401-455-4080 or (in the USA and Canada) 31 | %%% 800-321-4AMS (321-4267)", 32 | %%% FAX = "401-331-3842", 33 | %%% email = "tech-support@ams.org (Internet)", 34 | %%% codetable = "ISO/ASCII", 35 | %%% keywords = "latex, amslatex, ams-latex, amsfonts, msam, 36 | %%% msbm, eufm, blackboard bold", 37 | %%% supported = "yes", 38 | %%% abstract = "This file is part of the AMSFonts distribution. 39 | %%% It provides easy access in \LaTeXe{} to certain 40 | %%% math fonts for extra math symbols, fraktur 41 | %%% letters, and blackboard bold letters. See the 42 | %%% AMSFonts user's guide for more information.", 43 | %%% docstring = "The checksum field above contains a CRC-16 44 | %%% checksum as the first value, followed by the 45 | %%% equivalent of the standard UNIX wc (word 46 | %%% count) utility output of lines, words, and 47 | %%% characters. This is produced by Robert 48 | %%% Solovay's checksum utility.", 49 | %%% } 50 | %%% ==================================================================== 51 | \NeedsTeXFormat{LaTeX2e}% LaTeX 2.09 can't be used (nor non-LaTeX) 52 | [1994/12/01]% LaTeX date must be December 1994 or later 53 | \ProvidesPackage{amsfonts}[2001/10/25 v2.2f] 54 | \DeclareOption{psamsfonts}{% 55 | \ifnum\cmex@opt=7 \def\cmex@opt{10}% 56 | \else \def\cmex@opt{0}\fi 57 | \DeclareFontFamily{U}{msa}{}% 58 | \DeclareFontShape{U}{msa}{m}{n}{<-6>msam5<6-8>msam7<8->msam10}{}% 59 | \DeclareFontFamily{U}{msb}{}% 60 | \DeclareFontShape{U}{msb}{m}{n}{<-6>msbm5<6-8>msbm7<8->msbm10}{}% 61 | \DeclareFontFamily{U}{euf}{}% 62 | \DeclareFontShape{U}{euf}{m}{n}{<-6>eufm5<6-8>eufm7<8->eufm10}{}% 63 | \DeclareFontShape{U}{euf}{b}{n}{<-6>eufb5<6-8>eufb7<8->eufb10}{}% 64 | } 65 | \@ifundefined{cmex@opt}{\def\cmex@opt{1}}{} 66 | \ProcessOptions\relax 67 | \ifnum\cmex@opt=1 \relax 68 | \DeclareFontShape{OMX}{cmex}{m}{n}{<-8>cmex7<8>cmex8<9>cmex9% 69 | 
<10><10.95><12><14.4><17.28><20.74><24.88>cmex10}{}% 70 | \expandafter\let\csname OMX/cmex/m/n/10\endcsname\relax 71 | \else 72 | \ifnum\cmex@opt=10 % need to override cmex7 fontdef from amsmath 73 | \begingroup 74 | \expandafter\let\csname OMX+cmex\endcsname\relax 75 | \fontencoding{OMX}\fontfamily{cmex}% 76 | \try@load@fontshape 77 | \endgroup 78 | \expandafter\let\csname OMX/cmex/m/n/10\endcsname\relax 79 | \def\cmex@opt{0}% 80 | \fi 81 | \fi 82 | \providecommand*{\@mathmeasure}[3]{% 83 | \setbox#1\hbox{\frozen@everymath\@emptytoks\m@th$#2#3$}} 84 | \@ifundefined{@emptytoks}{\csname newtoks\endcsname\@emptytoks}{} 85 | \DeclareSymbolFont{AMSa}{U}{msa}{m}{n} 86 | \DeclareSymbolFont{AMSb}{U}{msb}{m}{n} 87 | \@ifundefined{yen}{% 88 | \edef\yen{\noexpand\mathhexbox{\hexnumber@\symAMSa}55} 89 | }{} 90 | \@ifundefined{checkmark}{% 91 | \edef\checkmark{\noexpand\mathhexbox{\hexnumber@\symAMSa}58} 92 | }{} 93 | \@ifundefined{circledR}{% 94 | \edef\circledR{\noexpand\mathhexbox{\hexnumber@\symAMSa}72} 95 | }{} 96 | \@ifundefined{maltese}{% 97 | \edef\maltese{\noexpand\mathhexbox{\hexnumber@\symAMSa}7A} 98 | }{} 99 | \begingroup \catcode`\"=12 100 | \DeclareMathDelimiter{\ulcorner}{\mathopen} {AMSa}{"70}{AMSa}{"70} 101 | \DeclareMathDelimiter{\urcorner}{\mathclose}{AMSa}{"71}{AMSa}{"71} 102 | \DeclareMathDelimiter{\llcorner}{\mathopen} {AMSa}{"78}{AMSa}{"78} 103 | \DeclareMathDelimiter{\lrcorner}{\mathclose}{AMSa}{"79}{AMSa}{"79} 104 | \xdef\widehat#1{\noexpand\@mathmeasure\z@\textstyle{#1}% 105 | \noexpand\ifdim\noexpand\wd\z@>\tw@ em% 106 | \mathaccent"0\hexnumber@\symAMSb 5B{#1}% 107 | \noexpand\else\mathaccent"0362{#1}\noexpand\fi} 108 | \xdef\widetilde#1{\noexpand\@mathmeasure\z@\textstyle{#1}% 109 | \noexpand\ifdim\noexpand\wd\z@>\tw@ em% 110 | \mathaccent"0\hexnumber@\symAMSb 5D{#1}% 111 | \noexpand\else\mathaccent"0365{#1}\noexpand\fi} 112 | \DeclareMathSymbol{\dabar@}{\mathord}{AMSa}{"39} 113 | \xdef\dashrightarrow{\mathrel{\dabar@\dabar@ 114 | 
\mathchar"0\hexnumber@\symAMSa 4B}}% 115 | \xdef\dashleftarrow{\mathrel{\mathchar"0\hexnumber@\symAMSa 4C\dabar@ 116 | \dabar@}}% 117 | \global\let\dasharrow\dashrightarrow 118 | \global\let\rightleftharpoons\undefined 119 | \DeclareMathSymbol{\rightleftharpoons}{\mathrel}{AMSa}{"0A} 120 | \global\let\angle\undefined 121 | \DeclareMathSymbol{\angle} {\mathord}{AMSa}{"5C} 122 | \global\let\hbar\undefined 123 | \DeclareMathSymbol{\hbar} {\mathord}{AMSb}{"7E} 124 | \global\let\sqsubset\undefined 125 | \DeclareMathSymbol{\sqsubset} {\mathrel}{AMSa}{"40} 126 | \global\let\sqsupset\undefined 127 | \DeclareMathSymbol{\sqsupset} {\mathrel}{AMSa}{"41} 128 | \global\let\mho\undefined 129 | \DeclareMathSymbol{\mho} {\mathord}{AMSb}{"66} 130 | \endgroup 131 | \DeclareMathAlphabet{\mathfrak}{U}{euf}{m}{n} 132 | \SetMathAlphabet{\mathfrak}{bold}{U}{euf}{b}{n} 133 | \DeclareSymbolFontAlphabet{\mathbb}{AMSb} 134 | \DeclareFontEncodingDefaults{\relax}{\def\accentclass@{7}} 135 | \DeclareRobustCommand{\frak}[1]{% 136 | {\@subst@obsolete{amsfonts}\frak\mathfrak{#1}}} 137 | \DeclareRobustCommand{\Bbb}[1]{% 138 | {\@subst@obsolete{amsfonts}\Bbb\mathbb{#1}}} 139 | \DeclareRobustCommand{\bold}[1]{% 140 | {\@subst@obsolete{amsfonts}\bold\mathbf{#1}}} 141 | \begingroup \catcode`\"=12 \relax 142 | \gdef\newsymbol#1#2#3#4#5{% 143 | \@obsolete{amsfonts}\newsymbol\DeclareMathSymbol 144 | \@ifdefinable#1{% 145 | \edef\next@ 146 | {\ifcase #2 \or 147 | \hexnumber@\symAMSa\or 148 | \hexnumber@\symAMSb\fi}% 149 | \ifx\next@\@empty 150 | \PackageError{amsfonts}{\Invalid@@\newsymbol}\@ehd% 151 | \else 152 | \global\mathchardef#1"#3\next@#4#5 153 | \fi}} 154 | \endgroup 155 | \long\def\@gobblethree#1#2#3{} 156 | \if@compatibility 157 | \let\@obsolete\@gobblethree 158 | \else 159 | \def\@obsolete#1#2#3{\PackageWarning{#1}{% 160 | Obsolete command \protect#2; \protect#3 should be used instead}}% 161 | \fi 162 | \def\@subst@obsolete#1#2#3{\@obsolete{#1}#2#3\gdef#2{#3}#2} 163 | \begingroup \catcode`\"=12 
164 | \DeclareMathSymbol{\square} {\mathord}{AMSa}{"03} 165 | \DeclareMathSymbol{\lozenge} {\mathord}{AMSa}{"06} 166 | \DeclareMathSymbol{\vartriangleright} {\mathrel}{AMSa}{"42} 167 | \DeclareMathSymbol{\vartriangleleft} {\mathrel}{AMSa}{"43} 168 | \DeclareMathSymbol{\trianglerighteq} {\mathrel}{AMSa}{"44} 169 | \DeclareMathSymbol{\trianglelefteq} {\mathrel}{AMSa}{"45} 170 | \DeclareMathSymbol{\rightsquigarrow} {\mathrel}{AMSa}{"20} 171 | \@ifpackageloaded{latexsym}{\@tempswafalse}{\@tempswatrue} 172 | \if@tempswa 173 | \global\let\Box\square 174 | \global\let\Diamond\lozenge 175 | \global\let\leadsto\rightsquigarrow 176 | \global\let\lhd\@@undefined 177 | \global\let\unlhd\@@undefined 178 | \global\let\rhd\@@undefined 179 | \global\let\unrhd\@@undefined 180 | \DeclareMathSymbol{\lhd} {\mathbin}{AMSa}{"43} 181 | \DeclareMathSymbol{\unlhd} {\mathbin}{AMSa}{"45} 182 | \DeclareMathSymbol{\rhd} {\mathbin}{AMSa}{"42} 183 | \DeclareMathSymbol{\unrhd} {\mathbin}{AMSa}{"44} 184 | \xdef\Join{\mathrel{\mathchar"0\hexnumber@\symAMSb 6F\mkern-13.8mu% 185 | \mathchar"0\hexnumber@\symAMSb 6E}} 186 | \fi 187 | \endgroup 188 | \endinput 189 | %% 190 | %% End of file `amsfonts.sty'. 
-------------------------------------------------------------------------------- /papers/jfp/fig/bazel-example-build.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/bazel-example-build.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/bazel-example-checkout.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/bazel-example-checkout.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/bazel-example-checkout.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 29 | 35 | 36 | 44 | 50 | 51 | 59 | 65 | 66 | 74 | 80 | 81 | 89 | 95 | 96 | 97 | 119 | 121 | 122 | 124 | image/svg+xml 125 | 127 | 128 | 129 | 130 | 131 | 136 | 145 | util.c 156 | 165 | util.h 176 | 185 | main.c 196 | 202 | 1 213 | 219 | 2 230 | 236 | 3 247 | 248 | 249 | -------------------------------------------------------------------------------- /papers/jfp/fig/bazel-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/bazel-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/frankenbuild-example-build.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/frankenbuild-example-build.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/frankenbuild-example-clean.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/frankenbuild-example-clean.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/frankenbuild-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/frankenbuild-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/make-example-full.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/make-example-full.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/make-example-partial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/make-example-partial.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/make-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/make-example.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/shake-example-cutoff.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/shake-example-cutoff.pdf -------------------------------------------------------------------------------- 
/papers/jfp/fig/shake-example-rebuild.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/shake-example-rebuild.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/shake-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/shake-example.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/step-example-step1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/step-example-step1.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/step-example-step2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/step-example-step2.pdf -------------------------------------------------------------------------------- /papers/jfp/fig/step-example-step3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowleopard/build/43b18b9a362d7d27b64679ea4122e4b8c5dfedd9/papers/jfp/fig/step-example-step3.pdf -------------------------------------------------------------------------------- /papers/jfp/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass{jfp1} 2 | 3 | \bibliographystyle{jfp} 4 | % \citestyle{acmauthoryear} 5 | 6 | \usepackage{bookmark} 7 | \usepackage{booktabs} 8 | \usepackage{subcaption} 9 | \usepackage[utf8]{inputenc} 10 | \usepackage[T1]{fontenc} 
11 | \usepackage{xspace} 12 | \usepackage{fancyhdr} 13 | 14 | % Haskell code snippets and useful shortcuts 15 | \usepackage{minted} 16 | \setminted[haskell]{escapeinside=@@} 17 | \setminted[text]{escapeinside=@@} 18 | \newcommand{\hs}{\mintinline{haskell}} 19 | \newcommand{\cmd}[1]{\textsf{\color[rgb]{0,0,0.5} #1}} 20 | \newcommand{\teq}{\smaller $\sim$} 21 | \newcommand{\ghci}{$\lambda$>} 22 | \newcommand{\defeq}{\stackrel{\text{def}}{=}} 23 | \newcommand{\std}[1]{{\color[rgb]{0,0.3,0} #1}} 24 | \newcommand{\blk}[1]{{\color[rgb]{0,0,0} #1}} 25 | \newcommand{\blu}[1]{{\color[rgb]{0,0,0.5} #1}} 26 | \newcommand{\gap}{$\;\;$} 27 | \newcommand{\etal}{\emph{et~al.}} 28 | 29 | % \renewcommand{\MintedPygmentize}{path-to-pygmentize}% Questions and tasks 30 | \newcommand{\q}[2]{\textbf{\color{blue} Question (#1):} #2} 31 | \newcommand{\todo}[2]{[\textbf{\color{red} #1:} #2]} 32 | 33 | % Abbreviations for build systems 34 | \newcommand{\Bazel}{\textsc{Bazel}\xspace} 35 | \newcommand{\Buck}{\textsc{Buck}\xspace} 36 | \newcommand{\Calc}{\textsc{Calc}\xspace} 37 | \newcommand{\Cloud}{\textsc{Cloud}\xspace} 38 | \newcommand{\CloudBuild}{\textsc{CloudBuild}\xspace} 39 | \newcommand{\Dune}{\textsc{Dune}\xspace} 40 | \newcommand{\Excel}{\textsc{Excel}\xspace} 41 | \newcommand{\Fabricate}{\textsc{Fabricate}\xspace} 42 | \newcommand{\Incremental}{\textsc{Incremental}\xspace} 43 | \newcommand{\Latex}{\textsc{Latex}\xspace} 44 | \newcommand{\Make}{\textsc{Make}\xspace} 45 | \newcommand{\Ninja}{\textsc{Ninja}\xspace} 46 | \newcommand{\Nix}{\textsc{Nix}\xspace} 47 | \newcommand{\Pluto}{\textsc{Pluto}\xspace} 48 | \newcommand{\Redo}{\textsc{Redo}\xspace} 49 | \newcommand{\Reflow}{\textsc{Reflow}\xspace} 50 | \newcommand{\Shake}{\textsc{Shake}\xspace} 51 | \newcommand{\Hadrian}{\textsc{Hadrian}\xspace} 52 | \newcommand{\Tup}{\textsc{Tup}\xspace} 53 | \newcommand{\store}{\hs{k}~\hs{->}~\hs{v}\xspace} 54 | \newcommand{\storef}{\hs{k}~\hs{->}~\hs{f}~\hs{v}\xspace} 55 | 56 | % 
\newcommand{\simon}[1]{} 57 | % \newcommand{\simon}[1]{SLPJ: {\color{red} \em #1} End SLPJ} 58 | 59 | \begin{document} 60 | \title[Build Systems \`a la Carte: Theory and Practice] 61 | {Build Systems \`a la Carte: Theory and Practice} 62 | 63 | \author[Andrey Mokhov, Neil Mitchell and Simon Peyton Jones] 64 | {ANDREY MOKHOV\\ 65 | School of Engineering, Newcastle University, United Kingdom\\ 66 | Jane Street, London, United Kingdom} 67 | \email{andrey.mokhov@ncl.ac.uk} 68 | 69 | \author[Andrey Mokhov, Neil Mitchell and Simon Peyton Jones] 70 | {NEIL MITCHELL\\ 71 | Facebook, London, United Kingdom} 72 | \email{ndmitchell@gmail.com} 73 | 74 | \author[Andrey Mokhov, Neil Mitchell and Simon Peyton Jones] 75 | {SIMON PEYTON JONES\\ 76 | Microsoft Research, Cambridge, United Kingdom} 77 | \email{simonpj@microsoft.com} 78 | 79 | % \keywords{build systems, functional programming, algorithms} 80 | 81 | \maketitle 82 | 83 | \begin{abstract} 84 | Build systems are awesome, terrifying -- and unloved. They are used by every 85 | developer around the world, but are rarely the object of study. In this paper we 86 | offer a systematic, and executable, framework for developing and comparing build 87 | systems, viewing them as related points in a landscape rather than as isolated 88 | phenomena. By teasing apart existing build systems, we can recombine their 89 | components, allowing us to prototype new build systems with desired properties. 
90 | \end{abstract} 91 | 92 | \input{1-intro} 93 | \input{2-background} 94 | \input{3-abstractions} 95 | \input{4-schedulers} 96 | \input{5-rebuilders} 97 | \input{6-implementations} 98 | \input{7-experience} 99 | \input{8-engineering} 100 | \input{9-related} 101 | \input{10-conclusions} 102 | 103 | \bibliography{refs} 104 | 105 | \appendix 106 | % \input{11-appendix} 107 | 108 | \end{document} 109 | -------------------------------------------------------------------------------- /papers/jfp/mathptmx.sty: -------------------------------------------------------------------------------- 1 | \def\filedate{1998/06/30} 2 | \NeedsTeXFormat{LaTeX2e} 3 | \ProvidesPackage{mathptmx}[\filedate\space 4 | Times + math package from fontinst] 5 | \def\rmdefault{ptm} 6 | \DeclareSymbolFont{operators} {OT1}{ztmcm}{m}{n} 7 | \DeclareSymbolFont{letters} {OML}{ztmcm}{m}{it} 8 | \DeclareSymbolFont{symbols} {OMS}{ztmcm}{m}{n} 9 | \DeclareSymbolFont{largesymbols}{OMX}{ztmcm}{m}{n} 10 | \DeclareSymbolFont{bold} {OT1}{ptm}{bx}{n} 11 | \DeclareSymbolFont{italic} {OT1}{ptm}{m}{it} 12 | \@ifundefined{mathbf}{}{\DeclareMathAlphabet{\mathbf}{OT1}{ptm}{bx}{n}} 13 | \@ifundefined{mathit}{}{\DeclareMathAlphabet{\mathit}{OT1}{ptm}{m}{it}} 14 | \DeclareMathSymbol{\omicron}{0}{operators}{`\o} 15 | \thinmuskip=2mu 16 | \medmuskip=2.5mu plus 1mu minus 1mu 17 | \thickmuskip=4mu plus 1.5mu minus 1mu 18 | \let\@tempa\version@elt 19 | \def\version@elt#1{% 20 | \ifx\mv@bold#1\else\noexpand\version@elt\noexpand#1\fi} 21 | \edef\version@list{\version@list} 22 | \let\version@elt\@tempa 23 | \let\mv@bold\@undefined 24 | \def\boldmath{% 25 | \PackageWarning{there is no bold Symbol font}% 26 | \global\let\boldmath=\relax 27 | } 28 | \DeclareMathSizes{5}{5}{5}{5} 29 | \DeclareMathSizes{6}{6}{5}{5} 30 | \DeclareMathSizes{7}{7}{5}{5} 31 | \DeclareMathSizes{8}{8}{6}{5} 32 | \DeclareMathSizes{9}{9}{7}{5} 33 | \DeclareMathSizes{10}{10}{7.4}{6} 34 | \DeclareMathSizes{10.95}{10.95}{8}{6} 35 | 
\DeclareMathSizes{12}{12}{9}{7} 36 | \DeclareMathSizes{14.4}{14.4}{10.95}{8} 37 | \DeclareMathSizes{17.28}{17.28}{12}{10} 38 | \DeclareMathSizes{20.74}{20.74}{14.4}{12} 39 | \DeclareMathSizes{24.88}{24.88}{17.28}{14.4} 40 | \endinput 41 | %% 42 | %% End of file `mathptmx.sty'. -------------------------------------------------------------------------------- /papers/jfp/todo.md: -------------------------------------------------------------------------------- 1 | # Build Systems à la Carte: Theory and Practice 2 | 3 | Extended version of the ICFP 2018 paper "Build Systems à la Carte" to be 4 | submitted to JFP. 5 | 6 | ## New material 7 | 8 | ### Tasks (Section 3) 9 | 10 | * **Done**: Section 3.8 Examples of Tasks. (A section with examples of 11 | functorial, applicative and monadic tasks based on Collatz, Fibonacci and 12 | Ackermann sequences: https://blogs.ncl.ac.uk/andreymokhov/the-task-abstraction/) 13 | 14 | * Maybe combine with the `Edit Distance` example from S7.3 Memoization? 15 | **Andrey**: Decided to keep the Memoization and the corresponding example in Related Work. 16 | 17 | ### Traces and schedulers (Section 4) 18 | 19 | **Done** 20 | 21 | Promote 4.2/4.3 (traces) to top level section Rebuilders. Elaborate (it's very 22 | terse right now) with examples. 23 | 24 | Similar Schedulers (current 4.1) 25 | 26 | Move 4.4 into the start of 5 (build systems concretely). 27 | 28 | * Add Shake's step traces 29 | * Optimisation 30 | * Correctness 31 | 32 | Explain how to "add constructiveness" to a trace system. Maybe even concretely: 33 | `constructive :: TraceSystem -> TraceSystem`. Maybe collapse verifying traces 34 | and constructive traces, at least in table 2. 35 | (This would have impact on the structure of the whole trace section.) 36 | 37 | ### Implementation 38 | 39 | Add implementation of `dctRebuilder`. 
40 | 41 | ### Cloud Shake 42 | 43 | **Done** 44 | 45 | * Implemented and released 46 | * Absolute paths and system binaries 47 | * Deferred materialisation. Different invariants for (a) local builds, (b) cloud 48 | sharing (you must list all the things you produce), (c) sharing + deferred 49 | materialisation (you must declare all the things you consume). Interaction 50 | with early cut-off. 51 | 52 | ### Cloud Hadrian 53 | 54 | **Done** 55 | 56 | * Real-life example 57 | * Many examples demonstrating the use of dynamic dependencies 58 | 59 | Experience 60 | * Cloud stuff needed tracing infrastructure to expose dependencies. 61 | * Good profiling support (here are pictures) exposes places where we are 62 | over-sequential, and over-big tasks (build systems work best with lots of 63 | small tasks). 64 | * Ability to compute critical paths 65 | * Numbers: no-op rebuild time; fresh clone rebuild time; (vs clean build). 66 | 67 | ### Related work 68 | 69 | Relationship to Shake paper. Fully subsumes it, and explains it much better. 70 | Oracles no longer important. 71 | 72 | ### Engineering section 73 | 74 | * **Done**: Tasks with multiple outputs: 75 | https://github.com/snowleopard/build/blob/master/src/Build/Multi.hs 76 | 77 | * **Done**: Typed tasks: 78 | https://github.com/snowleopard/build/blob/master/src/Build/Task/Typed.hs 79 | 80 | * **Done**: Handling failures, see last section here: 81 | https://blogs.ncl.ac.uk/andreymokhov/the-task-abstraction/ 82 | 83 | * **Done**: Self-tracking (?): 84 | https://github.com/snowleopard/build/blob/master/src/Build/SelfTracking.hs 85 | 86 | * Re-using existing infrastructure: (a) key-value store, (b) remote execution service. 87 | 88 | 89 | ## Feedback 90 | 91 | ### Ulan Degenbaev 92 | 93 | How do I do configuration in this framework? 
Real-life example from Chromium: 94 | 95 | https://cs.chromium.org/chromium/src/build/config/v8_target_cpu.gni?rcl=c8f117ce2885070675675564dc39be7e92c6853d&l=38 96 | 97 | One could add `Reader conf f` to the `Task` constraints. 98 | 99 | Andrey's response: Yes, here is an example, where `Bool` is a configuration type: 100 | 101 | ```haskell 102 | staticIF :: Bool -> Task Applicative String Int 103 | staticIF b fetch "B1" = Just $ if b then fetch "A1" 104 | else (+) <$> fetch "A2" <*> fetch "A3" 105 | staticIF _ _ _ = Nothing 106 | ``` 107 | 108 | This can be generalised to a `Reader`. 109 | 110 | *A much later comment*: Or maybe this is a use case for selective functors? 111 | 112 | ### Russell O'Connor 113 | 114 | **Done** 115 | 116 | https://www.reddit.com/r/haskell/comments/9l2shn/video_build_systems_%C3%A0_la_carte_by_simon_peyton/e750mwb/ 117 | 118 | The type `Task c k v` is exactly the type of free `c` generated by the 119 | `PStore k v` functor, at `v` where `PStore` is the parametrized store comonad, 120 | 121 | ```haskell 122 | data PStore i j x = PStore i (j -> x) deriving Functor 123 | ``` 124 | 125 | See: "A Representation Theorem for Second-Order Functionals" 126 | https://arxiv.org/abs/1402.1699 127 | 128 | So `Task Applicative k v` is `FreeApplicative (PStore k v) v`. `Task Monad k v` 129 | is `FreeMonad (PStore k v) v`, which is the type of the Van Laarhoven free monad, 130 | which is in turn just a different representation of the same class of free 131 | monads that are used in data types à la carte, which I presumed is where the 132 | "à la carte" in the paper's title comes from, though it seems Swierstra isn't 133 | cited so I guess not. 134 | 135 | Edit: In particular, this means that `Task Monad k v` is a type of `v`-branching 136 | trees with internal nodes annotated with values of type `k` and whose leaves 137 | hold `v` values. i.e.
138 | 139 | ```haskell 140 | data TaskMonad k v = Branch k (v -> TaskMonad k v) | Leaf v 141 | ``` 142 | 143 | which is a bit of a peculiar type. I suppose I should study the paper more. 144 | 145 | ### Karl Palmskog 146 | 147 | As I mentioned, there is previous work which formalizes a specific build 148 | language ("CloudMake") in the Dafny verification environment. Dafny is 149 | somewhat similar to Coq, but does not have explicit proofs and has much more 150 | built-in automation. Dafny also has a small imperative language, which 151 | is used for CloudMake. 152 | 153 | - Dafny tool: https://github.com/Microsoft/dafny 154 | - Dafny source files for CloudMake (part of Dafny test suite): 155 | https://github.com/Microsoft/dafny/tree/master/Test/cloudmake 156 | - The CloudMake formalization paper (subset of pure JavaScript): 157 | https://link.springer.com/chapter/10.1007/978-3-319-06410-9_43 158 | - Paper on practical migration of build scripts to CloudMake: 159 | https://dl.acm.org/citation.cfm?id=2660239 160 | 161 | ## Random thoughts 162 | 163 | ### Categorisation of tasks, or what if effect `f` is computation? 164 | 165 | Compute Functor = tail recursion 166 | Compute Applicative = primitive recursion 167 | Compute Monad = general recursion 168 | 169 | **Andrey:** Not sure now we should go into this. 170 | 171 | ### ApplicativeZero and MonadZero 172 | 173 | These allow introducing only failures into build systems. 174 | 175 | As an interesting build system feature, one might have a build system that 176 | performs a retry when a compute fails -- analogous to travis_retry. I found 177 | an example of similar functionality in Bazel: 178 | 179 | --flaky_test_attempts= multiple uses are 181 | accumulated 182 | 183 | Each test will be retried up to the specified number of times in case of any 184 | test failure. Tests that required more than one attempt to pass would be marked 185 | as 'FLAKY' in the test summary.
If this option is set, it should specify an int 186 | N or the string 'default'. If it's an int, then all tests will be run up to N 187 | times. If it is not specified or its value is ' default', then only a single 188 | test attempt will be made for regular tests and three for tests marked 189 | explicitly as flaky by their rule (flaky=1 attribute). 190 | 191 | See https://docs.bazel.build/versions/master/command-line-reference.html 192 | 193 | ### MonadZero, MonadPlus, MonadOr 194 | 195 | Zero -- for failure. 196 | Or -- for choosing the first success. 197 | Plus -- for picking any success, non-deterministically? 198 | 199 | See https://wiki.haskell.org/MonadPlus_reform_proposal 200 | 201 | Alternative seems to be similar to MonadOr in that it chooses the first one? 202 | -------------------------------------------------------------------------------- /src/Build.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes, ConstraintKinds #-} 2 | 3 | -- | Build systems and the properties they should ensure. 4 | module Build ( 5 | -- * Build 6 | Build, 7 | 8 | -- * Properties 9 | correctBuild 10 | ) where 11 | 12 | import Build.Task 13 | import Build.Task.Monad 14 | import Build.Store 15 | import Build.Utilities 16 | 17 | -- | A build system takes a description of 'Tasks', a target key, and a store, 18 | -- and computes a new store, where the key and its dependencies are up to date. 19 | type Build c i k v = Tasks c k v -> k -> Store i k v -> Store i k v 20 | 21 | -- | Given a description of @tasks@, an initial @store@, and a @result@ produced 22 | -- by running a build system on a target @key@, this function returns 'True' if 23 | -- the @result@ is a correct build outcome. Specifically: 24 | -- * @result@ and @store@ must agree on the values of all inputs. In other words, 25 | -- no inputs were corrupted during the build. 26 | -- * @result@ is /consistent/ with the @tasks@, i.e. 
-- | Decide whether @result@ is a correct outcome of building the target key
-- with @tasks@, starting from the initial @store@. Every key reachable from the
-- target through the dependencies observed in @result@ is checked:
--
-- * an input key (no task) must hold the same value in @result@ and @store@;
--
-- * a non-input key must hold the value obtained by recomputing its task
--   against @result@.
correctBuild :: (Ord k, Eq v) => Tasks Monad k v -> Store i k v -> Store i k v -> k -> Bool
correctBuild tasks store result target = all isCorrect (reachable dependenciesOf target)
  where
    -- The value a key has after the build.
    final key = getValue key result

    -- Dependencies of a key, as observed when running its task over @result@.
    dependenciesOf key = case tasks key of
        Nothing   -> []
        Just task -> snd (trackPure task final)

    -- The value a key is required to have in a correct build.
    expected key = case tasks key of
        Nothing   -> getValue key store
        Just task -> compute task result

    isCorrect key = final key == expected key
zip (partition k) 26 | msg = "Partition invariants violated" 27 | -------------------------------------------------------------------------------- /src/Build/Rebuilder.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ConstraintKinds, KindSignatures, ImpredicativeTypes, FlexibleContexts #-} 2 | 3 | -- | Rebuilders take care of deciding whether a key needs to be rebuild and 4 | -- running the corresponding task if need be. 5 | module Build.Rebuilder ( 6 | Rebuilder, perpetualRebuilder, 7 | modTimeRebuilder, Time, MakeInfo, 8 | dirtyBitRebuilder, dirtyBitRebuilderWithCleanUp, 9 | approximateRebuilder, ApproximateDependencies, ApproximationInfo, 10 | vtRebuilder, stRebuilder, ctRebuilder, dctRebuilder 11 | ) where 12 | 13 | import Control.Monad 14 | import Control.Monad.State 15 | import Data.Map (Map) 16 | import Data.Set (Set) 17 | 18 | import qualified Data.Map as Map 19 | import qualified Data.Set as Set 20 | 21 | import Build.Store 22 | import Build.Task 23 | import Build.Task.Applicative 24 | import Build.Task.Monad 25 | import Build.Trace 26 | 27 | -- | Given a key-value pair and the corresponding task, a rebuilder returns a 28 | -- new task that has access to the build information and can use it to skip 29 | -- rebuilding a key if it is up to date. 30 | type Rebuilder c i k v = k -> v -> Task c k v -> Task (MonadState i) k v 31 | 32 | -- | Always rebuilds the key. 33 | perpetualRebuilder :: Rebuilder Monad i k v 34 | perpetualRebuilder _key _value task = task 35 | 36 | ------------------------------------- Make ------------------------------------- 37 | type Time = Integer 38 | type MakeInfo k = (Time, Map k Time) 39 | 40 | -- | This rebuilder uses modification time to decide whether a key is dirty and 41 | -- needs to be rebuilt. Used by Make. 
-- | Make-style rebuilder driven by modification times. A key is considered up
-- to date when it has a recorded modification time and none of its static
-- dependencies has a strictly later recorded time. Otherwise the key is
-- stamped with the current time, the clock is advanced by one, and the task is
-- run.
modTimeRebuilder :: Ord k => Rebuilder Applicative (MakeInfo k) k v
modTimeRebuilder key value task fetch = do
    (now, modTimes) <- get
    let lastBuilt = Map.lookup key modTimes
        -- The comparison is on 'Maybe' values: a dependency without a
        -- recorded time is 'Nothing', which is never greater than 'Just'.
        changedSince dep = Map.lookup dep modTimes > lastBuilt
        upToDate = case lastBuilt of
            Nothing -> False
            Just _  -> not (any changedSince (dependencies task))
    if upToDate
        then return value
        else do
            put (now + 1, Map.insert key now modTimes)
            task fetch
-- | This rebuilder uses approximate dependencies to decide whether a key needs
-- to be rebuilt. A key is rebuilt when it is itself in the dirty set, when it
-- has no dependency approximation at all, or when any of its approximate
-- dependencies is in the dirty set. If the rebuilt value differs from the old
-- one, the key is added to the dirty set.
approximateRebuilder :: (Ord k, Eq v) => Rebuilder Monad (ApproximationInfo k) k v
approximateRebuilder key value task fetch = do
    (dirtyKeys, deps) <- get
    let isDirty k = k `Set.member` dirtyKeys
        -- No entry means no usable approximation: assume the worst.
        hasDirtyDependency = maybe True (any isDirty) (Map.lookup key deps)
    if isDirty key || hasDirtyDependency
        then do
            newValue <- task fetch
            when (value /= newValue) $ put (Set.insert key dirtyKeys, deps)
            return newValue
        else return value
-- | This rebuilder relies on deep constructive traces. The values cached for
-- the key are looked up first: the current value is kept if it is among them,
-- otherwise the first cached value is returned. Only when nothing is cached is
-- the task run, and the result is then recorded in the trace store.
dctRebuilder :: (Eq k, Hashable v) => Rebuilder Monad (DCT k v) k v
dctRebuilder key value task fetch = do
    let fetchHash = fmap hash . fetch
    cachedValues <- get >>= constructDCT key fetchHash
    case cachedValues of
        -- The current value has been verified, so keep it.
        _ | value `elem` cachedValues -> return value
        -- Any cached value will do.
        cachedValue : _ -> return cachedValue
        -- Nothing cached: run the task and record the new trace.
        [] -> do
            (newValue, deps) <- track task fetch
            get >>= recordDCT key newValue (map fst deps) fetchHash >>= put
            return newValue
-- | Run a computation over the build information @i@ inside a computation over
-- the whole @Store i k v@: the information is read from the store, the given
-- computation is run on it, and the resulting information is written back.
liftStore :: State i a -> State (Store i k v) a
liftStore action = do
    oldInfo <- gets getInfo
    case runState action oldInfo of
        (result, newInfo) -> do
            modify (putInfo newInfo)
            return result
-- | This scheduler constructs the dependency graph of the target key by
-- extracting all (static) dependencies upfront, and then traversing the graph
-- in the topological order, rebuilding keys using the supplied rebuilder.
--
-- Calls 'error' when 'topSort' yields no ordering (cyclic dependencies).
topological :: forall i k v. Ord k => Scheduler Applicative i i k v
topological rebuilder tasks target = execState $ mapM_ build order
  where
    -- Bring a single key up to date. Keys without a task are inputs and are
    -- left untouched.
    build :: k -> State (Store i k v) ()
    build key = case tasks key of
        Nothing -> return ()
        Just task -> do
            store <- get
            let value = getValue key store
                newTask :: Task (MonadState i) k v
                newTask = rebuilder key value task
                -- Dependencies are read from the store snapshot taken above;
                -- fetching never triggers a build here.
                fetch :: k -> State i v
                fetch k = return (getValue k store)
            newValue <- liftStore (newTask fetch)
            modify $ updateValue key value newValue
    -- The build order derived from the static dependency graph of the target.
    order = case topSort (graph deps target) of
        Nothing -> error "Cannot build tasks with cyclic dependencies"
        Just xs -> xs
    -- Static dependencies of a key; inputs have none.
    deps k = maybe [] dependencies (tasks k)
-- | A model of the scheduler used by Excel, which builds keys in the order used
-- in the previous build. If a key cannot be built because its dependencies have
-- changed and a new dependency is still dirty, the corresponding build task is
-- abandoned and the key is moved to the end of the calculation chain, so it can
-- be restarted when all its dependencies are up to date.
restarting :: forall ir k v. Ord k => Scheduler Monad (ir, Chain k) ir k v
restarting rebuilder tasks target = execState $ do
    chain <- gets (snd . getInfo)
    -- Make sure the target is part of the chain that is about to be processed.
    newChain <- liftInfo $ go Set.empty $ chain ++ [target | target `notElem` chain]
    -- Store the order actually used, as the chain for the next build.
    modify . mapInfo $ \(ir, _) -> (ir, newChain)
  where
    -- Process the chain, given the set of keys that are already done; returns
    -- the keys in the order in which they were completed.
    go :: Set k -> Chain k -> State (Store ir k v) (Chain k)
    go _ [] = return []
    go done (key:ks) = case tasks key of
        -- Inputs need no work and are immediately done.
        Nothing -> (key :) <$> go (Set.insert key done) ks
        Just task -> do
            store <- get
            let value = getValue key store
                newTask :: Task (MonadState ir) k (Either k v)
                newTask = try $ rebuilder key value task
                -- Only keys that are already done may be read; any other
                -- dependency aborts the task with @Left@.
                fetch :: k -> State ir (Either k v)
                fetch k | k `Set.member` done = return $ Right (getValue k store)
                        | otherwise = return $ Left k
            result <- liftStore (newTask fetch)
            case result of
                -- Abandon the task: the missing dependency goes to the front
                -- of the remaining chain and the key itself to its end.
                Left dep -> go done $ dep : filter (/= dep) ks ++ [key]
                Right newValue -> do
                    modify $ updateValue key value newValue
                    (key :) <$> go (Set.insert key done) ks
-- | A queue entry pairs a key that needs to be built with the keys that are
-- blocked on it. A plain association list keeps the model simple.
type Queue k = [(k, [k])]

-- | Add a key together with the keys it blocks. When the key is already
-- queued, the new blocked keys are prepended to its existing list and its
-- position is kept; otherwise a new entry is appended at the end.
enqueue :: Eq k => k -> [k] -> Queue k -> Queue k
enqueue key blocked = go
  where
    go [] = [(key, blocked)]
    go (entry@(k, bs) : rest)
        | k == key  = (k, blocked ++ bs) : rest
        | otherwise = entry : go rest

-- | Take the first entry off the queue, returning its key, the keys blocked on
-- it, and the remaining queue; @Nothing@ when the queue is empty.
dequeue :: Queue k -> Maybe (k, [k], Queue k)
dequeue queue = case queue of
    (key, blocked) : rest -> Just (key, blocked, rest)
    []                    -> Nothing
(Hashable v, Eq k) => Scheduler Monad (CT k v) (CT k v) k v 149 | restarting2 rebuilder tasks target = execState $ go (enqueue target [] mempty) 150 | where 151 | go :: Queue k -> State (Store (CT k v) k v) () 152 | go queue = case dequeue queue of 153 | Nothing -> return () 154 | Just (key, bs, q) -> case tasks key of 155 | Nothing -> return () -- Never happens: we have no inputs in the queue 156 | Just task -> do 157 | store <- get 158 | let value = getValue key store 159 | upToDate k = isInput tasks k || not (isDirtyCT k store) 160 | newTask :: Task (MonadState (CT k v)) k (Either k v) 161 | newTask = try $ rebuilder key value task 162 | fetch :: k -> State (CT k v) (Either k v) 163 | fetch k | upToDate k = return (Right (getValue k store)) 164 | | otherwise = return (Left k) 165 | result <- liftStore (newTask fetch) 166 | case result of 167 | Left dep -> go (enqueue dep (key:bs) q) 168 | Right newValue -> do 169 | modify $ updateValue key value newValue 170 | go (foldr (`enqueue` []) q bs) 171 | 172 | ---------------------------------- Suspending ---------------------------------- 173 | -- | This scheduler builds keys recursively: to build a key it executes the 174 | -- associated task, discovering its dependencies on the fly, and if one of the 175 | -- dependencies is dirty, the task is suspended until the dependency is rebuilt. 176 | -- It stores the set of keys that have already been built as part of the state 177 | -- to avoid executing the same task twice. 178 | suspending :: forall i k v. Ord k => Scheduler Monad i i k v 179 | suspending rebuilder tasks target store = fst $ execState (fetch target) (store, Set.empty) 180 | where 181 | fetch :: k -> State (Store i k v, Set k) v 182 | fetch key = do 183 | done <- gets snd 184 | case tasks key of 185 | Just task | key `Set.notMember` done -> do 186 | value <- gets (getValue key . 
-- | Run a @Task (MonadState i)@ using a fetch callback operating on a larger
-- state that contains a @Store i k v@ plus some @extra@ information.
--
-- The task is instantiated at the 'Wrap' monad, so that its 'get' and 'put'
-- act on the build information kept inside the store.
liftRun :: Task (MonadState i) k v
        -> (k -> State (Store i k v, extra) v) -> State (Store i k v, extra) v
liftRun t f = unwrap $ t (Wrap . f)

-- | A wrapper around @State (Store i k v, extra)@ whose 'MonadState' instance
-- exposes only the build information @i@.
newtype Wrap i extra k v a = Wrap { unwrap :: State (Store i k v, extra) a }
    deriving (Functor, Applicative, Monad)

instance MonadState i (Wrap i extra k v) where
    -- Read the build information of the store (the first state component).
    get = Wrap $ gets (getInfo . fst)
    -- Replace the build information, leaving the @extra@ component as it is.
    put i = Wrap $ modify $ first (putInfo i)
Eq k => Scheduler Monad i i k v 211 | independent rebuilder tasks target store = case tasks target of 212 | Nothing -> store 213 | Just task -> 214 | let value = getValue target store 215 | newTask = rebuilder target value task 216 | fetch :: k -> State i v 217 | fetch k = return (getValue k store) 218 | (newValue, newInfo) = runState (newTask fetch) (getInfo store) 219 | in putInfo newInfo $ updateValue target value newValue store 220 | -------------------------------------------------------------------------------- /src/Build/SelfTracking.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, ImpredicativeTypes #-} 2 | 3 | -- | This module defines two different strategies of self-tracking, based 4 | -- around the idea of storing task descriptions that can be parsed into a 'Task'. 5 | -- 6 | -- * For 'Monad' it works out beautifully. You just store the rule on the disk, 7 | -- and depend on it. 8 | -- 9 | -- * For 'Applicative', we generate a fresh 'Task' each time, but have that 10 | -- 'Task' depend on a fake version of the rules. This is a change in the 'Task', 11 | -- but it's one for which the standard implementations tend to cope with just 12 | -- fine. Most applicative systems with self-tracking probably do it this way. 13 | module Build.SelfTracking ( 14 | Key (..), Value (..), selfTrackingM, selfTrackingA 15 | ) where 16 | 17 | import Build.Task 18 | 19 | -- | We assume that the fetch passed to a Task is consistent and returns values 20 | -- matching the keys. It is possible to switch to typed tasks to check this 21 | -- assumption at compile time, e.g. see "Build.Task.Typed". 22 | data Key k = Key k | KeyTask k 23 | data Value v t = Value v | ValueTask t 24 | 25 | -- | Fetch a value. 
-- | Self-tracking for 'Monad' tasks. Task-description keys ('KeyTask') are
-- inputs. For a value key, the task producing its description is run first,
-- the description is parsed into a task, and that task is then run to produce
-- the value.
selfTrackingM :: forall k v t. (t -> Task Monad k v) -> Tasks Monad k t -> Tasks Monad (Key k) (Value v t)
selfTrackingM taskParser tasks key = case key of
    KeyTask _ -> Nothing -- Task keys are inputs
    Key k     -> runTask <$> tasks k
  where
    -- Obtain the task description, parse it, and run the resulting task.
    runTask :: Task Monad k t -> Task Monad (Key k) (Value v t)
    runTask describe fetch = do
        description <- describe (fetchValueTask fetch)
        fmap Value (taskParser description (fetchValue fetch))
-- | Self-tracking for 'Applicative' tasks. Task-description keys ('KeyTask')
-- are inputs. For a value key, the task obtained by parsing @ask k@ is run,
-- and the description key is fetched as well (its result is discarded) so that
-- it is registered as a dependency.
selfTrackingA :: (t -> Task Applicative k v) -> (k -> t) -> Tasks Applicative (Key k) (Value v t)
selfTrackingA parser ask key = case key of
    KeyTask _ -> Nothing -- Task keys are inputs
    Key k     -> Just $ \fetch ->
        fetch (KeyTask k) *> fmap Value (parser (ask k) (fetchValue fetch))
-- | A 'Hash' is used for efficient tracking and sharing of build results. For
-- prototyping, the "hash" of a value is simply the value itself.
newtype Hash a = Hash a deriving (Eq, Ord, Show)

instance Functor Hash where
    fmap f (Hash x) = Hash (f x)

instance Applicative Hash where
    pure x = Hash x
    Hash f <*> Hash x = Hash (f x)

-- | Values that can be hashed. We typically assume cryptographic hashing,
-- e.g. SHA256.
class Ord a => Hashable a where
    -- | Compute the hash of a given value.
    hash :: a -> Hash a

instance Hashable Int where
    hash x = Hash x

instance Hashable Integer where
    hash x = Hash x

instance Hashable a => Hashable [a] where
    hash xs = Hash xs

instance Hashable a => Hashable (Hash a) where
    hash h = Hash h

instance (Hashable a, Hashable b) => Hashable (a, b) where
    hash pair = Hash pair

-- | An abstract datatype for a key/value store with build information of type @i@.
data Store i k v = Store { info :: i, values :: k -> v }

-- | Read the build information.
getInfo :: Store i k v -> i
getInfo (Store i _) = i

-- | Read the value of a key.
getValue :: k -> Store i k v -> v
getValue key store = values store key

-- | Read the hash of a key's value. In some cases may be implemented more
-- efficiently than @hash . getValue k@.
getHash :: Hashable v => k -> Store i k v -> Hash v
getHash key store = hash (getValue key store)

-- | Write the build information.
putInfo :: i -> Store i k v -> Store i k v
putInfo newInfo (Store _ kv) = Store newInfo kv

-- | Modify the build information.
mapInfo :: (i -> j) -> Store i k v -> Store j k v
mapInfo f (Store i kv) = Store (f i) kv

-- | Update the value of a key; all other keys keep their previous values.
putValue :: Eq k => k -> v -> Store i k v -> Store i k v
putValue k v (Store i old) = Store i updated
  where
    updated key
        | key == k  = v
        | otherwise = old key

-- | Initialise the store.
initialise :: i -> (k -> v) -> Store i k v
initialise i kv = Store i kv
-- | Excel stores a dirty bit per key and a calc chain.
type ExcelInfo k = (k -> Bool, Chain k)

-- | A model of Excel: a monadic build system that stores a dirty bit per key
-- and the calculation chain from the previous build (see 'ExcelInfo'). It
-- combines the 'restarting' scheduler with 'dirtyBitRebuilder'.
excel :: Ord k => Build Monad (ExcelInfo k) k v
excel = restarting dirtyBitRebuilder
Note 73 | -- that Bazel currently does not allow users to write monadic build rules: only 74 | -- built-in rules have access to dynamic dependencies. 75 | bazel :: (Ord k, Hashable v) => Build Monad (CT k v) k v 76 | bazel = restarting2 ctRebuilder 77 | 78 | -- | A model of Cloud Shake: a monadic build system that uses constructive 79 | -- traces to check if a key is up to date as well as for caching build results. 80 | cloudShake :: (Ord k, Hashable v) => Build Monad (CT k v) k v 81 | cloudShake = suspending ctRebuilder 82 | 83 | -- | A model of CloudBuild: an applicative build system that uses constructive 84 | -- traces to check if a key is up to date as well as for caching build results. 85 | cloudBuild :: forall k v. (Ord k, Hashable v) => Build Applicative (CT k v) k v 86 | cloudBuild = topological rebuilder 87 | where 88 | rebuilder :: Rebuilder Applicative (CT k v) k v 89 | rebuilder = ctRebuilder 90 | 91 | -- | A model of Buck: an applicative build system that uses deep constructive 92 | -- traces to check if a key is up to date as well as for caching build results. 93 | buck :: forall k v. (Ord k, Hashable v) => Build Applicative (DCT k v) k v 94 | buck = topological rebuilder 95 | where 96 | rebuilder :: Rebuilder Applicative (DCT k v) k v 97 | rebuilder = dctRebuilder 98 | 99 | -- | A model of Nix: a monadic build system that uses deep constructive traces 100 | -- to check if a key is up to date as well as for caching build results. 101 | nix :: (Ord k, Hashable v) => Build Monad (DCT k v) k v 102 | nix = suspending dctRebuilder 103 | -------------------------------------------------------------------------------- /src/Build/Task.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes, ConstraintKinds #-} 2 | 3 | -- | The Task abstractions. 
4 | module Build.Task (Task, Tasks, compose) where 5 | 6 | import Control.Applicative 7 | 8 | -- | A 'Task' is used to compute a value of type @v@, by finding the necessary 9 | -- dependencies using the provided @fetch :: k -> f v@ callback. 10 | type Task c k v = forall f. c f => (k -> f v) -> f v 11 | 12 | -- | 'Tasks' associates a 'Task' with every non-input key. @Nothing@ indicates 13 | -- that the key is an input. 14 | type Tasks c k v = k -> Maybe (Task c k v) 15 | 16 | -- | Compose two task descriptions, preferring the first one in case there are 17 | -- two tasks corresponding to the same key. 18 | compose :: Tasks Monad k v -> Tasks Monad k v -> Tasks Monad k v 19 | compose t1 t2 key = t1 key <|> t2 key 20 | -------------------------------------------------------------------------------- /src/Build/Task/Applicative.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes #-} 2 | 3 | -- | Applicative tasks, as used by Make, Ninja and other applicative build 4 | -- systems. Dependencies of applicative tasks are known statically, before their 5 | -- execution. 6 | module Build.Task.Applicative (dependencies) where 7 | 8 | import Control.Applicative 9 | 10 | import Build.Task 11 | 12 | -- | Find the dependencies of an applicative task. 13 | dependencies :: Task Applicative k v -> [k] 14 | dependencies task = getConst $ task (\k -> Const [k]) 15 | -------------------------------------------------------------------------------- /src/Build/Task/Free.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes, DeriveFunctor #-} 2 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 3 | 4 | -- | The free description of tasks. 
5 | module Build.Task.Free ( 6 | Rule (..), toRule, fromRule, Action (..), toAction, fromAction 7 | ) where 8 | 9 | import Build.Task 10 | import Control.Monad 11 | 12 | ------------------------- Isomorphism with Make's Rule ------------------------- 13 | data Rule k v r = Rule [k] ([v] -> r) 14 | deriving Functor 15 | 16 | instance Applicative (Rule k v) where 17 | pure v = Rule [] (\[] -> v) 18 | Rule d1 f1 <*> Rule d2 f2 = Rule (d1++d2) $ \vs -> 19 | let (v1,v2) = splitAt (length d1) vs in f1 v1 $ f2 v2 20 | 21 | getRule :: k -> Rule k v v 22 | getRule k = Rule [k] $ \[v] -> v 23 | 24 | toRule :: Task Applicative k v -> Rule k v v 25 | toRule task = task getRule 26 | 27 | fromRule :: Rule k v v -> Task Applicative k v 28 | fromRule (Rule ds f) fetch = f <$> traverse fetch ds 29 | 30 | ------------------------ Isomorphism with Shake's Action ----------------------- 31 | data Action k v a = Finished a 32 | | Depends k (v -> Action k v a) 33 | deriving Functor 34 | 35 | instance Applicative (Action k v) where 36 | pure = Finished 37 | (<*>) = ap 38 | 39 | instance Monad (Action k v) where 40 | return = pure 41 | Finished x >>= f = f x 42 | Depends ds op >>= f = Depends ds (op >=> f) 43 | 44 | toAction :: Task Monad k v -> Action k v v 45 | toAction task = task $ \k -> Depends k Finished 46 | 47 | fromAction :: Action k v v -> Task Monad k v 48 | fromAction x fetch = f fetch x 49 | where 50 | f _ (Finished v ) = return v 51 | f fetch (Depends d op) = fetch d >>= f fetch . op 52 | -------------------------------------------------------------------------------- /src/Build/Task/Functor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes, CPP #-} 2 | -- | Functorial tasks, which have exactly one statically known dependency. 3 | -- Docker is an example of a functorial build system: Docker containers are 4 | -- organised in layers, where each layer makes changes to the previous one. 
5 | module Build.Task.Functor (dependency) where 6 | 7 | #if __GLASGOW_HASKELL__ < 800 8 | import Control.Applicative 9 | #else 10 | import Data.Functor.Const 11 | #endif 12 | 13 | import Build.Task 14 | 15 | -- | Find the dependency of a functorial task. 16 | dependency :: Task Functor k v -> k 17 | dependency task = getConst (task Const) 18 | -------------------------------------------------------------------------------- /src/Build/Task/Monad.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes, ScopedTypeVariables #-} 2 | 3 | -- | Monadic tasks, as used by Excel, Shake and other build systems. 4 | -- Dependencies of monadic tasks can only be discovered dynamically, i.e. during 5 | -- their execution. 6 | module Build.Task.Monad ( 7 | track, trackPure, isInput, computePure, compute, liftMaybe, liftEither 8 | ) where 9 | 10 | import Control.Monad.Trans 11 | import Control.Monad.Trans.Except 12 | import Control.Monad.Trans.Maybe 13 | import Control.Monad.Writer 14 | import Data.Functor.Identity 15 | import Data.Maybe 16 | 17 | import Build.Store 18 | import Build.Task 19 | 20 | -- | Execute a monadic task on a pure store @k -> v@, tracking the dependencies. 21 | trackPure :: Task Monad k v -> (k -> v) -> (v, [k]) 22 | trackPure task fetch = runWriter $ task (\k -> writer (fetch k, [k])) 23 | 24 | -- | Execute a monadic task using an effectful fetch function @k -> m v@, 25 | -- tracking the dependencies. 26 | track :: forall m k v. Monad m => Task Monad k v -> (k -> m v) -> m (v, [(k, v)]) 27 | track task fetch = runWriterT $ task trackingFetch 28 | where 29 | trackingFetch :: k -> WriterT [(k, v)] m v 30 | trackingFetch k = do 31 | v <- lift $ fetch k 32 | tell [(k, v)] 33 | return v 34 | 35 | -- | Given a description of tasks, check if a key is input. 36 | isInput :: Tasks Monad k v -> k -> Bool 37 | isInput tasks = isNothing . tasks 38 | 39 | -- | Run a task with a pure lookup function. 
40 | computePure :: Task Monad k v -> (k -> v) -> v 41 | computePure task store = runIdentity $ task (Identity . store) 42 | 43 | -- | Run a task in a given store. 44 | compute :: Task Monad k v -> Store i k v -> v 45 | compute task store = runIdentity $ task (\k -> Identity (getValue k store)) 46 | 47 | -- | Convert a task with a total lookup function @k -> m v@ into a task with a 48 | -- partial lookup function @k -> m (Maybe v)@. This essentially lifts the task 49 | -- from the type of values @v@ to @Maybe v@, where the result @Nothing@ 50 | -- indicates that the task failed because of a missing dependency. 51 | liftMaybe :: Task Monad k v -> Task Monad k (Maybe v) 52 | liftMaybe task fetch = runMaybeT $ task (MaybeT . fetch) 53 | 54 | -- | Convert a task with a total lookup function @k -> m v@ into a task with a 55 | -- lookup function that can throw exceptions @k -> m (Either e v)@. This 56 | -- essentially lifts the task from the type of values @v@ to @Either e v@, where 57 | -- the result @Left e@ indicates that the task failed because of a failed 58 | -- dependency lookup, and @Right v@ yields the value otherwise. 59 | liftEither :: Task Monad k v -> Task Monad k (Either e v) 60 | liftEither task fetch = runExceptT $ task (ExceptT . fetch) 61 | -------------------------------------------------------------------------------- /src/Build/Task/MonadPlus.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImpredicativeTypes #-} 2 | 3 | -- | A version of monadic tasks with some support for non-determinism. 4 | module Build.Task.MonadPlus (random, computeND, correctBuildValue) where 5 | 6 | import Control.Monad 7 | 8 | import Build.Task 9 | import Build.Store 10 | 11 | -- | An example of a non-deterministic task: generate a random number from a 12 | -- specified interval.
13 | random :: (Int, Int) -> Task MonadPlus k Int 14 | random (low, high) = const $ msum $ map pure [low..high] 15 | 16 | -- | Run a non-deterministic task with a pure lookup function, listing all 17 | -- possible results. 18 | computePureND :: Task MonadPlus k v -> (k -> v) -> [v] 19 | computePureND task store = task (return . store) 20 | 21 | -- | Run a task in a given store. 22 | computeND :: Task MonadPlus k v -> Store i k v -> [v] 23 | computeND task store = computePureND task (`getValue` store) 24 | 25 | -- | Given a description of @tasks@, an initial @store@, and a @result@ produced 26 | -- by running a build system on a target @key@, this function returns 'True' if 27 | -- the @key@'s value is a possible result of running the associated task. 28 | correctBuildValue :: Eq v => Tasks MonadPlus k v -> Store i k v -> Store i k v -> k -> Bool 29 | correctBuildValue tasks store result k = case tasks k of 30 | Nothing -> getValue k result == getValue k store 31 | Just task -> getValue k result `elem` computeND task store 32 | -------------------------------------------------------------------------------- /src/Build/Task/Typed.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, ConstraintKinds, RankNTypes, GADTs #-} 2 | #if __GLASGOW_HASKELL__ < 800 3 | {-# OPTIONS_GHC -Wno-unused-binds #-} 4 | #else 5 | {-# OPTIONS_GHC -Wno-unused-top-binds #-} 6 | #endif 7 | {-# OPTIONS_GHC -Wno-unticked-promoted-constructors #-} 8 | 9 | -- | A model of polymorphic tasks, where the value type depends on the key. 10 | -- See the source for an example. 11 | module Build.Task.Typed (Task, dependencies) where 12 | 13 | #if __GLASGOW_HASKELL__ < 800 14 | import Control.Applicative 15 | #else 16 | import Data.Functor.Const 17 | #endif 18 | 19 | -- | The @fetch@ callback whose result type depends on the type of the key. 20 | type Fetch k f = forall a. k a -> f a 21 | 22 | -- | A typed build task. 
23 | -- 24 | -- A side observation: we could also rewrite the type of `Task` into 25 | -- 26 | -- type Task c k = forall f. c f => (forall a. k a -> f a) -> (forall a. k a -> Maybe (f a)) 27 | -- 28 | -- ...which looks like a morphism between natural transformations. I'll let 29 | -- category theory enthusiasts explain what this strange creature is doing here. 30 | type Task c k = forall f a. c f => Fetch k f -> k a -> Maybe (f a) 31 | 32 | -- | A way to show the name of a key. 33 | type ShowKey k = forall a. k a -> String 34 | 35 | -- | Extract the names of dependencies. 36 | dependencies :: ShowKey k -> Task Applicative k -> k a -> [String] 37 | dependencies showKey task = maybe [] getConst . task (\k -> Const [showKey k]) 38 | 39 | ------------------------------ GCC version example ----------------------------- 40 | data Version = Version { major :: Int, minor :: Int } 41 | deriving (Eq, Ord) 42 | 43 | data Key a where 44 | File :: FilePath -> Key String 45 | GccVersion :: Key Version 46 | 47 | newtype TaskT c k v = TaskT { runT :: forall f. c f => Fetch k f -> f v } 48 | 49 | type TasksT c k = forall a.
k a -> Maybe (TaskT c k a) 50 | 51 | example :: TasksT Monad Key 52 | example (File "release.txt") = Just $ TaskT $ \fetch -> do 53 | readme <- fetch (File "README") 54 | license <- fetch (File "LICENSE") 55 | return (readme ++ license) 56 | example (File "main.o") = Just $ TaskT $ \fetch -> do 57 | let source = "main.c" 58 | version <- fetch GccVersion 59 | if version >= Version 8 0 then compileNew source 60 | else compileOld source 61 | example _ = Nothing 62 | 63 | compileNew :: String -> f String 64 | compileNew = undefined 65 | 66 | compileOld :: String -> f String 67 | compileOld = undefined 68 | 69 | ------------------------------------ Example ----------------------------------- 70 | data KeyN a where 71 | Base :: KeyN Int 72 | Number :: KeyN Int 73 | SplitDigit :: KeyN (Int, Int) 74 | LastDigit :: KeyN Int 75 | BaseDigits :: KeyN [Int] 76 | 77 | -- | A build task for some simple typed numeric calculations. We can perform 78 | -- static analysis of this task using the function 'dependencies'. For example: 79 | -- 80 | -- @ 81 | -- dependencies showKey task Base == [] 82 | -- dependencies showKey task SplitDigit == ["Number","Base"] 83 | -- @ 84 | task :: Task Applicative KeyN 85 | task fetch SplitDigit = Just $ divMod <$> fetch Number <*> fetch Base 86 | task fetch LastDigit = Just $ snd <$> fetch SplitDigit 87 | task fetch BaseDigits = Just $ (\b -> [0..(b - 1)]) <$> fetch Base 88 | task _ _ = Nothing 89 | 90 | -- | An example key/value mapping consistent with the build 'task'. 91 | fetch :: Applicative f => Fetch KeyN f 92 | fetch key = pure $ case key of 93 | Base -> 10 94 | Number -> 2018 95 | SplitDigit -> (201, 8) 96 | LastDigit -> 8 97 | BaseDigits -> [0..9] 98 | 99 | -- | Show the name of a key. 
100 | showKey :: ShowKey KeyN 101 | showKey key = case key of 102 | Base -> "Base" 103 | Number -> "Number" 104 | SplitDigit -> "SplitDigit" 105 | LastDigit -> "LastDigit" 106 | BaseDigits -> "BaseDigits" 107 | -------------------------------------------------------------------------------- /src/Build/Trace.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GeneralizedNewtypeDeriving, ScopedTypeVariables #-} 2 | 3 | -- | Build traces that are used for recording information from previous builds. 4 | module Build.Trace ( 5 | Trace (..), 6 | 7 | -- * Verifying traces 8 | VT, recordVT, verifyVT, 9 | 10 | -- * Constructive traces 11 | CT, isDirtyCT, recordCT, constructCT, 12 | 13 | -- * Constructive traces optimised for deep tasks 14 | DCT, recordDCT, constructDCT, 15 | 16 | -- * Step traces 17 | Step, ST, recordST, verifyST 18 | ) where 19 | 20 | import Build.Store 21 | 22 | import Control.Monad.Extra 23 | import Data.List (sortOn) 24 | import Data.Maybe 25 | import Data.Ord 26 | 27 | -- | A trace is parameterised by the types of keys @k@, values @v@, as well as the 28 | -- result @r@. For verifying traces, @r = Hash v@; for constructive traces, @r = v@. 29 | data Trace k v r = Trace 30 | { key :: k 31 | , depends :: [(k, Hash v)] 32 | , result :: r } 33 | deriving Show 34 | 35 | ------------------------------- Verifying traces ------------------------------- 36 | 37 | -- | An abstract data type for a set of verifying traces equipped with 'recordVT', 38 | -- 'verifyVT' and a 'Monoid' instance. 39 | newtype VT k v = VT [Trace k v (Hash v)] deriving (Monoid, Semigroup, Show) 40 | 41 | -- | Record a new trace for building a @key@ with dependencies @deps@, given 42 | -- the hash of the resulting value and the hashes of the dependencies' values.
43 | recordVT :: k -> Hash v -> [(k, Hash v)] -> VT k v -> VT k v 44 | recordVT key valueHash deps (VT ts) = VT $ Trace key deps valueHash : ts 45 | 46 | -- | Given a @key@, the hash of its current value, a function to compute the 47 | -- hash of a key's current value, and a set of verifying traces, return 'True' 48 | -- if the @key@ is up-to-date. 49 | verifyVT :: (Monad m, Eq k, Eq v) => k -> Hash v -> (k -> m (Hash v)) -> VT k v -> m Bool 50 | verifyVT key valueHash fetchHash (VT ts) = anyM match ts 51 | where 52 | match (Trace k deps result) 53 | | k /= key || result /= valueHash = return False 54 | | otherwise = andM [ (h==) <$> fetchHash k | (k, h) <- deps ] 55 | 56 | ------------------------------ Constructive traces ----------------------------- 57 | 58 | -- | An abstract data type for a set of constructive traces equipped with 59 | -- 'recordCT', 'isDirtyCT', 'constructCT' and a 'Monoid' instance. 60 | newtype CT k v = CT [Trace k v v] deriving (Monoid, Semigroup, Show) 61 | 62 | -- | Check if a given @key@ is dirty w.r.t a @store@. 63 | isDirtyCT :: (Eq k, Hashable v) => k -> Store (CT k v) k v -> Bool 64 | isDirtyCT key store = let CT ts = getInfo store in not (any match ts) 65 | where 66 | match (Trace k deps result) = k == key 67 | && result == getValue key store 68 | && and [ getHash k store == h | (k, h) <- deps ] 69 | 70 | -- | Record a new trace for building a @key@ with dependencies @deps@, given 71 | -- the resulting @value@ and the hashes of the dependencies' values. 72 | recordCT :: k -> v -> [(k,Hash v)] -> CT k v -> CT k v 73 | recordCT key value deps (CT ts) = CT $ Trace key deps value : ts 74 | 75 | -- | Given a function to compute the hash of a key's current value, 76 | -- a @key@, and a set of constructive traces, return every value recorded for 77 | -- the @key@ in a trace whose dependency hashes match (possibly none). Choosing 78 | -- among them, e.g. preferring the current value, is left to the caller.
79 | constructCT :: (Monad m, Eq k, Eq v) => k -> (k -> m (Hash v)) -> CT k v -> m [v] 80 | constructCT key fetchHash (CT ts) = catMaybes <$> mapM match ts 81 | where 82 | match (Trace k deps result) 83 | | k /= key = return Nothing 84 | | otherwise = do 85 | sameInputs <- andM [ (h==) <$> fetchHash k | (k, h) <- deps ] 86 | return $ if sameInputs then Just result else Nothing 87 | 88 | --------------------------- Deep constructive traces --------------------------- 89 | 90 | -- | Our current model has the same representation as 'CT', but requires an 91 | -- additional invariant: if a DCT contains a trace for a key @k@, then it must 92 | -- also contain traces for each of its non-input dependencies. 93 | newtype DCT k v = DCT [Trace k v v] deriving (Monoid, Semigroup, Show) 94 | 95 | -- | Extract the tree of input dependencies of a given key. 96 | deepDependencies :: (Eq k, Hashable v) => DCT k v -> Hash v -> k -> [k] 97 | deepDependencies (DCT ts) valueHash key = 98 | case [ map fst deps | Trace k deps v <- ts, k == key, hash v == valueHash ] of 99 | [] -> [key] -- The @key@ is an input 100 | (deps:_) -> deps -- We assume there is only one record for a pair (k, v) 101 | 102 | -- | Record a new trace for @key@: each of @deps@ is expanded into its deep 103 | -- dependencies (looked up by the dependency's own hash, via @fetchHash@). 104 | recordDCT :: forall k v m. (Eq k, Hashable v, Monad m) 105 | => k -> v -> [k] -> (k -> m (Hash v)) -> DCT k v -> m (DCT k v) 106 | recordDCT key value deps fetchHash dct@(DCT ts) = do 107 | deepDeps <- concat <$> mapM (\d -> (\h -> deepDependencies dct h d) <$> fetchHash d) deps 108 | hs <- mapM fetchHash deepDeps 109 | return $ DCT $ Trace key (zip deepDeps hs) value : ts 110 | 111 | -- | Given a function to compute the hash of a key's current value, 112 | -- a @key@, and a set of deep constructive traces, return the values recorded 113 | -- in all traces for the @key@ whose dependency hashes match (possibly none). 114 | constructDCT :: forall k v m.
(Eq k, Hashable v, Monad m) 115 | => k -> (k -> m (Hash v)) -> DCT k v -> m [v] 116 | constructDCT key fetchHash (DCT ts) = constructCT key fetchHash (CT ts) 117 | 118 | ----------------- Step traces: a refinement of verifying traces ---------------- 119 | -- Step traces are an optimised version of the direct implementation of 120 | -- verifying traces (as given by the 'VT' datatype), which is used by Shake. 121 | -- They support the same high-level interface that allows verifying if a key is 122 | -- up to date ('verifyST') as well as recording new traces ('recordST'). 123 | 124 | newtype Step = Step Int deriving (Enum, Eq, Ord, Show) 125 | instance Semigroup Step where Step a <> Step b = Step $ a + b 126 | instance Monoid Step where mempty = Step 0; mappend = (<>) 127 | 128 | data TraceST k r = TraceST k [k] r deriving Show 129 | 130 | -- | A step trace records the hash of the resulting value, the step it was last 131 | -- built, and the step where it last changed. 132 | newtype ST k v = ST [TraceST k (Hash v, Step, Step)] 133 | deriving (Monoid, Semigroup, Show) 134 | 135 | latestST :: Eq k => k -> ST k v -> Maybe (TraceST k (Hash v, Step, Step)) 136 | latestST k (ST ts) = fmap snd $ listToMaybe $ sortOn (Down . fst) 137 | [(step, t) | t@(TraceST k2 _ (_, step, _)) <- ts, k == k2] 138 | 139 | -- | Record a new trace for building a @key@ with dependencies @deps@. 140 | recordST :: (Hashable v, Eq k) => Step -> k -> v -> [k] -> ST k v -> ST k v 141 | recordST step key value deps (ST ts) = 142 | let hv = hash value 143 | lastChange = case latestST key (ST ts) of 144 | -- I rebuilt, didn't change, so use the old change time 145 | Just (TraceST _ _ (hv2, _, chng)) | hv2 == hv -> chng 146 | _ -> step 147 | in ST $ TraceST key deps (hash value, step, lastChange) : ts 148 | 149 | -- | Given a @key@, its current @value@, an action to demand a dependency, and 150 | -- an action returning the current step traces, return 'True' if the @key@ is 151 | -- up-to-date.
152 | verifyST :: (Monad m, Eq k, Hashable v) => k -> v -> (k -> m ()) -> m (ST k v) -> m Bool 153 | verifyST key value demand st = do 154 | me <- latestST key <$> st 155 | case me of 156 | Just (TraceST _ deps (hv, built, _)) | hash value == hv -> do 157 | mapM_ demand deps 158 | st <- st 159 | -- things with no traces must be inputs, which I'm going to ignore for now... 160 | return $ and [ built >= chng | Just (TraceST _ _ (_, _, chng)) <- map (`latestST` st) deps] 161 | _ -> return False 162 | -------------------------------------------------------------------------------- /src/Build/Utilities.hs: -------------------------------------------------------------------------------- 1 | -- | General utilities useful in the rest of the package. 2 | module Build.Utilities ( 3 | -- * Graph operations 4 | graph, reachable, topSort, reach, reachM 5 | ) where 6 | 7 | import Algebra.Graph 8 | import qualified Algebra.Graph.ToGraph as T 9 | 10 | import Data.Either.Extra 11 | import Data.Functor.Identity 12 | import qualified Data.Set as Set 13 | 14 | -- | Build a dependency graph given a function for computing dependencies of a 15 | -- key and a target key. 16 | graph :: Ord k => (k -> [k]) -> k -> Graph k 17 | graph deps key = transpose $ overlays [ star k (deps k) | k <- keys Set.empty [key] ] 18 | where 19 | keys seen [] = Set.toList seen 20 | keys seen (x:xs) 21 | | x `Set.member` seen = keys seen xs 22 | | otherwise = keys (Set.insert x seen) (deps x ++ xs) 23 | 24 | -- | Compute all keys reachable via dependencies from a target key. 25 | reachable :: Ord k => (k -> [k]) -> k -> [k] 26 | reachable deps key = vertexList (graph deps key) 27 | 28 | -- | Compute the topological sort of a graph or return @Nothing@ if the graph 29 | -- has cycles. 30 | topSort :: Ord k => Graph k -> Maybe [k] 31 | topSort = eitherToMaybe . T.topSort 32 | 33 | -- | Given a function to compute successors of a vertex, apply it recursively 34 | -- starting from a given vertex.
Returns @Nothing@ if this process does not 35 | -- terminate because of cycles. Note that the current implementation is very 36 | -- inefficient: it trades efficiency for simplicity. The resulting list is 37 | -- likely to contain an exponential number of duplicates. 38 | reach :: Eq a => (a -> [a]) -> a -> Maybe [a] 39 | reach successors = runIdentity . reachM (return . successors) 40 | 41 | -- | Given a monadic function to compute successors of a vertex, apply it 42 | -- recursively starting from a given vertex. Returns @Nothing@ if this process 43 | -- does not terminate because of cycles. Note that the current implementation is 44 | -- very inefficient: it trades efficiency for simplicity. The resulting list is 45 | -- likely to contain an exponential number of duplicates. 46 | reachM :: (Eq a, Monad m) => (a -> m [a]) -> a -> m (Maybe [a]) 47 | reachM successors a = fmap (filter (/= a)) <$> go [] a 48 | where 49 | go xs x | x `elem` xs = return Nothing -- A cycle is detected 50 | | otherwise = do res <- traverse (go (x:xs)) =<< successors x 51 | return $ ((x:xs)++) . concat <$> sequence res 52 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: nightly-2024-03-01 # ghc-9.8.1 2 | packages: 3 | - '.' 4 | -------------------------------------------------------------------------------- /test/Examples.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ConstraintKinds, ImpredicativeTypes, FlexibleContexts, GADTs #-} 2 | module Examples where 3 | 4 | import Build.Task 5 | import Control.Applicative 6 | import Control.Monad.State.Class 7 | import Data.Map (Map) 8 | 9 | import qualified Data.Map as Map 10 | 11 | -- | A useful fetch for experimenting with build systems in interactive GHC. 
12 | fetchIO :: (Show k, Read v) => k -> IO v 13 | fetchIO k = do putStr (show k ++ ": "); read <$> getLine 14 | 15 | --------------------------- Task Functor: Collatz --------------------------- 16 | 17 | -- Collatz sequence, starting with an initial value n = c[0]. 18 | -- For example, if n = 6, the sequence is 6, 3, 10, 5, 16, 8, 4, 2, 1, ... 19 | collatz :: Tasks Functor Integer Integer 20 | collatz n | n <= 0 = Nothing 21 | | otherwise = Just $ \fetch -> f <$> fetch (n - 1) 22 | where 23 | f k | even k = k `div` 2 24 | | otherwise = 3 * k + 1 25 | 26 | -- A good demonstration of early cut-off: 27 | -- * Task Collatz sequence from n = 6: 6, 3, 10, 5, 16, 8, 4, 2, 1, ... 28 | -- * Change n from 6 to 40 and rebuild: 40, 20, 10, 5, 16, 8, 4, 2, 1, ... 29 | -- * The recomputation should be cut-off after 10. 30 | 31 | ------------------------ Task Applicative: Fibonacci ------------------------ 32 | 33 | -- Generalised Fibonacci sequence: 34 | -- f[0] = n 35 | -- f[1] = m 36 | -- f[k] = f[k - 1] + f[k - 2] 37 | -- For example, with (n, m) = (0, 1) we get usual Fibonacci sequence, and if 38 | -- (n, m) = (2, 1) we get Lucas sequence: 2, 1, 3, 4, 7, 11, 18, 29, 47, ... 39 | fibonacci :: Tasks Applicative Integer Integer 40 | fibonacci n 41 | | n >= 2 = Just $ \fetch -> (+) <$> fetch (n-1) <*> fetch (n-2) 42 | | otherwise = Nothing 43 | 44 | -- Fibonacci numbers are a classic example of memoization: a non-minimal build 45 | -- system will take ages to compute f[100], doing O(f[100]) recursive calls. 46 | -- The right approach is to build the dependency graph and execute computations 47 | -- in the topological order. 48 | 49 | --------------------------- Task Monad: Ackermann --------------------------- 50 | 51 | -- Ackermann function: 52 | -- a[0, n] = n + 1 53 | -- a[m, 0] = a[m - 1, 1] 54 | -- a[m, n] = a[m - 1, a[m, n - 1]] 55 | -- Formally, it has no inputs, but we return Nothing for negative inputs. 56 | -- For example, a[m, 1] = 2, 3, 5, 13, 65535, ... 
57 | ackermann :: Tasks Monad (Integer, Integer) Integer 58 | ackermann (m, n) 59 | | m < 0 || n < 0 = Nothing 60 | | m == 0 = Just $ const $ pure (n + 1) 61 | | n == 0 = Just $ \fetch -> fetch (m - 1, 1) 62 | | otherwise = Just $ \fetch -> do index <- fetch (m, n - 1) 63 | fetch (m - 1, index) 64 | 65 | -- A cloud version of the Ackermann task that uses a cache to store known values 66 | -- of the Ackermann function. 67 | type Cache = Map (Integer, Integer) Integer 68 | 69 | cloudAckermann :: Tasks (MonadState Cache) (Integer, Integer) Integer 70 | cloudAckermann (m, n) 71 | | m < 0 || n < 0 = Nothing 72 | | m == 0 = Just $ const $ pure (n + 1) 73 | | n == 0 = Just $ \fetch -> fetch (m - 1, 1) 74 | | otherwise = Just $ \fetch -> do 75 | cache <- get 76 | case Map.lookup (m, n) cache of 77 | Nothing -> do index <- fetch (m, n - 1) 78 | value <- fetch (m - 1, index) 79 | modify (Map.insert (m, n) value) 80 | return value 81 | Just value -> return value 82 | 83 | -- Unlike Collatz and Fibonacci computations, the Ackermann computation cannot 84 | -- be statically analysed for dependencies. We can only find the first dependency 85 | -- statically (Ackermann m (n - 1)), but not the second one. 
86 | 87 | ----------------------------- Spreadsheet examples ----------------------------- 88 | 89 | sprsh1 :: Tasks Applicative String Integer 90 | sprsh1 "B1" = Just $ \fetch -> (+) <$> fetch "A1" <*> fetch "A2" 91 | sprsh1 "B2" = Just $ \fetch -> (*2) <$> fetch "B1" 92 | sprsh1 _ = Nothing 93 | 94 | sprsh2 :: Tasks Monad String Integer 95 | sprsh2 "B1" = Just $ \fetch -> do 96 | c1 <- fetch "C1" 97 | if c1 == 1 then fetch "B2" else fetch "A2" 98 | sprsh2 "B2" = Just $ \fetch -> do 99 | c1 <- fetch "C1" 100 | if c1 == 1 then fetch "A1" else fetch "B1" 101 | sprsh2 _ = Nothing 102 | 103 | sprsh5 :: Tasks Monad String String 104 | sprsh5 "B1" = Just $ \fetch -> do 105 | formula <- fetch "B1-formula" 106 | evalFormula fetch formula 107 | where 108 | evalFormula = undefined 109 | sprsh5 _ = Nothing 110 | 111 | sprsh3 :: Tasks Alternative String Integer 112 | sprsh3 "B1" = Just $ \fetch -> (+) <$> fetch "A1" <*> (pure 1 <|> pure 2) 113 | sprsh3 _ = Nothing 114 | 115 | sprsh4 :: Tasks MonadFail String Integer 116 | sprsh4 "B1" = Just $ \fetch -> do 117 | a1 <- fetch "A1" 118 | a2 <- fetch "A2" 119 | if a2 == 0 then fail "division by 0" else return (a1 `div` a2) 120 | sprsh4 _ = Nothing 121 | 122 | indirect :: Tasks Monad String Integer 123 | indirect key | key /= "B1" = Nothing 124 | | otherwise = Just $ \fetch -> do c1 <- fetch "C1" 125 | fetch ("A" ++ show c1) 126 | 127 | staticIF :: Bool -> Tasks Applicative String Int 128 | staticIF b "B1" = Just $ \fetch -> 129 | if b then fetch "A1" else (+) <$> fetch "A2" <*> fetch "A3" 130 | staticIF _ _ = Nothing 131 | 132 | -------------------------- Dynamic programming example ------------------------- 133 | 134 | data K = A Int | B Int | C Int Int deriving Eq 135 | 136 | editDistance :: Tasks Monad K Int 137 | editDistance (C i 0) = Just $ const $ pure i 138 | editDistance (C 0 j) = Just $ const $ pure j 139 | editDistance (C i j) = Just $ \fetch -> do 140 | ai <- fetch (A i) 141 | bj <- fetch (B j) 142 | if ai == bj 143 | 
then fetch (C (i - 1) (j - 1)) 144 | else do 145 | insert <- fetch (C i (j - 1)) 146 | delete <- fetch (C (i - 1) j ) 147 | replace <- fetch (C (i - 1) (j - 1)) 148 | return (1 + minimum [insert, delete, replace]) 149 | editDistance _ = Nothing 150 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, ImpredicativeTypes, ConstraintKinds #-} 2 | import Control.Monad 3 | import Data.Bool 4 | import Data.List.Extra 5 | import Data.Maybe 6 | import System.Exit 7 | 8 | import qualified Data.Map as Map 9 | 10 | import Build 11 | import Build.Store 12 | import Build.System 13 | import Build.Task 14 | import Build.Task.Free() 15 | 16 | import Spreadsheet 17 | import Examples() 18 | 19 | -- | A build system that accepts a list of target keys. 20 | type MultiBuild c i k v = Tasks c k v -> [k] -> Store i k v -> Store i k v 21 | 22 | sequentialMultiBuild :: Build Monad i k v -> MultiBuild Monad i k v 23 | sequentialMultiBuild build task outputs store = case outputs of 24 | [] -> store 25 | (k:ks) -> sequentialMultiBuild build task ks (build task k store) 26 | 27 | sequentialMultiBuildA :: Build Applicative i k v -> MultiBuild Applicative i k v 28 | sequentialMultiBuildA build task outputs store = case outputs of 29 | [] -> store 30 | (k:ks) -> sequentialMultiBuildA build task ks (build task k store) 31 | 32 | inputCells :: [Cell] 33 | inputCells = [ "A1", "A2", "A3" ] 34 | 35 | inputs :: i -> Store i Cell Int 36 | inputs i = initialise i $ \cell -> fromMaybe 0 $ lookup cell $ zip inputCells [1..] 
-- | The spreadsheet used for monadic tasks. Cells @C1@ and @C2@ refer to each
-- other statically (@C1 = IfZero B3 C2 1000@, @C2 = IfZero B3 2000 C1@).
spreadsheet :: Spreadsheet
spreadsheet = sheetWith (IfZero "B3" "C2" 1000)

-- | The spreadsheet used for applicative tasks. It is 'spreadsheet' with the
-- zero-branch of @C1@ pointing at @B2@ instead of @C2@, so @C1@ no longer
-- refers back to @C2@.
acyclicSpreadsheet :: Spreadsheet
acyclicSpreadsheet = sheetWith (IfZero "B3" "B2" 1000)

-- | The formula table shared by both spreadsheets. Only the formula stored in
-- cell @C1@ differs between them, so it is supplied by the caller. The trailing
-- comments give the values expected for the inputs @A1 = 1@, @A2 = 2@, @A3 = 3@.
sheetWith :: Formula -> Spreadsheet
sheetWith c1 cell = case name cell of
    "B1"  -> Just 1                          -- 1
    "B2"  -> Just $ "B1" + 1                 -- 1 + 1 == 2
    "B3"  -> Just $ "A3" * abs "B2"          -- 3 * 2 == 6
    "C1"  -> Just c1                         -- 1000
    "C2"  -> Just $ IfZero "B3" 2000 "C1"    -- 1000
    "C3"  -> Just $ Random 1 6               -- 1..6
    "F0"  -> Just 0                          -- 0
    "F1"  -> Just 1                          -- 1
    'F':_ -> Just $ rel (-1) 0 + rel (-2) 0  -- Fn = F(n - 1) + F(n - 2)
    _     -> Nothing

-- | The cells that every build system under test is asked to bring up to date.
targets :: [Cell]
targets =
    [ "A1", "A2", "A3"
    , "B1", "B2", "B3"
    , "C1", "C2"
    , "F0", "F1", "F4" ]

-- | Monadic task description derived from 'spreadsheet'.
tasks :: Tasks Monad Cell Int
tasks = spreadsheetTask spreadsheet

-- | Applicative task description derived from 'acyclicSpreadsheet'.
tasksA :: Tasks Applicative Cell Int
tasksA = spreadsheetTaskA acyclicSpreadsheet

-- | Run a monadic build system on all 'targets', one after another, starting
-- from the store @inputs i@, and check every target with 'correctBuild'.
-- Prints the verdict and returns whether the test passed. A build system whose
-- trimmed name is @"dumb"@ passes even when its result is incorrect.
test :: String -> Build Monad i Cell Int -> i -> IO Bool
test name build i = do
    -- when False $ putStrLn $ "========\n" ++ show (getInfo result) ++ "\n========"
    putStr $ name ++ " is "
    report
  where
    store   = inputs i
    result  = sequentialMultiBuild build tasks targets store
    correct = all (correctBuild tasks store result) targets
    report
        | correct             = do putStr "correct: [OK]\n"; return True
        | trim name == "dumb" = do putStr "incorrect, which is [OK]\n"; return True
        | otherwise           = do putStr "incorrect: [FAIL]\n"; return False

-- | Run an applicative build system on all 'targets' using 'tasksA', print the
-- verdict and return whether every target was built correctly.
--
-- NOTE(review): correctness is checked against 'tasks' (the monadic
-- description), not 'tasksA'. The two spreadsheets differ only in the
-- zero-branch of @C1@, which is not selected for the inputs used here
-- (@B3 == 6@) -- confirm that this is intended.
testA :: String -> Build Applicative i Cell Int -> i -> IO Bool
testA name build i =
    -- when False $ putStrLn $ "========\n" ++ show (getInfo result) ++ "\n========"
    correct <$ putStrLn (name ++ " is " ++ bool "incorrect: [FAIL]" "correct: [OK]" correct)
  where
    store   = inputs i
    result  = sequentialMultiBuildA build tasksA targets store
    correct = all (correctBuild tasks store result) targets

-- | Run every test and report whether all of them passed. All tests are run
-- even if an earlier one fails.
testSuite :: IO Bool
testSuite = do
    -- The names carry trailing whitespace; 'test' applies 'trim' before
    -- comparing a name against "dumb".
    outcomes <- sequence
        [ test  "dumb "      dumb       ()
        , test  "busy "      busy       ()
        , test  "memo "      memo       ()
        , testA "make "      make       (0, Map.empty)
        , testA "ninja "     ninja      mempty
        , testA "cloudBuild" cloudBuild mempty
        , test  "excel "     excel      (const True, mempty)
        , test  "shake "     shake      mempty
        , test  "bazel "     bazel      mempty
        , test  "cloudShake" cloudShake mempty
        , testA "buck "      buck       mempty
        , test  "nix "       nix        mempty ]
    return (and outcomes)

-- | Entry point: exit with an error message if any test failed.
main :: IO ()
main = testSuite >>= \success ->
    unless success $ die "\n========== At least one test failed! ==========\n"

--------------------------------------------------------------------------------
/test/Spreadsheet.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE ImpredicativeTypes #-}
module Spreadsheet where

import Data.Bool
import Data.Char
import Data.Maybe
import Data.String
import Build.Store
import Build.Task
import Text.Read

-- | A 'Cell' is identified by a pair of integers: its 'row' and its 'column'.
-- An @IsString@ instance is provided for convenience, so that @"A8"@ stands
-- for @Cell 8 0@.
data Cell = Cell
    { row    :: Int
    , column :: Int
    } deriving (Eq, Ord)

-- | Get the name of a 'Cell', e.g. @name (Cell 8 0) == "A8"@.
-- Only columns 0..25 have a letter (@A@..@Z@). Any other cell is rendered in
-- constructor notation instead, e.g. @name (Cell 8 30) == "Cell 8 30"@.
name :: Cell -> String
name (Cell r c)
    | c >= 0 && c < 26 = chr (c + ord 'A') : show r
    -- The fallback is spelled out by hand: 'show' for 'Cell' is defined as
    -- 'name', so delegating to @show (Cell r c)@ here would never terminate.
    | otherwise        = "Cell " ++ showsPrec 11 r (' ' : showsPrec 11 c "")

-- | Parse a cell name such as @"A8"@: a single ASCII upper-case column letter
-- followed by a row index that 'readMaybe' accepts as an 'Int'. Any other
-- string results in a call to 'error'.
instance IsString Cell where
    fromString string = case string of
        columnChar : rowIndex -> Cell r c
          where
            r = fromMaybe parseError (readMaybe rowIndex)
            c | isAsciiUpper columnChar = ord columnChar - ord 'A'
              | otherwise               = parseError
        _ -> parseError
      where
        parseError = error $ "Cannot parse cell name " ++ string

-- | Cells are shown by their 'name'.
instance Show Cell where
    show = name

-- | A cell is hashed by hashing its row and its column.
instance Hashable Cell where
    hash (Cell r c) = Cell <$> hash r <*> hash c

-- | Some cells contain formulas for computing values from other cells. Formulas
-- include:
--
-- * 'Constant' integer values.
--
-- * References to cells: a 'Reference' names a cell directly, whereas a
--   'RelativeReference' gives row and column offsets from the cell that holds
--   the formula.
--
-- * Simple arithmetic functions, such as 'Unary' negation and 'Binary'
--   addition.
--
-- * Conditional expressions 'IfZero' @x y z@ that evaluate to @y@ if @x@ is
--   zero and to @z@ otherwise. Conditionals require dynamic dependencies to be
--   handled correctly, because their static dependencies may form cycles.
--   Example:
--
--   > A1 = IfZero B1 A2 C1
--   > A2 = IfZero B1 C2 A1
--
--   Statically there is a mutual dependency between A1 and A2, but dynamically
--   there is either A1 -> A2 or A2 -> A1.
--
-- * Finally, there is a 'Random' formula that returns a random value in a
--   specified range @[low..high]@. This introduces non-determinism, including
--   failures when the range is empty.
data Formula
    = Constant Int
    | Reference Cell
    | RelativeReference Int Int
    | Unary (Int -> Int) Formula
    | Binary (Int -> Int -> Int) Formula Formula
    | IfZero Formula Formula Formula
    | Random Int Int

-- | Numeric literals and arithmetic operators build formulas, so that
-- @"B1" + 1@ denotes a 'Binary' addition of a 'Reference' and a 'Constant'.
-- 'negate' is not defined here and therefore uses the class default.
instance Num Formula where
    fromInteger = Constant . fromInteger
    (+)         = Binary (+)
    (-)         = Binary (-)
    (*)         = Binary (*)
    abs         = Unary abs
    signum      = Unary signum

-- | A string literal used as a formula is a 'Reference' to the named cell.
instance IsString Formula where
    fromString = Reference . fromString

-- | A short alias for 'RelativeReference'.
rel :: Int -> Int -> Formula
rel = RelativeReference

-- | A spreadsheet is a partial mapping of cells to formulas. Cells for which
-- the mapping returns @Nothing@ are inputs.
type Spreadsheet = Cell -> Maybe Formula

-- | Monadic spreadsheet computation. An 'IfZero' formula first evaluates its
-- condition and then only the branch that was selected. 'Random' formulas are
-- not supported and raise an 'error' when evaluated.
spreadsheetTask :: Spreadsheet -> Tasks Monad Cell Int
spreadsheetTask spreadsheet cell@(Cell r c) = case spreadsheet cell of
    Nothing      -> Nothing -- This is an input
    Just formula -> Just $ evaluate formula
  where
    evaluate top fetch = go top
      where
        go (Constant x)              = pure x
        go (Reference target)        = fetch target
        -- Offsets are relative to the cell whose formula is being evaluated.
        go (RelativeReference dr dc) = fetch (Cell (r + dr) (c + dc))
        go (Unary  op fx)            = op <$> go fx
        go (Binary op fx fy)         = op <$> go fx <*> go fy
        go (IfZero fx fy fz)         = do
            x <- go fx
            if x == 0 then go fy else go fz
        go (Random _ _)              = error "Not supported by monadic tasks"

-- | Applicative spreadsheet computation.
-- Without monadic bind a branch cannot be chosen before it is evaluated, so
-- all three sub-formulas of an 'IfZero' are evaluated and combined with '<*>'
-- in the order @z@, @y@, @x@; the result is then selected with 'bool'.
-- 'Random' formulas raise an 'error' when evaluated.
spreadsheetTaskA :: Spreadsheet -> Tasks Applicative Cell Int
spreadsheetTaskA spreadsheet cell@(Cell r c) = case spreadsheet cell of
    Just formula -> Just $ evaluate formula
    Nothing      -> Nothing -- This is an input
  where
    evaluate top fetch = go top
      where
        go (Constant x)              = pure x
        go (Reference target)        = fetch target
        -- Offsets are relative to the cell whose formula is being evaluated.
        go (RelativeReference dr dc) = fetch (Cell (r + dr) (c + dc))
        go (Unary  op fx)            = fmap op (go fx)
        go (Binary op fx fy)         = fmap op (go fx) <*> go fy
        go (IfZero fx fy fz)         = pick <$> go fz <*> go fy <*> go fx
        go (Random _ _)              = error "Random not implemented"

        -- Choose @y@ when the condition @x@ is zero and @z@ otherwise.
        pick z y x = bool z y (x == 0)
--------------------------------------------------------------------------------