├── test ├── test-assets │ ├── file1.md │ ├── file2.md │ └── dir1 │ │ └── file3.md ├── user.clj └── arachne │ └── fileset_test.clj ├── .gitignore ├── deps.edn ├── .circleci └── config.yml ├── src └── arachne │ ├── fileset │ ├── tmpdir.clj │ ├── util.clj │ ├── specs.clj │ └── impl.clj │ └── fileset.clj ├── README.md └── LICENSE /test/test-assets/file1.md: -------------------------------------------------------------------------------- 1 | this is a file -------------------------------------------------------------------------------- /test/test-assets/file2.md: -------------------------------------------------------------------------------- 1 | this is another file 2 | -------------------------------------------------------------------------------- /test/test-assets/dir1/file3.md: -------------------------------------------------------------------------------- 1 | this is a file inside a dir -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | *.iml 13 | .idea 14 | .DS_Store 15 | .cpcache -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | :deps {org.clojure/clojure {:mvn/version "1.10.0-alpha4"} 3 | org.clojure/tools.logging {:mvn/version "0.3.1"} 4 | org.arachne-framework/valuehash {:git/url "https://github.com/arachne-framework/valuehash.git" 5 | :sha "ff1d4b7f1260daf41c786a61cb45d02871b7baf9"}} 6 | :aliases 7 | {:run-tests {:main-opts ["-m" "cognitect.test-runner"]} 8 | :local {:extra-deps {org.arachne-framework/valuehash {:local/root "../valuehash"}}} 9 | :test {:extra-paths ["test"] 10 | :extra-deps {com.cognitect/test-runner {:git/url "https://github.com/cognitect-labs/test-runner.git" 11 | :sha "028a6d41ac9ac5d5c405dfc38e4da6b4cc1255d5"} 12 | commons-io/commons-io {:mvn/version "2.5"} 13 | org.clojure/test.check {:mvn/version "0.9.0"}}}}} 14 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Clojure CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-clojure/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | - image: circleci/clojure:tools-deps 11 | working_directory: ~/repo 12 | 13 | steps: 14 | - checkout 15 | 16 | # Download and cache dependencies 17 | - restore_cache: 18 | keys: 19 | - v1-dependencies-{{ checksum "deps.edn" }} 20 | # fallback to using the latest cache if no exact match is found 21 | - v1-dependencies- 22 | 23 | - run: clojure -Atest -Srepro -Sdescribe 24 | 25 | - save_cache: 26 | paths: 27 | - ~/.m2 28 | - ~/.gitlibs 29 | key: v1-dependencies-{{ checksum "deps.edn" }} 30 | 31 | # run tests! 32 | - run: clojure -Atest:run-tests 33 | -------------------------------------------------------------------------------- /src/arachne/fileset/tmpdir.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.fileset.tmpdir 2 | "Tools for creating and managing temporary directories" 3 | (:require [arachne.fileset.util :as util]) 4 | (:import [java.nio.file Files] 5 | [java.nio.file.attribute FileAttribute])) 6 | 7 | (def ^:private tmp-registry (atom #{})) 8 | 9 | (.addShutdownHook (Runtime/getRuntime) 10 | (Thread. (fn [] 11 | (util/debug "cleaning up temp directories") 12 | (dorun (map util/delete! @tmp-registry))))) 13 | 14 | (defn tmpdir! 15 | "Return a new temporary directory as a java.io.File. The directory will be in 16 | the system temporary directory, and tracked for deletion when the JVM 17 | terminates (using a JVM shutdown hook.)" 18 | [] 19 | (let [f (.toFile (Files/createTempDirectory "arachne-fs" 20 | (make-array FileAttribute 0)))] 21 | (util/debug "Creating temp directory at " (.getPath f)) 22 | (swap! tmp-registry conj f) 23 | f)) 24 | -------------------------------------------------------------------------------- /test/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | (:require 3 | [clojure.spec.alpha :as s] 4 | [clojure.spec.test.alpha :as st] 5 | [clojure.spec.gen.alpha :as g] 6 | [clojure.java.io :as io] 7 | [clojure.test :as t :refer [deftest is]])) 8 | 9 | ;; A function 10 | (defn filtered-file-seq 11 | "Return a seq of all the files in the given directory, subject to the given 12 | predicate." 13 | [dir pred] 14 | (filter pred (file-seq dir))) 15 | 16 | ;; Example-based test 17 | (deftest test-filtered-file-seq 18 | (let [dir (io/file "./test") 19 | clojure-file? #(.endsWith (.getName %) ".clj") 20 | clojure-files (filtered-file-seq dir clojure-file?)] 21 | (is (= 2 (count clojure-files))))) 22 | 23 | 24 | ;; Goal: Spec this function, and validate our usage of it in the example-based test. 25 | 26 | ;; We do *not* want to do generative testing in this case, because we don't want 27 | ;; to have to write a file generator or create a ton of files. 28 | 29 | (s/def ::directory #(and (instance? java.io.File %) 30 | (.isDirectory %))) 31 | 32 | (s/def ::file #(instance? java.io.File %)) 33 | 34 | (s/def ::file-predicate 35 | (s/fspec :args (s/cat :file ::file) 36 | :ret any?)) 37 | 38 | (s/fdef filtered-file-seq 39 | :args (s/cat :dir ::directory 40 | :pred ::file-predicate) 41 | :ret (s/coll-of ::file)) 42 | 43 | 44 | (comment 45 | 46 | (st/instrument) 47 | (t/run-tests) 48 | ;;=> clojure.lang.ExceptionInfo: Unable to construct gen at: [:file] for: :user/file 49 | 50 | ;; Gen override does not appear to be used... 51 | (st/instrument {:gen {::file-predicate (g/return (constantly true))}}) 52 | (t/run-tests) 53 | ;;=> clojure.lang.ExceptionInfo: Unable to construct gen at: [:file] for: :user/file 54 | 55 | 56 | ;; Cannot override spec, (instrument {:spec ..}) can only override by symbol name, not by spec name. 57 | 58 | ;; What now? 59 | 60 | ) 61 | -------------------------------------------------------------------------------- /src/arachne/fileset/util.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.fileset.util 2 | (:require [clojure.tools.logging :as log]) 3 | (:import 4 | [java.io File FileInputStream] 5 | [java.nio.file Files StandardCopyOption FileVisitOption OpenOption] 6 | [java.security MessageDigest])) 7 | 8 | (defmacro with-let 9 | "Binds resource to binding and evaluates body. Then, returns resource. It's 10 | a cross between doto and with-open." 11 | [[binding resource] & body] 12 | `(let [ret# ~resource ~binding ret#] ~@body ret#)) 13 | 14 | (defn delete! 15 | "Recursively delete a directory and its contents" 16 | [f] 17 | (when (.isDirectory f) 18 | (doseq [child (seq (.listFiles f))] (delete! child))) 19 | (.delete f)) 20 | 21 | (defn- signature 22 | "Get signature (string) of a digest." 23 | [^MessageDigest algorithm] 24 | (let [size (* 2 (.getDigestLength algorithm)) 25 | sig (.toString (BigInteger. 1 (.digest algorithm)) 16) 26 | padding (apply str (repeat (- size (count sig)) "0"))] 27 | (str padding sig))) 28 | 29 | (defn md5 30 | "Get the MD5 hash of a file" 31 | [file] 32 | (with-open [fis (if (instance? File file) 33 | (FileInputStream. file) 34 | (Files/newInputStream file (into-array java.nio.file.OpenOption [])))] 35 | (let [md (MessageDigest/getInstance "MD5") 36 | buf (byte-array 1024)] 37 | (loop [n (.read fis buf)] 38 | (if (= -1 n) 39 | (signature md) 40 | (do 41 | (.update md buf 0 n) 42 | (recur (.read fis buf)))))))) 43 | 44 | (defn move 45 | [^File src ^File dest & {:keys [atomic replace] 46 | :or {atomic StandardCopyOption/ATOMIC_MOVE 47 | replace StandardCopyOption/REPLACE_EXISTING}}] 48 | (let [opts (filter identity [atomic replace]) 49 | opts-array (into-array StandardCopyOption opts)] 50 | (Files/move (.toPath src) (.toPath dest) opts-array))) 51 | 52 | (defn hard-link 53 | [^File existing-file ^File link-file] 54 | (Files/deleteIfExists (.toPath link-file)) 55 | (Files/createLink (.toPath link-file) (.toPath existing-file))) 56 | 57 | (defn walk-file-tree 58 | "Wrap java.nio.Files/walkFileTree to easily toggle symlink-following behavior." 59 | [root visitor & {:keys [follow-symlinks] 60 | :or {follow-symlinks true}}] 61 | (let [walk-opts (if follow-symlinks #{FileVisitOption/FOLLOW_LINKS} #{})] 62 | (Files/walkFileTree root walk-opts Integer/MAX_VALUE visitor))) 63 | 64 | (defn debug 65 | "Log debug using a formatted message" 66 | [msg & args] 67 | (log/debug (apply format msg args))) 68 | 69 | (defn warn 70 | "Log a warning using a formatted message" 71 | [msg & args] 72 | (log/warn (apply format msg args))) 73 | -------------------------------------------------------------------------------- /src/arachne/fileset/specs.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.fileset.specs 2 | (:require [clojure.spec.alpha :as s] 3 | [arachne.fileset.impl :as impl]) 4 | (:import [java.io File])) 5 | 6 | ;; Need to override specs here so it doesn't try to gen when I instrument 7 | (s/def ::any-fn (partial instance? clojure.lang.IFn)) 8 | 9 | (s/def ::directory (fn [f] 10 | (and (instance? File f) 11 | (.isDirectory ^File f)))) 12 | 13 | (s/def ::fileset (partial satisfies? impl/ITmpFileSet)) 14 | 15 | (s/fdef arachne.fileset/fileset 16 | :ret ::fileset ) 17 | 18 | (s/fdef arachne.fileset/commit! 19 | :args (s/cat :fileset ::fileset :output-directory ::directory) 20 | :ret ::fileset 21 | :fn (fn [{[fs _] :args ret :ret}] 22 | (= fs ret))) 23 | 24 | (s/def ::regex (partial instance? java.util.regex.Pattern)) 25 | 26 | (s/def ::include (s/coll-of ::regex :min-count 1)) 27 | (s/def ::exclude (s/coll-of ::regex :min-count 1)) 28 | 29 | #_(s/def ::merge-fn 30 | (s/fspec :args (s/cat :old (partial instance? java.io.InputStream) 31 | :new (partial instance? java.io.InputStream) 32 | :out (partial instance? java.io.OutputStream)) 33 | :ret nil?)) 34 | 35 | (s/def ::merge-fn ::any-fn) 36 | 37 | (s/def ::mergers (s/map-of ::regex ::merge-fn :min-count 1)) 38 | 39 | (s/def ::meta (s/map-of keyword? any? :min-count 1)) 40 | 41 | (s/fdef arachne.fileset/add 42 | :args (s/cat :fileset ::fileset 43 | :directory ::directory 44 | :options (s/keys* :opt-un [::include ::exclude ::mergers ::meta])) 45 | :ret ::fileset) 46 | 47 | (s/def ::path string?) 48 | (s/def ::tmpfile (partial satisfies? impl/ITmpFile)) 49 | 50 | (s/fdef arachne.fileset/remove 51 | :args (s/cat :fileset ::fileset 52 | :paths (s/+ ::path)) 53 | :ret ::fileset) 54 | 55 | (s/fdef arachne.fileset/diff 56 | :args (s/cat :before ::fileset 57 | :after ::fileset) 58 | :ret ::fileset) 59 | 60 | (s/fdef arachne.fileset/removed 61 | :args (s/cat :before ::fileset 62 | :after ::fileset) 63 | :ret ::fileset) 64 | 65 | (s/fdef arachne.fileset/added 66 | :args (s/cat :before ::fileset 67 | :after ::fileset) 68 | :ret ::fileset) 69 | 70 | (s/fdef arachne.fileset/changed 71 | :args (s/cat :before ::fileset 72 | :after ::fileset) 73 | :ret ::fileset) 74 | 75 | #_(s/def ::filter-pred 76 | (s/fspec :args (s/cat :tmpfile ::tmpfile) 77 | :ret any?)) 78 | 79 | (s/def ::filter-pred ::any-fn) 80 | 81 | (s/fdef arachne.fileset/filter 82 | :args (s/cat :fileset ::fileset 83 | :pred ::filter-pred) 84 | :ret ::fileset) 85 | 86 | (s/fdef arachne.fileset/filter-by-meta 87 | :args (s/cat :fileset ::fileset 88 | :pred ::filter-pred) 89 | :ret ::fileset) 90 | 91 | (s/fdef arachne.fileset/ls 92 | :args (s/cat :fileset ::fileset) 93 | :ret (s/coll-of ::path)) 94 | 95 | (s/fdef arachne.fileset/hash 96 | :args (s/cat :fileset ::fileset 97 | :path ::path) 98 | :ret string?) 99 | 100 | (s/fdef arachne.fileset/timestamp 101 | :args (s/cat :fileset ::fileset 102 | :path ::path) 103 | :ret integer?) 104 | 105 | (s/fdef arachne.fileset/content 106 | :args (s/cat :fileset ::fileset 107 | :path ::path) 108 | :ret (partial instance? java.io.InputStream)) 109 | 110 | (s/fdef arachne.fileset/empty 111 | :args (s/cat :fileset ::fileset) 112 | :ret ::fileset) 113 | 114 | (s/fdef arachne.fileset/merge 115 | :args (s/cat :filesets (s/+ ::fileset)) 116 | :ret ::fileset) 117 | -------------------------------------------------------------------------------- /src/arachne/fileset.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.fileset 2 | (:refer-clojure :exclude [remove filter empty merge hash]) 3 | (:require [arachne.fileset.specs] 4 | [arachne.fileset.impl :as impl] 5 | [arachne.fileset.util :as futil] 6 | [arachne.fileset.tmpdir :as tmpdir] 7 | [clojure.java.io :as io])) 8 | 9 | (def fileset impl/fileset) 10 | 11 | (defn commit! 12 | "Persist the immutable fileset to a concrete directory. The emitted 13 | files are hard links to the fileset's internal blob storage, and therefore 14 | immutable. 15 | 16 | Note that `commit!` assumes that it is the only process modifying the 17 | destination directory. If another privileged process deletes any of the 18 | contents of the commit dir, they might not be re-created on subsequent 19 | commits." 20 | [fs dir] 21 | (impl/-commit! fs dir)) 22 | 23 | (defn add 24 | "Return a Fileset with all the files in the given directory added. 25 | 26 | The directory may be a java.io.File or java.nio.Path object. 27 | 28 | Options are as follows: 29 | 30 | :include - only add files that match regexes in this collection 31 | :exclude - do not add files that match regexes in this collection (takes 32 | priority over :include) 33 | :meta - map of metadata that will be added each file 34 | :mergers - a map of regex patterns to merge functions. When a file to be added 35 | already exists in the fileset, and its name matches a key in the 36 | mergers map, uses the specified merge function to determine the 37 | resulting contents of the file. 38 | 39 | The default behavior (with no merge function) is to replace the file. 40 | 41 | Merge functions take three arguments: an InputStream of the contents 42 | of the existing file, an InputStream of the contents of the new 43 | file, and an OutputStream that will contain the contents of the 44 | resulting file. The streams will be closed after the merge function 45 | returns (meaning that it should do all its processing eagerly.)" 46 | [fileset dir & {:keys [include exclude mergers meta] :as opts}] 47 | (impl/-add fileset dir opts)) 48 | 49 | (declare filter) 50 | (defn remove 51 | "Return a Fileset with the specified paths removed." 52 | [fileset & paths] 53 | (let [paths (set paths)] 54 | (filter fileset #(not (paths (impl/-path %)))))) 55 | 56 | (defn diff 57 | "Return a Fileset containing only the files that are different in `added` and 58 | `before` or not present in `before`" 59 | [before after] 60 | (let [{:keys [added changed]} 61 | (impl/diff* before after nil)] 62 | (update-in added [:tree] clojure.core/merge (:tree changed)))) 63 | 64 | (defn removed 65 | "Return a Fileset containing only the files present in `before` and not in 66 | `after`" 67 | [before after] 68 | (:removed (impl/diff* before after nil))) 69 | 70 | (defn added 71 | "Return a Fileset containing only the files that are present in `after` but 72 | not `before`" 73 | [before after] 74 | (:added (impl/diff* before after nil))) 75 | 76 | (defn changed 77 | "Return a Fileset containing only the files that are different in `after` and 78 | `before`" 79 | [before after] 80 | (:changed (impl/diff* before after nil))) 81 | 82 | (defn filter 83 | "Return a fileset containing only files for which the predicate returns true 84 | when applied to the TempFile" 85 | [fileset pred] 86 | (assoc fileset :tree (reduce-kv (fn [xs k v] 87 | (if (pred v) 88 | (assoc xs k v) 89 | xs)) 90 | {} (:tree fileset)))) 91 | 92 | (defn filter-by-meta 93 | "Return a fileset containing only files for which the predicate returns true 94 | when applied to the metadata of a TempFile" 95 | [fileset pred] 96 | (filter fileset (comp pred :meta))) 97 | 98 | (defn ls 99 | "Return a collection of the paths present in the fileset" 100 | [fileset] 101 | (map impl/-path (impl/-ls fileset))) 102 | 103 | (defn hash 104 | "Return the MD5 hash of the content of the file at the specified path in the 105 | fileset" 106 | [fileset path] 107 | (impl/-hash (get-in fileset [:tree path]))) 108 | 109 | (defn timestamp 110 | "Return the 'last modified' timestamp of the file (as a long) at the specified 111 | path in the fileset" 112 | [fileset path] 113 | (impl/-time (get-in fileset [:tree path]))) 114 | 115 | (defn file 116 | "Returns a java.io.File of the underlying file at the given path. Note that the given file MUST NOT be 117 | modified, at the risk of corrupting the fileset. 118 | 119 | Returns nil if the path does not exist in the fileset." 120 | [fileset path] 121 | (when-let [tmpf (get-in fileset [:tree path])] 122 | (impl/-file tmpf))) 123 | 124 | (defn content 125 | "Opens and returns a java.io.InputStream of the contents of the file at the given path, or nil 126 | if the path does not exist." 127 | [fileset path] 128 | (when-let [f (file fileset path)] 129 | (io/input-stream f))) 130 | 131 | (defn- merge-tempfile 132 | "Merge two tempfiles, logging a warning if one would overwrite the other" 133 | [a b] 134 | (let [[winner loser] (if (< (impl/-time a) (impl/-time b)) [b a] [a b])] 135 | (when-not (and (= (impl/-hash a) (impl/-hash b)) 136 | (= (impl/-meta a) (impl/-meta b))) 137 | (futil/warn "File at path %s was overwritten while merging filesets. Using the file timestamped %s, which is newer than %s" 138 | (impl/-path winner) (impl/-time winner) (impl/-time loser))) 139 | (update winner :meta #(clojure.core/merge %1 (impl/-meta loser))))) 140 | 141 | (defn merge 142 | "Merge multiple filesets. If a path exists in more than one fileset, with 143 | different content, the most recent one is used and a warning is logged." 144 | ([fs] fs) 145 | ([a b] 146 | (assoc a :tree (merge-with merge-tempfile (:tree a) (:tree b)))) 147 | ([a b & more] 148 | (reduce merge a (cons b more)))) 149 | 150 | (defn tmpdir! 151 | "Return a new temporary directory as a java.io.File. The directory will be in 152 | the system temporary directory, and tracked for deletion when the JVM 153 | terminates (using a JVM shutdown hook.)" 154 | [] 155 | (tmpdir/tmpdir!)) 156 | 157 | (defn checksum 158 | "Return the MD5 checksum of the fileset itself. Two filesets with identical contents will have the same hash. 159 | 160 | If timestamps? is true, will incorporate the file's \"last modified\" date into the hash 161 | function, otherwise will hash based only on file names and contents" 162 | [fs timestamps?] 163 | (impl/-checksum fs timestamps?)) 164 | -------------------------------------------------------------------------------- /test/arachne/fileset_test.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.fileset-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.java.io :as io] 4 | [clojure.spec.alpha :as s] 5 | [clojure.spec.test.alpha :as stest] 6 | [arachne.fileset :as fs] 7 | [arachne.fileset.specs :as fss] 8 | [arachne.fileset.util :as fsutil]) 9 | (:import [org.apache.commons.io FileUtils] 10 | [java.nio.file Files Paths])) 11 | 12 | (comment 13 | 14 | (def fs (fs/fileset)) 15 | (def f1 (fs/add fs (io/file "test/test-assets"))) 16 | (def f1 nil) 17 | (System/gc) 18 | 19 | ) 20 | 21 | (deftest test-basic-add-update-commit 22 | (let [fs (fs/fileset) 23 | fs (fs/add fs (io/file "test/test-assets")) 24 | working (fs/tmpdir!) 25 | commit-dir (fs/tmpdir!) 26 | fs (fs/commit! fs commit-dir)] 27 | (FileUtils/copyDirectory commit-dir working) 28 | (spit (io/file working "file1.md") "NEW CONTENT") 29 | (spit (io/file working "dir1/file4.md") "NEW FILE") 30 | (let [fs (fs/add fs working) 31 | dest (fs/tmpdir!) 32 | fs (fs/commit! fs dest) 33 | files (->> (file-seq dest) 34 | (filter #(.isFile %)))] 35 | (is (= "NEW CONTENT" (slurp (io/file dest "file1.md")))) 36 | (is (= #{"file1.md" "file2.md" "file3.md" "file4.md"} 37 | (set (map #(.getName %) files))))))) 38 | 39 | (deftest test-remove-test 40 | (let [fs (fs/fileset) 41 | fs (fs/add fs (io/file "test/test-assets")) 42 | fs (fs/remove fs "dir1/file3.md") 43 | dest (fs/tmpdir!) 44 | fs (fs/commit! fs dest) 45 | files (->> (file-seq dest) 46 | (filter #(.isFile %)))] 47 | (is (= #{"file1.md" "file2.md"} 48 | (set (map #(.getName %) files)))))) 49 | 50 | (deftest test-diffs 51 | (let [fs (fs/fileset) 52 | fs (fs/add fs (io/file "test/test-assets")) 53 | commit-dir (fs/tmpdir!) 54 | working-dir (fs/tmpdir!) 55 | fs (fs/commit! fs commit-dir)] 56 | (FileUtils/copyDirectory commit-dir working-dir) 57 | (spit (io/file working-dir "file1.md") "NEW CONTENT") 58 | (spit (io/file working-dir "dir1/file4.md") "NEW FILE") 59 | (.delete (io/file working-dir "file2.md")) 60 | (let [fs2 (fs/add fs working-dir) 61 | fs2 (fs/remove fs2 "dir1/file3.md")] 62 | (is (= #{"file1.md" "dir1/file4.md"} 63 | (set (fs/ls (fs/diff fs fs2))))) 64 | (is (= #{"dir1/file4.md"} 65 | (set (fs/ls (fs/added fs fs2))))) 66 | (is (= #{"dir1/file3.md"} 67 | (set (fs/ls (fs/removed fs fs2))))) 68 | (is (= #{"file1.md"} 69 | (set (fs/ls (fs/changed fs fs2)))))))) 70 | 71 | (deftest test-filtering-and-meta 72 | (let [fs (fs/fileset) 73 | fs (fs/add fs (io/file "test/test-assets") :meta {:input true}) 74 | working (fs/tmpdir!) 75 | fs (fs/commit! fs working)] 76 | (.mkdirs (io/file working "out")) 77 | (spit (io/file working "out/file1.out") "OUTPUT1") 78 | (spit (io/file working "out/file2.out") "OUTPUT2") 79 | (let [fs (fs/add fs working :include [#"\.out$"] :meta {:output true}) 80 | dest (fs/tmpdir!) 81 | out-fs (fs/filter-by-meta fs :output) 82 | out-fs (fs/commit! out-fs dest) 83 | files (->> (file-seq dest) 84 | (filter #(.isFile %)))] 85 | (is (= #{"file1.out" "file2.out"} 86 | (set (map #(.getName %) files))))))) 87 | 88 | (deftest test-file-access 89 | (let [fs (fs/fileset) 90 | fs (fs/add fs (io/file "test/test-assets"))] 91 | (let [f (io/file "test/test-assets/file1.md")] 92 | ;; Files/getLastModifiedTime returns a higher resolution value 93 | ;; on linux than File/lastModified. 94 | (is (= (.toMillis (Files/getLastModifiedTime (.toPath f) 95 | arachne.fileset.impl/link-opts)) 96 | (fs/timestamp fs "file1.md"))) 97 | (is (= (fsutil/md5 f) (fs/hash fs "file1.md"))) 98 | (is (= (slurp f) (slurp (fs/content fs "file1.md"))))))) 99 | 100 | (deftest test-checksums 101 | (let [original-fs (fs/add (fs/fileset) (io/file "test/test-assets")) 102 | commit-dir (fs/tmpdir!) 103 | working-dir (fs/tmpdir!) 104 | _ (fs/commit! original-fs commit-dir) 105 | _ (FileUtils/copyDirectory commit-dir working-dir false) 106 | fs (fs/add (fs/fileset) (io/file working-dir)) 107 | fs' (fs/add (fs/fileset) (io/file working-dir)) 108 | _ (.setLastModified (io/file working-dir "file1.md") 0) 109 | fs'' (fs/add (fs/fileset) (io/file working-dir)) 110 | _ (spit (io/file working-dir "file1.md") "boo") 111 | fs''' (fs/add (fs/fileset) (io/file working-dir))] 112 | (testing "checksums not including timestamps" 113 | (is (= (fs/checksum fs false) 114 | (fs/checksum fs' false) 115 | (fs/checksum fs'' false))) 116 | (is (not= (fs/checksum fs'' false) 117 | (fs/checksum fs''' false)))) 118 | (testing "checksums including timestamps" 119 | (is (= (fs/checksum fs true) 120 | (fs/checksum fs' true))) 121 | (is (not= (fs/checksum fs true) 122 | (fs/checksum fs'' true)))))) 123 | 124 | (deftest test-date-preservation 125 | (let [fs (fs/add (fs/fileset) (io/file "test/test-assets")) 126 | tmpdir (fs/tmpdir!)] 127 | (fs/commit! fs tmpdir) 128 | (is (= (.lastModified (io/file tmpdir "file1.md")) 129 | (.lastModified (io/file "test/test-assets/file1.md")))))) 130 | 131 | (deftest test-content 132 | (let [fs (fs/add (fs/fileset) (io/file "test/test-assets"))] 133 | (is (= "this is a file" (slurp (fs/content fs "file1.md")))) 134 | (is (nil? (fs/content fs "no-such-file.md"))))) 135 | 136 | (deftest test-file 137 | (let [fs (fs/add (fs/fileset) (io/file "test/test-assets"))] 138 | (is (= "this is a file" (slurp (fs/file fs "file1.md")))) 139 | (is (nil? (fs/file fs "no-such-file.md"))))) 140 | 141 | (deftest test-nio-paths 142 | (let [fs (fs/add (fs/fileset) (Paths/get "test/test-assets" (into-array String [])))] 143 | (is (= "this is a file" (slurp (fs/file fs "file1.md")))) 144 | (is (nil? (fs/file fs "no-such-file.md"))))) 145 | 146 | (deftest test-deletion-recovery 147 | ;; Handles a regression where temp files could be deleted out from 148 | ;; under long-running processes 149 | (let [fs (fs/add (fs/fileset) (io/file "test/test-assets"))] 150 | (is (= "this is a file" (slurp (fs/content fs "file1.md")))) 151 | (let [f (fs/file fs "file1.md")] 152 | (.delete f) 153 | (is (not (.exists f)))) 154 | (is (.exists (fs/file fs "file1.md"))) 155 | (is (= "this is a file" (slurp (fs/content fs "file1.md")))))) 156 | 157 | (comment 158 | (def fs (fs/fileset)) 159 | 160 | (def fs1 (fs/add fs (io/file "test/test-assets"))) 161 | (def fs1 nil) 162 | 163 | (System/gc) 164 | 165 | 166 | ) 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arachne-fileset 2 | 3 | A remix of Boot's filesets, intended to provide a more functional API user in a larger variety of settings. Many thanks to Alan Dipert and Micha Niskin for the original version in Boot's source code, and for their assistance in understanding it. 4 | 5 | ## Usage 6 | 7 | A *fileset* is an immutable data structure representing a logical filesystem directory, containing some number of files. Filesets are immutable persistent data structures; you can obtain a new fileset by adding, removing or modifying files without altering the original fileset instance in any way. They also support correct equality semantics, based on the hashed content and timestamps of the files they contain. 8 | 9 | #### Creating a fileset 10 | 11 | Use the `fileset` function with no aguments to create a new fileset. The fileset will be empty, containing no files. 12 | 13 | #### Adding files to a fileset 14 | 15 | Use the `add` function to add all the files in a normal filesystem directory to the fileset. Under the hood, this actually copies the files, so once they are added changes to the filesystem will not affect the fileset. 16 | 17 | ```clojure 18 | (require '[arachne.fileset :as fs]) 19 | (require '[clojure.java.io :as io]) 20 | 21 | (def my-fileset (fs/add (fs/fileset) (io/file "some/directory"))) 22 | ``` 23 | 24 | `add` also supports options to only add files whose paths match (or do not match) specified regular expressions: see the docstring for details. 25 | 26 | You can also specify a *metadata* map for all the files added in a particular call to `add`, using the `:meta` option. This is data that is attached to each file and can later be used to select or filter specific files. This is useful, among other things, for keeping track of a file's role in some process, and whether it should be included in a final build or not. 27 | 28 | ```clojure 29 | (def my-fileset (fs/add (fs/fileset) (io/file "some/directory") 30 | :include [#".*\.clj$"] 31 | :meta {:role :source-code})) 32 | ``` 33 | 34 | Note that metadata is not the same as Clojure metadata; it does affect the equality semantics of the fileset. 35 | 36 | #### Reading files in a fileset 37 | 38 | You can list the files in a fileset using `ls`, which returns a collection of paths that are present in the fileset. 39 | 40 | For each path, you can use any of the following functions: 41 | 42 | - `hash` - get the MD5 hash of a file 43 | - `timestamp` - get the "last modified" time of a file 44 | - `content` - open a `java.io.InputStream` on the content of the file. 45 | - `file` - Get an immutable `java.io.File` for the File representation. The file will be located in a temp directory and be named according by its content hash and timestamp, not the file path. 46 | 47 | #### Committing 48 | 49 | Interacting with a fileset programatically through the Clojure API is somewhat cumbersome. To perform arbitrary operations on a fileset, it is often necessary to *commit* the fileset, dumping its contents to a concrete location on the filesystem where they can be manipulated as normal files. 50 | 51 | To do this, use the `commit!` function, which takes a fileset and a directory (as a `java.io.File`) and writes all the files in the fileset to the directory. 52 | 53 | Committing is efficient and does not perform a full copy; the emitted files are hard links to the underlying content. This also means that they are read-only; if you wish to modify a file, you must first copy it and re-add it using `add`. 54 | 55 | ```clojure 56 | (require '[arachne.fileset :as fs]) 57 | 58 | (def output-dir (fs/tmpdir!)) 59 | 60 | (fs/commit! fs output-dir) 61 | ``` 62 | 63 | #### Removing files from a fileset 64 | 65 | There are several ways to create a new fileset that does not contain certain files: 66 | 67 | - `remove` removes the files at specific paths from a fileset. 68 | - `filter` applies a predicate to each file in the fileset (using the internal tempfile instance) and returns a fileset containing only files for which the predicate returns true. 69 | - `filter-meta` applies a predicate to the *metadata* of each file in the fileset and returns a fileset containing only files for which the predicate returns true. 70 | 71 | #### Temporary files 72 | 73 | To use filesets effectively, it is often necessary to make extensive use of temporary directories, and the system creates several temporary directories automatically in the course of operations. 74 | 75 | You can obtain a new, empty temporary directory by calling `tmpdir!`. Directories created by `tmpdir!` are created in a directory suitable for temporary files (as determined by the filesystem), and are also registered with the JVM for deletion via a shutdown hook. 76 | 77 | ## How it Works 78 | 79 | Under the hood, the system maintains a directory (the "blob store") full of content-addressed files; that is, files that are named according to the MD5 hash of their contents and their last-modified timestamp. Whenever a file is added to any fileset using `add`, it is added to the blob store. The blob is never modified or deleted until the JVM shuts down. 80 | 81 | A fileset is essentialy just a map of user-level paths (e.g, `"foo/bar.clj"`) to the paths of the corresponding blobs in the blob store (e.g, `"0ac6536c01c4720c6eee617785027c66.1472514953000"`). 82 | 83 | Wherever possible (e.g, when using `commit!`), hard links to the blob store are used to avoid performing a full copy of a file's contents. This makes most operations (aside from the initial `add`) fast and lightweight. 84 | 85 | ## Resource Consumption Notes 86 | 87 | The library maintains an open file handle to each file in a 88 | fileset. This is important, because it prevents files from being 89 | deleted from underneath the blob store in long-running processes 90 | (which Arachne servers are.) 91 | 92 | These file handles are released in the `.finalize` method of each JVM 93 | object representing a tmpfile; that is, after no references to it 94 | remain and they are garbage collected by the JVM. 95 | 96 | However, if you are working with filesets containing many (on the 97 | order of tens of thousands) of files, or if you're churning through 98 | and modifying files in a fileset faster than the old versions can be 99 | garbage collected, it is possible you will see a "too many open files" 100 | exception. If this occurrs, you can either increase your system's open 101 | file limit or decrease the number of files in active filesets. 102 | 103 | If you're experiencing this problem in a long-running dev session, it 104 | might also be worth calling `(System/gc)` explicitly to force old 105 | filesets to be cleaned up (releasing their resources.) 106 | 107 | ## Differences from Boot 108 | 109 | - When you `commit!`, you choose a directory instead using an implicit one. 110 | - Caching has been disabled, 111 | - Multiple fileset instances are fully supported without restrictions, including use in multithreaded environments. 112 | - Filesets no longer have any concept of roles, and no "source", "resource", "input" or "output" status. This can be trivially implemented using metadata, if desired. 113 | - Filesets have been decoupled from the concept of a classpath; they now have nothing to do with eachother (unless you happen to `commit!` to a directory on the classpath.) 114 | - The API has been reworked such that users don't need to interact with the underlying ITmpFile instances. Several convenience functions have been added. 115 | 116 | ## License 117 | 118 | Copyright © 2016 Luke VanderHart, Alan Dipert and Micha Niskin 119 | 120 | Distributed under the Eclipse Public License either version 1.0 or (at 121 | your option) any later version. 122 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /src/arachne/fileset/impl.clj: -------------------------------------------------------------------------------- 1 | ;; Derived from boot.fileset, many thanks to Alan and Micha for their code and help. 2 | ;; This file remains copyright Alan Dipert and Micha Niskin, and is re-used here 3 | ;; under the terms of the Eclipse Public License. 4 | (ns arachne.fileset.impl 5 | (:require 6 | [arachne.fileset.util :as util :refer [with-let debug warn]] 7 | [arachne.fileset.tmpdir :as tmpdir] 8 | [clojure.java.io :as io] 9 | [clojure.set :as set] 10 | [clojure.data :as data] 11 | [valuehash.api :as vh]) 12 | (:import 13 | [java.io File] 14 | [java.util Properties] 15 | [java.nio.file Path Paths Files SimpleFileVisitor LinkOption StandardCopyOption 16 | StandardOpenOption FileVisitResult] 17 | [java.nio.file.attribute FileAttribute] 18 | [java.nio.channels Channels FileChannel] 19 | [org.apache.commons.io FileUtils])) 20 | 21 | ;; Plan: manage lifecycle & cleanup via reference counting 22 | ;; Plan: clean up api by removing caches and linking options 23 | 24 | ;; These can be truly process global, because they only contain immmutable 25 | ;; content-addressed hard links or one-off subdirectories. 26 | 27 | (def global-scratch-dir (memoize tmpdir/tmpdir!)) 28 | 29 | (def prev-fs (atom {})) 30 | 31 | (def link-opts (into-array LinkOption [])) 32 | (def tmp-attrs (into-array FileAttribute [])) 33 | (def move-opts (into-array StandardCopyOption [StandardCopyOption/REPLACE_EXISTING])) 34 | (def copy-opts (into-array StandardCopyOption [StandardCopyOption/REPLACE_EXISTING 35 | StandardCopyOption/COPY_ATTRIBUTES])) 36 | (def continue FileVisitResult/CONTINUE) 37 | 38 | (defprotocol BlobStore 39 | "Manager for the lifecycle of immutable binary files." 40 | (-blob-add [this path source] 41 | "Add the given source file to blob store at the given path, 42 | returning a new TmpFile instance.") 43 | (-blob-get [this id] "Return a File object in the blob store. The 44 | File is guaranteed to exist and to be readonly. If the given ID does 45 | not exist in the blobstore, returns nil.") 46 | (-blob-release [this id] "Release the blob with the given ID.")) 47 | 48 | (declare map->TmpFile) 49 | (declare channel) 50 | (declare -time) 51 | 52 | (defrecord TmpDirBlobStore [tmpdir references] 53 | BlobStore 54 | (-blob-add [this path source] 55 | (let [hash (util/md5 source) 56 | ts (.toMillis (Files/getLastModifiedTime source link-opts)) 57 | id (str hash "." ts) 58 | tf (map->TmpFile {:path path :id id :hash hash :time ts :blobstore this})] 59 | (locking references 60 | (if (@references id) 61 | (swap! references update-in [id :refs] inc) 62 | (let [blob (.resolve (.toPath tmpdir) id)] 63 | (when-not (.exists (.toFile blob)) 64 | (Files/copy source blob copy-opts) 65 | (.setLastModified (.toFile blob) ts) 66 | (.setReadOnly (.toFile blob))) 67 | (swap! references assoc id {:refs 1 68 | :ch (channel blob)})))) 69 | tf)) 70 | (-blob-get [this tmpfile] 71 | (if-let [ref (@references (:id tmpfile))] 72 | (let [file (io/file tmpdir (:id tmpfile))] 73 | (when-not (.exists file) 74 | (locking (:ch ref) 75 | (.position (:ch ref) 0) 76 | (FileUtils/copyToFile 77 | (Channels/newInputStream (:ch ref)) 78 | file) 79 | (.setLastModified file (:time tmpfile)))) 80 | file))) 81 | (-blob-release [this id] 82 | (locking references 83 | (swap! references (fn [all-refs] 84 | (if-let [ref (get all-refs id)] 85 | (if (<= (:refs ref) 1) 86 | (do 87 | (when (:ch ref) (.close (:ch ref))) 88 | (Files/delete (.resolve (.toPath tmpdir) id)) 89 | (dissoc all-refs id)) 90 | (update-in all-refs [id :refs] dec)) 91 | all-refs)))))) 92 | 93 | (defn new-blobstore 94 | "Construct a new tmpdir-based Blobstore" 95 | [] 96 | (->TmpDirBlobStore (tmpdir/tmpdir!) (atom {}))) 97 | 98 | (def global-blobstore (memoize new-blobstore)) 99 | 100 | (defprotocol ITmpFile 101 | (-path [this]) 102 | (-meta [this]) 103 | (-hash [this]) 104 | (-time [this]) 105 | (-channel [this]) 106 | (-file [this])) 107 | 108 | (defprotocol ITmpFileSet 109 | (-ls [this]) 110 | (-commit! [this dir]) 111 | (-rm [this paths]) 112 | (-add [this src-dir opts]) 113 | (-mv [this from-path to-path]) 114 | (-checksum [this timestamps?])) 115 | 116 | (defrecord TmpFile [path id hash time meta blobstore] 117 | ITmpFile 118 | (-path [this] path) 119 | (-meta [this] meta) 120 | (-hash [this] hash) 121 | (-time [this] time) 122 | (-channel [this] channel) 123 | (-file [this] 124 | (-blob-get blobstore this)) 125 | Object 126 | (finalize [_] (-blob-release blobstore id))) 127 | 128 | (declare ->TmpFileSet) 129 | (defn fileset 130 | "Create a new, empty fileset." 131 | [] 132 | (->TmpFileSet {} (global-blobstore) (global-scratch-dir))) 133 | 134 | (defn- file 135 | [^File dir tmpfile] 136 | (io/file dir (-path tmpfile))) 137 | 138 | (defn- file-stat 139 | [^File f] 140 | (let [h (util/md5 f) 141 | t (.lastModified f)] 142 | {:id (str h "." t) :hash h :time t})) 143 | 144 | (defn- scratch-dir! 145 | [^File scratch] 146 | (.toFile (Files/createTempDirectory (.toPath scratch) 147 | "boot-scratch" (into-array FileAttribute [])))) 148 | 149 | (def ^:dynamic *hard-link* nil) 150 | 151 | (defn- channel 152 | "Return an open READ channel to the given path. A reference to this object may be 153 | maintained to keep the tmpfile from being deleted." 154 | [path] 155 | (FileChannel/open path (into-array [StandardOpenOption/READ]))) 156 | 157 | (defn- mkvisitor 158 | [^Path root blobstore tree] 159 | (proxy [SimpleFileVisitor] [] 160 | (visitFile [^Path path attr] 161 | (with-let [_ continue] 162 | (let [relpath (str (.relativize root path)) 163 | tmpfile (-blob-add blobstore relpath path)] 164 | (swap! tree assoc relpath tmpfile)))))) 165 | 166 | (defn- dir->tree! 167 | [dir blobstore] 168 | (locking dir->tree! 169 | (let [root (if (instance? java.io.File dir) 170 | (.toPath dir) 171 | dir)] 172 | @(with-let [tree (atom {})] 173 | (util/walk-file-tree root (mkvisitor root blobstore tree)))))) 174 | 175 | (defn- apply-mergers! 176 | [mergers ^File old-file path ^File new-file ^File merged-file] 177 | (when-let [merger (some (fn [[re v]] (when (re-find re path) v)) mergers)] 178 | (debug "Merging duplicate entry (%s)\n" path) 179 | (let [out-file (File/createTempFile (.getName merged-file) nil 180 | (.getParentFile merged-file))] 181 | (with-open [curr-stream (io/input-stream old-file) 182 | new-stream (io/input-stream new-file) 183 | out-stream (io/output-stream out-file)] 184 | (merger curr-stream new-stream out-stream)) 185 | (util/move out-file merged-file)))) 186 | 187 | (defn- merge-trees! 188 | [old new mergers scratch] 189 | (with-let [tmp (scratch-dir! scratch)] 190 | (doseq [[path newtmp] new] 191 | (when-let [oldtmp (get old path)] 192 | (debug "Merging %s...\n" path) 193 | (let [newf (-file newtmp) 194 | oldf (-file oldtmp) 195 | mergef (doto (io/file tmp path) io/make-parents)] 196 | (apply-mergers! mergers oldf path newf mergef)))))) 197 | 198 | (defn- comp-res 199 | [regexes] 200 | (when-let [res (seq regexes)] 201 | (->> (map #(partial re-find %) res) (apply some-fn)))) 202 | 203 | (defn- filter-tree 204 | [tree include exclude] 205 | (let [ex (comp-res exclude) 206 | in (when-let [in (comp-res include)] (complement in)) 207 | rm? (or (and in ex #(or (in %) (ex %))) in ex)] 208 | (if-not rm? tree (reduce-kv #(if (rm? %2) %1 (assoc %1 %2 %3)) {} tree)))) 209 | 210 | (defn- index 211 | [key tree] 212 | (reduce-kv #(assoc %1 (get %3 key) %3) {} tree)) 213 | 214 | (defn- diff-tree 215 | [tree props] 216 | (let [->map #(select-keys % props)] 217 | (reduce-kv #(assoc %1 %2 (->map %3)) {} tree))) 218 | 219 | (defn diff* 220 | [{t1 :tree :as before} {t2 :tree :as after} props] 221 | (if-not before 222 | {:added after 223 | :removed (assoc after :tree {}) 224 | :changed (assoc after :tree {})} 225 | (let [props (or (seq props) [:id]) 226 | d1 (diff-tree t1 props) 227 | d2 (diff-tree t2 props) 228 | [x y _] (map (comp set keys) (data/diff d1 d2))] 229 | {:added (->> (set/difference y x) (select-keys t2) (assoc after :tree)) 230 | :removed (->> (set/difference x y) (select-keys t1) (assoc after :tree)) 231 | :changed (->> (set/intersection x y) (select-keys t2) (assoc after :tree))}))) 232 | 233 | (defn- fatal-conflict? 234 | [^File dest] 235 | (if (.isDirectory dest) 236 | (let [tree (->> dest file-seq reverse)] 237 | (or (not (every? #(.isDirectory ^File %) tree)) 238 | (doseq [^File f tree] (.delete f)))) 239 | (not (let [d (.getParentFile dest)] 240 | (or (.isDirectory d) (.mkdirs d)))))) 241 | 242 | (defn- add-tree-meta 243 | [tree meta] 244 | (if (empty? meta) 245 | tree 246 | (reduce-kv (fn [tree k tmpfile] 247 | (assoc tree k (update tmpfile :meta #(merge %1 meta)))) 248 | {} tree))) 249 | 250 | (defn- merge-tempfiles 251 | "Merge two TmpFile records" 252 | [a b] 253 | (assoc (merge a b) :meta (merge (:meta a) (:meta b)))) 254 | 255 | (defn- current-fileset 256 | "Return a new fileset representing the current state of a directory" 257 | [^File dir] 258 | (let [fs (fileset)] 259 | (-add fs dir {}))) 260 | 261 | (defrecord TmpFileSet [tree blobstore scratch] 262 | ITmpFileSet 263 | (-ls [this] 264 | (set (vals tree))) 265 | (-commit! [this dir] 266 | (let [prev (current-fileset dir) 267 | {:keys [added removed changed]} (diff* prev this [:id])] 268 | (debug "Committing fileset...\n") 269 | (doseq [tmpf (set/union (-ls removed) (-ls changed)) 270 | :let [prev (get-in prev [:tree (-path tmpf)]) 271 | exists? (.exists ^File (file dir prev)) 272 | op (if exists? "removing" "no-op")]] 273 | (when exists? (io/delete-file (file dir prev)))) 274 | (let [this (loop [this this 275 | [tmpf & tmpfs] 276 | (->> (set/union (-ls added) (-ls changed)) 277 | (sort-by (comp count -path) >))] 278 | (or (and (not tmpf) this) 279 | (let [p (-path tmpf) 280 | dst (file dir tmpf) 281 | src (-file tmpf) 282 | err? (fatal-conflict? dst) 283 | this (or (and (not err?) this) 284 | (update-in this [:tree] dissoc p))] 285 | (if err? 286 | (warn "Merge conflict: not adding %s\n" p) 287 | (util/hard-link src dst)) 288 | (recur this tmpfs))))] 289 | (with-let [_ this] 290 | (swap! prev-fs assoc (.getCanonicalPath ^File dir) [this (.lastModified ^File dir)]) 291 | (debug "Commit complete.\n"))))) 292 | (-rm [this tmpfiles] 293 | (let [{:keys [tree]} this 294 | treefiles (set (vals tree)) 295 | remove? (->> tmpfiles set (set/difference treefiles) complement)] 296 | (assoc this :tree (reduce-kv #(if (remove? %3) %1 (assoc %1 %2 %3)) {} tree)))) 297 | (-add [this src-dir opts] 298 | (let [{:keys [tree blobstore scratch]} this 299 | {:keys [mergers include exclude meta]} opts 300 | ->tree #(dir->tree! % blobstore) 301 | new-tree (-> (->tree src-dir) 302 | (filter-tree include exclude) 303 | (add-tree-meta meta)) 304 | mrg-tree (when mergers 305 | (->tree (merge-trees! tree new-tree mergers scratch)))] 306 | (assoc this :tree (merge-with merge-tempfiles tree new-tree mrg-tree)))) 307 | (-mv [this from-path to-path] 308 | (if (= from-path to-path) 309 | this 310 | (if-let [from (get-in this [:tree from-path])] 311 | (update-in this [:tree] #(-> % (assoc to-path (assoc from :path to-path)) 312 | (dissoc from-path))) 313 | (throw (Exception. (format "not in fileset (%s)" from-path)))))) 314 | (-checksum [this timestamps?] 315 | (let [basis (set (map (fn [tmpfile] 316 | (select-keys tmpfile [:path :hash (when timestamps? :time)])) 317 | (vals (:tree this))))] 318 | (vh/md5-str basis)))) 319 | --------------------------------------------------------------------------------