├── .cljstyle ├── blocks-tests ├── .gitignore ├── project.clj └── src │ └── blocks │ └── store │ └── tests.clj ├── .gitignore ├── .clj-kondo └── config.edn ├── test └── blocks │ ├── test_utils.clj │ ├── store │ ├── memory_test.clj │ ├── init_test.clj │ ├── replica_test.clj │ ├── buffer_test.clj │ ├── file_test.clj │ └── cache_test.clj │ ├── summary_test.clj │ ├── bytes_test.clj │ ├── meter_test.clj │ ├── data_test.clj │ ├── store_test.clj │ └── core_test.clj ├── .lein-yagni ├── dev └── user.clj ├── UNLICENSE ├── src └── blocks │ ├── summary.clj │ ├── store │ ├── replica.clj │ ├── memory.clj │ ├── buffer.clj │ ├── cache.clj │ └── file.clj │ ├── data │ └── PersistentBytes.java │ ├── meter.clj │ ├── data.clj │ ├── store.clj │ └── core.clj ├── project.clj ├── .circleci └── config.yml ├── doc └── store.md ├── README.md └── CHANGELOG.md /.cljstyle: -------------------------------------------------------------------------------- 1 | ;; Clojure style configs 2 | ;; vim: filetype=clojure 3 | {:files 4 | {:ignore #{".git" "target"}}} 5 | -------------------------------------------------------------------------------- /blocks-tests/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /.lein-* 5 | /.nrepl-port 6 | profiles.clj 7 | build.xml 8 | pom.xml 9 | pom.xml.asc 10 | *.jar 11 | *.class 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /.lein-* 5 | /.nrepl-port 6 | /.clj-kondo/.cache 7 | profiles.clj 8 | build.xml 9 | pom.xml 10 | pom.xml.asc 11 | *.jar 12 | *.class 13 | -------------------------------------------------------------------------------- /blocks-tests/project.clj: -------------------------------------------------------------------------------- 1 | (defproject mvxcvi/blocks-tests "2.0.3" 2 | :description "Generative tests for block storage implementations." 3 | :url "https://github.com/greglook/blocks" 4 | :license {:name "Public Domain" 5 | :url "http://unlicense.org/"} 6 | 7 | :deploy-branches ["master"] 8 | :pedantic? :abort 9 | 10 | :dependencies 11 | [[org.clojure/clojure "1.10.1"] 12 | [org.clojure/test.check "0.10.0"] 13 | [mvxcvi/blocks "2.0.3"] 14 | [mvxcvi/test.carly "0.4.1"] 15 | [mvxcvi/puget "1.2.0"]]) 16 | -------------------------------------------------------------------------------- /.clj-kondo/config.edn: -------------------------------------------------------------------------------- 1 | {:linters 2 | {:consistent-alias 3 | {:level :warning 4 | :aliases {clojure.java.io io 5 | clojure.set set 6 | clojure.string str 7 | manifold.deferred d 8 | manifold.stream s 9 | multiformats.hash multihash}} 10 | 11 | :unresolved-symbol 12 | {:exclude [(test.carly.core/defop)]} 13 | 14 | ;; https://github.com/borkdude/clj-kondo/issues/678 15 | :private-call 16 | {:level :info}} 17 | 18 | :lint-as 19 | {manifold.deferred/loop clojure.core/let}} 20 | -------------------------------------------------------------------------------- /test/blocks/test_utils.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.test-utils 2 | (:require 3 | [manifold.deferred :as d])) 4 | 5 | 6 | (defn quiet-exception 7 | "Construct a runtime exception which elides stacktrace data. Useful for 8 | throwing inside error handling test cases." 9 | ([] 10 | (quiet-exception "BOOM")) 11 | ([message] 12 | (doto (RuntimeException. ^String message) 13 | (.setStackTrace (into-array StackTraceElement []))))) 14 | 15 | 16 | (defn quiet-error-deferred 17 | [] 18 | (doto (d/error-deferred (quiet-exception)) 19 | (d/error-value nil))) 20 | -------------------------------------------------------------------------------- /test/blocks/store/memory_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.memory-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.data :as data] 5 | [blocks.store.memory :refer [memory-block-store]] 6 | [blocks.store.tests :as tests] 7 | [clojure.test :refer [deftest is]] 8 | [multiformats.hash :as multihash])) 9 | 10 | 11 | (deftest storage-realization 12 | (let [store (memory-block-store) 13 | content "foo bar baz qux" 14 | id (multihash/sha1 content) 15 | block (data/create-block 16 | id (count content) 17 | (fn lazy-reader 18 | [] 19 | (java.io.ByteArrayInputStream. (.getBytes content))))] 20 | (is (block/lazy? block)) 21 | (is (= block @(block/put! store block))) 22 | (is (= 15 (:size @(block/stat store id)))) 23 | (is (block/loaded? @(block/get store id))))) 24 | 25 | 26 | (deftest ^:integration check-behavior 27 | (tests/check-store memory-block-store)) 28 | -------------------------------------------------------------------------------- /.lein-yagni: -------------------------------------------------------------------------------- 1 | blocks.core/loaded? 2 | blocks.core/lazy? 3 | blocks.core/from-file 4 | blocks.core/open 5 | blocks.core/read! 6 | blocks.core/write! 7 | blocks.core/load! 8 | blocks.core/validate! 9 | 10 | blocks.core/->store 11 | 12 | blocks.core/list 13 | blocks.core/list-seq 14 | 15 | blocks.core/stat 16 | blocks.core/get 17 | blocks.core/get-batch 18 | 19 | blocks.core/put! 20 | blocks.core/put-batch! 21 | blocks.core/store! 22 | 23 | blocks.core/delete! 24 | blocks.core/delete-batch! 25 | 26 | blocks.core/scan 27 | blocks.core/erase! 28 | blocks.core/sync! 29 | 30 | blocks.data/merge-blocks 31 | blocks.data/bounded-input-stream 32 | 33 | blocks.store/privatize-constructors! 34 | 35 | blocks.store.tests/populate-blocks! 36 | blocks.store.tests/check-store 37 | 38 | blocks.store.buffer/buffer-block-store 39 | blocks.store.buffer/clear! 40 | blocks.store.buffer/flush! 41 | 42 | blocks.store.cache/caching-block-store 43 | blocks.store.cache/reap! 44 | 45 | blocks.store.file/file-block-store 46 | 47 | blocks.store.memory/memory-block-store 48 | 49 | blocks.store.replica/replica-block-store 50 | -------------------------------------------------------------------------------- /test/blocks/summary_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.summary-test 2 | (:require 3 | [blocks.data :as data] 4 | [blocks.summary :as sum] 5 | [clojure.test :refer [deftest is]])) 6 | 7 | 8 | (deftest bucket-histogram 9 | (dotimes [i 16] 10 | (let [[a b] (sum/bucket->range (sum/size->bucket i))] 11 | (is (<= a i)) 12 | (is (< i b))))) 13 | 14 | 15 | (deftest storage-summaries 16 | (let [block-a (data/read-block :sha1 "foo") 17 | block-b (data/read-block :sha1 "bar") 18 | block-c (data/read-block :sha1 "baz") 19 | summary-a (sum/update (sum/init) block-a) 20 | summary-ab (sum/update summary-a block-b) 21 | summary-c (sum/update (sum/init) block-c)] 22 | (is (= 0 (:count (sum/init)))) 23 | (is (= 0 (:size (sum/init)))) 24 | (is (empty? (:sizes (sum/init)))) 25 | (is (= 1 (:count summary-a))) 26 | (is (= (:size block-a) (:size summary-a))) 27 | (is (= 2 (:count summary-ab))) 28 | (is (= (+ (:size block-a) (:size block-b)) (:size summary-ab))) 29 | (is (seq (:sizes summary-ab))) 30 | (is (= (sum/merge summary-ab summary-c) 31 | (sum/update summary-ab block-c))))) 32 | -------------------------------------------------------------------------------- /dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | "Custom repl customization for local development." 3 | (:require 4 | [blocks.core :as block] 5 | [blocks.data :as data] 6 | [blocks.store :as store] 7 | [blocks.store.buffer :refer [buffer-block-store]] 8 | [blocks.store.cache :refer [caching-block-store]] 9 | [blocks.store.file :refer [file-block-store]] 10 | [blocks.store.memory :refer [memory-block-store]] 11 | [blocks.store.replica :refer [replica-block-store]] 12 | [blocks.store.tests :as tests] 13 | [clojure.java.io :as io] 14 | [clojure.repl :refer :all] 15 | [clojure.stacktrace :refer [print-cause-trace]] 16 | [clojure.string :as str] 17 | [clojure.tools.namespace.repl :refer [refresh]] 18 | [com.stuartsierra.component :as component] 19 | [manifold.deferred :as d] 20 | [manifold.stream :as s] 21 | [multiformats.hash :as multihash]) 22 | (:import 23 | blocks.data.Block 24 | multiformats.hash.Multihash)) 25 | 26 | 27 | (def test-blocks 28 | (tests/generate-blocks! 10 1024)) 29 | 30 | 31 | (def tbs 32 | "Temporary block store in target." 33 | (component/start (file-block-store "target/blocks" :auto-migrate? true))) 34 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /test/blocks/store/init_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.init-test 2 | (:require 3 | [blocks.store :as store] 4 | [blocks.store.file] 5 | [blocks.store.memory] 6 | [clojure.string :as str] 7 | [clojure.test :refer [deftest testing is]]) 8 | (:import 9 | (blocks.store.file 10 | FileBlockStore) 11 | (blocks.store.memory 12 | MemoryBlockStore) 13 | java.io.File)) 14 | 15 | 16 | (defn- relative-path 17 | [& parts] 18 | (str/join File/separator parts)) 19 | 20 | 21 | (defn- absolute-path 22 | [& parts] 23 | (str File/separator (apply relative-path parts))) 24 | 25 | 26 | (deftest uri-initialization 27 | (testing "unknown" 28 | (is (thrown? IllegalArgumentException 29 | (store/initialize "foo://something")))) 30 | (testing "memory init" 31 | (let [store (store/initialize "mem:-")] 32 | (is (instance? MemoryBlockStore store)))) 33 | (testing "file init" 34 | (testing "absolute path" 35 | (let [store (store/initialize "file:///foo/bar/baz")] 36 | (is (instance? FileBlockStore store)) 37 | (is (= (absolute-path "foo" "bar" "baz") (str (:root store)))))) 38 | (testing "relative path" 39 | (let [store (store/initialize "file://foo/bar/baz")] 40 | (is (instance? FileBlockStore store)) 41 | (is (= (relative-path "foo" "bar" "baz") (str (:root store)))))))) 42 | -------------------------------------------------------------------------------- /test/blocks/store/replica_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.replica-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.store.memory :refer [memory-block-store]] 5 | [blocks.store.replica :refer [replica-block-store]] 6 | [blocks.store.tests :as tests] 7 | [clojure.test :refer [deftest is]] 8 | [com.stuartsierra.component :as component])) 9 | 10 | 11 | (deftest lifecycle 12 | (is (thrown-with-msg? Exception #"missing configured keys" 13 | (component/start (replica-block-store [:a :b])))) 14 | (let [store (replica-block-store [:a])] 15 | (is (identical? store (component/stop store))))) 16 | 17 | 18 | (deftest replica-behavior 19 | (let [replica-1 (memory-block-store) 20 | replica-2 (memory-block-store) 21 | store (replica-block-store [:a :b] :a replica-1 :b replica-2) 22 | a (block/read! "foo bar baz") 23 | b (block/read! "abracadabra") 24 | c (block/read! "123 xyz")] 25 | @(block/put! store a) 26 | @(block/put! store b) 27 | @(block/put! store c) 28 | (is (= 3 (count (block/list-seq replica-1)))) 29 | (is (every? (comp deref (partial block/get replica-1)) 30 | (map :id [a b c])) 31 | "all blocks are stored in replica-1") 32 | (is (= 3 (count (block/list-seq replica-2)))) 33 | (is (every? (comp deref (partial block/get replica-2)) 34 | (map :id [a b c])) 35 | "all blocks are stored in replica-2") 36 | (is (= 3 (count (block/list-seq store)))) 37 | (block/delete! replica-1 (:id a)) 38 | (block/delete! replica-2 (:id c)) 39 | (is (= 3 (count (block/list-seq store))) 40 | "replica lists all available blocks"))) 41 | 42 | 43 | (deftest ^:integration check-behavior 44 | (tests/check-store 45 | #(replica-block-store 46 | [:a :b] 47 | :a (memory-block-store) 48 | :b (memory-block-store)))) 49 | -------------------------------------------------------------------------------- /src/blocks/summary.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.summary 2 | "A 'summary' represents a collection of blocks, including certain statistics 3 | about the aggregate count and sizes. These are useful for returning from 4 | certain operations to represent the set of blocks acted upon. 5 | 6 | The following fields are present in a summary: 7 | 8 | - `:count` 9 | The total number of blocks added to the summary. 10 | - `:size` 11 | The total size of blocks added to the summary, in bytes. 12 | - `:sizes` 13 | A histogram map from bucket exponent to a count of the blocks in that 14 | bucket (see `size->bucket` and `bucket->range`)." 15 | (:refer-clojure :exclude [update merge])) 16 | 17 | 18 | (defn init 19 | "Construct a new, empty summary." 20 | [] 21 | {:count 0 22 | :size 0 23 | :sizes {}}) 24 | 25 | 26 | (defn size->bucket 27 | "Assigns a block size to an exponential histogram bucket. Given a size `s`, 28 | returns `n` such that `2^n <= s < 2^(n+1)`." 29 | [size] 30 | (loop [s size 31 | n 0] 32 | (if (pos? s) 33 | (recur (bit-shift-right s 1) (inc n)) 34 | n))) 35 | 36 | 37 | (defn bucket->range 38 | "Returns a vector with the boundaries which a given size bucket covers." 39 | [n] 40 | [(bit-shift-left 1 (dec n)) 41 | (bit-shift-left 1 n)]) 42 | 43 | 44 | (defn update 45 | "Update the summary with the stats from the given block." 46 | [summary block] 47 | (when (instance? Throwable block) 48 | (throw block)) 49 | (-> summary 50 | (clojure.core/update :count inc) 51 | (clojure.core/update :size + (:size block)) 52 | (clojure.core/update :sizes clojure.core/update (size->bucket (:size block)) (fnil inc 0)))) 53 | 54 | 55 | (defn merge 56 | "Merge two summaries together." 57 | [a b] 58 | (-> a 59 | (clojure.core/update :count + (:count b)) 60 | (clojure.core/update :size + (:size b)) 61 | (clojure.core/update :sizes (partial merge-with +) (:sizes b)) 62 | (clojure.core/merge (dissoc b :count :size :sizes)))) 63 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject mvxcvi/blocks "2.1.0" 2 | :description "Content-addressed data storage interface." 3 | :url "https://github.com/greglook/blocks" 4 | :license {:name "Public Domain" 5 | :url "http://unlicense.org/"} 6 | 7 | :aliases 8 | {"coverage" ["with-profile" "+coverage" "cloverage"]} 9 | 10 | :deploy-branches ["main"] 11 | :java-source-paths ["src"] 12 | :pedantic? :abort 13 | 14 | :dependencies 15 | [[org.clojure/clojure "1.11.2"] 16 | [org.clojure/data.priority-map "1.2.0"] 17 | [org.clojure/tools.logging "1.3.0"] 18 | [com.stuartsierra/component "1.1.0"] 19 | [commons-io "2.15.1"] 20 | [manifold "0.4.2"] 21 | [mvxcvi/multiformats "0.3.107"]] 22 | 23 | :test-selectors 24 | {:default (complement :integration) 25 | :integration :integration} 26 | 27 | :cloverage 28 | {:ns-exclude-regex [#"blocks\.store\.tests"]} 29 | 30 | :hiera 31 | {:cluster-depth 2 32 | :vertical false 33 | :show-external false 34 | :ignore-ns #{blocks.store.tests}} 35 | 36 | :whidbey 37 | {:tag-types {'blocks.data.Block {'blocks.data.Block 38 | #(array-map :id (:id %) 39 | :size (:size %) 40 | :stored-at (:stored-at %))} 41 | 'multiformats.hash.Multihash {'multi/hash str}}} 42 | 43 | :profiles 44 | {:dev 45 | {:source-paths ["blocks-tests/src"] 46 | :dependencies 47 | [[org.clojure/test.check "1.1.1"] 48 | [commons-logging "1.3.0"] 49 | [mvxcvi/puget "1.3.4"] 50 | [mvxcvi/test.carly "0.4.1"]]} 51 | 52 | :repl 53 | {:source-paths ["dev"]} 54 | 55 | :test 56 | {:jvm-opts ["-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.NoOpLog"]} 57 | 58 | :coverage 59 | {:plugins 60 | [[org.clojure/clojure "1.11.2"] 61 | [lein-cloverage "1.2.4"]] 62 | :jvm-opts ["-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.SimpleLog" 63 | "-Dorg.apache.commons.logging.simplelog.defaultlog=trace"]}}) 64 | -------------------------------------------------------------------------------- /src/blocks/store/replica.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.replica 2 | "Replica stores provide logical block storage which writes to multiple 3 | backing stores. Lookups will try the backing stores in order to find blocks. 4 | 5 | Replicas are useful for ensuring durability across stores and for shared 6 | caches, where some external process controls cache eviction." 7 | (:require 8 | [blocks.core :as block] 9 | [blocks.store :as store] 10 | [com.stuartsierra.component :as component] 11 | [manifold.deferred :as d])) 12 | 13 | 14 | (defn- resolve-stores 15 | "Resolve the configured replica stores." 16 | ([store] 17 | (resolve-stores store (:replicas store))) 18 | ([store replicas] 19 | (mapv (partial get store) replicas))) 20 | 21 | 22 | (defrecord ReplicaBlockStore 23 | [replicas] 24 | 25 | component/Lifecycle 26 | 27 | (start 28 | [this] 29 | (when-let [missing (seq (remove (partial contains? this) replicas))] 30 | (throw (IllegalStateException. 31 | (str "Replica block store is missing configured keys: " 32 | (pr-str missing))))) 33 | this) 34 | 35 | 36 | (stop 37 | [this] 38 | this) 39 | 40 | 41 | store/BlockStore 42 | 43 | (-list 44 | [this opts] 45 | (->> (resolve-stores this) 46 | (map #(block/list % opts)) 47 | (apply store/merge-blocks))) 48 | 49 | 50 | (-stat 51 | [this id] 52 | (store/some-store (resolve-stores this) block/stat id)) 53 | 54 | 55 | (-get 56 | [this id] 57 | ;; OPTIMIZE: query in parallel, use `d/alt`? 58 | (store/some-store (resolve-stores this) block/get id)) 59 | 60 | 61 | (-put! 62 | [this block] 63 | (d/chain 64 | (block/put! (get this (first replicas)) block) 65 | (fn keep-preferred 66 | [stored] 67 | (let [block (store/preferred-block block stored)] 68 | (d/chain 69 | (store/zip-stores (resolve-stores this (rest replicas)) block/put! block) 70 | (partial apply store/preferred-block stored)))))) 71 | 72 | 73 | (-delete! 74 | [this id] 75 | (d/chain 76 | (store/zip-stores (resolve-stores this) block/delete! id) 77 | (partial some true?) 78 | boolean))) 79 | 80 | 81 | ;; ## Constructors 82 | 83 | (store/privatize-constructors! ReplicaBlockStore) 84 | 85 | 86 | (defn replica-block-store 87 | "Creates a new replica block store which will persist blocks to multiple 88 | backing stores. Block operations will be performed on the stores in the order 89 | given in `replicas`, where each key is looked up in the store record." 90 | [replicas & {:as opts}] 91 | (map->ReplicaBlockStore 92 | (assoc opts :replicas (vec replicas)))) 93 | -------------------------------------------------------------------------------- /src/blocks/store/memory.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.memory 2 | "Memory stores provide process-local storage backed by a map in a ref. Blocks 3 | put into this store will be fully read into memory to ensure the content is 4 | present locally. Memory block stores may be constructed usin the `mem:-` URI 5 | form. 6 | 7 | This store is most suitable for testing, caches, and other situations which 8 | call for an ephemeral block store." 9 | (:require 10 | [blocks.data :as data] 11 | [blocks.store :as store] 12 | [manifold.deferred :as d] 13 | [manifold.stream :as s]) 14 | (:import 15 | blocks.data.Block)) 16 | 17 | 18 | (defn- load-block 19 | "Prepare a new block for storage based on the given block. This ensures the 20 | content is loaded into memory and cleans the block metadata." 21 | [^Block block] 22 | (if (data/byte-content? block) 23 | (data/create-block 24 | (:id block) 25 | (:size block) 26 | (.content block)) 27 | (data/read-block 28 | (:algorithm (:id block)) 29 | (data/read-all (.content block))))) 30 | 31 | 32 | ;; Block records in a memory store are held in a map in a ref. 33 | (defrecord MemoryBlockStore 34 | [memory] 35 | 36 | store/BlockStore 37 | 38 | (-list 39 | [_ _opts] 40 | (s/->source (or (vals @memory) []))) 41 | 42 | 43 | (-stat 44 | [_ id] 45 | (d/success-deferred 46 | (when-let [block (get @memory id)] 47 | {:id (:id block) 48 | :size (:size block) 49 | :stored-at (:stored-at block)}))) 50 | 51 | 52 | (-get 53 | [_ id] 54 | (d/success-deferred 55 | (get @memory id))) 56 | 57 | 58 | (-put! 59 | [_ block] 60 | (let [id (:id block)] 61 | (store/future' 62 | (dosync 63 | (if-let [extant (get @memory id)] 64 | extant 65 | (let [block (load-block block)] 66 | (alter memory assoc id block) 67 | block)))))) 68 | 69 | 70 | (-delete! 71 | [_ id] 72 | (store/future' 73 | (dosync 74 | (let [existed? (contains? @memory id)] 75 | (alter memory dissoc id) 76 | existed?)))) 77 | 78 | 79 | store/ErasableStore 80 | 81 | (-erase! 82 | [_] 83 | (store/future' 84 | (dosync 85 | (alter memory empty) 86 | true)))) 87 | 88 | 89 | ;; ## Constructors 90 | 91 | (store/privatize-constructors! MemoryBlockStore) 92 | 93 | 94 | (defn memory-block-store 95 | "Creates a new in-memory block store." 96 | [& {:as opts}] 97 | (map->MemoryBlockStore 98 | (assoc opts :memory (ref (sorted-map) :validator map?)))) 99 | 100 | 101 | (defmethod store/initialize "mem" 102 | [_] 103 | (memory-block-store)) 104 | -------------------------------------------------------------------------------- /test/blocks/store/buffer_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.buffer-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.store.buffer :refer [buffer-block-store] :as buffer] 5 | [blocks.store.memory :refer [memory-block-store]] 6 | [blocks.store.tests :as tests] 7 | [clojure.test :refer [deftest is]] 8 | [com.stuartsierra.component :as component])) 9 | 10 | 11 | (deftest lifecycle 12 | (is (thrown? IllegalStateException 13 | (component/start (buffer-block-store 14 | :buffer (memory-block-store))))) 15 | (is (thrown? IllegalStateException 16 | (component/start (buffer-block-store 17 | :primary (memory-block-store))))) 18 | (let [store (buffer-block-store)] 19 | (is (identical? store (component/stop store))))) 20 | 21 | 22 | (deftest buffer-behavior 23 | (let [primary (memory-block-store) 24 | buffer (memory-block-store) 25 | store (buffer-block-store 26 | :primary primary 27 | :buffer buffer) 28 | a (block/read! "foo bar baz") 29 | b (block/read! "abracadabra") 30 | c (block/read! "123 xyz")] 31 | @(block/put! store a) 32 | @(block/put! store b) 33 | (is (empty? (block/list-seq primary))) 34 | @(block/put! primary c) 35 | (is (= 3 (count (block/list-seq store)))) 36 | (is (every? (set (map :id [a b c])) 37 | (map :id (block/list-seq store)))) 38 | (is (= (:id c) (:id @(block/put! store c)))) 39 | (is (= 2 (count (block/list-seq buffer)))) 40 | (let [flush-summary @(buffer/flush! store)] 41 | (is (= 2 (:count flush-summary))) 42 | (is (= 22 (:size flush-summary)))) 43 | (is (zero? (:count @(buffer/clear! store)))) 44 | @(block/put! store (block/read! "XYZ")) 45 | (is (= 1 (:count @(buffer/clear! store)))) 46 | (is (empty? (block/list-seq buffer))) 47 | (is (= 3 (count (block/list-seq primary)))) 48 | @(block/delete! store (:id c)) 49 | (is (= 2 (count (block/list-seq store)))))) 50 | 51 | 52 | (deftest buffer-predicate 53 | (let [primary (memory-block-store) 54 | buffer (memory-block-store) 55 | store (buffer-block-store 56 | :primary primary 57 | :buffer buffer 58 | :predicate #(< (:size %) 8)) 59 | a (block/read! "foo") 60 | b (block/read! "bar") 61 | c (block/read! "abcdefghijklmnopqrstuvwxyz")] 62 | @(block/put! store a) 63 | @(block/put! store b) 64 | @(block/put! store c) 65 | (is (= 3 (count (block/list-seq store)))) 66 | (is (= 2 (count (block/list-seq buffer)))) 67 | (is (= 1 (count (block/list-seq primary)))) 68 | (is @(block/stat primary (:id c))))) 69 | 70 | 71 | (deftest ^:integration check-behavior 72 | (tests/check-store 73 | #(buffer-block-store 74 | :primary (memory-block-store) 75 | :buffer (memory-block-store)))) 76 | -------------------------------------------------------------------------------- /test/blocks/bytes_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.bytes-test 2 | (:require 3 | [clojure.test :refer [deftest testing is]]) 4 | (:import 5 | blocks.data.PersistentBytes)) 6 | 7 | 8 | (defn- ->pb 9 | "Construct a new `PersistentBytes` value containing the given byte data." 10 | [& data] 11 | (PersistentBytes/wrap (byte-array data))) 12 | 13 | 14 | (deftest persistent-bytes-construction 15 | (testing "wrap does not duplicate the array" 16 | (let [data (byte-array 3) 17 | content (PersistentBytes/wrap data)] 18 | (is (= 3 (count content)) "content should contain three bytes") 19 | (is (= 0 (first content)) "first byte should be zero") 20 | (aset-byte data 0 8) 21 | (is (= 8 (first content)) "first byte should change to eight"))) 22 | (testing "copyFrom duplicates the array" 23 | (let [data (byte-array 3) 24 | content (PersistentBytes/copyFrom data)] 25 | (is (= 3 (count content)) "content should contain three bytes") 26 | (is (= 0 (first content)) "first byte should be zero") 27 | (aset-byte data 0 8) 28 | (is (= 0 (first content)) "first byte should still be zero")))) 29 | 30 | 31 | (deftest persistent-bytes-identity 32 | (let [pb1 (PersistentBytes/wrap (.getBytes "foo")) 33 | pb2 (PersistentBytes/wrap (.getBytes "foo")) 34 | pb3 (PersistentBytes/wrap (.getBytes "bar"))] 35 | (is (= pb1 (.getBytes "foo")) 36 | "should be equal to raw bytes") 37 | (is (= pb1 (.toBuffer pb1)) 38 | "should be equal to byte buffer") 39 | (is (= pb1 pb2) 40 | "should be equal to persistent bytes with equal content") 41 | (is (not= pb1 pb3)) 42 | (is (= (hash pb1) (hash pb2))) 43 | (is (not= (hash pb1) (hash pb3))) 44 | (is (= (.hasheq pb1) (.hasheq pb2))) 45 | (is (not= (.hasheq pb1) (.hasheq pb3))) 46 | (is (= "foo" (slurp (.open pb1)))))) 47 | 48 | 49 | (deftest persistent-bytes-coll 50 | (let [pb (PersistentBytes/wrap (.getBytes "baz"))] 51 | (is (= 3 (count pb))) 52 | (is (= [98 97 122] (seq pb))) 53 | (is (= 97 (nth pb 1))) 54 | (is (thrown? Exception (nth pb 5))) 55 | (is (= ::not-found (nth pb 5 ::not-found))))) 56 | 57 | 58 | (deftest byte-comparison 59 | (testing "equal arrays" 60 | (is (zero? (compare (->pb) (->pb)))) 61 | (is (zero? (compare (->pb 1 2 3) (->pb 1 2 3))))) 62 | (testing "equal prefixes" 63 | (is (neg? (compare (->pb 1 2 3) 64 | (->pb 1 2 3 4)))) 65 | (is (pos? (compare (->pb 1 2 3 4) 66 | (->pb 1 2 3))))) 67 | (testing "order-before" 68 | (is (neg? (compare (->pb 1 2 3) 69 | (->pb 1 2 4)))) 70 | (is (neg? (compare (->pb 1 2 3) 71 | (->pb 1 3 2)))) 72 | (is (neg? (compare (->pb 0 2 3 4) 73 | (->pb 1 3 2 1))))) 74 | (testing "order-after" 75 | (is (pos? (compare (->pb 1 2 4) 76 | (->pb 1 2 3)))) 77 | (is (pos? (compare (->pb 1 3 2) 78 | (->pb 1 2 3)))) 79 | (is (pos? (compare (->pb 1 3 2 1) 80 | (->pb 0 2 3 4)))))) 81 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # Common executor configuration 4 | executors: 5 | clojure: 6 | docker: 7 | - image: cimg/clojure:1.11-openjdk-11.0 8 | working_directory: ~/repo 9 | 10 | 11 | # Job definitions 12 | jobs: 13 | style: 14 | executor: clojure 15 | steps: 16 | - checkout 17 | - run: 18 | name: Install cljstyle 19 | environment: 20 | CLJSTYLE_VERSION: 0.16.626 21 | CLJSTYLE_PLATFORM: linux_amd64 22 | command: | 23 | wget https://github.com/greglook/cljstyle/releases/download/${CLJSTYLE_VERSION}/cljstyle_${CLJSTYLE_VERSION}_${CLJSTYLE_PLATFORM}.zip 24 | unzip cljstyle_${CLJSTYLE_VERSION}_${CLJSTYLE_PLATFORM}.zip 25 | - run: 26 | name: Check source formatting 27 | command: "./cljstyle check --report" 28 | 29 | lint: 30 | executor: clojure 31 | steps: 32 | - checkout 33 | - run: 34 | name: Install clj-kondo 35 | environment: 36 | CLJ_KONDO_VERSION: 2022.11.02 37 | command: | 38 | wget https://github.com/borkdude/clj-kondo/releases/download/v${CLJ_KONDO_VERSION}/clj-kondo-${CLJ_KONDO_VERSION}-linux-amd64.zip 39 | unzip clj-kondo-${CLJ_KONDO_VERSION}-linux-amd64.zip 40 | - run: 41 | name: Lint source code 42 | command: "./clj-kondo --lint src:test:blocks-tests/src" 43 | 44 | test: 45 | executor: clojure 46 | steps: 47 | - checkout 48 | - restore_cache: 49 | keys: 50 | - v1-test-{{ checksum "project.clj" }} 51 | - v1-test- 52 | - run: lein deps 53 | - run: lein check 54 | - run: lein test 55 | - save_cache: 56 | key: v1-test-{{ checksum "project.clj" }} 57 | paths: 58 | - ~/.m2 59 | 60 | coverage: 61 | executor: clojure 62 | steps: 63 | - checkout 64 | - restore_cache: 65 | keys: 66 | - v1-coverage-{{ checksum "project.clj" }} 67 | - v1-coverage- 68 | - v1-test- 69 | - run: 70 | name: Generate test coverage 71 | command: lein coverage --codecov 72 | - save_cache: 73 | paths: 74 | - ~/.m2 75 | key: v1-coverage-{{ checksum "project.clj" }} 76 | - store_artifacts: 77 | path: target/coverage 78 | destination: coverage 79 | - run: 80 | name: Install codecov 81 | command: | 82 | sudo apt-get update && sudo apt-get install gpg 83 | curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import 84 | curl -Os https://uploader.codecov.io/latest/linux/codecov 85 | curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM 86 | curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig 87 | gpgv codecov.SHA256SUM.sig codecov.SHA256SUM 88 | shasum -a 256 -c codecov.SHA256SUM 89 | chmod +x codecov 90 | - run: 91 | name: Publish coverage report 92 | command: './codecov -f target/coverage/codecov.json' 93 | 94 | 95 | # Workflow definitions 96 | workflows: 97 | version: 2 98 | test: 99 | jobs: 100 | - style 101 | - lint 102 | - test 103 | - coverage: 104 | requires: 105 | - test 106 | -------------------------------------------------------------------------------- /src/blocks/store/buffer.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.buffer 2 | "Buffer stores provide logical block storage which uses two backing stores to 3 | implement a buffer. New blocks are written to the _buffer_ store, which can 4 | be flushed to write all of the blocks to the _primary_ store. Reads return a 5 | unified view of the existing and buffered blocks." 6 | (:require 7 | [blocks.core :as block] 8 | [blocks.store :as store] 9 | [blocks.summary :as sum] 10 | [com.stuartsierra.component :as component] 11 | [manifold.deferred :as d] 12 | [manifold.stream :as s])) 13 | 14 | 15 | (defrecord BufferBlockStore 16 | [primary buffer predicate] 17 | 18 | component/Lifecycle 19 | 20 | (start 21 | [this] 22 | (when-not (satisfies? store/BlockStore primary) 23 | (throw (IllegalStateException. 24 | (str "Cannot start buffer block store without a backing primary store: " 25 | (pr-str primary))))) 26 | (when-not (satisfies? store/BlockStore buffer) 27 | (throw (IllegalStateException. 28 | (str "Cannot start buffer block store without a backing buffer store: " 29 | (pr-str buffer))))) 30 | this) 31 | 32 | 33 | (stop 34 | [this] 35 | this) 36 | 37 | 38 | store/BlockStore 39 | 40 | (-list 41 | [_ opts] 42 | (store/merge-blocks 43 | (block/list buffer opts) 44 | (block/list primary opts))) 45 | 46 | 47 | (-stat 48 | [_ id] 49 | (store/some-store [buffer primary] block/stat id)) 50 | 51 | 52 | (-get 53 | [_ id] 54 | (store/some-store [buffer primary] block/get id)) 55 | 56 | 57 | (-put! 58 | [_ block] 59 | (d/chain 60 | (block/get primary (:id block)) 61 | (fn store-block 62 | [stored] 63 | (or stored 64 | (if (or (nil? predicate) (predicate block)) 65 | (block/put! buffer block) 66 | (block/put! primary block)))))) 67 | 68 | 69 | (-delete! 70 | [_ id] 71 | (d/chain 72 | (d/zip 73 | (block/delete! buffer id) 74 | (block/delete! primary id)) 75 | (fn result 76 | [[buffered? stored?]] 77 | (boolean (or buffered? stored?)))))) 78 | 79 | 80 | (defn clear! 81 | "Remove all blocks from the buffer. Returns a deferred which yields a summary 82 | of the deleted blocks." 83 | [store] 84 | (d/chain 85 | (s/reduce 86 | sum/update 87 | (sum/init) 88 | (block/list (:buffer store))) 89 | (fn clear-buffer 90 | [summary] 91 | (d/chain 92 | (block/erase! (:buffer store)) 93 | (constantly summary))))) 94 | 95 | 96 | (defn flush! 97 | "Flush the store, writing all buffered blocks to the primary store. Returns a 98 | deferred which yields a summary of the flushed blocks." 99 | [store] 100 | (->> 101 | (block/list (:buffer store)) 102 | (s/map (fn copy 103 | [block] 104 | (d/chain 105 | (block/put! (:primary store) block) 106 | (fn delete 107 | [block'] 108 | (d/chain 109 | (block/delete! (:buffer store) (:id block)) 110 | (constantly block')))))) 111 | (s/realize-each) 112 | (s/reduce sum/update (sum/init)))) 113 | 114 | 115 | ;; ## Constructors 116 | 117 | (store/privatize-constructors! BufferBlockStore) 118 | 119 | 120 | (defn buffer-block-store 121 | "Create a new buffering block store. 122 | 123 | - `:buffer` 124 | Block store to use for new writes. 125 | - `:primary` 126 | Block store to use for flushed blocks. 127 | - `:predicate` (optional) 128 | A predicate function which should return false for blocks which should not 129 | be buffered; instead, they will be written directly to the primary store." 130 | [& {:as opts}] 131 | (map->BufferBlockStore opts)) 132 | -------------------------------------------------------------------------------- /test/blocks/store/file_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.file-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.store.file :as file :refer [file-block-store]] 5 | [blocks.store.tests :as tests] 6 | [blocks.test-utils :refer [quiet-exception]] 7 | [clojure.java.io :as io] 8 | [clojure.test :refer [deftest testing is]] 9 | [com.stuartsierra.component :as component] 10 | [manifold.stream :as s] 11 | [multiformats.hash :as multihash])) 12 | 13 | 14 | (defn- mk-tmpdir! 15 | [] 16 | (io/file "target" "test" "tmp" 17 | (str "file-block-store." (System/currentTimeMillis)))) 18 | 19 | 20 | (deftest layout-init 21 | (testing "lifecycle" 22 | (let [store (file-block-store (mk-tmpdir!))] 23 | (is (identical? store (component/stop store))))) 24 | (testing "unknown version" 25 | (let [root (mk-tmpdir!) 26 | store (file-block-store root) 27 | meta-file (io/file root "meta.properties")] 28 | (io/make-parents meta-file) 29 | (spit meta-file "version=v2\n") 30 | (is (thrown-with-msg? Exception #"storage layout version \"v2\" does not match supported version \"v1\"" 31 | (component/start store)))))) 32 | 33 | 34 | (deftest v0-layout 35 | (let [root (mk-tmpdir!) 36 | a (block/read! "one") 37 | b (block/read! "two") 38 | c (block/read! "three")] 39 | (doseq [block [a b c]] 40 | (let [hex (multihash/hex (:id block)) 41 | head (subs hex 0 8) 42 | tail (subs hex 8) 43 | file (io/file root head tail)] 44 | (io/make-parents file) 45 | (block/write! block file))) 46 | (testing "with extra files" 47 | (let [store (file-block-store root) 48 | extra (io/file root "something.unknown")] 49 | (try 50 | (spit extra "What is this file for?\n") 51 | (is (thrown-with-msg? Exception #"unknown files in block store" 52 | (component/start store))) 53 | (finally 54 | (.delete extra))))) 55 | (testing "without migration" 56 | (let [store (file-block-store root)] 57 | (is (thrown-with-msg? Exception #"v0 file block store layout" 58 | (component/start store))))) 59 | (testing "with migration" 60 | (let [store (file-block-store root :auto-migrate? true) 61 | store (component/start store)] 62 | (is (= "v1" (:version store))) 63 | (is (= a @(block/get store (:id a)))) 64 | (is (= b @(block/get store (:id b)))) 65 | (is (= c @(block/get store (:id c)))))))) 66 | 67 | 68 | (deftest file-listing 69 | (let [store (file-block-store (mk-tmpdir!)) 70 | a (block/read! "larry") 71 | b (block/read! "curly") 72 | c (block/read! "moe")] 73 | @(block/put-batch! store [a b c]) 74 | (testing "filtering" 75 | (is (= 3 (count (block/list-seq store :after "12200c")))) 76 | (is (= 3 (count (block/list-seq store :after "12200d0980")))) 77 | (is (= 1 (count (block/list-seq store :after "12204b6f51")))) 78 | (is (= 1 (count (block/list-seq store :after "12204b" :before "12204b7")))) 79 | (is (= (:id b) (:id (first (block/list-seq store :after "12204b6f51"))))) 80 | (is (empty? (block/list-seq store :after "122064")))) 81 | (testing "rogue content" 82 | (let [extra (io/file (:root store) "blocks" "12200d09" "wat")] 83 | (spit extra "what is this") 84 | (is (= 3 (count (block/list-seq store)))) 85 | (.delete extra))) 86 | (testing "exception" 87 | (let [ex (quiet-exception)] 88 | (with-redefs [blocks.store.file/file->block (fn [_ _] (throw ex))] 89 | (is (= [ex] (s/stream->seq (block/list store)))) 90 | (is (thrown? Exception 91 | (doall (block/list-seq store))))))))) 92 | 93 | 94 | (deftest ^:integration check-behavior 95 | (let [tmpdir (mk-tmpdir!)] 96 | (tests/check-store 97 | #(let [store (file-block-store tmpdir)] 98 | @(block/erase! store) 99 | store)))) 100 | -------------------------------------------------------------------------------- /test/blocks/store/cache_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.cache-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.store :as store] 5 | [blocks.store.cache :as cache :refer [caching-block-store]] 6 | [blocks.store.memory :refer [memory-block-store]] 7 | [blocks.store.tests :as tests] 8 | [clojure.test :refer [deftest testing is]] 9 | [com.stuartsierra.component :as component])) 10 | 11 | 12 | (defn new-cache 13 | "Helper function to construct a fresh cache store backed by empty memory 14 | stores." 15 | [size-limit & args] 16 | (apply caching-block-store 17 | size-limit 18 | :primary (memory-block-store) 19 | :cache (memory-block-store) 20 | args)) 21 | 22 | 23 | (deftest lifecycle 24 | (testing "construction validation" 25 | (is (thrown? IllegalArgumentException 26 | (caching-block-store nil))) 27 | (is (thrown? IllegalArgumentException 28 | (caching-block-store 0))) 29 | (is (satisfies? store/BlockStore (caching-block-store 512)))) 30 | (testing "starting" 31 | (is (thrown? IllegalStateException 32 | (component/start (caching-block-store 33 | 128 34 | :cache (memory-block-store))))) 35 | (is (thrown? IllegalStateException 36 | (component/start (caching-block-store 37 | 128 38 | :primary (memory-block-store)))))) 39 | (testing "stopping" 40 | (let [store (caching-block-store 256)] 41 | (is (identical? store (component/stop store)))))) 42 | 43 | 44 | (deftest extant-cache-contents 45 | (let [store (new-cache 1024) 46 | blocks (tests/populate-blocks! (:cache store) :max-size 64) 47 | store (component/start store)] 48 | (is (every? #(deref (block/stat (:cache store) %)) (keys blocks)) 49 | "all blocks should still be present in store") 50 | (is (every? (:priorities @(:state store)) (keys blocks)) 51 | "all blocks should have an entry in the priority map"))) 52 | 53 | 54 | (deftest space-reaping 55 | (let [store (new-cache 1024) 56 | _ (tests/populate-blocks! (:cache store) :n 32) 57 | store (component/start store)] 58 | (is (< 1024 (:total-size @(:state store))) 59 | "has more than size-limit blocks cached") 60 | (let [reaped @(cache/reap! store 512)] 61 | (is (pos? (:count reaped))) 62 | (is (< 10000 (:size reaped)))) 63 | (is (<= (:total-size @(:state store)) 512) 64 | "reap cleans up at least the desired free space"))) 65 | 66 | 67 | (deftest size-limits 68 | (testing "block without limit" 69 | (let [store (component/start (new-cache 512)) 70 | block @(block/put! store (block/read! "0123456789"))] 71 | (is @(block/stat store (:id block)) "block is stored") 72 | (is @(block/stat (:cache store) (:id block)) "cache should store block"))) 73 | (testing "block under limit" 74 | (let [store (component/start (new-cache 512 :predicate #(< (:size %) 16))) 75 | block @(block/put! store (block/read! "0123456789"))] 76 | (is @(block/stat store (:id block)) "block is stored") 77 | (is @(block/stat (:cache store) (:id block)) "cache should store block"))) 78 | (testing "block over limit" 79 | (let [store (component/start (new-cache 512 :predicate #(< (:size %) 16))) 80 | block @(block/put! store (block/read! "0123456789abcdef0123"))] 81 | (is @(block/stat store (:id block)) "block is stored") 82 | (is (nil? @(block/stat (:cache store) (:id block))) "cache should not store block"))) 83 | (testing "block larger than cache" 84 | (let [store (component/start (new-cache 16)) 85 | block @(block/put! store (block/read! "0123456789abcdef0123"))] 86 | (is @(block/stat store (:id block)) "block is stored") 87 | (is (nil? @(block/stat (:cache store) (:id block))) "cache should not store block")))) 88 | 89 | 90 | ;; TODO: cache behavior on get/put directly 91 | 92 | 93 | (deftest ^:integration check-behavior 94 | (tests/check-store 95 | #(caching-block-store 96 | 2048 97 | :predicate (fn [block] (< (:size block) 512)) 98 | :primary (memory-block-store) 99 | :cache (memory-block-store)))) 100 | -------------------------------------------------------------------------------- /doc/store.md: -------------------------------------------------------------------------------- 1 | ## Block Stores 2 | 3 | The block store protocol defines the methods necessary for implementing a block 4 | storage backend. The interface is simple enough to map to many different kinds 5 | of storage, and the library comes with basic in-memory and file-based stores. 6 | 7 | 8 | ### Implementation Decisions 9 | 10 | There are a few questions you need to answer if you decide to implement a block 11 | store. 12 | 13 | #### Laziness 14 | 15 | Will your store return _loaded_ or _lazy_ blocks? The former requires pulling 16 | the block data into memory when fetching the block, while the latter only 17 | fetches the data when the block's content is read. 18 | 19 | Different choices make sense for different backends; for example, a persistent 20 | store backed by cloud storage can usually depend on the data being there later, 21 | so lazy blocks are much more efficient. For a store based on a remote cache, it 22 | would be better to pull the block content on fetch because the data might be 23 | evicted before the block is read. 24 | 25 | #### Asynchrony 26 | 27 | The block store methods are intended to be asynchronous, to enable efficient 28 | usage in concurrent environments. This project uses the excellent 29 | [manifold](https://github.com/ztellman/manifold) library for an adaptable async 30 | framework. If there's no underlying asynchronous system to tie into in the 31 | storage backend, you can just wrap the synchronous calls in 32 | `manifold.deferred/future` to put them on a thread-pool. 33 | 34 | #### Metadata 35 | 36 | Blocks support Clojure metadata, so any additional information the store needs 37 | to communicate can be added to the blocks as metadata. This intentionally 38 | doesn't affect block equality or semantics, but can be useful for introspection. 39 | 40 | For example, a common piece of useful metadata is a URI for the concrete 41 | location of the block's data in the backing store. 42 | 43 | 44 | ### Storage Protocol 45 | 46 | Block stores must implement five methods. 47 | 48 | #### list 49 | 50 | The `list` method enumerates the blocks contained in the store as a 51 | [Manifold stream](https://github.com/ztellman/manifold/blob/master/docs/stream.md), 52 | ordered by the blocks' multihash identifier. 53 | 54 | A few simple query criteria are supported to return ranges of blocks whose id 55 | uses a specific algorithm, or falls before or after some given hex markers. The 56 | implementation of this method must return _at least_ the blocks which match the 57 | query options, and _should_ optimize the results by omitting unmatched blocks 58 | when possible. 59 | 60 | In practice, when this method is called the store should spin up an asynchronous 61 | process (usually a thread via `d/future`) that interrogates the storage layer 62 | and emits the resulting blocks on an output stream, which is returned to the 63 | caller. This stream may be closed preemptively if the consumer is done, which 64 | should cleanly terminate the listing process. 65 | 66 | If the listing encounters an exception, the error should be placed on the stream 67 | and the stream should be closed to indicate no further blocks will be coming. 68 | Consumers must handle exceptions propagated on the stream in this fashion. 69 | 70 | #### stat 71 | 72 | The `stat` method is used to return metadata about a block if the store contains 73 | it. This should return a deferred which yields the info, or `nil` if the store 74 | does not contain the requested block. 75 | 76 | This is very similar to `get`, but returns a regular Clojure map instead of 77 | a block. This map should have the same `:id`, `:size`, and `:stored-at` values 78 | as well as the same metadata a block returned from `get` would. Conceptually, 79 | this is similar to the `HEAD` vs `GET` verbs in HTTP. 80 | 81 | This distinction is useful for stores which return loaded blocks, like a cache - 82 | using `stat` allows the caller to avoid the extra IO for block content which 83 | will not be used. 84 | 85 | #### get 86 | 87 | This method fetches a block from the store. This should return a deferred 88 | which yields the block, or nil if not present. 89 | 90 | #### put! 91 | 92 | Putting a block persists it into the store. This should return a deferred which 93 | yields the stored block. If the store already contains this block, then the 94 | implementation _should_ avoid re-storing or other data transfer to the storage 95 | layer, and return the already-stored block directly. 96 | 97 | #### delete! 98 | 99 | Deleting a block removes it from the store. This should return a deferred which 100 | yields true if the block was stored, false if it was not. 101 | 102 | 103 | ### Erasable Protocol 104 | 105 | Stores may optionally add support for the `ErasableStore` protocol, which 106 | provides an efficient mechanism for completely removing the backend data. If not 107 | implemented, the `erase!` function will fall back to listing and deleting all 108 | blocks. 109 | -------------------------------------------------------------------------------- /test/blocks/meter_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.meter-test 2 | (:require 3 | [blocks.data :as data] 4 | [blocks.meter :as meter] 5 | [blocks.test-utils :refer [quiet-exception]] 6 | [clojure.test :refer [deftest testing is]] 7 | [manifold.deferred :as d] 8 | [manifold.stream :as s])) 9 | 10 | 11 | (defrecord TestStore 12 | []) 13 | 14 | 15 | (defn- recording-store 16 | [] 17 | (let [events (atom [])] 18 | (map->TestStore 19 | {::events events 20 | ::meter/label "TestStore" 21 | ::meter/recorder 22 | (fn record! 23 | [store event] 24 | (swap! (::events store) conj event))}))) 25 | 26 | 27 | (deftest block-metering 28 | (testing "constructor" 29 | (let [block (data/read-block :sha1 "foo bar baz")] 30 | (is (nil? (meter/metered-block {} :metric nil)) 31 | "should be nil-tolerant") 32 | (is (identical? block (meter/metered-block {} :metric block)) 33 | "disabled store should return block unaltered"))) 34 | (testing "block reading" 35 | (let [store (recording-store) 36 | events (::events store) 37 | content "the quick fox jumped over the lazy brown dog" 38 | block (data/read-block :sha1 content) 39 | block' (meter/metered-block store ::io block)] 40 | (is (not (identical? block' block))) 41 | (binding [meter/*io-report-period* 0] 42 | (testing "read one byte" 43 | (reset! events []) 44 | (with-open [input (data/content-stream block' nil nil)] 45 | (.read input) 46 | (is (= 1 (count @events))) 47 | (let [event (first @events)] 48 | (is (= "TestStore" (:label event))) 49 | (is (= ::io (:type event))) 50 | (is (= 1 (:value event)))))) 51 | (testing "read remaining bytes" 52 | (reset! events []) 53 | (with-open [input (data/content-stream block' nil nil)] 54 | (is (= content (slurp input)))) 55 | (is (= 1 (count @events))) 56 | (let [event (first @events)] 57 | (is (= ::io (:type event))) 58 | (is (= 44 (:value event))))))))) 59 | 60 | 61 | (deftest measure-stream 62 | (testing "constructor" 63 | (let [stream (s/stream 10)] 64 | (is (identical? stream (meter/measure-stream {} ::flow nil stream)) 65 | "disabled store should return stream unaltered"))) 66 | (testing "stream flow" 67 | (let [store (recording-store) 68 | events (::events store) 69 | stream (s/stream 10) 70 | metered (binding [meter/*io-report-period* 0.02] 71 | (meter/measure-stream store ::flow {} stream))] 72 | (is (empty? @events)) 73 | (s/consume any? metered) 74 | @(s/put! stream :x) 75 | @(s/put! stream :x) 76 | (Thread/sleep 30) 77 | (let [result @events] 78 | (if (= 1 (count result)) 79 | (is (= {:type ::meter/list-stream 80 | :method :blocks.meter-test/flow 81 | :label "TestStore" 82 | :value 2} 83 | (first result))) 84 | (is (= [{:type ::meter/list-stream 85 | :method :blocks.meter-test/flow 86 | :label "TestStore" 87 | :value 1} 88 | {:type ::meter/list-stream 89 | :method :blocks.meter-test/flow 90 | :label "TestStore" 91 | :value 1}] 92 | result)))) 93 | (reset! events []) 94 | @(s/put! stream :x) 95 | (s/close! stream) 96 | (is (= [{:type ::meter/list-stream 97 | :method :blocks.meter-test/flow 98 | :label "TestStore" 99 | :value 1}] 100 | @events))))) 101 | 102 | 103 | (deftest measure-method 104 | (testing "constructor" 105 | (let [d (d/deferred)] 106 | (is (identical? d (meter/measure-method {} :foo nil d)) 107 | "disabled store should return deferred unaltered"))) 108 | (testing "elapsed time" 109 | (let [store (recording-store) 110 | events (::events store) 111 | d (d/deferred) 112 | d' (meter/measure-method store :foo nil d)] 113 | (is (not (identical? d d'))) 114 | (is (empty? @events)) 115 | (Thread/sleep 3) 116 | (d/success! d true) 117 | (is (= true @d')) 118 | (is (= 1 (count @events))) 119 | (is (= ::meter/method-time (:type (first @events)))) 120 | (is (<= 3.0 (:value (first @events))))))) 121 | 122 | 123 | (deftest miscellaney 124 | (testing "store labeling" 125 | (is (= "TestStore" (#'meter/meter-label (->TestStore)))) 126 | (is (= "FooStore" (#'meter/meter-label (map->TestStore 127 | {::meter/label "FooStore"}))))) 128 | (testing "bad recorder" 129 | (let [store (map->TestStore 130 | {::meter/recorder 131 | (fn [_ _] (throw (quiet-exception)))})] 132 | (is (nil? (#'meter/record! store :boom! 1 nil)))))) 133 | -------------------------------------------------------------------------------- /test/blocks/data_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.data-test 2 | (:require 3 | [blocks.data :as data] 4 | [clojure.test :refer [deftest testing is]] 5 | [multiformats.hash :as multihash]) 6 | (:import 7 | java.io.ByteArrayInputStream)) 8 | 9 | 10 | (deftest block-type 11 | (let [b1 (data/read-block :sha1 "howdy frobblenitz") 12 | b2 (data/read-block :sha2-256 "howdy frobblenitz") 13 | b2' (vary-meta b2 assoc ::test 123)] 14 | (testing "equality" 15 | (is (= b1 (data/read-block :sha1 "howdy frobblenitz"))) 16 | (is (= b2 b2')) 17 | (is (not= b1 b2) 18 | "blocks with different algorithms should not be equal")) 19 | (testing "hash codes" 20 | (is (= (hash b2) (hash b2'))) 21 | (is (not= (hash b1) (hash b2)) 22 | "blocks with different algorithms should not be equal")) 23 | (testing "comparison" 24 | (is (zero? (compare b1 b1))) 25 | (is (zero? (compare b2 b2'))) 26 | (is (neg? (compare b1 b2)) 27 | "should sort blocks by id")) 28 | (testing "metadata" 29 | (is (nil? (meta b1)) 30 | "should be constructed with no metadata") 31 | (is (= 123 (::test (meta b2')))) 32 | (is (= {:foo true} (meta (with-meta b1 {:foo true}))) 33 | "should support metadata")) 34 | (testing "accessors" 35 | (is (some? (:id b1))) 36 | (is (number? (:size b2))) 37 | (is (inst? (:stored-at b1)))) 38 | (testing "print-method" 39 | (is (string? (pr-str b1)))))) 40 | 41 | 42 | (deftest block-laziness 43 | (let [content "foo bar baz abc123" 44 | loaded (data/read-block :sha1 content) 45 | lazy (data/create-block 46 | (:id loaded) (:size loaded) 47 | #(ByteArrayInputStream. (.getBytes content))) 48 | wrapped (data/wrap-content loaded (constantly ::wrapped))] 49 | (is (data/byte-content? loaded)) 50 | (is (not (data/byte-content? lazy))) 51 | (is (= loaded wrapped)) 52 | (is (not (data/byte-content? wrapped))))) 53 | 54 | 55 | (deftest content-reading 56 | (testing "persistent bytes" 57 | (let [block (data/read-block :sha1 "foo bar baz")] 58 | (is (= "foo bar baz" (slurp (data/content-stream block nil nil)))) 59 | (is (= "bar baz" (slurp (data/content-stream block 4 nil)))) 60 | (is (= "foo" (slurp (data/content-stream block nil 3)))) 61 | (is (= "bar" (slurp (data/content-stream block 4 7)))))) 62 | (testing "reader function" 63 | (let [content "foo bar baz" 64 | block (data/create-block 65 | (multihash/sha1 content) 66 | (count content) 67 | (fn reader 68 | [] 69 | (ByteArrayInputStream. (.getBytes content))))] 70 | (is (= "foo bar baz" (slurp (data/content-stream block nil nil)))) 71 | (is (= "bar baz" (slurp (data/content-stream block 4 nil)))) 72 | (is (= "foo" (slurp (data/content-stream block nil 3)))) 73 | (is (= "bar" (slurp (data/content-stream block 4 7))))))) 74 | 75 | 76 | (deftest hasher-resolution 77 | (testing "invalid algorithm name types" 78 | (is (thrown? IllegalArgumentException (data/hasher nil))) 79 | (is (thrown? IllegalArgumentException (data/hasher 123)))) 80 | (testing "keyword algorithm name" 81 | (is (thrown? IllegalArgumentException (data/hasher :sha8-4096)) 82 | "unsupported algorithm should throw exception") 83 | (is (ifn? (data/hasher :sha2-256))))) 84 | 85 | 86 | (deftest block-construction 87 | (let [id (multihash/sha1 "foo")] 88 | (is (thrown? Exception 89 | (data/create-block "foo" 123 (constantly nil)))) 90 | (is (thrown? Exception 91 | (data/create-block id 0 (constantly nil)))) 92 | (is (thrown? Exception 93 | (data/create-block id 1 :inst (constantly nil)))) 94 | (is (thrown? Exception 95 | (data/create-block id 1 nil))))) 96 | 97 | 98 | (deftest block-merging 99 | (let [a (-> (data/read-block :sha1 "foo") 100 | (vary-meta assoc ::abc 123)) 101 | b (-> (data/read-block :sha1 "foo") 102 | (vary-meta assoc ::abc 456 ::xyz :ok)) 103 | c (data/read-block :sha1 "bar")] 104 | (testing "merging blocks with different ids" 105 | (is (thrown-with-msg? Exception #"Cannot merge blocks with differing ids" 106 | (data/merge-blocks a c)))) 107 | (testing "merging blocks with different sizes" 108 | (is (thrown-with-msg? Exception #"Cannot merge blocks with differing sizes" 109 | (data/merge-blocks 110 | a (data/create-block (:id a) 8 (constantly nil)))))) 111 | (testing "merged block" 112 | (let [merged (data/merge-blocks a b)] 113 | (is (= (:id merged) (:id b)) 114 | "should have b's id") 115 | (is (= (:size merged) (:size b)) 116 | "should have b's size") 117 | (is (= (:stored-at merged) (:stored-at b)) 118 | "should have b's timestamp") 119 | (is (identical? (.content merged) (.content b)) 120 | "should have b's content") 121 | (is (= 456 (::abc (meta merged)))) 122 | (is (= :ok (::xyz (meta merged)))))))) 123 | -------------------------------------------------------------------------------- /src/blocks/data/PersistentBytes.java: -------------------------------------------------------------------------------- 1 | package blocks.data; 2 | 3 | 4 | import clojure.lang.ArrayIter; 5 | import clojure.lang.IHashEq; 6 | import clojure.lang.ISeq; 7 | import clojure.lang.Indexed; 8 | import clojure.lang.IteratorSeq; 9 | import clojure.lang.Murmur3; 10 | import clojure.lang.Seqable; 11 | import clojure.lang.Sequential; 12 | 13 | import java.io.ByteArrayInputStream; 14 | import java.io.InputStream; 15 | import java.io.IOException; 16 | 17 | import java.nio.ByteBuffer; 18 | 19 | import java.util.Arrays; 20 | import java.util.Iterator; 21 | 22 | 23 | /** 24 | * Simple immutable byte sequence data structure. 25 | */ 26 | public class PersistentBytes implements Comparable, IHashEq, Indexed, Iterable, Seqable, Sequential { 27 | 28 | private final byte[] _data; 29 | private int _hash = -1; 30 | private int _hasheq = -1; 31 | 32 | 33 | 34 | ///// Constructors ///// 35 | 36 | /** 37 | * Constructs a new PersistentBytes object from the given binary data. 38 | * 39 | * @param data array of bytes to wrap 40 | */ 41 | private PersistentBytes(byte[] data) { 42 | if ( data == null ) { 43 | throw new IllegalArgumentException("Cannot construct persistent byte sequence on null data."); 44 | } 45 | _data = data; 46 | } 47 | 48 | 49 | /** 50 | * Constructs a new PersistentBytes object by wrapping the given binary 51 | * data. This is more efficient, but leaves the instance open to mutation 52 | * if the original array is modified. 53 | * 54 | * @return new persistent byte sequence, or null if data is null or empty 55 | */ 56 | public static PersistentBytes wrap(byte[] data) { 57 | if ( data == null || data.length == 0 ) { 58 | return null; 59 | } 60 | return new PersistentBytes(data); 61 | } 62 | 63 | 64 | /** 65 | * Constructs a new PersistentBytes object with a copy of the given binary 66 | * data. 67 | * 68 | * @return new persistent byte sequence, or null if data is null or empty 69 | */ 70 | public static PersistentBytes copyFrom(byte[] data) { 71 | if ( data == null || data.length == 0 ) { 72 | return null; 73 | } 74 | return new PersistentBytes(Arrays.copyOf(data, data.length)); 75 | } 76 | 77 | 78 | 79 | ///// Object ///// 80 | 81 | @Override 82 | public int hashCode() { 83 | if ( _hash == -1 ) { 84 | _hash = Arrays.hashCode(_data); 85 | } 86 | return _hash; 87 | } 88 | 89 | 90 | @Override 91 | public boolean equals(Object x) { 92 | if ( this == x ) return true; 93 | if ( x instanceof PersistentBytes ) { 94 | PersistentBytes bytes = (PersistentBytes)x; 95 | return toBuffer().equals(bytes.toBuffer()); 96 | } else if ( x instanceof ByteBuffer ) { 97 | return toBuffer().equals(x); 98 | } else if ( x instanceof byte[] ) { 99 | byte[] bytes = (byte[])x; 100 | return Arrays.equals(_data, bytes); 101 | } 102 | return false; 103 | } 104 | 105 | 106 | @Override 107 | public String toString() { 108 | return String.format("%s[size=%d]", this.getClass().getSimpleName(), count()); 109 | } 110 | 111 | 112 | 113 | ///// Comparable ///// 114 | 115 | @Override 116 | public int compareTo(PersistentBytes other) { 117 | int prefixLen = Math.min(count(), other.count()); 118 | for ( int i = 0; i < prefixLen; i++ ) { 119 | byte a = (byte)nth(i); 120 | byte b = (byte)other.nth(i); 121 | if ( a != b ) { 122 | return a - b; 123 | } 124 | } 125 | return count() - other.count(); 126 | } 127 | 128 | 129 | 130 | ///// IHashEq ///// 131 | 132 | @Override 133 | public int hasheq() { 134 | if ( _hasheq == -1 ) { 135 | _hasheq = Murmur3.hashOrdered(this); 136 | } 137 | return _hasheq; 138 | } 139 | 140 | 141 | // TODO: equiv? 142 | 143 | 144 | 145 | ///// Indexed ///// 146 | 147 | @Override 148 | public int count() { 149 | return _data.length; 150 | } 151 | 152 | 153 | @Override 154 | public Object nth(int i) { 155 | return _data[i]; 156 | } 157 | 158 | 159 | @Override 160 | public Object nth(int i, Object notFound) { 161 | if ( i >= 0 && i < count() ) { 162 | return nth(i); 163 | } 164 | return notFound; 165 | } 166 | 167 | 168 | 169 | ///// Iterable ///// 170 | 171 | @Override 172 | public Iterator iterator() { 173 | return ArrayIter.createFromObject(_data); 174 | } 175 | 176 | 177 | 178 | ///// Seqable ///// 179 | 180 | @Override 181 | public ISeq seq() { 182 | return IteratorSeq.create(iterator()); 183 | } 184 | 185 | 186 | 187 | ///// IO Methods ///// 188 | 189 | /** 190 | * Opens an input stream to read the content. 191 | * 192 | * @return initialized input stream 193 | */ 194 | public InputStream open() { 195 | return new ByteArrayInputStream(_data); 196 | } 197 | 198 | 199 | /** 200 | * Creates a buffer view of the content. 201 | * 202 | * @return read-only ByteBuffer 203 | */ 204 | public ByteBuffer toBuffer() { 205 | return ByteBuffer.wrap(_data).asReadOnlyBuffer(); 206 | } 207 | 208 | 209 | /** 210 | * Creates a copy of the byte data. 211 | * 212 | * @return byte array data copy 213 | */ 214 | public byte[] toByteArray() { 215 | return Arrays.copyOf(_data, _data.length); 216 | } 217 | 218 | } 219 | -------------------------------------------------------------------------------- /src/blocks/meter.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.meter 2 | "Instrumentation for block stores to measure data flows, call latencies, and 3 | other metrics. 4 | 5 | The logic in this namespace is built around the notion of a _metric event_ and 6 | an associated _recording function_ on the store which the events are passed 7 | to. Each event has at least a namespaced `:type` keyword, a `:label` 8 | associated with the store, and a numeric `:value`. 9 | 10 | Events may contain other information like the block id or method name as 11 | well, and it is up to the receiver to interpret them." 12 | (:require 13 | [blocks.data :as data] 14 | [clojure.tools.logging :as log] 15 | [manifold.deferred :as d] 16 | [manifold.stream :as s]) 17 | (:import 18 | java.io.InputStream 19 | (java.util.concurrent.atomic 20 | AtomicLong) 21 | (org.apache.commons.io.input 22 | ProxyInputStream))) 23 | 24 | 25 | ;; ## Utilities 26 | 27 | (defn- stopwatch 28 | "Create a delay expression which will return the number of milliseconds 29 | elapsed between its creation and dereference." 30 | [] 31 | (let [start (System/nanoTime)] 32 | (delay (/ (- (System/nanoTime) start) 1e6)))) 33 | 34 | 35 | (defn- format-bytes 36 | "Format a byte value as a string with the given suffix." 37 | [value unit] 38 | (loop [value value 39 | prefixes ["" "K" "M" "G"]] 40 | (if (and (< 1024 value) (seq prefixes)) 41 | (recur (/ value 1024) (next prefixes)) 42 | (if (nat-int? value) 43 | (format "%d %s%s" value (first prefixes) unit) 44 | (format "%.1f %s%s" (double value) (first prefixes) unit))))) 45 | 46 | 47 | (defn- meter-label 48 | "Construct a string to label the metered store." 49 | [store] 50 | (str (or (::label store) (.getSimpleName (class store))))) 51 | 52 | 53 | (defn- enabled? 54 | "True if the store has metering enabled and a valid recorder." 55 | [store] 56 | (boolean (::recorder store))) 57 | 58 | 59 | (defn- record! 60 | "Helper to record an event to the metered store if a recording function is 61 | present." 62 | [store metric-type value attrs] 63 | (when-let [recorder (::recorder store)] 64 | (try 65 | (recorder 66 | store 67 | (assoc attrs 68 | :type metric-type 69 | :label (meter-label store) 70 | :value value)) 71 | (catch Exception ex 72 | (log/warn ex "Failure while recording metric"))))) 73 | 74 | 75 | ;; ## Stream Metering 76 | 77 | (def ^:dynamic *io-report-period* 78 | "Record incremental IO metrics every N seconds." 79 | 10) 80 | 81 | 82 | (defn- metering-block-stream 83 | "Wrap the given stream in an intermediate stream which will record metric 84 | events with the number of blocks which passed through the stream." 85 | [store metric-type attrs stream] 86 | (let [counter (AtomicLong. 0) 87 | period *io-report-period* 88 | label (meter-label store) 89 | out (s/map #(do (.incrementAndGet counter) %) stream) 90 | reports (s/periodically 91 | (* period 1000) 92 | #(.getAndSet counter 0)) 93 | flush! (fn flush! 94 | [sum] 95 | (when (pos? sum) 96 | (log/tracef "Metered %s of %d blocks through stream %s (%.2f/sec)" 97 | (name metric-type) sum label 98 | (double (/ sum period))) 99 | (record! store metric-type sum attrs)))] 100 | (s/consume flush! reports) 101 | (s/on-closed 102 | stream 103 | (fn report-final 104 | [] 105 | (flush! (.getAndSet counter 0)) 106 | (s/close! reports))) 107 | out)) 108 | 109 | 110 | (defn- metering-input-stream 111 | "Wrap the given input stream in a proxy which will record metric events with 112 | the given type and number of bytes read." 113 | [store metric-type block-id ^InputStream input-stream] 114 | (let [meter (volatile! [(System/nanoTime) 0])] 115 | (letfn [(flush! 116 | [] 117 | (let [[last-time sum] @meter 118 | elapsed (/ (- (System/nanoTime) last-time) 1e9) 119 | label (meter-label store)] 120 | (when (pos? sum) 121 | (log/tracef "Metered %s of %s block %s: %s (%s)" 122 | (name metric-type) label block-id 123 | (format-bytes sum "B") 124 | (format-bytes (/ sum elapsed) "Bps")) 125 | (record! store metric-type sum {:block block-id}) 126 | (vreset! meter [(System/nanoTime) 0]))))] 127 | (proxy [ProxyInputStream] [input-stream] 128 | 129 | (afterRead 130 | [n] 131 | (when (pos? n) 132 | (let [[last-time _] (vswap! meter update 1 + n) 133 | elapsed (/ (- (System/nanoTime) last-time) 1e9)] 134 | (when (<= *io-report-period* elapsed) 135 | (flush!))))) 136 | 137 | (close 138 | [] 139 | (flush!) 140 | (.close input-stream)))))) 141 | 142 | 143 | ;; ## Metered Content 144 | 145 | (deftype MeteredContentReader 146 | [store metric-type block-id content] 147 | 148 | data/ContentReader 149 | 150 | (read-all 151 | [_] 152 | (metering-input-stream 153 | store metric-type block-id 154 | (data/read-all content))) 155 | 156 | 157 | (read-range 158 | [_ start end] 159 | (metering-input-stream 160 | store metric-type block-id 161 | (data/read-range content start end)))) 162 | 163 | 164 | (alter-meta! #'->MeteredContentReader assoc :private true) 165 | 166 | 167 | (defn metered-block 168 | "Wrap the block with a lazy constructor for a metered input stream which will 169 | report metrics for the given type. If the store does not have a recorder, the 170 | block will be returned unchanged." 171 | [store metric-type block] 172 | (when block 173 | (if (enabled? store) 174 | (data/wrap-content 175 | block 176 | (partial ->MeteredContentReader 177 | store 178 | metric-type 179 | (:id block))) 180 | block))) 181 | 182 | 183 | ;; ## Method Wrappers 184 | 185 | (defn measure-stream 186 | "Measure the flow of blocks through a manifold stream. Returns the wrapped 187 | stream, or the original if the store does not have metering enabled." 188 | [store method-kw attrs stream] 189 | (cond->> stream 190 | (enabled? store) 191 | (metering-block-stream 192 | store ::list-stream 193 | (assoc attrs :method method-kw)))) 194 | 195 | 196 | (defn measure-method 197 | "Measure the end-to-end elapsed time for a block store method. Returns a 198 | deferred with a final report hook if the store has metering enabled." 199 | [store method-kw attrs body-deferred] 200 | (let [elapsed (stopwatch)] 201 | (cond-> body-deferred 202 | (enabled? store) 203 | (d/finally 204 | (fn record-elapsed 205 | [] 206 | (log/tracef "Method %s of %s block store on %s took %.1f ms" 207 | (name method-kw) 208 | (meter-label store) 209 | attrs 210 | @elapsed) 211 | (record! 212 | store ::method-time @elapsed 213 | (assoc attrs :method method-kw))))))) 214 | -------------------------------------------------------------------------------- /src/blocks/store/cache.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.cache 2 | "Cache stores provide logical block storage backed by two other stores, a 3 | _primary store_ and a _cache_. 4 | 5 | Blocks are added to the cache on reads and writes, and evicted with a 6 | least-recently-used strategy to keep the cache under a certain total size. 7 | Operations on this store will prefer to look up blocks in the cache, and fall 8 | back to the primary store when not available. 9 | 10 | Because the caching logic runs locally, the backing cache storage should not 11 | be shared among multiple concurrent processes." 12 | (:require 13 | [blocks.core :as block] 14 | [blocks.store :as store] 15 | [blocks.summary :as sum] 16 | [clojure.data.priority-map :refer [priority-map]] 17 | [clojure.tools.logging :as log] 18 | [com.stuartsierra.component :as component] 19 | [manifold.deferred :as d]) 20 | (:import 21 | java.time.Instant)) 22 | 23 | 24 | (defn- scan-state 25 | "Computes the state of a cache, including priorities for all stored blocks and 26 | the total size of block content stored." 27 | [store] 28 | (reduce 29 | (fn [state block] 30 | (let [tick (if-let [stored-at (:stored-at block)] 31 | (long (/ (.toEpochMilli ^Instant stored-at) 1000)) 32 | 0)] 33 | (-> state 34 | (update :priorities assoc (:id block) [tick (:size block)]) 35 | (update :total-size + (:size block)) 36 | (update :tick max tick)))) 37 | {:priorities (priority-map) 38 | :total-size 0 39 | :tick 0} 40 | (block/list-seq store))) 41 | 42 | 43 | (defn- cacheable? 44 | "True if the block may be cached in this store." 45 | [store block] 46 | (let [{:keys [size-limit predicate]} store] 47 | (and (<= (:size block) size-limit) 48 | (or (nil? predicate) (predicate block))))) 49 | 50 | 51 | (defn- touch-block 52 | "Update the cache state to account for the usage (fetch or store) of a 53 | block." 54 | [state block] 55 | (let [id (:id block) 56 | size (:size block) 57 | priorities (:priorities state)] 58 | (-> state 59 | (update :tick inc) 60 | (update :priorities assoc id [(:tick state) size]) 61 | (cond-> 62 | (not (contains? priorities id)) 63 | (update :total-size + size))))) 64 | 65 | 66 | (defn- remove-block 67 | "Update the cache state to remove a block from it by id." 68 | [state id] 69 | (if-let [[_ size] (get-in state [:priorities id])] 70 | (-> state 71 | (update :total-size - size) 72 | (update :priorities dissoc id)) 73 | state)) 74 | 75 | 76 | (defn reap! 77 | "Given a target amount of space to free and a cache store, deletes blocks from 78 | the cache to free up the desired amount of space. Returns a deferred which 79 | yields a summary of the deleted entries." 80 | [store target-free] 81 | (let [{:keys [cache state size-limit]} store] 82 | (d/loop [deleted (sum/init)] 83 | (let [{:keys [priorities total-size]} @state] 84 | (if (and (< (- size-limit total-size) target-free) 85 | (seq priorities)) 86 | ;; Need to delete the next block. 87 | (let [[id [_ size]] (peek priorities)] 88 | (swap! state remove-block id) 89 | (d/chain 90 | (block/delete! cache id) 91 | (fn next-delete 92 | [deleted?] 93 | (d/recur (if deleted? 94 | (sum/update deleted {:id id, :size size}) 95 | deleted))))) 96 | ;; Enough free space, or no more blocks to delete. 97 | deleted))))) 98 | 99 | 100 | (defn- cache-block! 101 | "Store a block in the cache and update the internal tracking state." 102 | [store block] 103 | (swap! (:state store) touch-block block) 104 | (d/chain 105 | (reap! store (:size block)) 106 | (fn cache-block 107 | [_] 108 | (block/put! (:cache store) block)))) 109 | 110 | 111 | (defrecord CachingBlockStore 112 | [size-limit predicate primary cache state] 113 | 114 | component/Lifecycle 115 | 116 | (start 117 | [this] 118 | (when-not (satisfies? store/BlockStore primary) 119 | (throw (IllegalStateException. 120 | (str "Cannot start caching block store without a backing primary store: " 121 | (pr-str primary))))) 122 | (when-not (satisfies? store/BlockStore cache) 123 | (throw (IllegalStateException. 124 | (str "Cannot start caching block store without a backing cache store: " 125 | (pr-str cache))))) 126 | (when-not @state 127 | (let [initial-state (scan-state cache) 128 | cached-bytes (:total-size initial-state)] 129 | (reset! state initial-state) 130 | (when (pos? cached-bytes) 131 | (log/infof "Cache has %d bytes in %d blocks" 132 | (:total-size initial-state) 133 | (count (:priorities initial-state)))))) 134 | this) 135 | 136 | 137 | (stop 138 | [this] 139 | this) 140 | 141 | 142 | store/BlockStore 143 | 144 | (-list 145 | [_ opts] 146 | (store/merge-blocks 147 | (block/list cache opts) 148 | (block/list primary opts))) 149 | 150 | 151 | (-stat 152 | [_ id] 153 | (store/some-store [cache primary] block/stat id)) 154 | 155 | 156 | (-get 157 | [this id] 158 | (d/chain 159 | (block/get cache id) 160 | (fn check-cache 161 | [block] 162 | (if block 163 | (vary-meta block assoc ::cached? true) 164 | (block/get primary id))) 165 | (fn recache 166 | [block] 167 | (cond 168 | ;; Block not present in cache or primary. 169 | (nil? block) 170 | nil 171 | 172 | ;; Block is already cached. 173 | (::cached? (meta block)) 174 | (do (swap! state touch-block block) 175 | block) 176 | 177 | ;; Determine whether to cache the primary block. 178 | (cacheable? this block) 179 | (cache-block! this block) 180 | 181 | ;; Non cacheable block from the primary store. 182 | :else block)))) 183 | 184 | 185 | (-put! 186 | [this block] 187 | (d/chain 188 | (d/zip 189 | (block/put! primary block) 190 | (when (cacheable? this block) 191 | (cache-block! this block))) 192 | (fn return-preferred 193 | [[stored cached]] 194 | (store/preferred-block 195 | stored 196 | (when cached 197 | (vary-meta cached assoc ::cached? true)))))) 198 | 199 | 200 | (-delete! 201 | [_ id] 202 | (d/chain 203 | (d/zip 204 | (block/delete! primary id) 205 | (block/delete! cache id)) 206 | (fn result 207 | [[stored? cached?]] 208 | (boolean (or stored? cached?))))) 209 | 210 | 211 | store/ErasableStore 212 | 213 | (-erase! 214 | [_] 215 | (d/chain 216 | (d/zip 217 | (block/erase! primary) 218 | (block/erase! cache)) 219 | (constantly true)))) 220 | 221 | 222 | ;; ## Constructors 223 | 224 | (store/privatize-constructors! CachingBlockStore) 225 | 226 | 227 | (defn caching-block-store 228 | "Create a new logical block store which will use one block store to cache 229 | up to a certain size of content for another store. This store should have a 230 | `:primary` and a `:cache` associated with it for backing block storage. 231 | 232 | - `:primary` 233 | Backing store with the primary block data. 234 | - `:cache` 235 | Store to cache blocks in and prefer for reads. 236 | - `:size-limit` 237 | Maximum total size of blocks to keep in the cache store. 238 | - `:predicate` (optional) 239 | A predicate function which should return false for blocks which should not 240 | be cached; instead, they will only be written to the primary store." 241 | [size-limit & {:as opts}] 242 | (when-not (pos-int? size-limit) 243 | (throw (IllegalArgumentException. 244 | (str "Cache store size-limit must be a positive integer: " 245 | (pr-str size-limit))))) 246 | (map->CachingBlockStore 247 | (assoc opts 248 | :size-limit size-limit 249 | :state (atom nil)))) 250 | -------------------------------------------------------------------------------- /test/blocks/store_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store-test 2 | (:require 3 | [blocks.data :as data] 4 | [blocks.store :as store] 5 | [blocks.test-utils :refer [quiet-exception quiet-error-deferred]] 6 | [clojure.test :refer [deftest testing is]] 7 | [manifold.deferred :as d] 8 | [manifold.stream :as s] 9 | [multiformats.hash :as multihash])) 10 | 11 | 12 | (deftest uri-parsing 13 | (is (= {:scheme "mem", :name "-"} (store/parse-uri "mem:-"))) 14 | (is (= {:scheme "file", :path "/foo/bar"} (store/parse-uri "file:///foo/bar"))) 15 | (is (= {:scheme "file", :host "foo" :path "/bar"} (store/parse-uri "file://foo/bar"))) 16 | (is (= {:scheme "https" 17 | :user-info {:id "user" 18 | :secret "password"} 19 | :host "example.com" 20 | :port 443 21 | :path "/path/to/thing" 22 | :query {:foo "alpha" 23 | :bar "123"}} 24 | (store/parse-uri "https://user:password@example.com:443/path/to/thing?foo=alpha&bar=123")))) 25 | 26 | 27 | (deftest block-preference 28 | (is (nil? (store/preferred-block nil)) 29 | "returns nil with no block arguments") 30 | (let [loaded (data/read-block :sha1 "foo") 31 | lazy-a (data/create-block 32 | (multihash/sha1 "foo") 3 33 | #(java.io.ByteArrayInputStream. (.getBytes "foo"))) 34 | lazy-b (data/create-block 35 | (multihash/sha1 "bar") 3 36 | #(java.io.ByteArrayInputStream. (.getBytes "bar")))] 37 | (is (= loaded (store/preferred-block lazy-a loaded lazy-b)) 38 | "returns loaded block if present") 39 | (is (= lazy-a (store/preferred-block lazy-a lazy-b)) 40 | "returns first block if all lazy"))) 41 | 42 | 43 | (deftest block-selection 44 | (let [a (multihash/create :sha1 "37b51d194a7513e45b56f6524f2d51f200000000") 45 | b (multihash/create :sha1 "73fcffa4b7f6bb68e44cf984c85f6e888843d7f9") 46 | c (multihash/create :sha1 "73fe285cedef654fccc4a4d818db4cc225932878") 47 | d (multihash/create :sha1 "acbd18db4cc2f856211de9ecedef654fccc4a4d8") 48 | e (multihash/create :sha1 "c3c23db5285662ef717963ff4ce2373df0003206") 49 | f (multihash/create :sha2-256 "285c3c23d662b5ef7172373df0963ff4ce003206") 50 | ids [a b c d e f] 51 | blocks (mapv #(hash-map :id % :size 1) ids) 52 | filtered-stream #(->> (s/->source %1) 53 | (store/select-blocks %2) 54 | (s/stream->seq) 55 | (into []))] 56 | (is (= ids (map :id (filtered-stream blocks {})))) 57 | (testing "exception" 58 | (let [boom (quiet-exception)] 59 | (is (= [{:id a, :size 1} boom] 60 | (filtered-stream [{:id a, :size 1} boom {:id b, :size 1}] {})) 61 | "should halt stream"))) 62 | (testing "by algorithm" 63 | (is (= [f] (map :id (filtered-stream blocks {:algorithm :sha2-256}))))) 64 | (testing "rank markers" 65 | (is (= [c d e f] (map :id (filtered-stream blocks {:after "111473fd2"})))) 66 | (is (= [a b] (map :id (filtered-stream blocks {:before "111473fd2"})))) 67 | (is (= [b c d] (map :id (filtered-stream blocks {:after "11147", :before "1114b"}))))) 68 | (testing "limit" 69 | (is (= [a b c d e f] (map :id (filtered-stream blocks {:limit 8})))) 70 | (is (= [a b c d e f] (map :id (filtered-stream blocks {:limit 6})))) 71 | (is (= [a b c] (map :id (filtered-stream blocks {:limit 3})))) 72 | (is (= [c d] (map :id (filtered-stream blocks {:after "111473fd", :limit 2}))))))) 73 | 74 | 75 | (deftest list-merging 76 | (let [list-a [{:id "aaa"} 77 | {:id "abb"} 78 | {:id "abc"}] 79 | list-b [{:id "aab"} 80 | {:id "abc"}] 81 | list-c [{:id "aaa"} 82 | {:id "xyz"}] 83 | try-merge (fn try-merge 84 | [& lists] 85 | (->> (map s/->source lists) 86 | (apply store/merge-blocks) 87 | (s/stream->seq) 88 | (into [])))] 89 | (testing "single stream" 90 | (is (= list-a (try-merge list-a)))) 91 | (testing "full merge" 92 | (is (= [{:id "aaa"} 93 | {:id "aab"} 94 | {:id "abb"} 95 | {:id "abc"} 96 | {:id "xyz"}] 97 | (try-merge list-a list-b list-c)))) 98 | (testing "stream error" 99 | (let [boom (quiet-exception)] 100 | (is (= [{:id "aaa"} 101 | {:id "aab"} 102 | boom] 103 | (try-merge list-a [{:id "aab"} boom] list-c))))) 104 | (testing "preemptive close" 105 | (let [merged (store/merge-blocks 106 | (s/->source list-b) 107 | (s/->source list-c))] 108 | (is (= {:id "aaa"} @(s/try-take! merged ::drained 1000 ::timeout))) 109 | (is (= {:id "aab"} @(s/try-take! merged ::drained 1000 ::timeout))) 110 | (s/close! merged) 111 | (is (= {:id "abc"} @(s/try-take! merged ::drained 1000 ::timeout))) 112 | (is (identical? ::drained @(s/try-take! merged ::drained 1000 ::timeout))))))) 113 | 114 | 115 | (deftest missing-block-detection 116 | (letfn [(find-missing 117 | [a b] 118 | (->> (store/missing-blocks 119 | (s/->source a) 120 | (s/->source b)) 121 | (s/stream->seq) 122 | (into [])))] 123 | (testing "basic operation" 124 | (is (= [] (find-missing [] []))) 125 | (is (= [] (find-missing [] [{:id "abc"} {:id "cde"}]))) 126 | (is (= [{:id "abc"} {:id "cde"}] 127 | (find-missing [{:id "abc"} {:id "cde"}] []))) 128 | (is (= [{:id "abc"} {:id "cde"}] 129 | (find-missing [{:id "abc"} {:id "cde"} {:id "def"}] 130 | [{:id "bcd"} {:id "cab"} {:id "def"}])))) 131 | (testing "exceptions" 132 | (let [boom (quiet-exception)] 133 | (is (= [{:id "abc"} boom] 134 | (find-missing [{:id "abc"} {:id "cde"}] 135 | [{:id "bad"} boom]))) 136 | (is (= [{:id "cde"} boom] 137 | (find-missing [{:id "abc"} {:id "cde"} boom] 138 | [{:id "abc"}]))) 139 | (is (= [boom] 140 | (find-missing [{:id "abc"} boom] 141 | [{:id "abc"} {:id "def"} {:id "efg"}]))) 142 | (is (= [boom] 143 | (find-missing [{:id "abc"} {:id "cde"}] 144 | [boom]))))))) 145 | 146 | 147 | (deftest store-utilities 148 | (testing "zip-stores" 149 | (is (= [:a :b :c] 150 | @(store/zip-stores 151 | [{:result (d/success-deferred :a)} 152 | {:result (d/success-deferred :b)} 153 | {:result (d/success-deferred :c)}] 154 | :result))) 155 | (is (thrown? RuntimeException 156 | @(store/zip-stores 157 | [{:result (d/success-deferred :a)} 158 | {:result (quiet-error-deferred)} 159 | {:result (d/success-deferred :c)}] 160 | :result)))) 161 | (testing "some-store" 162 | (testing "edge cases" 163 | (is (nil? @(store/some-store [] :result))) 164 | (is (= :a 165 | @(store/some-store 166 | [{:result (d/success-deferred :a)}] 167 | :result)))) 168 | (testing "fallback behavior" 169 | (is (= :a @(store/some-store 170 | [{:result (d/success-deferred :a)} 171 | {:result (d/success-deferred :b)}] 172 | :result))) 173 | (is (= :b @(store/some-store 174 | [{:result (d/success-deferred nil)} 175 | {:result (d/success-deferred :b)}] 176 | :result))) 177 | (is (nil? @(store/some-store 178 | [{:result (d/success-deferred nil)} 179 | {:result (d/success-deferred nil)}] 180 | :result)))) 181 | (testing "errors" 182 | (is (= :a @(store/some-store 183 | [{:result (d/success-deferred :a)} 184 | {:result (quiet-error-deferred)}] 185 | :result))) 186 | (is (thrown? RuntimeException 187 | @(store/some-store 188 | [{:result (d/success-deferred nil)} 189 | {:result (quiet-error-deferred)}] 190 | :result)))))) 191 | -------------------------------------------------------------------------------- /src/blocks/data.clj: -------------------------------------------------------------------------------- 1 | (ns ^:no-doc blocks.data 2 | "Block type and constructor functions. 3 | 4 | Blocks have two primary attributes, `:id` and `:size`. The block identifier 5 | is a multihash with the digest identifying the content. The size is the 6 | number of bytes in the block content. Blocks also have a `:stored-at` value 7 | giving the instant they were persisted, but this does not affect equality or 8 | the block's hash code. 9 | 10 | Internally, blocks may reference their content in-memory as a byte array, or 11 | a _content reader_ which constructs new input streams for the block data on 12 | demand. A block with in-memory content is considered a _loaded block_, while 13 | blocks with readers are _lazy blocks_." 14 | (:require 15 | [clojure.java.io :as io] 16 | [multiformats.hash :as multihash]) 17 | (:import 18 | blocks.data.PersistentBytes 19 | (java.io 20 | ByteArrayOutputStream 21 | InputStream) 22 | java.time.Instant 23 | multiformats.hash.Multihash 24 | (org.apache.commons.io.input 25 | BoundedInputStream))) 26 | 27 | 28 | ;; ## Block Type 29 | 30 | (deftype Block 31 | [^Multihash id 32 | ^long size 33 | ^Instant stored-at 34 | content 35 | _meta] 36 | 37 | ;; :load-ns true 38 | 39 | 40 | java.lang.Object 41 | 42 | (toString 43 | [_] 44 | (format "Block[%s %s %s]" id size stored-at)) 45 | 46 | 47 | (equals 48 | [this that] 49 | (boolean 50 | (or (identical? this that) 51 | (when (identical? (class this) (class that)) 52 | (let [that ^Block that] 53 | (and (= id (.id that)) 54 | (= size (.size that)))))))) 55 | 56 | 57 | (hashCode 58 | [this] 59 | (-> (hash (class this)) 60 | (hash-combine (hash id)) 61 | (hash-combine size))) 62 | 63 | 64 | java.lang.Comparable 65 | 66 | (compareTo 67 | [_ that] 68 | (if (= id (:id that)) 69 | (if (= size (:size that)) 70 | (if (= stored-at (:stored-at that)) 71 | 0 72 | (compare stored-at (:stored-at that))) 73 | (compare size (:size that))) 74 | (compare id (:id that)))) 75 | 76 | 77 | clojure.lang.IObj 78 | 79 | (meta 80 | [_] 81 | _meta) 82 | 83 | 84 | (withMeta 85 | [_ meta-map] 86 | (Block. id size stored-at content meta-map)) 87 | 88 | 89 | clojure.lang.ILookup 90 | 91 | (valAt 92 | [this k] 93 | (.valAt this k nil)) 94 | 95 | 96 | (valAt 97 | [_ k not-found] 98 | (case k 99 | :id id 100 | :size size 101 | :stored-at stored-at 102 | not-found))) 103 | 104 | 105 | (defmethod print-method Block 106 | [v ^java.io.Writer w] 107 | (.write w (str v))) 108 | 109 | 110 | ;; ## Content Readers 111 | 112 | (defprotocol ContentReader 113 | "Content readers provide functions for repeatably reading byte streams from 114 | some backing data source." 115 | 116 | (read-all 117 | [reader] 118 | "Open an input stream that returns all bytes of the content.") 119 | 120 | (read-range 121 | [reader start end] 122 | "Open an input stream that reads just bytes from `start` to `end`, 123 | inclusive. A `nil` for either value implies the beginning or end of the 124 | stream, respectively.")) 125 | 126 | 127 | (defn bounded-input-stream 128 | "Wrap an input stream such that it only returns a stream of bytes in the 129 | range `start` to `end`." 130 | ^java.io.InputStream 131 | [^InputStream input start end] 132 | (when (pos-int? start) 133 | (.skip input start)) 134 | (if (pos-int? end) 135 | (BoundedInputStream. input (- end (or start 0))) 136 | input)) 137 | 138 | 139 | (extend-protocol ContentReader 140 | 141 | PersistentBytes 142 | 143 | (read-all 144 | [^PersistentBytes this] 145 | (.open this)) 146 | 147 | 148 | (read-range 149 | [^PersistentBytes this start end] 150 | (bounded-input-stream (.open this) start end)) 151 | 152 | 153 | clojure.lang.Fn 154 | 155 | (read-all 156 | [this] 157 | (this)) 158 | 159 | 160 | (read-range 161 | [this start end] 162 | ;; Ranged open not supported for generic functions, use naive approach. 163 | (bounded-input-stream (this) start end))) 164 | 165 | 166 | (defn content-stream 167 | "Open an input stream to read the contents of the block." 168 | ^java.io.InputStream 169 | [^Block block start end] 170 | (let [content (.content block)] 171 | (if (or start end) 172 | (read-range content start end) 173 | (read-all content)))) 174 | 175 | 176 | (defn persistent-bytes? 177 | "True if the argument is a persistent byte array." 178 | [x] 179 | (instance? PersistentBytes x)) 180 | 181 | 182 | (defn byte-content? 183 | "True if the block has content loaded into memory as persistent bytes." 184 | [^Block block] 185 | (persistent-bytes? (.content block))) 186 | 187 | 188 | ;; ## Constructors 189 | 190 | ;; Remove automatic constructor function. 191 | (alter-meta! #'->Block assoc :private true) 192 | 193 | 194 | (defn- now 195 | "Return the current instant in time. 196 | 197 | This is mostly useful for rebinding during tests." 198 | ^Instant 199 | [] 200 | (Instant/now)) 201 | 202 | 203 | (defn- to-byte-array 204 | "Coerce the given source into a byte array." 205 | ^bytes 206 | [source] 207 | (if (bytes? source) 208 | source 209 | (let [baos (ByteArrayOutputStream.)] 210 | (io/copy source baos) 211 | (.toByteArray baos)))) 212 | 213 | 214 | (defn hasher 215 | "Return the hashing function for an algorithm keyword, or throw an exception 216 | if no supported function is available." 217 | [algorithm] 218 | (or (multihash/functions algorithm) 219 | (throw (IllegalArgumentException. 220 | (str "No digest function found for algorithm " 221 | algorithm))))) 222 | 223 | 224 | (defn create-block 225 | "Create a block from a content reader. The simplest version is a no-arg 226 | function which should return a new `InputStream` to read the full block 227 | content. The block is given the id and size directly, without being checked." 228 | ([id size content] 229 | (create-block id size (now) content)) 230 | ([id size stored-at content] 231 | (when-not (instance? Multihash id) 232 | (throw (ex-info "Block id must be a multihash" 233 | {:id id, :size size, :stored-at stored-at}))) 234 | (when-not (pos-int? size) 235 | (throw (ex-info "Block size must be a positive integer" 236 | {:id id, :size size, :stored-at stored-at}))) 237 | (when-not (instance? Instant stored-at) 238 | (throw (ex-info "Block must have a stored-at instant" 239 | {:id id, :size size, :stored-at stored-at}))) 240 | (when-not content 241 | (throw (ex-info "Block must have a content reader" 242 | {:id id, :size size, :stored-at stored-at}))) 243 | (->Block id size stored-at content nil))) 244 | 245 | 246 | (defn read-block 247 | "Create a block by reading the source into memory and hashing it." 248 | [algorithm source] 249 | (let [hash-fn (hasher algorithm) 250 | content (PersistentBytes/wrap (to-byte-array source)) 251 | size (count content)] 252 | (when (pos? size) 253 | (create-block (hash-fn (read-all content)) size (now) content)))) 254 | 255 | 256 | (defn merge-blocks 257 | "Create a new block by merging together two blocks representing the same 258 | content. Block ids and sizes must match. The new block's content and 259 | timestamp come from the second block, and any metadata is merged together." 260 | [^Block left ^Block right] 261 | (when (not= (.id left) (.id right)) 262 | (throw (ex-info 263 | (str "Cannot merge blocks with differing ids " (.id left) 264 | " and " (.id right)) 265 | {:left left, :right right}))) 266 | (when (not= (.size left) (.size right)) 267 | (throw (ex-info 268 | (str "Cannot merge blocks with differing sizes " (.size left) 269 | " and " (.size right)) 270 | {:left left, :right right}))) 271 | (->Block 272 | (.id right) 273 | (.size right) 274 | (.stored-at right) 275 | (.content right) 276 | (not-empty (merge (._meta left) (._meta right))))) 277 | 278 | 279 | (defn wrap-content 280 | "Wrap a block's content by calling `f` on it, returning a new block with the 281 | same id and size." 282 | ^blocks.data.Block 283 | [^Block block f] 284 | (->Block 285 | (.id block) 286 | (.size block) 287 | (.stored-at block) 288 | (f (.content block)) 289 | (._meta block))) 290 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Block Storage 2 | ============= 3 | 4 | [![CircleCI](https://circleci.com/gh/greglook/blocks.svg?style=shield&circle-token=d652bef14116ac200c225d12b6c7af33933f4c26)](https://circleci.com/gh/greglook/blocks) 5 | [![codecov](https://codecov.io/gh/greglook/blocks/branch/master/graph/badge.svg)](https://codecov.io/gh/greglook/blocks) 6 | [![cljdoc lib](https://img.shields.io/badge/cljdoc-lib-blue.svg)](https://cljdoc.org/d/mvxcvi/blocks/) 7 | 8 | This library implements [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage) 9 | types and protocols for Clojure. Content-addressable storage has several useful properties: 10 | 11 | - Data references are abstracted away from the knowledge of where and how the 12 | blocks are stored, and so can never be 'stale'. 13 | - Blocks are immutable, so there's no concern over having the 'latest version' 14 | of something - you either have it, or you don't. 15 | - References are _secure_, because a client can re-compute the digest to ensure 16 | they have received the original data unaltered. 17 | - Synchronizing data between stores only requires enumerating the stored blocks 18 | in each and exchanging missing ones. 19 | - Data can be structurally shared by different higher-level constructs. For 20 | example, a file's contents can be referenced by different versions of 21 | metadata without duplicating the file data. 22 | 23 | 24 | ## Installation 25 | 26 | Library releases are published on Clojars. To use the latest version with 27 | Leiningen, add the following dependency to your project definition: 28 | 29 | [![Clojars Project](http://clojars.org/mvxcvi/blocks/latest-version.svg)](http://clojars.org/mvxcvi/blocks) 30 | 31 | 32 | ## Block Values 33 | 34 | A _block_ is a sequence of bytes identified by the cryptographic digest of its 35 | content. All blocks have an `:id` and a `:size` - the block identifier is a 36 | [multihash](//github.com/greglook/clj-multiformats) value, and the size is the 37 | number of bytes in the block content. Blocks may also have a `:stored-at` 38 | value, which is the instant the backing store received the block. 39 | 40 | ```clojure 41 | => (require '[blocks.core :as block]) 42 | 43 | ;; Read a block into memory: 44 | => (def hello (block/read! "hello, blocks!")) 45 | #'user/hello 46 | 47 | => hello 48 | #blocks.data.Block 49 | {:id #multi/hash "hash:sha2-256:d2eef339d508c69fb6e3e99c11c11fc4fc8c035d028973057980d41c7d162684", 50 | :size 14, 51 | :stored-at #inst "2019-02-18T07:02:28.751Z"} 52 | 53 | => (:id hello) 54 | #multi/hash "hash:sha2-256:d2eef339d508c69fb6e3e99c11c11fc4fc8c035d028973057980d41c7d162684", 55 | 56 | => (:size hello) 57 | 14 58 | 59 | ;; Write a block to some output stream: 60 | => (let [baos (java.io.ByteArrayOutputStream.)] 61 | (block/write! hello baos) 62 | (String. (.toByteArray baos))) 63 | "hello, blocks!" 64 | ``` 65 | 66 | Internally, blocks either have a buffer holding the data in memory, or a reader 67 | which can be invoked to create new input streams for the block content. A block 68 | with in-memory content is a _loaded block_ while a block with a reader is a 69 | _lazy block_. 70 | 71 | ```clojure 72 | => (block/loaded? hello) 73 | true 74 | 75 | ;; Create a block from a local file: 76 | => (def readme (block/from-file "README.md")) 77 | #'user/readme 78 | 79 | ;; Block is lazily backed by the file on disk: 80 | => (block/loaded? readme) 81 | false 82 | 83 | => (block/lazy? readme) 84 | true 85 | ``` 86 | 87 | To abstract over the loaded/lazy divide, you can create an input stream over a 88 | block's content using `open`: 89 | 90 | ```clojure 91 | => (slurp (block/open hello)) 92 | "hello, blocks!" 93 | 94 | ;; You can also provide a start/end index to get a range of bytes: 95 | => (with-open [content (block/open readme {:start 0, :end 32})] 96 | (slurp content)) 97 | "Block Storage\n=============\n\n[![" 98 | ``` 99 | 100 | A block's properties and content cannot be changed after construction, but 101 | blocks do support metadata. In order to guard against the content changing in 102 | the underlying storage layer, blocks can be validated by re-reading their 103 | content: 104 | 105 | ```clojure 106 | ;; In-memory blocks will never change: 107 | => (block/validate! hello) 108 | nil 109 | 110 | ;; But if the README file backing the second block is changed: 111 | => (block/validate! readme) 112 | ; IllegalStateException Block hash:sha2-256:515c169aa0d95... has mismatched content 113 | ; blocks.core/validate! (core.clj:115) 114 | 115 | ;; Metadata can be set and queried: 116 | => (meta (with-meta readme {:baz 123})) 117 | {:baz 123} 118 | ``` 119 | 120 | 121 | ## Storage Interface 122 | 123 | A _block store_ is a system which saves and retrieves block data. Block stores 124 | have a very simple interface: they must store, retrieve, and enumerate the 125 | contained blocks. The simplest type of block storage is a memory store, which is 126 | backed by a map in memory. Another basic example is a store backed by a local 127 | filesystem, where blocks are stored as files in a directory. 128 | 129 | The block storage protocol is comprised of five methods: 130 | - `list` - enumerate the stored blocks as a stream 131 | - `stat` - get metadata about a stored block 132 | - `get` - retrieve a block from the store 133 | - `put!` - add a block to the store 134 | - `delete!` - remove a block from the store 135 | 136 | These methods are asynchronous operations which return 137 | [manifold](https://github.com/ztellman/manifold) deferred values. If you want 138 | to treat them synchronously, deref the responses immediately. 139 | 140 | ```clojure 141 | ;; Create a new memory store: 142 | => (require 'blocks.store.memory) 143 | => (def store (block/->store "mem:-")) 144 | #'user/store 145 | 146 | => store 147 | #blocks.store.memory.MemoryBlockStore {:memory #} 148 | 149 | ;; Initially, the store is empty: 150 | => (block/list-seq store) 151 | () 152 | 153 | ;; Lets put our blocks in the store so they don't get lost: 154 | => @(block/put! store hello) 155 | #blocks.data.Block 156 | {:id #multi/hash "hash:sha2-256:d2eef339d508c69fb6e3e99c11c11fc4fc8c035d028973057980d41c7d162684", 157 | :size 14, 158 | :stored-at #inst "2019-02-18T07:06:43.655Z"} 159 | 160 | => @(block/put! store readme) 161 | #blocks.data.Block 162 | {:id #multi/hash "hash:sha2-256:94d0eb8d13137ebced045b1e7ef48540af81b2abaf2cce34e924ce2cde7cfbaa", 163 | :size 8597, 164 | :stored-at #inst "2019-02-18T07:07:06.458Z"} 165 | 166 | ;; We can `stat` block ids to get metadata without content: 167 | => @(block/stat store (:id hello)) 168 | {:id #multi/hash "hash:sha2-256:94d0eb8d13137ebced045b1e7ef48540af81b2abaf2cce34e924ce2cde7cfbaa", 169 | :size 14, 170 | :stored-at #inst "2019-02-18T07:07:06.458Z"} 171 | 172 | ;; `list` returns the blocks, and has some basic filtering options: 173 | => (block/list-seq store :algorithm :sha2-256) 174 | (#blocks.data.Block 175 | {:id #multi/hash "hash:sha2-256:94d0eb8d13137ebced045b1e7ef48540af81b2abaf2cce34e924ce2cde7cfbaa", 176 | :size 8597, 177 | :stored-at #inst "2019-02-18T07:07:06.458Z"} 178 | #blocks.data.Block 179 | {:id #multi/hash "hash:sha2-256:d2eef339d508c69fb6e3e99c11c11fc4fc8c035d028973057980d41c7d162684", 180 | :size 14, 181 | :stored-at #inst "2019-02-18T07:06:43.655Z"}) 182 | 183 | ;; Use `get` to fetch blocks from the store: 184 | => @(block/get store (:id readme)) 185 | #blocks.data.Block 186 | {:id #multi/hash "hash:sha2-256:94d0eb8d13137ebced045b1e7ef48540af81b2abaf2cce34e924ce2cde7cfbaa", 187 | :size 8597, 188 | :stored-at #inst "2019-02-18T07:07:06.458Z"} 189 | 190 | ;; You can also store them directly from a byte source like a file: 191 | => @(block/store! store (io/file "project.clj")) 192 | #blocks.data.Block 193 | {:id #multi/hash "hash:sha2-256:95344c6acadde09ecc03a7899231001455690f620f31cf8d5bbe330dcda19594", 194 | :size 2013, 195 | :stored-at #inst "2019-02-18T07:11:12.879Z"} 196 | 197 | => (def project-hash (:id *1)) 198 | #'user/project-hash 199 | 200 | ;; Use `delete!` to remove blocks from a store: 201 | => @(block/delete! store project-hash) 202 | true 203 | 204 | ;; Checking with stat reveals the block is gone: 205 | => @(block/stat store project-hash) 206 | nil 207 | ``` 208 | 209 | ### Implementations 210 | 211 | This library comes with a few block store implementations built in: 212 | 213 | - `blocks.store.memory` provides an in-memory map of blocks for transient 214 | block storage. 215 | - `blocks.store.file` provides a simple one-file-per-block store in a local 216 | directory. 217 | - `blocks.store.buffer` holds blocks in one store, then flushes them to another. 218 | - `blocks.store.replica` stores blocks in multiple backing stores for 219 | durability. 220 | - `blocks.store.cache` manages two backing stores to provide an LRU cache that 221 | will stay under a certain size limit. 222 | 223 | Other storage backends are provided by separate libraries: 224 | 225 | - [blocks-s3](//github.com/greglook/blocks-s3) backed by a bucket in Amazon S3. 226 | 227 | These storage backends exist but aren't compatible with 2.X yet: 228 | 229 | - [blocks-adl](//github.com/amperity/blocks-adl) backed by Azure DataLake store. 230 | - [blocks-blob](//github.com/amperity/blocks-blob) backed by Azure Blob Storage. 231 | - [blocks-monger](//github.com/20centaurifux/blocks-monger) backed by MongoDB. 232 | 233 | 234 | ## Block Metrics 235 | 236 | The `blocks.meter` namespace provides instrumentation for block stores to 237 | measure data flows, call latencies, and other metrics. These measurements are 238 | built around the notion of a _metric event_ and an associated _recording 239 | function_ on the store which the events are passed to. Each event has a 240 | namespaced `:type` keyword, a `:label` associated with the store, and a numeric 241 | `:value`. The store currently measures the call latencies of the storage methods 242 | as well as the flow of bytes into or out of a store's blocks. 243 | 244 | To enable metering, set a `::meter/recorder` function on the store. The function 245 | will be called with the store itself and each metric event. The `:label` on each 246 | event is derived from the store - it will use the store's class name or an 247 | explicit `::meter/label` value if available. 248 | 249 | 250 | ## License 251 | 252 | This is free and unencumbered software released into the public domain. 253 | See the UNLICENSE file for more information. 254 | -------------------------------------------------------------------------------- /src/blocks/store/file.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.store.file 2 | "File stores provide block storage backed by a local filesystem. Each block 3 | is stored in a separate file under the root. File block stores may be 4 | constructed using a `file://` URI. Both relative and 5 | absolute paths are supported. 6 | 7 | Under the root directory, the store keeps a block data in a subdirectory 8 | alongside some layout metadata and a landing directory: 9 | 10 | $ROOT/meta.properties 11 | $ROOT/blocks/111497df/35011497df3588b5a3... 12 | $ROOT/landing/block.123456789.tmp 13 | 14 | In many filesystems, performance degrades as the number of files in a 15 | directory grows. In order to reduce this impact and make navigating the 16 | blocks more efficient, block files are stored in multiple subdirectories 17 | consisting of the four byte prefix of the hashes of the blocks stored in 18 | them. Within each directory, blocks are stored in files whose names consist 19 | of the rest of their id digest. 20 | 21 | In addition to the blocks, a `meta.properties` file at the root holds 22 | information about the current storage layout for future-proofing. This 23 | currently holds a single layout version property, which is always `\"v1\"`." 24 | (:require 25 | [blocks.data :as data] 26 | [blocks.store :as store] 27 | [clojure.java.io :as io] 28 | [clojure.string :as str] 29 | [clojure.tools.logging :as log] 30 | [com.stuartsierra.component :as component] 31 | [manifold.stream :as s] 32 | [multiformats.hash :as multihash]) 33 | (:import 34 | (java.io 35 | File 36 | FileInputStream) 37 | java.time.Instant)) 38 | 39 | 40 | ;; ## Storage Layout 41 | 42 | (def layout-version 43 | "The current supported storage layout version." 44 | "v1") 45 | 46 | 47 | ;; ### Metadata 48 | 49 | (defn- meta-file 50 | "Construct the store-level metadata properties file from the store root." 51 | ^File 52 | [root] 53 | (io/file root "meta.properties")) 54 | 55 | 56 | (defn- read-meta-properties 57 | "Read the store's metadata file if it exists." 58 | [^File root] 59 | (let [props-file (meta-file root)] 60 | (when (.exists props-file) 61 | (into {} 62 | (map (juxt (comp keyword key) val)) 63 | (doto (java.util.Properties.) 64 | (.load (io/reader props-file))))))) 65 | 66 | 67 | (defn- write-meta-properties 68 | "Write a metadata properties file and returns the data map." 69 | [^File root] 70 | (let [props-file (meta-file root) 71 | props (doto (java.util.Properties.) 72 | (.setProperty "version" layout-version))] 73 | (.mkdirs root) 74 | (with-open [out (io/writer props-file)] 75 | (.store props out " blocks.store.file")) 76 | {:version layout-version})) 77 | 78 | 79 | ;; ### Landing Area 80 | 81 | (defn- landing-dir 82 | "Construct the landing directory from the store root." 83 | ^File 84 | [root] 85 | (io/file root "landing")) 86 | 87 | 88 | (defn- landing-file 89 | "Create an empty temporary file to land block data into. Marks the resulting 90 | file for automatic cleanup if it is not moved." 91 | ^File 92 | [^File root] 93 | (let [tmp-dir (landing-dir root)] 94 | (.mkdirs tmp-dir) 95 | (doto (File/createTempFile "block" ".tmp" tmp-dir) 96 | (.deleteOnExit)))) 97 | 98 | 99 | ;; ### Block Files 100 | 101 | (def ^:private prefix-length 102 | "Number of characters to use as a prefix for top-level directory names." 103 | 8) 104 | 105 | 106 | (defn- blocks-dir 107 | "Construct the block directory from the store root." 108 | ^File 109 | [root] 110 | (io/file root "blocks")) 111 | 112 | 113 | (defn- block-files 114 | "Walks a block directory tree depth first, returning a sequence of files 115 | found in lexical order. Intelligently skips subdirectories based on the given 116 | marker." 117 | [^File root after] 118 | (-> 119 | (.listFiles (blocks-dir root)) 120 | (sort) 121 | (cond->> 122 | after 123 | (drop-while 124 | #(let [subdirname (.getName ^File %) 125 | len (min (count after) (count subdirname))] 126 | (pos? (compare (subs after 0 len) (subs subdirname 0 len)))))) 127 | (->> 128 | (mapcat 129 | (fn list-blocks 130 | [^File subdir] 131 | (sort (.listFiles subdir))))))) 132 | 133 | 134 | (defn- file-stats 135 | "Calculate a map of statistics about a block file." 136 | [^File file] 137 | (with-meta 138 | {:size (.length file) 139 | :stored-at (Instant/ofEpochMilli (.lastModified file))} 140 | {::source (.toURI file)})) 141 | 142 | 143 | (defn- id->file 144 | "Determine the filesystem path for a block of content with the given hash 145 | identifier." 146 | ^File 147 | [^File root id] 148 | (let [hex (multihash/hex id) 149 | len (min (dec (count hex)) prefix-length) 150 | subdir (subs hex 0 len) 151 | fragment (subs hex len)] 152 | (io/file (blocks-dir root) subdir fragment))) 153 | 154 | 155 | (defn- file->id 156 | "Reconstruct the hash identifier represented by the given file path. Returns 157 | nil if the file is not a proper block." 158 | [^File root ^File file] 159 | (let [prefix (str (blocks-dir root)) 160 | path (.getPath file) 161 | hex (str/replace (subs path (inc (count prefix))) File/separator "")] 162 | (if (re-matches #"[0-9a-fA-F]+" hex) 163 | (multihash/parse hex) 164 | (log/warnf "File %s did not form valid hex entry: %s" file hex)))) 165 | 166 | 167 | (defn- file->block 168 | "Creates a lazy block to read from the given file." 169 | [id ^File file] 170 | (let [stats (file-stats file)] 171 | (with-meta 172 | (data/create-block 173 | id (:size stats) (:stored-at stats) 174 | ;; OPTIMIZE: use java.io.RandomAccessFile to read subranges 175 | (fn reader [] (FileInputStream. file))) 176 | (meta stats)))) 177 | 178 | 179 | ;; ### Initialization 180 | 181 | (defn- v0-subdir? 182 | "True if the given directory is a v0 block subdirectory." 183 | [^File subdir] 184 | (and (.isDirectory subdir) 185 | (= prefix-length (count (.getName subdir))) 186 | (re-matches #"[0-9a-f]+" (.getName subdir)))) 187 | 188 | 189 | (defn- migrate-v0! 190 | "Migrate an existing v0 layout to v1." 191 | [^File root] 192 | (let [blocks (blocks-dir root)] 193 | (.mkdirs blocks) 194 | (run! 195 | (fn move-block-dir 196 | [^File subdir] 197 | (when (v0-subdir? subdir) 198 | (.renameTo subdir (io/file blocks (.getName subdir))))) 199 | (.listFiles root)))) 200 | 201 | 202 | (defn- initialize-layout! 203 | "Initialize the block store layout by writing out metadata and pre-creating 204 | some directories. Returns the layout meta-properties." 205 | [store] 206 | (let [^File root (:root store)] 207 | (if (empty? (.listFiles root)) 208 | ;; Root doesn't exist or is empty, so initialize the storage layout. 209 | (write-meta-properties root) 210 | ;; Try loading store metadata. 211 | (let [properties (read-meta-properties root)] 212 | (if (nil? properties) 213 | ;; No meta-properties file; check for v0 layout. 214 | (do 215 | ;; Check for unknown file content in root. 216 | (when-not (every? v0-subdir? (.listFiles root)) 217 | (throw (ex-info 218 | (str "Detected unknown files in block store at " root) 219 | {:files (vec (.listFiles root))}))) 220 | ;; Possible v0 store. Abort unless configured to migrate. 221 | (when-not (:auto-migrate? store) 222 | (throw (ex-info 223 | (str "Detected v0 file block store layout at " root) 224 | {:root root}))) 225 | ;; Migrate to v1 layout. 226 | (log/warn "Automatically migrating file block store layout at" 227 | (.getPath root) "from v0 ->" layout-version) 228 | (migrate-v0! root) 229 | (write-meta-properties root)) 230 | ;; Check for known layout version. 231 | (let [version (:version properties)] 232 | (when (not= layout-version version) 233 | (throw (ex-info 234 | (str "Unknown storage layout version " (pr-str version) 235 | " does not match supported version " 236 | (pr-str layout-version)) 237 | {:supported layout-version 238 | :properties properties}))) 239 | ;; Layout matches the expected version. 240 | properties)))))) 241 | 242 | 243 | (defn- rm-r 244 | "Recursively removes a directory of files." 245 | [^File path] 246 | (when (.isDirectory path) 247 | (run! rm-r (.listFiles path))) 248 | (.delete path)) 249 | 250 | 251 | ;; ## File Store 252 | 253 | ;; Block content is stored as files in a multi-level hierarchy under the given 254 | ;; root directory. 255 | (defrecord FileBlockStore 256 | [^File root] 257 | 258 | component/Lifecycle 259 | 260 | (start 261 | [this] 262 | (let [properties (initialize-layout! this) 263 | version (:version properties)] 264 | ;; (log/debug "Using storage layout version" version) 265 | (assoc this :version version))) 266 | 267 | 268 | (stop 269 | [this] 270 | this) 271 | 272 | 273 | store/BlockStore 274 | 275 | (-list 276 | [_ opts] 277 | (let [out (s/stream 1000)] 278 | (store/future' 279 | (try 280 | (loop [files (block-files root (:after opts))] 281 | (when-let [file (first files)] 282 | (if-let [id (file->id root file)] 283 | ;; Check that the id is still before the marker, if set. 284 | (when (or (nil? (:before opts)) 285 | (pos? (compare (:before opts) (multihash/hex id)))) 286 | ;; Process next block. 287 | (when @(s/put! out (file->block id file)) 288 | (recur (next files)))) 289 | ;; Not a valid block file, skip. 290 | (recur (next files))))) 291 | (catch Exception ex 292 | (log/error ex "Failure listing file blocks") 293 | (s/put! out ex)) 294 | (finally 295 | (s/close! out)))) 296 | (s/source-only out))) 297 | 298 | 299 | (-stat 300 | [_ id] 301 | (store/future' 302 | (let [file (id->file root id)] 303 | (when (.exists file) 304 | (assoc (file-stats file) :id id))))) 305 | 306 | 307 | (-get 308 | [_ id] 309 | (store/future' 310 | (let [file (id->file root id)] 311 | (when (.exists file) 312 | (file->block id file))))) 313 | 314 | 315 | (-put! 316 | [_ block] 317 | (store/future' 318 | (let [id (:id block) 319 | file (id->file root id)] 320 | (when-not (.exists file) 321 | (let [tmp (landing-file root)] 322 | (with-open [content (data/content-stream block nil nil)] 323 | (io/copy content tmp)) 324 | (io/make-parents file) 325 | (.setWritable tmp false false) 326 | (.renameTo tmp file))) 327 | (file->block id file)))) 328 | 329 | 330 | (-delete! 331 | [_ id] 332 | (store/future' 333 | (let [file (id->file root id)] 334 | (if (.exists file) 335 | (do (.delete file) true) 336 | false)))) 337 | 338 | 339 | store/ErasableStore 340 | 341 | (-erase! 342 | [_] 343 | (store/future' 344 | (rm-r (landing-dir root)) 345 | (rm-r (blocks-dir root)) 346 | true))) 347 | 348 | 349 | ;; ## Constructors 350 | 351 | (store/privatize-constructors! FileBlockStore) 352 | 353 | 354 | (defn file-block-store 355 | "Creates a new local file-based block store." 356 | [root & {:as opts}] 357 | (map->FileBlockStore 358 | (assoc opts :root (io/file root)))) 359 | 360 | 361 | (defmethod store/initialize "file" 362 | [location] 363 | (let [uri (store/parse-uri location)] 364 | (file-block-store 365 | (if (:host uri) 366 | (io/file (:host uri) (subs (:path uri) 1)) 367 | (io/file (:path uri)))))) 368 | -------------------------------------------------------------------------------- /blocks-tests/src/blocks/store/tests.clj: -------------------------------------------------------------------------------- 1 | (ns ^:no-doc blocks.store.tests 2 | "Suite of generative behavioral tests to verify that a given block store 3 | implementation conforms to the spec." 4 | (:require 5 | [alphabase.bytes :refer [bytes= random-bytes]] 6 | [alphabase.hex :as hex] 7 | [blocks.core :as block] 8 | [clojure.java.io :as io] 9 | [clojure.test :refer [is]] 10 | [clojure.test.check.generators :as gen] 11 | [com.stuartsierra.component :as component] 12 | [multiformats.hash :as multihash] 13 | [puget.printer :as puget] 14 | [test.carly.core :as carly :refer [defop]]) 15 | (:import 16 | (blocks.data 17 | Block 18 | PersistentBytes) 19 | java.time.Instant 20 | multiformats.hash.Multihash)) 21 | 22 | 23 | ;; ## Block Utilities 24 | 25 | (defn random-block 26 | "Creates a new block with random content at most `max-size` bytes long." 27 | [max-size] 28 | (block/read! 29 | (random-bytes (inc (rand-int max-size))) 30 | (rand-nth (keys multihash/functions)))) 31 | 32 | 33 | (defn generate-blocks! 34 | "Generates some test blocks and returns a map of the ids to the blocks." 35 | [n max-size] 36 | (->> (repeatedly #(random-block max-size)) 37 | (take n) 38 | (map (juxt :id identity)) 39 | (into (sorted-map)))) 40 | 41 | 42 | (defn populate-blocks! 43 | "Generates random blocks and puts them into the given store. Returns a map 44 | of multihash ids to blocks." 45 | [store & {:keys [n max-size], :or {n 10, max-size 1024}}] 46 | (let [blocks (generate-blocks! n max-size)] 47 | @(block/put-batch! store (vals blocks)) 48 | blocks)) 49 | 50 | 51 | ;; ## Operation Generators 52 | 53 | ;; Appease clj-kondo 54 | (declare gen->ListBlocks 55 | gen->ScanStore 56 | gen->StatBlock 57 | gen->GetBlock 58 | gen->OpenBlock 59 | gen->OpenBlockRange 60 | gen->PutBlock 61 | gen->DeleteBlock 62 | gen->EraseStore) 63 | 64 | 65 | (defn- choose-id 66 | "Returns a generator which will select a block id from the model pool." 67 | [blocks] 68 | (gen/elements (keys blocks))) 69 | 70 | 71 | (defn- choose-block 72 | "Returns a generator which will select a block from the model pool." 73 | [blocks] 74 | (gen/elements (vals blocks))) 75 | 76 | 77 | (defn- gen-sub-seq 78 | "Generate subsequences of the entries in the given sequence, returning some of 79 | the elements in the same order as given." 80 | [xs] 81 | (gen/fmap 82 | (fn select 83 | [bools] 84 | (sequence (comp (filter first) (map second)) (map vector bools xs))) 85 | (gen/vector gen/boolean (count xs)))) 86 | 87 | 88 | (defn- gen-sub-map 89 | "Generate subsets of the entries in the given map." 90 | [m] 91 | (gen/fmap (partial into {}) (gen-sub-seq (seq m)))) 92 | 93 | 94 | (defop ListBlocks 95 | [query] 96 | 97 | (gen-args 98 | [blocks] 99 | [(gen/bind 100 | (gen/hash-map 101 | :algorithm (gen/elements (keys multihash/functions)) 102 | :after (gen/fmap hex/encode (gen/not-empty gen/bytes)) ; TODO: pick prefixes 103 | :limit (gen/large-integer* {:min 1, :max (inc (count blocks))})) 104 | gen-sub-map)]) 105 | 106 | (apply-op 107 | [this store] 108 | (doall (block/list-seq store query))) 109 | 110 | (check 111 | [this model result] 112 | (let [expected-ids (cond->> (keys model) 113 | (:algorithm query) 114 | (filter #(= (:algorithm query) (:algorithm %))) 115 | (:after query) 116 | (filter #(pos? (compare (multihash/hex %) (:after query)))) 117 | (:before query) 118 | (filter #(neg? (compare (multihash/hex %) (:before query)))) 119 | true 120 | (sort) 121 | (:limit query) 122 | (take (:limit query)))] 123 | (is (sequential? result)) 124 | (is (= (count expected-ids) (count result))) 125 | (doseq [[id result] (zipmap expected-ids result)] 126 | (if-let [block (get model id)] 127 | (do (is (instance? Block result)) 128 | (is (= (:id block) (:id result))) 129 | (is (= (:size block) (:size result))) 130 | (is (instance? Instant (:stored-at result)))) 131 | (is (nil? result))))))) 132 | 133 | 134 | (defop StatBlock 135 | [id] 136 | 137 | (gen-args 138 | [ctx] 139 | [(choose-id ctx)]) 140 | 141 | (apply-op 142 | [this store] 143 | @(block/stat store id)) 144 | 145 | (check 146 | [this model result] 147 | (if-let [block (get model id)] 148 | (do (is (map? result)) 149 | (is (= (:id block) (:id result))) 150 | (is (= (:size block) (:size result))) 151 | (is (instance? Instant (:stored-at result)))) 152 | (is (nil? result))))) 153 | 154 | 155 | (defop GetBlock 156 | [id] 157 | 158 | (gen-args 159 | [blocks] 160 | [(choose-id blocks)]) 161 | 162 | (apply-op 163 | [this store] 164 | @(block/get store id)) 165 | 166 | (check 167 | [this model result] 168 | (if-let [block (get model id)] 169 | (do (is (some? (:id result))) 170 | (is (integer? (:size result))) 171 | (is (= id (:id result))) 172 | (is (= (:size block) (:size result)))) 173 | (is (nil? result))))) 174 | 175 | 176 | (defop PutBlock 177 | [block] 178 | 179 | (gen-args 180 | [blocks] 181 | [(choose-block blocks)]) 182 | 183 | (apply-op 184 | [this store] 185 | @(block/put! store block)) 186 | 187 | (check 188 | [this model result] 189 | (is (= block result))) 190 | 191 | (update-model 192 | [this model] 193 | (assoc model (:id block) block))) 194 | 195 | 196 | (defop DeleteBlock 197 | [id] 198 | 199 | (gen-args 200 | [blocks] 201 | [(choose-id blocks)]) 202 | 203 | (apply-op 204 | [this store] 205 | @(block/delete! store id)) 206 | 207 | (check 208 | [this model result] 209 | (if (contains? model id) 210 | (is (true? result)) 211 | (is (false? result)))) 212 | 213 | (update-model 214 | [this model] 215 | (dissoc model id))) 216 | 217 | 218 | (defop EraseStore 219 | [] 220 | 221 | (apply-op 222 | [this store] 223 | @(block/erase! store)) 224 | 225 | (update-model 226 | [this model] 227 | (empty model))) 228 | 229 | 230 | (defop ScanStore 231 | [p] 232 | 233 | (gen-args 234 | [blocks] 235 | [(gen/elements [nil (fn scan-pred 236 | [stat] 237 | (< (:size stat) 256))])]) 238 | 239 | (apply-op 240 | [this store] 241 | @(block/scan store :filter p)) 242 | 243 | (check 244 | [this model result] 245 | (let [blocks (cond->> (vals model) p (filter p))] 246 | (is (= (count blocks) (:count result))) 247 | (is (= (reduce + (map :size blocks)) (:size result))) 248 | (is (map? (:sizes result))) 249 | (is (every? integer? (keys (:sizes result)))) 250 | (is (= (count blocks) (reduce + (vals (:sizes result)))))))) 251 | 252 | 253 | (defop OpenBlock 254 | [id] 255 | 256 | (gen-args 257 | [blocks] 258 | [(choose-id blocks)]) 259 | 260 | (apply-op 261 | [this store] 262 | (when-let [block @(block/get store id)] 263 | (let [baos (java.io.ByteArrayOutputStream.)] 264 | (with-open [content (block/open block)] 265 | (io/copy content baos)) 266 | (.toByteArray baos)))) 267 | 268 | (check 269 | [this model result] 270 | (if-let [block (get model id)] 271 | (is (bytes= (.toByteArray ^PersistentBytes (.content ^Block block)) result)) 272 | (is (nil? result))))) 273 | 274 | 275 | (defop OpenBlockRange 276 | [id start end] 277 | 278 | (gen-args 279 | [blocks] 280 | (gen/bind 281 | (choose-block blocks) 282 | (fn [block] 283 | (if (< 3 (:size block)) 284 | (gen/fmap 285 | (fn [positions] 286 | (let [[start end] (sort positions)] 287 | {:id (:id block) 288 | :start start 289 | :end end})) 290 | (gen/vector-distinct 291 | (gen/large-integer* {:min 0, :max (:size block)}) 292 | {:num-elements 2})) 293 | (gen/return 294 | {:id (:id block) 295 | :start 0 296 | :end (:size block)}))))) 297 | 298 | (apply-op 299 | [this store] 300 | (when-let [block @(block/get store id)] 301 | (let [baos (java.io.ByteArrayOutputStream.)] 302 | (with-open [content (block/open block {:start start, :end end})] 303 | (io/copy content baos)) 304 | (.toByteArray baos)))) 305 | 306 | (check 307 | [this model result] 308 | (if-let [block (get model id)] 309 | (let [baos (java.io.ByteArrayOutputStream.) 310 | length (- end start) 311 | subarray (byte-array length)] 312 | (block/write! block baos) 313 | (System/arraycopy (.toByteArray baos) start subarray 0 length) 314 | (is (bytes= subarray result))) 315 | (is (nil? result))))) 316 | 317 | 318 | (def ^:private basic-op-generators 319 | (juxt gen->ListBlocks 320 | gen->ScanStore 321 | gen->StatBlock 322 | gen->GetBlock 323 | gen->OpenBlock 324 | gen->OpenBlockRange 325 | gen->PutBlock 326 | gen->DeleteBlock)) 327 | 328 | 329 | (def ^:private erasable-op-generators 330 | (juxt gen->EraseStore)) 331 | 332 | 333 | (defn- join-generators 334 | [ks] 335 | (let [op-gens (keep {:basic basic-op-generators 336 | :erase erasable-op-generators} 337 | ks)] 338 | (fn [ctx] 339 | (into [] (mapcat #(% ctx)) op-gens)))) 340 | 341 | 342 | ;; ## Operation Testing 343 | 344 | (defn- start-store 345 | [constructor] 346 | (let [store (component/start (constructor))] 347 | (when-let [extant (seq (block/list-seq store))] 348 | (throw (IllegalStateException. 349 | (str "Cannot run integration test on " (pr-str store) 350 | " as it already contains blocks: " 351 | (pr-str extant))))) 352 | (is (zero? (:count @(block/scan store)))) 353 | store)) 354 | 355 | 356 | (defn- stop-store 357 | [store] 358 | (block/erase! store) 359 | (is (empty? (block/list-seq store)) 360 | "ends empty") 361 | (component/stop store)) 362 | 363 | 364 | (defn- gen-blocks-context 365 | [test-blocks] 366 | (let [default-ctx (conj {} (first test-blocks))] 367 | (gen/fmap 368 | (fn [ctx] (if (seq ctx) ctx default-ctx)) 369 | (gen-sub-map test-blocks)))) 370 | 371 | 372 | (def ^:private print-handlers 373 | {Instant (puget/tagged-handler 'inst str) 374 | Multihash (puget/tagged-handler 'multi/hash str) 375 | Block (puget/tagged-handler 'blocks/block (juxt :id :size :stored-at)) 376 | (class (byte-array 0)) (puget/tagged-handler 'data/bytes alphabase.hex/encode)}) 377 | 378 | 379 | (defn- type->print-handler 380 | [t] 381 | (or (print-handlers t) (puget/common-handlers t))) 382 | 383 | 384 | (defn check-store* 385 | "Uses generative tests to validate the behavior of a block store 386 | implementation. The first argument must be a no-arg constructor function which 387 | will produce a new block store for testing. The remaining options control the 388 | behavior of the tests: 389 | 390 | - `blocks` 391 | Generate this many random blocks to test the store with. 392 | - `max-size` 393 | Maximum block size to generate, in bytes. 394 | - `operations` 395 | Kinds of operations to test - vector of `:basic`, `:erase`. 396 | - `concurrency` 397 | Maximum number of threads of operations to generate. 398 | - `iterations` 399 | Number of generative tests to perform. 400 | - `repetitions` 401 | Number of times to repeat the test for concurrency checks. 402 | 403 | Returns the results of the generative tests." 404 | [constructor 405 | {:keys [blocks max-size operations concurrency iterations repetitions] 406 | :or {blocks 20 407 | max-size 1024 408 | operations [:basic] 409 | concurrency 4 410 | iterations (or (some-> (System/getenv "BLOCKS_STORE_TEST_ITERATIONS") 411 | (Integer/parseInt)) 412 | 100) 413 | repetitions (or (some-> (System/getenv "BLOCKS_STORE_TEST_REPETITIONS") 414 | (Integer/parseInt)) 415 | 10)}}] 416 | {:pre [(fn? constructor)]} 417 | (let [test-blocks (generate-blocks! blocks max-size)] 418 | (carly/check-system 419 | "integration testing" iterations 420 | (fn init-system [_] (start-store constructor)) 421 | (join-generators operations) 422 | :on-stop stop-store 423 | :context-gen (gen-blocks-context test-blocks) 424 | :concurrency concurrency 425 | :repetitions repetitions 426 | :report {:puget {:print-handlers type->print-handler}}))) 427 | 428 | 429 | (defn check-store 430 | "Uses generative tests to validate the behavior of a block store 431 | implementation. The first argument must be a no-arg constructor function 432 | which will produce a new block store for testing. 433 | 434 | See `check-store*` for a variant with more configurable options." 435 | [constructor] 436 | (check-store* 437 | constructor 438 | {:operations [:basic :erase] 439 | :concurrency 1 440 | :repetitions 1})) 441 | -------------------------------------------------------------------------------- /src/blocks/store.clj: -------------------------------------------------------------------------------- 1 | (ns ^:no-doc blocks.store 2 | "Block storage protocols. Typically, clients of the library should use the 3 | API wrapper functions in `blocks.core` instead of using these methods 4 | directly." 5 | (:require 6 | [blocks.data :as data] 7 | [clojure.string :as str] 8 | [manifold.deferred :as d] 9 | [manifold.executor :as mx] 10 | [manifold.stream :as s] 11 | [manifold.utils :as mu] 12 | [multiformats.hash :as multihash])) 13 | 14 | 15 | ;; ## Storage Protocols 16 | 17 | (defprotocol BlockStore 18 | "Protocol for content-addressable storage keyed by multihash identifiers." 19 | 20 | (-list 21 | [store opts] 22 | "List the blocks contained in the store. This method should return a stream 23 | of blocks ordered by multihash id. See `blocks.core/list` for the supported 24 | options. 25 | 26 | The method must return _at least_ the blocks which match the query options, 27 | and _should_ optimize the results by omitting unmatched blocks. The 28 | returned stream may be closed preemptively if the consumer is done, which 29 | should terminate the list thread. 30 | 31 | If the listing thread encounters an exception, the error should be placed 32 | on the stream and the stream should be closed to indicate no further blocks 33 | will be coming. Consumers must handle exceptions propagated on the stream.") 34 | 35 | (-stat 36 | [store id] 37 | "Load a block's metadata if the store contains it. Returns a deferred which 38 | yields a map with block information but no content, or nil if the store 39 | does not contain the identified block.") 40 | 41 | (-get 42 | [store id] 43 | "Fetch a block from the store. Returns a deferred which yields the block, 44 | or nil if not present.") 45 | 46 | (-put! 47 | [store block] 48 | "Persist a block into the store. Returns a deferred which yields the 49 | stored block, which may have already been present in the store.") 50 | 51 | (-delete! 52 | [store id] 53 | "Remove a block from the store. Returns a deferred which yields true if the 54 | block was stored, false if it was not.")) 55 | 56 | 57 | (defprotocol ErasableStore 58 | "An erasable store has some notion of being removed in its entirety, often 59 | atomically. For example, a file system might unlink the root directory rather 60 | than deleting each individual file." 61 | 62 | (-erase! 63 | [store] 64 | "Completely removes any data associated with the store. Returns a deferred 65 | value which yields when the store is erased.")) 66 | 67 | 68 | ;; ## Store Construction 69 | 70 | (defn parse-uri 71 | "Parse a URI string into a map of keywords to URI parts." 72 | [location] 73 | (let [uri (java.net.URI. location)] 74 | (->> 75 | {:scheme (.getScheme uri) 76 | :name (and (nil? (.getAuthority uri)) 77 | (nil? (.getPath uri)) 78 | (.getSchemeSpecificPart uri)) 79 | :user-info (when-let [info (.getUserInfo uri)] 80 | (zipmap [:id :secret] (str/split info #":" 2))) 81 | :host (.getHost uri) 82 | :port (when (not= (.getPort uri) -1) 83 | (.getPort uri)) 84 | :path (.getPath uri) 85 | :query (when-let [query (.getQuery uri)] 86 | (->> (str/split query #"&") 87 | (map #(let [[k v] (str/split % #"=")] 88 | [(keyword k) v])) 89 | (into {}))) 90 | :fragment (.getFragment uri)} 91 | (filter val) 92 | (into {})))) 93 | 94 | 95 | (defmulti initialize 96 | "Constructs a new block store from a URI by dispatching on the scheme. The 97 | store will be returned in an initialized but not started state." 98 | (comp :scheme parse-uri)) 99 | 100 | 101 | (defmethod initialize :default 102 | [uri] 103 | (throw (IllegalArgumentException. 104 | (str "Unsupported block-store URI scheme: " (pr-str uri))))) 105 | 106 | 107 | (defmacro privatize! 108 | "Alters the metadatata on the given var symbol to change the visibility to 109 | private." 110 | [var-sym] 111 | `(alter-meta! #'~var-sym assoc :private true)) 112 | 113 | 114 | (defmacro privatize-constructors! 115 | "Alters the metadata on the automatic record constructor functions to set 116 | their visibility to private." 117 | [record-name] 118 | `(do (privatize! ~(symbol (str "->" record-name))) 119 | (privatize! ~(symbol (str "map->" record-name))))) 120 | 121 | 122 | ;; ## Async Utilities 123 | 124 | (defn ^:no-doc schedule-future! 125 | "A helper for the `future` macro which wraps some submission logic in a 126 | common function." 127 | [d body-fn] 128 | (mu/future-with 129 | (mx/execute-pool) 130 | (when-not (d/realized? d) 131 | (try 132 | (d/success! d (body-fn)) 133 | (catch Throwable ex 134 | (d/error! d ex)))))) 135 | 136 | 137 | (defmacro future' 138 | "Alternative to `d/future` that has better coverage." 139 | [& body] 140 | `(let [d# (d/deferred)] 141 | (schedule-future! d# (fn future# [] ~@body)) 142 | d#)) 143 | 144 | 145 | (defn zip-stores 146 | "Apply a function to each of the given block stores in parallel. Returns a 147 | deferred which yields the vector of results." 148 | [stores f & args] 149 | (apply d/zip (map #(apply f % args) stores))) 150 | 151 | 152 | (defn some-store 153 | "Apply a function to each of the given block stores in order until one 154 | returns a non-nil result. Returns a deferred which yields the result, or nil 155 | if all stores returned nil." 156 | [stores f & args] 157 | (d/loop [stores stores] 158 | (when-let [store (first stores)] 159 | (d/chain 160 | (apply f store args) 161 | (fn check-result 162 | [result] 163 | (if (nil? result) 164 | (d/recur (rest stores)) 165 | result)))))) 166 | 167 | 168 | ;; ## Stream Utilities 169 | 170 | (defn preferred-block 171 | "Choose among multiple blocks to determine the optimal one to use for 172 | copying into a new store. Returns the first loaded block, if any are 173 | keeping in-memory content. If none are, returns the first block." 174 | [& blocks] 175 | (when-let [blocks (seq (remove nil? blocks))] 176 | (or (first (filter data/byte-content? blocks)) 177 | (first blocks)))) 178 | 179 | 180 | (defn select-blocks 181 | "Select blocks from a stream based on the criteria spported in `-list`. 182 | Returns a filtered view of the block streams that will close the source once 183 | the relevant blocks have been read." 184 | [opts blocks] 185 | (let [{:keys [algorithm after before limit]} opts 186 | counter (atom 0) 187 | out (s/stream)] 188 | (s/connect-via 189 | blocks 190 | (fn test-block 191 | [block] 192 | (if (instance? Throwable block) 193 | ;; Propagate error on the stream. 194 | (do (s/put! out block) 195 | (s/close! out) 196 | (d/success-deferred false)) 197 | ;; Determine if block matches query criteria. 198 | (let [id (:id block) 199 | hex (multihash/hex id)] 200 | (cond 201 | ;; Ignore any blocks which don't match the algorithm. 202 | (and algorithm (not= algorithm (:algorithm id))) 203 | (d/success-deferred true) 204 | 205 | ;; Drop blocks until an id later than `after`. 206 | (and after (not (neg? (compare after hex)))) 207 | (d/success-deferred true) 208 | 209 | ;; Terminate the stream if block is later than `before` or `limit` 210 | ;; blocks have already been returned. 211 | (or (and before (not (pos? (compare before hex)))) 212 | (and (pos-int? limit) (< limit (swap! counter inc)))) 213 | (do (s/close! out) 214 | (d/success-deferred false)) 215 | 216 | ;; Otherwise, pass the block along. 217 | :else 218 | (s/put! out block))))) 219 | out 220 | {:description {:op "select-blocks"}}) 221 | (s/source-only out))) 222 | 223 | 224 | (defn merge-blocks 225 | "Merge multiple streams of blocks and return a stream with one block per 226 | unique id, maintaining sorted order. The input streams are consumed 227 | incrementally and must already be sorted." 228 | [& streams] 229 | (if (= 1 (count streams)) 230 | (first streams) 231 | (let [intermediates (mapv 232 | (fn hook-up 233 | [a] 234 | (let [b (s/stream)] 235 | (s/connect-via 236 | a #(s/put! b %) b 237 | {:description {:op "merge-blocks"}}) 238 | b)) 239 | streams) 240 | out (s/stream)] 241 | (d/loop [inputs (map vector intermediates (repeat nil))] 242 | (d/chain 243 | ;; Take the head value from each stream we don't already have. 244 | (->> 245 | inputs 246 | (map (fn take-next 247 | [[input head :as pair]] 248 | (if (nil? head) 249 | (d/chain 250 | (s/take! input ::drained) 251 | (partial vector input)) 252 | pair))) 253 | (apply d/zip)) 254 | ;; Remove drained streams from consideration. 255 | (fn remove-drained 256 | [inputs] 257 | (remove #(identical? ::drained (second %)) inputs)) 258 | ;; Find the next earliest block to return. 259 | (fn find-next 260 | [inputs] 261 | (if (empty? inputs) 262 | ;; Every input is drained. 263 | (s/close! out) 264 | ;; Check inputs for errors. 265 | (if-let [error (->> (map second inputs) 266 | (filter #(instance? Throwable %)) 267 | (first))] 268 | ;; Propagate error. 269 | (d/finally 270 | (s/put! out error) 271 | #(s/close! out)) 272 | ;; Determine the next block to output. 273 | (let [earliest (first (sort-by :id (map second inputs)))] 274 | (d/chain 275 | (s/put! out earliest) 276 | (fn check-put 277 | [result] 278 | (if result 279 | ;; Remove any blocks matching the one emitted. 280 | (d/recur (mapv (fn remove-earliest 281 | [[input head :as pair]] 282 | (if (= (:id earliest) (:id head)) 283 | [input nil] 284 | pair)) 285 | inputs)) 286 | ;; Out was closed on us. 287 | false))))))))) 288 | (s/source-only out)))) 289 | 290 | 291 | (defn missing-blocks 292 | "Compare two block streams and generate a derived stream of the blocks in 293 | `source` which are not present in `dest`." 294 | [source dest] 295 | (let [src (s/stream) 296 | dst (s/stream) 297 | out (s/stream) 298 | close-all! (fn close-all! 299 | [] 300 | (s/close! src) 301 | (s/close! dst) 302 | (s/close! out))] 303 | (s/connect-via 304 | source #(s/put! src %) src 305 | {:description {:op "missing-blocks"}}) 306 | (s/connect-via 307 | dest #(s/put! dst %) dst 308 | {:description {:op "missing-blocks"}}) 309 | (d/loop [s nil 310 | d nil] 311 | (d/chain 312 | (d/zip 313 | (if (nil? s) 314 | (s/take! src ::drained) 315 | s) 316 | (if (nil? d) 317 | (s/take! dst ::drained) 318 | d)) 319 | (fn compare-next 320 | [[s d]] 321 | (cond 322 | ;; Source stream exhausted; terminate sequence. 323 | (identical? ::drained s) 324 | (close-all!) 325 | 326 | ;; Destination stream exhausted; return remaining blocks in source. 327 | (identical? ::drained d) 328 | (-> (s/put! out s) 329 | (d/chain 330 | (fn [_] (s/drain-into src out))) 331 | (d/finally close-all!)) 332 | 333 | ;; Source threw an error; propagate it. 334 | (instance? Throwable s) 335 | (d/finally 336 | (s/put! out s) 337 | close-all!) 338 | 339 | ;; Dest threw an error; propagate it. 340 | (instance? Throwable d) 341 | (d/finally 342 | (s/put! out d) 343 | close-all!) 344 | 345 | ;; Block is present in both streams; drop and continue. 346 | (= (:id s) (:id d)) 347 | (d/recur nil nil) 348 | 349 | ;; Source has a block not in dest. 350 | (neg? (compare (:id s) (:id d))) 351 | (d/chain 352 | (s/put! out s) 353 | (fn onwards 354 | [result] 355 | (when result 356 | (d/recur nil d)))) 357 | 358 | ;; Next source block comes after some dest blocks; skip forward. 359 | :else 360 | (d/recur s nil))))) 361 | (s/source-only out))) 362 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Change Log 2 | ========== 3 | 4 | All notable changes to this project will be documented in this file, which 5 | follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 6 | This project adheres to [Semantic Versioning](http://semver.org/). 7 | 8 | ## [Unreleased] 9 | 10 | ... 11 | 12 | 13 | ## [2.1.0] - 2024-03-22 14 | 15 | ### Changed 16 | - Drop `byte-streams` dependency to avoid reflection issues which interfere 17 | with Graal native-image compilation. 18 | - Update most dependencies to latest versions. 19 | - Update style/lint/CI tooling. 20 | 21 | 22 | ## [2.0.4] - 2020-04-20 23 | 24 | ### Changed 25 | - Upgraded dependencies. 26 | - Made `blocks.data/bounded-input-stream` public as a utility for implementing 27 | block stores. 28 | 29 | 30 | ## [2.0.3] - 2020-01-05 31 | 32 | ### Changed 33 | - Upgraded dependencies for better compatibility with JDK11. 34 | 35 | 36 | ## [2.0.2] - 2019-07-31 37 | 38 | ### Changed 39 | - Upgrade to Clojure 1.10.1 40 | 41 | ### Fixed 42 | - File block stores use the platform file separator, so should be compatible 43 | with Windows and other non-linux systems. 44 | [#21](//github.com/greglook/blocks/pull/21) 45 | 46 | 47 | ## [2.0.1] - 2019-03-11 48 | 49 | ### Changed 50 | - Various docstring updates after reviewing generated docs. 51 | - Upgrade multiformats to 0.2.0 for the `multiformats.hash/parse` method. 52 | - Renamed the `blocks-test` project to `blocks-tests` to better reflect the 53 | test namespace. 54 | 55 | 56 | ## [2.0.0] - 2019-03-05 57 | 58 | **This is a major release with a rewritten storage interface.** Much of the 59 | library's API has changed slightly - the core concepts are the same, but now 60 | storage interactions are represented as asynchronous processes using 61 | [manifold](//github.com/ztellman/manifold). Please read the notes here 62 | carefully! 63 | 64 | ### Block Changes 65 | 66 | The first major change is that block values are no longer open maps - they 67 | behave more like opaque types now. Mixing in extra attributes led to some 68 | confusing usage in practice; one example was the way extra attributes affected 69 | equality, meaning you could not test blocks directly for content matches. 70 | Blocks do still support metadata, so any additional information can still be 71 | associated that way. 72 | 73 | Second, this version upgrades from the `mvxcvi/multihash` library to the unified 74 | `mvxcvi/multiformats` code. This changes the type of a block `:id` from 75 | `multihash.core.Multihash` to `multiformats.hash.Multihash`, but otherwise the 76 | identifiers have the same semantics and should behave the same way they did 77 | before. 78 | 79 | Blocks also have a new first-class attribute `:stored-at` which is a 80 | `java.time.Instant` reflecting the time they were persisted. This does not 81 | affect block equality or hashing, but is generally useful for auditing. It 82 | _does_ impact sorting, so that earlier copies of the same block sort before 83 | older ones. 84 | 85 | Finally, block content is no longer represented by separate `reader` and 86 | `content` fields on the block. Now the `content` field contains an 87 | implementation of the new `blocks.data/ContentReader` protocol. This is 88 | implemented for the current `PersistentBytes` values and the "reader function" 89 | approach for lazy blocks. The protocol allows block stores to provide an 90 | efficient mechanism for reading a sub-range of the block content, and will be 91 | useful for any future customizations. 92 | 93 | ### Storage API Changes 94 | 95 | The major change in this version of the library is that all block store methods 96 | are now _asynchronous_. The `stat`, `get`, `store!`, `delete!`, `get-batch!`, 97 | `put-batch!`, `delete-batch!`, `scan`, `erase!`, and `sync` functions now return 98 | manifold deferred values instead of blocking. 99 | 100 | Similarly, the `list` store method now returns a manifold stream instead of a 101 | lazy sequence. An asynchronous process places _blocks_ on this stream for 102 | consumption - previously, this was simple stat metadata. If an error occurs, the 103 | store should place the exception on the stream and close it. Existing consumers 104 | can use the `list-seq` wrapper, which returns a lazy sequence consuming from 105 | this stream and behaves similarly to the old list method. 106 | The `list` and `list-seq` query parameters now also accept a `:before` hex 107 | string (in addition to the current `:after`) to halt enumeration at a certain 108 | point. 109 | 110 | The `blocks.store/BatchStore` protocol has been removed. It was never used in 111 | practice and few backends could ensure atomicity. Instead, the batch methods are 112 | now wrappers around asynchronous behavior over the normal store methods. 113 | 114 | ### Store Metrics 115 | 116 | The library now includes a `blocks.meter` namespace which provides a common 117 | framework for instrumenting block stores and collecting metrics. Users can opt 118 | into metrics collection by setting a `:blocks.meter/recorder` function on each 119 | store they want to instrument. This function will be called with the store 120 | record and metric events, which include method elapsed times, traffic into and 121 | out of the store, and counts of blocks enumerated by list streams. 122 | 123 | ### Block Store Changes 124 | 125 | In addition to the notes for each store below, note that external block store 126 | libraries like [blocks-s3](//github.com/greglook/blocks-s3) will not be 127 | compatible with this version of the library until they upgrade! 128 | 129 | #### BufferBlockStore 130 | 131 | - The backing store field is now `primary` instead of `store`. 132 | - The store checks on component startup that both `primary` and `buffer` stores 133 | are present. 134 | - `clear!` and `flush!` now return deferred values. 135 | - The store supports an arbitrary `predicate` function which can return `false` 136 | to indicate that a block should be stored directly in the primary store 137 | instead of being buffered. 138 | 139 | #### CachingBlockStore 140 | 141 | - On initial state scan, blocks in the cache are prioritized by their 142 | `:stored-at` attributes so that younger blocks are preferred. 143 | - `reap!` now returns a deferred value. 144 | - Instead of the `max-block-size` field, the caching store now supports an 145 | arbitrary `predicate` function, which can return `false` to indicate that a 146 | block should not be cached. 147 | 148 | #### ReplicaBlockStore 149 | 150 | - The sequence of replica keys is now `replicas` instead of `store-keys`. 151 | 152 | #### FileBlockStore 153 | 154 | The file block store has seen the most significant changes. Previously, blocks 155 | were stored in subdirectories under the store root, like 156 | `$ROOT/1220abcd/0123...`, with no additional metadata. Now, file stores maintain 157 | a more sophisticated structure under the root. Block directories are now in 158 | `$ROOT/blocks/`, and a `$ROOT/meta.properties` file contains versioning 159 | information to make future extensibility possible. When the store starts, it 160 | will try to detect a v0 layout; if `:auto-migrate?` is truthy on the store, it 161 | will upgrade it to v1, otherwise it will throw an exception. 162 | 163 | Another change is that blocks are now written to a temporary file in 164 | `$ROOT/landing/` before being atomically renamed to their final location. This 165 | keeps other clients from seeing partially-written blocks that are still being 166 | stored - something that would have been difficult with the prior layout. 167 | 168 | ### Other 169 | 170 | A few other things changed or were added: 171 | 172 | - Added predicate `blocks.core/loaded?` which is the complement of `lazy?`. 173 | - `blocks.core/open` now accepts a map as a second argument instead of a 174 | three-arity `(open block start end)`. Instead this would now be 175 | `(open block {:start start, :end end})` 176 | - `blocks.core/validate!` now returns `true` instead of `nil` on success. 177 | - The `blocks.summary` aggregates no longer contain bloom filters; these didn't 178 | seem to be used in practice, and clients which want that behavior can 179 | reimplement it without much difficulty. 180 | - The behavior tests in `blocks.store.tests` have moved to a separate subproject 181 | `mvxcvi/blocks-test` to simplify usage by store implementations. 182 | - Storage tests no longer test the batch methods, since they are no longer 183 | unique to store types. 184 | 185 | 186 | ## [1.1.0] - 2017-12-24 187 | 188 | This release upgrades the library to Clojure 1.9.0. 189 | 190 | 191 | ## [1.0.0] - 2017-11-05 192 | 193 | Finally seems like time for a 1.0 release. One very minor breaking change. 194 | 195 | ### Added 196 | - New predicate `blocks.core/lazy?`. 197 | 198 | ### Changed 199 | - *BREAKING:* the `:stored-at` metadata on blocks is now returned as a 200 | `java.time.Instant` instead of a `java.util.Date`. 201 | - `MemoryBlockStore` uses a ref internally instead of an atom. 202 | - Minor dependency version upgrades. 203 | - Generative block store tests are now based on `test.carly`. 204 | 205 | ### Removed 206 | - *BREAKING:* Blocks no longer implement `IDeref` as a way to get their internal 207 | content. 208 | 209 | 210 | ## [0.9.1] - 2017-05-17 211 | 212 | ### Added 213 | - `PersistentBytes` has a `toByteArray` method to return a copy of the byte data 214 | as a raw array. 215 | 216 | 217 | ## [0.9.0] - 2017-03-31 218 | 219 | This release has a couple of breaking changes, detailed below. 220 | 221 | ### Added 222 | - `PersistentBytes` values support comparison using lexical sorting rules. 223 | - `blocks.core/->store` initializer function to create block stores from URI 224 | configuration strings. 225 | - `blocks.core/scan` function to produce a summary of the blocks contained in 226 | the store. 227 | - Summary data functions which provide a count, total size, size histogram, and 228 | bloom filter for block id membership. 229 | - `blocks.core/sync!` function to copy blocks between stores. 230 | - `ErasableStore` protocol for block stores which support efficient or atomic 231 | data removal. There's a matching `blocks.core/erase!!` function using it, 232 | which falls back to deleting the blocks in the store individually. 233 | - Buffer store supports a maximum block size limit. Storing blocks larger than 234 | the limit will write directly to the backing store, skipping the buffer. 235 | 236 | ### Changed 237 | - `blocks.store.util` namespace merged into `blocks.store`. This mainly impacts 238 | store implementers. 239 | - Replica store construction changed to make them better components. They now 240 | take a vector of keys, rather than stores. 241 | 242 | ### Removed 243 | - Dropped `EnumerableStore` protocol and `enumerate` method. No usages have 244 | come up requiring it and it's easy to replace in the non-optimized case. 245 | 246 | 247 | ## [0.8.0] - 2016-08-14 248 | 249 | ### Changed 250 | - All block stores were renamed to consistently end with `BlockStore`. 251 | - All block store constructors are similarly renamed, e.g. `file-block-store`. 252 | - Store constructors all follow a component pattern with variadic options. 253 | - Blocks no longer implement `IPending`, because it is not appropriate to treat 254 | immutable values as asynchronous references. 255 | 256 | ### Added 257 | - Multimethod `blocks.store/initialize` for constructing block stores from a 258 | URI string. The method is dispatched by URI scheme. 259 | 260 | ### Removed 261 | - Problematic namespace `blocks.data.conversions`, which defined conversion 262 | paths for the `byte-streams` library. 263 | 264 | 265 | ## [0.7.1] - 2016-07-25 266 | 267 | ### Fixed 268 | - Small number of reflection warnings in `blocks.data/clean-block`. 269 | - Improved generative store tests. 270 | 271 | ## [0.7.0] - 2016-04-27 272 | 273 | ### Changed 274 | - Upgrade `mvxcvi/multihash` to 2.0.0. 275 | - Small efficiency improvements to block construction. 276 | - Memory stores strip metadata and extra attributes from blocks `put!` in them. 277 | - Integration tests in `blocks.store.tests` now build generative sequences of 278 | operations and apply them to the store under test. 279 | 280 | ### Fixed 281 | - Reading an empty content source returns `nil` instead of an empty block. 282 | - Check that the argument to `block/put!` is actually a block. 283 | - Handle block merging in `block/put!` instead of requiring stores to do it. 284 | - File stores correctly return `false` when deleting a block which is not 285 | contained in the store. 286 | 287 | 288 | ## [0.6.1] - 2016-01-25 289 | 290 | ### Added 291 | - Expand `PersistentBytes` equality to include primitive byte arrays and 292 | `ByteBuffer` objects which have identical content. 293 | 294 | ### Fixed 295 | - `block/store!` will no longer try to store empty files. 296 | 297 | 298 | ## [0.6.0] - 2016-01-10 299 | 300 | ### Added 301 | - Add logical 'replica' and 'buffer' stores. 302 | [#2](//github.com/greglook/blocks/issues/2) 303 | - Add a second arity to `block/open` to read a sub-range of the content in a 304 | block by specifying starting and ending bytes. 305 | [#3](//github.com/greglook/blocks/issues/3) 306 | - Add protocol for batch block operations. 307 | [#5](//github.com/greglook/blocks/issues/5) 308 | - Add protocol for efficient block enumeration. 309 | [#8](//github.com/greglook/blocks/issues/8) 310 | 311 | ### Changed 312 | - Remove extra 'Block' from many store record names, for example 313 | `FileBlockStore` to `FileStore`. 314 | - Change file store to match IPFS file repo behavior by restricting it to a 315 | single intermediate directory level. 316 | - Move block store protocols to `blocks.store` namespace, with wrappers in 317 | `blocks.core`. 318 | 319 | ### Fixed 320 | - `validate!` now checks the size of lazy blocks by using a counting input 321 | stream wrapper. 322 | 323 | 324 | ## [0.5.0] - 2015-11-14 325 | 326 | ### Added 327 | - `blocks.store.cache` namespace with logical caching block store 328 | implementation. 329 | 330 | ### Changed 331 | - `random-bytes` and `random-hex` now generate fixed-width data. 332 | 333 | 334 | ## [0.4.2] - 2015-11-13 335 | 336 | ### Changed 337 | - File store now locks itself during `put!`, `delete!`, and `erase!` to 338 | prevent concurrent modifications. 339 | - `select-stats` moved from core to util namespace. 340 | 341 | ### Fixed 342 | - File store skips over malformed files instead of throwing an exception. 343 | 344 | 345 | ## [0.4.1] - 2015-11-12 346 | 347 | ### Changed 348 | - Rename `:origin` block stat to `:source`. 349 | - Switch argument order in `read-block` for consistency. 350 | 351 | ### Fixed 352 | - `put!` retains extra attributes and metadata on the block argument in the 353 | returned block. 354 | - Expanded integration test suite to cover `stat` and `get` on non-existent 355 | blocks and `put!` merging. 356 | 357 | 358 | ## [0.4.0] - 2015-11-10 359 | 360 | Lots of high-level library changes! `blocks.data.Block` is now a custom type to 361 | protect immutable fields like `:id` and `:size` and support the `IPending` 362 | interface. 363 | 364 | ### Added 365 | - Blocks can be either _literal_ or _lazy_ to support larger block sizes. 366 | - A standard set of BlockStore integration tests are available in the 367 | `blocks.store.tests` namespace. 368 | 369 | ### Changed 370 | - `BlockStore` methods `enumerate` and `get*` changed to `-list` and `-get`, 371 | respectively. 372 | - `list` now returns a sequence of block stats, rather than just multihashes. 373 | - Blocks returned by `get` and `put!` add stat information as metadata. 374 | - File stores now keep blocks in a manner compatible with IPFS. 375 | 376 | 377 | ## 0.3.0 - 2015-11-03 378 | 379 | Initial project release. 380 | 381 | 382 | [Unreleased]: https://github.com/greglook/blocks/compare/2.1.0...HEAD 383 | [2.1.0]: https://github.com/greglook/blocks/compare/2.0.4...2.1.0 384 | [2.0.4]: https://github.com/greglook/blocks/compare/2.0.3...2.0.4 385 | [2.0.3]: https://github.com/greglook/blocks/compare/2.0.2...2.0.3 386 | [2.0.2]: https://github.com/greglook/blocks/compare/2.0.1...2.0.2 387 | [2.0.1]: https://github.com/greglook/blocks/compare/2.0.0...2.0.1 388 | [2.0.0]: https://github.com/greglook/blocks/compare/1.1.0...2.0.0 389 | [1.1.0]: https://github.com/greglook/blocks/compare/1.0.0...1.1.0 390 | [1.0.0]: https://github.com/greglook/blocks/compare/0.9.1...1.0.0 391 | [0.9.1]: https://github.com/greglook/blocks/compare/0.9.0...0.9.1 392 | [0.9.0]: https://github.com/greglook/blocks/compare/0.8.0...0.9.0 393 | [0.8.0]: https://github.com/greglook/blocks/compare/0.7.1...0.8.0 394 | [0.7.1]: https://github.com/greglook/blocks/compare/0.7.0...0.7.1 395 | [0.7.0]: https://github.com/greglook/blocks/compare/0.6.1...0.7.0 396 | [0.6.1]: https://github.com/greglook/blocks/compare/0.6.0...0.6.1 397 | [0.6.0]: https://github.com/greglook/blocks/compare/0.5.0...0.6.0 398 | [0.5.0]: https://github.com/greglook/blocks/compare/0.4.2...0.5.0 399 | [0.4.2]: https://github.com/greglook/blocks/compare/0.4.1...0.4.2 400 | [0.4.1]: https://github.com/greglook/blocks/compare/0.4.0...0.4.1 401 | [0.4.0]: https://github.com/greglook/blocks/compare/0.3.0...0.4.0 402 | -------------------------------------------------------------------------------- /test/blocks/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.core-test 2 | (:require 3 | [blocks.core :as block] 4 | [blocks.data :as data] 5 | [blocks.store :as store] 6 | [blocks.test-utils :refer [quiet-exception]] 7 | [clojure.java.io :as io] 8 | [clojure.test :refer [deftest testing is]] 9 | [manifold.deferred :as d] 10 | [manifold.stream :as s] 11 | [multiformats.hash :as multihash]) 12 | (:import 13 | blocks.data.Block 14 | (java.io 15 | ByteArrayOutputStream 16 | File))) 17 | 18 | 19 | ;; ## IO Tests 20 | 21 | (deftest block-io 22 | (testing "from-file" 23 | (is (thrown? IllegalStateException 24 | (dosync (block/from-file "README.md")))) 25 | (is (nil? (block/from-file "not/a-real/file.txt"))) 26 | (let [tmp-dir (doto (io/file "target" "test" "tmp") 27 | (.mkdirs)) 28 | tmp (doto (File/createTempFile "input" ".tmp" tmp-dir) 29 | (.deleteOnExit))] 30 | (is (nil? (block/from-file tmp)) 31 | "empty file should return nil")) 32 | (let [block (block/from-file "README.md")] 33 | (is (pos? (:size block))) 34 | (is (block/lazy? block)))) 35 | (testing "reading" 36 | (is (thrown? IllegalStateException 37 | (dosync (block/read! "foo bar baz")))) 38 | (is (nil? (block/read! (byte-array 0))) 39 | "empty content reads into nil block")) 40 | (testing "writing" 41 | (let [block (block/read! "frobblenitz") 42 | baos (ByteArrayOutputStream.)] 43 | (is (thrown? IllegalStateException 44 | (dosync (block/write! block baos)))) 45 | (block/write! block baos) 46 | (is (= "frobblenitz" (String. (.toByteArray baos)))))) 47 | (testing "loading" 48 | (let [lazy-readme (block/from-file "README.md") 49 | loaded-readme (block/load! lazy-readme)] 50 | (is (thrown? IllegalStateException 51 | (dosync (block/load! lazy-readme)))) 52 | (is (block/loaded? loaded-readme) 53 | "load returns loaded block for lazy block") 54 | (is (identical? loaded-readme (block/load! loaded-readme)) 55 | "load returns loaded block unchanged") 56 | (is (= (slurp (block/open loaded-readme)) 57 | (slurp (block/open lazy-readme))) 58 | "loaded block content should match lazy block")))) 59 | 60 | 61 | (deftest block-opening 62 | (testing "ranged open validation" 63 | (let [block (block/read! "abcdefg")] 64 | (is (thrown? IllegalArgumentException (block/open block {:start -1, :end 4}))) 65 | (is (thrown? IllegalArgumentException (block/open block {:start 0, :end -1}))) 66 | (is (thrown? IllegalArgumentException (block/open block {:start 3, :end 1}))) 67 | (is (thrown? IllegalArgumentException (block/open block {:start 0, :end 10}))))) 68 | (testing "loaded block" 69 | (let [block (block/read! "the old dog jumped")] 70 | (is (= "the old dog jumped" (slurp (block/open block)))) 71 | (is (= "the old" (slurp (block/open block {:end 7})))) 72 | (is (= "old dog" (slurp (block/open block {:start 4, :end 11})))) 73 | (is (= "jumped" (slurp (block/open block {:start 12})))))) 74 | (testing "lazy block" 75 | (let [block (block/from-file "README.md") 76 | readme (slurp (block/open block))] 77 | (is (true? (block/lazy? block)) "file blocks should be lazy") 78 | (is (string? readme)) 79 | (is (= (subs readme 10 20) (slurp (block/open block {:start 10, :end 20}))))))) 80 | 81 | 82 | (deftest block-validation 83 | (let [base (block/read! "foo bar baz") 84 | fix (fn [b k v] 85 | (Block. (if (= k :id) v (:id b)) 86 | (if (= k :size) v (:size b)) 87 | (:stored-at b) 88 | (if (= k :content) v (.content b)) 89 | nil))] 90 | (testing "non-multihash id" 91 | (is (thrown-with-msg? Exception #"id is not a multihash" 92 | (block/validate! (fix base :id "foo"))))) 93 | (testing "negative size" 94 | (is (thrown-with-msg? Exception #"has an invalid size" 95 | (block/validate! (fix base :size -1))))) 96 | (testing "invalid size" 97 | (is (thrown-with-msg? Exception #"reports size 123 but has actual size 11" 98 | (block/validate! (fix base :size 123))))) 99 | (testing "incorrect identifier" 100 | (is (thrown-with-msg? Exception #"has mismatched id and content" 101 | (block/validate! (fix base :id (multihash/sha1 "qux")))))) 102 | (testing "valid block" 103 | (is (true? (block/validate! base)))))) 104 | 105 | 106 | ;; ## Storage API 107 | 108 | (deftest store-construction 109 | (is (satisfies? store/BlockStore (block/->store "mem:-"))) 110 | (is (thrown? Exception (block/->store "foo://x?z=1")))) 111 | 112 | 113 | (deftest list-wrapper 114 | (let [a (multihash/create :sha1 "37b51d194a7513e45b56f6524f2d51f200000000") 115 | b (multihash/create :sha1 "73fcffa4b7f6bb68e44cf984c85f6e888843d7f9") 116 | c (multihash/create :sha1 "acbd18db4cc2f856211de9ecedef654fccc4a4d8") 117 | d (multihash/create :sha2-256 "285c3c23d662b5ef7172373df0963ff4ce003206") 118 | store (reify store/BlockStore 119 | 120 | (-list 121 | [_ _] 122 | (s/->source [{:id a} {:id b} {:id c} {:id d}])))] 123 | (testing "io check" 124 | (is (thrown? IllegalStateException 125 | (dosync (block/list store))))) 126 | (testing "option validation" 127 | (is (thrown-with-msg? 128 | IllegalArgumentException #":foo" 129 | (block/list store :foo "bar"))) 130 | (is (thrown-with-msg? 131 | IllegalArgumentException #":algorithm .+ keyword.+ \"foo\"" 132 | (block/list store :algorithm "foo"))) 133 | (is (thrown-with-msg? 134 | IllegalArgumentException #":after .+ hex string.+ 123" 135 | (block/list store :after 123))) 136 | (is (thrown-with-msg? 137 | IllegalArgumentException #":after .+ hex string.+ \"123abx\"" 138 | (block/list store :after "123abx"))) 139 | (is (thrown-with-msg? 140 | IllegalArgumentException #":before .+ hex string.+ 123" 141 | (block/list store :before 123))) 142 | (is (thrown-with-msg? 143 | IllegalArgumentException #":before .+ hex string.+ \"123abx\"" 144 | (block/list store :before "123abx"))) 145 | (is (thrown-with-msg? 146 | IllegalArgumentException #":limit .+ positive integer.+ :xyz" 147 | (block/list store :limit :xyz))) 148 | (is (thrown-with-msg? 149 | IllegalArgumentException #":limit .+ positive integer.+ 0" 150 | (block/list store :limit 0)))) 151 | (testing "filtered behavior" 152 | (is (= [{:id b} {:id c}] 153 | (s/stream->seq 154 | (block/list store {:algorithm :sha1 155 | :after "111450" 156 | :before "1300" 157 | :limit 10})))) 158 | (is (= [{:id b}] 159 | (s/stream->seq 160 | (block/list store {:after a, :before c})))) 161 | (is (= [{:id c}] 162 | (s/stream->seq 163 | (block/list store {:after b, :limit 1}))))) 164 | (testing "seq wrapper" 165 | (is (thrown-with-msg? 166 | IllegalArgumentException #":timeout is not a positive integer" 167 | (block/list-seq store :timeout 0))) 168 | (is (= [{:id a} {:id b} {:id c} {:id d}] 169 | (block/list-seq store))) 170 | (is (thrown? RuntimeException 171 | (doall 172 | (block/list-seq 173 | (reify store/BlockStore 174 | 175 | (-list 176 | [_ _] 177 | (s/->source [{:id a} (quiet-exception)])))))) 178 | "rethrows stream exceptions") 179 | (is (thrown-with-msg? Exception #"stream consumption timed out" 180 | (doall 181 | (block/list-seq 182 | (reify store/BlockStore 183 | 184 | (-list 185 | [_ _] 186 | (s/stream))) 187 | :timeout 10))))))) 188 | 189 | 190 | (deftest stat-wrapper 191 | (testing "io check" 192 | (is (thrown? IllegalStateException 193 | (dosync (block/stat {} (multihash/sha1 "foo")))))) 194 | (testing "non-multihash id" 195 | (is (thrown? IllegalArgumentException 196 | (block/stat {} "foo")))) 197 | (testing "normal operation" 198 | (let [id (multihash/sha1 "foo") 199 | now (java.time.Instant/now)] 200 | (is (= {:id id, :size 123, :stored-at now} 201 | @(block/stat 202 | (reify store/BlockStore 203 | 204 | (-stat 205 | [_ id] 206 | (d/success-deferred 207 | {:id id, :size 123, :stored-at now}))) 208 | id)))))) 209 | 210 | 211 | (deftest get-wrapper 212 | (testing "io check" 213 | (is (thrown? IllegalStateException 214 | (dosync (block/get {} (multihash/sha1 "foo")))))) 215 | (testing "non-multihash id" 216 | (is (thrown? IllegalArgumentException 217 | (block/get {} "foo")))) 218 | (testing "no block result" 219 | (let [store (reify store/BlockStore 220 | 221 | (-get 222 | [_ _id] 223 | (d/success-deferred nil)))] 224 | (is (nil? @(block/get store (multihash/sha1 "foo bar")))))) 225 | (testing "invalid block result" 226 | (let [store (reify store/BlockStore 227 | 228 | (-get 229 | [_ _id] 230 | (d/success-deferred (block/read! "foo")))) 231 | other-id (multihash/sha1 "baz")] 232 | (is (thrown? RuntimeException 233 | @(block/get store other-id))))) 234 | (testing "valid block result" 235 | (let [block (block/read! "foo") 236 | store (reify store/BlockStore 237 | 238 | (-get 239 | [_ _id] 240 | (d/success-deferred block)))] 241 | (is (= block @(block/get store (:id block))))))) 242 | 243 | 244 | (deftest put-wrapper 245 | (let [original (block/read! "a block") 246 | store (reify store/BlockStore 247 | 248 | (-put! 249 | [_ block] 250 | (d/success-deferred block)))] 251 | (testing "io check" 252 | (is (thrown? IllegalStateException 253 | (dosync (block/put! store original))))) 254 | (testing "with non-block arg" 255 | (is (thrown? IllegalArgumentException 256 | (block/put! store :foo)))) 257 | (testing "block handling" 258 | (let [stored @(block/put! store original)] 259 | (is (= original stored)))))) 260 | 261 | 262 | (deftest store-wrapper 263 | (let [store (reify store/BlockStore 264 | 265 | (-put! 266 | [_ block] 267 | (d/success-deferred block)))] 268 | (testing "io check" 269 | (is (thrown? IllegalStateException 270 | (dosync (block/store! store "foo"))))) 271 | (testing "file source" 272 | (let [block @(block/store! store (io/file "README.md"))] 273 | (is (block/lazy? block) 274 | "should create lazy block from file"))) 275 | (testing "other source" 276 | (let [block @(block/store! store "foo bar baz")] 277 | (is (block/loaded? block) 278 | "should be read into memory"))))) 279 | 280 | 281 | (deftest delete-wrapper 282 | (let [id (multihash/sha1 "foo") 283 | store (reify store/BlockStore 284 | 285 | (-delete! 286 | [_ id'] 287 | (d/success-deferred (= id id'))))] 288 | (testing "io check" 289 | (is (thrown? IllegalStateException 290 | (dosync (block/delete! store id))))) 291 | (testing "non-multihash id" 292 | (is (thrown? IllegalArgumentException 293 | (block/delete! store "foo")))) 294 | (testing "normal operation" 295 | (is (true? @(block/delete! store id))) 296 | (is (false? @(block/delete! store (multihash/sha1 "bar"))))))) 297 | 298 | 299 | (deftest batch-operations 300 | (let [a (block/read! "foo") 301 | b (block/read! "bar") 302 | c (block/read! "baz") 303 | test-blocks {(:id a) a 304 | (:id b) b 305 | (:id c) c} 306 | store (reify store/BlockStore 307 | 308 | (-get 309 | [_ id] 310 | (d/success-deferred (get test-blocks id))) 311 | 312 | (-put! 313 | [_ block] 314 | (d/success-deferred block)) 315 | 316 | (-delete! 317 | [_ id] 318 | (d/success-deferred (contains? test-blocks id))))] 319 | (testing "get-batch" 320 | (let [ids [(:id a) (:id b) (:id c) (multihash/sha1 "frobble")]] 321 | (is (= [a b c] @(block/get-batch store ids))))) 322 | (testing "put-batch!" 323 | (is (= [] @(block/put-batch! store []))) 324 | (is (= [a b] @(block/put-batch! store [a b])))) 325 | (testing "delete-batch!" 326 | (is (= #{} @(block/delete-batch! store []))) 327 | (is (= #{(:id a) (:id b)} 328 | @(block/delete-batch! store [(:id a) (multihash/sha1 "qux") (:id b)])))))) 329 | 330 | 331 | ;; ## Storage Utilities 332 | 333 | (deftest store-scan 334 | (let [a (block/read! "foo") 335 | b (block/read! "baz") 336 | c (block/read! "bar") 337 | d (block/read! "abcdef") 338 | store (reify store/BlockStore 339 | 340 | (-list 341 | [_ _] 342 | (s/->source [a b c d])))] 343 | (is (thrown? IllegalStateException 344 | (dosync (block/scan store)))) 345 | (is (= {:count 4 346 | :size 15 347 | :sizes {2 3, 3 1}} 348 | @(block/scan store))) 349 | (is (= {:count 1 350 | :size 6 351 | :sizes {3 1}} 352 | @(block/scan store :filter #(< 3 (:size %))))))) 353 | 354 | 355 | (deftest store-erasure 356 | (let [a (block/read! "foo") 357 | b (block/read! "baz") 358 | c (block/read! "bar") 359 | deleted (atom #{}) 360 | store (reify store/BlockStore 361 | 362 | (-list 363 | [_ _] 364 | (s/->source [a b c])) 365 | 366 | (-delete! 367 | [_ id] 368 | (swap! deleted conj id) 369 | (d/success-deferred true)))] 370 | (is (thrown? IllegalStateException 371 | (dosync (block/erase! store)))) 372 | (is (true? @(block/erase! store))) 373 | (is (= #{(:id a) (:id b) (:id c)} @deleted)))) 374 | 375 | 376 | (deftest block-syncing 377 | (let [a (block/read! "789") ; 35a9 378 | b (block/read! "123") ; a665 379 | c (block/read! "456") ; b3a8 380 | d (block/read! "ABC") ; b5d4 381 | source-store (fn [& blocks] 382 | (reify store/BlockStore 383 | 384 | (-list 385 | [_ _] 386 | (s/->source blocks)))) 387 | sink-store (fn [target & blocks] 388 | (reify store/BlockStore 389 | 390 | (-list 391 | [_ _] 392 | (s/->source (vec blocks))) 393 | 394 | (-put! 395 | [_ block] 396 | (swap! target conj block) 397 | (d/success-deferred block))))] 398 | (testing "io check" 399 | (is (thrown? IllegalStateException 400 | (dosync (block/sync! {} {}))))) 401 | (testing "empty dest" 402 | (let [transferred (atom #{}) 403 | source (source-store a b c) 404 | dest (sink-store transferred)] 405 | (is (= 3 (count (block/list-seq source)))) 406 | (is (empty? @transferred)) 407 | (let [sync-summary @(block/sync! source dest)] 408 | (is (= 3 (:count sync-summary))) 409 | (is (= 9 (:size sync-summary)))) 410 | (is (= 3 (count (block/list-seq source)))) 411 | (is (= 3 (count @transferred))))) 412 | (testing "subset source" 413 | (let [transferred (atom #{}) 414 | source (source-store a c) 415 | dest (sink-store transferred a b c) 416 | summary @(block/sync! source dest)] 417 | (is (zero? (:count summary))) 418 | (is (zero? (:size summary))) 419 | (is (= #{} @transferred)))) 420 | (testing "mixed blocks" 421 | (let [transferred (atom #{}) 422 | source (source-store a c) 423 | dest (sink-store transferred b d) 424 | summary @(block/sync! source dest)] 425 | (is (= 2 (:count summary))) 426 | (is (= 6 (:size summary))) 427 | (is (= #{a c} @transferred)))) 428 | (testing "filter logic" 429 | (let [transferred (atom #{}) 430 | source (source-store a c) 431 | dest (sink-store transferred b d) 432 | summary @(block/sync! source dest :filter #(= (:id c) (:id %)))] 433 | (is (= 1 (:count summary))) 434 | (is (= 3 (:size summary))) 435 | (is (= #{c} @transferred)))))) 436 | -------------------------------------------------------------------------------- /src/blocks/core.clj: -------------------------------------------------------------------------------- 1 | (ns blocks.core 2 | "Core block storage API. 3 | 4 | Functions which may cause side effects or IO are marked with bangs - for 5 | example `(read! \"foo\")` doesn't have side-effects, but 6 | `(read! some-input-stream)` will consume bytes from the stream." 7 | (:refer-clojure :exclude [get list]) 8 | (:require 9 | [blocks.data :as data] 10 | [blocks.meter :as meter] 11 | [blocks.store :as store] 12 | [blocks.summary :as sum] 13 | [clojure.java.io :as io] 14 | [clojure.string :as str] 15 | [manifold.deferred :as d] 16 | [manifold.stream :as s] 17 | [multiformats.hash :as multihash]) 18 | (:import 19 | blocks.data.Block 20 | (java.io 21 | File 22 | FileInputStream) 23 | java.time.Instant 24 | multiformats.hash.Multihash 25 | (org.apache.commons.io.input 26 | CountingInputStream))) 27 | 28 | 29 | ;; ## Utilities 30 | 31 | (def default-algorithm 32 | "The hashing algorithm used if not specified in functions which create blocks." 33 | :sha2-256) 34 | 35 | 36 | (defn- args->map 37 | "Accept arguments and return a map corresponding to the input. Accepts either 38 | a single map argument or kw-args." 39 | [args] 40 | (when (seq args) 41 | (if (and (= 1 (count args)) 42 | (or (map? (first args)) 43 | (nil? (first args)))) 44 | (first args) 45 | (apply hash-map args)))) 46 | 47 | 48 | (defn- hex-string? 49 | "True if the value is a hexadecimal string." 50 | [x] 51 | (and (string? x) (re-matches #"[0-9a-fA-F]*" x))) 52 | 53 | 54 | (defn- multihash? 55 | "True if the value is a multihash." 56 | [x] 57 | (instance? Multihash x)) 58 | 59 | 60 | ;; ## Block IO 61 | 62 | (defn loaded? 63 | "True if the block's content is already loaded into memory." 64 | [block] 65 | (data/byte-content? block)) 66 | 67 | 68 | (defn lazy? 69 | "True if the given block reads its content on-demand." 70 | [block] 71 | (not (data/byte-content? block))) 72 | 73 | 74 | (defn from-file 75 | "Create a lazy block from a local file. Returns the block, or nil if the file 76 | does not exist or is empty. 77 | 78 | The file is read once to calculate the identifier." 79 | ([file] 80 | (from-file file default-algorithm)) 81 | ([file algorithm] 82 | (let [file (io/file file) 83 | hash-fn (data/hasher algorithm)] 84 | (io! 85 | (when (and (.exists file) (pos? (.length file))) 86 | (data/create-block 87 | (hash-fn (FileInputStream. file)) 88 | (.length file) 89 | (Instant/ofEpochMilli (.lastModified file)) 90 | (fn reader [] (FileInputStream. file)))))))) 91 | 92 | 93 | (defn open 94 | "Open an input stream to read the contents of the block. 95 | 96 | If an options map with `:start` or `:end` are given, the input stream will 97 | only return content from the starting index byte to the byte before the end 98 | index. For example, opening a block with size _n_ with these options would 99 | return the full block contents: 100 | 101 | (open block {:start 0, :end n}) 102 | 103 | Omitting either boundary will read from the beginning or to the end of the 104 | block, respectively." 105 | (^java.io.InputStream 106 | [block] 107 | (open block nil)) 108 | (^java.io.InputStream 109 | [block opts] 110 | (let [{:keys [start end]} opts] 111 | (when (and start (or (not (nat-int? start)) 112 | (<= (:size block) start))) 113 | (throw (IllegalArgumentException. 114 | (format "Range start must be an integer within block size %d: %s" 115 | (:size block) start)))) 116 | (when (and end (or (not (pos-int? end)) 117 | (< (:size block) end))) 118 | (throw (IllegalArgumentException. 119 | (format "Range end must be an integer within block size %d: %s" 120 | (:size block) end)))) 121 | (when (and start end (not (< start end))) 122 | (throw (IllegalArgumentException. 123 | (format "Range start %d must be less than range end %d" 124 | start end)))) 125 | (io! (data/content-stream block start end))))) 126 | 127 | 128 | (defn read! 129 | "Read data into memory from the given source and hash it to identify the 130 | block." 131 | ([source] 132 | (read! source default-algorithm)) 133 | ([source algorithm] 134 | (io! (data/read-block algorithm source)))) 135 | 136 | 137 | (defn write! 138 | "Write a block's content to an output stream." 139 | [block out] 140 | (with-open [stream (open block)] 141 | (io/copy stream out))) 142 | 143 | 144 | (defn load! 145 | "Ensure the block's content is loaded into memory. Returns a loaded version 146 | of the given block. 147 | 148 | If the block is lazy, the stream is read into memory and returned as a new 149 | block. If the block is already loaded, it is returned unchanged. The returned 150 | block will have the same metadata as the one given." 151 | [block] 152 | (io! 153 | (if (lazy? block) 154 | (with-meta 155 | (data/read-block 156 | (:algorithm (:id block)) 157 | (data/content-stream block nil nil)) 158 | (meta block)) 159 | block))) 160 | 161 | 162 | (defn validate! 163 | "Check a block to verify that it has the correct identifier and size for its 164 | content. Returns true if the block is valid, or throws an exception on any 165 | error." 166 | [block] 167 | (let [id (:id block) 168 | size (:size block)] 169 | (when-not (multihash? id) 170 | (throw (ex-info 171 | (str "Block id is not a multihash: " (pr-str id)) 172 | {:id id}))) 173 | (when-not (pos-int? size) 174 | (throw (ex-info 175 | (str "Block " id " has an invalid size: " (pr-str size)) 176 | {:id id, :size size}))) 177 | (with-open [stream (CountingInputStream. (open block))] 178 | (let [hash-fn (data/hasher (:algorithm id)) 179 | actual-id (hash-fn stream) 180 | actual-size (.getByteCount stream)] 181 | (when (not= id actual-id) 182 | (throw (ex-info 183 | (str "Block " id " has mismatched id and content") 184 | {:id id, :actual-id actual-id}))) 185 | (when (not= size actual-size) 186 | (throw (ex-info 187 | (str "Block " id " reports size " size 188 | " but has actual size " actual-size) 189 | {:id id, :size size, :actual-size actual-size}))))) 190 | true)) 191 | 192 | 193 | ;; ## Storage API 194 | 195 | (defn ->store 196 | "Constructs a new block store from a URI by dispatching on the scheme. The 197 | store will be returned in an initialized (but not started) state." 198 | [uri] 199 | (store/initialize uri)) 200 | 201 | 202 | (defn list 203 | "Enumerate the stored blocks, returning a stream of blocks ordered by their 204 | multihash id. The store will continue listing blocks until the stream is 205 | closed or there are no more matching blocks to return. 206 | 207 | - `:algorithm` 208 | Only return blocks identified by this hash algorithm. 209 | - `:after` 210 | Return blocks whose id (in hex) lexically follows this string. A multihash 211 | may also be provided and will be coerced to hex. 212 | - `:before` 213 | Return blocks whose id (in hex) lexically precedes this string. A multihash 214 | may also be provided and will be coerced to hex. 215 | - `:limit` 216 | Restrict the maximum number of blocks returned on the stream." 217 | [store & opts] 218 | (let [opts (args->map opts) 219 | opts (merge 220 | ;; Validate algorithm option. 221 | (when-let [algorithm (:algorithm opts)] 222 | (if (keyword? algorithm) 223 | {:algorithm algorithm} 224 | (throw (IllegalArgumentException. 225 | (str "Option :algorithm is not a keyword: " 226 | (pr-str algorithm)))))) 227 | ;; Validate 'after' boundary. 228 | (when-let [after (:after opts)] 229 | (cond 230 | (hex-string? after) 231 | {:after (str/lower-case after)} 232 | 233 | (multihash? after) 234 | {:after (multihash/hex after)} 235 | 236 | :else 237 | (throw (IllegalArgumentException. 238 | (str "Option :after is not a hex string or multihash: " 239 | (pr-str after)))))) 240 | ;; Validate 'before' boundary. 241 | (when-let [before (:before opts)] 242 | (cond 243 | (hex-string? before) 244 | {:before (str/lower-case before)} 245 | 246 | (multihash? before) 247 | {:before (multihash/hex before)} 248 | 249 | :else 250 | (throw (IllegalArgumentException. 251 | (str "Option :before is not a hex string or multihash: " 252 | (pr-str before)))))) 253 | ;; Validate query limit. 254 | (when-let [limit (:limit opts)] 255 | (if (pos-int? limit) 256 | {:limit limit} 257 | (throw (IllegalArgumentException. 258 | (str "Option :limit is not a positive integer: " 259 | (pr-str limit)))))) 260 | ;; Ensure no other options. 261 | (when-let [bad-opts (not-empty (dissoc opts :algorithm :after :before :limit))] 262 | (throw (IllegalArgumentException. 263 | (str "Unknown options passed to list: " (pr-str bad-opts))))))] 264 | (meter/measure-stream 265 | store :list nil 266 | (io! (store/select-blocks opts (store/-list store opts)))))) 267 | 268 | 269 | (defn list-seq 270 | "Enumerate the stored blocks, returning a sequence of blocks ordered by their 271 | multihash id. This wraps the `list` method and consumes the stream lazily, 272 | terminating when the stream is drained, a timeout is encountered, or a list 273 | exception is observed on the stream. 274 | 275 | Accepts the same options as `list`, plus: 276 | 277 | - `:timeout` 278 | Millisecond duration to wait for new blocks to arrive on the stream. 279 | (default: `10000`)" 280 | [store & opts] 281 | (let [opts (args->map opts) 282 | timeout (:timeout opts 10000)] 283 | (when-not (pos-int? timeout) 284 | (throw (IllegalArgumentException. 285 | (str "Option :timeout is not a positive integer: " 286 | (pr-str timeout))))) 287 | (letfn [(stream->seq 288 | [s] 289 | (lazy-seq 290 | (let [x @(s/try-take! s ::drained timeout ::timeout)] 291 | (when (instance? Throwable x) 292 | (throw x)) 293 | (when (identical? ::timeout x) 294 | (throw (ex-info 295 | (format "Block stream consumption timed out after %d ms" 296 | timeout) 297 | {:opts opts}))) 298 | (when-not (identical? ::drained x) 299 | (cons x (stream->seq s))))))] 300 | (stream->seq (list store (dissoc opts :timeout)))))) 301 | 302 | 303 | (defn stat 304 | "Load metadata about a block if the store contains it. Returns a deferred 305 | which yields a map with block information but no content, or nil if the store 306 | does not contain the identified block. 307 | 308 | The block stats include the `:id`, `:size`, and `:stored-at` fields. The 309 | returned map may also have additional implementation-specific storage 310 | metadata, similar to returned blocks." 311 | [store id] 312 | (when-not (multihash? id) 313 | (throw (IllegalArgumentException. 314 | (str "Block id must be a multihash, got: " (pr-str id))))) 315 | (meter/measure-method 316 | store :stat 317 | {:block-id id} 318 | (io! (store/-stat store id)))) 319 | 320 | 321 | (defn get 322 | "Load a block from the store. Returns a deferred which yields the block if 323 | the store contains it, or nil if no block is stored for that id." 324 | [store id] 325 | (when-not (multihash? id) 326 | (throw (IllegalArgumentException. 327 | (str "Block id must be a multihash, got: " (pr-str id))))) 328 | (d/chain 329 | (meter/measure-method 330 | store :get 331 | {:block-id id} 332 | (io! (store/-get store id))) 333 | (fn validate-block 334 | [block] 335 | (when block 336 | (when-not (= id (:id block)) 337 | (throw (RuntimeException. 338 | (str "Asked for block " id " but got " (:id block))))) 339 | (meter/metered-block store ::meter/io-read block))))) 340 | 341 | 342 | (defn put! 343 | "Save a block into the store. Returns a deferred which yields the stored 344 | block, which may have already been present in the store." 345 | [store block] 346 | (when-not (instance? Block block) 347 | (throw (IllegalArgumentException. 348 | (str "Argument must be a block, got: " (pr-str block))))) 349 | (d/chain 350 | (meter/measure-method 351 | store :put! 352 | {:block-id (:id block) 353 | :block-size (:size block)} 354 | (->> block 355 | (meter/metered-block store ::meter/io-write) 356 | (store/-put! store) 357 | (io!))) 358 | (fn meter-block 359 | [block] 360 | (meter/metered-block store ::meter/io-read block)))) 361 | 362 | 363 | (defn store! 364 | "Store content from a byte source in a block store. Returns a deferred which 365 | yields the stored block, or nil if the source was empty. 366 | 367 | If the source is a file, it will be streamed into the store, otherwise the 368 | content is read into memory." 369 | ([store source] 370 | (store! store source default-algorithm)) 371 | ([store source algorithm] 372 | (d/chain 373 | (io! 374 | (store/future' 375 | (if (instance? File source) 376 | (from-file source algorithm) 377 | (read! source algorithm)))) 378 | (fn put-block 379 | [block] 380 | (when block 381 | (put! store block)))))) 382 | 383 | 384 | (defn delete! 385 | "Remove a block from the store. Returns a deferred which yields true if the 386 | block was found and removed." 387 | [store id] 388 | (when-not (multihash? id) 389 | (throw (IllegalArgumentException. 390 | (str "Block id must be a multihash, got: " (pr-str id))))) 391 | (meter/measure-method 392 | store :delete! 393 | {:block-id id} 394 | (io! (store/-delete! store id)))) 395 | 396 | 397 | ;; ## Batch API 398 | 399 | (defn get-batch 400 | "Retrieve a batch of blocks identified by a collection of multihashes. 401 | Returns a deferred which yields a collection of the blocks which were found. 402 | 403 | The blocks are returned in no particular order, and any missing blocks are 404 | omitted from the result." 405 | [store ids] 406 | (d/chain 407 | (->> (distinct ids) 408 | (map (partial get store)) 409 | (apply d/zip)) 410 | (fn omit-missing 411 | [blocks] 412 | (into [] (remove nil?) blocks)))) 413 | 414 | 415 | (defn put-batch! 416 | "Save a collection of blocks into the store. Returns a deferred which 417 | yields a collection of stored blocks. 418 | 419 | This is not guaranteed to be atomic; readers may see the store in a 420 | partially updated state." 421 | [store blocks] 422 | (if-let [blocks (seq (remove nil? blocks))] 423 | (apply d/zip (map (partial put! store) blocks)) 424 | (d/success-deferred []))) 425 | 426 | 427 | (defn delete-batch! 428 | "Remove a batch of blocks from the store, identified by a collection of 429 | multihashes. Returns a deferred which yields a set of ids for the blocks 430 | which were found and deleted. 431 | 432 | This is not guaranteed to be atomic; readers may see the store in a 433 | partially deleted state." 434 | [store ids] 435 | (if-let [ids (not-empty (into [] (comp (remove nil?) (distinct)) ids))] 436 | (d/chain 437 | (apply d/zip (map (partial delete! store) ids)) 438 | (fn match-ids 439 | [results] 440 | (into #{} 441 | (comp 442 | (filter first) 443 | (map second)) 444 | (map vector results ids)))) 445 | (d/success-deferred #{}))) 446 | 447 | 448 | ;; ## Storage Utilities 449 | 450 | (defn scan 451 | "Scan blocks in the store, building up a summary. Returns a deferred which 452 | yields the summary map when the scan is complete. 453 | 454 | Accepts the same arguments as `list`, plus: 455 | 456 | - `:filter` 457 | A predicate function which will be used to filter blocks listed by the 458 | store. By default, all blocks are included." 459 | [store & opts] 460 | (let [opts (args->map opts)] 461 | (-> 462 | (list store (dissoc opts :filter)) 463 | (cond->> 464 | (:filter opts) (s/filter (:filter opts))) 465 | (->> 466 | (s/reduce sum/update (sum/init)))))) 467 | 468 | 469 | (defn erase! 470 | "Completely remove all data associated with the store. After this call, the 471 | store will be empty. Returns a deferred which yields true once the store has 472 | been erased. 473 | 474 | This is not guaranteed to be atomic; readers may see the store in a partially 475 | erased state." 476 | [store] 477 | (io! 478 | (if (satisfies? store/ErasableStore store) 479 | (meter/measure-method 480 | store :erase! nil 481 | (store/-erase! store)) 482 | ;; TODO: should be able to parallelize this - how to communicate errors? 483 | (s/consume-async 484 | (fn erase-block 485 | [block] 486 | (if (instance? Throwable block) 487 | (d/error-deferred block) 488 | (delete! store (:id block)))) 489 | (list store))))) 490 | 491 | 492 | (defn sync! 493 | "Synchronize blocks from the `source` store to the `dest` store. Returns a 494 | deferred which yields a summary of the copied blocks. Options may include: 495 | 496 | - `:filter` 497 | A function to run on every block before it is synchronized. The block will 498 | only be copied if the filter returns a truthy value." 499 | [source dest & opts] 500 | (let [opts (args->map opts) 501 | stream (cond->> (io! (store/missing-blocks 502 | (store/-list source nil) 503 | (store/-list dest nil))) 504 | (:filter opts) (s/filter (:filter opts)))] 505 | (d/loop [summary (sum/init)] 506 | (d/chain' 507 | (s/take! stream ::drained) 508 | (fn copy-next 509 | [block] 510 | (cond 511 | (identical? ::drained block) 512 | summary 513 | 514 | (instance? Throwable block) 515 | (d/error-deferred block) 516 | 517 | :else 518 | (d/chain 519 | (put! dest block) 520 | (fn update-sum 521 | [block'] 522 | (d/recur (sum/update summary block')))))))))) 523 | --------------------------------------------------------------------------------