├── bin ├── kaocha ├── run-unittests ├── run-integrationtests └── run-cljstests ├── tests.edn ├── src └── hasch │ ├── hex.cljc │ ├── base64.cljc │ ├── md5.cljc │ ├── core.cljc │ ├── benc.cljc │ ├── platform.cljs │ └── platform.clj ├── .gitignore ├── resources └── test │ ├── unit-test.html │ └── test.js ├── package.json ├── test └── hasch │ ├── datahike_test.clj │ └── api_test.cljc ├── karma.conf.js ├── shadow-cljs.edn ├── template └── pom.xml ├── .circleci └── config.yml ├── deps.edn ├── LICENSE └── README.md /bin/kaocha: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | clojure -M:test "$@" 4 | -------------------------------------------------------------------------------- /bin/run-unittests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TIMBRE_LEVEL=':warn' ./bin/kaocha --skip :integration 4 | -------------------------------------------------------------------------------- /tests.edn: -------------------------------------------------------------------------------- 1 | #kaocha/v1 {:tests [{:id :integration 2 | :focus-meta [:integration]}]} 3 | -------------------------------------------------------------------------------- /bin/run-integrationtests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TIMBRE_LEVEL=':warn' ./bin/kaocha --focus :integration 4 | -------------------------------------------------------------------------------- /bin/run-cljstests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | 6 | # Compile for browser 7 | npm run ci-test 8 | 9 | # Compile for node 10 | npx shadow-cljs compile node-test 11 | -------------------------------------------------------------------------------- /src/hasch/hex.cljc: 
-------------------------------------------------------------------------------- 1 | (ns hasch.hex 2 | (:require [hasch.platform :refer [byte->hex]])) 3 | 4 | (defn encode [raw] 5 | (apply str (map byte->hex raw))) 6 | 7 | (comment 8 | (encode (byte-array (range 100)))) 9 | 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /resources/public/js 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | /.repl* 11 | /out* 12 | /.cljsbuild* 13 | /.shadow-cljs 14 | /.calva 15 | /.cpcache 16 | node_modules 17 | /public 18 | .cljs_node_repl/ 19 | -------------------------------------------------------------------------------- /resources/test/unit-test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is just a dummy HTML file with which to load the unit tests. 4 | This file could be changed to include HTML for the tests to use 5 | during their operation. 
6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /resources/test/test.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | var system = require("system"); 3 | var url = system.args[1]; 4 | 5 | 6 | page.onConsoleMessage = function (message) { 7 | console.log(message); 8 | }; 9 | 10 | page.open(url, function (status) { 11 | page.evaluate(function(){ 12 | hasch.test.run(); 13 | }); 14 | phantom.exit(0); 15 | }); 16 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CITests", 3 | "description": "Testing", 4 | "scripts": { 5 | "ci-test": "npx shadow-cljs compile ci && npx karma start --single-run" 6 | }, 7 | "devDependencies": { 8 | "karma": "^6.4.1", 9 | "karma-chrome-launcher": "^3.1.1", 10 | "karma-cljs-test": "^0.1.0" 11 | }, 12 | "dependencies": { 13 | "shadow-cljs": "^2.20.20" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /test/hasch/datahike_test.clj: -------------------------------------------------------------------------------- 1 | (ns hasch.datahike-test 2 | (:require [clojure.test :refer :all] 3 | [datahike.integration-test :as dt])) 4 | 5 | (def config {:store {:backend :mem 6 | :id "hasch-datahike-test-db"} 7 | :keep-history? 
true 8 | :schema-flexibility :read}) 9 | 10 | (defn test-fixture [f] 11 | (dt/integration-test-fixture config) 12 | (f)) 13 | 14 | (use-fixtures :once test-fixture) 15 | 16 | (deftest ^:integration datahike-integration-test 17 | (dt/integration-test config)) 18 | -------------------------------------------------------------------------------- /karma.conf.js: -------------------------------------------------------------------------------- 1 | module.exports = function (config) { 2 | config.set({ 3 | browsers: ['ChromeHeadless'], 4 | // The directory where the output file lives 5 | basePath: 'target', 6 | // The file itself 7 | files: ['ci.js'], 8 | frameworks: ['cljs-test'], 9 | plugins: ['karma-cljs-test', 'karma-chrome-launcher'], 10 | colors: true, 11 | logLevel: config.LOG_INFO, 12 | client: { 13 | args: ["shadow.test.karma.init"], 14 | singleRun: true 15 | } 16 | }) 17 | } 18 | 19 | -------------------------------------------------------------------------------- /shadow-cljs.edn: -------------------------------------------------------------------------------- 1 | {:deps {:aliases [:cljs]} 2 | 3 | :builds 4 | {:app 5 | {:target :browser 6 | :output-dir "public/js" 7 | :asset-path "/js" 8 | :modules {:main {:entries [hasch.core]}}} 9 | 10 | :browser-test 11 | {:target :browser-test 12 | :test-dir "resources/public/js/test" 13 | :devtools {:http-port 8021 14 | :http-root "resources/public/js/test"}} 15 | 16 | :node-test 17 | {:target :node-test 18 | :output-to "out/node-tests.js" 19 | :autorun true} 20 | 21 | :ci 22 | {:target :karma 23 | :output-to "target/ci.js"}}} 24 | -------------------------------------------------------------------------------- /src/hasch/base64.cljc: -------------------------------------------------------------------------------- 1 | (ns hasch.base64 2 | #?(:cljs (:require [goog.crypt.base64] 3 | [cljs.reader :as r])) 4 | #?(:clj (:import (java.util Base64)))) 5 | 6 | (defn encode 7 | "Returns a base64 encoded String." 
8 | [byte-arr] 9 | #?(:clj (String. (.encode (Base64/getEncoder) 10 | ^bytes byte-arr) 11 | "UTF-8") 12 | :cljs (goog.crypt.base64.encodeByteArray byte-arr))) 13 | 14 | (defn decode 15 | "Returns a byte-array for encoded String." 16 | [^String base64] 17 | #?(:clj (.decode (Base64/getDecoder) base64) 18 | :cljs (goog.crypt.base64.decodeStringToByteArray base64))) 19 | -------------------------------------------------------------------------------- /src/hasch/md5.cljc: -------------------------------------------------------------------------------- 1 | (ns hasch.md5 2 | #?(:cljs (:require [goog.crypt.Md5] 3 | [goog.crypt.Hash] 4 | [goog.crypt])) 5 | #?(:clj (:import [java.security MessageDigest] 6 | [java.math BigInteger]))) 7 | 8 | (defn str->md5 "Returns the raw MD5 digest of string s. The string is encoded as UTF-8 on both platforms, so the digest does not depend on the JVM default charset." [^String s] 9 | #?(:clj 10 | (let [algorithm (MessageDigest/getInstance "MD5") 11 | raw (.digest algorithm (.getBytes s "UTF-8"))] 12 | raw) 13 | :cljs 14 | (let [bytes (goog.crypt/stringToUtf8ByteArray s) 15 | md5-digester (goog.crypt.Md5.) 16 | hashed (do 17 | (.update md5-digester bytes) 18 | (.digest md5-digester))] 19 | hashed))) 20 | -------------------------------------------------------------------------------- /template/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | jar 5 | io.replikativ 6 | hasch 7 | hasch 8 | A library to consistently crypto-hash edn data structures on Clojure and ClojureScript with SHA-512 9 | 10 | 11 | Eclipse 12 | http://www.eclipse.org/legal/epl-v10.html 13 | 14 | 15 | 16 | scm:git:git@github.com:replikativ/hasch.git 17 | scm:git:git@github.com/replikativ/hasch.git 18 | https://github.com/replikativ/hasch 19 | 20 | 21 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | tools: replikativ/clj-tools@0 5 | 6 | workflows: 7 | build-test-and-deploy: 8 | jobs: 9 | - 
tools/setup: 10 | context: dockerhub-deploy 11 | setup_cljs: true 12 | - tools/build: 13 | context: dockerhub-deploy 14 | build_cljs: true 15 | requires: 16 | - tools/setup 17 | - tools/format: 18 | context: dockerhub-deploy 19 | requires: 20 | - tools/setup 21 | - tools/unittest: 22 | context: dockerhub-deploy 23 | requires: 24 | - tools/build 25 | - tools/cljstest: 26 | context: dockerhub-deploy 27 | requires: 28 | - tools/build 29 | - tools/integrationtest: 30 | context: dockerhub-deploy 31 | requires: 32 | - tools/build 33 | - tools/deploy: 34 | context: 35 | - clojars-deploy 36 | - dockerhub-deploy 37 | filters: 38 | branches: 39 | only: main 40 | requires: 41 | - tools/format 42 | - tools/unittest 43 | - tools/cljstest 44 | - tools/release: 45 | context: 46 | - github-token 47 | - dockerhub-deploy 48 | filters: 49 | branches: 50 | only: main 51 | requires: 52 | - tools/deploy 53 | -------------------------------------------------------------------------------- /src/hasch/core.cljc: -------------------------------------------------------------------------------- 1 | (ns hasch.core 2 | "Hashing functions for EDN." 3 | #?(:cljs (:refer-clojure :exclude [uuid])) 4 | (:require [hasch.benc :refer [PHashCoercion -coerce digest]] 5 | [hasch.base64 :as b64] 6 | [hasch.platform :as platform])) 7 | 8 | (def uuid4 platform/uuid4) 9 | (def uuid5 platform/uuid5) 10 | (def hash->str platform/hash->str) 11 | 12 | (defn edn-hash 13 | "Hash an edn value with SHA-512 by default or a compatible hash function of choice. 14 | 15 | Please use the write-handlers only in legacy cases and rather extend the PHashCoercion 16 | protocol to your own types." 17 | ([val] (edn-hash val {})) 18 | ([val write-handlers] (edn-hash val hasch.platform/sha512-message-digest write-handlers)) 19 | ([val md-create-fn write-handlers] 20 | (map #(if (neg? 
%) (+ % 256) %) ;; make unsigned 21 | (digest (-coerce val md-create-fn (or write-handlers {})) md-create-fn)))) 22 | 23 | (defn uuid 24 | "Creates random UUID-4 without argument or UUID-5 for the argument value. 25 | 26 | Optionally an incognito-style write-handlers map can be supplied, 27 | which describes record serialization in terms of Clojure data 28 | structures." 29 | ([] (uuid4)) 30 | ([val & {:keys [write-handlers]}] (-> val (edn-hash write-handlers) uuid5))) 31 | 32 | (defn squuid 33 | "Calculates a sequential UUID as described in 34 | https://github.com/clojure-cookbook/clojure-cookbook/blob/master/01_primitive-data/1-24_uuids.asciidoc" 35 | ([] (squuid (uuid4))) 36 | ([uuid] 37 | #?(:clj 38 | (let [time (System/currentTimeMillis) 39 | secs (quot time 1000) 40 | lsb (.getLeastSignificantBits ^java.util.UUID uuid) 41 | msb (.getMostSignificantBits ^java.util.UUID uuid) 42 | timed-msb (bit-or (bit-shift-left secs 32) 43 | (bit-and 0x00000000ffffffff msb))] 44 | (java.util.UUID. timed-msb lsb)) 45 | :cljs 46 | (let [time (.getTime (js/Date.)) 47 | secs (quot time 1000) 48 | prefix (.toString secs 16)] 49 | (cljs.core/uuid (str prefix (subs (str uuid) 8))))))) 50 | 51 | (defn b64-hash 52 | "Provides a base64 encoded string of the edn-hash of a value val. This contains 53 | all bits of the hash compared to 128 bits for the UUID-5. Both should be safe, 54 | but b64-hash is safer towards collisions." 
55 | [val] 56 | (b64/encode (#?(:clj byte-array :cljs clj->js) (edn-hash val)))) 57 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | :deps {org.clojure/clojure {:mvn/version "1.11.1"} 3 | org.clojure/clojurescript {:mvn/version "1.11.60"} 4 | io.replikativ/incognito {:mvn/version "0.3.66"}} 5 | :aliases {:cljs {:extra-deps {thheller/shadow-cljs {:mvn/version "2.20.20"} 6 | binaryage/devtools {:mvn/version "1.0.6"}} 7 | :extra-paths ["test"]} 8 | :dev {:extra-deps {criterium/criterium {:mvn/version "0.4.6"}}} 9 | :test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.77.1236"} 10 | io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" 11 | :git/sha "dfb30dd"} 12 | io.replikativ/datahike {:mvn/version "0.6.1531"} 13 | io.replikativ/hasch {:local/root "."}} 14 | :main-opts ["-m" "kaocha.runner"] 15 | :extra-paths ["test"]} 16 | ;; pull in specific versions of clojure and clojurescript 17 | :1.7 {:override-deps {org.clojure/clojure {:mvn/version "1.7.0"} 18 | org.clojure/clojurescript {:mvn/version "1.7.228"}}} 19 | :1.8 {:override-deps {org.clojure/clojure {:mvn/version "1.8.0"} 20 | org.clojure/clojurescript {:mvn/version "1.8.51"}}} 21 | :build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.3"} 22 | slipset/deps-deploy {:mvn/version "0.2.0"} 23 | borkdude/gh-release-artifact {:git/url "https://github.com/borkdude/gh-release-artifact" 24 | :git/sha "a83ee8da47d56a80b6380cbb6b4b9274048067bd"} 25 | babashka/babashka.curl {:mvn/version "0.1.1"} 26 | babashka/fs {:mvn/version "0.1.2"} 27 | cheshire/cheshire {:mvn/version "5.10.2"}} 28 | :ns-default build} 29 | :ffix {:extra-deps {cljfmt/cljfmt {:mvn/version "0.8.0"}} 30 | :main-opts ["-m" "cljfmt.main" "fix"]} 31 | :format {:extra-deps {cljfmt/cljfmt {:mvn/version "0.8.0"}} 32 | :main-opts ["-m" "cljfmt.main" "check"]}}} 33 | 
-------------------------------------------------------------------------------- /src/hasch/benc.cljc: -------------------------------------------------------------------------------- 1 | (ns hasch.benc 2 | "Binary encoding of EDN values." 3 | #?@(:clj [(:import java.security.MessageDigest 4 | java.io.ByteArrayOutputStream)])) 5 | 6 | #?(:clj (set! *warn-on-reflection* true)) 7 | ;; Protocol implemented per type: -coerce produces the bytes for a value that the digest helpers below consume. 8 | (defprotocol PHashCoercion 9 | (-coerce [this md-create-fn write-handlers])) 10 | 11 | ;; changes break hashes! 12 | (def magics {:nil (byte 0) 13 | :boolean (byte 1) 14 | :number (byte 2) 15 | :string (byte 3) 16 | :symbol (byte 4) 17 | :keyword (byte 5) 18 | :inst (byte 6) 19 | :uuid (byte 7) 20 | :seq (byte 8) 21 | :vector (byte 9) 22 | :map (byte 10) 23 | :set (byte 11) 24 | :literal (byte 12) 25 | :binary (byte 13)}) 26 | ;; Threshold used by encode-safe: inputs with at least this many bytes are replaced by their digest. 27 | (def split-size 1024) 28 | ;; Upper bound on the number of leading bytes that xor-hashes combines. 29 | (def max-entropy-byte-count 32) 30 | ;; ClojureScript stand-in for byte-array: a js array holding len zeros. 31 | #?(:cljs (defn- byte-array [len] (into-array (repeat len 0)))) 32 | ;; Creates a fresh digest via md-create-fn, updates it with the single byte array or, when given a seq, with each byte array in order, and returns the resulting digest. 33 | (defn ^bytes digest 34 | [bytes-or-seq-of-bytes md-create-fn] 35 | (let [^MessageDigest md (md-create-fn)] 36 | (if (seq? bytes-or-seq-of-bytes) 37 | (doseq [^bytes bs bytes-or-seq-of-bytes] 38 | (.update md bs)) 39 | (.update md ^bytes bytes-or-seq-of-bytes)) 40 | (.digest md))) 41 | ;; Feeds the coercion of every element of seq, in order, into one fresh digest and returns that digest. NOTE: the loop body runs once even for an empty seq; destructuring then binds f to nil, so the coercion of nil is still fed to the digest. 42 | (defn ^bytes coerce-seq [seq md-create-fn write-handlers] 43 | (let [^MessageDigest seq-md (md-create-fn)] 44 | (loop [s seq] 45 | (let [[f & r] s] 46 | (.update seq-md ^bytes (-coerce f md-create-fn write-handlers)) 47 | (when-not (empty? r) 48 | (recur (rest s))))) 49 | (.digest seq-md))) 50 | 51 | (defn ^bytes xor-hashes 52 | "Commutatively coerces elements of collection, seq entries must already be crypto hashes 53 | to avoid collisions in XOR. Takes at maximum 32 bytes into account." 
54 | [seq] 55 | (let [len (min (count ^bytes (first seq)) max-entropy-byte-count)] 56 | (reduce (fn [^bytes acc ^bytes elem] 57 | (loop [i 0] 58 | (when (< i len) 59 | (aset acc i (byte (bit-xor (aget acc i) (aget elem i)))) 60 | (recur (inc i)))) 61 | acc) 62 | (byte-array len) 63 | seq))) 64 | 65 | (defn ^bytes encode-safe [^bytes a md-create-fn] 66 | (if (< (count a) split-size) 67 | (let [len (long (alength a)) 68 | ea (byte-array len)] 69 | (loop [i 0] 70 | (when-not (= i len) 71 | (let [e (aget a i)] 72 | (when (and (> e (byte 0)) 73 | (< e (byte 30))) 74 | (aset ea i (byte 1)))) 75 | (recur (inc i)))) 76 | #?(:clj (let [out (ByteArrayOutputStream.)] 77 | (.write out a) 78 | (.write out ea) 79 | (.toByteArray out)) 80 | :cljs (.concat a ea))) 81 | (digest a md-create-fn))) 82 | -------------------------------------------------------------------------------- /src/hasch/platform.cljs: -------------------------------------------------------------------------------- 1 | (ns hasch.platform "ClojureScript platform layer for hasch: UUID helpers, UTF-8 conversion, message digest constructors and the PHashCoercion extensions for cljs types." 2 | (:require [goog.crypt] 3 | [goog.crypt.Md5] [goog.crypt.Sha512] 4 | [cljs.reader :as reader] 5 | [clojure.string] 6 | [incognito.base :as ib] 7 | [hasch.benc :refer [magics PHashCoercion -coerce 8 | digest coerce-seq xor-hashes encode-safe]])) 9 | 10 | #_(do 11 | (ns dev) 12 | (def repl-env (reset! cemerick.austin.repls/browser-repl-env 13 | (cemerick.austin/repl-env))) 14 | (cemerick.austin.repls/cljs-repl repl-env)) 15 | 16 | (def uuid4 random-uuid) 17 | 18 | (defn byte->hex [b] 19 | (-> b 20 | (bit-and 0xff) 21 | (+ 0x100) 22 | (.toString 16) 23 | (.substring 1))) 24 | 25 | (defn hash->str [bytes] 26 | (apply str (map byte->hex bytes))) 27 | 28 | (def ^:dynamic *use-legacy-utf8-conversion* false) 29 | 30 | ;; taken from http://jsperf.com/uint8array-vs-array-encode-to-utf8/2 31 | ;; which is taken from //http://user1.matsumoto.ne.jp/~goma/js/utf.js 32 | ;; verified against: "小鳩ちゃんかわいいなぁ" 33 | ;; Note that this variant is broken for higher planes of unicode. 
For 34 | ;; backwards compatibility, you can enable `*use-legacy-utf8-conversion*` 35 | ;; to keep using the old improper conversion method. 36 | (defn- legacy-utf8 37 | "Encodes a string as UTF-8 in an unsigned js array." 38 | [s] 39 | (into-array 40 | (mapcat 41 | (fn [pos] 42 | (let [c (.charCodeAt s pos)] 43 | (cond (<= c 0x7F) [(bit-and c 0xFF)] 44 | (<= c 0x7FF) [(bit-or 0xC0 (bit-shift-right c 6)) 45 | (bit-or 0x80 (bit-and c 0x3F))] 46 | (<= c 0xFFFF) [(bit-or 0xE0 (bit-shift-right c 12)) 47 | (bit-or 0x80 (bit-and (bit-shift-right c 6) 0x3F)) 48 | (bit-or 0x80 (bit-and c 0x3F))] 49 | :default (let [j (loop [j 4] 50 | (if (pos? (bit-shift-right c (* j 6))) 51 | (recur (inc j)) 52 | j)) 53 | init (bit-or (bit-and (bit-shift-right 0xFF00 j) 0xFF) 54 | (bit-shift-right c (* 6 (dec j))))] 55 | (conj (->> (range (dec j)) 56 | reverse 57 | (map #(bit-or 0x80 58 | (bit-and (bit-shift-right c (* 6 %)) 59 | 0x3F)))) 60 | init))))) 61 | (range (.-length s))))) 62 | 63 | #_(utf8 "小鳩ちゃんかわいいなぁ") 64 | 65 | (defn utf8 66 | [s] 67 | (if *use-legacy-utf8-conversion* 68 | (legacy-utf8 s) 69 | (goog.crypt/stringToUtf8ByteArray s))) 70 | 71 | (defn uuid5 72 | "Generates a uuid5 from a sha-1 hash byte sequence. 73 | Our hash version is coded in first 2 bits." 
74 | [sha-hash] 75 | (let [[hb1 hb2 hb3 hb4 hb5 hb6 hb7 hb8 76 | lb1 lb2 lb3 lb4 lb5 lb6 lb7 lb8] sha-hash] 77 | (-> [(bit-clear (bit-clear hb1 7) 6) hb2 hb3 hb4 hb5 hb6 (bit-or 0x50 (bit-and 0x5f hb7)) hb8 78 | (bit-clear (bit-set lb1 7) 6) lb2 lb3 lb4 lb5 lb6 lb7 lb8] 79 | hash->str 80 | ((fn [s] (str (apply str (take 8 s)) 81 | "-" (apply str (take 4 (drop 8 s))) 82 | "-" (apply str (take 4 (drop 12 s))) 83 | "-" (apply str (take 4 (drop 16 s))) 84 | "-" (apply str (drop 20 s))))) 85 | uuid))) 86 | 87 | (defn sha512-message-digest [] 88 | (goog.crypt.Sha512.)) 89 | 90 | (defn md5-message-digest [] 91 | (goog.crypt.Md5.)) 92 | 93 | (defn encode [magic a] 94 | (.concat #js [magic] a)) 95 | 96 | (defn- str->utf8 [x] 97 | (-> x str utf8)) 98 | 99 | (extend-protocol PHashCoercion 100 | nil 101 | (-coerce [this md-create-fn write-handlers] 102 | (encode (:nil magics) #js[])) 103 | 104 | boolean 105 | (-coerce [this md-create-fn write-handlers] 106 | (encode (:boolean magics) #js [(if this 41 40)])) 107 | 108 | string 109 | (-coerce [this md-create-fn write-handlers] 110 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn))) 111 | 112 | number 113 | (-coerce [this md-create-fn write-handlers] 114 | ;; utf8 is not needed, can be optimized 115 | (encode (:number magics) (str->utf8 this))) 116 | 117 | js/Date 118 | (-coerce [this md-create-fn write-handlers] 119 | ;; utf8 is not needed, can be optimized 120 | (encode (:inst magics) (str->utf8 (.getTime this)))) 121 | 122 | cljs.core/UUID 123 | (-coerce [this md-create-fn write-handlers] 124 | (encode (:uuid magics) (str->utf8 (.-uuid this)))) 125 | 126 | cljs.core/Symbol 127 | (-coerce [this md-create-fn write-handlers] 128 | (encode (:symbol magics) (encode-safe (str->utf8 this) md-create-fn))) 129 | 130 | cljs.core/Keyword 131 | (-coerce [this md-create-fn write-handlers] 132 | (encode (:keyword magics) (encode-safe (str->utf8 this) md-create-fn))) 133 | 134 | default 135 | (-coerce [this md-create-fn 
write-handlers] 136 | (cond (instance? ib/IncognitoTaggedLiteral this) 137 | (let [{:keys [tag value]} this] 138 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers))) 139 | 140 | (satisfies? IRecord this) 141 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)] 142 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers))) 143 | 144 | (satisfies? ISeq this) 145 | (encode (:seq magics) (coerce-seq this md-create-fn write-handlers)) 146 | 147 | (satisfies? IVector this) 148 | (encode (:vector magics) (coerce-seq this md-create-fn write-handlers)) 149 | 150 | (satisfies? IMap this) 151 | (encode (:map magics) (xor-hashes (map #(-coerce % md-create-fn write-handlers) 152 | (seq this)))) 153 | 154 | (satisfies? ISet this) 155 | (encode (:set magics) (xor-hashes (map #(digest (-coerce % md-create-fn write-handlers) 156 | md-create-fn) 157 | (seq this)))) 158 | 159 | (instance? js/Uint8Array this) 160 | (encode (:binary magics) (encode-safe (js/Array.prototype.slice.call this) md-create-fn)) 161 | 162 | :else 163 | (throw (ex-info "Cannot hash unknown type, you can extend PHashCoercion protocol for:" 164 | {:type (type this) 165 | :value this}))))) 166 | 167 | (comment 168 | (js/Array.prototype.slice.call (js/Uint8Array. #js [1 2 3])) 169 | (.log js/console (-coerce (js/Uint8Array. #js [1 2 3]) (sha512-message-digest) sha512-message-digest)) 170 | 171 | (do 172 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239 173 | :person/name "Frederic" 174 | :person/familyname "Johanson" 175 | :person/street "Fifty-First Street 53" 176 | :person/postal 38237 177 | :person/telefon "02343248474" 178 | :person/weeight 0.3823})))) 179 | nil) 180 | 181 | (time (-coerce datom-vector sha512-message-digest)) 182 | 183 | (coerce-seq (sha512-message-digest) sha512-message-digest [:foo {:a "b"}]) 184 | 185 | ;; quick & dirty js advanced compilation benchmark 186 | (enable-console-print!) 
187 | 188 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239 189 | :person/name "Frederic" 190 | :person/familyname "Johanson" 191 | :person/street "Fifty-First Street 53" 192 | :person/postal 38237 193 | :person/telefon "02343248474" 194 | :person/weeight 0.3823})))) 195 | 196 | (.log js/console "benchmarking: " (time (-coerce datom-vector sha512-message-digest)))) 197 | -------------------------------------------------------------------------------- /test/hasch/api_test.cljc: -------------------------------------------------------------------------------- 1 | (ns hasch.api-test 2 | (:require [hasch.core :refer [edn-hash uuid squuid b64-hash]] 3 | [hasch.benc :refer [xor-hashes]] 4 | [hasch.md5 :as md5] 5 | [hasch.hex :as hex] 6 | [hasch.platform :refer [uuid5 hash->str #?(:cljs utf8)]] 7 | [incognito.base :as ic] 8 | [clojure.test :as t :refer (is deftest testing)])) 9 | 10 | #?(:cljs (def byte-array into-array)) 11 | 12 | (defrecord Bar [name]) 13 | 14 | (deftest hash-test 15 | (testing "Basic hash coercions of EDN primitives." 
16 | (is (= (edn-hash nil) 17 | '(184 36 77 2 137 129 214 147 175 123 69 106 248 239 164 202 214 61 40 46 25 255 20 148 44 36 110 80 217 53 29 34 112 74 128 42 113 195 88 11 99 112 222 76 235 41 60 50 74 132 35 52 37 87 212 229 195 132 56 240 227 105 16 238))) 18 | 19 | (is (= (edn-hash true) 20 | '(221 223 252 44 103 48 51 199 71 184 156 187 201 140 35 99 235 153 185 70 157 229 122 4 111 90 12 150 43 67 185 166 210 79 54 62 117 173 76 252 187 67 163 85 202 124 63 252 109 44 47 70 74 129 52 241 35 15 116 253 241 141 50 131))) 21 | 22 | (is (= (edn-hash false) 23 | '(54 0 110 63 158 137 176 89 220 235 107 213 84 159 27 25 148 206 193 96 192 73 41 255 220 181 215 106 208 220 173 69 213 190 181 70 141 193 1 225 188 142 127 176 102 61 13 54 151 161 195 158 152 190 212 168 91 43 153 108 122 123 90 32))) 24 | 25 | (is (= (edn-hash \f) 26 | '(211 133 203 224 194 174 136 44 216 77 98 85 54 188 116 101 139 174 40 108 48 180 235 231 214 189 34 246 32 30 56 45 179 218 36 206 61 191 79 160 212 162 212 226 235 17 27 228 218 74 17 229 9 147 187 232 35 244 179 233 66 165 152 253))) 27 | 28 | (is (= (edn-hash \ä) 29 | '(51 232 113 238 243 104 216 10 143 88 143 111 122 220 35 138 251 22 8 130 238 73 253 62 143 207 208 45 116 21 120 18 253 34 160 30 144 46 182 7 160 254 197 120 199 220 140 209 3 66 25 214 131 145 17 222 28 157 22 103 226 254 178 186))) 30 | 31 | (is (= (edn-hash "hello") 32 | '(178 114 9 243 3 150 0 132 236 216 60 87 108 34 2 35 85 37 203 202 97 176 9 55 25 191 143 251 251 47 49 139 99 191 77 63 167 158 61 183 233 59 43 57 16 252 121 198 65 201 112 167 96 61 134 122 177 149 45 87 233 23 173 192))) 33 | 34 | (is (= (edn-hash "小鳩ちゃんかわいいなぁ") 35 | '(2 191 84 39 34 44 227 102 135 109 17 136 159 80 253 7 40 0 170 134 198 204 137 10 194 21 113 203 2 87 125 80 172 165 111 110 222 7 123 138 148 124 207 180 240 207 91 6 248 28 53 168 143 30 106 103 101 82 133 215 69 35 93 47))) 36 | 37 | (is (= (edn-hash "😡😡😡") 38 | '(18 17 129 25 13 183 170 164 178 18 97 0 123 151 164 145 95 
197 214 178 107 96 255 105 255 104 69 21 205 160 13 222 9 55 63 37 174 33 35 86 204 73 17 110 82 107 151 64 63 79 191 246 177 76 71 24 107 44 43 156 178 169 195 214))) 39 | 40 | (is (= (edn-hash (int 1234567890)) 41 | (edn-hash (long 1234567890)) 42 | #?(:clj (edn-hash (biginteger "1234567890"))) 43 | #?(:clj (edn-hash (bigint "1234567890"))) 44 | '(65 199 158 164 193 95 213 144 233 29 41 86 123 106 110 215 117 225 149 249 204 124 220 217 226 120 131 178 61 133 39 228 182 233 235 249 10 249 141 122 101 25 46 134 18 222 175 224 134 61 167 114 15 109 2 146 38 65 1 55 128 137 144 55))) 45 | 46 | (is (= (edn-hash (double 123.1)) 47 | (edn-hash (float 123.1)) 48 | '(155 181 33 252 126 113 188 20 210 155 50 24 125 212 205 160 135 108 90 43 154 65 61 229 226 83 11 110 64 61 124 45 43 186 152 127 64 171 171 154 28 149 180 136 229 69 195 145 126 99 56 14 48 194 180 126 212 83 123 206 36 189 189 167) 49 | #?(:clj (edn-hash (BigDecimal. "123.1"))))) 50 | 51 | (is (= (edn-hash :core/test) 52 | '(62 51 214 78 41 84 37 205 69 197 105 26 235 55 30 87 46 117 187 194 101 184 139 244 111 232 98 175 16 174 182 211 11 171 154 64 90 18 229 93 188 246 33 234 102 145 68 30 92 0 81 208 210 10 124 137 203 18 249 138 226 253 60 62))) 53 | 54 | (is (= (edn-hash #uuid "242525f1-8ed7-5979-9232-6992dd1e11e4") 55 | '(42 243 183 237 233 94 246 1 110 56 231 49 64 217 181 17 108 11 120 199 223 53 149 47 49 8 109 94 127 93 250 51 167 211 25 31 3 171 149 67 23 245 38 248 40 31 199 211 162 242 120 99 187 6 29 237 53 174 22 192 27 159 227 164))) 56 | 57 | (is (= (edn-hash (#?(:clj java.util.Date. :cljs js/Date.) 
1000000000000)) 58 | '(177 226 212 235 221 67 176 34 184 69 101 45 117 193 95 187 54 50 210 149 10 193 10 67 220 174 25 99 176 115 250 216 29 49 148 167 52 86 203 90 30 170 62 149 115 102 109 120 128 62 2 213 188 41 203 91 202 106 142 100 119 160 26 3))) 59 | 60 | (is (= (edn-hash 'core/+) 61 | '(164 63 64 77 190 144 72 80 34 36 254 237 101 99 57 114 54 44 195 22 255 11 242 114 99 87 99 135 103 73 164 183 20 192 184 54 183 244 192 151 88 96 55 204 73 156 73 92 154 8 248 205 119 157 34 112 202 51 52 169 162 61 91 235))) 62 | 63 | (is (= (edn-hash '(1 2 3)) 64 | '(244 105 186 110 183 117 195 78 70 57 251 132 133 114 134 175 228 94 242 41 194 191 186 237 163 178 255 193 141 120 5 137 223 130 170 47 231 133 78 131 128 194 115 140 186 169 124 71 205 210 228 236 82 97 166 158 190 98 106 80 237 149 96 102))) 65 | 66 | (is (= (edn-hash [1 2 3 4]) 67 | '(172 52 37 123 179 106 243 207 88 177 218 22 170 25 13 155 205 89 156 251 253 50 3 3 191 74 229 97 252 37 162 240 197 252 240 199 177 8 96 227 121 100 106 132 68 227 175 189 247 184 108 25 117 154 186 63 108 4 210 20 75 25 239 199))) 68 | 69 | (is (= (edn-hash {:a "hello" 70 | :balloon "world"}) 71 | '(135 204 255 206 109 55 248 198 218 226 173 91 27 244 68 34 108 207 62 12 114 49 69 90 22 44 155 178 212 188 139 50 217 200 63 207 14 112 179 94 202 96 196 139 202 154 214 211 182 97 31 139 49 153 203 233 240 223 154 161 78 131 159 102))) 72 | 73 | (is (= (edn-hash #{1 2 3 4}) 74 | '(42 216 217 238 97 125 210 112 2 83 128 62 82 47 119 14 59 95 246 107 191 138 251 102 201 52 9 132 96 243 199 223 218 81 88 130 165 214 125 48 222 30 64 233 101 122 196 84 11 93 186 26 92 225 203 161 196 98 186 138 174 118 244 248))) 75 | 76 | (is (= (edn-hash (Bar. "hello")) 77 | (edn-hash (ic/incognito-reader {'hasch.api-test.Bar map->Bar} 78 | (ic/incognito-writer {} (Bar. "hello")))) 79 | (edn-hash (ic/map->IncognitoTaggedLiteral (ic/incognito-writer {} (Bar. 
"hello")))) 80 | (edn-hash (ic/map->IncognitoTaggedLiteral {:tag 'hasch.api_test.Bar 81 | :value {:name "hello"}})) 82 | '(236 35 140 74 245 164 93 1 239 144 253 91 193 51 241 129 149 210 99 169 16 130 21 235 236 166 36 205 80 10 215 106 173 39 96 197 241 49 64 219 252 119 65 15 87 24 2 253 0 143 61 187 88 216 238 226 146 40 197 51 82 208 246 127))) 83 | 84 | (is (= (edn-hash #?(:cljs (js/Uint8Array. #js [1 2 3 42 149]) 85 | :clj (byte-array [1 2 3 42 149]))) 86 | '(135 209 248 171 162 90 41 221 173 216 64 218 222 93 242 60 243 5 190 153 101 194 74 130 55 184 84 148 167 94 210 250 140 211 6 234 221 25 113 83 153 75 180 4 194 163 178 197 243 126 27 172 248 169 161 90 102 172 160 98 249 32 42 157))))) 87 | 88 | (deftest padded-coercion 89 | (testing "Padded xor coercion for commutative collections." 90 | (is (= (map byte 91 | (xor-hashes (map byte-array 92 | [[0xa0 0x01 0xf3] [0x0c 0xf0 0x5f] [0x0a 0x30 0x07]]))) 93 | (map byte (xor-hashes (map byte-array 94 | [[0x0a 0x30 0x07] [0x0c 0xf0 0x5f] [0xa0 0x01 0xf3]]))))))) 95 | 96 | (deftest code-hashing 97 | (testing "Code hashing." 98 | (is (= (-> '(fn fib [n] 99 | (if (or (= n 0) (= n 1)) 1 100 | (+ (fib (- n 1)) (fib (- n 2))))) 101 | edn-hash 102 | uuid5) 103 | #uuid "386eabb0-8adc-52a2-a715-5a74c9197646")))) 104 | 105 | (deftest hash-stringification 106 | (testing "Stringification." 107 | (is (= (hash->str (range 256)) 108 | "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff")))) 109 | 110 | (deftest squuid-test 111 | (testing "Sequential UUID functionality." 
112 | (is (= (subs (str (squuid (uuid [1 2 3]))) 8) 113 | "-5c15-555e-a1c8-6166a78fc808")))) 114 | 115 | (deftest b64-hash-test 116 | (testing "Testing the base64 encoding of a hash." 117 | (is (= (b64-hash [1 2 3 {:key 5 :value 10}]) 118 | "TREJlRrK211AASiqQMFG9RLFW0CPC/arrCxeaUj27Qho2USJU40T01uCdjUg/OMiPGttyL1ELPCrVXXhMIroRQ==")))) 119 | 120 | (deftest test-md5 121 | (is (= (hex/encode (md5/str->md5 "geheimnis")) 122 | "525e92c6aa11544a2ab794f8921ecb0f"))) 123 | 124 | #?(:cljs 125 | (deftest utf8-test 126 | (is (= (js->clj (utf8 "小鳩ちゃんかわいいなぁ")) 127 | [229 176 143 233 179 169 227 129 161 227 130 131 227 130 147 227 128 | 129 139 227 130 143 227 129 132 227 129 132 227 129 170 227 129 129])))) 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 
25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. 
Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 
97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. 
In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. 
DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. 
However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of Washington and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 
215 | -------------------------------------------------------------------------------- /src/hasch/platform.clj: -------------------------------------------------------------------------------- 1 | (ns hasch.platform 2 | "Platform specific implementations." 3 | (:require [hasch.benc :refer [split-size encode-safe]] 4 | [clojure.edn :as edn] 5 | [clojure.java.io :as io] 6 | [incognito.base :as ib] 7 | [hasch.benc :refer [magics PHashCoercion -coerce 8 | digest coerce-seq xor-hashes encode-safe]]) 9 | (:import java.io.ByteArrayOutputStream 10 | java.nio.ByteBuffer 11 | java.security.MessageDigest)) 12 | 13 | (set! *warn-on-reflection* true) 14 | 15 | (defn uuid4 16 | "Generates a UUID version 4 (random)." 17 | [] 18 | (java.util.UUID/randomUUID)) 19 | 20 | (defn byte->hex [b] 21 | (-> b 22 | (bit-and 0xff) 23 | (+ 0x100) 24 | (Integer/toString 16) 25 | (.substring 1))) 26 | 27 | (defn hash->str [bytes] 28 | (apply str (map byte->hex bytes))) 29 | 30 | (defn ^MessageDigest sha512-message-digest [] 31 | (MessageDigest/getInstance "sha-512")) 32 | 33 | (defn ^MessageDigest md5-message-digest [] 34 | (MessageDigest/getInstance "md5")) 35 | 36 | (defn uuid5 37 | "Generates a UUID version 5 from a sha-1 hash byte sequence. 38 | Our hash version is coded in first 2 bits." 39 | [sha-hash] 40 | (let [bb (ByteBuffer/wrap (byte-array sha-hash)) 41 | high (.getLong bb) 42 | low (.getLong bb)] 43 | (java.util.UUID. 
(-> high 44 | (bit-or 0x0000000000005000) 45 | (bit-and 0x7fffffffffff5fff) 46 | (bit-clear 63) ;; needed because of BigInt cast of bitmask 47 | (bit-clear 62)) 48 | (-> low 49 | (bit-set 63) 50 | (bit-clear 62))))) 51 | 52 | (defn ^bytes encode [^Byte magic ^bytes a] 53 | (let [out (ByteArrayOutputStream.)] 54 | (.write out (byte-array 1 magic)) 55 | (.write out a) 56 | (.toByteArray out))) 57 | 58 | (defn- ^bytes str->utf8 [x] 59 | (-> x str (.getBytes "UTF-8"))) 60 | 61 | (extend-protocol PHashCoercion 62 | java.lang.Boolean 63 | (-coerce [this md-create-fn write-handlers] 64 | (encode (:boolean magics) (byte-array 1 (if this (byte 41) (byte 40))))) 65 | 66 | ;; don't distinguish characters from string for javascript 67 | java.lang.Character 68 | (-coerce [this md-create-fn write-handlers] 69 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn))) 70 | 71 | java.lang.String 72 | (-coerce [this md-create-fn write-handlers] 73 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn))) 74 | 75 | java.lang.Integer 76 | (-coerce [this md-create-fn write-handlers] 77 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 78 | 79 | java.lang.Long 80 | (-coerce [this md-create-fn write-handlers] 81 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 82 | 83 | java.math.BigInteger 84 | (-coerce [this md-create-fn write-handlers] 85 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 86 | 87 | java.lang.Float 88 | (-coerce [this md-create-fn write-handlers] 89 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 90 | 91 | java.lang.Double 92 | (-coerce [this md-create-fn write-handlers] 93 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 94 | 95 | java.math.BigDecimal 96 | (-coerce [this md-create-fn write-handlers] 97 | (encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 98 | 99 | clojure.lang.BigInt 100 | (-coerce [this md-create-fn write-handlers] 101 | 
(encode (:number magics) (.getBytes (.toString this) "UTF-8"))) 102 | 103 | java.util.UUID 104 | (-coerce [this md-create-fn write-handlers] 105 | (encode (:uuid magics) (.getBytes (.toString this) "UTF-8"))) 106 | 107 | java.util.Date 108 | (-coerce [this md-create-fn write-handlers] 109 | (encode (:inst magics) (.getBytes (.toString ^java.lang.Long (.getTime this)) "UTF-8"))) 110 | 111 | nil 112 | (-coerce [this md-create-fn write-handlers] 113 | (encode (:nil magics) (byte-array 0))) 114 | 115 | clojure.lang.Symbol 116 | (-coerce [this md-create-fn write-handlers] 117 | (encode (:symbol magics) (encode-safe (str->utf8 this) md-create-fn))) 118 | 119 | clojure.lang.Keyword 120 | (-coerce [this md-create-fn write-handlers] 121 | (encode (:keyword magics) (encode-safe (str->utf8 this) md-create-fn))) 122 | 123 | clojure.lang.ISeq 124 | (-coerce [this md-create-fn write-handlers] 125 | (encode (:seq magics) (coerce-seq this md-create-fn write-handlers))) 126 | 127 | clojure.lang.IPersistentVector 128 | (-coerce [this md-create-fn write-handlers] 129 | (encode (:vector magics) (coerce-seq this md-create-fn write-handlers))) 130 | 131 | incognito.base.IncognitoTaggedLiteral 132 | (-coerce [this md-create-fn write-handlers] 133 | (let [{:keys [tag value]} this] 134 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers)))) 135 | 136 | clojure.lang.IRecord 137 | (-coerce [this md-create-fn write-handlers] 138 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)] 139 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers)))) 140 | 141 | clojure.lang.IPersistentMap 142 | (-coerce [this md-create-fn write-handlers] 143 | (if (record? this) ;; BUG somehow records can also trigger the map sometimes (?) 
144 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)] 145 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers))) 146 | (encode (:map magics) (xor-hashes (map #(-coerce % md-create-fn write-handlers) (seq this)))))) 147 | 148 | clojure.lang.IPersistentSet 149 | (-coerce [this md-create-fn write-handlers] 150 | (encode (:set magics) (xor-hashes (map #(digest (-coerce % md-create-fn write-handlers) 151 | md-create-fn) 152 | (seq this))))) 153 | 154 | ;; not ideal, InputStream might be more flexible 155 | ;; file is used due to length knowledge 156 | java.io.File 157 | (-coerce [f md-create-fn write-handlers] 158 | (let [^MessageDigest md (md-create-fn) 159 | len (.length f)] 160 | (with-open [fis (java.io.FileInputStream. f)] 161 | (encode (:binary magics) 162 | ;; support default split-size behaviour transparently 163 | (if (< len split-size) 164 | (let [ba (with-open [out (java.io.ByteArrayOutputStream.)] 165 | (clojure.java.io/copy fis out) 166 | (.toByteArray out))] 167 | (encode-safe ba md-create-fn)) 168 | (let [ba (byte-array (* 1024 1024))] 169 | (loop [size (.read fis ba)] 170 | (if (neg? 
size) (.digest md) 171 | (do 172 | (.update md ba 0 size) 173 | (recur (.read fis ba)))))))))))) 174 | 175 | (extend (Class/forName "[B") 176 | PHashCoercion 177 | {:-coerce (fn [^bytes this md-create-fn write-handlers] 178 | (encode (:binary magics) (encode-safe this md-create-fn)))}) 179 | 180 | (comment 181 | (require '[clojure.java.io :as io]) 182 | (def foo (io/file "/tmp/foo")) 183 | (.length foo) 184 | 185 | (defn slurp-bytes 186 | "Slurp the bytes from a slurpable thing" 187 | [x] 188 | (with-open [out (java.io.ByteArrayOutputStream.)] 189 | (clojure.java.io/copy (clojure.java.io/input-stream x) out) 190 | (.toByteArray out))) 191 | 192 | (clojure.reflect/reflect foo) 193 | (= (map byte (-coerce (io/file "/tmp/bar") sha512-message-digest)) 194 | (map byte (-coerce (slurp-bytes "/tmp/bar") sha512-message-digest))) 195 | 196 | (map byte (-coerce {:hello :world :foo :bar 1 2} sha512-message-digest)) 197 | 198 | (map byte (-coerce #{1 2 3} sha512-message-digest)) 199 | 200 | (use 'criterium.core) 201 | 202 | (def million-map (into {} (doall (map vec (partition 2 203 | (interleave (range 1000000) 204 | (range 1000000))))))) 205 | 206 | (bench (-coerce million-map sha512-message-digest)) ;; 3.80 secs 207 | 208 | (def million-seq (doall (map vec (partition 2 209 | (interleave (range 1000000) 210 | (range 1000000 2000000)))))) 211 | 212 | (def million-seq2 (doall (range 1000000))) 213 | 214 | (bench (-coerce million-seq2 sha512-message-digest)) ;; 296 ms 215 | 216 | (bench (-coerce million-seq2 md5-message-digest)) 217 | 218 | (take 10 (time (into (sorted-set) (range 1e6)))) ;; 1.7 s 219 | 220 | (bench (coerce-seq sha512-message-digest (seq (into (sorted-set) (range 1e4))))) 221 | 222 | (bench (-coerce (into #{} (range 1e4)) sha512-message-digest)) 223 | 224 | (bench (-coerce (seq (into (sorted-set) (range 10))) sha512-message-digest)) ;; 8.6 us 225 | 226 | (bench (-coerce (into #{} (range 10)) sha512-message-digest)) ;; 31.7 us 227 | 228 | (bench (-coerce (seq 
(into (sorted-set) (range 100))) sha512-message-digest)) 229 | 230 | (bench (-coerce (into #{} (range 100)) sha512-message-digest)) 231 | 232 | (bench (-coerce (seq (into (sorted-set) (range 1e4))) sha512-message-digest)) 233 | 234 | (bench (-coerce (into #{} (range 1e4)) sha512-message-digest)) 235 | 236 | (def small-map (into {} (map vec (partition 2 (take 10 (repeatedly rand)))))) 237 | (bench (-coerce (apply concat (seq (into (sorted-map) small-map))) 238 | sha512-message-digest)) ;; 12.1 us 239 | 240 | (bench (-coerce small-map sha512-message-digest)) ;; 20.7 us 241 | 242 | (def medium-map (into {} (map vec (partition 2 (take 2e6 (repeatedly rand)))))) 243 | (bench (-coerce (apply concat (seq (into (sorted-map) medium-map))) 244 | sha512-message-digest)) 245 | 246 | (bench (-coerce medium-map sha512-message-digest)) 247 | 248 | (def million-set (doall (into #{} (range 1000000)))) 249 | 250 | (bench (-coerce million-set sha512-message-digest)) ;; 2.69 secs 251 | 252 | (def million-seq3 (doall (repeat 1000000 "hello world"))) 253 | 254 | (bench (-coerce million-seq3 sha512-message-digest)) ;; 916 msecs 255 | 256 | (def million-seq4 (doall (repeat 1000000 :foo/bar))) 257 | 258 | (bench (-coerce million-seq4 sha512-message-digest)) ;; 752 msecs 259 | 260 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239 261 | :person/name "Frederic" 262 | :person/familyname "Johanson" 263 | :person/street "Fifty-First Street 53" 264 | :person/postal 38237 265 | :person/phone "02343248474" 266 | :person/weight 38.23})))) 267 | (let [val (doall (vec (repeat 10000 {:db/id 18239 268 | :person/name "Frederic" 269 | :person/familyname "Johanson" 270 | :person/street "Fifty-First Street 53" 271 | :person/postal 38237 272 | :person/phone "02343248474" 273 | :person/weight 38.23})))] 274 | (bench (-coerce val sha512-message-digest))) 275 | 276 | (time (-coerce datom-vector sha512-message-digest)) 277 | (bench (-coerce datom-vector sha512-message-digest)) ;; xor: 316 ms, sort: 207 
ms 278 | 279 | ;; if not single or few byte values, but at least 8 byte size factor per item ~12x 280 | ;; factor for single byte ~100x 281 | (def bs (apply concat (repeat 100000 (.getBytes "Hello World!")))) 282 | (def barr #_(byte-array bs) (byte-array (* 1024 1024 300) (byte 42))) 283 | (def barrs (doall (take (* 1024 1024 10) (repeat (byte-array 1 (byte 42)))) 284 | #_(map byte-array (partition 1 barr)))) 285 | 286 | (bench (-coerce barr sha512-message-digest)) ;; 1.99 secs 287 | 288 | (def arr (into-array Byte/TYPE (take (* 1024) (repeatedly #(- (rand-int 256) 128))))) 289 | 290 | ;; hasch 0.2.3 291 | (use 'criterium.core) 292 | 293 | (def million-map (into {} (doall (map vec (partition 2 294 | (interleave (range 1000000) 295 | (range 1000000 2000000))))))) 296 | 297 | (bench (uuid million-map)) ;; 27 secs 298 | 299 | (def million-seq3 (doall (repeat 1000000 "hello world"))) 300 | 301 | (bench (uuid million-seq3)) ;; 16 secs 302 | 303 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239 304 | :person/name "Frederic" 305 | :person/familyname "Johanson" 306 | :person/street "Fifty-First Street 53" 307 | :person/postal 38237 308 | :person/telefon "02343248474" 309 | :person/weeight 0.3823})))) 310 | 311 | (bench (uuid datom-vector)) ;; 2.6 secs 312 | ) 313 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hasch 2 | 3 | 4 |

5 | 6 | 7 | 8 | 9 | 10 |

11 | 12 | A library to consistently crypto-hash [edn](https://github.com/edn-format/edn) data structures on Clojure and ClojureScript with SHA-512. The main motivation is that commutative data structures like maps, sets and records are not hashed in order as was the case with e.g. hashing a simple sequential serialisation, but have the same hash value independent of order. That way Clojure value semantics with `edn` are retained. UTF-8 is supported for strings, symbols and keywords. Beyond this tagged literals are supported in a generic runtime independent fashion and platform-neutral encoding (atm. between JVM and JavaScript) is taken care of. 13 | You can then create UUID5 (using SHA-512) from it. Alternatively you can use your own hash function, but this is not standardized and hence beyond the spec. 14 | 15 | Support for edn types on the JVM and JavaScript is complete including records. This works by printing the tagged-literal and rereading it as pure edn, which also ensures that the hashed value can be reproduced beyond the current runtime. Your type has to be pr-str-able for this to work. Records already have a default serialisation. 
16 | 17 | ## Usage Gitter 18 | 19 | 20 | Add this to your leiningen project's dependencies: 21 | [![Clojars Project](http://clojars.org/io.replikativ/hasch/latest-version.svg)](http://clojars.org/io.replikativ/hasch) 22 | 23 | Then you can access the major function through `hasch.core`: 24 | 25 | ~~~clojure 26 | (use 'hasch.core) 27 | (edn-hash ["hello world" {:a 3.14} #{42} '(if true nil \f)]) 28 | => (120 75 53 36 42 91 14 22 174 251 7 222 83 57 158 140 192 131 251 17 176 29 252 118 83 2 106 187 223 17 84 232 24 103 183 27 19 174 222 37 246 138 132 126 172 46 249 42 62 46 66 32 33 100 88 168 4 242 90 25 5 228 2 88) 29 | 30 | (uuid5 (edn-hash "hello world")) 31 | => #uuid "1227fe0a-471b-5329-88db-875fb82737a8" 32 | 33 | ;; or just use the convenience multi-arity uuid fn: 34 | (uuid) => #uuid "a27dfbb9-b69a-4f08-8df4-471464bfeb37" 35 | (uuid "hello world") => #uuid "1227fe0a-471b-5329-88db-875fb82737a8" 36 | ~~~ 37 | 38 | 39 | ## Motivation 40 | 41 | The motivation is to exchange (potentially large) values in a hostile environment without conflicts. The concrete design motivation is to use the commit log of [replikativ](https://github.com/replikativ/replikativ) for exchange of datascript/datomic transaction logs. As long as you are in a trusted environment you can trust the random generator for conflict-free UUIDs as is done internally by many Clojure projects, but as soon as you distribute values, collisions can happen. Note that you can treat hasch's cryptographic UUIDs like random UUIDs internally and don't need to verify them. 42 | 43 | ## Maturity 44 | 45 | The library is tested in cross-platform [applications](https://github.com/replikativ/topiq). The hashing scheme can be considered stable. It is versioned, so we can fix any severe bug without breaking stored hashes. 46 | 47 | 48 | ## Why not use Clojure's `hash`? 49 | 50 | I wish I could have done that instead of reimplementing my own hashing scheme for edn (there are more interesting problems). 
There is one major reason against using internal hash functions: They need to be very fast for efficient data-structures and hence trade this for potential but unlikely collisions, which is unacceptable in an insecure environment. For the same reason they also only work on 64 bit values, which is fine for a runtime, but not the internet. 51 | 52 | ## Why not sort? 53 | 54 | Sorting of heterogeneous collections requires a unique serialization (e.g. pr-str or our encoding) on keys beforehand, which was sadly not faster even for small maps and sets. Sorting on number only maps was faster for maps until at least a size of one million. At some point the complexity of sorting becomes more expensive than xor-ing hashed kv-vectors, so sorting is a simple but not linearly scalable solution. Still it could prove valuable in the future. 55 | 56 | ## edn support 57 | 58 | Support for `edn` types is complete including records. This works according to [incognito](https://github.com/replikativ/incognito) by hashing unknown records the same as their known counterparts. You need to supply the optional `write-handlers` to `uuid` if your records have a custom serialization. Otherwise incognito records won't match. 59 | Importantly the JVM class names are converted into cljs format `foo.bar_baz.Bar` -> `foo.bar-baz/Bar` before hashing. While this potentially allows maliciously induced collisions, you are safe if you use `incognito` or a similar mapping for cross-platform support, as it automatically serializes all record tags accordingly. 60 | 61 | ## Safety 62 | 63 | The library is designed safety first, speed second. I have put quite some thought into getting all input bits (entropy) into the cryptographic hash function. It should be impossible to construct a collision (beyond weaknesses in the underlying SHA-512 which is considered safe in year 2014). The biggest conceptual weakness is XOR-ing of sha-512 hashed elements in maps and sets.
64 | 65 | *Once released, I'll offer a 100 $ bounty for proof of any collision, just open a github issue. This hashing is an important building block for distributed systems to me.* 66 | 67 | ## Speed 68 | 69 | The first versions were just built around safety, but perform poorly with large values. The speed should be sufficient to be in the same order of magnitude as transmission speed (throughput + latency) over slow to mid-range internet broadband connections. If you want to transmit larger values fast, you may be able to choose a sequential binary encoding with native hashing speed. JavaScript performance is still significantly slower (~10x), seemingly due to the lack of native SHA hashing routines. 70 | 71 | *These are just micro-benchmarks on my 3 year old laptop, I just mention them so you can get an impression.* 72 | 73 | ~~~clojure 74 | ;; most important and worst case, what can be done? 75 | hasch.platform> (let [val (into {} (doall (map vec (partition 2 76 | (interleave (range 1000000) 77 | (range 1000000))))))] 78 | (bench (-coerce val sha512-message-digest))) 79 | Evaluation count : 60 in 60 samples of 1 calls. 80 | Execution time mean : 3.596037 sec 81 | Execution time std-deviation : 23.812536 ms 82 | Execution time lower quantile : 3.566430 sec ( 2.5%) 83 | Execution time upper quantile : 3.647540 sec (97.5%) 84 | Overhead used : 2.039920 ns 85 | 86 | Found 4 outliers in 60 samples (6.6667 %) 87 | low-severe 3 (5.0000 %) 88 | low-mild 1 (1.6667 %) 89 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 90 | nil 91 | 92 | hasch.platform> (let [val (doall (range 1000000))] 93 | (bench (-coerce val sha512-message-digest))) 94 | Evaluation count : 240 in 60 samples of 4 calls.
95 | Execution time mean : 297.320276 ms 96 | Execution time std-deviation : 2.683060 ms 97 | Execution time lower quantile : 293.217179 ms ( 2.5%) 98 | Execution time upper quantile : 302.059975 ms (97.5%) 99 | Overhead used : 2.039920 ns 100 | 101 | Found 1 outliers in 60 samples (1.6667 %) 102 | low-severe 1 (1.6667 %) 103 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 104 | nil 105 | 106 | hasch.platform> (let [val (doall (into #{} (range 1000000)))] 107 | (bench (-coerce val sha512-message-digest))) 108 | Evaluation count : 60 in 60 samples of 1 calls. 109 | Execution time mean : 2.733429 sec 110 | Execution time std-deviation : 15.463782 ms 111 | Execution time lower quantile : 2.708645 sec ( 2.5%) 112 | Execution time upper quantile : 2.758701 sec (97.5%) 113 | Overhead used : 2.039920 ns 114 | 115 | Found 1 outliers in 60 samples (1.6667 %) 116 | low-severe 1 (1.6667 %) 117 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 118 | nil 119 | 120 | hasch.platform> (let [val (doall (repeat 1000000 "hello world"))] 121 | (bench (-coerce val sha512-message-digest))) 122 | WARNING: Final GC required 1.472161970438994 % of runtime 123 | Evaluation count : 120 in 60 samples of 2 calls. 124 | Execution time mean : 873.084789 ms 125 | Execution time std-deviation : 5.753430 ms 126 | Execution time lower quantile : 862.909606 ms ( 2.5%) 127 | Execution time upper quantile : 885.560937 ms (97.5%) 128 | Overhead used : 2.039920 ns 129 | 130 | Found 2 outliers in 60 samples (3.3333 %) 131 | low-severe 2 (3.3333 %) 132 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 133 | nil 134 | 135 | hasch.platform> (let [val (doall (repeat 1000000 :foo/bar))] 136 | (bench (-coerce val sha512-message-digest))) 137 | WARNING: Final GC required 1.072577784478402 % of runtime 138 | Evaluation count : 120 in 60 samples of 2 calls. 
139 | Execution time mean : 756.394263 ms 140 | Execution time std-deviation : 2.935836 ms 141 | Execution time lower quantile : 750.827152 ms ( 2.5%) 142 | Execution time upper quantile : 761.299697 ms (97.5%) 143 | Overhead used : 2.039920 ns 144 | 145 | Found 1 outliers in 60 samples (1.6667 %) 146 | low-severe 1 (1.6667 %) 147 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 148 | nil 149 | 150 | hasch.platform> (let [val (byte-array (* 1024 1024 300) (byte 42))] ;; 300 mib bytearray 151 | (bench (-coerce val sha512-message-digest))) 152 | Evaluation count : 60 in 60 samples of 1 calls. 153 | Execution time mean : 1.987549 sec 154 | Execution time std-deviation : 134.189868 ms 155 | Execution time lower quantile : 1.901676 sec ( 2.5%) 156 | Execution time upper quantile : 2.304744 sec (97.5%) 157 | Overhead used : 1.967460 ns 158 | 159 | Found 3 outliers in 60 samples (5.0000 %) 160 | low-severe 3 (5.0000 %) 161 | Variance from outliers : 50.1416 % Variance is severely inflated by outliers 162 | nil 163 | 164 | hasch.platform> (let [val (doall (vec (repeat 10000 {:db/id 18239 165 | :person/name "Frederic" 166 | :person/familyname "Johanson" 167 | :person/street "Fifty-First Street 53" 168 | :person/postal 38237 169 | :person/phone "02343248474" 170 | :person/weight 38.23})))] 171 | (bench (-coerce val sha512-message-digest))) 172 | WARNING: Final GC required 1.2237845534164749 % of runtime 173 | Evaluation count : 240 in 60 samples of 4 calls. 
174 | Execution time mean : 322.164678 ms 175 | Execution time std-deviation : 1.821136 ms 176 | Execution time lower quantile : 318.232462 ms ( 2.5%) 177 | Execution time upper quantile : 325.916354 ms (97.5%) 178 | Overhead used : 2.039920 ns 179 | 180 | Found 4 outliers in 60 samples (6.6667 %) 181 | low-severe 2 (3.3333 %) 182 | low-mild 1 (1.6667 %) 183 | high-mild 1 (1.6667 %) 184 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 185 | nil 186 | 187 | ~~~ 188 | 189 | 190 | # Changes 191 | - 0.3.5 Support BigInteger and BigDecimal hashing (same as for limited precision types). 192 | - 0.3.4 Expose high-level base64 hashes with full precision. 193 | - 0.3.2 Minimize dependencies, explicit profiles for different Clojure(Script) versions 194 | - 0.3.1 fix bug in hashing sequences containing null 195 | - 0.3.0 fix accidental hashing of records as maps 196 | - 0.3.0-beta4 fix record serialization with incognito 197 | - 0.3.0 Overhaul encoding for ~10x-20x better performance on the JVM. Use safe SHA-512. Add byte-array support for blobs. 198 | - 0.2.3 properly dispatch on IRecord (instead of IMap) 199 | - 0.2.2 cannot coerce record tags because of conflicts, rather extend record to properly print 200 | - 0.2.1 fix tag coercion on JVM 201 | 202 | ## Extension to your own types 203 | 204 | *Warning*: Getting all that right is not trivial. Don't mess with the hashing extension if you don't have to, just make your type uniquely mappable with [incognito](https://github.com/replikativ/incognito)! 205 | 206 | You can avoid the mapping step to Clojure data structures (also effectively allocating double memory) by extending the `hasch.benc/PHashCoercion` protocol to your types. You should model your extension on the `IRecord` implementation and must use `(:literal magics)` to avoid collisions with literal values of the same form.
Whether you use the default serialisation mechanism to retrieve a hash-value or extend the hash-coercion, your serialisation or coercion must satisfy the *equality relation*: 207 | 208 | - hashes *must* follow `IEquiv` equality of Clojure(Script): `(= a b) <=> (= (edn-hash a) (edn-hash b))`, `(not= a b) <=> (not= (edn-hash a) (edn-hash b))`: Your serialisation has to be *unique*, hashing has to be injective, or in other words you must not introduce collisions. Non-equal objects must have non-equal hashes. 209 | - *reflexivity*: `(= (edn-hash a) (edn-hash a))`, including on different runtimes 210 | - *symmetry*: `(= (edn-hash a) (edn-hash b)) <=> (= (edn-hash b) (edn-hash a))` (trivial because of `=`) 211 | - *transitivity*: `(and (= (edn-hash a) (edn-hash b)) (= (edn-hash b) (edn-hash c))) => (= (edn-hash a) (edn-hash c))` (also trivial because of `=`) 212 | 213 | 214 | # TODO 215 | - Use test.check/double.check property-based tests between Java and JS (?) 216 | - Nested collections are hashed with the supplied hash-fn before they contribute to the hash-value. This makes it possible to form a Merkle-tree-like persistent data structure by breaking out collection values, so you can rehash top-level collections without pulling the whole value into memory. This is not tested yet; a git-like store could be implemented, e.g. in [konserve](https://github.com/replikativ/konserve). This should also be useful for building durable indexes. But it might prove to need runtime tweaking, e.g. depending on value size. 217 | - If keeping sorted maps/sets is feasible for high-throughput applications, allow hashing them sequentially. 218 | 219 | # Contributors 220 | - Max Penet 221 | - James Conroy-Finn 222 | - Konrad Kühne 223 | - Christian Weilbach 224 | 225 | ## License 226 | 227 | Copyright © 2014-2018 Christian Weilbach and contributors 228 | 229 | Distributed under the Eclipse Public License either version 1.0 or (at 230 | your option) any later version.
231 | --------------------------------------------------------------------------------