├── bin
├── kaocha
├── run-unittests
├── run-integrationtests
└── run-cljstests
├── tests.edn
├── src
└── hasch
│ ├── hex.cljc
│ ├── base64.cljc
│ ├── md5.cljc
│ ├── core.cljc
│ ├── benc.cljc
│ ├── platform.cljs
│ └── platform.clj
├── .gitignore
├── resources
└── test
│ ├── unit-test.html
│ └── test.js
├── package.json
├── test
└── hasch
│ ├── datahike_test.clj
│ └── api_test.cljc
├── karma.conf.js
├── shadow-cljs.edn
├── template
└── pom.xml
├── .circleci
└── config.yml
├── deps.edn
├── LICENSE
└── README.md
/bin/kaocha:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Invokes the Clojure CLI with the :test alias, forwarding every argument given to this script.
3 | clojure -M:test "$@"
4 |
--------------------------------------------------------------------------------
/bin/run-unittests:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs ./bin/kaocha with TIMBRE_LEVEL set to ':warn' and the arguments `--skip :integration`.
3 | TIMBRE_LEVEL=':warn' ./bin/kaocha --skip :integration
4 |
--------------------------------------------------------------------------------
/tests.edn:
--------------------------------------------------------------------------------
1 | #kaocha/v1 {:tests [{:id :integration
2 | :focus-meta [:integration]}]}
3 |
--------------------------------------------------------------------------------
/bin/run-integrationtests:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs ./bin/kaocha with TIMBRE_LEVEL set to ':warn' and the arguments `--focus :integration`.
3 | TIMBRE_LEVEL=':warn' ./bin/kaocha --focus :integration
4 |
--------------------------------------------------------------------------------
/bin/run-cljstests:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -o errexit
4 | set -o pipefail
5 |
6 | # Compile for browser
7 | npm run ci-test
8 |
9 | # Compile for node
10 | npx shadow-cljs compile node-test
11 |
--------------------------------------------------------------------------------
/src/hasch/hex.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.hex
2 | (:require [hasch.platform :refer [byte->hex]]))
3 | (defn encode
4 |   "Hex-encodes `raw` by joining the `byte->hex` string of each of its elements."
5 |   [raw] (->> raw (map byte->hex) (apply str)))
6 |
7 | (comment
8 | (encode (byte-array (range 100))))
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | /resources/public/js
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | /.repl*
11 | /out*
12 | /.cljsbuild*
13 | /.shadow-cljs
14 | /.calva
15 | /.cpcache
16 | node_modules
17 | /public
18 | .cljs_node_repl/
19 |
--------------------------------------------------------------------------------
/resources/test/unit-test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | This is just a dummy HTML file with which to load the unit tests.
4 | This file could be changed to include HTML for the tests to use
5 | during their operation.
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/resources/test/test.js:
--------------------------------------------------------------------------------
1 | var page = require('webpage').create();
2 | var system = require("system");
3 | var url = system.args[1];
4 |
5 |
6 | page.onConsoleMessage = function (message) {
7 | console.log(message);
8 | };
9 |
10 | page.open(url, function (status) {
11 | page.evaluate(function(){
12 | hasch.test.run();
13 | });
14 | phantom.exit(0);
15 | });
16 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "CITests",
3 | "description": "Testing",
4 | "scripts": {
5 | "ci-test": "npx shadow-cljs compile ci && npx karma start --single-run"
6 | },
7 | "devDependencies": {
8 | "karma": "^6.4.1",
9 | "karma-chrome-launcher": "^3.1.1",
10 | "karma-cljs-test": "^0.1.0"
11 | },
12 | "dependencies": {
13 | "shadow-cljs": "^2.20.20"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/test/hasch/datahike_test.clj:
--------------------------------------------------------------------------------
1 | (ns hasch.datahike-test
2 | (:require [clojure.test :refer :all]
3 | [datahike.integration-test :as dt]))
4 | ;; Datahike configuration used by the integration test: in-memory store, history kept.
5 | (def config {:store {:backend :mem
6 | :id "hasch-datahike-test-db"}
7 | :keep-history? true
8 | :schema-flexibility :read})
9 | ;; Fixture: calls dt/integration-test-fixture with `config`, then runs the wrapped tests `f`.
10 | (defn test-fixture [f]
11 | (dt/integration-test-fixture config)
12 | (f))
13 | ;; Registered as a :once fixture for this namespace.
14 | (use-fixtures :once test-fixture)
15 | ;; Tagged ^:integration; delegates to dt/integration-test with `config`.
16 | (deftest ^:integration datahike-integration-test
17 | (dt/integration-test config))
18 |
--------------------------------------------------------------------------------
/karma.conf.js:
--------------------------------------------------------------------------------
1 | module.exports = function (config) {
2 | config.set({
3 | browsers: ['ChromeHeadless'],
4 | // The directory where the output file lives
5 | basePath: 'target',
6 | // The file itself
7 | files: ['ci.js'],
8 | frameworks: ['cljs-test'],
9 | plugins: ['karma-cljs-test', 'karma-chrome-launcher'],
10 | colors: true,
11 | logLevel: config.LOG_INFO,
12 | client: {
13 | args: ["shadow.test.karma.init"],
14 | singleRun: true
15 | }
16 | })
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/shadow-cljs.edn:
--------------------------------------------------------------------------------
1 | {:deps {:aliases [:cljs]}
2 |
3 | :builds
4 | {:app
5 | {:target :browser
6 | :output-dir "public/js"
7 | :asset-path "/js"
8 | :modules {:main {:entries [hasch.core]}}}
9 |
10 | :browser-test
11 | {:target :browser-test
12 | :test-dir "resources/public/js/test"
13 | :devtools {:http-port 8021
14 | :http-root "resources/public/js/test"}}
15 |
16 | :node-test
17 | {:target :node-test
18 | :output-to "out/node-tests.js"
19 | :autorun true}
20 |
21 | :ci
22 | {:target :karma
23 | :output-to "target/ci.js"}}}
24 |
--------------------------------------------------------------------------------
/src/hasch/base64.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.base64
2 | #?(:cljs (:require [goog.crypt.base64]
3 | [cljs.reader :as r]))
4 | #?(:clj (:import (java.util Base64))))
5 |
6 | (defn encode
7 |   "Base64-encodes `byte-arr` and returns the result as a String."
8 |   [byte-arr]
9 |   #?(:clj (let [encoder (Base64/getEncoder)]
10 |             (.encodeToString encoder ^bytes byte-arr))
11 |      ;; ClojureScript: delegate to the Closure library.
12 |      :cljs (goog.crypt.base64.encodeByteArray byte-arr)))
13 |
14 | (defn decode
15 |   "Decodes the base64 String `base64` back into its bytes."
16 |   [^String base64]
17 |   #?(:cljs (goog.crypt.base64.decodeStringToByteArray base64)
18 |      :clj (-> (Base64/getDecoder) (.decode base64))))
19 |
--------------------------------------------------------------------------------
/src/hasch/md5.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.md5
2 | #?(:cljs (:require [goog.crypt.Md5]
3 | [goog.crypt.Hash]
4 | [goog.crypt]))
5 | #?(:clj (:import [java.security MessageDigest]
6 | [java.math BigInteger])))
7 |
8 | (defn str->md5
9 |   "Returns the raw MD5 digest of the string `s`, hashing its UTF-8 bytes on both platforms."
10 |   [^String s]
11 |   #?(:clj
12 |      ;; Name the charset explicitly: a bare .getBytes uses the JVM default charset,
13 |      ;; which may differ from the UTF-8 bytes the ClojureScript branch hashes.
14 |      (.digest (MessageDigest/getInstance "MD5") (.getBytes s "UTF-8"))
15 |      :cljs
16 |      (let [utf8-bytes (goog.crypt/stringToUtf8ByteArray s)
17 |            md5-digester (goog.crypt.Md5.)]
18 |        (.update md5-digester utf8-bytes)
19 |        (.digest md5-digester))))
20 |
--------------------------------------------------------------------------------
/template/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | jar
5 | io.replikativ
6 | hasch
7 | hasch
8 | A library to consistently crypto-hash edn data structures on Clojure and ClojureScript with SHA-512
9 |
10 |
11 | Eclipse
12 | http://www.eclipse.org/legal/epl-v10.html
13 |
14 |
15 |
16 | scm:git:git@github.com:replikativ/hasch.git
17 | scm:git:git@github.com/replikativ/hasch.git
18 | https://github.com/replikativ/hasch
19 |
20 |
21 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.1
2 |
3 | orbs:
4 | tools: replikativ/clj-tools@0
5 |
6 | workflows:
7 | build-test-and-deploy:
8 | jobs:
9 | - tools/setup:
10 | context: dockerhub-deploy
11 | setup_cljs: true
12 | - tools/build:
13 | context: dockerhub-deploy
14 | build_cljs: true
15 | requires:
16 | - tools/setup
17 | - tools/format:
18 | context: dockerhub-deploy
19 | requires:
20 | - tools/setup
21 | - tools/unittest:
22 | context: dockerhub-deploy
23 | requires:
24 | - tools/build
25 | - tools/cljstest:
26 | context: dockerhub-deploy
27 | requires:
28 | - tools/build
29 | - tools/integrationtest:
30 | context: dockerhub-deploy
31 | requires:
32 | - tools/build
33 | - tools/deploy:
34 | context:
35 | - clojars-deploy
36 | - dockerhub-deploy
37 | filters:
38 | branches:
39 | only: main
40 | requires:
41 | - tools/format
42 | - tools/unittest
43 | - tools/cljstest
44 | - tools/release:
45 | context:
46 | - github-token
47 | - dockerhub-deploy
48 | filters:
49 | branches:
50 | only: main
51 | requires:
52 | - tools/deploy
53 |
--------------------------------------------------------------------------------
/src/hasch/core.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.core
2 | "Hashing functions for EDN."
3 | #?(:cljs (:refer-clojure :exclude [uuid]))
4 | (:require [hasch.benc :refer [PHashCoercion -coerce digest]]
5 | [hasch.base64 :as b64]
6 | [hasch.platform :as platform]))
7 | ;; Implementations from hasch.platform, re-exported under hasch.core.
8 | (def uuid4 platform/uuid4)
9 | (def uuid5 platform/uuid5)
10 | (def hash->str platform/hash->str)
11 |
12 | (defn edn-hash
13 |   "Hashes the edn value `val`, with SHA-512 unless a compatible digest constructor is
14 |   given as `md-create-fn`. Prefer extending the PHashCoercion protocol to your own
15 |   types; `write-handlers` is intended for legacy cases only."
16 |   ([val] (edn-hash val {}))
17 |   ([val write-handlers] (edn-hash val platform/sha512-message-digest write-handlers))
18 |   ([val md-create-fn write-handlers]
19 |    (let [handlers (or write-handlers {})
20 |          unsigned (fn [b] (if (neg? b) (+ b 256) b))] ; map negative (signed) bytes to 0-255
21 |      (map unsigned (digest (-coerce val md-create-fn handlers) md-create-fn)))))
22 |
23 | (defn uuid
24 |   "Without arguments returns a random UUID-4; given `val`, returns the UUID-5
25 |   derived from its edn-hash.
26 |
27 |   The optional `:write-handlers` map (incognito style) describes how records
28 |   are serialized in terms of Clojure data structures."
29 |   ([] (uuid4))
30 |   ([val & {:keys [write-handlers]}] (uuid5 (edn-hash val write-handlers))))
31 |
32 | (defn squuid
33 |   "Returns a sequential UUID: the leading 32 bits of `uuid` (a fresh UUID-4 when
34 |   omitted) are replaced by the current time in seconds. See
35 |   https://github.com/clojure-cookbook/clojure-cookbook/blob/master/01_primitive-data/1-24_uuids.asciidoc"
36 |   ([] (squuid (uuid4)))
37 |   ([uuid]
38 |    #?(:clj
39 |       (let [^java.util.UUID base uuid
40 |             secs (quot (System/currentTimeMillis) 1000)
41 |             ;; keep only the low 32 bits of the original most-significant half
42 |             low-msb (bit-and 0x00000000ffffffff (.getMostSignificantBits base))
43 |             timed-msb (bit-or (bit-shift-left secs 32) low-msb)]
44 |         (java.util.UUID. timed-msb (.getLeastSignificantBits base)))
45 |       :cljs
46 |       ;; the hex seconds replace the first 8 characters of the UUID string
47 |       (let [secs (quot (.getTime (js/Date.)) 1000)
48 |             prefix (.toString secs 16)]
49 |         (cljs.core/uuid (str prefix (subs (str uuid) 8)))))))
50 |
51 | (defn b64-hash
52 |   "Returns the edn-hash of `val` as a base64 encoded string. Unlike the UUID-5,
53 |   which carries 128 bits, this keeps every bit of the hash; both should be safe,
54 |   but b64-hash is safer towards collisions."
55 |   [val]
56 |   (let [->bytes #?(:clj byte-array :cljs clj->js)] (b64/encode (->bytes (edn-hash val)))))
57 |
--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src"]
2 | :deps {org.clojure/clojure {:mvn/version "1.11.1"}
3 | org.clojure/clojurescript {:mvn/version "1.11.60"}
4 | io.replikativ/incognito {:mvn/version "0.3.66"}}
5 | :aliases {:cljs {:extra-deps {thheller/shadow-cljs {:mvn/version "2.20.20"}
6 | binaryage/devtools {:mvn/version "1.0.6"}}
7 | :extra-paths ["test"]}
8 | :dev {:extra-deps {criterium/criterium {:mvn/version "0.4.6"}}}
9 | :test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.77.1236"}
10 | io.github.cognitect-labs/test-runner {:git/tag "v0.5.1"
11 | :git/sha "dfb30dd"}
12 | io.replikativ/datahike {:mvn/version "0.6.1531"}
13 | io.replikativ/hasch {:local/root "."}}
14 | :main-opts ["-m" "kaocha.runner"]
15 | :extra-paths ["test"]}
16 | ;; pull in specific versions of clojure and clojurescript
17 | :1.7 {:override-deps {org.clojure/clojure {:mvn/version "1.7.0"}
18 | org.clojure/clojurescript {:mvn/version "1.7.228"}}}
19 | :1.8 {:override-deps {org.clojure/clojure {:mvn/version "1.8.0"}
20 | org.clojure/clojurescript {:mvn/version "1.8.51"}}}
21 | :build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.3"}
22 | slipset/deps-deploy {:mvn/version "0.2.0"}
23 | borkdude/gh-release-artifact {:git/url "https://github.com/borkdude/gh-release-artifact"
24 | :git/sha "a83ee8da47d56a80b6380cbb6b4b9274048067bd"}
25 | babashka/babashka.curl {:mvn/version "0.1.1"}
26 | babashka/fs {:mvn/version "0.1.2"}
27 | cheshire/cheshire {:mvn/version "5.10.2"}}
28 | :ns-default build}
29 | :ffix {:extra-deps {cljfmt/cljfmt {:mvn/version "0.8.0"}}
30 | :main-opts ["-m" "cljfmt.main" "fix"]}
31 | :format {:extra-deps {cljfmt/cljfmt {:mvn/version "0.8.0"}}
32 | :main-opts ["-m" "cljfmt.main" "check"]}}}
33 |
--------------------------------------------------------------------------------
/src/hasch/benc.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.benc
2 | "Binary encoding of EDN values."
3 | #?@(:clj [(:import java.security.MessageDigest
4 | java.io.ByteArrayOutputStream)]))
5 |
6 | #?(:clj (set! *warn-on-reflection* true))
7 |
8 | (defprotocol PHashCoercion
9 | (-coerce [this md-create-fn write-handlers]))
10 |
11 | ;; Magic tag byte for each kind of value that can be hashed. Changes break hashes!
12 | (def magics {:nil (byte 0)
13 | :boolean (byte 1)
14 | :number (byte 2)
15 | :string (byte 3)
16 | :symbol (byte 4)
17 | :keyword (byte 5)
18 | :inst (byte 6)
19 | :uuid (byte 7)
20 | :seq (byte 8)
21 | :vector (byte 9)
22 | :map (byte 10)
23 | :set (byte 11)
24 | :literal (byte 12)
25 | :binary (byte 13)})
26 | ;; encode-safe digests arrays of at least this many elements instead of escaping them inline.
27 | (def split-size 1024)
28 | ;; xor-hashes combines at most this many leading bytes of each hash.
29 | (def max-entropy-byte-count 32)
30 | ;; ClojureScript stand-in for byte-array: a js array of `len` zeros.
31 | #?(:cljs (defn- byte-array [len] (into-array (repeat len 0))))
32 |
33 | (defn ^bytes digest
34 |   "Feeds `bytes-or-seq-of-bytes` (one byte array, or a seq of them) into a fresh
35 |   message digest obtained from `md-create-fn` and returns the resulting digest."
36 |   [bytes-or-seq-of-bytes md-create-fn]
37 |   (let [^MessageDigest md (md-create-fn)
38 |         chunks (if (seq? bytes-or-seq-of-bytes) bytes-or-seq-of-bytes [bytes-or-seq-of-bytes])]
39 |     (doseq [^bytes chunk chunks] (.update md chunk))
40 |     (.digest md)))
41 |
42 | (defn ^bytes coerce-seq
43 |   "Coerces each element of `seq` in order, feeds the results into one fresh message
44 |   digest and returns that digest. An empty `seq` is digested like a seq holding a single nil."
45 |   [seq md-create-fn write-handlers]
46 |   (let [^MessageDigest seq-md (md-create-fn)]
47 |     (doseq [elem (if (empty? seq) [nil] seq)] ; always coerces at least one element
48 |       (.update seq-md ^bytes (-coerce elem md-create-fn write-handlers)))
49 |     (.digest seq-md)))
50 |
51 | (defn ^bytes xor-hashes
52 |   "XORs the elements of `seq` byte-wise into a single array, so the result does not
53 |   depend on the order of the elements. The elements must already be crypto hashes
54 |   to avoid collisions in XOR. At most `max-entropy-byte-count` (32) bytes of each
55 |   element are taken into account."
56 |   [seq]
57 |   (let [len (min (count ^bytes (first seq)) max-entropy-byte-count)
58 |         ;; folds one element into the accumulator in place and returns the accumulator
59 |         xor-into (fn [^bytes acc ^bytes elem]
60 |                    (dotimes [i len]
61 |                      (aset acc i (byte (bit-xor (aget acc i) (aget elem i)))))
62 |                    acc)]
63 |     (reduce xor-into (byte-array len) seq)))
64 |
65 | (defn ^bytes encode-safe
66 |   "Arrays shorter than `split-size` are returned followed by an equally long marker
67 |   array: 1 where the byte lies strictly between 0 and 30, else 0. Longer arrays are digested."
68 |   [^bytes a md-create-fn]
69 |   (if-not (< (count a) split-size)
70 |     (digest a md-create-fn)
71 |     (let [len (long (alength a))
72 |           ea (byte-array len)]
73 |       (dotimes [i len]
74 |         (let [e (aget a i)]
75 |           (when (and (< (byte 0) e) (< e (byte 30)))
76 |             (aset ea i (byte 1)))))
77 |       #?(:clj (let [out (ByteArrayOutputStream.)]
78 |                 (.write out a)
79 |                 (.write out ea)
80 |                 (.toByteArray out))
81 |          :cljs (.concat a ea)))))
82 |
--------------------------------------------------------------------------------
/src/hasch/platform.cljs:
--------------------------------------------------------------------------------
1 | (ns hasch.platform
2 | (:require [goog.crypt]
3 | [goog.crypt.Sha512]
4 | [cljs.reader :as reader]
5 | [clojure.string]
6 | [incognito.base :as ib]
7 | [hasch.benc :refer [magics PHashCoercion -coerce
8 | digest coerce-seq xor-hashes encode-safe]]))
9 |
10 | #_(do
11 | (ns dev)
12 | (def repl-env (reset! cemerick.austin.repls/browser-repl-env
13 | (cemerick.austin/repl-env)))
14 | (cemerick.austin.repls/cljs-repl repl-env))
15 |
16 | (def uuid4 random-uuid) ; random UUIDs come straight from random-uuid
17 |
18 | (defn byte->hex
19 |   "Returns the two-digit hex string for the low 8 bits of `b`."
20 |   [b]
21 |   (let [low-byte (bit-and b 0xff)
22 |         padded (.toString (+ low-byte 0x100) 16)] ; adding 0x100 forces three digits, e.g. 10f
23 |     (.substring padded 1)))
24 | (defn hash->str
25 |   "Concatenates the `byte->hex` strings of all elements of `bytes`."
26 |   [bytes] (clojure.string/join (map byte->hex bytes)))
27 |
28 | (def ^:dynamic *use-legacy-utf8-conversion* false)
29 |
30 | ;; taken from http://jsperf.com/uint8array-vs-array-encode-to-utf8/2
31 | ;; which is taken from http://user1.matsumoto.ne.jp/~goma/js/utf.js
32 | ;; verified against: "小鳩ちゃんかわいいなぁ"
33 | ;; Note that this variant is broken for higher planes of unicode. For
34 | ;; backwards compatibility, you can enable `*use-legacy-utf8-conversion*`
35 | ;; to keep using the old improper conversion method.
36 | (defn- legacy-utf8
37 | "Encodes a string as UTF-8 in an unsigned js array."
38 | [s]
39 | (into-array
40 | (mapcat
41 | (fn [pos]
42 | (let [c (.charCodeAt s pos)] ; works on one UTF-16 code unit per position
43 | (cond (<= c 0x7F) [(bit-and c 0xFF)] ; one byte
44 | (<= c 0x7FF) [(bit-or 0xC0 (bit-shift-right c 6)) ; two bytes
45 | (bit-or 0x80 (bit-and c 0x3F))]
46 | (<= c 0xFFFF) [(bit-or 0xE0 (bit-shift-right c 12)) ; three bytes
47 | (bit-or 0x80 (bit-and (bit-shift-right c 6) 0x3F))
48 | (bit-or 0x80 (bit-and c 0x3F))]
49 | :default (let [j (loop [j 4] ; NOTE(review): charCodeAt presumably never exceeds 0xFFFF, so this branch looks unreachable - confirm
50 | (if (pos? (bit-shift-right c (* j 6)))
51 | (recur (inc j))
52 | j))
53 | init (bit-or (bit-and (bit-shift-right 0xFF00 j) 0xFF) ; leading byte
54 | (bit-shift-right c (* 6 (dec j))))]
55 | (conj (->> (range (dec j))
56 | reverse
57 | (map #(bit-or 0x80 ; continuation bytes, 6 bits each
58 | (bit-and (bit-shift-right c (* 6 %))
59 | 0x3F))))
60 | init)))))
61 | (range (.-length s)))))
62 |
63 | #_(utf8 "小鳩ちゃんかわいいなぁ")
64 |
65 | (defn utf8
66 |   "Encodes the string `s` as UTF-8 bytes. Uses the Closure library unless
67 |   `*use-legacy-utf8-conversion*` is truthy, in which case `legacy-utf8` is used."
68 |   [s]
69 |   (if-not *use-legacy-utf8-conversion* (goog.crypt/stringToUtf8ByteArray s) (legacy-utf8 s)))
70 |
71 | (defn uuid5
72 |   "Builds a UUID from the first 16 elements of the hash byte sequence `sha-hash`.
73 |   The top two bits of the first byte are cleared (our hash version is coded there),
74 |   the high nibble of byte 7 is set to 5 and byte 9 gets the bit pattern 10 on top."
75 |   [sha-hash]
76 |   (let [[hb1 hb2 hb3 hb4 hb5 hb6 hb7 hb8
77 |          lb1 lb2 lb3 lb4 lb5 lb6 lb7 lb8] sha-hash
78 |         high [(bit-clear (bit-clear hb1 7) 6) hb2 hb3 hb4 hb5 hb6 (bit-or 0x50 (bit-and 0x5f hb7)) hb8]
79 |         low [(bit-clear (bit-set lb1 7) 6) lb2 lb3 lb4 lb5 lb6 lb7 lb8]
80 |         hex (hash->str (into high low))]
81 |     ;; group the 32 hex digits as 8-4-4-4-12
82 |     (uuid (str (subs hex 0 8) "-"
83 |                (subs hex 8 12) "-"
84 |                (subs hex 12 16) "-"
85 |                (subs hex 16 20) "-" (subs hex 20)))))
86 | ;; Returns a new goog.crypt.Sha512 instance on every call.
87 | (defn sha512-message-digest []
88 | (goog.crypt.Sha512.))
89 | ;; NOTE(review): goog.crypt.Md5 is not listed in this namespace's :require - confirm it is loaded elsewhere.
90 | (defn md5-message-digest []
91 | (goog.crypt.Md5.))
92 | (defn encode
93 |   "Returns a new js array holding `magic` with `a` concatenated after it."
94 |   [magic a] (let [tagged #js [magic]] (.concat tagged a)))
95 | (defn- str->utf8
96 |   "UTF-8 encodes the `str` representation of `x`."
97 |   [x] (utf8 (str x)))
98 |
99 | (extend-protocol PHashCoercion ; ClojureScript coercions: each value becomes its magic tag followed by a payload
100 | nil
101 | (-coerce [this md-create-fn write-handlers]
102 | (encode (:nil magics) #js[]))
103 | ;; Booleans: a single payload element, 41 for true and 40 for false.
104 | boolean
105 | (-coerce [this md-create-fn write-handlers]
106 | (encode (:boolean magics) #js [(if this 41 40)]))
107 | ;; Strings, symbols and keywords: UTF-8 bytes of their `str` form, passed through encode-safe.
108 | string
109 | (-coerce [this md-create-fn write-handlers]
110 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn)))
111 | ;; Numbers, dates and UUIDs: UTF-8 bytes of a string form, without encode-safe.
112 | number
113 | (-coerce [this md-create-fn write-handlers]
114 | ;; utf8 is not needed, can be optimized
115 | (encode (:number magics) (str->utf8 this)))
116 |
117 | js/Date
118 | (-coerce [this md-create-fn write-handlers]
119 | ;; utf8 is not needed, can be optimized
120 | (encode (:inst magics) (str->utf8 (.getTime this))))
121 |
122 | cljs.core/UUID
123 | (-coerce [this md-create-fn write-handlers]
124 | (encode (:uuid magics) (str->utf8 (.-uuid this))))
125 |
126 | cljs.core/Symbol
127 | (-coerce [this md-create-fn write-handlers]
128 | (encode (:symbol magics) (encode-safe (str->utf8 this) md-create-fn)))
129 |
130 | cljs.core/Keyword
131 | (-coerce [this md-create-fn write-handlers]
132 | (encode (:keyword magics) (encode-safe (str->utf8 this) md-create-fn)))
133 | ;; Everything else is dispatched by the cond below; the first matching clause wins.
134 | default
135 | (-coerce [this md-create-fn write-handlers]
136 | (cond (instance? ib/IncognitoTaggedLiteral this)
137 | (let [{:keys [tag value]} this]
138 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers)))
139 | ;; Records are first turned into a tag/value pair by incognito-writer.
140 | (satisfies? IRecord this)
141 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)]
142 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers)))
143 |
144 | (satisfies? ISeq this)
145 | (encode (:seq magics) (coerce-seq this md-create-fn write-handlers))
146 |
147 | (satisfies? IVector this)
148 | (encode (:vector magics) (coerce-seq this md-create-fn write-handlers))
149 | ;; Maps: XOR of the coerced entries, so entry order does not matter.
150 | (satisfies? IMap this)
151 | (encode (:map magics) (xor-hashes (map #(-coerce % md-create-fn write-handlers)
152 | (seq this))))
153 | ;; Sets: XOR of the digests of the coerced elements.
154 | (satisfies? ISet this)
155 | (encode (:set magics) (xor-hashes (map #(digest (-coerce % md-create-fn write-handlers)
156 | md-create-fn)
157 | (seq this))))
158 | ;; Uint8Array: handed to encode-safe as the result of Array.prototype.slice.
159 | (instance? js/Uint8Array this)
160 | (encode (:binary magics) (encode-safe (js/Array.prototype.slice.call this) md-create-fn))
161 |
162 | :else
163 | (throw (ex-info "Cannot hash unknown type, you can extend PHashCoercion protocol for:"
164 | {:type (type this)
165 | :value this})))))
166 |
167 | (comment
168 | (js/Array.prototype.slice.call (js/Uint8Array. #js [1 2 3]))
169 | (.log js/console (-coerce (js/Uint8Array. #js [1 2 3]) (sha512-message-digest) sha512-message-digest))
170 |
171 | (do
172 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239
173 | :person/name "Frederic"
174 | :person/familyname "Johanson"
175 | :person/street "Fifty-First Street 53"
176 | :person/postal 38237
177 | :person/telefon "02343248474"
178 | :person/weeight 0.3823}))))
179 | nil)
180 |
181 | (time (-coerce datom-vector sha512-message-digest))
182 |
183 | (coerce-seq (sha512-message-digest) sha512-message-digest [:foo {:a "b"}])
184 |
185 | ;; quick & dirty js advanced compilation benchmark
186 | (enable-console-print!)
187 |
188 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239
189 | :person/name "Frederic"
190 | :person/familyname "Johanson"
191 | :person/street "Fifty-First Street 53"
192 | :person/postal 38237
193 | :person/telefon "02343248474"
194 | :person/weeight 0.3823}))))
195 |
196 | (.log js/console "benchmarking: " (time (-coerce datom-vector sha512-message-digest))))
197 |
--------------------------------------------------------------------------------
/test/hasch/api_test.cljc:
--------------------------------------------------------------------------------
1 | (ns hasch.api-test
2 | (:require [hasch.core :refer [edn-hash uuid squuid b64-hash]]
3 | [hasch.benc :refer [xor-hashes]]
4 | [hasch.md5 :as md5]
5 | [hasch.hex :as hex]
6 | [hasch.platform :refer [uuid5 hash->str #?(:cljs utf8)]]
7 | [incognito.base :as ic]
8 | [clojure.test :as t :refer (is deftest testing)]))
9 |
10 | #?(:cljs (def byte-array into-array))
11 |
12 | (defrecord Bar [name])
13 |
14 | (deftest hash-test
15 | (testing "Basic hash coercions of EDN primitives."
16 | (is (= (edn-hash nil)
17 | '(184 36 77 2 137 129 214 147 175 123 69 106 248 239 164 202 214 61 40 46 25 255 20 148 44 36 110 80 217 53 29 34 112 74 128 42 113 195 88 11 99 112 222 76 235 41 60 50 74 132 35 52 37 87 212 229 195 132 56 240 227 105 16 238)))
18 |
19 | (is (= (edn-hash true)
20 | '(221 223 252 44 103 48 51 199 71 184 156 187 201 140 35 99 235 153 185 70 157 229 122 4 111 90 12 150 43 67 185 166 210 79 54 62 117 173 76 252 187 67 163 85 202 124 63 252 109 44 47 70 74 129 52 241 35 15 116 253 241 141 50 131)))
21 |
22 | (is (= (edn-hash false)
23 | '(54 0 110 63 158 137 176 89 220 235 107 213 84 159 27 25 148 206 193 96 192 73 41 255 220 181 215 106 208 220 173 69 213 190 181 70 141 193 1 225 188 142 127 176 102 61 13 54 151 161 195 158 152 190 212 168 91 43 153 108 122 123 90 32)))
24 |
25 | (is (= (edn-hash \f)
26 | '(211 133 203 224 194 174 136 44 216 77 98 85 54 188 116 101 139 174 40 108 48 180 235 231 214 189 34 246 32 30 56 45 179 218 36 206 61 191 79 160 212 162 212 226 235 17 27 228 218 74 17 229 9 147 187 232 35 244 179 233 66 165 152 253)))
27 |
28 | (is (= (edn-hash \ä)
29 | '(51 232 113 238 243 104 216 10 143 88 143 111 122 220 35 138 251 22 8 130 238 73 253 62 143 207 208 45 116 21 120 18 253 34 160 30 144 46 182 7 160 254 197 120 199 220 140 209 3 66 25 214 131 145 17 222 28 157 22 103 226 254 178 186)))
30 |
31 | (is (= (edn-hash "hello")
32 | '(178 114 9 243 3 150 0 132 236 216 60 87 108 34 2 35 85 37 203 202 97 176 9 55 25 191 143 251 251 47 49 139 99 191 77 63 167 158 61 183 233 59 43 57 16 252 121 198 65 201 112 167 96 61 134 122 177 149 45 87 233 23 173 192)))
33 |
34 | (is (= (edn-hash "小鳩ちゃんかわいいなぁ")
35 | '(2 191 84 39 34 44 227 102 135 109 17 136 159 80 253 7 40 0 170 134 198 204 137 10 194 21 113 203 2 87 125 80 172 165 111 110 222 7 123 138 148 124 207 180 240 207 91 6 248 28 53 168 143 30 106 103 101 82 133 215 69 35 93 47)))
36 |
37 | (is (= (edn-hash "😡😡😡")
38 | '(18 17 129 25 13 183 170 164 178 18 97 0 123 151 164 145 95 197 214 178 107 96 255 105 255 104 69 21 205 160 13 222 9 55 63 37 174 33 35 86 204 73 17 110 82 107 151 64 63 79 191 246 177 76 71 24 107 44 43 156 178 169 195 214)))
39 |
40 | (is (= (edn-hash (int 1234567890))
41 | (edn-hash (long 1234567890))
42 | #?(:clj (edn-hash (biginteger "1234567890")))
43 | #?(:clj (edn-hash (bigint "1234567890")))
44 | '(65 199 158 164 193 95 213 144 233 29 41 86 123 106 110 215 117 225 149 249 204 124 220 217 226 120 131 178 61 133 39 228 182 233 235 249 10 249 141 122 101 25 46 134 18 222 175 224 134 61 167 114 15 109 2 146 38 65 1 55 128 137 144 55)))
45 |
46 | (is (= (edn-hash (double 123.1))
47 | (edn-hash (float 123.1))
48 | '(155 181 33 252 126 113 188 20 210 155 50 24 125 212 205 160 135 108 90 43 154 65 61 229 226 83 11 110 64 61 124 45 43 186 152 127 64 171 171 154 28 149 180 136 229 69 195 145 126 99 56 14 48 194 180 126 212 83 123 206 36 189 189 167)
49 | #?(:clj (edn-hash (BigDecimal. "123.1")))))
50 |
51 | (is (= (edn-hash :core/test)
52 | '(62 51 214 78 41 84 37 205 69 197 105 26 235 55 30 87 46 117 187 194 101 184 139 244 111 232 98 175 16 174 182 211 11 171 154 64 90 18 229 93 188 246 33 234 102 145 68 30 92 0 81 208 210 10 124 137 203 18 249 138 226 253 60 62)))
53 |
54 | (is (= (edn-hash #uuid "242525f1-8ed7-5979-9232-6992dd1e11e4")
55 | '(42 243 183 237 233 94 246 1 110 56 231 49 64 217 181 17 108 11 120 199 223 53 149 47 49 8 109 94 127 93 250 51 167 211 25 31 3 171 149 67 23 245 38 248 40 31 199 211 162 242 120 99 187 6 29 237 53 174 22 192 27 159 227 164)))
56 |
57 | (is (= (edn-hash (#?(:clj java.util.Date. :cljs js/Date.) 1000000000000))
58 | '(177 226 212 235 221 67 176 34 184 69 101 45 117 193 95 187 54 50 210 149 10 193 10 67 220 174 25 99 176 115 250 216 29 49 148 167 52 86 203 90 30 170 62 149 115 102 109 120 128 62 2 213 188 41 203 91 202 106 142 100 119 160 26 3)))
59 |
60 | (is (= (edn-hash 'core/+)
61 | '(164 63 64 77 190 144 72 80 34 36 254 237 101 99 57 114 54 44 195 22 255 11 242 114 99 87 99 135 103 73 164 183 20 192 184 54 183 244 192 151 88 96 55 204 73 156 73 92 154 8 248 205 119 157 34 112 202 51 52 169 162 61 91 235)))
62 |
63 | (is (= (edn-hash '(1 2 3))
64 | '(244 105 186 110 183 117 195 78 70 57 251 132 133 114 134 175 228 94 242 41 194 191 186 237 163 178 255 193 141 120 5 137 223 130 170 47 231 133 78 131 128 194 115 140 186 169 124 71 205 210 228 236 82 97 166 158 190 98 106 80 237 149 96 102)))
65 |
66 | (is (= (edn-hash [1 2 3 4])
67 | '(172 52 37 123 179 106 243 207 88 177 218 22 170 25 13 155 205 89 156 251 253 50 3 3 191 74 229 97 252 37 162 240 197 252 240 199 177 8 96 227 121 100 106 132 68 227 175 189 247 184 108 25 117 154 186 63 108 4 210 20 75 25 239 199)))
68 |
69 | (is (= (edn-hash {:a "hello"
70 | :balloon "world"})
71 | '(135 204 255 206 109 55 248 198 218 226 173 91 27 244 68 34 108 207 62 12 114 49 69 90 22 44 155 178 212 188 139 50 217 200 63 207 14 112 179 94 202 96 196 139 202 154 214 211 182 97 31 139 49 153 203 233 240 223 154 161 78 131 159 102)))
72 |
73 | (is (= (edn-hash #{1 2 3 4})
74 | '(42 216 217 238 97 125 210 112 2 83 128 62 82 47 119 14 59 95 246 107 191 138 251 102 201 52 9 132 96 243 199 223 218 81 88 130 165 214 125 48 222 30 64 233 101 122 196 84 11 93 186 26 92 225 203 161 196 98 186 138 174 118 244 248)))
75 |
76 | (is (= (edn-hash (Bar. "hello"))
77 | (edn-hash (ic/incognito-reader {'hasch.api-test.Bar map->Bar}
78 | (ic/incognito-writer {} (Bar. "hello"))))
79 | (edn-hash (ic/map->IncognitoTaggedLiteral (ic/incognito-writer {} (Bar. "hello"))))
80 | (edn-hash (ic/map->IncognitoTaggedLiteral {:tag 'hasch.api_test.Bar
81 | :value {:name "hello"}}))
82 | '(236 35 140 74 245 164 93 1 239 144 253 91 193 51 241 129 149 210 99 169 16 130 21 235 236 166 36 205 80 10 215 106 173 39 96 197 241 49 64 219 252 119 65 15 87 24 2 253 0 143 61 187 88 216 238 226 146 40 197 51 82 208 246 127)))
83 |
84 | (is (= (edn-hash #?(:cljs (js/Uint8Array. #js [1 2 3 42 149])
85 | :clj (byte-array [1 2 3 42 149])))
86 | '(135 209 248 171 162 90 41 221 173 216 64 218 222 93 242 60 243 5 190 153 101 194 74 130 55 184 84 148 167 94 210 250 140 211 6 234 221 25 113 83 153 75 180 4 194 163 178 197 243 126 27 172 248 169 161 90 102 172 160 98 249 32 42 157)))))
87 | ;; xor-hashes must return the same bytes when the same inputs arrive in a different order.
88 | (deftest padded-coercion
89 | (testing "Padded xor coercion for commutative collections."
90 | (is (= (map byte
91 | (xor-hashes (map byte-array
92 | [[0xa0 0x01 0xf3] [0x0c 0xf0 0x5f] [0x0a 0x30 0x07]])))
93 | (map byte (xor-hashes (map byte-array
94 | [[0x0a 0x30 0x07] [0x0c 0xf0 0x5f] [0xa0 0x01 0xf3]])))))))
95 | ;; Pins the UUID-5 derived from the edn-hash of a quoted code form.
96 | (deftest code-hashing
97 | (testing "Code hashing."
98 | (is (= (-> '(fn fib [n]
99 | (if (or (= n 0) (= n 1)) 1
100 | (+ (fib (- n 1)) (fib (- n 2)))))
101 | edn-hash
102 | uuid5)
103 | #uuid "386eabb0-8adc-52a2-a715-5a74c9197646"))))
104 |
105 | (deftest hash-stringification
106 | (testing "Stringification."
107 | (is (= (hash->str (range 256))
108 | "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"))))
109 | ;; squuid only replaces the first 8 characters; the rest of the UUID string is pinned here.
110 | (deftest squuid-test
111 | (testing "Sequential UUID functionality."
112 | (is (= (subs (str (squuid (uuid [1 2 3]))) 8)
113 | "-5c15-555e-a1c8-6166a78fc808"))))
114 | ;; Pins the base64 string b64-hash produces for a fixed nested value.
115 | (deftest b64-hash-test
116 | (testing "Testing the base64 encoding of a hash."
117 | (is (= (b64-hash [1 2 3 {:key 5 :value 10}])
118 | "TREJlRrK211AASiqQMFG9RLFW0CPC/arrCxeaUj27Qho2USJU40T01uCdjUg/OMiPGttyL1ELPCrVXXhMIroRQ=="))))
119 | ;; Checks the hex-encoded MD5 digest of a fixed string.
120 | (deftest test-md5
121 | (is (= (hex/encode (md5/str->md5 "geheimnis"))
122 | "525e92c6aa11544a2ab794f8921ecb0f")))
123 | ;; ClojureScript only: checks the bytes utf8 returns for a Japanese sample string.
124 | #?(:cljs
125 | (deftest utf8-test
126 | (is (= (js->clj (utf8 "小鳩ちゃんかわいいなぁ"))
127 | [229 176 143 233 179 169 227 129 161 227 130 131 227 130 147 227
128 | 129 139 227 130 143 227 129 132 227 129 132 227 129 170 227 129 129]))))
129 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of Washington and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------
/src/hasch/platform.clj:
--------------------------------------------------------------------------------
1 | (ns hasch.platform
2 | "Platform specific implementations."
3 | (:require [hasch.benc :refer [split-size encode-safe]]
4 | [clojure.edn :as edn]
5 | [clojure.java.io :as io]
6 | [incognito.base :as ib]
7 | [hasch.benc :refer [magics PHashCoercion -coerce
8 | digest coerce-seq xor-hashes encode-safe]])
9 | (:import java.io.ByteArrayOutputStream
10 | java.nio.ByteBuffer
11 | java.security.MessageDigest))
12 |
13 | (set! *warn-on-reflection* true)
14 |
15 | (defn uuid4
16 | "Returns a new random (version 4) `java.util.UUID`; takes no arguments."
17 | []
18 | (java.util.UUID/randomUUID))
19 |
20 | (defn byte->hex
21 |   "Renders the low 8 bits of `b` as exactly two lowercase hex digits."
22 |   [b]
23 |   (let [unsigned (bit-and b 0xff)
24 |         padded   (Integer/toString (+ unsigned 0x100) 16)] ;; adding 0x100 guarantees three digits
25 |     (.substring padded 1)))
26 |
27 | (defn hash->str [bytes] ;; returns one string: every element rendered by `byte->hex`, concatenated in order
28 | (apply str (map byte->hex bytes)))
29 |
30 | (defn ^MessageDigest sha512-message-digest [] ;; requests a "sha-512" MessageDigest instance on every call
31 | (MessageDigest/getInstance "sha-512"))
32 |
33 | (defn ^MessageDigest md5-message-digest [] ;; requests an "md5" MessageDigest instance on every call
34 | (MessageDigest/getInstance "md5"))
35 |
36 | (defn uuid5
37 |   "Builds a version 5 `java.util.UUID` from a hash byte sequence.
38 |   Only the first 16 bytes of `sha-hash` are read.
39 |   Our hash version is coded in first 2 bits."
40 |   [sha-hash]
41 |   (let [buf (ByteBuffer/wrap (byte-array sha-hash))
42 |         msb (-> (.getLong buf) ;; upper 64 bits: version nibble forced to 5
43 |                 (bit-or 0x0000000000005000)
44 |                 (bit-and 0x7fffffffffff5fff)
45 |                 (bit-clear 63) ;; needed because of BigInt cast of bitmask
46 |                 (bit-clear 62))
47 |         lsb (-> (.getLong buf) ;; lower 64 bits: top two bits become binary 10
48 |                 (bit-set 63)
49 |                 (bit-clear 62))]
50 |     (java.util.UUID. msb lsb)))
51 |
52 | (defn ^bytes encode
53 |   "Returns a new byte array holding the `magic` type byte followed by the bytes of `a`."
54 |   [^Byte magic ^bytes a]
55 |   (.toByteArray (doto (ByteArrayOutputStream.)
56 |                   (.write (byte-array 1 magic)) (.write a))))
57 |
58 | (defn- ^bytes str->utf8 [x] ;; stringifies `x` with `str`, then encodes that text as UTF-8 bytes
59 |   (.getBytes (str x) "UTF-8"))
60 |
61 | (extend-protocol PHashCoercion ;; JVM coercions: every branch returns `encode` of a type magic byte plus payload bytes
62 | java.lang.Boolean
63 | (-coerce [this md-create-fn write-handlers]
64 | (encode (:boolean magics) (byte-array 1 (if this (byte 41) (byte 40))))) ;; payload: 41 for true, 40 for false
65 |
66 | ;; don't distinguish characters from string for javascript
67 | java.lang.Character
68 | (-coerce [this md-create-fn write-handlers]
69 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn)))
70 |
71 | java.lang.String
72 | (-coerce [this md-create-fn write-handlers]
73 | (encode (:string magics) (encode-safe (str->utf8 this) md-create-fn)))
74 |
75 | java.lang.Integer ;; every numeric type below uses the :number magic and the UTF-8 bytes of its toString
76 | (-coerce [this md-create-fn write-handlers]
77 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
78 |
79 | java.lang.Long
80 | (-coerce [this md-create-fn write-handlers]
81 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
82 |
83 | java.math.BigInteger
84 | (-coerce [this md-create-fn write-handlers]
85 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
86 |
87 | java.lang.Float
88 | (-coerce [this md-create-fn write-handlers]
89 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
90 |
91 | java.lang.Double
92 | (-coerce [this md-create-fn write-handlers]
93 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
94 |
95 | java.math.BigDecimal
96 | (-coerce [this md-create-fn write-handlers]
97 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
98 |
99 | clojure.lang.BigInt
100 | (-coerce [this md-create-fn write-handlers]
101 | (encode (:number magics) (.getBytes (.toString this) "UTF-8")))
102 |
103 | java.util.UUID
104 | (-coerce [this md-create-fn write-handlers]
105 | (encode (:uuid magics) (.getBytes (.toString this) "UTF-8")))
106 |
107 | java.util.Date ;; payload: the `.getTime` value rendered as text
108 | (-coerce [this md-create-fn write-handlers]
109 | (encode (:inst magics) (.getBytes (.toString ^java.lang.Long (.getTime this)) "UTF-8")))
110 |
111 | nil
112 | (-coerce [this md-create-fn write-handlers]
113 | (encode (:nil magics) (byte-array 0))) ;; nil has an empty payload; only the magic byte is emitted
114 |
115 | clojure.lang.Symbol
116 | (-coerce [this md-create-fn write-handlers]
117 | (encode (:symbol magics) (encode-safe (str->utf8 this) md-create-fn)))
118 |
119 | clojure.lang.Keyword
120 | (-coerce [this md-create-fn write-handlers]
121 | (encode (:keyword magics) (encode-safe (str->utf8 this) md-create-fn)))
122 |
123 | clojure.lang.ISeq
124 | (-coerce [this md-create-fn write-handlers]
125 | (encode (:seq magics) (coerce-seq this md-create-fn write-handlers)))
126 |
127 | clojure.lang.IPersistentVector
128 | (-coerce [this md-create-fn write-handlers]
129 | (encode (:vector magics) (coerce-seq this md-create-fn write-handlers)))
130 |
131 | incognito.base.IncognitoTaggedLiteral ;; tagged literals are coerced as the pair [tag value] under the :literal magic
132 | (-coerce [this md-create-fn write-handlers]
133 | (let [{:keys [tag value]} this]
134 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers))))
135 |
136 | clojure.lang.IRecord ;; records go through incognito-writer first, then are coerced like tagged literals
137 | (-coerce [this md-create-fn write-handlers]
138 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)]
139 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers))))
140 |
141 | clojure.lang.IPersistentMap
142 | (-coerce [this md-create-fn write-handlers]
143 | (if (record? this) ;; BUG somehow records can also trigger the map sometimes (?)
144 | (let [{:keys [tag value]} (ib/incognito-writer write-handlers this)]
145 | (encode (:literal magics) (coerce-seq [tag value] md-create-fn write-handlers)))
146 | (encode (:map magics) (xor-hashes (map #(-coerce % md-create-fn write-handlers) (seq this)))))) ;; plain maps: each entry is coerced, results combined by xor-hashes
147 |
148 | clojure.lang.IPersistentSet ;; sets: each element's coercion is digested, digests combined by xor-hashes
149 | (-coerce [this md-create-fn write-handlers]
150 | (encode (:set magics) (xor-hashes (map #(digest (-coerce % md-create-fn write-handlers)
151 | md-create-fn)
152 | (seq this)))))
153 |
154 | ;; not ideal, InputStream might be more flexible
155 | ;; file is used due to length knowledge
156 | java.io.File
157 | (-coerce [f md-create-fn write-handlers]
158 | (let [^MessageDigest md (md-create-fn)
159 | len (.length f)]
160 | (with-open [fis (java.io.FileInputStream. f)]
161 | (encode (:binary magics)
162 | ;; support default split-size behaviour transparently
163 | (if (< len split-size) ;; files shorter than split-size are read fully and passed to encode-safe
164 | (let [ba (with-open [out (java.io.ByteArrayOutputStream.)]
165 | (clojure.java.io/copy fis out)
166 | (.toByteArray out))]
167 | (encode-safe ba md-create-fn))
168 | (let [ba (byte-array (* 1024 1024))] ;; otherwise the file is fed to the digest through a 1 MiB buffer
169 | (loop [size (.read fis ba)]
170 | (if (neg? size) (.digest md) ;; negative read size ends the loop; the digest bytes are the payload
171 | (do
172 | (.update md ba 0 size)
173 | (recur (.read fis ba))))))))))))
174 |
175 | (extend (Class/forName "[B") ;; "[B" is the JVM class name of byte[]; byte arrays are coerced under the :binary magic
176 | PHashCoercion
177 | {:-coerce (fn [^bytes this md-create-fn write-handlers]
178 | (encode (:binary magics) (encode-safe this md-create-fn)))})
179 |
180 | (comment
181 | (require '[clojure.java.io :as io])
182 | (def foo (io/file "/tmp/foo"))
183 | (.length foo)
184 |
185 | (defn slurp-bytes
186 | "Slurp the bytes from a slurpable thing"
187 | [x]
188 | (with-open [out (java.io.ByteArrayOutputStream.)]
189 | (clojure.java.io/copy (clojure.java.io/input-stream x) out)
190 | (.toByteArray out)))
191 |
192 | (clojure.reflect/reflect foo)
193 | (= (map byte (-coerce (io/file "/tmp/bar") sha512-message-digest))
194 | (map byte (-coerce (slurp-bytes "/tmp/bar") sha512-message-digest)))
195 |
196 | (map byte (-coerce {:hello :world :foo :bar 1 2} sha512-message-digest))
197 |
198 | (map byte (-coerce #{1 2 3} sha512-message-digest))
199 |
200 | (use 'criterium.core)
201 |
202 | (def million-map (into {} (doall (map vec (partition 2
203 | (interleave (range 1000000)
204 | (range 1000000)))))))
205 |
206 | (bench (-coerce million-map sha512-message-digest)) ;; 3.80 secs
207 |
208 | (def million-seq (doall (map vec (partition 2
209 | (interleave (range 1000000)
210 | (range 1000000 2000000))))))
211 |
212 | (def million-seq2 (doall (range 1000000)))
213 |
214 | (bench (-coerce million-seq2 sha512-message-digest)) ;; 296 ms
215 |
216 | (bench (-coerce million-seq2 md5-message-digest))
217 |
218 | (take 10 (time (into (sorted-set) (range 1e6)))) ;; 1.7 s
219 |
220 | (bench (coerce-seq sha512-message-digest (seq (into (sorted-set) (range 1e4)))))
221 |
222 | (bench (-coerce (into #{} (range 1e4)) sha512-message-digest))
223 |
224 | (bench (-coerce (seq (into (sorted-set) (range 10))) sha512-message-digest)) ;; 8.6 us
225 |
226 | (bench (-coerce (into #{} (range 10)) sha512-message-digest)) ;; 31.7 us
227 |
228 | (bench (-coerce (seq (into (sorted-set) (range 100))) sha512-message-digest))
229 |
230 | (bench (-coerce (into #{} (range 100)) sha512-message-digest))
231 |
232 | (bench (-coerce (seq (into (sorted-set) (range 1e4))) sha512-message-digest))
233 |
234 | (bench (-coerce (into #{} (range 1e4)) sha512-message-digest))
235 |
236 | (def small-map (into {} (map vec (partition 2 (take 10 (repeatedly rand))))))
237 | (bench (-coerce (apply concat (seq (into (sorted-map) small-map)))
238 | sha512-message-digest)) ;; 12.1 us
239 |
240 | (bench (-coerce small-map sha512-message-digest)) ;; 20.7 us
241 |
242 | (def medium-map (into {} (map vec (partition 2 (take 2e6 (repeatedly rand))))))
243 | (bench (-coerce (apply concat (seq (into (sorted-map) medium-map)))
244 | sha512-message-digest))
245 |
246 | (bench (-coerce medium-map sha512-message-digest))
247 |
248 | (def million-set (doall (into #{} (range 1000000))))
249 |
250 | (bench (-coerce million-set sha512-message-digest)) ;; 2.69 secs
251 |
252 | (def million-seq3 (doall (repeat 1000000 "hello world")))
253 |
254 | (bench (-coerce million-seq3 sha512-message-digest)) ;; 916 msecs
255 |
256 | (def million-seq4 (doall (repeat 1000000 :foo/bar)))
257 |
258 | (bench (-coerce million-seq4 sha512-message-digest)) ;; 752 msecs
259 |
260 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239
261 | :person/name "Frederic"
262 | :person/familyname "Johanson"
263 | :person/street "Fifty-First Street 53"
264 | :person/postal 38237
265 | :person/phone "02343248474"
266 | :person/weight 38.23}))))
267 | (let [val (doall (vec (repeat 10000 {:db/id 18239
268 | :person/name "Frederic"
269 | :person/familyname "Johanson"
270 | :person/street "Fifty-First Street 53"
271 | :person/postal 38237
272 | :person/phone "02343248474"
273 | :person/weight 38.23})))]
274 | (bench (-coerce val sha512-message-digest)))
275 |
276 | (time (-coerce datom-vector sha512-message-digest))
277 | (bench (-coerce datom-vector sha512-message-digest)) ;; xor: 316 ms, sort: 207 ms
278 |
279 | ;; if not single or few byte values, but at least 8 byte size factor per item ~12x
280 | ;; factor for single byte ~100x
281 | (def bs (apply concat (repeat 100000 (.getBytes "Hello World!"))))
282 | (def barr #_(byte-array bs) (byte-array (* 1024 1024 300) (byte 42)))
283 | (def barrs (doall (take (* 1024 1024 10) (repeat (byte-array 1 (byte 42))))
284 | #_(map byte-array (partition 1 barr))))
285 |
286 | (bench (-coerce barr sha512-message-digest)) ;; 1.99 secs
287 |
288 | (def arr (into-array Byte/TYPE (take (* 1024) (repeatedly #(- (rand-int 256) 128)))))
289 |
290 | ;; hasch 0.2.3
291 | (use 'criterium.core)
292 |
293 | (def million-map (into {} (doall (map vec (partition 2
294 | (interleave (range 1000000)
295 | (range 1000000 2000000)))))))
296 |
297 | (bench (uuid million-map)) ;; 27 secs
298 |
299 | (def million-seq3 (doall (repeat 1000000 "hello world")))
300 |
301 | (bench (uuid million-seq3)) ;; 16 secs
302 |
303 | (def datom-vector (doall (vec (repeat 10000 {:db/id 18239
304 | :person/name "Frederic"
305 | :person/familyname "Johanson"
306 | :person/street "Fifty-First Street 53"
307 | :person/postal 38237
308 | :person/telefon "02343248474"
309 | :person/weeight 0.3823}))))
310 |
311 | (bench (uuid datom-vector)) ;; 2.6 secs
312 | )
313 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # hasch
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | A library to consistently crypto-hash [edn](https://github.com/edn-format/edn) data structures on Clojure and ClojureScript with SHA-512. The main motivation is that commutative data structures like maps, sets and records are not hashed in order as was the case with e.g. hashing a simple sequential serialisation, but have the same hash value independent of order. That way Clojure value semantics with `edn` are retained. UTF-8 is supported for strings, symbols and keywords. Beyond this tagged literals are supported in a generic runtime independent fashion and platform-neutral encoding (atm. between JVM and JavaScript) is taken care of.
13 | You can then create UUID5 (using SHA-512) from it. Alternatively you can use your own hash function, but this is not standardized and hence beyond the spec.
14 |
15 | Support for edn types on the JVM and JavaScript is complete including records. This works by printing the tagged-literal and rereading it as pure edn, which also ensures that the hashed value can be reproduced beyond the current runtime. Your type has to be pr-str-able for this to work. Records already have a default serialisation.
16 |
17 | ## Usage
18 |
19 |
20 | Add this to your leiningen project's dependencies:
21 | [](http://clojars.org/io.replikativ/hasch)
22 |
23 | Then you can access the major function through `hasch.core`:
24 |
25 | ~~~clojure
26 | (use 'hasch.core)
27 | (edn-hash ["hello world" {:a 3.14} #{42} '(if true nil \f)])
28 | => (120 75 53 36 42 91 14 22 174 251 7 222 83 57 158 140 192 131 251 17 176 29 252 118 83 2 106 187 223 17 84 232 24 103 183 27 19 174 222 37 246 138 132 126 172 46 249 42 62 46 66 32 33 100 88 168 4 242 90 25 5 228 2 88)
29 |
30 | (uuid5 (edn-hash "hello world"))
31 | => #uuid "1227fe0a-471b-5329-88db-875fb82737a8"
32 |
33 | ;; or just use the convenience multi-arity uuid fn:
34 | (uuid) => #uuid "a27dfbb9-b69a-4f08-8df4-471464bfeb37"
35 | (uuid "hello world") => #uuid "1227fe0a-471b-5329-88db-875fb82737a8"
36 | ~~~
37 |
38 |
39 | ## Motivation
40 |
41 | The motivation is to exchange (potentially large) values in a hostile environment without conflicts. The concrete design motivation is to use the commit log of [replikativ](https://github.com/replikativ/replikativ) for exchange of datascript/datomic transaction logs. As long as you are in a trusted environment you can trust the random generator for conflict-free UUIDs as is done internally by many Clojure projects, but as soon as you distribute values, collisions can happen. Note that you can treat hasch's cryptographic UUIDs like random UUIDs internally and don't need to verify them.
42 |
43 | ## Maturity
44 |
45 | The library is tested in cross-platform [applications](https://github.com/replikativ/topiq). The hashing scheme can be considered stable. It is versioned, so we can fix any severe bug without breaking stored hashes.
46 |
47 |
48 | ## Why not use Clojure's `hash`?
49 |
50 | I wish I could have done that instead of reimplementing my own hashing scheme for edn (there are more interesting problems). There is one major reason against using internal hash functions: They need to be very fast for efficient data-structures and hence trade this for potential but unlikely collisions, which is unacceptable in an insecure environment. For the same reason they also only work on 64 bit values, which is fine for a runtime, but not the internet.
51 |
52 | ## Why not sort?
53 |
54 | Sorting of heterogeneous collections requires a unique serialization (e.g. pr-str or our encoding) on keys beforehand, which was sadly not faster even for small maps and sets. Sorting on number-only maps was faster for maps until at least a size of one million. At some point the complexity of sorting becomes more expensive than xor-ing hashed kv-vectors, so sorting is a simple but not linearly scalable solution. Still it could prove valuable in the future.
55 |
56 | ## edn support
57 |
58 | Support for `edn` types is complete including records. This works according to [incognito](https://github.com/replikativ/incognito) by hashing unknown records the same as their known counterparts. You need to supply the optional `write-handlers` to `uuid` if your records have a custom serialization. Otherwise incognito records won't match.
59 | Importantly the JVM class names are converted into cljs format `foo.bar_baz.Bar` -> `foo.bar-baz/Bar` before hashing. While this potentially allows maliciously induced collisions, you are safe if you use `incognito` or a similar mapping for cross-platform support, as it automatically serializes all record tags accordingly.
60 |
61 | ## Safety
62 |
63 | The library is designed safety first, speed second. I have put quite some thought into getting all input bits (entropy) into the cryptographic hash function. It should be impossible to construct a collision (beyond weaknesses in the underlying SHA-512 which is considered safe in year 2014). The biggest conceptual weakness is XOR-ing of sha-512 hashed elements in maps and sets.
64 |
65 | *Once released, I'll offer a 100 $ bounty for proof of any collision, just open a github issue. This hashing is an important building block for distributed systems to me.*
66 |
67 | ## Speed
68 |
69 | The first versions were just built around safety, but performed poorly with large values. The speed should be sufficient to be in the same order of magnitude as transmission speed (throughput + latency) over slow to mid-range internet broadband connections. If you want to transmit larger values fast, you may want to choose a sequential binary encoding with native hashing speed. JavaScript performance is still significantly slower (~10x), seemingly due to the lack of native SHA hashing routines.
70 |
71 | *These are just micro-benchmarks on my 3 year old laptop; I mention them only so you can get an impression.*
72 |
73 | ~~~clojure
74 | ;; most important and worst case, what can be done?
75 | hasch.platform> (let [val (into {} (doall (map vec (partition 2
76 | (interleave (range 1000000)
77 | (range 1000000))))))]
78 | (bench (-coerce val sha512-message-digest)))
79 | Evaluation count : 60 in 60 samples of 1 calls.
80 | Execution time mean : 3.596037 sec
81 | Execution time std-deviation : 23.812536 ms
82 | Execution time lower quantile : 3.566430 sec ( 2.5%)
83 | Execution time upper quantile : 3.647540 sec (97.5%)
84 | Overhead used : 2.039920 ns
85 |
86 | Found 4 outliers in 60 samples (6.6667 %)
87 | low-severe 3 (5.0000 %)
88 | low-mild 1 (1.6667 %)
89 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
90 | nil
91 |
92 | hasch.platform> (let [val (doall (range 1000000))]
93 | (bench (-coerce val sha512-message-digest)))
94 | Evaluation count : 240 in 60 samples of 4 calls.
95 | Execution time mean : 297.320276 ms
96 | Execution time std-deviation : 2.683060 ms
97 | Execution time lower quantile : 293.217179 ms ( 2.5%)
98 | Execution time upper quantile : 302.059975 ms (97.5%)
99 | Overhead used : 2.039920 ns
100 |
101 | Found 1 outliers in 60 samples (1.6667 %)
102 | low-severe 1 (1.6667 %)
103 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
104 | nil
105 |
106 | hasch.platform> (let [val (doall (into #{} (range 1000000)))]
107 | (bench (-coerce val sha512-message-digest)))
108 | Evaluation count : 60 in 60 samples of 1 calls.
109 | Execution time mean : 2.733429 sec
110 | Execution time std-deviation : 15.463782 ms
111 | Execution time lower quantile : 2.708645 sec ( 2.5%)
112 | Execution time upper quantile : 2.758701 sec (97.5%)
113 | Overhead used : 2.039920 ns
114 |
115 | Found 1 outliers in 60 samples (1.6667 %)
116 | low-severe 1 (1.6667 %)
117 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
118 | nil
119 |
120 | hasch.platform> (let [val (doall (repeat 1000000 "hello world"))]
121 | (bench (-coerce val sha512-message-digest)))
122 | WARNING: Final GC required 1.472161970438994 % of runtime
123 | Evaluation count : 120 in 60 samples of 2 calls.
124 | Execution time mean : 873.084789 ms
125 | Execution time std-deviation : 5.753430 ms
126 | Execution time lower quantile : 862.909606 ms ( 2.5%)
127 | Execution time upper quantile : 885.560937 ms (97.5%)
128 | Overhead used : 2.039920 ns
129 |
130 | Found 2 outliers in 60 samples (3.3333 %)
131 | low-severe 2 (3.3333 %)
132 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
133 | nil
134 |
135 | hasch.platform> (let [val (doall (repeat 1000000 :foo/bar))]
136 | (bench (-coerce val sha512-message-digest)))
137 | WARNING: Final GC required 1.072577784478402 % of runtime
138 | Evaluation count : 120 in 60 samples of 2 calls.
139 | Execution time mean : 756.394263 ms
140 | Execution time std-deviation : 2.935836 ms
141 | Execution time lower quantile : 750.827152 ms ( 2.5%)
142 | Execution time upper quantile : 761.299697 ms (97.5%)
143 | Overhead used : 2.039920 ns
144 |
145 | Found 1 outliers in 60 samples (1.6667 %)
146 | low-severe 1 (1.6667 %)
147 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
148 | nil
149 |
150 | hasch.platform> (let [val (byte-array (* 1024 1024 300) (byte 42))] ;; 300 mib bytearray
151 | (bench (-coerce val sha512-message-digest)))
152 | Evaluation count : 60 in 60 samples of 1 calls.
153 | Execution time mean : 1.987549 sec
154 | Execution time std-deviation : 134.189868 ms
155 | Execution time lower quantile : 1.901676 sec ( 2.5%)
156 | Execution time upper quantile : 2.304744 sec (97.5%)
157 | Overhead used : 1.967460 ns
158 |
159 | Found 3 outliers in 60 samples (5.0000 %)
160 | low-severe 3 (5.0000 %)
161 | Variance from outliers : 50.1416 % Variance is severely inflated by outliers
162 | nil
163 |
164 | hasch.platform> (let [val (doall (vec (repeat 10000 {:db/id 18239
165 | :person/name "Frederic"
166 | :person/familyname "Johanson"
167 | :person/street "Fifty-First Street 53"
168 | :person/postal 38237
169 | :person/phone "02343248474"
170 | :person/weight 38.23})))]
171 | (bench (-coerce val sha512-message-digest)))
172 | WARNING: Final GC required 1.2237845534164749 % of runtime
173 | Evaluation count : 240 in 60 samples of 4 calls.
174 | Execution time mean : 322.164678 ms
175 | Execution time std-deviation : 1.821136 ms
176 | Execution time lower quantile : 318.232462 ms ( 2.5%)
177 | Execution time upper quantile : 325.916354 ms (97.5%)
178 | Overhead used : 2.039920 ns
179 |
180 | Found 4 outliers in 60 samples (6.6667 %)
181 | low-severe 2 (3.3333 %)
182 | low-mild 1 (1.6667 %)
183 | high-mild 1 (1.6667 %)
184 | Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
185 | nil
186 |
187 | ~~~
188 |
189 |
190 | # Changes
191 | - 0.3.5 Support BigInteger and BigDecimal hashing (same as for limited precision types).
192 | - 0.3.4 Expose high-level base64 hashes with full precision.
193 | - 0.3.2 Minimize dependencies, explicit profiles for different Clojure(Script) versions
194 | - 0.3.1 fix bug in hashing sequences containing null
195 | - 0.3.0 fix accidental hashing of records as maps
196 | - 0.3.0-beta4 fix record serialization with incognito
197 | - 0.3.0 Overhaul encoding for ~10x-20x better performance on the JVM. Use safe SHA-512. Add byte-array support for blobs.
198 | - 0.2.3 properly dispatch on IRecord (instead of IMap)
199 | - 0.2.2 cannot coerce record tags because of conflicts, rather extend record to properly print
200 | - 0.2.1 fix tag coercion on JVM
201 |
202 | ## Extension to your own types
203 |
204 | *Warning*: Getting all that right is not trivial. Don't mess with hashing extension if you don't have to, just make your type uniquely mappable with [incognito](https://github.com/replikativ/incognito)!
205 |
206 | You can avoid the mapping step to Clojure data structures (which also effectively doubles memory allocation) by extending the `hasch.benc/PHashCoercion` protocol to your types. You should model your extension on the `IRecord` implementation and must use `(:literal magics)` to avoid collisions with literal values of the same form. Whether you use the default serialisation mechanism to retrieve a hash-value or extend the hash-coercion, your serialisation or coercion must satisfy the *equality relation*:
207 |
208 | - hashes *must* follow `IEquiv` equality of Clojure(Script): `(= a b) <=> (= (edn-hash a) (edn-hash b))`, `(not= a b) <=> (not= (edn-hash a) (edn-hash b))`: Your serialisation has to be *unique*, hashing has to be injective, or in other words, you must not introduce collisions. Non-equal objects must have non-equal hashes.
209 | - *reflexivity*: `(= (edn-hash a) (edn-hash a))`, including on different runtimes
210 | - *symmetry*: `(= (edn-hash a) (edn-hash b)) <=> (= (edn-hash b) (edn-hash a))` (trivial because of `=`)
211 | - *transitivity*: `(and (= (edn-hash a) (edn-hash b)) (= (edn-hash b) (edn-hash c))) => (= (edn-hash a) (edn-hash c))` (also trivial because of `=`)
212 |
213 |
214 | # TODO
215 | - Use test.check/double.check property based tests between Java and JS (?)
216 | - Nested collections are hashed with the supplied hash-fn before they contribute to the hash-value. This allows forming a Merkle-tree-like persistent data structure by breaking out collection values, so you can rehash top-level collections without pulling the whole value into memory. This is not tested yet; a git-like store could be implemented, e.g. in [konserve](https://github.com/replikativ/konserve). This should also be useful for building durable indexes. But it might prove to need runtime tweaking, e.g. depending on value size.
217 | - If keeping sorted maps/sets is feasible for high-throughput applications, allow hashing them sequentially.
218 |
219 | # Contributors
220 | - Max Penet
221 | - James Conroy-Finn
222 | - Konrad Kühne
223 | - Christian Weilbach
224 |
225 | ## License
226 |
227 | Copyright © 2014-2018 Christian Weilbach and contributors
228 |
229 | Distributed under the Eclipse Public License either version 1.0 or (at
230 | your option) any later version.
231 |
--------------------------------------------------------------------------------