├── .gitignore ├── CHANGELOG.md ├── TODO.md ├── project.clj ├── test └── tech │ └── droit │ ├── fset │ ├── index_test.clj │ └── bench_test.clj │ └── fset_test.clj ├── other-benchmarks.md ├── LICENSE ├── src └── tech │ └── droit │ ├── fset │ └── index.clj │ └── fset.clj └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | *.iml 15 | .idea 16 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [0.1.1] 4 | 5 | * Cleanup: namespace renaming, extract `index.clj` new namespace out of `fset.clj`. 6 | * Added `fset/rename-keys` 7 | * Fixed `fset/join` bug for arity-3, added test coverage. 8 | 9 | ## [0.1.0] - 2020-12-31 10 | 11 | - First release. 12 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | ### Where to go next? 2 | 3 | In no particular order and requiring some additional thinking: 4 | 5 | * Other interesting set-related or relational functions, for example `powerset` or `cartesian-product`. These would build on top of fset functions but in a separate namespace. Their effectiveness should be verified by comparing with existing libs or approaches (for example math.combinatorics). 6 | * Restricting the type of elements (for example, sets of integers only) gives access to bit-wise operations and speed improvements. This would be a dedicated namespace with its own functions. 
7 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject tech.droit/fset "0.1.1" 2 | :description "A faster implementation of clojure.set and additional relational algebra functions." 3 | :url "https://github.com/droitfintech/fset" 4 | :license {:name "EPL-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.10.1"]] 7 | :repl-options {:init-ns tech.droit.fset 8 | :timeout 5000000} 9 | :global-vars {*warn-on-reflection* true} 10 | 11 | :repositories [["releases" {:url "https://repo.clojars.org" 12 | :creds :gpg}]] 13 | :profiles 14 | {:dev {:dependencies [[org.clojure/test.check "1.1.0"] 15 | [criterium "0.4.6"]]}} 16 | :test-selectors 17 | {:default #(not (some #{:bench} (cons (:tag %) (keys %)))) 18 | :bench :bench 19 | :all (constantly true)}) 20 | -------------------------------------------------------------------------------- /test/tech/droit/fset/index_test.clj: -------------------------------------------------------------------------------- 1 | (ns tech.droit.fset.index-test 2 | (:import [java.util Map]) 3 | (:require 4 | [clojure.test :refer :all] 5 | [criterium.core :refer [bench quick-bench quick-benchmark]] 6 | [tech.droit.fset.index :as index] 7 | [tech.droit.fset :as fset])) 8 | 9 | (deftest key-for-values-test 10 | (is (= [1] (into [] (.a (index/abox 1))))) 11 | (is (= [1 2] (into [] (.a (index/abox 1 2))))) 12 | (is (= [1 2 3] (into [] (.a (index/abox 1 2 3))))) 13 | (is (= [1 2 3 4] (into [] (.a (index/abox 1 2 3 4))))) 14 | (is (= [1 2 3 4 5] (into [] (.a (index/abox 1 2 3 4 5)))))) 15 | 16 | (deftest key-from-test 17 | (let [m {1 1 2 2 3 3 4 4 5 5 6 6}] 18 | (is (= [1] (into [] (.a (index/key-from m 1))))) 19 | (is (= [1 2] (into [] (.a (index/key-from m 1 2))))) 20 | (is (= [1 2 3] (into [] (.a (index/key-from m 1 2 3))))) 21 | (is (= [1 2 3 4] (into [] (.a 
(index/key-from m 1 2 3 4))))) 22 | (is (= [1 2 3 4 5] (into [] (.a (index/key-from m 1 2 3 4 5))))) 23 | (is (= [1 2 3 4 5 6] (into [] (.a (fset/select-keys* m [1 2 3 4 5 6]))))))) 24 | 25 | (deftest index-from-test 26 | (let [rel #{{:a 10 :b 2 :c 30 :d 4 :e 50} 27 | {:a 11 :b 21 :c 31 :d 41 :e 51} 28 | {:a 12 :b 22 :c 32 :d 42 :e 52} 29 | {:a 1 :b 2 :c 3 :d 43 :e 53} 30 | {:a 14 :b 24 :c 34 :d 44 :e 54} 31 | {:a 1 :b 2 :c 3 :d 45 :e 55} 32 | {:a 16 :b 26 :c 36 :d 46 :e 56}}] 33 | (is (= [{:a 10 :b 2 :c 30 :d 4 :e 50}] 34 | (.get ^Map (index/index-for rel :b :d) (index/abox 2 4)))) 35 | (is (= [{:a 1 :b 2 :c 3 :d 45 :e 55} {:a 1 :b 2 :c 3 :d 43 :e 53}] 36 | (.get ^Map (index/index-for rel :a :b :c) (index/abox 1 2 3)) 37 | (.get ^Map (fset/index* rel [:a :b :c]) (index/abox 1 2 3)))))) 38 | 39 | -------------------------------------------------------------------------------- /other-benchmarks.md: -------------------------------------------------------------------------------- 1 | # Additional Benchmarks 2 | 3 | This document collects additional benchmarks achieved by replacing `clojure.set` with `fset` in some popular open source projects. 4 | 5 | ## Datascript 6 | 7 | * Version 1.1.0 8 | * SHA ae62fa6 9 | * openjdk version "11.0.9.1" 2020-11-04 10 | 11 | Datascript uses `clojure.set` in a few critical sections and it comes equipped with a ready to run set of benchmarks. Lower means faster. 12 | 13 | ``` 14 | | add-1 | add-5 | add-all | init | retract-5 | q1 | q2 | q3 | q4 | qpred1 | qpred2 | freeze | thaw 15 | before| 650.2 | 913.1 | 878.3 | 33.4 | 622.2 | 2.4 | 6.2 | 9.6 | 15.0 | 8.8 | 31.1 | 823.1 | 1995.8 16 | after | 658.2 | 901.8 | 861.3 | 28.3 | 665.3 | 2.1 | 6.1 | 9.1 | 14.3 | 9.0 | 30.7 | 833.5 | 1965.7 17 | ``` 18 | 19 | Verdict: interesting. 20 | The most important benchmarks for fset are the query related `q1-q4`, which are consistently faster. 
21 | Notes: had to coerce [one instance](https://github.com/tonsky/datascript/blob/4f1af628d5650e0ca0ffd0b6b384941eef2c37fb/src/datascript/query.cljc#L737) of `subset?` with `(set vars)`. 22 | 23 | ## Riemann 24 | 25 | * Version 0.3.7-SNAPSHOT 26 | * SHA 2d590cf 27 | * openjdk version "11.0.9.1" 2020-11-04 28 | 29 | [Riemann](https://github.com/riemann/riemann) is a popular distributed monitoring system. It depends on `clojure.set` in some core parts. 30 | 31 | ``` 32 | | indexing | expiring 33 | before| 28.775671 ms | 13.143244 ns 34 | after | 26.060983 ms | 14.106172 ns 35 | ``` 36 | 37 | Verdict: inconclusive. 38 | Notes: had to modify benchmarks to use Criterium, as `dotimes` was too unreliable. 39 | 40 | ## Crux 41 | 42 | 43 | * Version 1.17.1 44 | * SHA 11fd8257 45 | * openjdk version "11.0.9.1" 2020-11-04 46 | 47 | [Crux](https://github.com/juxt/crux) is a graph-oriented bitemporal database written in Clojure. Its query engine uses `clojure.set` mostly for the compilation phase, which is still part of the programming interface although not strictly connected to the execution path. The following are the results of running the query-related [benchmarks](https://github.com/juxt/crux/blob/master/crux-bench/src/crux/bench/tpch.clj): 48 | 49 | ``` 50 | | run-tpch-queries 51 | before| 339841.871333 ms 52 | after | 291307.660398 ms 53 | ``` 54 | 55 | Verdict: interesting. 56 | Notes: running of benchmarks was achieved with the help of the Crux team, but no modifications (other than introducing fset) were necessary. 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 2.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION 5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. 
DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. 
Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | -------------------------------------------------------------------------------- /src/tech/droit/fset/index.clj: -------------------------------------------------------------------------------- 1 | (ns tech.droit.fset.index 2 | (:import 3 | [java.util Arrays Map HashMap ArrayList])) 4 | 5 | ;; Wrapper around an object array instance that redefines hashCode and equals 6 | ;; by array content, so instances can be used as hash-map keys. `equals` returns false (rather than throwing) when `other` is not an ABox. 7 | (deftype ABox [^objects a] 8 | Object 9 | (equals [this other] 10 | (and (instance? ABox other) (Arrays/equals a ^objects (.a ^ABox other)))) 11 | (hashCode [this] 12 | (Arrays/hashCode a))) 13 | 14 | ;; Pretty print ABox. The writer is hinted as java.io.Writer because print-method is handed whatever `*out*` is bound to, not only a StringWriter. 15 | (defmethod print-method tech.droit.fset.index.ABox 16 | [^ABox abox ^java.io.Writer writer] 17 | (.append writer (str "abox" (into [] (.a abox))))) 18 | 19 | (defn abox 20 | "ABox constructor." 21 | (^ABox [k1] 22 | (ABox. 23 | (doto (object-array 1) 24 | (aset 0 k1)))) 25 | (^ABox [k1 k2] 26 | (ABox. 27 | (doto (object-array 2) 28 | (aset 0 k1) (aset 1 k2)))) 29 | (^ABox [k1 k2 k3] 30 | (ABox. 31 | (doto (object-array 3) 32 | (aset 0 k1) (aset 1 k2) (aset 2 k3)))) 33 | (^ABox [k1 k2 k3 k4] 34 | (ABox. 35 | (doto (object-array 4) 36 | (aset 0 k1) (aset 1 k2) (aset 2 k3) (aset 3 k4)))) 37 | (^ABox [k1 k2 k3 k4 k5] 38 | (ABox. 
39 | (doto (object-array 5) 40 | (aset 0 k1) (aset 1 k2) (aset 2 k3) (aset 3 k4) (aset 4 k5))))) 41 | 42 | (defn key-from 43 | "Alternative to core/select-keys when the number of keys is known. Returns 44 | an `ABox` instance containing the values corresponding to the keys." 45 | (^ABox [^Map m k1] 46 | (abox (.get m k1))) 47 | (^ABox [^Map m k1 k2] 48 | (abox (.get m k1) (.get m k2))) 49 | (^ABox [^Map m k1 k2 k3] 50 | (abox (.get m k1) (.get m k2) (.get m k3))) 51 | (^ABox [^Map m k1 k2 k3 k4] 52 | (abox (.get m k1) (.get m k2) (.get m k3) (.get m k4))) 53 | (^ABox [^Map m k1 k2 k3 k4 k5] 54 | (abox (.get m k1) (.get m k2) (.get m k3) (.get m k4) (.get m k5)))) 55 | 56 | (defn indexer 57 | "Creates a hashed index to use for join operations. `rel` is a relation 58 | of maps with uniform keys. `kf` is a function of a map into its index key 59 | (an `ABox` of the selected values when built with `key-from`). Returns a mutable java.util.HashMap from 60 | those keys to a java.util.ArrayList of the maps sharing that key." 61 | ^HashMap [rel kf] 62 | (let [out (HashMap.)] 63 | (loop [items (seq rel)] 64 | (if items 65 | (let [item (first items) 66 | k (kf item) 67 | ^ArrayList a (or (.get out k) (ArrayList.))] ; single lookup: stored values are never nil 68 | (.put out k (doto a (.add item))) 69 | (recur (next items))) 70 | out)))) 71 | 72 | (defn index-for 73 | "Public interface to `indexer` specialized for different arities." 
74 | (^HashMap [rel k1] 75 | (indexer rel #(key-from % k1))) 76 | (^HashMap [rel k1 k2] 77 | (indexer rel #(key-from % k1 k2))) 78 | (^HashMap [rel k1 k2 k3] 79 | (indexer rel #(key-from % k1 k2 k3))) 80 | (^HashMap [rel k1 k2 k3 k4] 81 | (indexer rel #(key-from % k1 k2 k3 k4))) 82 | (^HashMap [rel k1 k2 k3 k4 k5] 83 | (indexer rel #(key-from % k1 k2 k3 k4 k5)))) 84 | -------------------------------------------------------------------------------- /test/tech/droit/fset_test.clj: -------------------------------------------------------------------------------- 1 | (ns tech.droit.fset-test 2 | "Thanks Dmitry Dzhus @dzhus for contributing the initial set of generative tests." 3 | (:require 4 | [clojure.test.check.clojure-test :refer [defspec]] 5 | [clojure.test.check.generators :as gen] 6 | [clojure.test.check.properties :as prop] 7 | [clojure.set :as cset] 8 | [clojure.string :as string] 9 | [clojure.test :refer :all] 10 | [tech.droit.fset :as fset])) 11 | 12 | (def gen-v (gen/vector gen/char-alpha 4 20)) 13 | (def gen-str (gen/fmap string/join gen-v)) 14 | (def gen-sym (gen/fmap symbol gen-str)) 15 | (def gen-kw (gen/fmap keyword gen-str)) 16 | 17 | (defn relation-instance 18 | [ks] 19 | (apply gen/hash-map 20 | (interleave ks 21 | (repeatedly 22 | #(gen/one-of [gen-str gen-sym gen-kw]))))) 23 | 24 | (defn relation 25 | ([] (relation (gen/sample gen-sym))) 26 | ([ks] 27 | (gen/set 28 | (relation-instance ks) 29 | {:min-elements 2 :max-elements 10}))) 30 | 31 | (defn sorted-instance 32 | ([] (sorted-instance gen-str)) 33 | ([gen] 34 | (gen/sorted-set 35 | gen 36 | {:min-elements 2 :max-elements 10}))) 37 | 38 | (def ^:const rep 100) 39 | 40 | (defspec rename-correctness 41 | rep 42 | (prop/for-all [r (relation (into [:a :b :c] (gen/sample gen-kw)))] 43 | (= (cset/rename r {:a 1 :b 2 :c 3}) 44 | (fset/rename r {:a 1 :b 2 :c 3})))) 45 | 46 | (defspec union-correctness 47 | rep 48 | (prop/for-all [r1 (relation) r2 (relation)] 49 | (and 50 | (= (cset/union r1 r2) 
(fset/union r1 r2)) 51 | (= r1 (cset/union r1 r1) (fset/union r1 r1))))) 52 | ;; The sorted-set check and the relation check are combined with `and`: a property body returns only its last form. 53 | (defspec intersection-correctness 54 | rep 55 | (prop/for-all [r1 (relation) 56 | r2 (relation) 57 | sr1 (sorted-instance gen/small-integer) 58 | sr2 (sorted-instance gen/small-integer)] 59 | (and (= (fset/intersection sr1 sr2) (cset/intersection sr1 sr2)) 60 | (let [relation-instance (first r1) 61 | r2 (conj r2 relation-instance)] 62 | (= #{relation-instance} 63 | (cset/intersection r1 r2) 64 | (fset/intersection r1 r2)))))) 65 | 66 | (defspec difference-correctness 67 | rep 68 | (prop/for-all 69 | [r1 (relation) r2 (relation) sr1 (sorted-instance) sr2 (sorted-instance)] 70 | (and (= (cset/difference r1 r2) (fset/difference r1 r2)) 71 | (= (cset/difference r2 r1) (fset/difference r2 r1)) 72 | (= #{} (cset/difference r1 r1) (fset/difference r1 r1)) 73 | (= (cset/difference sr1 sr2) (fset/difference sr1 sr2))))) 74 | ;; Both the relation check and the sorted-set check must hold, hence the `and`. 75 | (defspec select-correctness 76 | rep 77 | (prop/for-all 78 | [r1 (relation) sr (sorted-instance gen/small-integer)] 79 | (let [pred (comp #(> (count %) 3) #(filter keyword? %) vals)] 80 | (and (= (cset/select pred r1) (fset/select pred r1)) 81 | (= (cset/select odd? 
sr)))))) 82 | 83 | (defspec project-correctness 84 | rep 85 | (prop/for-all 86 | [r1 (relation)] 87 | (let [ks (random-sample 0.5 (keys (first r1)))] 88 | (= (cset/project r1 ks) (fset/project r1 ks))))) 89 | 90 | (defspec join-correctness 91 | rep 92 | (prop/for-all [r1 (relation (into [:a] (gen/sample gen-kw))) 93 | r2 (relation (into [:a] (gen/sample gen-kw)))] 94 | (let [i1 (first r1) 95 | i2 (first r2) 96 | r1 (conj r1 (assoc i1 :a "join-value")) 97 | r2 (conj r2 (assoc i2 :a "join-value"))] 98 | (= #{(assoc (merge i1 i2) :a "join-value")} 99 | (cset/join r1 r2) 100 | (fset/join r1 r2) 101 | (fset/join r1 r2 {:a :a}))))) 102 | 103 | (defspec subset-superset-correctness 104 | rep 105 | (prop/for-all 106 | [xrel (relation)] 107 | (let [sub (set (take 3 xrel))] 108 | (and 109 | (= true (cset/subset? sub xrel) (fset/subset? sub xrel)) 110 | (= true (cset/superset? xrel sub) (fset/superset? xrel sub)))))) 111 | 112 | (defspec rename-keys-correctness 113 | rep 114 | (prop/for-all 115 | [m (relation-instance (into (range 400) (gen/sample gen-kw))) 116 | kmap (relation-instance (into (range 30) (gen/sample gen-kw)))] 117 | (= (cset/rename-keys m kmap) (fset/rename-keys m kmap)))) 118 | 119 | (deftest select-keys-test 120 | (is (= {:a 1 :b 2} (fset/select-keys {:a 1 :b 2 :c 3} [:a :b])))) 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fset - Fast set/relational library 2 | 3 | `fset` is a faster implementation of the functions in Clojure [core.set](https://clojure.github.io/clojure/#clojure.set). 4 | 5 | All functions in `fset` are as compatible as possible with `clojure.set`. 
There are minor differences, for example requiring input arguments to actually implement `IPersistentSet` (while `clojure.set` also allows other collection types as input with [unpredictable results](https://clojuredocs.org/clojure.set/union#example-5b5a7837e4b00ac801ed9e2e), so this is sort of a feature). The library uses [generative tests](https://github.com/droitfintech/fset/blob/main/test/tech/droit/fset_test.clj) to verify compatibility with core functions (run with `lein test`). 6 | 7 | The library also includes functions with a different interface from those in `clojure.set` to further improve speed. These functions are clearly identified by ending with "*". For example, `fset` contains both `index` and `index*`: `index` is already faster than `clojure.set/index` and has the same interface. `fset/index*` is even faster than `fset/index` but it breaks compatibility with `clojure.set/index` by returning a mutable `java.util.HashMap` instead of `clojure.lang.PersistentHashMap`. 8 | 9 | `fset` also implements other non-set related functions from `clojure.core` when they are instrumental in improving the performance of those in `clojure.set` (this is the case of `fset/select-keys` for example). 10 | 11 | For additional info, watch this talk at the *London Clojurians Meetup*: 12 | 13 | [![FSet: a faster set library](https://img.youtube.com/vi/vE8d-tXOuXo/0.jpg)](https://www.youtube.com/watch?v=vE8d-tXOuXo) 14 | 15 | 16 | ## Usage 17 | 18 | Add `[tech.droit/fset "0.1.1"]` to your dependencies and then: 19 | 20 | ```clojure 21 | (require '[tech.droit.fset :as fset] 22 | '[clojure.set :as cset]) 23 | 24 | (def s1 (set (range 10))) 25 | (def s2 (set (range 20))) 26 | 27 | (= (cset/intersection s1 s2) 28 | (fset/intersection s1 s2)) 29 | ;; true 30 | ``` 31 | 32 | ## Perf showcase 33 | 34 | A comparison with `clojure.set` functions as measured in `test/tech/droit/fset/bench_test.clj` with the [Criterium](https://github.com/hugoduncan/criterium) library. 
Smaller numbers mean faster computation. All results below are obtained using small sets (of 10-100 items), but the improvement should be bigger on larger sets. 35 | 36 | ### Set functions 37 | 38 | Union (~50% speedup): 39 | 40 | ```clojure 41 | 1.80E-6 cset 42 | 0.96E-6 fset (union) 43 | ``` 44 | 45 | Intersection (~43% speedup): 46 | 47 | ```clojure 48 | 7.4855E-6 cset 49 | 4.2227E-6 intersection (compatible) 50 | ``` 51 | 52 | Difference (~43% speedup): 53 | 54 | ```clojure 55 | 7.33E-6 cset 56 | 4.13E-6 difference 57 | ``` 58 | 59 | Subset? (~80% speedup): 60 | 61 | ```clojure 62 | 6.95E-7 cset 63 | 1.48E-7 fset 64 | ``` 65 | 66 | Superset? (~80% speedup): 67 | 68 | ```clojure 69 | 7.11E-7 cset 70 | 1.49E-7 fset 71 | ``` 72 | 73 | ### Relation functions 74 | 75 | Relation functions operate on sets of maps, where all maps have the same set of keys. 76 | 77 | Select (~25% speedup): 78 | 79 | ```clojure 80 | 2.44E-6 cset 81 | 1.85E-6 fset 82 | ``` 83 | 84 | Project (~50% speedup): 85 | 86 | ```clojure 87 | 1.03E-5 cset 88 | 0.55E-5 fset 89 | ``` 90 | 91 | Join (~70% speedup): 92 | 93 | ```clojure 94 | 2.77E-5 cset 95 | 0.79E-5 fset 96 | ``` 97 | 98 | Index (~33% speedup): 99 | 100 | ```clojure 101 | 1.37E-5 cset 102 | 0.91E-5 fset 103 | ``` 104 | 105 | ### Other functions 106 | 107 | `clojure.set/rename-keys` (~38% speedup) 108 | 109 | ```clojure 110 | 3.00E-6 cset 111 | 1.88E-6 fset 112 | ``` 113 | 114 | `fset/maps` (map over sets). 
There is no corresponding single function in `clojure.set` for this, but closest `clojure.core` expression is shown below (~42% speedup): 115 | 116 | ```clojure 117 | (def xs (set (range 1000))) 118 | 2.72E-4 (into #{} (map inc xs)) 119 | 1.60E-4 (fset/maps inc xs) 120 | 121 | ``` 122 | 123 | `clojure.core/select-keys` has been re-implemented in `fset` (~ 55% speedup): 124 | 125 | ```clojure 126 | 2.48E-6 core 127 | 1.09E-6 fset (select-keys) 128 | ``` 129 | 130 | `kset` is also new: given a relation (a set of maps with the same keys) find the set of keys repeating in each map (~ 44% speedup): 131 | 132 | ```clojure 133 | 6.31E-7 (set (keys (first rel))) 134 | 3.57E-7 (fset/kset rel) 135 | ``` 136 | 137 | ## Contributors 138 | 139 | Many thanks to: 140 | 141 | * [Dmitry Dzhus](https://github.com/dzhus) generative testing suite 142 | 143 | ## License 144 | 145 | Copyright © 2021 Droit Financial Technologies 146 | 147 | This program and the accompanying materials are made available under the 148 | terms of the Eclipse Public License 2.0 which is available at 149 | http://www.eclipse.org/legal/epl-2.0. 150 | -------------------------------------------------------------------------------- /test/tech/droit/fset/bench_test.clj: -------------------------------------------------------------------------------- 1 | (ns tech.droit.fset.bench-test 2 | (:require 3 | [clojure.test :refer :all] 4 | [criterium.core :refer [bench quick-bench quick-benchmark benchmark]] 5 | [tech.droit.fset :as fset] 6 | [clojure.set :as cset])) 7 | 8 | (defmacro b [expr] `(first (:mean (quick-benchmark ~expr {})))) 9 | 10 | (deftest ^:bench rename-keys-bench 11 | (let [m (zipmap (range 100) (range 100)) 12 | kmap {4 40 14 140 45 450 51 510 60 600 69 690 90 900}] 13 | (is (= (cset/rename-keys m kmap) (fset/rename-keys m kmap))) 14 | ; 3.006680459548847E-6 cset 15 | ; 1.8890617169285616E-6 fset (rename-keys) 16 | (is 17 | (nil? 
18 | (println 19 | (b (cset/rename-keys m kmap)) 20 | (str "cset\n" (b (fset/rename-keys m kmap)) " fset (rename-keys)\n")))))) 21 | 22 | (deftest ^:bench maps-bench 23 | (let [xrel '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 24 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3} 25 | {d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 26 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}}] 27 | (is (= (into #{} (map inc (set (range 100)))) (fset/maps inc (set (range 100))))) 28 | (is (= (into #{} (map #(assoc % :new 0) xrel)) (fset/maps #(assoc % :new 0) xrel))) 29 | ; 2.720073097731239E-4 core map over set 30 | ; 1.6059608771929826E-4 fset (maps) 31 | (is 32 | (nil? 33 | (println 34 | (let [s (set (range 1000))] 35 | (str (b (into #{} (map inc s))) 36 | " core map over set \n" 37 | (b (fset/maps inc s)) 38 | " fset (maps)\n"))))) 39 | ; 5.3075209319225114E-6 core into set 40 | ; 3.256621217417093E-6 fset (maps) -- both timings go through `str` so the core baseline is printed too 41 | (is 42 | (nil? 43 | (println 44 | (let [f #(assoc % :new 0)] 45 | (str (b (into #{} (map f xrel))) 46 | " core into set\n" (b (fset/maps f xrel)) " fset (maps)\n"))))))) 47 | 48 | (deftest ^:bench select-keys-test 49 | (let [m (zipmap (range 100) (range 100 200)) 50 | ks (range 0 100 10)] 51 | (is (= (select-keys m ks) (fset/select-keys m ks))) 52 | (is (= (select-keys m [10 30 50]) (fset/select-key m 10 30 50))) 53 | ; 2.481968165076176E-6 core 54 | ; 1.0907595441876438E-6 fset (select-keys) 55 | (is 56 | (nil? 57 | (println 58 | (b (select-keys m ks)) 59 | (str " core\n" (b (fset/select-keys m ks)) " fset (select-keys)\n")))) 60 | ; 8.155316297166944E-7 core 61 | ; 4.1558029460823247E-7 fset (select-key) 62 | (is 63 | (nil? 
64 | (println 65 | (b (select-keys m [10 30 50])) 66 | (str "core\n" (b (fset/select-key m 10 30 50)) " fset (select-key)\n")))))) 67 | 68 | (deftest ^:bench union-bench 69 | (let [s1 '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 70 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3}} 71 | s2 '#{{d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 72 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}} 73 | s3 '#{{d d3 c c9 a a1 b b2} {d d3 c c3 a a21 b b1} 74 | {d d3 c c11 a a3 b b1}} 75 | s4 '#{{d d3 c c9 a a1 b b2} {d d3 c c3 a a21 b b1} 76 | {d d3 c c11 a a3 b b1} {d d10} {c c14}} 77 | s5 '#{{d d3 c c9 a a1 b b2} {d d3 c c3 a a21 b b1} 78 | {d d3 c c11 a a3 b b1} {d d10} {c c14} {a a21} {a a22}}] 79 | (is (= (cset/union s1 s2) (fset/union s1 s2))) 80 | (is (= (cset/union s1 s2 s3) (fset/union s1 s2 s3))) 81 | (is (= (cset/union s1 s2 s3 s4) (fset/union s1 s2 s3 s4))) 82 | (is (= (cset/union s1 s2 s3 s4 s5) (fset/union s1 s2 s3 s4 s5))) 83 | 84 | ; 2.3562738773283106E-6 cset 85 | ; 1.1675215159560017E-6 fset (union) 86 | ; (is (nil? (println (b (cset/union s1 s2)) (str "cset\n" (b (fset/union s1 s2)) " fset (union)\n")))) 87 | 88 | ; 2.2575491533853774E-6 cset 89 | ; 1.65369902375777E-6 fset 90 | ; (is (nil? (println (b (cset/union s1 s2 s3)) (str "cset\n" (b (fset/union s1 s2 s3)) " fset\n")))) 91 | 92 | ; (is (nil? (println (b (cset/union s1 s2 s3 s4)) (str "cset\n" (b (fset/union s1 s2 s3 s4)) " fset\n")))) 93 | ; (is (nil? (println (b (cset/union s1 s2 s3 s4 s5)) (str "cset\n" (b (fset/union s1 s2 s3 s4 s5)) " fset\n")))) 94 | ; (is (nil? 
(println (b (cset/union s1 s2 s3 s4 s5 #{})) (str "cset\n" (b (fset/union s1 s2 s3 s4 s5 #{})) " fset (union)\n")))) 95 | )) 96 | 97 | (deftest ^:bench index-bench 98 | ; 1.3768563343788355E-5 cset 99 | ; 0.9147070550913031E-5 fset (index) 100 | (let [xrel '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 101 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3} 102 | {d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 103 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}} 104 | ks '[b d]] 105 | (is (= (cset/index xrel ks) (fset/index xrel ks))) 106 | (is 107 | (nil? 108 | (println 109 | (b (cset/index xrel ks)) 110 | (str "cset\n" (b (fset/index xrel ks)) " fset (index)\n")))))) 111 | 112 | (deftest ^:bench kset-bench 113 | (let [rel '#{{d d2 c c1 a a3 b b3} 114 | {d d2 c c2 a a2 b b3} 115 | {d d3 c c3 a a2 b b1} 116 | {d d3 c c1 a a3 b b1}}] 117 | (is (= (set (keys (first rel))) (fset/kset rel))) 118 | ; 6.315077613519734E-7 core 119 | ; 3.577445668244377E-7 fset (kset) 120 | (is 121 | (nil? 122 | (println 123 | (b (set (keys (first rel)))) 124 | (str "core\n" (b (fset/kset rel)) " fset (kset)\n")))) 125 | ; 6.226934114810922E-7 core 126 | ; 3.4987011405747016E-8 fset (kset-native) 127 | (is 128 | (nil? 129 | (println 130 | (b (set (keys (first rel)))) 131 | (str "core\n" (b (fset/kset-native rel)) " fset (kset-native)\n")))))) 132 | 133 | (deftest ^:bench intersection-bench 134 | (let [s1 (set (range 1 40)) 135 | s2 (set (range 30 80)) 136 | ss1 (apply sorted-set s1) 137 | ss2 (apply sorted-set s2)] 138 | (is (= (cset/intersection s1 s2) (fset/intersection s1 s2))) 139 | (is (= (cset/intersection s1 s2) (fset/intersection* s1 s2))) 140 | (is (= (cset/intersection ss1 ss2) (fset/intersection* ss1 ss2))) 141 | ; 7.1893204992967655E-6 cset 142 | ; 4.322495438183127E-6 intersection (compatible) 143 | (is 144 | (nil? 
145 | (println 146 | (b (cset/intersection s1 s2)) 147 | (str "cset\n" (b (fset/intersection s1 s2)) " intersection (compatible)\n")))) 148 | ; 9.937802765155177E-6 sorted cset 149 | ; 7.90225522716554E-6 sorted intersection (compatible) 150 | #_(is 151 | (nil? 152 | (println 153 | (b (cset/intersection ss1 ss2)) 154 | (str "sorted cset\n" (b (fset/intersection ss1 ss2)) " sorted intersection (compatible)\n")))) 155 | ; 6.842457065677723E-6 cset 156 | ; 3.2923806294768446E-6 intersection (native) 157 | #_(is 158 | (nil? 159 | (println 160 | (b (cset/intersection s1 s2)) 161 | (str "cset\n" (b (fset/intersection* s1 s2)) " intersection (native)\n")))))) 162 | 163 | (deftest ^:bench difference-bench 164 | (let [s1 (set (range 1 50)) 165 | s2 (set (range 30 800)) 166 | ss1 (apply sorted-set s1) 167 | ss2 (apply sorted-set s2)] 168 | (is (= (cset/difference s1 s2) (fset/difference s1 s2))) 169 | (is (= (cset/difference ss1 ss2) (fset/difference ss1 ss2))) 170 | ; 7.335592370838982E-6 cset 171 | ; 4.132627737969221E-6 difference 172 | (is 173 | (nil? 174 | (println 175 | (b (cset/difference s1 s2)) 176 | (str "cset\n" (b (fset/difference s1 s2)) " difference\n")))) 177 | ; 1.4298325121443442E-5 sorted cset 178 | ; 1.132363716780562E-5 sorted difference 179 | #_(is 180 | (nil? 181 | (println 182 | (b (cset/difference ss1 ss2)) 183 | (str "sorted cset\n" (b (fset/difference ss1 ss2)) " sorted difference\n")))) 184 | ; 5.0338101960004635E-6 cset 185 | ; 3.4274965213695293E-6 difference 186 | #_(is 187 | (nil? 
188 | (println 189 | (b (cset/difference s2 s1)) 190 | (str "cset\n" (b (fset/difference s2 s1)) " difference\n")))))) 191 | 192 | (deftest ^:bench select-bench 193 | (let [xrel '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 194 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3} 195 | {d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 196 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}} 197 | ss (apply sorted-set (range 100)) 198 | pred (comp #{'a3} 'a)] 199 | (is (= (cset/select pred xrel) (fset/select pred xrel))) 200 | (is (= (cset/select odd? ss) (fset/select odd? ss))) 201 | ; 1.5927641938447406E-5 sorted cset 202 | ; 1.4375441919551693E-5 sorted select 203 | (is 204 | (nil? 205 | (println 206 | (b (cset/select even? ss)) 207 | (str "sorted cset\n" (b (fset/select even? ss)) " sorted select\n")))) 208 | ; 2.44818399082862E-6 cset 209 | ; 1.8558549234948711E-6 select 210 | (is 211 | (nil? 212 | (println 213 | (b (cset/select pred xrel)) 214 | (str "cset\n" (b (fset/select pred xrel)) " select\n")))))) 215 | 216 | (deftest ^:bench project-test 217 | (let [xrel '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 218 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3} 219 | {d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 220 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}} 221 | ks '[a b c]] 222 | (is (= (cset/project xrel ks) (fset/project xrel ks))) 223 | (is (= (cset/project xrel ks) (fset/project* xrel 'a 'b 'c))) 224 | ; 1.0354762891861535E-5 cset 225 | ; 0.5515608075685698E-5 project 226 | (is 227 | (nil? 228 | (println 229 | (b (cset/project xrel ks)) 230 | (str "cset\n" (b (fset/project xrel ks)) " project\n")))) 231 | ; 1.0387998330899229E-5 cset 232 | ; 6.966055805671281E-6 project (WIP) 233 | (is 234 | (nil? 
235 | (println 236 | (b (cset/project xrel ks)) 237 | (str "cset\n" (b (fset/project* xrel 'a 'b 'c)) " project\n")))))) 238 | 239 | (deftest ^:bench join-bench 240 | ; 2.7759994340847945E-5 cset 241 | ; 0.7999750663668448E-5 fset (join) 242 | ; 0.7712902516469042E-5 fset (join with keys) 243 | (let [xrel '#{{d d2 c c1 a a3 b b3} 244 | {d d2 c c2 a a2 b b3} 245 | {d d2 c c3 a a2 b b3} 246 | {d d3 c c3 a a3 b b2} 247 | {d d3 c c2 a a1 b b2} 248 | {d d1 c c2 a a3 b b2} 249 | {d d3 c c3 a a2 b b1} 250 | {d d3 c c1 a a3 b b1}} 251 | yrel '#{{e e1 f f2 g g1 b b1} 252 | {e e3 f f1 g g2 b b8} 253 | {e e1 f f2 g g3 b b8} 254 | {e e3 f f1 g g2 b b3} 255 | {e e2 f f2 g g3 b b8} 256 | {e e1 f f3 g g1 b b8} 257 | {e e2 f f3 g g2 b b8} 258 | {e e1 f f3 g g1 b b2} 259 | {e e3 f f3 g g3 b b8}} 260 | x-keys (fset/kset-native xrel) 261 | y-keys (fset/kset-native yrel)] 262 | (is (= (cset/join xrel yrel) (fset/join xrel yrel))) 263 | (is (= (cset/join xrel yrel) (fset/join xrel yrel x-keys y-keys))) 264 | (is 265 | (nil? 266 | (println 267 | (b (cset/join xrel yrel)) 268 | (str "cset\n" (b (fset/join xrel yrel)) " fset (join)\n")))) 269 | (is 270 | (nil? 271 | (println 272 | (b (cset/join xrel yrel)) 273 | (str "cset\n" (b (fset/join xrel yrel x-keys y-keys)) " fset (join with keys)\n")))))) 274 | 275 | (deftest ^:bench subset-super-test 276 | (let [xrel '#{{d d1 c c2 a a3 b b2} {d d2 c c1 a a3 b b3} 277 | {d d2 c c2 a a2 b b3} {d d2 c c3 a a2 b b3} 278 | {d d3 c c3 a a3 b b2} {d d3 c c2 a a1 b b2} 279 | {d d3 c c3 a a2 b b1} {d d3 c c1 a a3 b b1}} 280 | sub (set (take 4 xrel))] 281 | (is (= true (cset/subset? sub xrel) (fset/subset? sub xrel))) 282 | (is (= true (cset/superset? xrel sub) (fset/superset? xrel sub))) 283 | ; 6.955503406380901E-7 cset 284 | ; 1.4824609776142325E-7 subset? 285 | (is 286 | (nil? 287 | (println 288 | (b (cset/subset? sub xrel)) 289 | (str "cset\n" (b (fset/subset? 
sub xrel)) " subset?\n")))) 290 | ; 7.119121416749918E-7 cset 291 | ; 1.4956368832914817E-7 subset? 292 | (is 293 | (nil? 294 | (println 295 | (b (cset/superset? xrel sub)) 296 | (str "cset\n" (b (fset/superset? xrel sub)) " superset?\n")))))) 297 | -------------------------------------------------------------------------------- /src/tech/droit/fset.clj: -------------------------------------------------------------------------------- 1 | (ns tech.droit.fset 2 | (:refer-clojure :exclude [select-keys]) 3 | (:require [clojure.set :as cset] 4 | [tech.droit.fset.index :as index]) 5 | (:import 6 | [java.util Collection Iterator HashSet ArrayList Map HashMap Arrays Set] 7 | [tech.droit.fset.index ABox] 8 | [java.lang Iterable] 9 | [clojure.lang 10 | ILookup 11 | IEditableCollection 12 | ITransientCollection 13 | IPersistentCollection 14 | PersistentHashSet 15 | PersistentArrayMap 16 | PersistentHashMap 17 | IPersistentSet 18 | APersistentSet 19 | APersistentMap 20 | ITransientMap 21 | ITransientSet 22 | PersistentHashMap$NodeIter])) 23 | 24 | (def ^{:dynamic true 25 | :doc "Maximum size for a join operand. This helps controlling large joins 26 | by throwing error instead of going out of memory."} 27 | *join-size-threshold* 10000)

(defn- arity-select
  "Dispatches on the size of `items`. With 1 to 5 elements, calls `f` with
  `arg` followed by the elements spread as positional arguments, in
  iteration order. For any other size (0 included) calls `(g arg items)`,
  handing over the collection untouched."
  [^Collection items arg f g]
  (let [n (.size items)
        ^Iterator it (.iterator items)
        ;; each call pulls the next element; arguments are evaluated left to
        ;; right, so the elements reach `f` in iteration order
        nxt #(.next it)]
    (case n
      1 (f arg (nxt))
      2 (f arg (nxt) (nxt))
      3 (f arg (nxt) (nxt) (nxt))
      4 (f arg (nxt) (nxt) (nxt) (nxt))
      5 (f arg (nxt) (nxt) (nxt) (nxt) (nxt))
      (g arg items))))

43 | (defn rename-keys 44 | "Like clojure.set/rename-keys, just way uglier and 40% faster." 
45 | [^Map m ^Map kmap] 46 | (let [^Iterator it (.iterator ^Iterable kmap) 47 | init (let [^Iterator it (.iterator (.keySet kmap))] 48 | (loop [^ITransientMap m (.asTransient ^IEditableCollection m)] 49 | (if (.hasNext it) 50 | (do (.without m (.next it)) (recur m)) 51 | m)))] 52 | (loop [^ITransientMap res init] 53 | (if (.hasNext it) 54 | (let [^java.util.Map$Entry pair (.next it) 55 | k (.getKey pair)] 56 | (when (.containsKey m k) 57 | (.assoc res (.getValue pair) (.valAt ^ILookup m k))) 58 | (recur res)) 59 | (.persistent res)))))

(defn maps
  "Set-flavoured `map`: applies `f` to every element produced by the
  iterator of `s` and collects the results into a new hash set, so results
  that are equal collapse into one element. Returns #{} when `s` is empty."
  [f ^Iterable s]
  (let [^Iterator it (.iterator s)]
    ;; accumulate through a transient and freeze it once the iterator is drained
    (loop [acc (.asTransient PersistentHashSet/EMPTY)]
      (if-not (.hasNext it)
        (.persistent acc)
        (recur (.conj acc (f (.next it))))))))

70 | (defn select-key 71 | "Like core/select-keys but with specific arities" 72 | ([m k1] 73 | (let [out {} 74 | v1 (clojure.lang.RT/find m k1)] 75 | (if v1 (conj out v1) out))) 76 | ([m k1 k2] 77 | (let [out (.asTransient PersistentArrayMap/EMPTY) 78 | v1 (clojure.lang.RT/find m k1) 79 | ^ITransientMap out (if v1 (.conj out v1) out) 80 | v2 (clojure.lang.RT/find m k2) 81 | ^ITransientMap out (if v2 (.conj out v2) out)] 82 | (.persistent out))) 83 | ([m k1 k2 k3] 84 | (let [out (.asTransient PersistentArrayMap/EMPTY) 85 | v1 (clojure.lang.RT/find m k1) 86 | ^ITransientMap out (if v1 (.conj out v1) out) 87 | v2 (clojure.lang.RT/find m k2) 88 | ^ITransientMap out (if v2 (.conj out v2) out) 89 | v3 (clojure.lang.RT/find m k3) 90 | ^ITransientMap out (if v3 (.conj out v3) out)] 91 | (.persistent out))) 92 | ([m k1 k2 k3 k4] 93 | (let [out (.asTransient PersistentArrayMap/EMPTY) 94 | v1 (clojure.lang.RT/find m k1) 95 | ^ITransientMap out (if v1 (.conj out v1) out) 96 | v2 (clojure.lang.RT/find m k2) 97 | ^ITransientMap out (if v2 (.conj out v2) out) 98 | v3 (clojure.lang.RT/find m k3) 99 | ^ITransientMap out (if v3 (.conj out v3) out) 100 | v4 
(clojure.lang.RT/find m k4) 101 | ^ITransientMap out (if v4 (.conj out v4) out)] 102 | (.persistent out))) 103 | ([m k1 k2 k3 k4 k5] 104 | (let [out (.asTransient PersistentArrayMap/EMPTY) 105 | v1 (clojure.lang.RT/find m k1) 106 | ^ITransientMap out (if v1 (.conj out v1) out) 107 | v2 (clojure.lang.RT/find m k2) 108 | ^ITransientMap out (if v2 (.conj out v2) out) 109 | v3 (clojure.lang.RT/find m k3) 110 | ^ITransientMap out (if v3 (.conj out v3) out) 111 | v4 (clojure.lang.RT/find m k4) 112 | ^ITransientMap out (if v4 (.conj out v4) out) 113 | v5 (clojure.lang.RT/find m k5) 114 | ^ITransientMap out (if v5 (.conj out v5) out)] 115 | (.persistent out))))

(defn select-keys
  "Like core/select-keys but uses a transient to collect results.
  Returns a map holding only the entries of `m` whose key appears in `ks`;
  keys of `ks` that `m` does not contain are skipped. A nil `ks` yields {}.
  Note: differently from core/select-keys, it doesn't retain metadata.
  TODO: generative comparison with core/select-keys."
  [m ^Iterable ks]
  (if ks
    (let [^Iterator items (.iterator ks)]
      (loop [out (.asTransient PersistentHashMap/EMPTY)]
        (if (.hasNext items)
          ;; RT/find returns the map entry, or nil when the key is absent
          (let [entry (clojure.lang.RT/find m (.next items))]
            (recur (if entry (.conj out entry) out)))
          (.persistent out))))
    {}))


(defn- union-into-largest
  "Shared implementation of the 3-, 4- and 5-set arities of `union`.
  `sets` is a vector of non-nil collections. The largest operand that
  supports transients (first one wins on ties) is used as the base and every
  other operand is poured into it through a single transient. When no
  operand supports transients (e.g. only sorted sets), the elements are
  added to the first operand with plain `conj`."
  [sets]
  (let [base (reduce (fn [best s]
                       (if (and (instance? IEditableCollection s)
                                (or (nil? best) (> (count s) (count best))))
                         s
                         best))
                     nil
                     sets)]
    (if base
      (persistent!
        (reduce (fn [acc s]
                  ;; the base already contains its own elements
                  (if (identical? s base) acc (reduce conj! acc s)))
                (transient base)
                sets))
      (reduce (fn [acc s] (reduce conj acc s)) (first sets) (rest sets)))))

(defn union
  "Like clojure.set/union but with arities optimizations.
  nil operands are treated as empty sets. With two operands the smaller one
  is poured into the larger one. With three to five operands the largest
  operand that supports transients is used as the base, so the bulk of the
  elements is never copied. With more than five operands the sets are
  reordered with clojure.set's private `bubble-max-key` and reduced with
  `into`."
  ([] #{})
  ([s1] s1)
  ([^IEditableCollection s1 ^IPersistentSet s2]
   (if (< (count s1) (count s2))
     (recur s2 s1)
     (let [^IEditableCollection s1 (or s1 #{}) s2 (or s2 #{})
           ^Iterator items (.iterator ^Iterable s2)]
       (if (instance? IEditableCollection s1)
         (loop [^ITransientSet s (.asTransient s1)]
           (if (.hasNext items)
             (recur (.conj s (.next items)))
             (.persistent s)))
         ;; non-editable base (e.g. a sorted set): persistent cons
         (loop [^IPersistentCollection s s1]
           (if (.hasNext items)
             (recur (.cons s (.next items)))
             s))))))
  ;; The previous implementation dispatched with `(case maxc c2 ... c3 ...)`.
  ;; `case` does not evaluate its test constants, so the locals were compared
  ;; as literal symbols and the default branch always ran.
  ([s1 s2 s3]
   (union-into-largest [(or s1 #{}) (or s2 #{}) (or s3 #{})]))
  ([s1 s2 s3 s4]
   (union-into-largest [(or s1 #{}) (or s2 #{}) (or s3 #{}) (or s4 #{})]))
  ([s1 s2 s3 s4 s5]
   (union-into-largest
     [(or s1 #{}) (or s2 #{}) (or s3 #{}) (or s4 #{}) (or s5 #{})]))
  ([s1 s2 s3 s4 s5 & sets]
   (let [bubbled-sets (#'cset/bubble-max-key count (conj sets s5 s4 s3 s2 s1))]
     (reduce into (first bubbled-sets) (rest bubbled-sets)))))

(defn kset-native
  "Retrieve the keyset (the attributes) of relation xrel, read from the
  first tuple returned by the relation's iterator. Returns that tuple's
  `java.util.Set` key view, or an empty Clojure set when xrel has no tuples."
  ^Set [^APersistentSet xrel]
  (let [^Iterator it (.iterator xrel)
        ^APersistentMap item (when (.hasNext it) (.next it))]
    (if item (.keySet item) #{})))

(defn kset
  "Transforms the result of kset-native into a Clojure set."
  [^APersistentSet xrel]
  (into #{} (kset-native xrel)))

216 | (defn intersection 217 | "Optimized version of `clojure.set/intersection`. It's mostly compatible, 218 | but it expects strict sets as arguments (which should be the case). However 219 | clojure.set/intersection also accepts other types with unpredictable results." 220 | ([s1] s1) 221 | ([^IEditableCollection s1 ^IPersistentSet s2] 222 | (if (< (count s2) (count s1)) 223 | (recur s2 s1) 224 | (let [^Iterator items (.iterator ^Iterable s1)] 225 | (if (instance? 
IEditableCollection s1) 226 | (loop [^ITransientSet out (.asTransient s1)] 227 | (if (.hasNext items) 228 | (let [item (.next items)] 229 | (if (.contains s2 item) 230 | (recur out) 231 | (recur (.disjoin out item)))) 232 | (.persistent out))) 233 | (loop [^IPersistentSet out s1] 234 | (if (.hasNext items) 235 | (let [item (.next items)] 236 | (if (.contains s2 item) 237 | (recur out) 238 | (recur (.disjoin out item)))) 239 | out)))))) 240 | ([s1 s2 & sets] 241 | (let [bubbled-sets (#'cset/bubble-max-key #(- (count %)) (conj sets s2 s1))] 242 | (reduce intersection (first bubbled-sets) (rest bubbled-sets)))))

(defn intersection*
  "Like clojure.set/intersection, but the two-set arity returns a fresh,
  mutable java.util.HashSet: it copies the smaller operand (the second one
  on ties) and retains only the elements also present in the other.
  The one-set arity returns its argument unchanged; with more than two sets
  the call is delegated to `intersection`, whose result is returned as is."
  ([s1] s1)
  (^Set [^Set s1 ^Set s2]
   (let [s1-smaller? (< (.size s1) (.size s2))
         ^Set seed (if s1-smaller? s1 s2)
         ^Set other (if s1-smaller? s2 s1)
         result (HashSet. seed)]
     (.retainAll result other)
     result))
  ([s1 s2 & sets]
   (apply intersection s1 s2 sets)))

254 | (defn difference 255 | "Faster version of clojure.set/difference." 256 | ([s1] s1) 257 | ([^PersistentHashSet s1 ^PersistentHashSet s2] 258 | (if (< (.size s1) (.size s2)) 259 | (let [^Iterator items (.iterator s1)] 260 | (if (instance? IEditableCollection s1) 261 | (loop [^ITransientSet out (.asTransient s1)] 262 | (if (.hasNext items) 263 | (let [item (.next items)] 264 | (recur (if (.contains s2 item) 265 | (.disjoin out item) 266 | out))) 267 | (.persistent out))) 268 | (loop [^IPersistentSet out s1] 269 | (if (.hasNext items) 270 | (let [item (.next items)] 271 | (recur (if (.contains s2 item) 272 | (.disjoin out item) 273 | out))) 274 | out)))) 275 | (let [^Iterator items (.iterator s2)] 276 | (if (instance? 
IEditableCollection s1) 277 | (loop [^ITransientSet out (.asTransient s1)] 278 | (if (.hasNext items) 279 | (recur (.disjoin out (.next items))) 280 | (.persistent out))) 281 | (loop [^IPersistentSet out s1] 282 | (if (.hasNext items) 283 | (recur (.disjoin out (.next items))) 284 | out)))))) 285 | ([s1 s2 & sets] 286 | (reduce difference s1 (conj sets s2))))

(defn select
  "Faster version of clojure.set/select.
  Returns the elements of set `s` for which `pred` returns a truthy value.
  Starts from `s` itself and removes the rejected elements, through a
  transient when `s` supports one and with persistent `disjoin` otherwise
  (e.g. for sorted sets)."
  [pred ^PersistentHashSet s]
  (let [^Iterator items (.iterator s)]
    (if (instance? IEditableCollection s)
      (loop [^ITransientSet out (.asTransient s)]
        (if (.hasNext items)
          (let [item (.next items)]
            (recur (if (pred item) out (.disjoin out item))))
          (.persistent out)))
      (loop [^IPersistentSet out s]
        (if (.hasNext items)
          (let [item (.next items)]
            (recur (if (pred item) out (.disjoin out item))))
          out)))))

(defn project*
  "Version of clojure.set/project supporting specific arities for keys:
  projects every tuple of `xrel` onto the one to five keys given as
  positional arguments and returns the resulting set of maps.
  TODO: work in progress, it doesn't perform better than normal project."
  ([xrel k1] (maps #(select-key % k1) xrel))
  ([xrel k1 k2] (maps #(select-key % k1 k2) xrel))
  ([xrel k1 k2 k3] (maps #(select-key % k1 k2 k3) xrel))
  ([xrel k1 k2 k3 k4] (maps #(select-key % k1 k2 k3 k4) xrel))
  ;; k4 used to be left out of this call, silently dropping that attribute
  ([xrel k1 k2 k3 k4 k5] (maps #(select-key % k1 k2 k3 k4 k5) xrel)))

(defn project
  "Faster version of clojure.set/project: returns the set obtained by
  restricting every tuple of `xrel` to the keys in `ks`."
  [xrel ks]
  (maps #(select-keys % ks) xrel))

(defn rename
  "Like clojure.set/rename but no meta and optimized.
  `(rename xrel kmap)` renames the keys of every tuple according to `kmap`
  (old key -> new key). `(rename xrel k1 k2)` renames the single key `k1`
  to `k2`; it looks the value up by calling the tuple with `k1`, so a tuple
  lacking `k1` ends up with `k2` mapped to nil."
  ([xrel kmap]
   (maps #(rename-keys % kmap) xrel))
  ([xrel k1 k2]
   (maps #(let [v (% k1)]
            (-> %
                (assoc k2 v)
                (dissoc k1)))
         xrel)))

330 | (defn map-invert 331 | "Returns the map with the vals mapped to the keys." 
332 | {:added "1.0"} 333 | [m] 334 | (persistent! 335 | (reduce 336 | (fn [m [k v]] (assoc! m v k)) 337 | (transient {}) 338 | m)))

(defn select-keys*
  "Alternative to core/select-keys which looks up each key of `ks` in `m`
  and returns the values, in the iteration order of `ks`, inside an object
  array wrapped in an `ABox` instance. A key that `m` does not contain
  contributes whatever `Map.get` returns for it (nil)."
  ^ABox [^Map m ^Iterable ks]
  (let [slots (object-array (count ks))
        ^Iterator it (.iterator ks)]
    (loop [i 0]
      (when (.hasNext it)
        (aset slots i (.get m (.next it)))
        (recur (unchecked-inc-int i))))
    (ABox. slots)))

(defn index*
  "Alternative clojure.set/index with a different (but non-compatible)
  interface using native and mutable types: delegates to `index/indexer`,
  keying every tuple of `rel` by the `ABox` that `select-keys*` builds
  from `ks`."
  ^HashMap [rel ks]
  (index/indexer rel (fn [row] (select-keys* row ks))))

(defn index
  "Faster clojure.set/index with a compatible interface: returns a map from
  each distinct projection of a tuple onto `ks` to the set of tuples of
  `xrel` sharing that projection.
  TODO: generative comparison with set/index."
  [^Set xrel ks]
  (let [^Iterator it (.iterator xrel)]
    (loop [acc (.asTransient PersistentHashMap/EMPTY)]
      (if-not (.hasNext it)
        (.persistent acc)
        (let [row (.next it)
              ;; explicit-arity select-key for 1-5 keys, select-keys otherwise
              k (arity-select ks row select-key select-keys)
              ^IPersistentSet bucket (.valAt acc k #{})]
          (recur (.assoc acc k (.cons bucket row))))))))

372 | (defn join 373 | "Optimized version of clojure.set/join about 4x faster." 374 | ([^Set xrel ^Set yrel] 375 | (join xrel yrel (kset-native xrel) (kset-native yrel))) 376 | ([^Set xrel ^Set yrel x-keys y-keys] 377 | {:pre [(< (count xrel) *join-size-threshold*) 378 | (< (count yrel) *join-size-threshold*)]} 379 | (let [xcount (count xrel) ycount (count yrel)] 380 | (if (and (> xcount 0) (> ycount 0)) 381 | (let [common-keys (intersection* x-keys y-keys) 382 | <=? (<= xcount ycount) 383 | r (if <=? xrel yrel) 384 | s (if <=? 
yrel xrel) 385 | ^Map idx (arity-select common-keys r index/index-for index*)] 386 | (let [^Iterator items (.iterator ^Iterable s)] 387 | (loop [out (.asTransient PersistentHashSet/EMPTY)] 388 | (if (.hasNext items) 389 | (let [item (.next items)] 390 | (recur 391 | (if-let [found (.get idx (arity-select common-keys item index/key-from select-keys*))] 392 | (let [^Iterator joinables (.iterator ^Iterable found)] 393 | (loop [out out] 394 | (if (.hasNext joinables) 395 | (let [^APersistentMap joinable (.next joinables)] 396 | (recur (.conj out (.cons joinable item)))) 397 | out))) 398 | out))) 399 | (.persistent out))))) 400 | #{}))) 401 | ([xrel yrel km] 402 | (let [[r s k] (if (<= (count xrel) (count yrel)) 403 | [xrel yrel (map-invert km)] 404 | [yrel xrel km]) 405 | idx (index r (vals k))] 406 | (.persistent ^ITransientSet (reduce 407 | (fn [ret item] 408 | (if-let [found (idx (rename-keys (select-keys item (keys k)) k))] 409 | (reduce 410 | (fn [^ITransientCollection acc ^APersistentMap itm] 411 | (.conj acc (.cons itm item))) 412 | ret found) 413 | ret)) 414 | (.asTransient PersistentHashSet/EMPTY) s)))))

(defn subset?
  "Is set1 a subset of set2? Returns false right away when set1 has more
  elements than set2; otherwise walks set1 and stops at the first element
  that set2 does not contain."
  [^Set set1 ^Set set2]
  (if (> (.size set1) (.size set2))
    false
    (let [^Iterator it (.iterator set1)]
      (loop []
        (cond
          (not (.hasNext it)) true
          (.contains set2 (.next it)) (recur)
          :else false)))))

(defn superset?
  "Is set1 a superset of set2? Returns false right away when set1 has fewer
  elements than set2; otherwise walks set2 and stops at the first element
  that set1 does not contain."
  [^Set set1 ^Set set2]
  (if (< (.size set1) (.size set2))
    false
    (let [^Iterator it (.iterator set2)]
      (loop []
        (cond
          (not (.hasNext it)) true
          (.contains set1 (.next it)) (recur)
          :else false)))))
435 | --------------------------------------------------------------------------------