├── .gitignore ├── LICENSE ├── README.md ├── build └── build │ └── core.clj ├── deps.edn ├── docs ├── DOCUMENTATION.md ├── TECHNOTES.md └── TUTORIAL.md ├── drafts └── entity_graph │ ├── core_test_generative.cljc │ ├── macros.clj │ ├── query.cljc │ └── scratch.cljc ├── shadow-cljs.edn ├── src └── entity_graph │ └── core.cljc └── test └── entity_graph ├── benchmark.cljc ├── benchmark_vs.cljc └── core_test.cljc /.gitignore: -------------------------------------------------------------------------------- 1 | .cpcache 2 | .idea 3 | .shadow-cljs 4 | *.iml 5 | .nrepl-port 6 | resources 7 | target 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 4 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 5 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and documentation 12 | distributed under this Agreement, and 13 | b) in the case of each subsequent Contributor: 14 | i) changes to the Program, and 15 | ii) additions to the Program; 16 | 17 | where such changes and/or additions to the Program originate from and are 18 | distributed by that particular Contributor. A Contribution 'originates' 19 | from a Contributor if it was added to the Program by such Contributor 20 | itself or anyone acting on such Contributor's behalf. Contributions do not 21 | include additions to the Program which: (i) are separate modules of 22 | software distributed in conjunction with the Program under their own 23 | license agreement, and (ii) are not derivative works of the Program. 24 | 25 | "Contributor" means any person or entity that distributes the Program. 
26 | 27 | "Licensed Patents" mean patent claims licensable by a Contributor which are 28 | necessarily infringed by the use or sale of its Contribution alone or when 29 | combined with the Program. 30 | 31 | "Program" means the Contributions distributed in accordance with this 32 | Agreement. 33 | 34 | "Recipient" means anyone who receives the Program under this Agreement, 35 | including all Contributors. 36 | 37 | 2. GRANT OF RIGHTS 38 | a) Subject to the terms of this Agreement, each Contributor hereby grants 39 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 40 | reproduce, prepare derivative works of, publicly display, publicly 41 | perform, distribute and sublicense the Contribution of such Contributor, 42 | if any, and such derivative works, in source code and object code form. 43 | b) Subject to the terms of this Agreement, each Contributor hereby grants 44 | Recipient a non-exclusive, worldwide, royalty-free patent license under 45 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 46 | transfer the Contribution of such Contributor, if any, in source code and 47 | object code form. This patent license shall apply to the combination of 48 | the Contribution and the Program if, at the time the Contribution is 49 | added by the Contributor, such addition of the Contribution causes such 50 | combination to be covered by the Licensed Patents. The patent license 51 | shall not apply to any other combinations which include the Contribution. 52 | No hardware per se is licensed hereunder. 53 | c) Recipient understands that although each Contributor grants the licenses 54 | to its Contributions set forth herein, no assurances are provided by any 55 | Contributor that the Program does not infringe the patent or other 56 | intellectual property rights of any other entity. 
Each Contributor 57 | disclaims any liability to Recipient for claims brought by any other 58 | entity based on infringement of intellectual property rights or 59 | otherwise. As a condition to exercising the rights and licenses granted 60 | hereunder, each Recipient hereby assumes sole responsibility to secure 61 | any other intellectual property rights needed, if any. For example, if a 62 | third party patent license is required to allow Recipient to distribute 63 | the Program, it is Recipient's responsibility to acquire that license 64 | before distributing the Program. 65 | d) Each Contributor represents that to its knowledge it has sufficient 66 | copyright rights in its Contribution, if any, to grant the copyright 67 | license set forth in this Agreement. 68 | 69 | 3. REQUIREMENTS 70 | 71 | A Contributor may choose to distribute the Program in object code form under 72 | its own license agreement, provided that: 73 | 74 | a) it complies with the terms and conditions of this Agreement; and 75 | b) its license agreement: 76 | i) effectively disclaims on behalf of all Contributors all warranties 77 | and conditions, express and implied, including warranties or 78 | conditions of title and non-infringement, and implied warranties or 79 | conditions of merchantability and fitness for a particular purpose; 80 | ii) effectively excludes on behalf of all Contributors all liability for 81 | damages, including direct, indirect, special, incidental and 82 | consequential damages, such as lost profits; 83 | iii) states that any provisions which differ from this Agreement are 84 | offered by that Contributor alone and not by any other party; and 85 | iv) states that source code for the Program is available from such 86 | Contributor, and informs licensees how to obtain it in a reasonable 87 | manner on or through a medium customarily used for software exchange. 
88 | 89 | When the Program is made available in source code form: 90 | 91 | a) it must be made available under this Agreement; and 92 | b) a copy of this Agreement must be included with each copy of the Program. 93 | Contributors may not remove or alter any copyright notices contained 94 | within the Program. 95 | 96 | Each Contributor must identify itself as the originator of its Contribution, 97 | if 98 | any, in a manner that reasonably allows subsequent Recipients to identify the 99 | originator of the Contribution. 100 | 101 | 4. COMMERCIAL DISTRIBUTION 102 | 103 | Commercial distributors of software may accept certain responsibilities with 104 | respect to end users, business partners and the like. While this license is 105 | intended to facilitate the commercial use of the Program, the Contributor who 106 | includes the Program in a commercial product offering should do so in a manner 107 | which does not create potential liability for other Contributors. Therefore, 108 | if a Contributor includes the Program in a commercial product offering, such 109 | Contributor ("Commercial Contributor") hereby agrees to defend and indemnify 110 | every other Contributor ("Indemnified Contributor") against any losses, 111 | damages and costs (collectively "Losses") arising from claims, lawsuits and 112 | other legal actions brought by a third party against the Indemnified 113 | Contributor to the extent caused by the acts or omissions of such Commercial 114 | Contributor in connection with its distribution of the Program in a commercial 115 | product offering. The obligations in this section do not apply to any claims 116 | or Losses relating to any actual or alleged intellectual property 117 | infringement. 
In order to qualify, an Indemnified Contributor must: 118 | a) promptly notify the Commercial Contributor in writing of such claim, and 119 | b) allow the Commercial Contributor to control, and cooperate with the 120 | Commercial Contributor in, the defense and any related settlement 121 | negotiations. The Indemnified Contributor may participate in any such claim at 122 | its own expense. 123 | 124 | For example, a Contributor might include the Program in a commercial product 125 | offering, Product X. That Contributor is then a Commercial Contributor. If 126 | that Commercial Contributor then makes performance claims, or offers 127 | warranties related to Product X, those performance claims and warranties are 128 | such Commercial Contributor's responsibility alone. Under this section, the 129 | Commercial Contributor would have to defend claims against the other 130 | Contributors related to those performance claims and warranties, and if a 131 | court requires any other Contributor to pay any damages as a result, the 132 | Commercial Contributor must pay those damages. 133 | 134 | 5. NO WARRANTY 135 | 136 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN 137 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 138 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each 140 | Recipient is solely responsible for determining the appropriateness of using 141 | and distributing the Program and assumes all risks associated with its 142 | exercise of rights under this Agreement , including but not limited to the 143 | risks and costs of program errors, compliance with applicable laws, damage to 144 | or loss of data, programs or equipment, and unavailability or interruption of 145 | operations. 146 | 147 | 6. 
DISCLAIMER OF LIABILITY 148 | 149 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 150 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 151 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 152 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 153 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 154 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 155 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 156 | OF SUCH DAMAGES. 157 | 158 | 7. GENERAL 159 | 160 | If any provision of this Agreement is invalid or unenforceable under 161 | applicable law, it shall not affect the validity or enforceability of the 162 | remainder of the terms of this Agreement, and without further action by the 163 | parties hereto, such provision shall be reformed to the minimum extent 164 | necessary to make such provision valid and enforceable. 165 | 166 | If Recipient institutes patent litigation against any entity (including a 167 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 168 | (excluding combinations of the Program with other software or hardware) 169 | infringes such Recipient's patent(s), then such Recipient's rights granted 170 | under Section 2(b) shall terminate as of the date such litigation is filed. 171 | 172 | All Recipient's rights under this Agreement shall terminate if it fails to 173 | comply with any of the material terms or conditions of this Agreement and does 174 | not cure such failure in a reasonable period of time after becoming aware of 175 | such noncompliance. If all Recipient's rights under this Agreement terminate, 176 | Recipient agrees to cease use and distribution of the Program as soon as 177 | reasonably practicable. 
However, Recipient's obligations under this Agreement 178 | and any licenses granted by Recipient relating to the Program shall continue 179 | and survive. 180 | 181 | Everyone is permitted to copy and distribute copies of this Agreement, but in 182 | order to avoid inconsistency the Agreement is copyrighted and may only be 183 | modified in the following manner. The Agreement Steward reserves the right to 184 | publish new versions (including revisions) of this Agreement from time to 185 | time. No one other than the Agreement Steward has the right to modify this 186 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 187 | Eclipse Foundation may assign the responsibility to serve as the Agreement 188 | Steward to a suitable separate entity. Each new version of the Agreement will 189 | be given a distinguishing version number. The Program (including 190 | Contributions) may always be distributed subject to the version of the 191 | Agreement under which it was received. In addition, after a new version of the 192 | Agreement is published, Contributor may elect to distribute the Program 193 | (including its Contributions) under the new version. Except as expressly 194 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 195 | licenses to the intellectual property of any Contributor under this Agreement, 196 | whether expressly, by implication, estoppel or otherwise. All rights in the 197 | Program not expressly granted under this Agreement are reserved. 198 | 199 | This Agreement is governed by the laws of the State of New York and the 200 | intellectual property laws of the United States of America. No party to this 201 | Agreement will bring a legal action under this Agreement more than one year 202 | after the cause of action arose. Each party waives its rights to a jury trial in 203 | any resulting litigation. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EntityGraph 2 | 3 | EntityGraph is an in memory immutable data store designed for web applications, with likely other use cases. It is available for Clojure and ClojureScript. 4 | 5 | Based on the triple store concept (entity, attribute, value), data is stored in the form of entities in the EAV index (entity, attribute, value). Select attributes are also indexed in the AVE index (attribute, value, entity). Only indexing select attributes in the AVE index gives the option to economize on memory. 6 | 7 | The indexes are implemented as nested Clojure maps, accessible with Clojure's functions. Attributes can be of any type (including collections) and don't need to be declared in the schema unless special behavior is needed. 8 | 9 | For data retrieval EntityGraph offers pull-style graph query support, which satisfies most use cases. There is no datalog, sql or any other query language support, but since the indexes are Clojure maps, any number of querying solutions could be implemented on top of the indexes. The user may also write custom functions to retrieve data from indexes without the need to parse queries. This is expected to be a rare use case. 
10 | 11 | ## Features 12 | 13 | * Tempids, Keyword entity ids for nice programmatic names 14 | * Cardinality one and many attributes 15 | * Sorting within entity for cardinality many attributes (using default or custom comparator) 16 | * Reference attributes and component entities 17 | * Unique identities, unique values, and lookup refs 18 | * Nested entities in assertions 19 | * AVE index sorted by custom comparator 20 | * Pull queries with: wildcarding, nesting, joins, forward/backward attribute nav, recursive pulls 21 | 22 | ## Non-Goals 23 | 24 | **The following are not in scope of the project:** 25 | * EntityGraph makes no effort to synchronize data between client and server and considers this an orthogonal concern. 26 | * The word "database" is used throughout, but keep in mind that there is no storage layer. 27 | * Reactive queries are not supported. 28 | 29 | ## Dependency Information 30 | 31 | [deps.edn](https://clojure.org/guides/deps_and_cli) dependency information: 32 | 33 | `entity-graph/entity-graph {:mvn/version "0.1.0-SNAPSHOT"}` 34 | 35 | [Leiningen](https://github.com/technomancy/leiningen) dependency information: 36 | 37 | `[entity-graph/entity-graph "0.1.0-SNAPSHOT"]` 38 | 39 | ## Status 40 | 41 | * The feature set is complete, though additional features and enhancements are possible in the future. 42 | * In case of unexpected issues, every effort will be made to avoid breaking changes by moving to new names rather than by breaking existing names. 43 | * The code is reasonably well tested, but there has been minimal production use, so some issues may arise. 44 | 45 | ## Documentation And Tutorial 46 | 47 | > **Depending on your preference, you may either start with the tutorial or read the documentation first to learn about concepts and features.** 48 | 49 | The **[Hands-on Tutorial](docs/TUTORIAL.md)** introduces the majority of the features in as succinct a manner as possible. 
50 | 51 | **[Documentation](docs/DOCUMENTATION.md)** describes the concepts and features of EntityGraph. **[Schema](docs/DOCUMENTATION.md#schema)** and **[Indexes](docs/DOCUMENTATION.md#indexes)** sections might be especially useful. 52 | 53 | The thought process behind many of the technical design decisions is captured in **[TECHNOTES.md](docs/TECHNOTES.md)**. 54 | 55 | ## Quick Example 56 | 57 | This is just to give you a feel: 58 | 59 | ```clojure 60 | ;; Create a database 61 | ;; This schema has just one attribute, which is to be indexed in the AVE index in a standard Clojure hashmap 62 | (def db-empty 63 | (create-db {:person/last-name 64 | {:db/index {:db/map-type :db.map-type/hash-map}}})) 65 | => #'user/db-empty 66 | 67 | ;; Add an entity to the empty database and capture result of the transaction in tx-result 68 | ;; NOTE: :person/first-name attribute didn't need to be defined in the schema 69 | (def tx-result (transact db-empty [{:person/first-name "Jim" :person/last-name "Morrison"}])) 70 | => #'user/tx-result 71 | 72 | ;; Examine the EAV and AVE indexes after the transaction above 73 | (select-keys (:db-after tx-result) [:db/eav :db/ave]) 74 | => 75 | {:db/eav {1 {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}}, 76 | :db/ave {:person/last-name {"Morrison" #{1}}}} 77 | 78 | ;; Retrieve some data with a pull query 79 | ;; pull the attribute :person/last-name for the entity with id 1 80 | (pull (:db-after tx-result) [:person/last-name] 1) 81 | => {:person/last-name "Morrison"} 82 | 83 | ;; Read some data with a pull query - pull all attributes for the entity with id 1 84 | (pull (:db-after tx-result) '[*] 1) 85 | => {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1} 86 | 87 | ``` 88 | 89 | ## Performance vs. DataScript and ASAMI 90 | 91 | Performance was measured vs. DataScript and ASAMI. Those two databases are roughly in the same category.
For the most part performance as compared with DataScript and ASAMI is favorable. The benchmarks were performed on my M2 MacBook Pro. 92 | 93 | The exact code is here: [entity-graph.benchmark-vs](test/entity_graph/benchmark_vs.cljc). 94 | 95 | It must be acknowledged that these benchmarks, like all benchmarks, are imperfect. These are just a few quick and dirty benchmarks in an attempt to get a rough idea of how performance compares among the three databases. 96 | 97 | The schema used for EntityGraph indexed all attributes in a sorted AVE index, so performance for asserting entities in the database can be compared fairly vs. the other databases. 98 | 99 | The tests were run in both Clojure and ClojureScript and the results largely mirrored each other. EntityGraph was notably faster when asserting new entities into the database and when using pull to retrieve data from the database. 100 | 101 | When querying data without pull (with datalog for DataScript and ASAMI, with custom data retrieval functions for EntityGraph), EntityGraph outperformed ASAMI for a simple query, but ASAMI outperformed EntityGraph for a more complex query. Both ASAMI and EntityGraph outperformed DataScript by an astronomical margin. Either something is seriously wrong with the query benchmark or DataScript queries are particularly slow. 102 | 103 | ### Clojure Results 104 | 105 | * When asserting 20,000 entities into an empty database, **EntityGraph is twice as fast as DataScript and ASAMI**. 106 | * Pull performance for wildcard pattern is **~6x faster** compared to DataScript. For pulling a single attribute EntityGraph is **~2x faster** than DataScript. ASAMI doesn't support pull. 107 | * Pulls were done from a database of 20,000 entities. 108 | * For a simple query EntityGraph is **~100x faster than ASAMI and ~5,000x(!) faster than DataScript** 109 | * Queries were done from a database of 20,000 entities.
110 | * For a more complex query EntityGraph is **~3x slower than ASAMI**, but still **more than 10x faster than DataScript**. 111 | * Queries were done from a database of 20,000 entities. 112 | * For EntityGraph the queries were written as custom data retrieval functions since no declarative query language is supported by EntityGraph. 113 | 114 | ### ClojureScript Results 115 | 116 | * When asserting 20,000 entities into an empty database, **EntityGraph is twice as fast as DataScript and ASAMI**. 117 | * Pull performance for wildcard pattern is **~4x faster** compared to DataScript. For pulling a single attribute EntityGraph is **~2x faster**. ASAMI doesn't support pull. 118 | * Pulls were done from a database of 20,000 entities. 119 | * For a simple query **EntityGraph is ~100x faster than ASAMI and a few thousand times faster than DataScript**. 120 | * Queries were done from a database of 20,000 entities. 121 | * For a more complex query EntityGraph is ~3.5x slower than ASAMI, but still more than 10x faster than DataScript. 122 | * Queries were done from a database of 20,000 entities. 123 | * For EntityGraph the queries were written as custom data retrieval functions since no declarative query language is supported by EntityGraph. 124 | 125 | # License 126 | Copyright © 2021–2023 Georgiy Grigoryan 127 | 128 | Licensed under Eclipse Public License (see LICENSE). 129 | 130 | -------------------------------------------------------------------------------- /build/build/core.clj: -------------------------------------------------------------------------------- 1 | (ns build.core 2 | (:require [clojure.tools.build.api :as b] 3 | [deps-deploy.deps-deploy :as dd])) 4 | 5 | (def lib 'com.github.geodrome/entity-graph) 6 | ;(def version (format "0.0.%s" (b/git-count-revs nil))) 7 | ;; *** NOTE: Change version number manually!
*** 8 | (def version "0.1.0-SNAPSHOT") 9 | (def class-dir "target/classes") 10 | (def basis (b/create-basis {:project "deps.edn"})) 11 | (def jar-file (format "target/%s-%s.jar" (name lib) version)) 12 | 13 | (defn clean [_] 14 | (b/delete {:path "target"})) 15 | 16 | (defn jar [_] 17 | (b/write-pom {:class-dir class-dir 18 | :lib lib 19 | :version version 20 | :basis basis 21 | :src-dirs ["src"]}) 22 | (b/copy-dir {:src-dirs ["src"] 23 | :target-dir class-dir}) 24 | (b/jar {:class-dir class-dir 25 | :jar-file jar-file})) 26 | 27 | (defn install 28 | "Install JAR to local maven repo." 29 | [_] 30 | (b/install {:basis basis 31 | :lib lib 32 | :version version 33 | :jar-file jar-file 34 | :class-dir class-dir})) 35 | 36 | (defn deploy 37 | "Deploy JAR to Clojars." 38 | [_] 39 | (dd/deploy {:installer :remote 40 | :artifact jar-file 41 | :pom-file (b/pom-path {:lib lib :class-dir class-dir})})) 42 | 43 | ;; - From the command line 44 | ;; -- To create new jar: 45 | ; $ clj -T:build clean 46 | ; $ clj -T:build jar 47 | ;; -- To install in local Maven: 48 | ; $ clj -T:build install 49 | ;; -- To deploy to Clojars: 50 | ;; Expects CLOJARS_USERNAME and CLOJARS_PASSWORD env variables 51 | ; $ clj -T:build deploy -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:deps {org.clojure/data.avl {:mvn/version "0.1.0"}} 2 | :paths ["src"] 3 | :aliases 4 | {:test 5 | {:extra-deps {;; For benchmarking, match versions with shadow-cljs.edn 6 | datascript/datascript {:mvn/version "1.4.2"} 7 | org.clojars.quoll/asami {:mvn/version "2.3.3"}} 8 | :extra-paths ["test"]} 9 | :drafts 10 | {:extra-deps {org.clojure/test.check {:mvn/version "1.0.0"} 11 | org.clojure/spec.alpha {:mvn/version "0.2.187"}} 12 | :extra-paths ["drafts"]} 13 | :cljs 14 | {:extra-deps {thheller/shadow-cljs {:mvn/version "2.25.8"}}} 15 | :build 16 | {:extra-deps {io.github.clojure/tools.build
{:mvn/version "0.9.6"} 17 | slipset/deps-deploy {:mvn/version "RELEASE"}} 18 | :extra-paths ["build"] 19 | :ns-default build.core}}} -------------------------------------------------------------------------------- /docs/DOCUMENTATION.md: -------------------------------------------------------------------------------- 1 | # EntityGraph Documentation 2 | 3 | # Table of Contents 4 | * [Overview](#basic-overview) - Basic concepts and capabilities. 5 | * [Transactions](#transactions) - How to use `transact` to add/remove/update entities. 6 | * [Schema](#schema) - Explains various attribute properties that can be defined in the schema. 7 | * [Indexes](#indexes) - How indexes are constructed and how entities are represented in indexes. 8 | * [Pull](#pull) - Declarative data retrieval. 9 | * [Read Directly from Index](#read-directly-from-index) - How to read data directly from the indexes. 10 | 11 | ## Basic Overview 12 | 13 | To use EntityGraph: 14 | * Create an initial database with an optional schema using the function `create-db`. 15 | * The schema cannot be updated once created. 16 | * Add, remove, and update entities using the function `transact`. 17 | * Each call to `transact` produces a new immutable database value. 18 | * `transact` enforces certain database constraints and takes care of indexing. 19 | * To read data in a declarative way, use the function `pull` to make hierarchical (and possibly nested) selections of information about entities. 20 | * Alternatively, read from the indexes directly with custom data retrieval functions.
21 | 22 | ### Quick Example 23 | 24 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).** 25 | 26 | This is just to give you a taste: 27 | 28 | ```clojure 29 | ;; Create a database 30 | ;; This schema has just one attribute, which is to be indexed in the AVE index 31 | (def db-empty 32 | (create-db {:person/last-name 33 | {:db/index {:db/map-type :db.map-type/hash-map}}})) 34 | => #'user/db-empty 35 | 36 | ;; Add an entity to the empty database and capture result of the transaction in tx-result 37 | ;; NOTE: :person/first-name attribute didn't need to be defined in the schema 38 | (def tx-result (transact db-empty [{:person/first-name "Jim" :person/last-name "Morrison"}])) 39 | => #'user/tx-result 40 | 41 | ;; Examine the EAV and AVE indexes after the transaction above 42 | (select-keys (:db-after tx-result) [:db/eav :db/ave]) 43 | => 44 | {:db/eav {1 {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}}, 45 | :db/ave {:person/last-name {"Morrison" #{1}}}} 46 | 47 | ;; Retrieve some data with a pull query 48 | ;; pull the attribute :person/last-name for the entity with id 1 49 | (pull (:db-after tx-result) [:person/last-name] 1) 50 | => {:person/last-name "Morrison"} 51 | 52 | ;; Read some data with a pull query - pull all attributes for the entity with id 1 53 | (pull (:db-after tx-result) '[*] 1) 54 | => {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1} 55 | ``` 56 | 57 | ## Entities 58 | 59 | The database is organized around entities. An entity is a map of attribute/value pairs. Each entity contains a special `:db/id` key signifying its entity id or the internal key in the database. Entities are indexed by `:db/id` in the EAV index and by select attributes in the AVE index.
60 | 61 | Example entity: 62 | 63 | ```clojure 64 | {:db/id 1 65 | :person/name "John" 66 | :person/email "john@johnny.net"} 67 | ``` 68 | 69 | ## Entity IDs 70 | 71 | Entity ids are usually auto-assigned, but they can also be user-specified keywords. Keyword entity ids provide a convenient programmatic name for an entity. 72 | 73 | Here's an entity with a keyword entity id `:ui/chat-window` and one attribute `:chat-window/text` with the string value `"Type here..."`. 74 | 75 | ```clojure 76 | {:db/id :ui/chat-window 77 | :chat-window/text "Type here..."} 78 | ``` 79 | 80 | NOTE: To be encoded in `value` attribute for HTML inputs, keyword ids must be converted to strings. 81 | 82 | ## Attributes 83 | 84 | Entities consist of attribute/value pairs. Attributes are analogous to columns in SQL databases, but entities are not required to have predefined sets of attributes. Any and all attributes may be added to or retracted from entities freely. 85 | 86 | Attributes only need to be defined in the schema when specific attribute properties need to be declared. Otherwise, attribute names may be used freely without declaration in the schema. See the [schema section](#schema) for more details. 87 | 88 | ### Attribute Name Constraints 89 | 90 | * Attribute names must be keywords. Though not enforced, things will break if you don't use keywords. 91 | * Attribute names must not begin with an underscore. This is not enforced, but it will break reverse navigation in `pull`. 92 | * Attribute name keywords may be namespaced. 93 | * The `:<namespace>/<name>` lexical form is preferred to avoid naming collisions. 94 | * The `:db` namespace is reserved for the database by convention. 95 | 96 | ### Attribute Cardinality 97 | 98 | By default, attributes contain just one value. These are known as cardinality one attributes. But cardinality many attributes are also supported. These attributes may contain multiple values.
Cardinality many attributes are represented as sets of values: 99 | 100 | ```clojure 101 | {:db/id 1 102 | :person/name "John" 103 | :person/nicknames #{"Johnny" "Versaci"}} 104 | ``` 105 | 106 | The attribute `:person/nicknames` above is cardinality many, and it is represented as a set of values. Cardinality many attributes must be defined as such in the schema when the database is created. 107 | 108 | Several other attribute properties may be defined in the schema. See the [schema section](#schema) for more details. 109 | 110 | ## Values 111 | 112 | * Attributes can hold values of any type, including collections. 113 | * Attributes are not typed and no data type declarations for attributes are required in the schema. 114 | * Any and all data types are allowed, including collections. 115 | * Different entities can hold values of different type under the same attribute name. 116 | * Cardinality many attributes may contain heterogeneous value types for the same entity. 117 | * The one **exception is the reference type**, which must be declared in the schema. 118 | * Reference attributes point to other entities in the database, thus creating a graph. 119 | * Except for reference values, no data type declarations are required in the schema. 120 | * Nil values are illegal. This is enforced. 121 | * To indicate the absence of value for a given attribute, simply avoid adding it to the entity. 122 | * To remove an existing value for a given attribute, simply remove the value; don't attempt to set it to `nil`. 123 | 124 | ## Time/History 125 | 126 | EntityGraph does not keep a history (unlike Datomic, for example). There is no transaction log and no timestamps are recorded. However, since each successive db value, produced by `transact`, is an immutable Clojure map, any number of past db values can be preserved by holding references to those values. A `:db/tx-count` value is kept and incremented after each transaction. 
127 | 128 | ## Storage 129 | 130 | Storage is not supported. If you're considering implementing it, see the section [Writing to Storage](TECHNOTES.md#writing-to-storage) in Technical Notes for some considerations. 131 | 132 | # Transactions 133 | 134 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).** 135 | 136 | All database operations are performed with the `transact` function, which updates the database and enforces database constraints. 137 | 138 | There are three operations: 139 | * `:db/add` - assertions add data to the database. 140 | * `:db/retract` - retractions remove data from the database. 141 | * `:db/retractEntity` - retracts an entity, all of its component entities, and all references to the entity and component entities. 142 | 143 | All operations can be expressed in list form. Assertions can also be expressed in map form. Map form assertions are more convenient and particularly performant when adding new entities to the database. 144 | 145 | As a further convenience: 146 | * The attribute keys in the map may be either keywords or strings. 147 | * Nested maps are supported. 148 | * Map values for reference attributes are interpreted as nested entities. 149 | 150 | To assert new entities, use tempids or maps with no `:db/id` key. To update an existing entity use the existing entity id. 151 | 152 | ## Temporary ids 153 | 154 | New entities may be identified by a temporary id. Tempids in transaction data are represented by a string in the entity id position. When the transaction is processed, temporary ids are resolved to actual entity ids. 155 | 156 | If a temporary id is used more than once, all instances of the tempid are mapped to the same entity id. 
There is an exception for `:db.unique/identity` attributes, which support upsert behavior: 157 | * The tempid of a `:db.unique/identity` attribute will map to an existing entity if one exists with the same attribute and value (update) 158 | * Or it will make a new entity if one does not exist (insert) 159 | * All further adds in the transaction that apply to that same temporary id are applied to the "upserted" entity 160 | 161 | # Schema 162 | 163 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).** 164 | 165 | An example schema may be seen in the namespace [entity-graph.core-test](../test/entity_graph/core_test.cljc). 166 | 167 | Defining attributes in the schema is only required for certain attribute behaviors. Though not required, you may wish to list the full schema anyway. 168 | 169 | The following properties may be specified for attributes in the schema: 170 | 171 | ## :db/doc 172 | 173 | An optional documentation string can be specified in the definition of each attribute. It might be used to document the data type, data shape, or anything else about the attribute. The docstring is meant for the programmer reading the code and isn't programmatically leveraged by the database in any way. 174 | 175 | ## :db/valueType 176 | 177 | Attribute values can be of any type, including collections. No data type declarations are required in the schema, except for reference values. 178 | 179 | Adding `{:db/valueType :db.type/ref}` to the schema signifies a reference attribute that refers to other entities in the database by entity id. 180 | 181 | ## :db/isComponent 182 | 183 | Reference attributes can optionally specify sub-component entities with `{:db/isComponent true}`. 184 | 185 | Component entities have the following properties: 186 | * When the parent entity is retracted with `:db/retractEntity`, component entities are also retracted. 
187 | * If a component attribute is pulled with `pull`, a map containing all the attributes of the referenced entity will be returned. 188 | * If multiple entities attempt to claim an entity as their component, the transaction will fail with `:db.error/component-conflict`. 189 | * If an entity attempts to hold another entity as a component under different attributes, the transaction will fail with `:db.error/component-conflict`. 190 | * By default reference attributes are non-component. 191 | 192 | ### Pseudo Entities 193 | 194 | An alternative to creating component entities is to **not use a reference attribute** and store map values under the attribute as component "pseudo-entities". This would still offer the two main behaviors of component entities: 195 | * Retracting the parent with `:db/retractEntity` would still retract the component pseudo-entities. 196 | * Pulling the attribute, would return the map containing all the attributes of the component pseudo-entity. 197 | 198 | However, pseudo-entities have no independent existence in the database. They only exist as a map value under some attribute of a parent entity. Thus, the keys of pseudo-entities would not be interpreted as database attributes, resulting in the following trade-offs: 199 | * It is not possible to index the values under individual keys (pseudo attributes) of pseudo-entities in the AVE index. 200 | * The entire map value representing the component pseudo-entity must be indexed. 201 | * However, a sorted AVE index can be used with a comparator that sorts the pseudo-entities by values of individual keys or combinations of keys. 202 | * `pull` would not be able to join/navigate via keys of pseudo-entities. 203 | * The concept of one parent per component becomes meaningless. 204 | 205 | ## :db/cardinality 206 | 207 | By default, attributes contain just one value. These are known as cardinality one attributes. Cardinality many attributes are also supported. 
208 | 209 | Adding `{:db/cardinality :db.cardinality/many}` to the schema signifies a cardinality many attribute. Cardinality many attributes may contain more than one value and are represented as sets of values. 210 | 211 | If no `:db/cardinality` is specified, `:db.cardinality/one` is the default. 212 | 213 | ## :db/unique 214 | 215 | A uniqueness constraint can be specified under the `:db/unique` key. 216 | * Only `:db.cardinality/one` attributes can have a uniqueness constraint. 217 | * Unique attributes are always indexed in AVE index to support fast uniqueness checks. 218 | * An entity may contain multiple unique attributes, but anomalies may arise. 219 | * By default attributes are non-unique. 220 | 221 | ### :db.unique/identity and :db.unique/value 222 | 223 | * Adding `{:db/unique :db.unique/identity}` to the schema asserts a database-wide unique identifier for an entity with upsert support. 224 | * A unique identity attribute can be used for a globally unique identifier (e.g. `:global-id`). This identifier might link an entity across different databases. 225 | * Adding `{:db/unique :db.unique/value}` to the schema represents an attribute-wide value that can be asserted only once, with no upsert support. 226 | 227 | > To see the `:db/unique` attributes in action check out the [Hands-on Tutorial](TUTORIAL.md). 228 | 229 | ## :db/sort 230 | 231 | By default, cardinality many attributes are stored as unsorted sets in the EAV index. Cardinality many **non-reference** attributes can optionally specify sorting parameters under the `:db/sort` key. Under some circumstances, this is a convenient solution that amortizes sorting costs and eliminates the need to repeatedly sort the same data. 232 | 233 | > **NOTE:** When using sorted sets, all values must be intercomparable among themselves, else adding to a sorted set will fail. 234 | 235 | ### :db/set-type 236 | 237 | The `:db/set-type` key specifies the type of sorted set. 
Two types of sorted sets may be used: 238 | * Clojure's sorted sets 239 | * AVL sets from `clojure.data.avl` 240 | * AVL sets support the full `clojure.core` sorted collections API, but also offer logarithmic time operations: rank queries, "nearest key" lookups, splits by index or key, subsets. Learn more here: https://github.com/clojure/data.avl 241 | 242 | To use Clojure's sorted sets: 243 | 244 | ```clojure 245 | {:db/sort {:db/set-type :db.set-type/sorted-set}} 246 | ``` 247 | 248 | To use sorted AVL sets from `clojure.data.avl`: 249 | 250 | ```clojure 251 | {:db/sort {:db/set-type :db.set-type/avl-set}} 252 | ``` 253 | 254 | ### :db/comparator 255 | 256 | An optional `:db/comparator` key specifies a comparator function. Custom comparators can be used with both Clojure's sorted sets and AVL sets. 257 | 258 | The following sorted sets will compare the values with the function `>=` (greater than or equal to): 259 | 260 | ```clojure 261 | ;; Clojure's sorted set with comparator 262 | {:db/sort {:db/set-type :db.set-type/sorted-set 263 | :db/comparator >=}} 264 | 265 | ;; AVL set with comparator 266 | {:db/sort {:db/set-type :db.set-type/avl-set 267 | :db/comparator >=}} 268 | ``` 269 | 270 | If no `:db/comparator` is specified, the default comparator `compare` will be used. Learn more about comparators here: https://clojure.org/guides/comparators 271 | 272 | If sorting with multiple comparators is desired, different attributes can be used. For example, `:salary-asc` and `:salary-desc` to sort salaries in ascending and descending order. Each of these attributes would be independent of the other, so the user would need to take care to keep the two attribute values consistent. 273 | 274 | ### Sorted Reference Values 275 | 276 | Sorting reference values doesn't make much sense since it entails sorting internal database keys. To sort the entities pointed to by a reference attribute, pull the entities and then sort them.
277 | 278 | Alternatively, don't use a reference attribute. Instead, store the entities as maps, effectively creating component "pseudo-entities". This makes it possible to use sorted sets for these pseudo-entities, but see [Pseudo Entities](#pseudo-entities) for an explanation of tradeoffs. 279 | 280 | ## :db/index 281 | 282 | Unique and reference attributes (`:db/unique` and `:db.type/ref`) are always indexed in the AVE index. For all other attributes it must be specified in the schema. 283 | 284 | The `:db/index` key specifies that an attribute should be indexed in the AVE index. 285 | 286 | Three types of maps may be used for indexing: 287 | * Clojure's (unsorted) map 288 | * Clojure's sorted map 289 | * Sorted map from `clojure.data.avl`. 290 | * AVL maps support the full `clojure.core` sorted collections API, but also support transients and offer logarithmic time operations: rank queries, "nearest key" lookups, splits by index or key, subsets. Learn more here: https://github.com/clojure/data.avl 291 | 292 | While unsorted maps are good for fast lookups of specific single values, sorted maps enable fast lookups for range queries. 293 | 294 | ### :db/map-type 295 | 296 | To use the standard (unsorted) Clojure map: 297 | 298 | ```clojure 299 | {:db/index {:db/map-type :db.map-type/hash-map}} 300 | ``` 301 | 302 | To use a sorted Clojure map: 303 | 304 | ```clojure 305 | {:db/index {:db/map-type :db.map-type/sorted-map}} 306 | ``` 307 | 308 | To use a sorted AVL map: 309 | 310 | ```clojure 311 | {:db/index {:db/map-type :db.map-type/avl-map}} 312 | ``` 313 | 314 | ### :db/comparator 315 | 316 | Both types of sorted map also support custom comparators.
317 | 318 | To index in a sorted map and compare with `>` (greater than): 319 | 320 | ```clojure 321 | {:db/index {:db/map-type :db.map-type/sorted-map 322 | :db/comparator >}} 323 | ``` 324 | 325 | To index in a sorted AVL map and compare with `>` (greater than): 326 | 327 | ```clojure 328 | {:db/index {:db/map-type :db.map-type/avl-map 329 | :db/comparator >}} 330 | ``` 331 | 332 | If no `:db/comparator` is specified, the default comparator `compare` will be used. Learn more about comparators here: https://clojure.org/guides/comparators 333 | 334 | # Indexes 335 | 336 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).** 337 | 338 | EntityGraph contains two indexes: 339 | * entity-attribute-value (EAV) 340 | * attribute-value-entity (AVE) 341 | 342 | These two indexes are sufficient to support all data retrieval operations. Each transaction updates the indexes and produces a new immutable database value. 343 | 344 | ## EAV Index 345 | 346 | The EAV index contains all entities in a nested map. Entries have two distinct forms: 347 | * Form `{e {a v}}`, with only one value allowed in the `v` position 348 | * Form `{e {a #{v1 v2 ...}}}`, with many values allowed in the `v` position 349 | 350 | ### EAV Form `{e {a v}}` 351 | 352 | For `:db.cardinality/one` attributes EAV entries are in the form `{e {a v}}`, with only one value allowed in the `v` position. 353 | 354 | Here's an example EAV index that contains just one entity: 355 | 356 | ```clojure 357 | {1 {:db/id 1 358 | :person/name "Tina Turner" 359 | :person/ssn "111-22-3344"}} 360 | ``` 361 | 362 | The keys of the outer map are entity ids. The values of the outer map are the entities stored as maps with keys representing database attributes, and values representing database values. Each entity contains the special `:db/id` key representing the entity id. 
363 | 364 | ### EAV Form `{e {a #{v1 v2 ...}}}` 365 | 366 | For `:db.cardinality/many` attributes EAV entries are in the form `{e {a #{v1 v2 ...}}}`, where `v1`, `v2`, etc. are distinct values enclosed in a set. 367 | 368 | Here's an example EAV index that contains just one entity: 369 | 370 | ```clojure 371 | {1 {:db/id 1 372 | :person/name "Tina Turner" 373 | :person/ssn "111-22-3344" 374 | :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}} 375 | ``` 376 | 377 | The attribute `:person/aliases` is cardinality many and is represented by a set of values `#{"Queen of Rock" "The Queen of Rock'n'Roll"}`. Thus, the attribute `:person/aliases` contains the values `"Queen of Rock"` and `"The Queen of Rock'n'Roll"`. 378 | 379 | ### Entity Retraction 380 | 381 | * If a transaction results in an entity with no remaining attributes, the entity is completely removed from the EAV index. 382 | * If an entity is retracted with `:db/retractEntity` then the entity, all of its component entities, and all references to the entity and component entities will be retracted as well. 383 | 384 | ## AVE Index 385 | 386 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).** 387 | 388 | The AVE index significantly improves the speed of data retrieval operations involving the indexed attribute at the expense of additional memory space. The AVE index is non-covering, meaning that only entity id is stored in the entity position, not the full entity. 389 | 390 | While every entity is contained in the EAV index in full, the AVE index only contains select attributes: 391 | 392 | * All reference attributes, to support quick lookups of all entities pointing to a target entity. This speeds up reverse navigation in pull queries and is also used for `:db/retractEntity` operations. 393 | * All unique attributes, declared with `:db/unique` property for fast uniqueness checks. 
394 | * Attributes where the `:db/index` key specifies that it should be indexed in the AVE index. 395 | 396 | Entries in the AVE index have two distinct forms: 397 | * `{a {v e}}` with only one entity id allowed in the `e` position 398 | * `{a {v #{e1 e2 ...}}}` with many entity ids allowed in the `e` position 399 | 400 | ### AVE Form `{a {v e}}` 401 | 402 | Unique attributes and component attributes are stored in `{a {v e}}` form, where only one entity id in the `e` position is allowed. This is because only one entity id is logically possible for unique and component attributes. 403 | 404 | The following AVE index contains the `:db.unique/identity` attribute `:person/ssn` with two values: 405 | * `"123-45-6789"` belonging to the person with entity id `1`, 406 | * `"987-65-4321"` belonging to the person with entity id `2`: 407 | 408 | ```clojure 409 | {:person/ssn {"123-45-6789" 1, "987-65-4321" 2}} 410 | ``` 411 | 412 | The keys of the outer map are the indexed attributes (just `:person/ssn` in the example above). The values of the outer map are maps of values mapped to entity ids. 413 | 414 | For `{a {v e}}` form attributes: 415 | * If a transaction results in a removal of an entity id in the `e` position, then the entire `[v e]` entry is removed from the AVE index. 416 | * If a transaction results in more than one entity id in the `e` position for a unique or component attribute, it fails with an error. 417 | 418 | ### AVE Form `{a {v #{e1 e2 ...}}}` 419 | 420 | All non-unique and non-component indexed attributes are stored in `{a {v #{e1 e2 ...}}}` form, where multiple entity ids in the `e` position are possible. The set `#{e1 e2 ...}` contains those entity ids. 
421 | 422 | The following AVE index contains the non-unique and non-component indexed attribute `:person/last-name` with two values: `"Brown"` belonging to entity id `1`, and `"Smith"` belonging to entity ids `2` and `3`: 423 | 424 | ```clojure 425 | {:person/last-name {"Brown" #{1}, "Smith" #{2,3}}} 426 | ``` 427 | 428 | For `{a {v #{e1 e2 ...}}}` form attributes: 429 | * If a transaction results in an empty set in the `e` position, then the entire `{v #{e1 e2 ...}}` entry is removed from the AVE index. 430 | * More than one entity id in the `e` position is supported as it logically makes sense. 431 | 432 | ## Collections in Indexes 433 | 434 | > **NOTE:** Your application logic must correctly interpret cardinality one and cardinality many values retrieved from EntityGraph indexes. The following section clarifies this point, especially as it pertains to collection values. 435 | 436 | ### Cardinality One Attribute Values in EAV and AVE Indexes 437 | 438 | Collection values are valid database values, which may cause some confusion when examining the indexes. Since collection values are valid, a set may appear in the `v` position in the EAV index, representing a single value that is a set. 439 | 440 | For example, let's assume `:person/aliases` is a `:db.cardinality/one` attribute, and we have the following in the EAV index: 441 | 442 | ```clojure 443 | {1 {:db/id 1 444 | :person/name "Tina Turner" 445 | :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}} 446 | ``` 447 | 448 | Then `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` represents a single value. Again, this is because `:person/aliases` is a cardinality one attribute.
449 | 450 | This means `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` will be indexed as a single value in the AVE index: 451 | 452 | ```clojure 453 | {:person/aliases {#{"Queen of Rock" "The Queen of Rock'n'Roll"} 1}} 454 | ``` 455 | 456 | ### Cardinality Many Attribute Values in EAV and AVE Indexes 457 | 458 | In contrast with the cardinality one example above, here's how this would change if `:person/aliases` were a `:db.cardinality/many` attribute. The EAV index would look identical, but the interpretation would be different: 459 | 460 | ```clojure 461 | {1 {:db/id 1 462 | :person/name "Tina Turner" 463 | :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}} 464 | ``` 465 | 466 | The set `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` in the EAV index now represents two distinct values: `"Queen of Rock"` and `"The Queen of Rock'n'Roll"`. 467 | 468 | Consequently, `"Queen of Rock"` and `"The Queen of Rock'n'Roll"` will be indexed as separate values in the AVE index: 469 | 470 | ```clojure 471 | {:person/aliases {"Queen of Rock" 1, "The Queen of Rock'n'Roll" 1}} 472 | ``` 473 | 474 | ### Collection Values for Cardinality Many Attributes 475 | 476 | Now let's examine what happens when we have collection values for a cardinality many attribute. 477 | 478 | Since collection values are valid it is possible to see something like this: 479 | 480 | ```clojure 481 | {1 {:db/id 1 482 | :person/name "Tina Turner" 483 | :person/favorite-food-combos #{#{:burger :fries} #{:pasta :shrimp}}}} 484 | ``` 485 | 486 | Assuming the attribute `:person/favorite-food-combos` is `:db.cardinality/many`, what is the correct interpretation? 487 | 488 | The set `#{#{:burger :fries} #{:pasta :shrimp}}` must be treated as two distinct values `#{:burger :fries}` and `#{:pasta :shrimp}`, and not as a single value `#{#{:burger :fries} #{:pasta :shrimp}}`. 
489 | 490 | Consequently, `#{:burger :fries}` and `#{:pasta :shrimp}` will be indexed as separate values in the AVE index: 491 | 492 | ```clojure 493 | {:person/favorite-food-combos {#{:burger :fries} 1, #{:pasta :shrimp} 1}} 494 | ``` 495 | 496 | # Pull 497 | 498 | Data retrieval is primarily accomplished with the `pull` function. Pull is a declarative way to make hierarchical (and possibly nested) selections of information about entities. 499 | 500 | > **Pull examples appear throughout the [Hands-on Tutorial](TUTORIAL.md)**. 501 | 502 | ## Pull Features 503 | 504 | EntityGraph's `pull` is a subset of Datomic's pull. 505 | 506 | The following features are **supported**: 507 | * Wildcarding 508 | * Nesting 509 | * Combining wildcard and map specifications 510 | * Joins 511 | * Forward and backward attribute navigation 512 | * Recursive pulls 513 | * Recursive select is safe in the presence of cycles. 514 | * When a recursive subselect encounters an entity that it has already seen only the `:db/id` of the entity is returned. 515 | * Unlimited depth on recursion is not specifically supported, but a large recursion limit can be specified. 516 | 517 | The following features are **NOT supported**: 518 | * Naming control 519 | * Defaults 520 | * Transformations 521 | * Limits on the returned results 522 | 523 | ## Pull Results 524 | 525 | ### Empty Results 526 | 527 | * Pull returns `{}` when nothing in `pattern` matches. 528 | * Except for wildcard pattern or if `:db/id` is requested in pattern, in which cases a map of form `{:db/id id}` is returned. 529 | 530 | ### Reference Attribute Values 531 | 532 | * For reference attributes a map of form `{:db/id id}` will be returned for each value. 533 | * If a reference attribute is a component, a map containing all the attributes of the referenced entity will be returned. 534 | 535 | ### Multiple Results 536 | 537 | Multiple results are returned in the following cases: 538 | * For all forward cardinality-many references.
539 | * Reverse references for non-unique/non-component attributes. 540 | 541 | ### Finding All Reverse References 542 | 543 | In a `pull` pattern reverse navigation is possible by using an underscore in the local name segment of the attribute keyword (e.g. `:person/_friend`). However, this requires the user to know which reference attributes might be pointing to the target entity. 544 | 545 | We may wish to identify all references to a given entity, from any and all attributes. The function `find-reverse-refs` takes a database value and a target entity id, and returns a set of `[attribute entity-id]` vectors representing reverse references to the target entity id. 546 | 547 | # Read Directly from Index 548 | 549 | When `pull` is not sufficient, the indexes can be accessed directly with custom data retrieval functions. This approach is meant to replace Datalog/SQL type queries. Declarative queries are sacrificed for (hopefully) performance. Custom data retrieval functions are not expected to be used frequently. 550 | 551 | The user must understand the data model to write custom data retrieval functions successfully. It's particularly important to understand the semantics of different attribute properties that can be declared in the schema, how the EAV and AVE indexes are constructed, and how data is represented in the indexes. 552 | 553 | >**All of this is described above and further elucidated in the [Hands-on Tutorial](TUTORIAL.md).** 554 | 555 | ## Helper Function `check-attr` 556 | 557 | When writing custom data retrieval functions it becomes important to know the properties of attributes being retrieved. The function `check-attr` is provided to assist with this. It takes a database value, an attribute name, an attribute property, and returns a value for that attribute name/property combination. 
558 | 559 | Here are some sample calls of `check-attr`: 560 | 561 | ```clojure 562 | (check-attr db :person/name :db/cardinality) 563 | => :db.cardinality/one 564 | 565 | (check-attr db :person/aliases :db/cardinality) 566 | => :db.cardinality/many 567 | 568 | (check-attr db :person/ssn :db/unique) 569 | => :db.unique/identity 570 | ``` 571 | 572 | ### Return Values of `check-attr` for Different Properties 573 | 574 | The following are all the possible return values for different attribute properties: 575 | 576 | | Property | Possible Return Values | 577 | |:--------------------|----------------------------------------------------------------------------------------------------------------------------:| 578 | | `:db/isRef` | `true`, `false` | 579 | | `:db/isComponent` | `true`, `false` | 580 | | `:db/cardinality` | `:db.cardinality/one`, `:db.cardinality/many` | 581 | | `:db/unique` | `:db.unique/identity`, `:db.unique/value`, `:db.unique/false` | 582 | | `:db/sort` | `:db.sort/sorted-set`, `:db.sort/avl-set`, `:db.sort/false` | 583 | | `:db/index` | `:db.index/hash-map`, `:db.index/sorted-map`, `:db.index/avl-map`, `:db.index/false` | 584 | | `:db/ave-form` | `:db.ave-form/single-e`, `:db.ave-form/eset`, `:db.ave-form/false` | 585 | 586 | Note especially the property `:db/ave-form`. It tells us how the attribute is represented in the AVE index: 587 | * `:db.ave-form/single-e` refers to form **[AVE Form {a {v e}}](#ave-form-a-v-e)** 588 | * `:db.ave-form/eset` refers to form **[AVE Form {a {v #{e1 e2 ...}}}](#ave-form-a-v-e1-e2-)** 589 | 590 | The rest of the properties are described in the **[Schema Section](#schema)**. 591 | 592 | ## Schema Predicates 593 | 594 | The following schema predicates are an alternative to `check-attr`. 595 | 596 | Each predicate takes an encoded schema and attribute. An encoded schema may be obtained from a database value like this `(:db/schema db)`.
597 | 598 | The following schema predicates are available: 599 | 600 | * `ref-type?` 601 | * `component?` 602 | * `cardinality-many?` 603 | * `unique-identity?` 604 | * `unique?` 605 | * `ave-form-single-e?` 606 | * `ave-form-eset?` 607 | 608 | ## ID Predicates 609 | 610 | The following id predicates can help identify the type of id that a value in the id position represents: 611 | * `entity-id?` 612 | * `tempid?` 613 | * `lookup-ref?` -------------------------------------------------------------------------------- /docs/TECHNOTES.md: -------------------------------------------------------------------------------- 1 | # Technical Design Notes 2 | 3 | These are a collection of notes about the reasoning behind some of the technical design decisions. Generally, choices were biased to be more restrictive rather than permissive, as relaxing the restrictions is less likely to lead to breaking changes than tightening restrictions. 4 | 5 | ## Schema 6 | 7 | * Map schema definition, with keys as attribute names, was chosen rather than copying Datomic's vector of maps. 8 | * Map schema eliminates duplicate attribute names. 9 | * Initial instinct was to match Datomic style to support schema reuse, but this idea was dismissed as the systems are too different. 10 | 11 | ## Failed Optimization Attempts 12 | 13 | The following optimizations seemed promising, but failed to deliver. 14 | 15 | * Use transients/reducers in `prepare-tx-data` was attempted (e.g. combine `replace-tempids-list` and `replace-ref-ids-list`), but didn't yield a significant performance benefit. 16 | * Updating EAV and AVE indexes in parallel might offer a real performance gain, but: 17 | * Javascript environment is single-threaded 18 | * Introduces complexity of webworkers 19 | 20 | ## Indexing 21 | 22 | Datomic was the inspiration for EntityGraph indexes, but with considerable modifications. 23 | 24 | Datomic has the following indexes: 25 | * EAV, AEV for all datoms. 26 | * AVE for unique and index attrs.
27 | * VAE for reference attributes 28 | 29 | In EntityGraph, the main distinctions are that indexes only exist in-memory and the index data structure is a nested map. 30 | 31 | EntityGraph indexes: 32 | * EAV index for all attributes and an AVE index for reference and unique attributes, plus any other attributes that are specified by the schema. 33 | * Since we wish to economize on memory use, we don't automatically index everything in the AVE index. 34 | * The original design had a VAE index, but AVE index proved sufficient 35 | * Because the index data structure is a nested map, don't have to scan Vs; just go to A and then to desired V. 36 | * AEV index was not considered because EAV/AVE indexes are sufficient: 37 | * AEV index in datomic helps scans in E order for fixed A. Map indexes don't require scans of EAV index. 38 | * To get all entity ids that contain `:attribute`, we call `(vals (get (:db/ave db) :attribute))`. 39 | * To get all values for `:attribute`, we call `(keys (get (:db/ave db) :attribute))`. 40 | 41 | ## Nested Map Restrictions 42 | 43 | In Datomic, reference to the nested map must be a component attribute or the nested map must include a unique attribute. Of note, the unique attribute can subsequently be retracted. 44 | 45 | Datomic documentation offers this justification: "This constraint prevents the accidental creation of easily-orphaned entities that have no identity or relation to other entities." 46 | https://docs.datomic.com/cloud/transactions/transaction-processing.html#nested-maps-in-transactions 47 | 48 | This constraint was copied, despite occasionally causing some inconvenience, as it can be relaxed in the future without breaking any code. 49 | 50 | ## NanoIDs for entity ids 51 | 52 | Currently sequential integers are used for entity ids, but UUIDs/NanoIDs (https://github.com/zelark/nano-id) were also considered: 53 | * Improved security - not revealing the sequential order. 
54 | * May not be relevant for in-memory app state storage. 55 | * UUIDs or NanoIDs can serve as globally unique identifiers. 56 | * For globally unique identifiers prefer to use a `:db.unique/identity` attribute (e.g. `:global-id`). 57 | * NanoIDs are less performant than integer ids, though the ultimate difference in performance may not be important. 58 | * Since NanoIDs and tempids are both strings, transactions would have to distinguish between them and tempids: 59 | * Solution: Length check 60 | * NanoIDs are of length 21, but tempids might be too. 61 | * Solution: Wrap nanoids in another class like with `deftype` or `defrecord` 62 | * Further performance and memory footprint penalty 63 | * NanoIDs take up more memory per id, especially if wrapped with `deftype` or `defrecord` 64 | 65 | 66 | ## Leaving `{:db/id id}` in EAV index 67 | 68 | When all attributes for an entity are retracted, the entity can either be removed from the EAV index entirely or a `[id {:db/id id}]` entry can remain. It was decided to remove the entity from the EAV index. The decision was guided by this discussion on invalid ids: https://groups.google.com/g/datomic/c/hnOLG-fhZOU/m/RZvLlrGajHIJ 69 | 70 | The following points guided the decision: 71 | * The performance penalty for checking that all attributes for an entity are retracted is negligible. 72 | * Can end up with many "abandoned" `[id {:db/id id}]` entries in EAV, taking up space. 73 | * Keeping `:db/id` allows us to detect if an entity id exists or has ever existed historically (call these "valid" entities): 74 | * Can enforce only valid entity ids in the entity position in assertions, but wouldn't expect user to use a non-existent entity id unless they meant to create a new entity with a keyword entity id for easy programmatic access. 
75 | * Decided to "trust the user" 76 | * Use of non-existent integer id is considered a user error 77 | * Can enforce only valid entity ids in the value position (for reference attributes) in assertions, avoiding dangling refs (pointing to entities that are invalid). 78 | * Dangling refs allowed in Datomic - this choice was copied - "trust the user". 79 | * A reference attribute pointing to a non-existent entity id is not a big problem. Not worth the hassle. 80 | * If we decide to keep `[id {:db/id id}]` when all attributes for an entity are retracted, what to do for `:db/retractEntity` operation? 81 | * When asserting map form entities, there is a performance optimization: for non-existent entities we can just `assoc` the map directly in the EAV index 82 | * Leaving {:db/id id} negates this optimization for previously retracted entities 83 | * But it is most useful for loading data into an empty database. 84 | * Removing `{:db/id id}` ensures that nobody relies on checking for `{:db/id id}` vs `nil` in EAV. However, if it's added later may still break code checking for `nil` and finding `{:db/id id}`. 85 | 86 | ## Pull 87 | 88 | ### Pull Empty Results 89 | 90 | What should `pull` return when an entity id is not in the database? Should it return `nil`, `{}`, or `{:db/id id}`. This decision was interrelated with the decision above: whether to leave `{:db/id id}` in the EAV index when all attributes for an entity are retracted. 91 | 92 | * When an entity id is not in the database `pull` returns `{:db/id id}` when `pattern` is wildcard `'[*]` or `:db/id` is specified in pattern. 93 | * Had we kept `[id {:db/id id}]` for historical entities, would have had the option to treat them differently from never existing entities, but there is no clear need to do this. 94 | 95 | ### Pull Results - Reference Values 96 | 97 | For reference values a map of form `{:db/id id}` is returned rather than just `id`. 
This decision was guided by the following considerations: 98 | * Visually it offers more clarity (easy to spot that it's a reference) 99 | * Using `{:db/id id}` offers consistency between component and non-component reference attributes 100 | * The performance hit is negligible: in one test wrapping 10k ids took 4 msecs, 100K ids 29 msecs 101 | 102 | ### Combining wildcard with join attribute specs in pull pattern 103 | 104 | When combining wildcard with join attribute specs in a pull pattern, an issue arises: Should the wildcard attribute spec overwrite any result that's accumulated so far? 105 | 106 | For example if we have the pattern `[{:person/friends 6} '*]`, we begin with the (recursive) join `{:person/friends 6}`, but then comes the wildcard `'*`. Should we overwrite the value in the result under `:person/friends` with whatever the wildcard returns? 107 | 108 | It was decided that the wildcard attribute spec should not overwrite any previous join attribute specs as that defeats the point of specifying any join attribute specs in the pattern to the left of the wildcard pattern. 109 | 110 | ## Checking Entity IDs in Assertions 111 | 112 | Entity ids in assertions can be checked for their current (and possibly historical) presence in the database, but should they be? This is interrelated with the decision not to keep `{:db/id id}` in EAV index when all attributes of an entity are removed. 113 | 114 | * Performance penalty for this check would be negligible (based on tests). 115 | * Checking only makes sense if `[id {:db/id id}]` entries are kept for historical entities. 116 | * Otherwise, once all attributes are removed from an entity, transactions referencing that entity would fail. 117 | * Could also check `(< id (:db/next-id db))` to check the historical existence of an entity id, but this relies on entity ids being sequential integers and would no longer work if entity ids are switched to UUIDs or NanoIDs or if the integers are no longer sequential. 
118 | * Checking could prevent non-existent integer ids being specified in transaction data. 119 | * Does not apply to keyword entity ids because the correct behavior with keyword entity ids is to add them if they don't exist! 120 | * If a new integer id is used in an assertion, it will create a new entity without updating `:db/next-id` 121 | * Eventually `:db/next-id` will "catch up" and the entities will be merged 122 | * The cost of this mistaken use of new integer id would be borne by the user. 123 | * This mistaken use is not likely 124 | * This mistaken use is of lower likelihood if integer entity ids are switched to NanoIDs or UUIDs, but still possible in theory. 125 | 126 | It was decided not to check entity ids in assertions, neither for their current nor for their historical presence. 127 | 128 | ## One Parent Per Component Constraint 129 | 130 | It was decided to enforce the one parent per component constraint, despite Datomic not enforcing it. 131 | 132 | * Semantically component entities can only have one parent, though Datomic does not enforce this constraint. 133 | * This is somewhat perplexing, but there is likely a good reason for this - possibly performance or complexity 134 | * Neither performance nor complexity is an obstacle in EntityGraph 135 | * While in Datomic it is possible to end up with multiple parents for the same component entity, starting with the component entity id and navigating backwards to parent via reverse component attribute seems to only return the latest asserted parent.
136 | * The performance cost for enforcing this in EntityGraph is as follows: 137 | * For each assertion of a component attribute-value, a lookup of the value in AVE index under each component attribute in the schema 138 | * If there are no component attributes in the schema there is no performance cost 139 | 140 | The following discussions informed this decision: 141 | https://datomic.narkive.com/1HfrgEI5/cardinality-many-iscomponent-and-reverse-relationships 142 | https://groups.google.com/g/datomic/c/wqMWGY39EGk/m/4DYHMYNUdXQJ 143 | https://groups.google.com/g/datomic/c/wY7Hq2KwB2E/m/qpqRUXEeRiEJ 144 | 145 | ## Transaction Functions 146 | 147 | Transaction functions don't make sense in EntityGraph as it is an in-memory database and Clojure's concurrency facilities can be used. 148 | 149 | ## Writing to Storage 150 | 151 | Currently writing the database to storage is not supported. The database was designed to fully reside in-memory. If in future writing to storage is to be undertaken, the following considerations must be kept in mind: 152 | 153 | * All values in the database must be serializable 154 | * When writing a sorted set or sorted map to disk, must ensure it is read back as a sorted data structure. 155 | * In particular, sorted sets can be used as values in EAV index for cardinality many attributes 156 | * In particular, sorted maps can be used in AVE index 157 | * Any other sorted collection values 158 | * See Saving+reading sorted maps to a file in Clojure: https://stackoverflow.com/questions/17347836/savingreading-sorted-maps-to-a-file-in-clojure 159 | * The schema must also be written to disk. 
160 | * Note that currently, once created, the schema cannot be modified as it is intended to last for the duration of the program 161 | * May need to carefully consider schema modification if storing the db to disk is undertaken 162 | * Some schema changes would be more easily accommodated than others 163 | * The straightforward option is to delete and rewrite the database to disk after each schema modification 164 | 165 | ## Would creating an assertion set offer benefits? 166 | 167 | When preparing tx-data for a transaction, `retraction-set` is created to avoid asserting and retracting the same `[e a v]`. This is checked in `check-db-constraints-[one/many]`. 168 | 169 | One then wonders if creating `assertion-set` might be profitable, but it doesn't offer much gain. Relying on a set of `[e a v]` tuples and processing assertions one tuple at a time negates the benefit of directly `assoc`ing map form tx-data to EAV index (instead of processing `[e a v]` tuples one by one). 170 | 171 | ## Enable Independent Processing of Assertions And Retractions 172 | 173 | Currently, in `transact`, processing of retractions must come before processing of assertions because the code that checks for constraint violation relies on this order of operations, specifically the uniqueness constraint checks and the one parent per component checks. 174 | 175 | It would be possible to instead rely on `retraction-set` and `entity-retraction-ids` to check these same constraints. This would enable independent assertions and retractions, thus making them parallelizable. However, the primary target for EntityGraph is ClojureScript (web apps) and leveraging Web Workers may not offer sufficient performance benefit, especially considering that performance does not appear to be an issue so far. 176 | 177 | ## Post Transaction Checks 178 | 179 | There's a tradeoff between ensuring the database is always in a valid/consistent state and the corresponding performance penalty.
It is particularly undesirable to burden correct programs with the performance cost of checks. Also, what is and isn't a valid state requires careful consideration. 180 | 181 | Some invalid database states are more problematic than others. Violations of some constraints (such as the prohibition against nil values) are particularly problematic. Those constraints are enforced. 182 | 183 | Other invalid states are less problematic. Dangling references might be an example. In those instances the constraint is not enforced unless it's particularly simple to implement. 184 | 185 | Finally, there is the option of users checking whatever constraints they want to enforce after calling `transact`. But this is liable to be costly. 186 | 187 | ## Sorted Set Values 188 | 189 | Including the option of sorted sets for cardinality many attributes was carefully considered. The decision to include this feature was made after determining that **there is no performance penalty if this feature is not used**. In other words, if the user doesn't declare any attributes with a `:db/sort` property, there is no performance cost at all. 190 | 191 | The following performance tests confirm this: 192 | * Map form assertion of 10 entities and 20k entities 193 | * Map form assertion overwriting 10 entities and 20k entities 194 | * List form assertions of 10 entities and 20k entities 195 | * List form assertions overwriting 10 entities and 20k entities 196 | * Retraction of 10 entities and 20k entities 197 | 198 | Here are the benchmark numbers in milliseconds (first vector with, second vector without support for sorted sets): 199 | * `[4420 5334 3941 4056 7861 10935 4159 4483 6293 11934]` 200 | * `[4387 5659 3938 4131 7791 10496 4139 4444 6269 12123]` 201 | 202 | It's apparent at a glance there is no performance difference to speak of.
-------------------------------------------------------------------------------- /drafts/entity_graph/core_test_generative.cljc: -------------------------------------------------------------------------------- 1 | (ns entity-graph.core-test-generative 2 | (:require 3 | #?(:clj [clojure.pprint :refer [pprint]] 4 | :cljs [cljs.pprint :refer [pprint]]) 5 | [clojure.set :refer [intersection difference union rename-keys subset?]] 6 | #?(:clj [clojure.test :as t :refer [is are deftest testing]] 7 | :cljs [cljs.test :as t :refer-macros [is are deftest testing]]) 8 | [entity-graph.core :refer [create-db transact cardinality-many? unique? ref-type? index? 9 | pull pull-many get-ids get-eav-tuples] :as eg] 10 | ;; Spec 11 | #?(:clj [clojure.spec.alpha :as s] 12 | :cljs [cljs.spec.alpha :as s]) 13 | ;; need to require clojure.test.check.generators for cljs generators work 14 | ;; https://clojure.atlassian.net/browse/CLJS-1792 15 | ;;https://stackoverflow.com/questions/57877004/how-to-fix-clojure-test-check-generators-never-required-when-exercising-a-func 16 | #?(:cljs [clojure.test.check.generators]) 17 | #?(:clj [clojure.spec.gen.alpha :as gen] 18 | :cljs [cljs.spec.gen.alpha :as gen])) 19 | #?(:cljs (:require-macros [entity-graph.macros :refer [assert-fail? assert-fail-with-msg?]]) 20 | :clj (:require [entity-graph.macros :refer [assert-fail? assert-fail-with-msg?]]))) 21 | 22 | ;; ========= 23 | ;; Schema Specs 24 | 25 | (s/def ::person-first-name #{"John" "Mary" "Sam" "Jen"}) 26 | 27 | (s/def ::person-last-name #{"Smith" "Brown" "Doe" "Black"}) 28 | 29 | (def full-name-gen 30 | (gen/fmap 31 | (fn [[fn ln]] 32 | (str fn " " ln )) 33 | (gen/tuple 34 | (s/gen ::person-first-name) 35 | (s/gen ::person-last-name)))) 36 | 37 | (comment (gen/sample full-name-gen 5)) 38 | 39 | (s/def ::person-name (s/with-gen string? 
(fn [] full-name-gen))) 40 | 41 | (s/def ::person-aliases (s/* string?)) 42 | 43 | (def non-empty-string-alphanumeric 44 | "Generator for non-empty alphanumeric strings" 45 | (gen/such-that #(not= "" %) (gen/string-alphanumeric))) 46 | 47 | ;; TODO: ensure emails are unique 48 | (def email-gen 49 | "Generator for email addresses" 50 | (gen/fmap 51 | (fn [[name host ]] 52 | (str name "@" host ".com")) 53 | (gen/tuple 54 | non-empty-string-alphanumeric 55 | non-empty-string-alphanumeric))) 56 | 57 | (s/def ::person-email (s/with-gen string? (fn [] email-gen))) 58 | 59 | (s/def ::person-city #{"New York" "Moscow" "London" "Paris" "Munich" "Berlin" "San Francisco" "Houston"}) 60 | 61 | (s/def ::person-past-cities (s/* ::person-city)) 62 | 63 | (s/def ::person-salary (s/int-in 30000 300000)) 64 | 65 | (s/def ::person-past-salaries (s/* ::person-salary)) 66 | 67 | (s/def ::person 68 | (s/keys :req-un [::person-name ::person-email] 69 | :opt-un [::person-aliases ::person-city ::person-past-cities ::person-salary ::person-past-salaries])) 70 | 71 | (comment 72 | (gen/generate (s/gen ::person))) 73 | 74 | (s/def ::drivers-license-number 75 | ;uuid? 76 | (s/with-gen string? 
(fn [] non-empty-string-alphanumeric)) 77 | ) 78 | 79 | (s/def ::drivers-license-state #{"NY" "NJ" "TX" "AR"}) 80 | 81 | (s/def ::drivers-license 82 | (s/keys :req-un [::drivers-license-number ::drivers-license-state])) 83 | 84 | (comment 85 | (gen/generate (s/gen ::drivers-license))) 86 | 87 | (defn to-db-attr 88 | [person] 89 | (rename-keys person {:person-name :person/name 90 | :person-email :person/email 91 | :person-aliases :person/aliases 92 | :person-city :person/city 93 | :person-past-cities :person/past-cities 94 | :person-salary :person/salary 95 | :person-past-salaries :person/past-salaries 96 | :drivers-license-number :drivers-license/number 97 | :drivers-license-state :drivers-license/state})) 98 | 99 | (def drivers-licenses (->> (gen/sample (s/gen ::drivers-license) 5) (map to-db-attr))) 100 | 101 | (def persons (->> (gen/sample (s/gen ::person) 10) (map to-db-attr))) 102 | 103 | ;; TODO: can be redone with nested maps and with :db/unique email 104 | ;; TODO: ensure drivers license numbers are unique -> maybe this is why datomic wants them to have unique identifiers? 105 | ;; TODO: purposely test duplicate unique identifiers 106 | (defn mk-drivers-licenses-tx-data 107 | [persons] 108 | (reduce (fn [tx-data [{:keys [drivers-license/number] :as dl} {:keys [db/id person/email] :as person}]] 109 | ;; using dl num as tempid, could also use email 110 | (-> tx-data 111 | (conj (assoc dl :db/id number)) 112 | (conj {:db/id id :person/drivers-license number}))) 113 | [] (map vector drivers-licenses persons))) 114 | 115 | (defn mk-friends-tx-data 116 | "Makes tx-data for creating friend relationships between persons." 
117 | [persons] 118 | (reduce (fn [tx-data {:keys [db/id] :as person}] 119 | (let [friends (random-sample 0.3 (keys (dissoc persons id)))] 120 | (conj tx-data {:db/id id :person/friends friends}))) 121 | [] (vals persons))) 122 | 123 | (defn mk-best-friend-tx-data 124 | [persons] 125 | (reduce 126 | (fn [tx-data id] 127 | (let [bestie-id (-> (dissoc persons id) vals rand-nth :db/id)] 128 | (conj tx-data {:db/id id :person/best-friend bestie-id}))) 129 | [] (random-sample 0.6 (keys persons)))) 130 | 131 | ;; ========= 132 | ;; Map form / List form 133 | 134 | ;; if there are nil values for retractions, can produce either [:db/retract e a] or [:db/retract e a nil] 135 | ;; currently [:db/retract e a nil] 136 | (defn map->list-form1 137 | "Converts `tx-form` from map form to list form. Returns a vector of list forms." 138 | [schema {:keys [db/id db/op] :as tx-form}] 139 | (let [op (or op :db/add)] 140 | (reduce-kv 141 | (fn [tx-data a v] 142 | (if (cardinality-many? schema a) 143 | (apply conj tx-data (map #(vector op id a %) v)) 144 | (conj tx-data [op id a v]))) 145 | [] (dissoc tx-form :db/id :db/op)))) 146 | 147 | ;; only used for testing 148 | (defn map->list-form 149 | [schema tx-data] 150 | (mapcat #(map->list-form1 schema %) tx-data)) 151 | 152 | ;; ========= 153 | ;; Generative tests 154 | 155 | ;; ========= 156 | ;; AVE net ops 157 | 158 | (defn ve-tuple [[op e a v]] [v e]) 159 | (defn av-tuple [[op e a v]] [a v]) 160 | (defn eav-tuple [[op e a v]] [e a v]) 161 | 162 | ;; remember `tx-data` is already grouped by id and by attr 163 | (defn one-add 164 | [tx-data tuple-fn] 165 | (let [[result last-added] 166 | (reduce (fn [[result last-tuple] [op e a v :as tx-form]] 167 | (let [tuple (tuple-fn tx-form)] 168 | (case op 169 | ;; replace previous assertion 170 | :db/add [(-> result (disj last-tuple) (conj tuple)) tuple] 171 | :db/retract [(disj result tuple) last-tuple]))) 172 | [#{} :last-tuple] tx-data)] 173 | result)) 174 | 175 | (defn many-add 176 | [tx-data 
tuple-fn] 177 | (reduce (fn [r [op e a v :as tx-form]] 178 | (let [tuple (tuple-fn tx-form)] 179 | (case op 180 | :db/add (conj r tuple) 181 | :db/retract (disj r tuple)))) 182 | #{} tx-data)) 183 | 184 | ;ave 185 | (defn net-additions 186 | [schema [id tx-data] tuple-fn] 187 | (let [by-attr (group-by (fn [[op e a v]] a) tx-data)] 188 | (reduce-kv (fn [r attr data] 189 | (if (cardinality-many? schema attr) 190 | (update r attr #(reduce conj % (many-add data tuple-fn))) 191 | (update r attr #(reduce conj % (one-add data tuple-fn))))) 192 | {} by-attr))) 193 | 194 | (defn one-retract 195 | [tx-data tuple-fn] 196 | (reduce (fn [r [op e a v :as tx-form]] 197 | (let [tuple (tuple-fn tx-form)] 198 | (case op 199 | :db/add (disj r tuple) 200 | :db/retract (conj r tuple)))) 201 | #{} tx-data)) 202 | 203 | (defn many-retract 204 | [tx-data tuple-fn] 205 | (reduce (fn [r [op e a v :as tx-form]] 206 | (let [tuple (tuple-fn tx-form)] 207 | (case op 208 | :db/add (disj r tuple) 209 | :db/retract (conj r tuple)))) 210 | #{} tx-data)) 211 | 212 | (defn net-retractions 213 | [schema [id tx-data] tuple-fn] 214 | (let [by-attr (group-by (fn [[op e a v]] a) tx-data)] 215 | (reduce-kv (fn [r attr tx-data] 216 | (if (cardinality-many? schema attr) 217 | (update r attr #(reduce conj % (many-retract tx-data tuple-fn))) 218 | (update r attr #(reduce conj % (one-retract tx-data tuple-fn))))) 219 | {} by-attr))) 220 | 221 | (defn net-ave-updates 222 | "Returns sets of [v e] pairs that should update AVE index based on `tx-data`. 223 | Additions are under top level :db/add key, retractions under :db/retract key. 224 | The sets of [v e] pairs are further grouped by attribute." 
225 | [schema tx-data] 226 | (let [list-form (map->list-form schema tx-data) 227 | by-id (group-by second list-form) 228 | additions (map #(net-additions schema % ve-tuple) by-id) 229 | retractions (map #(net-retractions schema % ve-tuple) by-id)] 230 | {:db/add (apply merge-with concat additions) 231 | :db/retract (apply merge-with concat retractions)})) 232 | 233 | (defn ave-attr->ve-set 234 | "Returns `ave` index under `attr` key represented as a set of [v e] tuples." 235 | [ave schema attr] 236 | (cond 237 | (unique? schema attr) 238 | (set (get ave attr)) 239 | (or (ref-type? schema attr) (index? schema attr)) 240 | (reduce-kv (fn [ve-set v e-set] 241 | (reduce #(conj %1 (vector v %2)) ve-set e-set)) 242 | #{} (get ave attr)) 243 | :default 244 | nil)) 245 | 246 | (defn ave->ave-set 247 | [ave schema] 248 | (reduce (fn [ave-set attr] 249 | (let [ve-set (ave-attr->ve-set ave schema attr) 250 | ave-attr-set (map (fn [[v e]] [attr v e]) ve-set)] 251 | (reduce conj ave-set ave-attr-set))) 252 | #{} (keys ave))) 253 | 254 | (defn ave->eav-set 255 | [ave schema] 256 | (reduce (fn [ave-set attr] 257 | (let [ve-set (ave-attr->ve-set ave schema attr) 258 | eav-attr-set (map (fn [[v e]] [e attr v]) ve-set)] 259 | (reduce conj ave-set eav-attr-set))) 260 | #{} (keys ave))) 261 | 262 | (defn process-ve-map 263 | [ve-map schema attr] 264 | (reduce-kv (fn [r v e] 265 | (cond 266 | (unique? schema attr) 267 | (conj r [e attr v]) 268 | (or (ref-type? schema attr) (index? schema attr)) 269 | (reduce #(conj %1 [%2 attr v]) r e) 270 | :default 271 | r)) 272 | [] ve-map)) 273 | 274 | (defn ave->eav-set2 275 | [ave schema] 276 | (reduce-kv (fn [eav-set attr ve-map] 277 | (reduce conj eav-set (process-ve-map ve-map schema attr))) 278 | #{} ave)) 279 | 280 | (defn ave-updates-attr 281 | "Returns `ave` index under `attr` key represented as a set of [v e] pairs with `ave-updates` applied." 
282 | [ave-before schema net-updates attr] 283 | (let [net-add (-> net-updates :db/add attr set) 284 | net-retract (-> net-updates :db/retract attr set) 285 | ve-set (ave-attr->ve-set ave-before schema attr)] 286 | ;; add/retract order doesn't matter 287 | (-> ve-set (union net-add) (difference net-retract)))) 288 | 289 | ;; ========= 290 | ;; EAV net ops 291 | 292 | ;; `last-tuples` keeps track of the last assertion for cardinality/one attrs, so they can be removed when "overwritten" 293 | (defn eav-add1 294 | [[tuple-set last-tuples] schema [op e a v :as tx-form]] 295 | (let [tuple [e a v]] 296 | (if (cardinality-many? schema a) 297 | (case op 298 | :db/add [(conj tuple-set tuple) last-tuples] 299 | :db/retract [(disj tuple-set tuple) last-tuples]) 300 | (case op 301 | ;; replace previous assertion 302 | :db/add [(-> tuple-set (disj (last-tuples [e a])) (conj tuple)) (assoc last-tuples [e a] tuple)] 303 | :db/retract [(disj tuple-set tuple) last-tuples])))) 304 | 305 | (defn eav-retract1 306 | [tuple-set schema [op e a v :as tx-form]] 307 | (let [tuple [e a v]] 308 | (if (cardinality-many? schema a) 309 | (case op 310 | :db/add (disj tuple-set tuple) 311 | :db/retract (conj tuple-set tuple))) 312 | (case op 313 | :db/add (disj tuple-set tuple) 314 | :db/retract (conj tuple-set tuple)))) 315 | 316 | (defn net-additions-eav 317 | [schema tx-data-list] 318 | (let [[additions last-tuples] (reduce #(eav-add1 %1 schema %2) [#{} {}] tx-data-list)] 319 | additions)) 320 | 321 | (defn net-retractions-eav 322 | [schema list-form] 323 | (reduce #(eav-retract1 %1 schema %2) #{} list-form)) 324 | 325 | (defn net-eav-updates 326 | [schema tx-data] 327 | (let [list-form (map->list-form schema tx-data)] 328 | {:db/add (net-additions-eav schema list-form) 329 | :db/retract (net-retractions-eav schema list-form)})) 330 | 331 | (defn eav->eav-set 332 | [eav schema] 333 | "Returns `eav` index represented as a set of [e a v] tuples." 
334 | ;; turn into list form removing db/id 335 | (set (map rest (map->list-form schema (vals eav))))) 336 | 337 | (defn expected-eav 338 | [eav-before schema tx-data] 339 | (let [{:keys [db/add db/retract]} (net-eav-updates schema tx-data)] 340 | ;; add/retract order doesn't matter 341 | (-> (eav->eav-set eav-before schema) (union add) (difference retract)))) 342 | 343 | ;; todo: maybe `net-eav-updates` can return a seq instead of set 344 | (defn expected-ave 345 | [ave-before schema tx-data] 346 | (let [{:keys [db/add db/retract]} (net-eav-updates schema tx-data) 347 | index-in-ave? (fn [[e a v]] (or (unique? schema a) (ref-type? schema a) (index? schema a))) 348 | add (set (filter index-in-ave? add)) 349 | retract (set (filter index-in-ave? retract))] 350 | (-> (ave->eav-set ave-before schema) (union add) (difference retract)))) 351 | 352 | ;;;;;;; 353 | 354 | ;; retractions (transactx db (map #(assoc % :db/op :db/retract) persons)) 355 | (deftest test1 356 | ;; Add some persons 357 | (let [{:keys [tx-data db-before db-after tempids] :as r} (transact db-empty persons) 358 | {eav-before :eav ave-before :ave} db-before 359 | {eav-after :eav ave-after :ave} db-after 360 | net-updates-ave (net-ave-updates schema tx-data)] 361 | 362 | (is (inc (:db/tx-count db-before)) (:db/tx-count db-after)) 363 | 364 | ;; EAV/AVE general 365 | (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema))) 366 | (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema))) 367 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema))) 368 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema))) 369 | ;; todo generalize to look for all keys in tx-data 370 | (is (= (set (keys ave-after)) #{:person/email :person/salary :person/past-salaries 371 | :person/city :person/past-cities :person/drivers-license 372 | :person/best-friend :person/friends})) 373 | ;; AVE by attribute... 
374 | 375 | ;; :person/email - :db.unique/identity 376 | (is (= (ave-updates-attr ave-before schema net-updates-ave :person/email) 377 | (ave-attr->ve-set ave-after schema :person/email))) 378 | 379 | ;; :person/city - :db.index/unsorted, :db.cardinality/one 380 | ;; make sure the non-unique ave indexes are sets 381 | (is (empty? (remove set? (-> ave-after :person/city vals)))) 382 | (is (= (ave-updates-attr ave-before schema net-updates-ave :person/city) 383 | (ave-attr->ve-set ave-after schema :person/city))) 384 | 385 | ;; :person/past-cities - :db.index/unsorted, :db.cardinality/many 386 | (is (= (ave-updates-attr ave-before schema net-updates-ave :person/past-cities) 387 | (ave-attr->ve-set ave-after schema :person/past-cities))) 388 | 389 | ;; :person/salary - :db.index/sorted, :db.cardinality/one 390 | ;; ensure the map is sorted 391 | (is (sorted? (:person/salary ave-after))) 392 | ;; ensure all vals are sets 393 | (is (empty? (remove set? (-> ave-after :person/salary vals)))) 394 | (is (= (ave-updates-attr ave-before schema net-updates-ave :person/salary) 395 | (ave-attr->ve-set ave-after schema :person/salary))) 396 | 397 | ;; :person/past-salaries - :db.index/sorted, :db.cardinality/many 398 | (is (sorted? (:person/past-salaries ave-after))) 399 | (is (= (ave-updates-attr ave-before schema net-updates-ave :person/past-salaries) 400 | (ave-attr->ve-set ave-after schema :person/past-salaries))) 401 | 402 | ;; Add best friend references 403 | (let [ 404 | ;persons (->> (ave-after :person/email) (vals) (select-keys eav-after)) 405 | persons (select-keys eav-after (eg/ids-by-attr-unique ave-after :person/email)) 406 | best-friend-tx (mk-best-friend-tx-data persons) 407 | {:keys [tx-data db-before db-after tempids] :as r} (transact db-after best-friend-tx) 408 | {eav-before :eav ave-before :ave} db-before 409 | {eav-after :eav ave-after :ave} db-after] 410 | (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema))) 411 | (is (subset? 
(ave->eav-set ave-after schema) (eav->eav-set eav-after schema))) 412 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema))) 413 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema))) 414 | ;; TODO: id is arbitrary 415 | (pprint (eav-after 8)) 416 | (pprint (:person/best-friend ave-after)) 417 | (pprint (pull db-after [:person/best-friend] 8)) 418 | (pprint (pull db-after [:person/_best-friend] 8)) 419 | (pprint (pull db-after [{:person/_best-friend [:person/name]}] 8)) 420 | (pprint tx-data) 421 | 422 | ;; eav check 423 | (is (= (set (pull-many db-after [:db/id :person/best-friend] (map :db/id best-friend-tx))) 424 | (set best-friend-tx))) 425 | 426 | ;; not very generic 427 | #_(is (= (->> (map :db/id best-friend-tx) 428 | (map eav-after) 429 | (map select-keys [:db/id :person/best-friend]) 430 | (set)) 431 | (set best-friend-tx))) 432 | ) 433 | ;; Add friends references 434 | (let [ 435 | persons (select-keys eav-after (vals (ave-after :person/email))) 436 | ;persons (select-keys eav-after (ids-by-attr-ave db-after :person/email)) 437 | friends-tx (mk-friends-tx-data persons) 438 | {:keys [tx-data db-before db-after tempids] :as r} (transact db-after friends-tx) 439 | {eav-before :eav ave-before :ave} db-before 440 | {eav-after :eav ave-after :ave} db-after] 441 | (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema))) 442 | (is (subset? 
(ave->eav-set ave-after schema) (eav->eav-set eav-after schema))) 443 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema))) 444 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema))) 445 | ;; TODO: testing ranges here 446 | (println ">>>> AVE SALARIES: " (:person/salary ave-after)) 447 | (println ">>> RESULT: " (get-ids db-after :person/salary [[>= 30000] [< 30004]])) 448 | (pprint (eav-after 7)) 449 | (pprint (:person/friends ave-after)) 450 | (pprint (pull db-after [:person/friends] 7)) 451 | (pprint (pull db-after [:person/_friends] 7)) 452 | (pprint (pull db-after [{:person/_friends [:person/name]}] 7)) 453 | (println ">>> NON 1") 454 | (pprint (pull db-after [:person/hui] 7)) 455 | (println ">>> NON 2") 456 | (pprint (pull db-after [:person/name {:person/friends [:person/hui]}] 7)) 457 | (println ">>> NON 3") 458 | (pprint (pull db-after [{:person/_friends [:person/hui]}] 7)) 459 | 460 | ;; "chaining" 461 | (println "111111") 462 | (let [many 463 | (eg/get-ids-multi db-after 464 | :person/past-cities #{"Moscow" "Berlin"} 465 | :person/salary #(> % 30000)) 466 | one 467 | (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"}) 468 | (get-ids db-after :person/salary #(> % 30000)))] 469 | (is (= many one))) 470 | (pprint 471 | (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"}) 472 | 473 | ;; ids of ppl who HAVE friends 474 | ;(get-ids db-after :person/friends true) 475 | ;; ids of ppl who are friends with intersect-ids 476 | ;; todo: works, but loses grouping by 'original' :db/id of person entity 477 | ;; could keep the grouping by doing each 'original' person id separately => use pull-many! 
478 | (get-ids db-after :person/friends) 479 | 480 | ;(get-ids db-after :person/salary #(> % 30000)) 481 | (union (get-ids db-after :person/name #{"John"})) 482 | ;(map eav-after) 483 | ;(map #(select-keys % [:db/id :person/name :person/salary :person/friends :person/past-cities])) 484 | ;; alternative to get entityes 485 | (eg/get-entities-eav db-after) 486 | ;; or select specific keys 487 | ;(eg/get-entities-eav db-after [:db/id :person/name :person/salary :person/friends :person/past-cities]) 488 | )) 489 | 490 | ;(println "222222") 491 | (pprint 492 | (map eav-after 493 | (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"}) 494 | (get-ids db-after :person/salary #(> % 30000)) 495 | (get-ids db-after :person/friends) 496 | ))) 497 | 498 | #_(do 499 | (println "=== EAV TUPLES ===") 500 | (pprint 501 | (get-eav-tuples db-after :person/past-cities #{"Moscow" "Berlin"})) 502 | (pprint 503 | (get-eav-tuples db-after :person/salary #(> % 30000)))) 504 | 505 | #_(do 506 | (println "=== AVE INVERT ===") 507 | (pprint (eg/invert-ave-a-non-unique (get-in db-after [:ave :person/past-cities])))) 508 | 509 | (is (= (set (pull-many db-after [:db/id :person/friends] (map :db/id friends-tx))) 510 | (set tx-data)))) 511 | 512 | #_(let [persons (map eav-after (eg/ids-by-attr-unique db-after :person/email)) 513 | drivers-licenses-tx (mk-drivers-licenses-tx-data (vals persons)) 514 | ;persons (eav-by-attr db-after :person/email) 515 | ;drivers-licenses-tx (mk-drivers-licenses-tx-data (vals persons)) 516 | {:keys [tx-data db-before db-after tempids] :as r} (transact db-after drivers-licenses-tx) 517 | {eav-before :eav ave-before :ave} db-before 518 | {eav-after :eav ave-after :ave} db-after 519 | ;; TODO: relying on tx-data rather than original tx with tempids 520 | ;; had to remove nils since not every person was "issued" a drivers license 521 | expected-drivers-licenses-freqs (frequencies (remove nil? 
(map :person/drivers-license tx-data))) 522 | ;; for eav: 523 | persons-tx-data (filter #(contains? % :person/drivers-license) tx-data) 524 | drivers-license-tx-data (filter #(contains? % :drivers-license/number) tx-data)] 525 | (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema))) 526 | (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema))) 527 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema))) 528 | (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema))) 529 | ;; TODO: id is arbitrary 530 | ;(pprint eav) 531 | (pprint (eav-after 14)) 532 | (pprint (:person/drivers-license ave-after)) 533 | (pprint (pull db-after [:person/drivers-license] 7)) 534 | (pprint (pull db-after [:person/_drivers-license] 14)) 535 | (pprint (pull db-after [{:person/_drivers-license [:person/name :db/id]}] 14)) 536 | 537 | ;; eav check 538 | (is (= (set (pull-many db-after 539 | [:db/id :drivers-license/number :drivers-license/state] 540 | (map :db/id drivers-license-tx-data))) 541 | (set drivers-license-tx-data))) 542 | 543 | ;; TODO: returning :db/id for refs breaks this test 544 | (is (= (set (pull-many db-after 545 | [:db/id :person/drivers-license] 546 | (map :db/id persons-tx-data))) 547 | (set persons-tx-data)))))) 548 | 549 | (deftest test-queries 550 | (let [{:keys [tx-data db-before db-after tempids] :as r} (transact db-empty persons) 551 | {eav-before :eav ave-before :ave} db-before 552 | {eav-after :eav ave-after :ave} db-after] 553 | (pprint (:eav db-after)) 554 | (println "=================================") 555 | ;(pprint (eg/get-tuples db-after :person/past-cities #{"Moscow" "Berlin"})) 556 | (pprint (eg/get-ids2 db-after :person/past-cities #{"Moscow" "Berlin"})) 557 | #_(pprint (map eav-after (intersection (get-ids db-after :person/past-cities #{"Moscow" "Berlin"}) 558 | (get-ids db-after :person/salary #(> % 30000)) 559 | ;(ids-by-attr db-after 
:person/best-friend) 560 | ))) 561 | 562 | #_(pprint (map eav-after (union (get-ids db-after :person/past-cities #{"Moscow" "Berlin"}) 563 | (get-ids db-after :person/salary #(> % 30000))))) 564 | 565 | )) 566 | 567 | ;; ========= 568 | ;; Low Level Specs 569 | 570 | (s/def ::kw-id #{:kw-id1 :kw-id2 :kw-id3 :kw-id4}) 571 | 572 | (s/def ::string-id #{"string-id1" "string-id2" "string-id3" "string-id4"}) 573 | 574 | (s/def ::proper-id (s/or :pos-int pos-int? :kw ::kw-id)) 575 | 576 | (s/def ::tempid (s/or :neg-int neg-int? :string ::string-id)) 577 | 578 | (s/def ::attribute #{:person/name :person/aliases}) 579 | 580 | (s/def ::value (s/or :number number? :string string?)) 581 | 582 | (s/def ::lookup-ref (s/cat :attribute ::attribute :value ::value)) 583 | 584 | (s/def ::db-id (s/or :proper-id ::proper-id 585 | :tempid ::tempid 586 | :lookup-ref ::lookup-ref)) 587 | 588 | ;; TODO nested map form 589 | (s/def ::map-form-tx 590 | (s/keys :req [] :opt-un [::db-id])) 591 | 592 | (s/def ::op #{:db/add :db/retract}) 593 | 594 | (s/def ::list-form-tx 595 | (s/or :with-value (s/cat :op ::op :id ::db-id :attribute ::attribute :value ::value) 596 | :without-value (s/cat :op #{:db/retract} :id ::db-id :attribute ::attribute))) 597 | 598 | (s/def ::tx-form (s/or :map-form ::map-form-tx :list-form ::list-form-tx)) 599 | 600 | (s/def ::tx-data (s/* ::tx-form)) 601 | 602 | (comment 603 | (gen/generate (s/gen ::db-id)) 604 | (gen/generate (s/gen ::lookup-ref)) 605 | (gen/generate (s/gen ::map-form-tx)) 606 | (gen/generate (s/gen ::list-form-tx)) 607 | (gen/generate (s/gen ::tx-form)) 608 | (gen/generate (s/gen ::tx-data)) 609 | ) 610 | 611 | ;; ========= 612 | ;; Create DB Test 613 | 614 | (deftest test-create-db 615 | (create-db test-schema)) 616 | 617 | ;; ========= 618 | ;; Transact Test 619 | 620 | ;; To Test 621 | ;; add/retract 622 | ;; different :db/id forms: proper-id, :db/id not specified, tempid, lookup-ref 623 | ;; mix of map-forms and list-forms 624 | 625 | ;; ensure 
;; there are no uniqueness violations after txs
;; ensure indexes are properly updated
;; ensure assertions are fired when there are violations (e.g. "invalid op")
;; ensure tempids resolve properly and entities with same tempid in tx have the same db/id
;; ensure blank :db/id in tx get a db/id

;; nested maps

;; Transacting a single map form without :db/id returns the four report keys
;; and assigns a :db/id to the entity in :tx-data.
(deftest test-transactx
  (let [tx-d [{:person/name "Ivan"}]
        {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)]
    (is (= (set (keys r))
           #{:db-before :db-after :tx-data :tempids}))
    (is (contains? (first tx-data) :db/id))))

;; (transactx db [{:db/id -1 :person/name "Ivan"} {:db/id -1 :person/name "Vasil"}])
;; Replacing a value must leave exactly one entry for the attribute in the AVE index.
;; The first half checks :person/name, whose AVE entry is compared against an id set;
;; the second half checks :person/email, whose AVE entry is compared against a single id.
(deftest test-replacing-in-ave
  ;; add initial value
  (let [tx-d [{:person/name "Ivan"}]
        {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)
        id (-> tx-data first :db/id)]
    (is (= (-> db-after :ave :person/name (get "Ivan")) #{id}))
    ;; replace value (map form); `db-after` is shadowed by the new transaction result
    (let [tx-d [{:db/id id :person/name "Vasil"}]
          {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
      (is (= 1 (count (-> db-after :ave :person/name))))
      (is (= (-> db-after :ave :person/name (get "Vasil")) #{id})))
    ;; replace value (list form), again starting from the db that holds "Ivan"
    (let [tx-d [[:db/add id :person/name "Vasil"]]
          {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
      (is (= 1 (count (-> db-after :ave :person/name))))
      (is (= (-> db-after :ave :person/name (get "Vasil")) #{id}))))

  ;; SAME for UNIQUE
  ;; add initial value
  (let [tx-d [{:person/email "a@a.com"}]
        {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)
        id (-> tx-data first :db/id)]
    (is (= (-> db-after :ave :person/email (get "a@a.com")) id))
    ;; replace value (map form)
    (let [tx-d [{:db/id id :person/email "b@b.com"}]
          {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
      (is (= 1 (count (-> db-after :ave :person/email))))
      (is (= (-> db-after :ave :person/email (get "b@b.com")) id)))
    ;; replace value (list form)
    (let [tx-d [[:db/add id :person/email "b@b.com"]]
          {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
      (is (= 1 (count (-> db-after :ave :person/email))))
      (is (= (-> db-after :ave :person/email (get "b@b.com")) id)))))

;; NOTE(review): the result of the :db/retract transaction is discarded; the assertions
;; below only look at the report of the initial transaction -- TODO assert on the retraction.
(deftest test-retract-list
  (let [tx-d [{:person/name "Ivan" :person/aliases #{"Goga" "Gosha"}}]
        {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)
        id (-> tx-data first :db/id)]
    (transact db-after [[:db/retract id :person/aliases ["Goga" "Gosha"]]])
    (is (= (set (keys r))
           #{:db-before :db-after :tx-data :tempids}))
    (is (contains? (first tx-data) :db/id))))

#_(transact db-empty [{:db/id "ivan" :person/name "ivan"}
                      {:person/name "vasil" :person/friends ["ivan"]}])

;; Test lookup ref in value position for :db.type/ref attribute
#_(let [{:keys [tx-data db-before db-after tempids]} (transact db-empty [{:db/id "ivan" :person/name "ivan"
                                                                           :person/email "a@a.com"}])]
    (transact db-after [{:person/name "vasil" :person/friends [[:person/email "a@a.com"]]}]))

--------------------------------------------------------------------------------
/drafts/entity_graph/macros.clj:
--------------------------------------------------------------------------------
(ns entity-graph.macros)

(defmacro assert-fail?
  "Expands to `(is (thrown? <assertion-error-class> form))`.
  The class is js/Error when expanding ClojureScript code and
  java.lang.AssertionError otherwise. `is` is emitted unqualified, so it must
  be referred in the namespace that uses this macro."
  [form]
  (if (:ns &env) ;; this only exists when expanding CLJS code
    ;; the class symbol must be `js/Error`; a trailing dot is constructor-call
    ;; syntax, not a class name
    (list 'is (list 'thrown? 'js/Error form))
    (list 'is (list 'thrown? 'java.lang.AssertionError form))))

(defmacro assert-fail-with-msg?
  "Like `assert-fail?`, but expands to `thrown-with-msg?` with the regex `re`."
  [re form]
  (if (:ns &env) ;; this only exists when expanding CLJS code
    (list 'is (list 'thrown-with-msg? 'js/Error re form))
    (list 'is (list 'thrown-with-msg? 'java.lang.AssertionError re form))))

--------------------------------------------------------------------------------
/drafts/entity_graph/query.cljc:
--------------------------------------------------------------------------------
(ns entity-graph.query
  (:require
    [clojure.data.avl :as avl]
    [clojure.set :refer [intersection difference union]]
    ;; NOTE(review): this libspec carries two :refer options -- confirm which one is intended
    [entity-graph.core :refer [pull-many cardinality-many?] :refer :all]))

;; How to implement sorted to-many relationships in Datomic?
;; https://stackoverflow.com/questions/44645938/how-to-implement-sorted-to-many-relationships-in-datomic
;; https://stackoverflow.com/questions/33682064/properties-on-datomic-ref-relationships/61406767#61406767

;; NOTE: Query system can be a separate lib, much like pull...
;; should we even have :db.index/sorted? the only advantage is i.e. sorted :manufacturer/name
;; - easy to offer, but even sorting manufacturers in re-frame only happens when data changes
;; (map eav id-seq) to retain (potentially sorted) order - MUST be seq, or (select-keys eav id-set) to get eav subset
;; RANGE QUERIES
;; avl trees: https://github.com/clojure/data.avl
;; hash-map: linear filter of v's, linear select from eav, results unsorted
;; sorted-map: linear filter of v's (can't do log(n) because subvec and nth are O(n)), linear select from eav, results sorted
;; avl/sorted-map: logarithmic filter of v's, linear select from eav, results sorted
;; covering ave index would eliminate linear selects from eav
;; but for unions/intersections would have to operate on entities or would need to (map :db/id)

;; todo: sort order lost on set union/intersection
;; -> start with [> 35000] (desired sorting seq), `intersection-seq` with #{"Berlin"} set
;; ... but it may be faster to start with #{"Berlin"}...
;; -> only makes sense when desired sorting is also optimal way to start query
;; datomic: tuple for each cardinality/many [> 35000], we just identify id, trim later if needed
;; keeping sorted entity order during query doesn't really help on cardinality/many attrs -> may be duplicate ids in seq...
;; only helps to start with sorted cardinality/one, then use `intersection-seq` for additional constraints
;; then can sort-entities or sort-tuples (after pulling and trimming, if desired)

;; linear in size of s1-seq
;; force s1 to be the sequential or permit either s1 or s2 to be the sequential?
(defn intersection-seq
  "Returns a vector with the items of `s1-seq` that are also contained in `s2-set`.
  Preserves the order of `s1-seq`."
  [s1-seq s2-set]
  (filterv #(contains? s2-set %) s1-seq))

;; tuples can be sorted in any way desired, including for individual cardinality/many attr/vals
;; can start with trimmed entity, then convert to tuple or can convert pulled-entity to tuples and filter tuples
;; todo: support :db/id (nested entities can have db/id? - PULL)
(defn pulled-entity->tuple
  "Returns a tuple (vector) consisting of the values at `paths` in `pulled-entity`.
  Each path is either a single attribute keyword or a vector of keys as taken by `get-in`.
  `paths` may contain underscored keywords for reverse navigation.
  When the final attr of a path is :db.cardinality/many, its values are added
  to the tuple one by one."
  [schema paths pulled-entity]
  (reduce (fn [tuple path]
            (let [single-attr? (keyword? path)
                  ;; `last` needs a sequential path, so a bare keyword is its own final attr
                  attr (if single-attr? path (last path))
                  v (if single-attr?
                      (get pulled-entity path)
                      (get-in pulled-entity path))]
              ;; todo: should return multiple tuples for cardinality/many
              ;; should work for reverse attr
              (if (cardinality-many? schema attr)
                (into tuple v)
                (conj tuple v))))
          [] paths))

;; use built-in sort to sort tuples
(defn nested-entities->tuples
  "Returns a lazy seq with one tuple (see `pulled-entity->tuple`) per entity in `entities`."
  [schema entities paths]
  ;; map or reduce into single coll?
  (map (fn [entity] (pulled-entity->tuple schema paths entity)) entities))

(defn attr-comparator-seq->comparator
  "Returns a single comparator fn based on a flat sequence of attribute/comparator pairs,
  e.g. `[:person/salary > :person/name compare]`.
  Pairs are tried in order; a later pair is only consulted when all earlier ones compare equal.
  Each comparator may be a 3-way comparator returning a number (like `compare`) or a
  boolean predicate (like `>`); a predicate is interpreted the way `sort` interprets it.
  With no pairs, every two entities compare equal."
  [attr-comparator-seq]
  (let [pairs (partition 2 attr-comparator-seq)
        ;; normalise whatever `f` returns into a comparator number
        three-way (fn [f v1 v2]
                    (let [r (f v1 v2)]
                      (cond
                        (number? r) r
                        r -1
                        (f v2 v1) 1
                        :else 0)))]
    (fn [entity1 entity2]
      ;; `& more` is essential: without it the second binding would be the second pair
      ;; rather than the remaining pairs
      (loop [[[attr f] & more] pairs]
        (if (nil? f)
          0
          (let [r (three-way f (get entity1 attr) (get entity2 attr))]
            (if (and (zero? r) more)
              (recur more)
              r)))))))

;; sorting entities on cardinality/many attrs doesn't make sense... need to reduce to single val (min, max, etc)
;; attr-comparator-seq example:
;; [:person/salary > :person/city < :person/past-salaries (fn [v-set1 v-set2] (> (max v-set1) (max v-set2)))]
(defn sort-entities
  "Looks up `ids` in the eav index of `db` and returns the entities sorted by the
  comparator built from `attr-comparator-seq` (see `attr-comparator-seq->comparator`)."
  [{:keys [db/eav] :as db} ids & attr-comparator-seq]
  (let [entity-comparator (attr-comparator-seq->comparator attr-comparator-seq)]
    (sort entity-comparator (map eav ids))))

;;;;;

;; "RAW" preds that don't account for cardinality/many - pred has to account for it
(defn filter-entities
  "Returns a lazy seq of the entities in the eav index of `db` for which `pred` is truthy."
  [{:keys [db/eav] :as db} pred]
  (filter pred (vals eav)))

(defn filter-eav
  "Returns a map of id -> entity holding the eav entries of `db` whose entity satisfies `pred`."
  [{:keys [db/eav db/schema] :as db} pred]
  (into {} (filter (fn [[_ entity]] (pred entity))) eav))

;; ref values are possible, but it's questionable to apply preds to them
;; ref values for reverse attrs also possible
(defn trim-entity
  "Returns `entity` with only the `attr` values that match `pred`.
  For a :db.cardinality/many attr the matching values are kept as a (lazy) seq;
  when nothing matches, `attr` is removed from the entity.
  An entity that does not contain `attr` is returned unchanged without calling `pred`."
  [schema attr pred entity]
  (cond
    ;; nothing to trim; also keeps preds like #(> % 3) from seeing nil
    (not (contains? entity attr))
    entity

    (cardinality-many? schema attr)
    (let [matching (filter pred (get entity attr))]
      (if (empty? matching)
        (dissoc entity attr)
        (assoc entity attr matching)))

    ;; if previously applied pred during `get-ids`, don't need to apply again
    ;; unless different pred for trimming...
    (pred (get entity attr))
    entity

    :else
    (dissoc entity attr)))

;; supports reverse paths with no extra effort
;; `entity` is a pulled-entity
(defn trim-path
  "Returns `entity` with only the `path` values that match `pred`.
  `path` is a sequence of attrs; every attr but the last is followed into the nested
  entity (a map) or nested entities (a collection of maps), and the last attr is
  trimmed with `trim-entity`. Nested entities that end up empty are dropped, and the
  attr pointing at them is removed when none remain."
  [schema [first-path & rest-path :as path] pred entity]
  (if (seq rest-path)
    (let [nested (get entity first-path)
          trimmed (if (map? nested)
                    (trim-path schema rest-path pred nested)
                    (remove empty? (map #(trim-path schema rest-path pred %) nested)))]
      (if (empty? trimmed)
        (dissoc entity first-path)
        (assoc entity first-path trimmed)))
    ;; no rest-path, first-path is the attr
    (trim-entity schema first-path pred entity)))

;; possible: {:some-attr 'some-pred [:some-attr] 'some-pred}
;; supply [path pred] pairs as map to avoid traversing same path more than once
;; able to handle multiple preds for same path -> not reduce-kv
;; support for vector form range preds? to make reusing `attr-pred-pairs` more convenient?
(defn trim
  "Applies every [path pred] pair of `path-pred-pairs` to `entity` in order.
  A path may be an attr keyword, a one-element vector, or a longer vector path."
  [schema path-pred-pairs entity]
  (reduce (fn [trimmed [path pred]]
            (cond
              (keyword? path) (trim-entity schema path pred trimmed)
              (= 1 (count path)) (trim-entity schema (first path) pred trimmed)
              :else (trim-path schema path pred trimmed)))
          entity path-pred-pairs))

;;;; TRUE PRED

;; same code in ve-map->id-set
;; returns set
(defn ids-by-attr-unique
  "Returns the set of all ids stored under `attr` in `ave`, where each value maps to a single id."
  [ave attr]
  ;; do we have to call set? guaranteed to be unique...
  (set (vals (get ave attr))))

;; returns set
;; might use concat for returning seq: ~2x faster
(defn ids-by-attr-non-unique
  "Returns the union of all id sets stored under `attr` in `ave`."
  [ave attr]
  (reduce union (vals (get ave attr))))

;; returns set
(defn ids-by-attr-eav
  "Returns the set of :db/id values of all entities in `eav` that contain `attr`."
  [eav attr]
  (set (map :db/id (filter #(contains? % attr) (vals eav)))))

;;; SET PRED

;; linear time in the size of pred
;; returns set
(defn filter-ave-unique-set
  "Returns the set of ids found in `ave` under `attr` for the values in the set `pred`
  (one id per value)."
  [ave attr pred]
  (reduce (fn [ids v]
            (if-let [id (get-in ave [attr v])]
              (conj ids id)
              ids))
          #{} pred))

;; returns set
(defn filter-ave-non-unique-set
  "Returns the set of ids found in `ave` under `attr` for the values in the set `pred`
  (an id set per value)."
  [ave attr pred]
  (reduce (fn [ids v]
            (if-let [id-set (get-in ave [attr v])]
              (into ids id-set)
              ids))
          #{} pred))

;; unused
(defn eav-pred
  "Applies `pred` to the value of `attr` in `entity`.
  For a :db.cardinality/many attr returns the first truthy result of `pred` over the
  values, or nil."
  [{:keys [db/schema] :as db} attr pred entity]
  (let [v (get entity attr)]
    (if (cardinality-many? schema attr)
      (some pred v)
      ;; pred takes the value only, as in the cardinality/many branch
      (pred v))))

(defn entity-attr-pred
  "Applies pred to `entity` value under `attr`. Works for :db.cardinality/one attrs or :db.cardinality/many attrs.
  For :db.cardinality/many returns first truthy value or nil if none are truthy."
  [schema attr pred entity]
  ;; don't use nil? pred since nil is not a valid db value
  (when-let [v (get entity attr)]
    (if (cardinality-many? schema attr)
      (some pred v)
      (pred v))))

;; linear time in size of eav
;; returns seq/set
(defn filter-eav-attr-pred
  "Returns the ids of the entities whose `attr` value satisfies `pred` (see `entity-attr-pred`).
  Without `xids` every entity in `eav` is checked and a lazy seq of :db/id values is returned;
  with `xids` only those ids are checked and a set is returned."
  ([eav schema attr pred]
   (->> (vals eav)
        (filter #(entity-attr-pred schema attr pred %))
        (map :db/id)))
  ;; linear time in size of xids
  ([eav schema attr pred xids]
   (into #{} (filter #(entity-attr-pred schema attr pred (eav %))) xids)))

;;; RANGE PRED

;; what other ops make sense here? ones that are more efficient with avl-map:
;; min/max -> also work with sorted-map
;; median - yes
;; rank queries, lookups of "nearest entries" = [`nth` 1], but also avl/rank-of [some-val] -> returns value
;; [:rank> 3] [:percentile 97.55] [:median] [:average] [:nearest 3]
;; rank queries -> percentile calculations -> rank/total
;; "nearest entries"
(defn avl-op
  "Returns the part of `avl-map` whose keys are related to `val` by `op`, where `op` is one
  of the functions `<`, `<=`, `>`, `>=`. Returns nil for any other `op`."
  [avl-map op val]
  (when (contains? #{< <= > >=} op)
    (let [[below at above] (avl/split-key val avl-map)
          ;; `at` is the entry stored under `val`, or nil when `val` is not a key
          with-at (fn [m]
                    (if at
                      (let [[k v] at] (assoc m k v))
                      m))]
      (condp = op
        < below
        <= (with-at below)
        > above
        >= (with-at above)))))

;; returns ave-a subset
(defn eval-range-pred
  "Evaluates the range pred against `(ave attr)` with `avl-op`.
  `pred` is either a single `[op val]` or a vector of one or two such clauses,
  e.g. `[[> 8] [< 34]]`; the second clause is applied to the result of the first."
  [ave attr [a b :as pred]]
  (let [ve-map (ave attr)]
    (if (vector? a)
      (let [[op1 val1] a
            narrowed (avl-op ve-map op1 val1)]
        (if b
          (let [[op2 val2] b]
            (avl-op narrowed op2 val2))
          narrowed))
      (let [[op val] pred]
        (avl-op ve-map op val)))))

;; TODO: use Logarithmic time slicing for >= < etc!!!
;; change syntax from [[< 34] [> 8]] to [< 34 > 8]
;; NOTE: when passing > within vector, it is evaled by clojure
;; returns ave-a subset just like eval-range-pred
;; MAYBE should return ids
(defn eval-range-pred2
  "Returns the subrange of `(ave attr)` described by `pred`, a flat vector of the
  test/key arguments that is spliced into `avl/subrange`."
  [ave attr pred]
  (apply avl/subrange (ave attr) pred))

;;; GENERIC PRED

;; linear time in ave-a size
;; still better than traversing eav because only looking at entities that contain `attr`
;; returns seq
(defn filter-ave-unique
  "Returns a lazy seq of the ids in `(get ave attr)` whose value satisfies `pred`
  (each value maps to one id)."
  [ave attr pred]
  (->> (get ave attr)
       (filter (fn [[v _]] (pred v)))
       (map second)))

;; returns seq
(defn filter-ave-non-unique
  "Returns a lazy seq of the ids in `(get ave attr)` whose value satisfies `pred`
  (each value maps to an id set; the seq may contain duplicate ids)."
  [ave attr pred]
  (->> (get ave attr)
       (filter (fn [[v _]] (pred v)))
       (mapcat second)))

;;; GET IDS

;; always used with OR `ref-type?` => `ave-form-single-e?` `ave-form-eset?`
(defn index?
  "Returns the result of looking `attr` up in `(schema :db/index)`; nil when the schema
  has no :db/index entry."
  [schema attr]
  (get (get schema :db/index) attr))

(defn index-avl-map?
  "Returns the result of looking `attr` up in `(schema :db.index/avl-map)`; nil when the
  schema has no :db.index/avl-map entry."
  [schema attr]
  (get (get schema :db.index/avl-map) attr))

(defn get-ids-false-pred
  "Returns the set of ids of the entities that do NOT have `attr`.
  Without `xids` all entities in the db are considered; with `xids` (a set) only those ids."
  ([{:keys [db/schema db/eav db/ave]} attr]
   (cond
     ;; linear (keys eav), linear ids-with-attr, linear difference
     ;; faster to always filter eav?
     (unique? schema attr)
     (difference (set (keys eav)) (ids-by-attr-unique ave attr))

     (or (index? schema attr) (ref-type? schema attr))
     (difference (set (keys eav)) (ids-by-attr-non-unique ave attr))

     :not-in-ave-index
     (do
       (println "Warning! get-ids-false-pred for attr not in AVE index: " attr)
       (->> (vals eav) (remove #(contains? % attr)) (map :db/id) (set)))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr xids]
   ;; don't bother relying on ave index?
   ;; `disj` is used, so `xids` has to be a set
   (reduce disj xids (filter #(contains? (eav %) attr) xids))))

;; returns set
(defn get-ids-true-pred
  "Returns the set of ids of the entities that have `attr`.
  Without `xids` all entities in the db are considered; with `xids` (a set) only those ids."
  ([{:keys [db/schema db/eav db/ave] :as db} attr]
   (cond
     (unique? schema attr)
     (ids-by-attr-unique ave attr)

     (or (index? schema attr) (ref-type? schema attr))
     (ids-by-attr-non-unique ave attr)

     :not-in-ave-index
     (do
       (println "Warning! get-ids-true-pred for attr not in AVE index: " attr)
       (ids-by-attr-eav eav attr))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr xids]
   ;; `intersection` is linear in size of smaller set
   ;; `get-ids-true-pred` is linear in size of ave-a vals concatted
   (if (< (count xids) (count (ave attr)))
     (reduce disj xids (remove #(contains? (eav %) attr) xids))
     (intersection (get-ids-true-pred db attr) xids))))

;; returns set
(defn get-ids-set-pred
  "Returns the set of ids of the entities whose `attr` value is in the set `pred`.
  With `xids` the result is restricted to those ids."
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
   (cond
     (unique? schema attr)
     (filter-ave-unique-set ave attr pred)

     (or (index? schema attr) (ref-type? schema attr))
     (filter-ave-non-unique-set ave attr pred)

     :not-in-ave-index
     (do
       (println "Warning! get-ids-set-pred for attr not in AVE index: " attr)
       (set (filter-eav-attr-pred eav schema attr pred)))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
   ;; set lookups are linear in size of pred
   ;; but filter-eav does more work per step... or maybe similar -> test
   (if (< (count xids) (count pred))
     (filter-eav-attr-pred eav schema attr pred xids)
     (intersection (get-ids-set-pred db attr pred) xids))))

(defn entity-attr-pred2
  "Returns all vals of `attr` that match `pred` or nil.
  For a :db.cardinality/many attr the matches are returned as a seq, otherwise the
  single value itself is returned."
  [schema attr pred entity]
  (when-let [v (get entity attr)]
    (if (cardinality-many? schema attr)
      (seq (filter pred v))
      (when (pred v) v))))

(defn get-maps-set-pred
  "Returns a map of id -> {attr matching-value(s)} for the entities whose `attr` value is
  in the set `pred`. For a :db.cardinality/many attr the matching values are collected
  in a set; otherwise the single matching value is stored as is. All three lookup
  strategies (unique index, non-unique index, eav scan) produce this same shape."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (let [many? (cardinality-many? schema attr)]
    (cond
      (unique? schema attr)
      (reduce (fn [ret v]
                (if-let [e (get-in ave [attr v])]
                  (assoc-in ret [e attr] v)
                  ret))
              {} pred)

      (or (index? schema attr) (ref-type? schema attr))
      (reduce (fn [ret v]
                (reduce (fn [ret id]
                          (if many?
                            ;; plain `conj` onto a missing key would build a list
                            (update-in ret [id attr] (fnil conj #{}) v)
                            (assoc-in ret [id attr] v)))
                        ret (get-in ave [attr v])))
              {} pred)

      :not-in-ave-index
      (do
        (println "Warning! get-maps-set-pred for attr not in AVE index: " attr)
        (reduce (fn [result entity]
                  ;; here we filter the cardinality/many attr ourselves, so might as well save it for the end?
                  ;; so then we shouldn't use get-maps when no ave index... defeats the purpose?
                  ;; but then also shouldn't do it for cardinality-one... may lead to waste if discarded
                  (if-let [v (entity-attr-pred2 schema attr pred entity)]
                    (assoc-in result [(:db/id entity) attr] (if many? (set v) v))
                    result))
                {} (vals eav))))))

;; returns seq (to preserve order; may have duplicates)
#_(defn get-ids-set-pred2
    [{:keys [db/schema db/eav db/ave] :as db} attr pred]
    (let [ave-a (get-ave-a-set-pred db attr pred)]
      (cond
        (unique? schema attr)
        (vals ave-a)
        (or (index? schema attr) (ref-type? schema attr))
        (apply concat (vals ave-a))
        :default
        (set (filter-eav-attr-pred eav schema attr pred))))
    )

(defn log32
  "Returns the base-32 logarithm of `n`."
  [n]
  (/ (Math/log n) (Math/log 32)))

;; TODO: returning sets means losing order...
;; when sorted ave-a, can return seq and preserve order - maybe better to return ave-a subset
;; - duplicate ids possible for cardinality/many, distinct keeps first id (lowest sorted)
;; 1.
;;    pull [val id-set] from ave-a
;; preserve sorting order when eventual result is desired sorted by same attr in the same order as ave-a (asc/desc)
;; ... and no additional sorting (like no sorting by city after sorted by salary)
;; if sorting on attr and sorted ave-a available, can use it as the last pred as a potential optimization
;; do pred on ave-a, then rm-xids on ave-a => faster than sorting after pulling from eav? yes for larger subsets of ave-a

(defn- range-pred->fn
  "Converts a vector range pred -- `[op val]`, `[[op val]]` or `[[op val] [op val]]` --
  into a one-argument predicate that is truthy when a value satisfies every clause.
  Values are ordered with `compare`, so each clause is evaluated as
  `(op (compare v val) 0)`."
  [[a b :as pred]]
  (let [clauses (if (vector? a)
                  (remove nil? [a b])
                  [pred])]
    (fn [v]
      (every? (fn [[op bound]] (op (compare v bound) 0)) clauses))))

;; returns set
(defn get-ids-range-pred
  "Returns the set of ids of the entities whose `attr` value satisfies the vector range
  pred `pred` (see `eval-range-pred` for its shape).
  Uses the AVL index when `attr` has one; otherwise prints a warning and falls back to
  scanning the eav index. With `xids` the result is restricted to those ids."
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
   (if (index-avl-map? schema attr)
     (let [r (eval-range-pred ave attr pred)]
       (if (unique? schema attr)
         (set (vals r))
         (apply union (vals r))))
     (do
       (println "Warning! get-ids-range-pred for attr not in AVE index or not an AVL index: " attr)
       ;; the vector pred has to become a generic pred before the eav index can be filtered
       (set (filter-eav-attr-pred eav schema attr (range-pred->fn pred))))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
   ;; range q is log32(size ave-a)
   ;; for few xids might be faster to scan eav index
   ;; (if (< (count xids) (log32 (count (ave attr))))))
   (intersection (get-ids-range-pred db attr pred) xids)))

;; returns set
(defn get-ids-generic-pred
  "Returns the set of ids of the entities whose `attr` value satisfies the one-argument
  fn `pred`. With `xids` the result is restricted to those ids."
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
   (cond
     (unique? schema attr)
     (set (filter-ave-unique ave attr pred))

     (or (index? schema attr) (ref-type? schema attr))
     ;; convert seq (with possible duplicate ids) to set
     (set (filter-ave-non-unique ave attr pred))

     :not-in-ave-index
     (do
       (println "Warning! get-ids-generic-pred for attr not in AVE index: " attr)
       (set (filter-eav-attr-pred eav schema attr pred)))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
   (if (and (contains? ave attr)
            (>= (count xids) (count (ave attr))))
     (intersection xids (get-ids-generic-pred db attr pred))
     (set (filter-eav-attr-pred eav schema attr pred xids)))))

;; with reverse attr support
;; can modify to return all matching vals rather than just yes/no (slower, but returns more info)
(defn some-path
  "Returns logical true if some `path` satisfies `pred`, else returns nil.
  Starting at the entity `id`, every attr of `path` but the last is followed to the next
  entity or entities, and `pred` is applied to the value(s) of the last attr.
  Assumes everything in `path` is ref attr or reverse ref attr except the final attr in path cannot be a reverse attr."
  [{:keys [db/schema db/eav db/ave] :as db} id [attr & more-attrs :as path] pred]
  (let [entity (get eav id)]
    (if (seq more-attrs)
      (if (reverse-reference? attr)
        ;; reverse attribute: find who points at `id` through the ave index
        (let [pointing-attr (reverse->attr-name attr)]
          (when-let [pointing (get-in ave [pointing-attr id])]
            (if (unique? schema pointing-attr)
              (some-path db pointing more-attrs pred)
              (some #(some-path db % more-attrs pred) pointing))))
        ;; forward attribute
        (when-let [target (get entity attr)]
          (if (cardinality-many? schema attr)
            (some #(some-path db % more-attrs pred) target)
            (some-path db target more-attrs pred))))
      ;; last attr of the path: apply the pred to its value(s)
      (when-let [v (get entity attr)]
        (if (cardinality-many? schema attr)
          (some pred v)
          (pred v))))))

(declare ve-map->id-set)

#_(let [pids (get-ids db :person/city #{"Moscow"})
        pids (get-ids db [:person/license :dl/year] #(>= % 2020))])
;; ensure everything along path is a ref (except last attr)? No, just assume
;; apply pred to ref attr? questionable, but is there a downside?
;; non-generic preds don't make sense (except sets); ranges?
;; nope -> would have to start with range query and link back
;; and what about applying the pred to all cardinality/many links at once?
;; -> maybe an extra kw arg to signal to some-path to apply pred to entire set?
;; what about pred applied to paths? like "get the user with best-friend with largest salary"
;; returns set
(defn get-ids-path-attr
  "Returns the set of ids for which `some-path` finds a value at `path` satisfying `pred`.
  Without `xids` the candidates are the ids stored under the first attr of `path` in the
  ave index, or every id in the eav index when that attr has no ave entry."
  ([{:keys [db/schema db/eav db/ave] :as db} [attr :as path] pred]
   (if-let [ave-attr (ave attr)]
     ;; could use `get-ids-true-pred`? -> it may fallback to scanning eav index
     (get-ids-path-attr db path pred (ve-map->id-set schema attr ave-attr))
     (get-ids-path-attr db path pred (keys eav))))
  ([db path pred xids]
   (into #{} (filter #(some-path db % path pred)) xids)))

;; pred is applied to the set of cardinality/many values (not to individual values)
(defn filter-eav-many
  "Returns the set of :db/id values of the entities whose whole `attr` value satisfies `pred`.
  With `xids` only those ids are looked at."
  ([eav attr pred]
   (->> (vals eav)
        (filter (fn [entity] (pred (entity attr))))
        (map :db/id)
        (set)))
  ([eav attr pred xids]
   ;; xids don't need to be sets
   (filter-eav-many (select-keys eav xids) attr pred)))

(defn get-ids-many-pred
  "Applies pred to the entire set of values for :db.cardinality/many `attr`.
  Returns the set of ids whose value set satisfies `pred`; with `xids` the result only
  contains ids from `xids`. Fails an assertion when `attr` is not :db.cardinality/many."
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
   (if (cardinality-many? schema attr)
     (if (contains? ave attr)
       (filter-eav-many eav attr pred (ids-by-attr-non-unique ave attr))
       (filter-eav-many eav attr pred))
     (assert false (str "Attribute must be :db.cardinality/many: " attr))))
  ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
   (if (cardinality-many? schema attr)
     (if (or (not (contains? ave attr))
             (< (count xids) (count (ave attr))))
       (filter-eav-many eav attr pred xids)
       ;; the ids from the index must still be limited to xids
       (filter-eav-many eav attr pred
                        (intersection (ids-by-attr-non-unique ave attr) (set xids))))
     (assert false (str "Attribute must be :db.cardinality/many: " attr)))))

;;;;;; GET AVE_A

(defn get-ve-map-true-pred
  "Returns the whole value -> id(s) map stored under `attr` in the ave index."
  [{:keys [db/schema db/eav db/ave] :as db} attr]
  (ave attr))

(defn get-ve-map-set-pred
  "Returns the entries of `(ave attr)` whose value is in the set `pred`."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (select-keys (ave attr) pred))

(defn get-ve-map-range-pred
  "Returns the part of `(ave attr)` selected by the vector range pred `pred`."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (eval-range-pred ave attr pred))

;; may be less efficient than get-ids because of (into {})
(defn get-ve-map-generic-pred
  "Returns a map with the entries of `(ave attr)` whose value satisfies `pred`.
  A missing `(ave attr)` yields an empty map."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (into {} (filter (fn [[v _]] (pred v))) (ave attr)))

;; alternative approach: test performance
(defn get-ve-map-generic-pred2
  "Like `get-ve-map-generic-pred`, but removes the non-matching values from `(ave attr)`
  instead of building a new map."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (let [ve-map (ave attr)]
    (reduce-kv (fn [kept v _]
                 (if (pred v) kept (dissoc kept v)))
               ve-map ve-map)))

;; Aggregates like (max, count, etc) operate on ave-a, so returning just ids precludes aggregate operations
;; for aggregations ave-a can transform e.g :person/salary
;; {10000 #{2 3 9}, 20000 #{4 6 7}} => {{:val 10000 :count 3} #{2 3 9}, {:val 20000 :count 2} #{4 6}}
;; or {10000 {:ids #{2 3 9} :count 3}, 20000 {:ids #{4 6} :count 2}} => or a separate map?
;; aggregates on: ids (count), values individual (length), values aggregate (sum, average, max)
;; also supports the case where you want to preserve the sorting ave-a index
;; TODO: maybe easier to accomplish aggregation by going through EAV index?
(defn get-ve-map
  "Returns the value -> id(s) entries of `attr` in the ave index that match `pred`.
  `pred` may be `true` (everything), a set of values, a vector range pred, or any
  one-argument fn."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (cond
    ;; support false for lack of attribute
    (true? pred)
    (get-ve-map-true-pred db attr)
    (set? pred)
    ;; set pred can get v's from ave-a directly - faster
    (get-ve-map-set-pred db attr pred)
    ;; range pred: take advantage of avl index
    (vector? pred)
    (get-ve-map-range-pred db attr pred)
    :default ;; just a regular pred, have to go through all values in ave-a
    (get-ve-map-generic-pred db attr pred)))
;; for optimizations like get-ids would need:
;; 1. keep track of all ids -> need that for `intersect-ve-map`
;; TODO: don't know what to keep until you got all the ids...
;; faster to `rm-xids` from desired ave-a after you have all the ids via `get-ids`?
;; `get-ids` will rely on eav index when that's faster, so doesn't always rely on ave index...
;; lose the advantage with e.g. set preds of selecting just the v's that you want...
;; Q: when do I want ve-map? for aggregates? ever? is eav index based aggregation sufficient?
;; if you NEED ve-map, then could apply pred first, then do rm-x-ids (kind of duplicating the work for that pred)

(defn ve-map->id-set
  "Returns the set of all ids in `ve-map`, whose vals are single ids for a unique `attr`
  and id sets otherwise."
  [schema attr ve-map]
  (if (unique? schema attr)
    (set (vals ve-map))
    (apply union (vals ve-map))))

(defn ve-map->id-seq
  "Returns a seq of all ids in `ve-map` in map order; may contain duplicates for a
  non-unique `attr`."
  [schema attr ve-map]
  (if (unique? schema attr)
    (vals ve-map)
    (apply concat (vals ve-map))))

;; removes ids from ve-map that are not in xids
;; removes v's whose id-set doesn't intersect with any xids
;; returns ve-map
(defn rm-xids
  "Returns a map with the entries of `ve-map` restricted to the ids in the set `xids`.
  For a unique `attr` an entry is kept when its id is in `xids`; otherwise its id set is
  intersected with `xids` and the entry is dropped when no id remains."
  [schema attr xids ve-map]
  (let [unique-attr? (unique? schema attr)]
    (reduce-kv (fn [result v e]
                 (if unique-attr?
                   (if (contains? xids e)
                     (assoc result v e)
                     result)
                   ;; an empty set is truthy, so emptiness has to be tested explicitly
                   (let [v-ids (intersection e xids)]
                     (if (seq v-ids)
                       (assoc result v v-ids)
                       result))))
               {} ve-map)))

;; can also select ids from eav, but can we speed it up here or give matching vals?
;; matching vals help for aggregating count by val
;; returns ave-sub with each attr ve-map containing only x-ids
(defn intersect-ave-sub
  "Returns `ave-sub` (a map of attr -> ve-map) with every ve-map restricted to the ids
  that occur in ALL of the ve-maps. An empty `ave-sub` yields an empty map."
  [schema ave-sub]
  (let [id-sets (map (fn [[attr ve-map]] (ve-map->id-set schema attr ve-map)) ave-sub)
        ;; `intersection` needs at least one set
        x-ids (if (seq id-sets)
                (apply intersection id-sets)
                #{})]
    (reduce-kv (fn [result attr ve-map]
                 (assoc result attr (rm-xids schema attr x-ids ve-map)))
               {} ave-sub)))

;; returns ave-sub
;; todo: same attr diff preds
;; at each step, can get-ve-map based on ids or based on pred
;; when is it faster based on ids?
(defn get-ve-map-many
  "Takes alternating attr/pred arguments, fetches the ve-map of every attr with
  `get-ve-map`, and returns the resulting attr -> ve-map map restricted to the ids
  common to all attrs (see `intersect-ave-sub`)."
  [{:keys [db/schema db/eav db/ave] :as db} & attr-pred-pairs]
  (->> (partition 2 attr-pred-pairs)
       (reduce (fn [ave-sub [attr pred]]
                 (assoc ave-sub attr (get-ve-map db attr pred)))
               {})
       (intersect-ave-sub schema)))

;; returns a seq of 1 or more (for cardinality/many) [e v] tuples based on [v e] tuple
(defn invert-ave-a-entry
  "Returns a vector of [e v] tuples for one [v e] entry of an ave-a map: a single tuple
  for a unique `attr`, otherwise one tuple per id in the entry's id set."
  [schema attr [v e]]
  (if (unique? schema attr)
    [[e v]]
    ;; `e` is a set of ids here
    (mapv (fn [id] [id v]) e)))

;; returns map of {id v-set}
(defn invert-ave-a-non-unique
  "Inverts a value -> id-set map into an id -> value-set map."
  [ave-a]
  (reduce (fn [inverted [id v]]
            (update inverted id (fnil conj #{}) v))
          {}
          (for [[v ids] ave-a
                id ids]
            [id v])))

;; returns [e a v] tuples in same order as ave-a
(defn ave-a->eav-tuples
  "Returns a vector of [e attr v] tuples for all entries of `ave-a`, in the order of `ave-a`."
  [schema attr ave-a]
  (if (unique? schema attr)
    (mapv (fn [[v e]] [e attr v]) ave-a)
    (into []
          (mapcat (fn [[v e-set]]
                    (map (fn [e] [e attr v]) e-set)))
          ave-a)))

(defn get-eav-tuples
  "Returns the [e attr v] tuples of the ave entries of `attr` that match `pred`."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (ave-a->eav-tuples schema attr (get-ve-map db attr pred)))

;; TODO: this is an alternative implementation of get-ids that relies on `get-ave-a` first
;; is it as efficient to get ave-a first, and then convert to id sets?
;; might be slower for set preds because constructing intermediate map with select-keys
;; NOTE: predicates support tuple bindings and collection bindings, but relations bindings need a bit extra work
;; https://docs.datomic.com/cloud/query/query-data-reference.html#relation-binding
;; todo: assumes ave exists - what about eav fallback? (if relying on this as first step for get-ids)
(defn get-ids2
  "Returns the set of ids of the ave entries of `attr` that match `pred`."
  [{:keys [db/schema db/eav db/ave] :as db} attr pred]
  (ve-map->id-set schema attr (get-ve-map db attr pred)))

;; OTHER
;; fns to implement: get-else, get-some
;; missing? = false pred in get-ids for lack of attr
;; not clause implemented with arbitrary pred, but maybe a better way?

;; TODO: another option is to build eav-sub as we go along
;; can select ids from eav at each step, but replacing attr with just the matching pred from ve-map
;; then for next pred, we would dissoc the difference between pred1 ids and pred2 ids from eav-sub
;; this is potentially a lot of wasted processing, since subsequent preds may shrink ids-set by a lot
;; todo: should `pull` support preds for cardinality/many attrs?
-> better as separate step 697 | 698 | ;; TUPLES allow to get pred #{Moscow}, :person/name :person/building-number (ref) => pull 699 | 700 | ;; this is supposed to support selecting from eav based on a ave-sub 701 | ;; which had been built up as preds matched in ve-maps, and "thinned" with subsequent preds -> could be a LOT of "thinning" 702 | ;; and the thinning process is costly -> more efficient if invert to ev-map? slightly; plus inverting itself costly 703 | ;; returns eav map for all ids in ve-map, excludes `next-attrs` from entities 704 | (defn select-entities1 705 | [result schema attr ve-map eav next-attrs] 706 | (if (unique? schema attr) 707 | (reduce-kv (fn [result v id] 708 | (assoc result id (eav id))) 709 | {} ve-map) 710 | (if (cardinality-many? schema attr) 711 | (reduce-kv (fn [result v e] 712 | (reduce (fn [result id] 713 | (if (contains? result id) 714 | (update-in result [id attr] conj v) 715 | (let [entity (-> (apply dissoc (eav id) next-attrs) 716 | (assoc attr #{v}))] 717 | (assoc result id entity)))) 718 | result e)) 719 | {} ve-map) 720 | ;; cardinality/one 721 | (reduce-kv (fn [result v e] 722 | (reduce (fn [result id] 723 | (assoc result id (eav id))) 724 | result e)) 725 | result ve-map)))) 726 | 727 | ;; todo: if there are two ve-maps for same attr... ave-sub can only have one key for each attr... 728 | ;; returns entities with pred-matching cardinality/many values only 729 | (defn select-entities 730 | [schema ave-sub eav] 731 | (loop [result {} ave-sub ave-sub] 732 | (if-let [[attr ve-map] (first ave-sub)] 733 | (let [next-ave-sub (next ave-sub) 734 | next-attrs (map first next-ave-sub) 735 | result (select-entities1 result schema attr ve-map eav next-attrs)] 736 | (recur result next-ave-sub)) 737 | result))) 738 | 739 | ;; todo: returns id if at least one cardinality/many value matches pred, but not the specific value(s) 740 | ;; how to only include desired values? return partial entities from eav (filter cardinality/many)? 
741 | ;; todo: key questions is what do you want returned in the end?? think about the final step of displaying on the screen... 742 | ;; do you want them as tuples or as entity-maps with filtered cardinality/many values? 743 | ;; how about if I want ppl who have ONLY lived in #{Moscow Berlin} and nowhere else? NOT clause? 744 | ;; could re-fitler, but is there a way to do it "along the way" -> get-ids would have to return some sort of tuple 745 | ;; if returning tuples, then is it still possible to compose intersections of ids? maybe if we return distinct ids alongside 746 | ;; -> would have to filter-eav... or filter-ave and then filter-eav... or return ave-a 747 | ;; do we want preds that operate on entire set of values for attr? like "two or more past-cities in germany" 748 | ;; maybe return ave-a submap? how to intersect/union on submaps? => they are on different attrs! 749 | ;; TODO: how to express "not equal"? [!= 4], [!= #{3 4 5}], [not= 8], ( 750 | ;; how to express multiple preds for one attr... 751 | (defn get-ids 752 | ([db attr pred] 753 | (if (vector? attr) 754 | (get-ids-path-attr db attr pred) 755 | (cond 756 | (false? pred) 757 | (get-ids-false-pred db attr) 758 | (true? pred) 759 | (get-ids-true-pred db attr) 760 | (set? pred) 761 | (get-ids-set-pred db attr pred) 762 | (vector? pred) 763 | (get-ids-range-pred db attr pred) 764 | (fn? pred) 765 | (get-ids-generic-pred db attr pred) 766 | :default 767 | (assert false (str "Invalid predicate: " pred))))) 768 | ([db attr pred xids] 769 | ;; here must have sets, while above a seq might do to preserve sort order 770 | (if (vector? attr) 771 | (get-ids-path-attr db attr pred xids) 772 | (cond 773 | (false? pred) 774 | (get-ids-false-pred db attr xids) 775 | (true? pred) 776 | (get-ids-true-pred db attr xids) 777 | (set? pred) 778 | (get-ids-set-pred db attr pred xids) 779 | (vector? pred) 780 | (intersection (get-ids-range-pred db attr pred) xids) 781 | (fn? 
pred) 782 | (get-ids-generic-pred db attr pred xids) 783 | :default 784 | (assert false (str "Invalid predicate: " pred)))))) 785 | 786 | ;; TODO: build up datoms/tuples instead of ids? ultimately would return... tuples or nested entities 787 | ;; in order to avoid trimming large cardinality/many attrs, especially repeatedly 788 | ;; construct id to datoms map as we go along... -> would be better if we stored datoms in ave index 789 | ;; build up entities via list-form indexing like for eav index? works for path preds too 790 | ;; build up nested-entities as you go along with cardinality/many attrs (and cardinality/one attrs?) 791 | ;; tradeoff: the work of building up nested-entities is wasted if many ids subsequently discarded 792 | ;; TODO: which is the greater waste: re-filtering cardinality-many attr or building up nested-entities and later discarding them? 793 | ;; also constraining-paths aren't necessarily returning-paths, so building up constraining-paths may be wasted 794 | ;; if building up nested-entities along the way: only do it for returning-path cardinality-many to minimize potential waste 795 | ;; -> solution: specify returning-paths upfront 796 | ;; -> end up with partial entity the satisfies query, but then need to "enrich" with additional attrs/refs 797 | ;; can figure out the "enrich-pattern" by subtracting from full pattern what is available 798 | ;; what about "if person ever made more than 100K, return all his salaries" 799 | ;; - constraint applies to set of vals, but returning all vals for cardinality-many attr - HOW TO in datomic? 
800 | ;; ENRICHING: pull returning-paths that haven't been built up yet, merge with existing nested-entity built up results 801 | 802 | ;; edge case: constraint-path: ref-type attr 'return only keyword ids'; 803 | ;; return-path: follow those keyword ids and get more 804 | 805 | ;; returning-paths spec 806 | ;; get-entities returns a partially built up entity 807 | ;; todo: maybe only build up nested entity when cardinality-many attr and in return-path 808 | #_(get-ids-spec db 809 | ;; constraint paths 810 | {:person/city #{"Berlin" "moscow"} 811 | [:person/dl :dl/year] [> 2009]} 812 | ;; if return-paths not specified, don't know when to keep and when to discard vals via get-ids/get-entities 813 | ;; e.g. if :person/city not in return-paths, then only get-ids, not get-entities 814 | ;; return paths: how to specify? `path->join-pattern` is available 815 | [:person/city :person/name [:person/dl :dl/year] [:person/dl :dl/city-issued]] 816 | ;[:person/city {:person/dl [:dl/year :dl/city-issued]} :person/name] 817 | ) 818 | 819 | 820 | ;; xsec fns for maintining a built up result map and interoping with id sets 821 | (defn xect-maps 822 | [m1 m2] 823 | (if (< (count m2) (count m1)) 824 | (recur m2 m1) 825 | (reduce-kv (fn [result id m] 826 | (if (contains? m2 id) 827 | (update result id merge (m2 id)) 828 | (dissoc result id))) 829 | m1 m2))) 830 | 831 | (defn xsect-map-set 832 | [m ids] 833 | (if (map? ids) 834 | (recur ids m) 835 | (reduce-kv (fn [result id _] 836 | (if (contains? ids id) 837 | result 838 | (dissoc result id))) 839 | m m))) 840 | 841 | ;; s1 and s2 can both be sets of ids or maps built up eav entities 842 | (defn intersection2 843 | [s1 s2] 844 | (cond 845 | (and (set? s1) (set? s2)) 846 | (intersection s1 s2) 847 | (and (map? s1) (map? s2)) 848 | (xect-maps s1 s2) 849 | :else 850 | (xsect-map-set s1 s2))) 851 | 852 | (defn merge-netsted 853 | [schema m1 m2] 854 | (reduce-kv (fn [result attr v] 855 | (if (ref-type? 
schema attr) 856 | 857 | ;; not ref type 858 | (assoc result attr v))) 859 | m1 m2)) 860 | 861 | (defn get-ids-multi 862 | "Returns ids that satisfy all of `attr-pred-pairs`. Supports any attr pred pair that `get-ids` can handle." 863 | [db & attr-pred-pairs] 864 | (reduce (fn [ids [attr pred]] 865 | (get-ids db attr pred ids)) 866 | #{} (partition 2 attr-pred-pairs))) 867 | 868 | ;; [:person/license :dl/year] => {:person/license [:dl/year]} 869 | ;; [:person/license :issuing-dmv :dmv-city] => {:person/license {:issuing-dmv [:dmv-city]}} 870 | (defn path->join-pattern 871 | [path] 872 | (let [[final-attr reverse-path] (reverse path)] 873 | (reduce (fn [r reverse-path] 874 | (assoc {} (first reverse-path) r)) 875 | [final-attr] reverse-path))) 876 | 877 | ;; [[>= 30000] [< 30004]] => (fn [x] (and (>= x 30000) (< x 30004))) 878 | (defn range-pred->fn-pred 879 | [range-pred] 880 | (if (vector? (first range-pred)) 881 | (let [[[op1 val1] [op2 val2]] range-pred] 882 | (fn [x] (and (op1 x val1) (op2 x val2)))) 883 | (let [[op val] range-pred] 884 | (fn [x] (op x val))))) 885 | 886 | ;; can handle multiple preds for one attr? -> pattern w/ dups, no prob for trim? 887 | ;; specify whether to do the trimming step? probably not 888 | ;; convenience: reuse of attr-pred-pairs for get-ids and trim; and pull? 889 | (defn get-pull-trim 890 | "For every id that satisfies all of `path-pred-pairs`. 891 | Pulls all attrs/paths, joining on vector paths. 892 | Trims cardinality/many vals of each pulled (nested) entity to match preds from `path-pred-pairs`." 893 | [{:keys [db/schema] :as db} & path-pred-pairs] 894 | (let [ids (apply get-ids-multi db path-pred-pairs) 895 | pattern (->> path-pred-pairs 896 | (map (fn [path pred] 897 | (if (vector? 
path) 898 | (path->join-pattern path) 899 | path))) 900 | distinct) 901 | pulled-entities (pull-many db pattern ids) 902 | ;; keep only cardinality/many attrs for trimming 903 | path-pred-pairs-trim 904 | (filter (fn [[path pred]] 905 | (if (vector? path) 906 | (cardinality-many? schema (last path)) 907 | (cardinality-many? schema path))) 908 | path-pred-pairs) 909 | ;; convert range preds to fn; remove true and false preds 910 | path-pred-pairs-trim 911 | (reduce (fn [result [path pred :as pair]] 912 | (cond 913 | (vector? pred) 914 | (conj result [path (range-pred->fn-pred pred)]) 915 | (or (true? pred) (false? pred)) 916 | result 917 | :default 918 | (conj result pair))) 919 | [] path-pred-pairs-trim)] 920 | (map (fn [entity] 921 | (trim schema path-pred-pairs-trim entity)) 922 | pulled-entities))) 923 | 924 | ;; instead of filtering by pred, it fetches vals 925 | ;; this is a more streamlined/limited pull-many 926 | (defn get-entities-eav 927 | ([{:keys [db/schema db/eav db/ave] :as db} id-set] 928 | (map eav id-set)) 929 | ([{:keys [db/schema db/eav db/ave] :as db} ks id-set] 930 | (->> (map eav id-set) 931 | (map #(select-keys % ks))))) 932 | 933 | ;; ========= 934 | ;; Reverse Txs 935 | 936 | ;; interim transactions? 937 | ;; can we specify when updates depend on previous successes (update chains) -> compare and swap or similar? 938 | ;; add last1 to man1, add last2 to man1, last1 add fail -> reverse last1 add, last2 add still valid => independent 939 | ;; add 20 to acct, add 30 to acct, add 20 fail -> reverse add 20, add 30 still valid => independent 940 | ;; add last1 to man1, add man1/last1 to shoe1, add last1 fail -> shoe1 invalid -> reverse add man1/last1 shoe1 941 | ;; specify tx-id dependency? keep coll of tx deps? = tx level granularity; [e a] level granularity possible? 
942 | ;; types of deps: 1) ref attrs pointing to failed entities 943 | ;; (note difference between existence/non-existence of entity and change in entity data) 944 | ;; 2) [e a] depends on past [e a]? 945 | ;; only one optimistic update at a time? 946 | ;; can't make any changes (including local) until tx completes 947 | ;; everything else gets requed until previous tx succeeds or fails... 948 | ;; if it fails, cancel? 949 | ;; maybe more granular - like [e a] level add? FAILURE are detect at tx level, so maybe better to keep it at tx level 950 | ;; CREATE: tx dependency graph, if pre-req tx fails reverse dependent tx, if pre-req tx succeeds remove dependency 951 | ;; responses can arrive out of order! can txs arrive on backend out of order? 952 | ;; need to indicate that tx in flight by :remote/id :pending, but also :remote/tx :pending for existing entities? 953 | ;; in the mean time local changes are allowed 954 | ;; (meaning stuff that doesn't touch entities w/ remote/id attrs?, doesn't require mutation) 955 | ;; three categories: UI changes (selected items etc), local data changes <--> remote data changes 956 | 957 | ;; just keep db-before reference, only makes sense "more than one transaction ahead" 958 | ;; rely only on db-before ref allows for one timeline, can't have "indepedent" tx succeed 959 | ;; example: add man1, add man2; if man1 fails go back to db-before add man1, 960 | ;; which means add man2 also fails (but it actually succeeded on backend! so front and back out of sync) 961 | ;; TODO: order of add/retract matters 962 | ;; [nil -> sub,add = val; add,sub = nil] ok, [nil -> add,sub = nil; sub,add = val] fail 963 | 964 | ;; seems we have to generate reverse tx on the basis of db-before because cardinality/one attrs are "overwritten" 965 | (defn reverse-tx-list 966 | [{:keys [db/eav db/schema] :as db-before} [op tx-e tx-a tx-v]] 967 | (case op 968 | :db/add 969 | (if (cardinality-many? 
schema tx-a) 970 | (if-some [db-before-v (get-in eav [tx-e tx-a])] 971 | [:db/retract tx-e tx-a tx-v] 972 | ;; can optimize by dissoc attr directly? (no old-v means no attr existed originally) 973 | [:db/retract tx-e tx-a tx-v]) 974 | (if-some [db-before-v (get-in eav [tx-e tx-a])] 975 | [:db/add tx-e tx-a db-before-v] 976 | [:db/retract tx-e tx-a tx-v])) 977 | :db/retract 978 | (when-some [db-before-v (get-in eav [tx-e tx-a])] 979 | (if (cardinality-many? schema tx-a) 980 | (if (nil? tx-v) 981 | ;; no `v` was specified, so all values of `a` were retracted, add them back in - optimize? 982 | (map #(vector :db/add tx-e tx-a %) db-before-v) 983 | (when (contains? db-before-v tx-v) 984 | [:db/add tx-e tx-a tx-v])) 985 | (if (nil? tx-v) 986 | ;; nil `v` means `a` was retracted, add it back in 987 | [:db/add tx-e tx-a db-before-v] 988 | ;; only add tx-v back in only if db-before-v=tx-v meaning it was actually retracted 989 | ;; -> should this be captured in tx-data? 990 | ;; if nothing had changed tx-data would reflect that by not containing a retract datom 991 | (when (= db-before-v tx-v) 992 | [:db/add tx-e tx-a tx-v])))))) 993 | 994 | (defn reverse-tx-map 995 | [{:keys [db/schema] :as db-before} {:keys [db/id db/op] :as tx-form}] 996 | (reduce-kv 997 | (fn [tx-data a v] 998 | (if (cardinality-many? schema a) 999 | (->> v 1000 | (map #(reverse-tx-list db-before [(or op :db/add) id a %])) 1001 | (reduce conj tx-data)) 1002 | (conj tx-data (reverse-tx-list db-before [(or op :db/add) id a v])))) 1003 | [] (dissoc tx-form :db/id))) 1004 | 1005 | (defn reverse-tx-form 1006 | [db-before tx-form] 1007 | (if (map? tx-form) 1008 | (reverse-tx-map db-before tx-form) 1009 | (reverse-tx-list db-before tx-form))) 1010 | 1011 | (defn reverse-tx-data 1012 | "Generates a ''reverse transacation''" 1013 | [db-before tx-data] 1014 | (let [tx-data 1015 | (reduce 1016 | (fn [tx-data tx-form] 1017 | (if (map? 
tx-form) 1018 | (reduce conj tx-data (reverse-tx-map db-before tx-form)) 1019 | (conj tx-data (reverse-tx-list db-before tx-form)))) 1020 | [] tx-data) 1021 | tx-data (remove nil? tx-data)] 1022 | (prn :reverse-tx-data tx-data) 1023 | tx-data)) -------------------------------------------------------------------------------- /drafts/entity_graph/scratch.cljc: -------------------------------------------------------------------------------- 1 | (ns entity-graph.scratch) 2 | 3 | ;; ========= 4 | ;; Indexing Helpers 5 | 6 | ;; EAV index - currently unused 7 | 8 | (defn index-eav-one 9 | "Adds [e a v] to eav index. `a` must be a `:db.cardinatliy/one` attribute." 10 | [eav e a v] 11 | (let [eav-e (get eav e {:db/id e}) 12 | eav-e (assoc eav-e a v)] 13 | (assoc! eav e eav-e))) 14 | 15 | ;; existence of (eav e) must be (is) checked up the stack, else end up (assoc! eav e eav-e[=nil]) 16 | (defn unindex-eav-one 17 | "Removes [e a] from eav index. `a` must be a `:db.cardinatliy/one` attribute." 18 | [eav e a] 19 | (let [eav-e (dissoc (eav e) a)] 20 | (assoc! eav e eav-e))) 21 | 22 | (defn index-eav-many 23 | "Adds [e a v] to eav index. `a` must be a :db.cardinatliy/many attribute." 24 | [eav e a v] 25 | (let [eav-e (get eav e {:db/id e}) 26 | v-set (conj (get eav-e a #{}) v) 27 | eav-e (assoc eav-e a v-set)] 28 | (assoc! eav e eav-e))) 29 | 30 | ;; existence of (eav e) must be (is) checked up the stack, else end up (assoc! eav e eav-e[=nil]) 31 | (defn unindex-eav-many 32 | "Removes [e a v] from eav index. `a` must be a `:db.cardinaltiy/many` attribute. 33 | If an empty set of values remains after unindexing, removes the attribute." 34 | [eav e a v] 35 | (let [v-set (disj (get-in eav [e a]) v) 36 | eav-e (if (empty? v-set) 37 | (dissoc (eav e) a) 38 | (assoc (eav e) a v-set))] 39 | (assoc! 
eav e eav-e))) 40 | 41 | ;; ========= 42 | ;; Alternative random-tempid 43 | 44 | ;; Probability of NO collisions with 1mm int assigned 1000 times 45 | ;; (Math/pow (/ 999999 1000000) (reduce + (range 1000))) 46 | ;; Probability of no collisions with 4 1000000 nums: 47 | #_(Math/pow (/ (- (* 1000000 1000000 1000000 1000000) 1) (* 1000000 1000000 1000000 1000000)) 48 | (reduce + (range 1000))) 49 | ;; could just use negative integer counter? yes if disallow negative integer tempids 50 | ;; datomic cloud reference says only string is accepted 51 | ;; datomic-dev-local accepts negative integers, but doesn't report them in :tempids key after transaction 52 | (defn random-tempid 53 | [] 54 | (str "db.temp-" (clojure.string/join "-" (take 4 (repeatedly #(str (rand-int 1000000))))))) 55 | 56 | ;; ========= 57 | ;; EAV Map Form Ops 58 | 59 | ;; The following functions support adding/retracting to indexes with map form tx-forms 60 | ;; The indexes are expected to be passed in as transients and the functions return transients 61 | 62 | (defn merge-by-key 63 | "Like `merge-with`, but `f` takes the key `k` as first arg 64 | (presumably to allow `f` to merge differently based on `k`)." 65 | [f & maps] 66 | (when (some identity maps) 67 | (let [merge-entry (fn [m e] 68 | (let [k (key e) v (val e)] 69 | (if (contains? m k) 70 | (assoc m k (f k (get m k) v)) 71 | (assoc m k v)))) 72 | merge2 (fn [m1 m2] 73 | (reduce merge-entry (or m1 {}) (seq m2)))] 74 | (reduce merge2 maps)))) 75 | 76 | (defn merge-entity-vals 77 | "Merges entity values based on cardinality of `attr`. 78 | For `cardinality/many` `attr` treats `v1` and `v2` as sets." 79 | [schema attr v1 v2] 80 | ;; works for :db/id `attr` since it's not :db.cardinality/many 81 | (if (cardinality-many? schema attr) 82 | (union v1 v2) 83 | v2)) 84 | 85 | (defn add-map-eav 86 | "Adds `tx-form` to eav index. Treats values as sets for `:db.cardinality/many` attributes." 
87 | [schema eav {:keys [db/id] :as tx-form} ex-entity] 88 | ;; :db/id in tx-form is fine; :db/op is dissoced 89 | (if ex-entity 90 | (assoc! eav id (merge-by-key (partial merge-entity-vals schema) ex-entity tx-form)) 91 | (assoc! eav id tx-form))) 92 | 93 | (defn entity-diff 94 | "Returns the \"difference\" between `ex-entity` and `tx-form`." 95 | [schema ex-entity tx-form] 96 | (reduce-kv 97 | (fn [ex-entity a v] 98 | (if (nil? v) 99 | (dissoc ex-entity a) 100 | (if (cardinality-many? schema a) 101 | (let [new-v (difference (ex-entity a) v)] 102 | (if (empty? new-v) 103 | (dissoc ex-entity a) 104 | (assoc ex-entity a new-v))) 105 | (if (= (ex-entity a) v) 106 | (dissoc ex-entity a) 107 | ex-entity)))) 108 | ex-entity (dissoc tx-form :db/id :db/op))) 109 | 110 | ;; existence of ex-entity => checked up the stack 111 | ;; existence of attribute value in ex-entity -> entity-diff 112 | ;; nil value of attribute in tx-form -> entity-diff 113 | (defn retract-map-eav 114 | "Retracts `tx-form` from eav index given existing entity with :db/id `ex-entity`. 115 | Treats values as sets for `:db.cardinality/many` attributes. 116 | When nil value specified for an attribute in `tx-form`, entire attribute is removed regardless of cardinality. 117 | Potentially leaves \"empty entry\" in eav index: {id {:db/id id}}" 118 | [schema eav {:keys [db/id] :as tx-form} ex-entity] 119 | (let [new-entity (entity-diff schema ex-entity tx-form)] 120 | (assoc! eav id new-entity))) 121 | 122 | ;; ========= 123 | ;; Transaction Functions 124 | 125 | ;; note: handle-tx-fns shouldn't return any more :db/fn-call ops 126 | (defn handle-tx-fns 127 | [db tx-data] 128 | ;; `f` is function that takes db as first arg and any number of additional arguments... 129 | ;; `f` should return a seq of tx-forms. 130 | (reduce (fn [new-tx-data [op f & args :as tx-form]] 131 | (if (= op :db.fn/call) 132 | (let [fn-tx-data (remove nil? 
(apply f db args))] 133 | (reduce conj new-tx-data fn-tx-data)) 134 | (conj new-tx-data tx-form))) 135 | [] tx-data)) 136 | 137 | ;; ========= 138 | ;; Checking that entity id exists in db 139 | 140 | (defn check-id-existence-list 141 | "Checks list form `tx-forms` to ensure :db/id exists in the database. Returns `tx-forms` unchanged. 142 | Throws when non-existent :db/id found." 143 | [tx-forms eav] 144 | (doseq [[_ id _ _] tx-forms] 145 | (when (int? id) ;; don't check keyword ids 146 | (assert (contains? eav id) (str ":db.error/invalid-entity-id Invalid entity id: " id)))) 147 | tx-forms) 148 | 149 | (defn check-id-existence-map 150 | "Checks map form `tx-forms` to ensure :db/id exists in the database. Returns `tx-forms` unchanged. 151 | Throws when non-existent :db/id found." 152 | [tx-forms eav] 153 | (doseq [{:keys [db/id]} tx-forms] 154 | (when (int? id) ;; don't check keyword ids 155 | (assert (contains? eav id) (str ":db.error/invalid-entity-id Invalid entity id: " id)))) 156 | tx-forms) 157 | 158 | ;; ========= 159 | ;; Leverage `retraction-set` for constraint checks 160 | 161 | ;; this version does not rely on indexes reflecting all retractions in tx, leverages retraction-set instead 162 | (when (component? schema a) 163 | (doseq [attr (:db/isComponent schema)] 164 | (when-let [held-by-id (get-in ave' [attr v])] 165 | ;; DISALLOW entity to hold same component under different attrs 166 | (assert (or (and (= e held-by-id) (= a attr)) 167 | (contains? retraction-set [held-by-id attr v]) (contains? entity-retractions held-by-id)) 168 | (str ":db.error/component-conflict Component conflict: " 169 | "Entity with id: " v " already component of: " held-by-id " under attribute " attr 170 | ", asserted for: " e " under attribute " a))))) 171 | ;; TODO: this version does not rely on indexes reflecting all retractions in tx, leverages retraction-set instead 172 | (when (unique? 
schema a) 173 | (when-let [held-by-id (get-in ave' [a v])] 174 | (assert (or (= e held-by-id) 175 | (contains? retraction-set [held-by-id a v]) (contains? entity-retractions held-by-id)) 176 | (str ":db.error/unique-conflict Unique conflict: " a ", value: " v " already held by: " held-by-id 177 | " asserted for: " e)))) 178 | 179 | ;; REMOVED FROM: `check-db-constraints-many`, since dandling refs are ok and this is not the only way to end up with dangling refs 180 | (when (ref-type? schema a) 181 | (assert (not (contains? entity-retractions v)) 182 | (str ":db.error/retracted-entity-conflict Can't point to a retracted entity. 183 | Attempting to assert " [e a v]))) 184 | 185 | ;; REMOVED FROM: `check-db-constraints-one`, since dandling refs are ok and this is not the only way to end up with dangling refs 186 | (when (ref-type? schema a) 187 | (assert (not (contains? entity-retractions v)) 188 | (str ":db.error/retracted-entity-conflict Can't point to a retracted entity. 189 | Attempting to assert " [e a v]))) 190 | 191 | ;; ========= 192 | ;; Checking for dangling refs after transaction completed 193 | 194 | (defn check-for-dangling-refs1 195 | [schema eav e a v] 196 | (when (ref-type? schema a) 197 | (assert (contains? eav v) 198 | (str ":db.error/dangling-ref A reference attribute points to a non-existent entity: " [e a v])))) 199 | 200 | (defn check-for-dangling-refs-list 201 | [schema eav list-assertion-forms] 202 | (doseq [[_ e a v] list-assertion-forms] 203 | (check-for-dangling-refs1 schema eav e a v))) 204 | 205 | (defn check-for-dangling-refs-map 206 | [schema eav map-assertion-forms] 207 | (doseq [{:keys [db/id] :as map-form} map-assertion-forms 208 | [a v] (dissoc map-form :db/id)] 209 | (if (cardinality-many? 
schema a) 210 | (doseq [single-v v] 211 | (check-for-dangling-refs1 schema eav id a single-v)) 212 | (check-for-dangling-refs1 schema eav id a v)))) 213 | 214 | ;; this misses the cases where `retract` has left an entity with no attributes, and it was therefore removed from EAV index 215 | (defn check-for-dangling-refs 216 | "Throws if any reference attributes points to non-existent entities. Must wait until tx completes to do this check." 217 | [schema eav tx-data] 218 | (check-for-dangling-refs-list schema eav (concat (get-in tx-data [:list-add :entity-id]) 219 | (get-in tx-data [:list-add :tempid]))) 220 | (check-for-dangling-refs-map schema eav (concat (get-in tx-data [:map-add :entity-id]) 221 | (get-in tx-data [:map-add :tempid]) 222 | (get-in tx-data [:map-add :no-id])))) 223 | 224 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 225 | ;; TODO: include these? 226 | ;; TODO: these should take tx-data from transact: 227 | ;; - check only entities 228 | 229 | (defn find-dangling-refs 230 | "Returns a seq of all [e a v] tuples in db where attribute `a` is a reference attribute pointing to an 231 | entity that does not exist in db." 232 | [{:keys [schema db/eav db/ave]}] 233 | (flatten 234 | (for [ref-attr (-> schema :db/isRef) 235 | :let [[target-id pointing-id] (get ave ref-attr)] 236 | :when (and pointing-id (not (contains? eav target-id)))] 237 | [pointing-id ref-attr target-id]))) 238 | 239 | (defn check-for-dangling-refs [db] 240 | (let [dangling-refs (find-dangling-refs db)] 241 | (assert (empty? dangling-refs) 242 | (str ":db.error/dangling-refs Database contains dangling refs: " dangling-refs)))) 243 | 244 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 245 | ;; Support for string version of wildcard (["*"]) in pull to return string keys in pull results 246 | ;; what about non-wildcard patterns? Also want string attributes? 247 | 248 | (defn ->string-attrs 249 | [entity] 250 | (into {} (map (fn [[k v]] (if (string? 
k) [(keyword k) v] [k v])) entity))) 251 | 252 | ;; NOTE: would need to ensure (non-entity) map values are not converted 253 | (defn stringify-keys-namespaced 254 | "Recursively transforms all map keys from keywords to strings." 255 | [schema pull-tree] 256 | (let [f (fn [[k v]] (if (keyword? k) [(str k) v] [k v]))] 257 | ;; only apply to maps 258 | (clojure.walk/postwalk (fn [[k v :as x]] (if (map? x) (->string-attrs x) x)) pull-tree))) 259 | 260 | ;; ========= 261 | ;; Integrate `find-reverse-refs` into pull? 262 | 263 | ;; In datomic if you remove all attributes from an entity, pull wildcard (or [:db/id] pattern) 264 | ;; returns {:db/id 101155069755575, :shoe-owned/_shoe ...} 265 | ;; EntityDB has the function `find-reverse-refs`, which could be integrated in pull 266 | ;; option 1: with '_* pull result can include :db/reverse-refs key which returns the result of this fn - must also apply to components 267 | ;; option 2: special :db/reverse-refs attribute that will call this fn -------------------------------------------------------------------------------- /shadow-cljs.edn: -------------------------------------------------------------------------------- 1 | {:deps {:aliases [:cljs :test :drafts]} 2 | :builds {:dev 3 | {:target :browser 4 | :output-dir "resources/public/js/dev" 5 | :modules {:main 6 | {:entries [entity-graph.core]}} 7 | :devtools {:repl-pprint true}}} 8 | :nrepl {:port 9000} 9 | ;; For ClojureScript REPL with nrepl: 10 | ;; 0. Configure nREPL with localhost/port 11 | ;; 1. Command line: `npx shadow-cljs server` 12 | ;; 2. Start nREPL and in the repl: (shadow/watch :dev) 13 | ;; 3. 
(shadow/browser-repl) 14 | :socket-repl {:port 9001}} 15 | -------------------------------------------------------------------------------- /test/entity_graph/benchmark.cljc: -------------------------------------------------------------------------------- 1 | (ns entity-graph.benchmark 2 | (:require 3 | #?(:clj [clojure.pprint :refer [pprint]] 4 | :cljs [cljs.pprint :refer [pprint]]) 5 | [entity-graph.core :refer [create-db transact cardinality-many? pull] :as eg])) 6 | 7 | (def cardinality-many-values-to-sets #'entity-graph.core/cardinality-many-values-to-sets) 8 | (def prepare-tx-data #'entity-graph.core/prepare-tx-data) 9 | 10 | ;; ========= 11 | ;; Schema 12 | 13 | (def bench-schema 14 | {:person/name 15 | {:db/index {:db/map-type :db.map-type/hash-map}} 16 | :person/last-name 17 | {:db/index {:db/map-type :db.map-type/hash-map}} 18 | :person/alias 19 | {:db/cardinality :db.cardinality/many 20 | :db/index {:db/map-type :db.map-type/hash-map}} 21 | :person/sex 22 | {:db/index {:db/map-type :db.map-type/hash-map}} 23 | :person/age 24 | {:db/index {:db/map-type :db.map-type/hash-map}} 25 | :person/salary 26 | {:db/index {:db/map-type :db.map-type/hash-map}}}) 27 | 28 | (def bench-schema-sorted-ave 29 | (reduce (fn [new-schema attr] 30 | (assoc-in new-schema [attr :db/index :db/map-type] :db.map-type/sorted-map)) 31 | bench-schema (keys bench-schema))) 32 | 33 | (def bench-schema-avl-ave 34 | (reduce (fn [new-schema attr] 35 | (assoc-in new-schema [attr :db/index :db/map-type] :db.map-type/avl-map)) 36 | bench-schema (keys bench-schema))) 37 | 38 | (def db-empty (create-db bench-schema)) 39 | (def db-sorted (create-db bench-schema-sorted-ave)) 40 | (def db-avl (create-db bench-schema-avl-ave)) 41 | 42 | ;; same schema can be reused for both dbs above 43 | (def schema (:db/schema db-empty)) 44 | 45 | (comment 46 | (let [bench-schema (assoc bench-schema :nums {:db/cardinality :db.cardinality/many 47 | :db/sort {:db/set-type :db.set-type/sorted-set 48 | :db/comparator 
<}})
        db-empty (create-db bench-schema)
        people10-map-tempid (map (fn [m] (assoc m :nums (take 3 (repeatedly #(rand-int 100))))) people10-map-tempid)
        {:keys [db-after]} (transact db-empty people10-map-tempid)
        ]
    ;(pprint people10-map-tempid)
    (pprint (take 3 (:db/eav db-after)))
    ))

;; =========
;; Benchmark Data Functions

;; The counter is closed over so that only `gen-id` can touch it.
(let [id (atom 0)]
  (defn gen-id
    "Returns the next value of a shared counter: 1, 2, 3, ...
    `swap!` already returns the updated value, so it is returned directly;
    re-reading the atom afterwards could observe another thread's increment."
    []
    (swap! id inc)))

(defn random-person
  "Returns a map form entity with a fresh string `:db/id` (from `gen-id`) and
  randomly chosen name, last name, sex, age (0-99) and salary (0-99999).
  `:person/alias` is a set built from 0-9 random draws, so it may be empty."
  []
  {:db/id            (str (gen-id))
   :person/name      (rand-nth ["Ivan" "Petr" "Sergei" "Oleg" "Yuri" "Dmitry" "Fedor" "Denis"])
   :person/last-name (rand-nth ["Ivanov" "Petrov" "Sidorov" "Kovalev" "Kuznetsov" "Voronoi"])
   :person/alias     (set (repeatedly (rand-int 10) #(rand-nth ["A. C. Q. W." "A. J. Finn" "A.A. Fair" "Aapeli"
                                                                "Aaron Wolfe" "Abigail Van Buren" "Jeanne Phillips"
                                                                "Abram Tertz" "Abu Nuwas" "Acton Bell" "Adunis"])))
   :person/sex       (rand-nth [:sex/male :sex/female])
   :person/age       (rand-int 100)
   :person/salary    (rand-int 100000)})

(def random-persons
  "Infinite lazy seq of random persons. The var holds the head, so already
  realized persons are cached: repeated `take`s return the same persons."
  (repeatedly random-person))

(defn map->list1
  "Returns a seq of list form tx data equivalent of `map-form`.
  Emits one `[:db/add id a v]` per value for attributes that are
  cardinality-many in `schema`, and a single one for every other attribute."
  [schema map-form]
  (let [eid (:db/id map-form)]
    (reduce-kv
      (fn [list-forms a v]
        (if (cardinality-many? schema a)
          (into list-forms (map #(vector :db/add eid a %)) v)
          (conj list-forms [:db/add eid a v])))
      []
      (cardinality-many-values-to-sets schema (dissoc map-form :db/id)))))

(defn map->list
  "Returns a seq of list form tx data equivalent of `map-forms` tx data."
  [schema map-forms]
  (mapcat #(map->list1 schema %) map-forms))

(defn list-assertions->retractions
  "Turns each `[op e a v]` in `list-assertions` into `[:db/retract id a v]`,
  where `id` is looked up for `e` in the `tempids` map."
  [tempids list-assertions]
  (map (fn [[_ e a v]] [:db/retract (get tempids e) a v]) list-assertions))

(defn list-assertions->entity-retractions
  "Returns one `[:db/retractEntity id]` per distinct entity position found in
  `list-assertions`, where `id` is looked up for it in the `tempids` map."
  [tempids list-assertions]
  (let [ids (set (map second list-assertions))]
    (map (fn [id] [:db/retractEntity (get tempids id)]) ids)))

;; =========
;; Benchmark Data

(def people20k-map-tempid (shuffle (take 20000 random-persons)))
(def people10-map-tempid (shuffle (take 10 random-persons)))

(def people10-map-noid (map #(dissoc % :db/id) (shuffle (take 10 random-persons))))
(def people20k-map-noid (map #(dissoc % :db/id) people20k-map-tempid))

(def people10-list-tempid (map->list schema people10-map-tempid))
(def people20k-list-tempid (map->list schema people20k-map-tempid))

;; =========
;; Benchmarks CLJS

(comment
  ;;;;;; Prepare Data

  ;; Map Form Assertions Prepare Data
  (simple-benchmark [] (prepare-tx-data db-empty people10-map-noid) 10000)
  (simple-benchmark [] (prepare-tx-data db-empty people20k-map-tempid) 5)
  (simple-benchmark [] (prepare-tx-data db-sorted people20k-map-tempid) 5)
  (simple-benchmark [] (prepare-tx-data db-avl people20k-map-tempid) 5)

  ;;;;;; Transact

  ;; Map Form Assertions

  ;; people10-map-noid
  (simple-benchmark [] (transact db-empty people10-map-noid) 10000)
  (simple-benchmark [] (transact db-sorted people10-map-noid) 10000)
  (simple-benchmark [] (transact db-avl people10-map-noid) 10000)

  ;; people10-map-tempid
  (simple-benchmark [] (transact db-empty people10-map-tempid) 10000)

  ;; people20k-map-noid
  (simple-benchmark [] (transact db-empty people20k-map-noid) 5)
  (simple-benchmark [] (transact db-sorted people20k-map-noid) 5)
  (simple-benchmark [] (transact db-avl people20k-map-noid) 5)

  ;; people20k-map-tempid
  (simple-benchmark [] (transact db-empty people20k-map-tempid) 5)

  ;; Map Form Assertions - Overwrite - faster than writing to empty db
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-map-tempid)
                     people10-map-entity-id (eg/replace-tempids-map people10-map-tempid tempids)]
    (transact db-after people10-map-entity-id) 10000)
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-map-tempid)
                     people20k-map-entity-id (eg/replace-tempids-map people20k-map-tempid tempids)]
    (transact db-after people20k-map-entity-id) 5)

  ;; Map Form Assertions for component checks:
  ;; NOTE(review): the rebound `schema` below is not used by any later binding
  ;; or by the benchmarked expression (which transacts into `db-empty`) --
  ;; confirm these runs really exercise the extra ref attributes.
  (simple-benchmark [schema (assoc schema :component1 {:db/valueType :db.type/ref}
                                          :component2 {:db/valueType :db.type/ref}
                                          :component3 {:db/valueType :db.type/ref})
                     people10-map-tempid (map #(assoc % :component1 (str (inc (js/parseInt (:db/id %)))))
                                              (butlast people10-map-tempid))]
    (transact db-empty people10-map-tempid) 10000)
  (simple-benchmark [schema (assoc schema :component1 {:db/valueType :db.type/ref}
                                          :component2 {:db/valueType :db.type/ref}
                                          :component3 {:db/valueType :db.type/ref})
                     people20k-map-tempid (map #(assoc % :component1 (str (inc (js/parseInt (:db/id %)))))
                                               (butlast people20k-map-tempid))]
    (transact db-empty people20k-map-tempid) 5)

  ;; List Form Assertions
  (simple-benchmark [] (transact db-empty people10-list-tempid) 10000)
  (simple-benchmark [] (transact db-empty people20k-list-tempid) 5)

  ;; List Form Assertions - Overwrite
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
                     people10-list-entity-id (eg/replace-tempids-list people10-list-tempid tempids)]
    (transact db-after people10-list-entity-id) 10000)
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
                     people20k-list-entity-id (eg/replace-tempids-list people20k-list-tempid tempids)]
    (transact db-after people20k-list-entity-id) 5)

  ;; Retractions
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
                     people10-retract (list-assertions->retractions tempids people10-list-tempid)]
    (transact db-after people10-retract) 10000)
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
                     people20k-retract (list-assertions->retractions tempids people20k-list-tempid)]
    (transact db-after people20k-retract) 5)

  ;; retractEntity
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
                     people10-retract-entity (list-assertions->entity-retractions tempids people10-list-tempid)]
    (transact db-after people10-retract-entity) 10000)
  (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
                     people20k-retract-entity (list-assertions->entity-retractions tempids people20k-list-tempid)]
    (transact db-after people20k-retract-entity) 5)

  ;;;;;; Misc

  ;; pull
  (simple-benchmark [{:keys [db-after]} (transact db-empty people10-map-noid)]
    (pull db-after '[*] 1) 10000)

  ;; expand-nested-entities
  ;; no nested entities
  (simple-benchmark [] (eg/expand-nested-entities (:db/schema db-empty) people20k-map-tempid) 1)

  ;; resolve-temp-ids-unique-identity
  ;; no :db.unique/identity attrs in schema
  (simple-benchmark [{:keys [db-after]} (transact db-empty people20k-map-noid)]
    (eg/resolve-tempids db-after people20k-map-tempid) 1)
  )

;; =========
;; Benchmarks CLJ

(comment
  ;;;;;; Prepare Data

  ;; Map Form Assertions Prepare Data
  (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-map-noid)))
  (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-map-tempid)))
  (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-map-noid)))
  (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-map-tempid)))

  ;; List Form Assertions Prepare Data
  (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-list-tempid)))
  (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-list-tempid)))

  ;;;;;; Transact

  ;; Map Form Assertions
  (time (dotimes [_ 40000] (transact db-empty people10-map-noid)))
  (time (dotimes [_ 40000] (transact db-empty people10-map-tempid)))
  (time (dotimes [_ 20] (transact db-empty people20k-map-noid)))
  (time (dotimes [_ 20] (transact db-empty people20k-map-tempid)))

  ;; List Form Assertions
  (time (dotimes [_ 40000] (transact db-empty people10-list-tempid)))
  (time (dotimes [_ 20] (transact db-empty people20k-list-tempid)))

  ;; Retractions
  (let [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
        people10-retract (list-assertions->retractions tempids people10-list-tempid)]
    (time (dotimes [_ 40000] (transact db-after people10-retract))))
  ;; 20 iterations, like every other 20k-person CLJ benchmark above
  (let [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
        people20k-retract (list-assertions->retractions tempids people20k-list-tempid)]
    (time (dotimes [_ 20] (transact db-after people20k-retract))))
  )
--------------------------------------------------------------------------------
/test/entity_graph/benchmark_vs.cljc:
--------------------------------------------------------------------------------
(ns entity-graph.benchmark-vs
  "Benchmark entity-graph against datascript and asami."
  (:require #?(:clj [clojure.pprint :refer [pprint]]
               :cljs [cljs.pprint :refer [pprint]])
            [asami.core :as asami]
            [clojure.set :as set]
            [datascript.core :as ds]
            [entity-graph.core :as eg]
            [entity-graph.benchmark :refer [db-sorted people20k-map-noid]]))

;; =========
;; Transact

;; NOTE: All attributes in Asami are multi-cardinality

;; NOTE: The schema for `db-sorted` indexes all attributes in AVE,
;; so that its `transact` performance can be compared fairly

(defn tx
  "Transacts `tx-data` into `db` and returns only the resulting db value,
  which makes it usable as the update fn of `swap!` on an atom holding a db."
  [db tx-data]
  (:db-after (eg/transact db tx-data)))

(comment
  ;; 2970 msecs
  (simple-benchmark []
    (eg/transact db-sorted people20k-map-noid) 5)
  ;; 4370 msecs
  (simple-benchmark [conn (atom db-sorted)]
    (swap! conn tx people20k-map-noid) 5)
  ;; 8710 msecs
  (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))]
    (ds/transact ds-conn people20k-map-noid) 5)
  ;; 10683 msecs
  (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))]
    (asami/transact asami-conn {:tx-data people20k-map-noid}) 5)
  )

;; CLJ
(comment
  ;; 949 msecs
  (let [conn (atom db-sorted)]
    (time (dotimes [_ 5] (swap! conn tx people20k-map-noid))))
  ;; 1831 msecs
  (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))]
    (time (dotimes [_ 5] (ds/transact ds-conn people20k-map-noid))))
  ;; 1810 msecs
  (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))]
    (time (dotimes [_ 5] (asami/transact asami-conn {:tx-data people20k-map-noid}))))
  )

;; =========
;; Pull

;; NOTE: asami doesn't support pull

(comment
  ;; 32 msecs
  (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
                     _ (pprint (eg/pull db-after '[*] 1))]
    (eg/pull db-after '[*] 1) 10000)

  ;; 118 msecs
  (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
                     _ (ds/transact ds-conn people20k-map-noid)
                     _ (pprint (ds/pull (ds/db ds-conn) '[*] 1))]
    (ds/pull (ds/db ds-conn) '[*] 1) 10000)

  ;; 15 msecs
  (simple-benchmark [db-after (tx db-sorted people20k-map-noid)]
    (eg/pull db-after '[:person/name] 1) 10000)
  ;; 34 msecs
  (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
                     _ (ds/transact ds-conn people20k-map-noid)]
    (ds/pull (ds/db ds-conn) '[:person/name] 1) 10000)
  )

;; CLJ
(comment
  ;; 9 msecs
  (let [db-after (tx db-sorted people20k-map-noid)]
    (time (dotimes [_ 10000] (eg/pull db-after '[*] 1))))
  ;; 66 msecs
  (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
        _ (ds/transact ds-conn people20k-map-noid)]
    (time (dotimes [_ 10000] (ds/pull (ds/db ds-conn) '[*] 1))))

  ;; 3.45 msecs
  (let [db-after (tx db-sorted people20k-map-noid)]
    (time (dotimes [_ 10000] (eg/pull db-after '[:person/name] 1))))
  ;; 17.3 msecs
  (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
        _ (ds/transact ds-conn people20k-map-noid)]
    (time (dotimes [_ 10000] (ds/pull (ds/db ds-conn) '[:person/name] 1))))
  )

;; =========
;; Query

;; q1: single where clause, single item in the result tuple.
(def q1 '[:find ?e
          :where [?e :person/name "Ivan"]])

;; q2: multiple where clauses, multiple items in the result tuple.
(def q2 '[:find ?e ?l ?a
          :where
          [?e :person/name "Ivan"]
          [?e :person/last-name ?l]
          [?e :person/age ?a]
          [?e :person/sex :sex/male]])

;; NOTE: entity-graph doesn't support datalog style queries
;; these are entity db analogs of the same queries:

(defn q1-edb
  "entity-graph analog of `q1`: returns whatever `db` stores under
  `[:db/ave :person/name \"Ivan\"]`."
  [db]
  (get-in db [:db/ave :person/name "Ivan"]))

(defn q2-edb
  "entity-graph analog of `q2`: intersects the `:db/ave` entries for
  `:person/name \"Ivan\"` and `:person/sex :sex/male`, then returns, for each
  resulting id, its `:db/eav` entry narrowed to `:db/id`, `:person/last-name`
  and `:person/age`."
  [db]
  (let [c1-ids (get-in db [:db/ave :person/name "Ivan"])
        c2-ids (get-in db [:db/ave :person/sex :sex/male])
        r-ids  (set/intersection c1-ids c2-ids)]
    (map (fn [id] (select-keys (get-in db [:db/eav id]) [:db/id :person/last-name :person/age])) r-ids)))

(comment
  ;; q1: single where clause, single item in tuple
  ;; 1 msecs
  (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
                     _ (println (count (q1-edb db-after)))]
    (get-in db-after [:db/ave :person/name "Ivan"]) 1000)
  ;; 2999 msecs
  (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
                     _ (ds/transact ds-conn people20k-map-noid)
                     _ (println (count (ds/q q1 (ds/db ds-conn))))]
    (ds/q q1 (ds/db ds-conn)) 1000)
  ;; 425 msecs
  (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
                     _ (asami/transact asami-conn {:tx-data people20k-map-noid})
                     _ (println (count (asami/q q1 (asami/db asami-conn))))]
    (asami/q q1 (asami/db asami-conn)) 1000)

  ;; q2: multiple where clauses, multiple items in tuple
  ;; 873 msecs
  (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
                     _ (println (count (q2-edb db-after)))]
    (q2-edb db-after) 1000)
  ;; 14029 msecs
  (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
                     _ (ds/transact ds-conn people20k-map-noid)
                     _ (println (count (ds/q q2 (ds/db ds-conn))))]
    (ds/q q2 (ds/db ds-conn)) 1000)
  ;; 286 msecs
  (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
                     _ (asami/transact asami-conn {:tx-data people20k-map-noid})
                     _ (println (count (asami/q q2 (asami/db asami-conn))))]
    (asami/q q2 (asami/db asami-conn)) 1000)
  )

;; CLJ
(comment
  ;; q1: single where clause, single item in tuple
  ;; 1.21 msecs
  (let [db-after (tx db-sorted people20k-map-noid)
        _ (println (count (q1-edb db-after)))]
    (time (dotimes [_ 10000] (get-in db-after [:db/ave :person/name "Ivan"]))))
  ;; 6269 msecs
  (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
        _ (ds/transact ds-conn people20k-map-noid)
        _ (println (count (ds/q q1 (ds/db ds-conn))))]
    (time (dotimes [_ 10000] (ds/q q1 (ds/db ds-conn)))))
  ;; 293 msecs
  (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
        _ (asami/transact asami-conn {:tx-data people20k-map-noid})
        _ (println (count (asami/q q1 (asami/db asami-conn))))]
    (time (dotimes [_ 10000] (asami/q q1 (asami/db asami-conn)))))

  ;; q2: multiple where clauses, multiple items in tuple
  ;; 2532 msecs
  (let [db-after (tx db-sorted people20k-map-noid)
        _ (println (count (q2-edb db-after)))]
    (time (dotimes [_ 10000] (q2-edb db-after))))
  ;; 32046 msecs
  (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
        _ (ds/transact ds-conn people20k-map-noid)
        _ (println (count (ds/q q2 (ds/db ds-conn))))]
    (time (dotimes [_ 10000] (ds/q q2 (ds/db ds-conn)))))
  ;; 734 msecs
  (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
        _ (asami/transact asami-conn {:tx-data people20k-map-noid})
        _ (println (count (asami/q q2 (asami/db asami-conn))))]
    (time (dotimes [_ 10000] (asami/q q2 (asami/db asami-conn)))))
  )


--------------------------------------------------------------------------------