├── .gitignore
├── LICENSE
├── README.md
├── build
    └── build
    │   └── core.clj
├── deps.edn
├── docs
    ├── DOCUMENTATION.md
    ├── TECHNOTES.md
    └── TUTORIAL.md
├── drafts
    └── entity_graph
    │   ├── core_test_generative.cljc
    │   ├── macros.clj
    │   ├── query.cljc
    │   └── scratch.cljc
├── shadow-cljs.edn
├── src
    └── entity_graph
    │   └── core.cljc
└── test
    └── entity_graph
        ├── benchmark.cljc
        ├── benchmark_vs.cljc
        └── core_test.cljc


/.gitignore:
--------------------------------------------------------------------------------
1 | .cpcache
2 | .idea
3 | .shadow-cljs
4 | *.iml
5 | .nrepl-port
6 | resources
7 | target
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Eclipse Public License - v 1.0
  2 | 
  3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  4 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  5 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  6 | 
  7 | 1. DEFINITIONS
  8 | 
  9 | "Contribution" means:
 10 | 
 11 | a) in the case of the initial Contributor, the initial code and documentation
 12 |    distributed under this Agreement, and
 13 | b) in the case of each subsequent Contributor:
 14 |     i) changes to the Program, and
 15 |    ii) additions to the Program;
 16 | 
 17 |    where such changes and/or additions to the Program originate from and are
 18 |    distributed by that particular Contributor. A Contribution 'originates'
 19 |    from a Contributor if it was added to the Program by such Contributor
 20 |    itself or anyone acting on such Contributor's behalf. Contributions do not
 21 |    include additions to the Program which: (i) are separate modules of
 22 |    software distributed in conjunction with the Program under their own
 23 |    license agreement, and (ii) are not derivative works of the Program.
 24 | 
 25 | "Contributor" means any person or entity that distributes the Program.
 26 | 
 27 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 28 | necessarily infringed by the use or sale of its Contribution alone or when
 29 | combined with the Program.
 30 | 
 31 | "Program" means the Contributions distributed in accordance with this
 32 | Agreement.
 33 | 
 34 | "Recipient" means anyone who receives the Program under this Agreement,
 35 | including all Contributors.
 36 | 
 37 | 2. GRANT OF RIGHTS
 38 |   a) Subject to the terms of this Agreement, each Contributor hereby grants
 39 |      Recipient a non-exclusive, worldwide, royalty-free copyright license to
 40 |      reproduce, prepare derivative works of, publicly display, publicly
 41 |      perform, distribute and sublicense the Contribution of such Contributor,
 42 |      if any, and such derivative works, in source code and object code form.
 43 |   b) Subject to the terms of this Agreement, each Contributor hereby grants
 44 |      Recipient a non-exclusive, worldwide, royalty-free patent license under
 45 |      Licensed Patents to make, use, sell, offer to sell, import and otherwise
 46 |      transfer the Contribution of such Contributor, if any, in source code and
 47 |      object code form. This patent license shall apply to the combination of
 48 |      the Contribution and the Program if, at the time the Contribution is
 49 |      added by the Contributor, such addition of the Contribution causes such
 50 |      combination to be covered by the Licensed Patents. The patent license
 51 |      shall not apply to any other combinations which include the Contribution.
 52 |      No hardware per se is licensed hereunder.
 53 |   c) Recipient understands that although each Contributor grants the licenses
 54 |      to its Contributions set forth herein, no assurances are provided by any
 55 |      Contributor that the Program does not infringe the patent or other
 56 |      intellectual property rights of any other entity. Each Contributor
 57 |      disclaims any liability to Recipient for claims brought by any other
 58 |      entity based on infringement of intellectual property rights or
 59 |      otherwise. As a condition to exercising the rights and licenses granted
 60 |      hereunder, each Recipient hereby assumes sole responsibility to secure
 61 |      any other intellectual property rights needed, if any. For example, if a
 62 |      third party patent license is required to allow Recipient to distribute
 63 |      the Program, it is Recipient's responsibility to acquire that license
 64 |      before distributing the Program.
 65 |   d) Each Contributor represents that to its knowledge it has sufficient
 66 |      copyright rights in its Contribution, if any, to grant the copyright
 67 |      license set forth in this Agreement.
 68 | 
 69 | 3. REQUIREMENTS
 70 | 
 71 | A Contributor may choose to distribute the Program in object code form under
 72 | its own license agreement, provided that:
 73 | 
 74 |   a) it complies with the terms and conditions of this Agreement; and
 75 |   b) its license agreement:
 76 |       i) effectively disclaims on behalf of all Contributors all warranties
 77 |          and conditions, express and implied, including warranties or
 78 |          conditions of title and non-infringement, and implied warranties or
 79 |          conditions of merchantability and fitness for a particular purpose;
 80 |      ii) effectively excludes on behalf of all Contributors all liability for
 81 |          damages, including direct, indirect, special, incidental and
 82 |          consequential damages, such as lost profits;
 83 |     iii) states that any provisions which differ from this Agreement are
 84 |          offered by that Contributor alone and not by any other party; and
 85 |      iv) states that source code for the Program is available from such
 86 |          Contributor, and informs licensees how to obtain it in a reasonable
 87 |          manner on or through a medium customarily used for software exchange.
 88 | 
 89 | When the Program is made available in source code form:
 90 | 
 91 |   a) it must be made available under this Agreement; and
 92 |   b) a copy of this Agreement must be included with each copy of the Program.
 93 |      Contributors may not remove or alter any copyright notices contained
 94 |      within the Program.
 95 | 
 96 | Each Contributor must identify itself as the originator of its Contribution,
 97 | if
 98 | any, in a manner that reasonably allows subsequent Recipients to identify the
 99 | originator of the Contribution.
100 | 
101 | 4. COMMERCIAL DISTRIBUTION
102 | 
103 | Commercial distributors of software may accept certain responsibilities with
104 | respect to end users, business partners and the like. While this license is
105 | intended to facilitate the commercial use of the Program, the Contributor who
106 | includes the Program in a commercial product offering should do so in a manner
107 | which does not create potential liability for other Contributors. Therefore,
108 | if a Contributor includes the Program in a commercial product offering, such
109 | Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
110 | every other Contributor ("Indemnified Contributor") against any losses,
111 | damages and costs (collectively "Losses") arising from claims, lawsuits and
112 | other legal actions brought by a third party against the Indemnified
113 | Contributor to the extent caused by the acts or omissions of such Commercial
114 | Contributor in connection with its distribution of the Program in a commercial
115 | product offering. The obligations in this section do not apply to any claims
116 | or Losses relating to any actual or alleged intellectual property
117 | infringement. In order to qualify, an Indemnified Contributor must:
118 | a) promptly notify the Commercial Contributor in writing of such claim, and
119 | b) allow the Commercial Contributor to control, and cooperate with the
120 | Commercial Contributor in, the defense and any related settlement
121 | negotiations. The Indemnified Contributor may participate in any such claim at
122 | its own expense.
123 | 
124 | For example, a Contributor might include the Program in a commercial product
125 | offering, Product X. That Contributor is then a Commercial Contributor. If
126 | that Commercial Contributor then makes performance claims, or offers
127 | warranties related to Product X, those performance claims and warranties are
128 | such Commercial Contributor's responsibility alone. Under this section, the
129 | Commercial Contributor would have to defend claims against the other
130 | Contributors related to those performance claims and warranties, and if a
131 | court requires any other Contributor to pay any damages as a result, the
132 | Commercial Contributor must pay those damages.
133 | 
134 | 5. NO WARRANTY
135 | 
136 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
137 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
138 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
139 | NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
140 | Recipient is solely responsible for determining the appropriateness of using
141 | and distributing the Program and assumes all risks associated with its
142 | exercise of rights under this Agreement , including but not limited to the
143 | risks and costs of program errors, compliance with applicable laws, damage to
144 | or loss of data, programs or equipment, and unavailability or interruption of
145 | operations.
146 | 
147 | 6. DISCLAIMER OF LIABILITY
148 | 
149 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
150 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
151 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
152 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
153 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
154 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
155 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
156 | OF SUCH DAMAGES.
157 | 
158 | 7. GENERAL
159 | 
160 | If any provision of this Agreement is invalid or unenforceable under
161 | applicable law, it shall not affect the validity or enforceability of the
162 | remainder of the terms of this Agreement, and without further action by the
163 | parties hereto, such provision shall be reformed to the minimum extent
164 | necessary to make such provision valid and enforceable.
165 | 
166 | If Recipient institutes patent litigation against any entity (including a
167 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
168 | (excluding combinations of the Program with other software or hardware)
169 | infringes such Recipient's patent(s), then such Recipient's rights granted
170 | under Section 2(b) shall terminate as of the date such litigation is filed.
171 | 
172 | All Recipient's rights under this Agreement shall terminate if it fails to
173 | comply with any of the material terms or conditions of this Agreement and does
174 | not cure such failure in a reasonable period of time after becoming aware of
175 | such noncompliance. If all Recipient's rights under this Agreement terminate,
176 | Recipient agrees to cease use and distribution of the Program as soon as
177 | reasonably practicable. However, Recipient's obligations under this Agreement
178 | and any licenses granted by Recipient relating to the Program shall continue
179 | and survive.
180 | 
181 | Everyone is permitted to copy and distribute copies of this Agreement, but in
182 | order to avoid inconsistency the Agreement is copyrighted and may only be
183 | modified in the following manner. The Agreement Steward reserves the right to
184 | publish new versions (including revisions) of this Agreement from time to
185 | time. No one other than the Agreement Steward has the right to modify this
186 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
187 | Eclipse Foundation may assign the responsibility to serve as the Agreement
188 | Steward to a suitable separate entity. Each new version of the Agreement will
189 | be given a distinguishing version number. The Program (including
190 | Contributions) may always be distributed subject to the version of the
191 | Agreement under which it was received. In addition, after a new version of the
192 | Agreement is published, Contributor may elect to distribute the Program
193 | (including its Contributions) under the new version. Except as expressly
194 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
195 | licenses to the intellectual property of any Contributor under this Agreement,
196 | whether expressly, by implication, estoppel or otherwise. All rights in the
197 | Program not expressly granted under this Agreement are reserved.
198 | 
199 | This Agreement is governed by the laws of the State of New York and the
200 | intellectual property laws of the United States of America. No party to this
201 | Agreement will bring a legal action under this Agreement more than one year
202 | after the cause of action arose. Each party waives its rights to a jury trial in
203 | any resulting litigation.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # EntityGraph
  2 | 
  3 | EntityGraph is an in-memory immutable data store designed for web applications, though it likely has other use cases. It is available for Clojure and ClojureScript.
  4 | 
  5 | Based on the triple store concept (entity, attribute, value), data is stored in the form of entities in the EAV index (entity, attribute, value). Select attributes are also indexed in the AVE index (attribute, value, entity). Only indexing select attributes in the AVE index gives the option to economize on memory.
  6 | 
  7 | The indexes are implemented as nested Clojure maps, accessible with Clojure's functions. Attributes can be of any type (including collections) and don't need to be declared in the schema unless special behavior is needed.
  8 | 
  9 | For data retrieval EntityGraph offers pull-style graph query support, which satisfies most use cases. There is no datalog, sql or any other query language support, but since the indexes are Clojure maps, any number of querying solutions could be implemented on top of the indexes. The user may also write custom functions to retrieve data from indexes without the need to parse queries. This is expected to be a rare use case.
 10 | 
 11 | ## Features
 12 | 
 13 | * Tempids, Keyword entity ids for nice programmatic names
 14 | * Cardinality one and many attributes
 15 |   * Sorting within entity for cardinality many attributes (using default or custom comparator)
 16 | * Reference attributes and component entities
 17 | * Unique identities, unique values, and lookup refs
 18 | * Nested entities in assertions
 19 | * AVE index sorted by custom comparator
 20 | * Pull queries with: wildcarding, nesting, joins, forward/backward attribute nav, recursive pulls
 21 | 
 22 | ## Non-Goals
 23 | 
 24 | **The following are not in scope of the project:**
 25 | * EntityGraph makes no effort to synchronize data between client and server and considers this an orthogonal concern.
 26 | * The word "database" is used throughout, but keep in mind that there is no storage layer.
 27 | * Reactive queries are not supported.
 28 | 
 29 | ## Dependency Information
 30 | 
 31 | [deps.edn](https://clojure.org/guides/deps_and_cli) dependency information:
 32 | 
 33 | `com.github.geodrome/entity-graph {:mvn/version "0.1.0-SNAPSHOT"}`
 34 | 
 35 | [Leiningen](https://github.com/technomancy/leiningen) dependency information:
 36 | 
 37 | `[com.github.geodrome/entity-graph "0.1.0-SNAPSHOT"]`
 38 | 
 39 | ## Status
 40 | 
 41 | * The feature set is complete, though additional features and enhancements are possible in the future. 
 42 | * In case of unexpected issues, every effort will be made to avoid breaking changes by moving to new names rather than by breaking existing names. 
 43 | * The code is reasonably well tested, but there has been minimal production use, so some issues may arise.
 44 | 
 45 | ## Documentation And Tutorial
 46 | 
 47 | > **Depending on your preference, you may either start with the tutorial or read the documentation first to learn about concepts and features.**
 48 | 
 49 | The **[Hands-on Tutorial](docs/TUTORIAL.md)** introduces the majority of the features in as succinct a manner as possible.
 50 | 
 51 | **[Documentation](docs/DOCUMENTATION.md)** describes the concepts and features of EntityGraph. **[Schema](docs/DOCUMENTATION.md#schema)** and **[Indexes](docs/DOCUMENTATION.md#indexes)** sections might be especially useful.
 52 | 
 53 | The thought process behind many of the technical design decisions is captured in **[TECHNOTES.md](docs/TECHNOTES.md)**.
 54 | 
 55 | ## Quick Example
 56 | 
 57 | This is just to give you a feel:
 58 | 
 59 | ```clojure
 60 | ;; Create a database
 61 | ;; This schema has just one attribute, which is to be indexed in the AVE index in a standard Clojure hashmap
 62 | (def db-empty
 63 |   (create-db {:person/last-name
 64 |               {:db/index {:db/map-type :db.map-type/hash-map}}}))
 65 | => #'user/db-empty
 66 | 
 67 | ;; Add and entity to the empty database and capture result of the transaction in tx-result
 68 | ;; NOTE: :person/first-name attribute didn't need to be defined in the schema
 69 | (def tx-result (transact db-empty [{:person/first-name "Jim" :person/last-name "Morrison"}]))
 70 | => #'user/tx-result
 71 | 
 72 | ;; Examine the EAV and AVE indexes after the transaction above
 73 | (select-keys (:db-after tx-result) [:db/eav :db/ave])
 74 | =>
 75 | {:db/eav {1 {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}},
 76 |  :db/ave {:person/last-name {"Morrison" #{1}}}}
 77 | 
 78 | ;; Retrieve some data with a pull query
 79 | ;; pull the attribute :person/last-name for the entity with id 1
 80 | (pull (:db-after tx-result) [:person/last-name] 1)
 81 | => {:person/last-name "Morrison"}
 82 | 
 83 | ;; Read some data with a pull query - pull all attributes for the entity with id 1
 84 | (pull (:db-after tx-result) '[*] 1)
 85 | => {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}
 86 | 
 87 | ```
 88 | 
 89 | ## Performance vs. DataScript and ASAMI
 90 | 
 91 | Performance was measured vs. DataScript and ASAMI. Those two databases are roughly in the same category. For the most part, performance compares favorably with DataScript and ASAMI. The benchmarks were performed on my M2 MacBook Pro.
 92 | 
 93 | The exact code is here: [entity-graph.benchmark-vs](test/entity_graph/benchmark_vs.cljc).
 94 | 
 95 | It must be acknowledged that these benchmarks, like all benchmarks, are imperfect. These are just a few quick and dirty benchmarks in an attempt to get a rough idea of how performance compares among the three databases.
 96 | 
 97 | The schema used for EntityGraph indexed all attributes in a sorted AVE index, so performance for asserting entities in the database can be compared fairly vs. the other databases.
 98 | 
 99 | The tests were run in both Clojure and ClojureScript and the results largely mirrored each other. EntityGraph was notably faster when asserting new entities into the database and when using pull to retrieve data from the database. 
100 | 
101 | When querying data without pull (with datalog for DataScript and ASAMI, with custom data retrieval functions for EntityGraph), EntityGraph outperformed ASAMI for a simple query, but ASAMI outperformed EntityGraph for a more complex query. Both ASAMI and EntityGraph outperformed DataScript by an astronomical margin. Either something is seriously wrong with the query benchmark or DataScript queries are particularly slow.
102 | 
103 | ### Clojure Results
104 | 
105 | * When asserting 20,000 entities into an empty database, **EntityGraph is twice as fast as DataScript and ASAMI**.
106 | * Pull performance for wildcard pattern is **~6x faster** compared to DataScript. For pulling a single attribute EntityGraph is **~2x faster** than DataScript. ASAMI doesn't support pull.
107 |   * Pulls were done from a database of 20,000 entities.
108 | * For a simple query EntityGraph is **~100x faster than ASAMI and ~5,000x(!) faster than DataScript**
109 |   * Queries were done from a database of 20,000 entities.
110 | * For a more complex query EntityGraph is **~3x slower than ASAMI**, but still **more than 10x faster than DataScript**.
111 |   * Queries were done from a database of 20,000 entities
112 | * For EntityGraph the queries were written as custom data retrieval functions since no declarative query language is supported by EntityGraph.
113 | 
114 | ### ClojureScript Results
115 | 
116 | * When asserting 20,000 entities into an empty database, **EntityGraph is twice as fast as DataScript and ASAMI**.
117 | * Pull performance for wildcard pattern is **~4x faster** compared to DataScript. For pulling a single attribute EntityGraph is **~2x faster**. ASAMI doesn't support pull.
118 |   * Pulls were done from a database of 20,000 entities.
119 | * For simple query **EntityGraph is ~100x faster than ASAMI and a few thousand times faster than DataScript**.
120 |   * Queries were done from a database of 20,000 entities
121 | * For a more complex query EntityGraph is ~3.5x slower than ASAMI, but still more than 10x faster than DataScript
122 |   * Queries were done from a database of 20,000 entities
123 | * For EntityGraph the queries were written as custom data retrieval functions since no declarative query language is supported by EntityGraph.
124 | 
125 | # License
126 | Copyright © 2021–2023 Georgiy Grigoryan
127 | 
128 | Licensed under Eclipse Public License (see LICENSE).
129 | 
130 | 


--------------------------------------------------------------------------------
/build/build/core.clj:
--------------------------------------------------------------------------------
 1 | (ns build.core
 2 |   (:require [clojure.tools.build.api :as b]
 3 |             [deps-deploy.deps-deploy :as dd]))
 4 | 
 5 | (def lib 'com.github.geodrome/entity-graph) ; qualified lib symbol passed to write-pom, install and pom-path
 6 | ;(def version (format "0.0.%s" (b/git-count-revs nil)))
 7 | ;; *** NOTE: Change version number manually! ***
 8 | (def version "0.1.0-SNAPSHOT")
 9 | (def class-dir "target/classes") ; staging dir: the pom is written here and src is copied here before jarring
10 | (def basis (b/create-basis {:project "deps.edn"})) ; basis created from the project's deps.edn
11 | (def jar-file (format "target/%s-%s.jar" (name lib) version)) ; i.e. target/entity-graph-<version>.jar
12 | 
13 | (defn clean [_] ; build task: the single argument is ignored
14 |   (b/delete {:path "target"})) ; removes the "target" directory, which holds class-dir and jar-file
15 | 
16 | (defn jar [_] ; build task: the single argument is ignored
17 |   (b/write-pom {:class-dir class-dir ; step 1: write the pom for lib/version
18 |                 :lib lib
19 |                 :version version
20 |                 :basis basis
21 |                 :src-dirs ["src"]})
22 |   (b/copy-dir {:src-dirs   ["src"] ; step 2: copy the sources into class-dir
23 |                :target-dir class-dir})
24 |   (b/jar {:class-dir class-dir ; step 3: package class-dir into jar-file
25 |           :jar-file jar-file}))
26 | 
27 | (defn install
28 |   "Install JAR to local maven repo. Does not build the jar itself; run the `jar` task first so that jar-file exists."
29 |   [_]
30 |   (b/install {:basis      basis
31 |               :lib        lib
32 |               :version    version
33 |               :jar-file   jar-file
34 |               :class-dir  class-dir}))
35 | 
36 | (defn deploy
37 |   "Deploy JAR to Clojars. Expects CLOJARS_USERNAME and CLOJARS_PASSWORD env variables; run the `jar` task first."
38 |   [_]
39 |   (dd/deploy {:installer :remote
40 |               :artifact jar-file
41 |               :pom-file (b/pom-path {:lib lib :class-dir class-dir})})) ; pom location derived from lib and class-dir
42 | 
43 | ;; - From the command line
44 | ;; -- To create new jar:
45 | ; $ clj -T:build clean
46 | ; $ clj -T:build jar
47 | ;; -- To install in local Maven:
48 | ; $ clj -T:build install
49 | ;; -- To deploy to Clojars:
50 | ;; Expects CLOJARS_USERNAME and CLOJARS_PASSWORD env variables
51 | ; $ clj -T:build deploy


--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
 1 | {:deps {org.clojure/data.avl   {:mvn/version "0.1.0"}}
 2 |  :paths ["src"]
 3 |  :aliases
 4 |  {:test
 5 |   {:extra-deps  {;; For benchmarking, match versions with shadow-cljs.edn
 6 |                  datascript/datascript       {:mvn/version "1.4.2"}
 7 |                  org.clojars.quoll/asami     {:mvn/version "2.3.3"}}
 8 |    :extra-paths ["test"]}
 9 |   :drafts
10 |   {:extra-deps  {org.clojure/test.check      {:mvn/version "1.0.0"}
11 |                  org.clojure/spec.alpha {:mvn/version "0.2.187"}}
12 |    :extra-paths ["drafts"]}
13 |    :cljs
14 |    {:extra-deps {thheller/shadow-cljs {:mvn/version "2.25.8"}}}
15 |   :build
16 |   {:extra-deps        {io.github.clojure/tools.build {:mvn/version "0.9.6"}
17 |                        slipset/deps-deploy {:mvn/version "RELEASE"}}
18 |    :extra-paths ["build"]
19 |    :ns-default  build.core}}}


--------------------------------------------------------------------------------
/docs/DOCUMENTATION.md:
--------------------------------------------------------------------------------
  1 | # EntityGraph Documentation
  2 | 
  3 | # Table of Contents
  4 | * [Overview](#basic-overview) - Basic concepts and capabilities.
  5 | * [Transactions](#transactions) - How to use `transact` to add/remove/update entities.
  6 | * [Schema](#schema) - Explains various attribute properties that can be defined in the schema. 
  7 | * [Indexes](#indexes) - How indexes are constructed and how entities are represented in indexes.
  8 | * [Pull](#pull) - Declarative data retrieval.
  9 | * [Read Directly from Index](#read-directly-from-index) - How to read data directly from the indexes. 
 10 | 
 11 | ## Basic Overview
 12 | 
 13 | To use EntityGraph:
 14 | * Create an initial database with an optional schema using the function `create-db`.
 15 |   * The schema cannot be updated, once created.
 16 | * Add, remove, update entities using the function `transact`.
 17 |   * Each call to `transact` produces a new immutable database value.
 18 |   * `transact` enforces certain database constraints and takes care of indexing.
 19 | * To read data in a declarative way, use the function `pull` to make hierarchical (and possibly nested) selections of information about entities.
 20 |   * Alternatively, read from the indexes directly with custom data retrieval functions.
 21 | 
 22 | ### Quick Example
 23 | 
 24 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).**
 25 | 
 26 | This is just to give you a taste:
 27 | 
 28 | ```clojure
 29 | ;; Create a database
 30 | ;; This schema has just one attribute, which is to be indexed in the AVE index
 31 | (def db-empty
 32 |   (create-db {:person/last-name
 33 |               {:db/index {:db/map-type :db.map-type/hash-map}}}))
 34 | => #'user/db-empty
 35 | 
 36 | ;; Add an entity to the empty database and capture the result of the transaction in tx-result
 37 | ;; NOTE: :person/first-name attribute didn't need to be defined in the schema
 38 | (def tx-result (transact db-empty [{:person/first-name "Jim" :person/last-name "Morrison"}]))
 39 | => #'user/tx-result
 40 | 
 41 | ;; Examine the EAV and AVE indexes after the transaction above
 42 | (select-keys (:db-after tx-result) [:db/eav :db/ave])
 43 | =>
 44 | {:db/eav {1 {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}},
 45 |  :db/ave {:person/last-name {"Morrison" #{1}}}}
 46 | 
 47 | ;; Retrieve some data with a pull query
 48 | ;; pull the attribute :person/last-name for the entity with id 1
 49 | (pull (:db-after tx-result) [:person/last-name] 1)
 50 | => {:person/last-name "Morrison"}
 51 | 
 52 | ;; Read some data with a pull query - pull all attributes for the entity with id 1
 53 | (pull (:db-after tx-result) '[*] 1)
 54 | => {:person/first-name "Jim", :person/last-name "Morrison", :db/id 1}
 55 | ```
 56 | 
 57 | ## Entities
 58 | 
 59 | The database is organized around entities. An entity is a map of attribute/value pairs. Each entity contains a special `:db/id` key signifying its entity id or the internal key in the database. Entities are indexed by `:db/id` in the EAV index and by select attributes in the AVE index.
 60 | 
 61 | Example entity:
 62 | 
 63 | ```clojure
 64 | {:db/id 1 
 65 |  :person/name "John"
 66 |  :person/email "john@johnny.net"}
 67 | ``` 
 68 | 
 69 | ## Entity IDs
 70 | 
 71 | Entity ids are usually auto-assigned, but they can also be user-specified keywords. Keyword entity ids provide a convenient programmatic name for an entity.
 72 | 
 73 | Here's an entity with a keyword entity id `:ui/chat-window` and one attribute `:chat-window/text` with the string value `"Type here..."`.
 74 | 
 75 | ```clojure
 76 | {:db/id :ui/chat-window 
 77 |  :chat-window/text "Type here..."}
 78 | ```
 79 | 
 80 | NOTE: To be encoded in the `value` attribute of HTML inputs, keyword ids must be converted to strings.
 81 | 
 82 | ## Attributes
 83 | 
 84 | Entities consist of attribute/value pairs. Attributes are analogous to columns in SQL databases, but entities are not required to have predefined sets of attributes. Any and all attributes may be added to or retracted from entities freely.
 85 | 
 86 | Attributes only need to be defined in the schema when specific attribute properties need to be declared. Otherwise, attribute names may be used freely without declaration in the schema. See the [schema section](#schema) for more details.
 87 | 
 88 | ###  Attribute Name Constraints
 89 | 
 90 | * Attribute names must be keywords. Though not enforced, things will break if you don't use keywords.
 91 | * Attribute names must not begin with an underscore. This is not enforced, but it will break reverse navigation in `pull`.
 92 | * Attribute name keywords may be namespaced. 
 93 |   * The `:<namespace>/<name>` lexical form is preferred to avoid naming collisions.
 94 | * The `:db` namespace is reserved for the database by convention.
 95 | 
 96 | ### Attribute Cardinality
 97 | 
 98 | By default, attributes contain just one value. These are known as cardinality one attributes. But cardinality many attributes are also supported. These attributes may contain multiple values. Cardinality many attributes are represented as sets of values:
 99 | 
100 | ```clojure
101 | {:db/id 1 
102 |  :person/name "John" 
103 |  :person/nicknames #{"Johnny" "Versaci"}}
104 | ```
105 | 
106 | The attribute `:person/nicknames` above is cardinality many, and it is represented as a set of values. Cardinality many attributes must be defined as such in the schema when the database is created.
107 | 
108 | Several other attribute properties may be defined in the schema. See the [schema section](#schema) for more details.
109 | 
110 | ## Values
111 | 
112 | * Attributes can hold values of any type, including collections.
113 | * Attributes are not typed and no data type declarations for attributes are required in the schema. 
114 |   * Any and all data types are allowed, including collections.
115 |   * Different entities can hold values of different type under the same attribute name.
116 |   * Cardinality many attributes may contain heterogeneous value types for the same entity.
117 |   * The one **exception is the reference type**, which must be declared in the schema. 
118 |     * Reference attributes point to other entities in the database, thus creating a graph.
119 |     * Except for reference values, no data type declarations are required in the schema.
120 | * Nil values are illegal. This is enforced.
121 |   * To indicate the absence of value for a given attribute, simply avoid adding it to the entity. 
122 |   * To remove an existing value for a given attribute, simply remove the value; don't attempt to set it to `nil`.
123 | 
124 | ## Time/History
125 | 
126 | EntityGraph does not keep a history (unlike Datomic, for example). There is no transaction log and no timestamps are recorded. However, since each successive db value, produced by `transact`, is an immutable Clojure map, any number of past db values can be preserved by holding references to those values. A `:db/tx-count` value is kept and incremented after each transaction.
127 | 
128 | ## Storage
129 | 
130 | Storage is not supported. If you're considering implementing it, see the section [Writing to Storage](TECHNOTES.md#writing-to-storage) in Technical Notes for some considerations.
131 | 
132 | # Transactions
133 | 
134 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).**
135 | 
136 | All database operations are performed with the `transact` function, which updates the database and enforces database constraints.
137 | 
138 | There are three operations:
139 | * `:db/add` - assertions add data to the database.
140 | * `:db/retract` - retractions remove data from the database.
141 | * `:db/retractEntity` - retracts an entity, all of its component entities, and all references to the entity and component entities.
142 | 
143 | All operations can be expressed in list form. Assertions can also be expressed in map form. Map form assertions are more convenient and particularly performant when adding new entities to the database.
144 | 
145 | As a further convenience:
146 | * The attribute keys in the map may be either keywords or strings.
147 | * Nested maps are supported. 
148 |   * Map values for reference attributes are interpreted as nested entities. 
149 | 
150 | To assert new entities, use tempids or maps with no `:db/id` key. To update an existing entity use the existing entity id.
151 | 
152 | ## Temporary ids
153 | 
154 | New entities may be identified by a temporary id. Tempids in transaction data are represented by a string in the entity id position. When the transaction is processed, temporary ids are resolved to actual entity ids.
155 | 
156 | If a temporary id is used more than once, all instances of the tempid are mapped to the same entity id. There is an exception for `:db.unique/identity` attributes, which support upsert behavior:
157 | * The tempid of a `:db.unique/identity` attribute will map to an existing entity if one exists with the same attribute and value (update)
158 | * Or it will make a new entity if one does not exist (insert)
159 | * All further adds in the transaction that apply to that same temporary id are applied to the "upserted" entity
160 | 
161 | # Schema
162 | 
163 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).**  
164 | 
165 | An example schema may be seen in the namespace [entity-graph.core-test](../test/entity_graph/core_test.cljc).
166 | 
167 | Defining attributes in the schema is only required for certain attribute behaviors. Though not required, you may wish to list the full schema anyway.
168 | 
169 | The following properties may be specified for attributes in the schema:
170 | 
171 | ## :db/doc
172 | 
173 | An optional documentation string can be specified in the definition of each attribute. It might be used to document the data type, data shape, or anything else about the attribute. The docstring is meant for the programmer reading the code and isn't programmatically leveraged by the database in any way.
174 | 
175 | ## :db/valueType
176 | 
177 | Attribute values can be of any type, including collections. No data type declarations are required in the schema, except for reference values.
178 | 
179 | Adding `{:db/valueType :db.type/ref}` to the schema signifies a reference attribute that refers to other entities in the database by entity id.
180 | 
181 | ## :db/isComponent
182 | 
183 | Reference attributes can optionally specify sub-component entities with `{:db/isComponent true}`.
184 | 
185 | Component entities have the following properties:
186 | * When the parent entity is retracted with `:db/retractEntity`, component entities are also retracted.
187 | * If a component attribute is pulled with `pull`, a map containing all the attributes of the referenced entity will be returned.
188 | * If multiple entities attempt to claim an entity as their component, the transaction will fail with `:db.error/component-conflict`.
189 | * If an entity attempts to hold another entity as a component under different attributes, the transaction will fail with `:db.error/component-conflict`.
190 | * By default reference attributes are non-component.
191 | 
192 | ### Pseudo Entities
193 | 
194 | An alternative to creating component entities is to **not use a reference attribute** and store map values under the attribute as component "pseudo-entities". This would still offer the two main behaviors of component entities: 
195 | * Retracting the parent with `:db/retractEntity` would still retract the component pseudo-entities.
196 | * Pulling the attribute would still return the map containing all the attributes of the component pseudo-entity.
197 | 
198 | However, pseudo-entities have no independent existence in the database. They only exist as a map value under some attribute of a parent entity. Thus, the keys of pseudo-entities would not be interpreted as database attributes, resulting in the following trade-offs:
199 | * It is not possible to index the values under individual keys (pseudo attributes) of pseudo-entities in the AVE index. 
200 |   * The entire map value representing the component pseudo-entity must be indexed.
201 |   * However, a sorted AVE index can be used with a comparator that sorts the pseudo-entities by values of individual keys or combinations of keys.
202 | * `pull` would not be able to join/navigate via keys of pseudo-entities.
203 | * The concept of one parent per component becomes meaningless.
204 | 
205 | ## :db/cardinality
206 | 
207 | By default, attributes contain just one value. These are known as cardinality one attributes. Cardinality many attributes are also supported.
208 | 
209 | Adding `{:db/cardinality :db.cardinality/many}` to the schema signifies a cardinality many attribute. Cardinality many attributes may contain more than one value and are represented as sets of values.
210 | 
211 | If no `:db/cardinality` is specified, `:db.cardinality/one` is the default.
212 | 
213 | ## :db/unique
214 | 
215 | A uniqueness constraint can be specified under the `:db/unique` key.
216 | * Only `:db.cardinality/one` attributes can have a uniqueness constraint.
217 | * Unique attributes are always indexed in AVE index to support fast uniqueness checks.
218 | * An entity may contain multiple unique attributes, but anomalies may arise.
219 | * By default attributes are non-unique.
220 | 
221 | ### :db.unique/identity and :db.unique/value
222 | 
223 | * Adding `{:db/unique :db.unique/identity}` to the schema asserts a database-wide unique identifier for an entity with upsert support. 
224 |   * A unique identity attribute can be used for a globally unique identifier (e.g. `:global-id`). This identifier might link an entity across different databases. 
225 | * Adding `{:db/unique :db.unique/value}` to the schema represents an attribute-wide value that can be asserted only once, with no upsert support.
226 | 
227 | > To see the `:db/unique` attributes in action check out the [Hands-on Tutorial](TUTORIAL.md).
228 | 
229 | ## :db/sort
230 | 
231 | By default, cardinality many attributes are stored as unsorted sets in the EAV index. Cardinality many **non-reference** attributes can optionally specify sorting parameters under the `:db/sort` key. Under some circumstances, this is a convenient solution that amortizes sorting costs and eliminates the need to repeatedly sort the same data.
232 | 
233 | > **NOTE:** When using sorted sets, all values must be intercomparable among themselves, else adding to a sorted set will fail.
234 | 
235 | ### :db/set-type
236 | 
237 | The `:db/set-type` key specifies the type of sorted set. Two types of sorted sets may be used: 
238 | * Clojure's sorted sets
239 | * AVL sets from `clojure.data.avl` 
240 |   * AVL sets support the full `clojure.core` sorted collections API, but also offer logarithmic time operations: rank queries, "nearest key" lookups, splits by index or key, subsets. Learn more here: https://github.com/clojure/data.avl
241 | 
242 | To use Clojure's sorted sets:
243 | 
244 | ```clojure
245 | {:db/sort {:db/set-type :db.set-type/sorted-set}} 
246 | ``` 
247 | 
248 | To use sorted AVL sets from `clojure.data.avl`:
249 | 
250 | ```clojure
251 | {:db/sort {:db/set-type :db.set-type/avl-set}}
252 | ``` 
253 | 
254 | ### :db/comparator
255 | 
256 | An optional `:db/comparator` key specifies a comparator function. Custom comparators can be used with both Clojure's sorted sets and AVL sets.
257 | 
258 | The following sorted sets will compare the values with the function `>=` (greater than or equal to):
259 | 
260 | ```clojure
261 | ;; Clojure's sorted set with comparator
262 | {:db/sort {:db/set-type   :db.set-type/sorted-set
263 |            :db/comparator >=}}
264 | 
265 | ;; AVL set with comparator
266 | {:db/sort {:db/set-type   :db.set-type/avl-set
267 |            :db/comparator >=}}
268 | ```
269 | 
270 | If no `:db/comparator` is specified, the default comparator `compare` will be used. Learn more about comparators here: https://clojure.org/guides/comparators
271 | 
272 | If sorting with multiple comparators is desired, different attributes can be used. For example, `:salary-asc` and `:salary-desc` to sort salaries in ascending and descending order. Each of these attributes would be independent of the other, so the user would need to take care to keep the two attribute values consistent.
273 | 
274 | ### Sorted Reference Values
275 | 
276 | Sorting reference values doesn't make much sense since it entails sorting internal database keys. To sort the entities pointed to by a reference attribute, pull the entities and then sort them. 
277 | 
278 | Alternatively, don't use a reference attribute. Instead, store the entities as maps, effectively creating component "pseudo-entities". This makes it possible to use sorted sets for these pseudo-entities, but see [Pseudo Entities](#pseudo-entities) for an explanation of tradeoffs.
279 | 
280 | ## :db/index
281 | 
282 | Unique and reference attributes (`:db/unique` and `:db.type/ref`) are always indexed in the AVE index. For all other attributes it must be specified in the schema.
283 | 
284 | The `:db/index` key specifies that an attribute should be indexed in the AVE index.
285 | 
286 | Three types of maps may be used for indexing: 
287 | * Clojure's (unsorted) map
288 | * Clojure's sorted map
289 | * Sorted map from `clojure.data.avl`. 
290 |   * AVL maps support the full `clojure.core` sorted collections API, but also support transients and offer logarithmic time operations: rank queries, "nearest key" lookups, splits by index or key, subsets. Learn more here: https://github.com/clojure/data.avl
291 | 
292 | While unsorted maps are good for fast lookups of specific single values, sorted maps enable fast lookups for range queries.
293 | 
294 | ### :db/map-type
295 | 
296 | To use the standard (unsorted) Clojure map:
297 | 
298 | ```clojure
299 | {:db/index {:db/map-type :db.map-type/hash-map}}
300 | ```
301 | 
302 | To use a sorted Clojure map:
303 | 
304 | ```clojure
305 | {:db/index {:db/map-type :db.map-type/sorted-map}}
306 | ```
307 | 
308 | To use a sorted AVL map:
309 | 
310 | ```clojure
311 | {:db/index {:db/map-type :db.map-type/avl-map}}
312 | ```
313 | 
314 | ### :db/comparator
315 | 
316 | Both types of sorted map also support custom comparators.
317 | 
318 | To index in a sorted map and compare with `>` (greater than):
319 | 
320 | ```clojure
321 | {:db/index {:db/map-type :db.map-type/sorted-map 
322 |             :db/comparator >}}
323 | ```
324 | 
325 | To index in a sorted AVL map and compare with `>` (greater than):
326 | 
327 | ```clojure
328 | {:db/index {:db/map-type :db.map-type/avl-map 
329 |             :db/comparator >}}
330 | ```
331 | 
332 | If no `:db/comparator` is specified, the default comparator `compare` will be used. Learn more about comparators here: https://clojure.org/guides/comparators
333 | 
334 | # Indexes
335 | 
336 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).**
337 | 
338 | EntityGraph contains two indexes: 
339 | * entity-attribute-value (EAV)
340 | * attribute-value-entity (AVE)
341 | 
342 | These two indexes are sufficient to support all data retrieval operations. Each transaction updates the indexes and produces a new immutable database value.
343 | 
344 | ## EAV Index
345 | 
346 | The EAV index contains all entities in a nested map. Entries have two distinct forms: 
347 | * Form `{e {a v}}`, with only one value allowed in the `v` position
348 | * Form `{e {a #{v1 v2 ...}}}`, with many values allowed in the `v` position
349 | 
350 | ### EAV Form `{e {a v}}`
351 | 
352 | For `:db.cardinality/one` attributes EAV entries are in the form `{e {a v}}`, with only one value allowed in the `v` position.
353 | 
354 | Here's an example EAV index that contains just one entity:
355 | 
356 | ```clojure
357 | {1 {:db/id 1
358 |     :person/name "Tina Turner"
359 |     :person/ssn "111-22-3344"}}
360 | ```
361 | 
362 | The keys of the outer map are entity ids. The values of the outer map are the entities stored as maps with keys representing database attributes, and values representing database values. Each entity contains the special `:db/id` key representing the entity id.
363 | 
364 | ### EAV Form `{e {a #{v1 v2 ...}}}`
365 | 
366 | For `:db.cardinality/many` attributes EAV entries are in the form `{e {a #{v1 v2 ...}}}`, where `v1`, `v2`, etc. are distinct values enclosed in a set.
367 | 
368 | Here's an example EAV index that contains just one entity:
369 | 
370 | ```clojure
371 | {1 {:db/id 1
372 |     :person/name "Tina Turner"
373 |     :person/ssn "111-22-3344"
374 |     :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}}
375 | ```
376 | 
377 | The attribute `:person/aliases` is cardinality many and is represented by a set of values `#{"Queen of Rock" "The Queen of Rock'n'Roll"}`. Thus, the attribute `:person/aliases` contains the values `"Queen of Rock"` and `"The Queen of Rock'n'Roll"`.
378 | 
379 | ### Entity Retraction
380 | 
381 | * If a transaction results in an entity with no remaining attributes, the entity is completely removed from the EAV index.
382 | * If an entity is retracted with `:db/retractEntity` then the entity, all of its component entities, and all references to the entity and component entities will be retracted as well.
383 | 
384 | ## AVE Index
385 | 
386 | > **To see these concepts in action check out the [Hands-on Tutorial](TUTORIAL.md).**
387 | 
388 | The AVE index significantly improves the speed of data retrieval operations involving the indexed attribute at the expense of additional memory space. The AVE index is non-covering, meaning that only entity id is stored in the entity position, not the full entity.
389 | 
390 | While every entity is contained in the EAV index in full, the AVE index only contains select attributes:
391 | 
392 | * All reference attributes, to support quick lookups of all entities pointing to a target entity. This speeds up reverse navigation in pull queries and is also used for `:db/retractEntity` operations.
393 | * All unique attributes, declared with `:db/unique` property for fast uniqueness checks. 
394 | * Attributes where the `:db/index` key specifies that it should be indexed in the AVE index.
395 | 
396 | Entries in the AVE index have two distinct forms: 
397 | * `{a {v e}}` with only one entity id allowed in the `e` position
398 | * `{a {v #{e1 e2 ...}}}` with many entity ids allowed in the `e` position
399 | 
400 | ### AVE Form `{a {v e}}`
401 | 
402 | Unique attributes and component attributes are stored in `{a {v e}}` form, where only one entity id in the `e` position is allowed. This is because only one entity id is logically possible for unique and component attributes.
403 | 
404 | The following AVE index contains the `:db.unique/identity` attribute `:person/ssn` with two values: 
405 | * `"123-45-6789"` belonging to the person with entity id `1`,
406 | * `"987-65-4321"` belonging to the person with entity id `2`:
407 | 
408 | ```clojure
409 | {:person/ssn {"123-45-6789" 1, "987-65-4321" 2}}
410 | ```
411 | 
412 | The keys of the outer map are the indexed attributes (just `:person/ssn` in the example above). The values of the outer map are maps of values mapped to entity ids.
413 | 
414 | For `{a {v e}}` form attributes:
415 | * If a transaction results in a removal of an entity id in the `e` position, then the entire `[v e]` entry is removed from the AVE index.
416 | * If a transaction results in more than one entity id in the `e` position for a unique or component attribute, it fails with an error.
417 | 
418 | ### AVE Form `{a {v #{e1 e2 ...}}}`
419 | 
420 | All non-unique and non-component indexed attributes are stored in `{a {v #{e1 e2 ...}}}` form, where multiple entity ids in the `e` position are possible. The set `#{e1 e2 ...}` contains those entity ids.
421 | 
422 | The following AVE index contains the non-unique and non-component indexed attribute `:person/last-name` with two values: `"Brown"` belonging to entity id `1`, and `"Smith"` belonging to entity ids `2` and `3`:
423 | 
424 | ```clojure
425 | {:person/last-name {"Brown" #{1}, "Smith" #{2,3}}}
426 | ```
427 | 
428 | For `{a {v #{e1 e2 ...}}}` form attributes:
429 | * If a transaction results in an empty set in the `e` position, then the entire `{v #{e1 e2 ...}}` entry is removed from the AVE index.
430 | * More than one entity id in the `e` position is supported as it logically makes sense.
431 | 
432 | ## Collections in Indexes
433 | 
434 | > **NOTE:** Your application logic must correctly interpret cardinality one and cardinality many values retrieved from EntityGraph indexes. The following section clarifies this point, especially as it pertains to collection values.
435 | 
436 | ### Cardinality One Attribute Values in EAV and AVE Indexes
437 | 
438 | Collection values are valid database values, which may cause some confusion when examining the indexes. Since collection values are valid, a set may appear in the `v` position in the EAV index, representing a single value that is a set.
439 | 
440 | For example, let's assume `:person/aliases` is a `:db.cardinality/one` attribute, and we have the following in the EAV index:
441 | 
442 | ```clojure
443 | {1 {:db/id 1
444 |     :person/name "Tina Turner"
445 |     :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}}
446 | ```
447 | 
448 | Then `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` represents a single value. Again, this is because `:person/aliases` is a cardinality one attribute.
449 | 
450 | This means `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` will be indexed as a single value in the AVE index:
451 | 
452 | ```clojure
453 | {:person/aliases {#{"Queen of Rock" "The Queen of Rock'n'Roll"} 1}}
454 | ```
455 | 
456 | ### Cardinality Many Attribute Values in EAV and AVE Indexes
457 | 
458 | In contrast with the cardinality one example above, here's how this would change if `:person/aliases` were a `:db.cardinality/many` attribute. The EAV index would look identical, but the interpretation would be different:
459 | 
460 | ```clojure
461 | {1 {:db/id 1
462 |     :person/name "Tina Turner"
463 |     :person/aliases #{"Queen of Rock" "The Queen of Rock'n'Roll"}}}
464 | ```
465 | 
466 | The set `#{"Queen of Rock" "The Queen of Rock'n'Roll"}` in the EAV index now represents two distinct values: `"Queen of Rock"` and `"The Queen of Rock'n'Roll"`. 
467 | 
468 | Consequently, `"Queen of Rock"` and `"The Queen of Rock'n'Roll"` will be indexed as separate values in the AVE index:
469 | 
470 | ```clojure
471 | {:person/aliases {"Queen of Rock" 1, "The Queen of Rock'n'Roll" 1}}
472 | ```
473 | 
474 | ### Collection Values for Cardinality Many Attributes
475 | 
476 | Now let's examine what happens when we have collection values for a cardinality many attribute. 
477 | 
478 | Since collection values are valid it is possible to see something like this:
479 | 
480 | ```clojure
481 | {1 {:db/id 1
482 |     :person/name "Tina Turner"
483 |     :person/favorite-food-combos #{#{:burger :fries} #{:pasta :shrimp}}}}
484 | ```
485 | 
486 | Assuming the attribute `:person/favorite-food-combos` is `:db.cardinality/many`, what is the correct interpretation? 
487 | 
488 | The set `#{#{:burger :fries} #{:pasta :shrimp}}` must be treated as two distinct values `#{:burger :fries}` and `#{:pasta :shrimp}`, and not as a single value `#{#{:burger :fries} #{:pasta :shrimp}}`. 
489 | 
490 | Consequently, `#{:burger :fries}` and `#{:pasta :shrimp}` will be indexed as separate values in the AVE index:
491 | 
492 | ```clojure
493 | {:person/favorite-food-combos {#{:burger :fries} 1, #{:pasta :shrimp} 1}}
494 | ```
495 | 
496 | # Pull
497 | 
498 | Data retrieval is primarily accomplished with the `pull` function. Pull is a declarative way to make hierarchical (and possibly nested) selections of information about entities.
499 | 
500 | > **Pull examples appear throughout the [Hands-on Tutorial](TUTORIAL.md)**.
501 | 
502 | ## Pull Features
503 | 
504 | EntityGraph's `pull` is a subset of Datomic's pull.
505 | 
506 | The following features are **supported**:
507 | * Wildcarding
508 | * Nesting
509 | * Combining wildcard and map specifications
510 | * Joins
511 | * Forward and backward attribute navigation
512 | * Recursive pulls
513 |   * Recursive select is safe in the presence of cycles. 
514 |     * When a recursive subselect encounters an entity that it has already seen only the `:db/id` of the entity is returned.
515 |   * Unlimited recursion depth is not specifically supported, but a large recursion limit can be specified.
516 | 
517 | The following features are **NOT supported**:
518 | * Naming control
519 | * Defaults
520 | * Transformations
521 | * Limits on the returned results
522 | 
523 | ## Pull Results
524 | 
525 | ### Empty Results
526 | 
527 | * Pull returns `{}` when nothing in `pattern` matches.
528 |   * Except for wildcard pattern or if `:db/id` is requested in pattern, in which cases a map of form `{:db/id id}` is returned.
529 | 
530 | ### Reference Attribute Values
531 | 
532 | * For reference attributes a map of form `{:db/id id}` will be returned for each value.
533 | * If a reference attribute is a component, a map containing all the attributes of the referenced entity will be returned.
534 | 
535 | ### Multiple Results
536 | 
537 | Multiple results are returned in the following cases:
538 | * For all forward cardinality-many references.
539 | * Reverse references for non-unique/non-component attributes.
540 | 
541 | ### Finding All Reverse References
542 | 
543 | In a `pull` pattern reverse navigation is possible by using an underscore in the local name segment of the attribute keyword (e.g. `:person/_friend`). However, this requires the user to know which reference attributes might be pointing to the target entity. 
544 | 
545 | We may wish to identify all references to a given entity, from any and all attributes. The function `find-reverse-refs` takes a database value and a target entity id, and returns a set of `[attribute entity-id]` vectors representing reverse references to the target entity id.
546 | 
547 | # Read Directly from Index
548 | 
549 | When `pull` is not sufficient, the indexes can be accessed directly with custom data retrieval functions. This approach is meant to replace Datalog/SQL type queries. Declarative queries are sacrificed for (hopefully) performance. Custom data retrieval functions are not expected to be used frequently.
550 | 
551 | The user must understand the data model to write custom data retrieval functions successfully. It's particularly important to understand the semantics of different attribute properties that can be declared in the schema, how the EAV and AVE indexes are constructed, and how data is represented in the indexes.
552 | 
553 | >**All of this is described above and further elucidated in the [Hands-on Tutorial](TUTORIAL.md).**
554 | 
555 | ## Helper Function `check-attr`
556 | 
557 | When writing custom data retrieval functions it becomes important to know the properties of attributes being retrieved. The function `check-attr` is provided to assist with this. It takes a database value, an attribute name, an attribute property, and returns a value for that attribute name/property combination.
558 | 
559 | Here are some sample calls of `check-attr`:
560 | 
561 | ```clojure
562 | (check-attr db :person/name :db/cardinality)
563 | => :db.cardinality/one
564 | 
565 | (check-attr db :person/aliases :db/cardinality)
566 | => :db.cardinality/many
567 | 
568 | (check-attr db :person/ssn :db/unique)
569 | => :db.unique/identity
570 | ```
571 | 
572 | ### Return Values of `check-attr` for Different Properties
573 | 
574 | The following are all the possible return values for different attribute properties:
575 | 
576 | | Property            |                                                                                                      Possible Return Values |
577 | |:--------------------|----------------------------------------------------------------------------------------------------------------------------:|
578 | | `:db/isRef`         |                                                                                                             `true`, `false` |
579 | | `:db/isComponent`   |                                                                                                             `true`, `false` |
580 | | `:db/cardinality`   |                                                                               `:db.cardinality/one`, `:db.cardinality/many` |
581 | | `:db/unique`        |                                                          `:db.unique/identity`, `:db.unique/value`, `:db.unique/false` |
582 | | `:db/sort`          |                                                            `:db.sort/sorted-set`, `:db.sort/avl-set`, `:db.sort/false` |
583 | | `:db/index`         |                             `:db.index/hash-map`, `:db.index/sorted-map`, `:db.index/avl-map`, `:db.index/false` |
584 | | `:db/ave-form`      |                                               `:db.ave-form/single-e`, `:db.ave-form/eset`, `:db.ave-form/false` |
585 | 
586 | Note especially the property `:db/ave-form`. It tells us how the attribute is represented in the AVE index:
587 | * `:db.ave-form/single-e` refers to form **[AVE Form {a {v e}}](#ave-form-a-v-e)**
588 | * `:db.ave-form/eset` refers to form **[AVE Form {a {v #{e1 e2 ...}}}](#ave-form-a-v-e1-e2-)**
589 | 
590 | The rest of the properties are described in the **[Schema Section](#schema)**.
591 | 
592 | ## Schema Predicates
593 | 
594 | The following schema predicates are an alternative to `check-attr`. 
595 | 
596 | Each predicate takes an encoded schema and an attribute. An encoded schema may be obtained from a database value like this: `(:db/schema db)`.
597 | 
598 | The following schema predicates are available:
599 | 
600 | * `ref-type?`
601 | * `component?`
602 | * `cardinality-many?`
603 | * `unique-identity?`
604 | * `unique?`
605 | * `ave-form-single-e?`
606 | * `ave-form-eset?`
607 | 
608 | ## ID Predicates
609 | 
610 | The following id predicates can help identify the type of id that a value in the id position represents:
611 | * `entity-id?`
612 | * `tempid?`
613 | * `lookup-ref?`


--------------------------------------------------------------------------------
/docs/TECHNOTES.md:
--------------------------------------------------------------------------------
  1 | # Technical Design Notes
  2 | 
  3 | These are a collection of notes about the reasoning behind some of the technical design decisions. Generally, choices were biased to be more restrictive rather than permissive, as relaxing the restrictions is less likely to lead to breaking changes than tightening restrictions.
  4 | 
  5 | ## Schema
  6 | 
  7 | * Map schema definition, with keys as attribute names, was chosen rather than copying Datomic's vector of maps.
  8 |   * Map schema eliminates duplicate attribute names.
  9 | * Initial instinct was to match Datomic style to support schema reuse, but this idea was dismissed as the systems are too different.
 10 | 
 11 | ## Failed Optimization Attempts
 12 | 
 13 | The following optimizations seemed promising, but failed to deliver.
 14 | 
 15 | * Using transients/reducers in `prepare-tx-data` was attempted (e.g. combining `replace-tempids-list` and `replace-ref-ids-list`), but didn't yield a significant performance benefit.
 16 | * Updating EAV and AVE indexes in parallel might offer a real performance gain, but:
 17 |   * Javascript environment is single-threaded 
 18 |   * Introduces complexity of webworkers
 19 | 
 20 | ## Indexing
 21 | 
 22 | Datomic was the inspiration for EntityGraph indexes, but with considerable modifications. 
 23 | 
 24 | Datomic has the following indexes:
 25 | * EAV, AEV for all datoms.
 26 | * AVE for unique and index attrs.
 27 | * VAE for reference attributes
 28 | 
 29 | In EntityGraph, the main distinctions are that indexes only exist in-memory and the index data structure is a nested map.
 30 | 
 31 | EntityGraph indexes:
 32 | * EAV index for all attributes and an AVE index for reference and unique attributes, plus any other attributes that are specified by the schema.
 33 |   * Since we wish to economize on memory use, we don't automatically index everything in the AVE index.
 34 | * The original design had a VAE index, but AVE index proved sufficient
 35 |   * Because the index data structure is a nested map, don't have to scan Vs; just go to A and then to desired V.
 36 | * AEV index was not considered because EAV/AVE indexes are sufficient:
 37 |   * AEV index in datomic helps scans in E order for fixed A. Map indexes don't require scans of EAV index.
 38 |   * To get all entity ids that contain `:attribute`, we call `(vals (get (:db/ave db) :attribute))`.
 39 |   * To get all values for `:attribute`, we call `(keys (get (:db/ave db) :attribute))`.
 40 | 
 41 | ## Nested Map Restrictions
 42 | 
 43 | In Datomic, the reference to a nested map must be a component attribute, or the nested map must include a unique attribute. Of note, the unique attribute can subsequently be retracted.
 44 | 
 45 | Datomic documentation offers this justification: "This constraint prevents the accidental creation of easily-orphaned entities that have no identity or relation to other entities."
 46 | https://docs.datomic.com/cloud/transactions/transaction-processing.html#nested-maps-in-transactions
 47 | 
 48 | This constraint was copied, despite occasionally causing some inconvenience, as it can be relaxed in the future without breaking any code.
 49 | 
 50 | ## NanoIDs for entity ids
 51 | 
 52 | Currently sequential integers are used for entity ids, but UUIDs/NanoIDs (https://github.com/zelark/nano-id) were also considered:
 53 | * Improved security - not revealing the sequential order.
 54 |   * May not be relevant for in-memory app state storage.
 55 | * UUIDs or NanoIDs can serve as globally unique identifiers.
 56 |   * For globally unique identifiers prefer to use a `:db.unique/identity` attribute (e.g. `:global-id`).
 57 | * NanoIDs are less performant than integer ids, though the ultimate difference in performance may not be important.
 58 | * Since NanoIDs and tempids are both strings, transactions would have to distinguish between the two:
 59 |   * Solution: Length check
 60 |     * NanoIDs are of length 21, but tempids might be too.
 61 |   * Solution: Wrap nanoids in another class like with `deftype` or `defrecord`
 62 |     * Further performance and memory footprint penalty
 63 | * NanoIDs take up more memory per id, especially if wrapped with `deftype` or `defrecord`
 64 | 
 65 | 
 66 | ## Leaving `{:db/id id}` in EAV index
 67 | 
 68 | When all attributes for an entity are retracted, the entity can either be removed from the EAV index entirely or a `[id {:db/id id}]` entry can remain. It was decided to remove the entity from the EAV index. The decision was guided by this discussion on invalid ids: https://groups.google.com/g/datomic/c/hnOLG-fhZOU/m/RZvLlrGajHIJ
 69 | 
 70 | The following points guided the decision:
 71 | * The performance penalty for checking that all attributes for an entity are retracted is negligible.
 72 | * Can end up with many "abandoned" `[id {:db/id id}]` entries in EAV, taking up space.
 73 | * Keeping `:db/id` allows us to detect if an entity id exists or has ever existed historically (call these "valid" entities):
 74 |   * Can enforce only valid entity ids in the entity position in assertions, but wouldn't expect user to use a non-existent entity id unless they meant to create a new entity with a keyword entity id for easy programmatic access.
 75 |     * Decided to "trust the user"
 76 |     * Use of non-existent integer id is considered a user error
 77 |   * Can enforce only valid entity ids in the value position (for reference attributes) in assertions, avoiding dangling refs (pointing to entities that are invalid). 
 78 |     * Dangling refs allowed in Datomic - this choice was copied - "trust the user".
 79 |     * A reference attribute pointing to a non-existent entity id is not a big problem. Not worth the hassle.
 80 | * If we decide to keep `[id {:db/id id}]` when all attributes for an entity are retracted, what to do for `:db/retractEntity` operation?
 81 | * When asserting map form entities, there is a performance optimization: for non-existent entities we can just `assoc` the map directly in the EAV index 
 82 |   * Leaving {:db/id id} negates this optimization for previously retracted entities
 83 |     * But it is most useful for loading data into an empty database.
 84 | * Removing `{:db/id id}` ensures that nobody relies on checking for `{:db/id id}` vs `nil` in EAV. However, if it's added later, it may still break code that checks for `nil` and finds `{:db/id id}` instead.
 85 | 
 86 | ## Pull
 87 | 
 88 | ### Pull Empty Results
 89 | 
 90 | What should `pull` return when an entity id is not in the database? Should it return `nil`, `{}`, or `{:db/id id}`? This decision was interrelated with the decision above: whether to leave `{:db/id id}` in the EAV index when all attributes for an entity are retracted.
 91 | 
 92 | * When an entity id is not in the database `pull` returns `{:db/id id}` when `pattern` is wildcard `'[*]` or `:db/id` is specified in pattern.
 93 |   * Had we kept `[id {:db/id id}]` for historical entities, would have had the option to treat them differently from never existing entities, but there is no clear need to do this.
 94 | 
 95 | ### Pull Results - Reference Values
 96 | 
 97 | For reference values a map of form `{:db/id id}` is returned rather than just `id`. This decision was guided by the following considerations:
 98 | * Visually it offers more clarity (easy to spot that it's a reference)
 99 | * Using `{:db/id id}` offers consistency between component and non-component reference attributes
100 | * The performance hit is negligible: in one test wrapping 10k ids took 4 msecs, 100K ids 29 msecs
101 | 
102 | ### Combining wildcard with join attribute specs in pull pattern
103 | 
104 | When combining wildcard with join attribute specs in a pull pattern, an issue arises: Should the wildcard attribute spec overwrite any result that's accumulated so far? 
105 | 
106 | For example if we have the pattern `[{:person/friends 6} '*]`, we begin with the (recursive) join `{:person/friends 6}`, but then comes the wildcard `'*`. Should we overwrite the value in the result under `:person/friends` with whatever the wildcard returns?
107 | 
108 | It was decided that the wildcard attribute spec should not overwrite any previous join attribute specs as that defeats the point of specifying any join attribute specs in the pattern to the left of the wildcard pattern.
109 | 
110 | ## Checking Entity IDs in Assertions
111 | 
112 | Entity ids in assertions can be checked for their current (and possibly historical) presence in the database, but should they be? This is interrelated with the decision not to keep `{:db/id id}` in EAV index when all attributes of an entity are removed.
113 | 
114 | * Performance penalty for this check would be negligible (based on tests).
115 | * Checking only makes sense if `[id {:db/id id}]` entries are kept for historical entities.
116 |   * Otherwise, once all attributes are removed from an entity, transactions referencing that entity would fail.
117 |   * Could also check `(< id (:db/next-id db))` to check the historical existence of an entity id, but this relies on entity ids being sequential integers and would no longer work if entity ids are switched to UUIDs or NanoIDs or if the integers are no longer sequential.
118 | * Checking could prevent non-existent integer ids being specified in transaction data.
119 |   * Does not apply to keyword entity ids because the correct behavior with keyword entity ids is to add them if they don't exist!
120 |   * If a new integer id is used in an assertion, it will create a new entity without updating `:db/next-id` 
121 |     * Eventually `:db/next-id` will "catch-up" and the entities will be merged 
122 |     * The cost of this mistaken use of new integer id would be borne by the user.
123 |     * This mistaken use is not likely
124 |   * This mistaken use is of lower likelihood if integer entity ids are switched to NanoIDs or UUIDs, but still possible in theory.
125 | 
126 | It was decided not to check entity ids in assertions. Neither for their current nor historical presence.
127 | 
128 | ## One Parent Per Component Constraint
129 | 
130 | It was decided to enforce one parent per component constraint, despite Datomic not enforcing it.
131 | 
132 | * Semantically component entities can only have one parent, though Datomic does not enforce this constraint.
133 |   * This is somewhat perplexing, but there is likely a good reason for this - possibly performance or complexity
134 |   * Neither performance nor complexity is an obstacle in EntityGraph
135 | * While in Datomic it is possible to end up with multiple parents for the same component entity, starting with the component entity id and navigating backwards to parent via reverse component attribute seems to only return the latest asserted parent.
136 | * The performance cost for enforcing this in EntityGraph is as follows:
137 |   * For each assertion of a component attribute-value, a lookup of the value in AVE index under each component attribute in the schema
138 |   * If there are no component attributes in the schema there is no performance cost
139 | 
140 | The following discussions informed this decision:
141 | https://datomic.narkive.com/1HfrgEI5/cardinality-many-iscomponent-and-reverse-relationships
142 | https://groups.google.com/g/datomic/c/wqMWGY39EGk/m/4DYHMYNUdXQJ
143 | https://groups.google.com/g/datomic/c/wY7Hq2KwB2E/m/qpqRUXEeRiEJ
144 | 
145 | ## Transaction Functions
146 | 
147 | Transaction functions don't make sense in EntityGraph as it is an in-memory database and Clojure's concurrency facilities can be used.
148 | 
149 | ## Writing to Storage
150 | 
151 | Currently writing the database to storage is not supported. The database was designed to fully reside in-memory. If in future writing to storage is to be undertaken, the following considerations must be kept in mind: 
152 | 
153 | * All values in the database must be serializable
154 | * When writing a sorted set or sorted map to disk, must ensure it is read back as a sorted data structure. 
155 |   * In particular, sorted sets can be used as values in EAV index for cardinality many attributes 
156 |   * In particular, sorted maps can be used in AVE index
157 |   * Any other sorted collection values 
158 |   * See Saving+reading sorted maps to a file in Clojure: https://stackoverflow.com/questions/17347836/savingreading-sorted-maps-to-a-file-in-clojure
159 | * The schema must also be written to disk. 
160 | * Note that currently, once created, the schema cannot be modified as it is intended to last for the duration of the program
161 |   * May need to carefully consider schema modification if storing the db to disk is undertaken
162 |     * Some schema changes would be more easily accommodated than others
163 |     * The straightforward option is to delete and rewrite the database to disk after each schema modification
164 | 
165 | ## Would creating an assertion set offer benefits?
166 | 
167 | When preparing tx-data for transaction `retraction-set` is created to avoid asserting and retracting same `[e a v]`. This is checked in `check-db-constraints-[one/many]`. 
168 | 
169 | One then wonders if creating `assertion-set` might be profitable, but it doesn't offer much gain. Relying on a set of `[e a v]` tuples and processing assertions one tuple at a time negates the benefit of directly `assoc`ing map form tx-data to EAV index (instead of processing `[e a v]` tuples one by one).
170 | 
171 | ## Enable Independent Processing of Assertions And Retractions
172 | 
173 | Currently, in `transact`, processing of retractions must come before processing of assertions because the code that checks for constraint violation relies on this order of operations. Specifically uniqueness constraint checks and one parent per component checks.
174 | 
175 | It would be possible to instead rely on `retraction-set` and `entity-retraction-ids` to check these same constraints. This would enable independent processing of assertions and retractions, thus making them parallelizable. However, the primary target for EntityGraph is ClojureScript (web apps) and leveraging Web Workers may not offer sufficient performance benefit, especially considering that performance does not appear to be an issue so far.
176 | 
177 | ## Post Transaction Checks
178 | 
179 | There's a tradeoff between ensuring the database is always in a valid/consistent state and the corresponding performance penalty. It is particularly undesirable to burden correct programs with the performance cost of checks. Also, what is and isn't a valid state requires careful consideration. 
180 | 
181 | Some invalid database states are more problematic than others. Some constraints (such as prohibition against nil values) are particularly problematic. Those constraints are enforced. 
182 | 
183 | Other invalid states are less problematic. Dangling references might be an example. In those instances the constraint is not enforced unless it's particularly simple to implement.
184 | 
185 | Finally, there is the option of users checking whatever constraints they want to enforce after calling `transact`. But this is liable to be costly.
186 | 
187 | ## Sorted Set Values
188 | 
189 | Including the option of sorted sets for cardinality many attributes was carefully considered. The decision to include this feature was made after determining that **there is no performance penalty if this feature is not used**. In other words, if the user doesn't declare any attributes with a `:db/sort` property, there is no performance cost at all.
190 | 
191 | The following performance tests confirm this:
192 | * Map form assertion of 10 entities and 20k entities 
193 | * Map form assertion overwriting 10 entities and 20k entities
194 | * List form assertions of 10 entities and 20k entities
195 | * List form assertions overwriting 10 entities and 20k entities
196 | * Retraction of 10 entities and 20k entities
197 | 
198 | Here are the benchmark numbers in milliseconds; the first vector is with support for sorted sets, the second without:
199 | * `[4420 5334 3941 4056 7861 10935 4159 4483 6293 11934]`
200 | * `[4387 5659 3938 4131 7791 10496 4139 4444 6269 12123]`
201 | 
202 | It's apparent at a glance there is no performance difference to speak of.


--------------------------------------------------------------------------------
/drafts/entity_graph/core_test_generative.cljc:
--------------------------------------------------------------------------------
  1 | (ns entity-graph.core-test-generative
  2 |   (:require
  3 |     #?(:clj [clojure.pprint :refer [pprint]]
  4 |        :cljs [cljs.pprint :refer [pprint]])
  5 |     [clojure.set :refer [intersection difference union rename-keys subset?]]
  6 |     #?(:clj  [clojure.test :as t :refer        [is are deftest testing]]
  7 |        :cljs [cljs.test    :as t :refer-macros [is are deftest testing]])
  8 |     [entity-graph.core :refer [create-db transact cardinality-many? unique? ref-type? index?
  9 |                                pull pull-many get-ids get-eav-tuples] :as eg]
 10 |     ;; Spec
 11 |     #?(:clj  [clojure.spec.alpha :as s]
 12 |        :cljs [cljs.spec.alpha :as s])
 13 |     ;; need to require clojure.test.check.generators for cljs generators work
 14 |     ;; https://clojure.atlassian.net/browse/CLJS-1792
 15 |     ;;https://stackoverflow.com/questions/57877004/how-to-fix-clojure-test-check-generators-never-required-when-exercising-a-func
 16 |     #?(:cljs [clojure.test.check.generators])
 17 |     #?(:clj [clojure.spec.gen.alpha :as gen]
 18 |        :cljs [cljs.spec.gen.alpha :as gen]))
 19 |   #?(:cljs (:require-macros [entity-graph.macros :refer [assert-fail? assert-fail-with-msg?]])
 20 |      :clj (:require [entity-graph.macros :refer [assert-fail? assert-fail-with-msg?]])))
 21 | 
 22 | ;; =========
 23 | ;; Schema Specs
 24 | 
 25 | (s/def ::person-first-name #{"John" "Mary" "Sam" "Jen"}) ; allowed first names; sampled by `full-name-gen`
 26 | 
 27 | (s/def ::person-last-name #{"Smith" "Brown" "Doe" "Black"}) ; allowed last names; sampled by `full-name-gen`
 28 | 
 29 | (def full-name-gen
 30 |   "Generator for \"<first> <last>\" names built from the two name specs."
 31 |   (gen/fmap
 32 |     (fn [[first-name last-name]] (str first-name " " last-name))
 33 |     (gen/tuple
 34 |       (s/gen ::person-first-name)
 35 |       (s/gen ::person-last-name))))
 36 | 
 37 | (comment (gen/sample full-name-gen 5))
 38 | 
 39 | (s/def ::person-name (s/with-gen string? (fn [] full-name-gen))) ; any string conforms; generated values come from `full-name-gen`
 40 | 
 41 | (s/def ::person-aliases (s/* string?)) ; zero or more strings
 42 | 
 43 | (def non-empty-string-alphanumeric
 44 |   "Generator for alphanumeric strings that contain at least one character"
 45 |   (gen/such-that seq (gen/string-alphanumeric)))
 46 | 
 47 | ;; TODO: ensure emails are unique
 48 | (def email-gen
 49 |   "Generator for email addresses of the form <local-part>@<host>.com"
 50 |   (gen/fmap
 51 |     (fn [[local-part host]]
 52 |       (str local-part "@" host ".com"))
 53 |     (gen/tuple
 54 |       non-empty-string-alphanumeric
 55 |       non-empty-string-alphanumeric)))
 56 | 
 57 | (s/def ::person-email (s/with-gen string? (fn [] email-gen))) ; any string conforms; generated values come from `email-gen`
 58 | 
 59 | (s/def ::person-city #{"New York" "Moscow" "London" "Paris" "Munich" "Berlin" "San Francisco" "Houston"}) ; fixed set of city names
 60 | 
 61 | (s/def ::person-past-cities (s/* ::person-city)) ; zero or more ::person-city values
 62 | 
 63 | (s/def ::person-salary (s/int-in 30000 300000)) ; integer from 30000 (inclusive) to 300000 (exclusive)
 64 | 
 65 | (s/def ::person-past-salaries (s/* ::person-salary)) ; zero or more ::person-salary values
 66 | 
 67 | (s/def ::person ; map with un-namespaced keys: name and email required, the rest optional
 68 |   (s/keys :req-un [::person-name ::person-email]
 69 |           :opt-un [::person-aliases ::person-city ::person-past-cities ::person-salary ::person-past-salaries]))
 70 | 
 71 | (comment
 72 |   (gen/generate (s/gen ::person)))
 73 | 
 74 | (s/def ::drivers-license-number ; any string conforms; generated values are non-empty alphanumeric strings
 75 |   ;uuid? (alternative predicate, currently disabled)
 76 |   (s/with-gen string? (fn [] non-empty-string-alphanumeric))
 77 |   )
 78 | 
 79 | (s/def ::drivers-license-state #{"NY" "NJ" "TX" "AR"}) ; fixed set of state codes
 80 | 
 81 | (s/def ::drivers-license ; map with un-namespaced keys; both number and state are required
 82 |   (s/keys :req-un [::drivers-license-number ::drivers-license-state]))
 83 | 
 84 | (comment
 85 |   (gen/generate (s/gen ::drivers-license)))
 86 | 
 87 | (defn to-db-attr
 88 |   "Renames generated spec keys (e.g. :person-name) to db attribute keys (e.g. :person/name)."
 89 |   [person]
 90 |   (rename-keys
 91 |     person
 92 |     {:person-name :person/name, :person-email :person/email
 93 |      :person-aliases :person/aliases, :person-city :person/city
 94 |      :person-past-cities :person/past-cities
 95 |      :person-salary :person/salary, :person-past-salaries :person/past-salaries
 96 |      :drivers-license-number :drivers-license/number
 97 |      :drivers-license-state :drivers-license/state}))
 98 | 
 99 | (def drivers-licenses (->> (gen/sample (s/gen ::drivers-license) 5) (map to-db-attr))) ; 5 sampled licenses, keys renamed to db attributes
100 | 
101 | (def persons (->> (gen/sample (s/gen ::person) 10) (map to-db-attr))) ; 10 sampled persons, keys renamed to db attributes
102 | 
103 | ;; TODO: can be redone with nested maps and with :db/unique email
104 | ;; TODO: ensure drivers license numbers are unique -> maybe this is why datomic wants them to have unique identifiers?
105 | ;; TODO: purposely test duplicate unique identifiers
106 | (defn mk-drivers-licenses-tx-data
107 |   "Pairs each sampled drivers license with a person (by position) and returns tx-data that
108 |    asserts the license, using its number as tempid, plus the person's reference to it."
109 |   [persons]
110 |   (into [] cat
111 |         (map (fn [{number :drivers-license/number :as license} {person-id :db/id}]
112 |                [(assoc license :db/id number) {:db/id person-id :person/drivers-license number}])
113 |              drivers-licenses persons)))
114 | 
115 | (defn mk-friends-tx-data
116 |   "Makes tx-data for creating friend relationships between persons. `persons` maps entity
117 |    id to entity; every other id becomes a friend with probability 0.3."
118 |   [persons]
119 |   (mapv (fn [{person-id :db/id}]
120 |           {:db/id person-id :person/friends (random-sample 0.3 (keys (dissoc persons person-id)))})
121 |         (vals persons)))
122 | 
123 | (defn mk-best-friend-tx-data
124 |   "For a random subset of the ids in `persons` (each kept with probability 0.6), returns
125 |    tx-data assigning the :db/id of a randomly picked other person as :person/best-friend."
126 |   [persons]
127 |   (mapv (fn [person-id]
128 |           {:db/id person-id :person/best-friend (:db/id (rand-nth (vals (dissoc persons person-id))))})
129 |         (random-sample 0.6 (keys persons))))
130 | 
131 | ;; =========
132 | ;; Map form / List form
133 | 
134 | ;; if there are nil values for retractions, can produce either [:db/retract e a] or [:db/retract e a nil]
135 | ;; currently [:db/retract e a nil]
136 | (defn map->list-form1
137 |   "Converts `tx-form` from map form to list form. Returns a vector of list forms
138 |    [op id a v]; `op` defaults to :db/add when the map carries no :db/op."
139 |   [schema {:keys [db/id db/op] :as tx-form}]
140 |   (let [op     (or op :db/add)
141 |         expand (fn [a v]
142 |                  ;; one list form per value for cardinality many attributes
143 |                  (if (cardinality-many? schema a) (map #(vector op id a %) v) [[op id a v]]))]
144 |     (reduce-kv (fn [forms a v] (into forms (expand a v)))
145 |                [] (dissoc tx-form :db/id :db/op))))
146 | 
147 | ;; only used for testing
148 | (defn map->list-form
149 |   "Converts map form `tx-data` into a lazy seq of list forms, one map at a time."
150 |   [schema tx-data] (mapcat (partial map->list-form1 schema) tx-data))
151 | 
152 | ;; =========
153 | ;; Generative tests
154 | 
155 | ;; =========
156 | ;; AVE net ops
157 | 
158 | (defn ve-tuple "Projects a list form [op e a v] to [v e]." [[_ e _ v]] [v e])
159 | (defn av-tuple "Projects a list form [op e a v] to [a v]." [[_ _ a v]] [a v])
160 | (defn eav-tuple "Projects a list form [op e a v] to [e a v]." [[_ e a v]] [e a v])
161 | 
162 | ;; remember `tx-data` is already grouped by id and by attr
163 | (defn one-add
164 |   "Net additions for a cardinality one attribute. Folds the list forms in order: a
165 |    :db/add replaces the previously added tuple, a :db/retract removes its own tuple."
166 |   [tx-data tuple-fn]
167 |   (first
168 |     (reduce (fn [[net previous] [op :as tx-form]]
169 |               (let [tuple (tuple-fn tx-form)]
170 |                 (case op
171 |                   :db/add [(-> net (disj previous) (conj tuple)) tuple]
172 |                   :db/retract [(disj net tuple) previous])))
173 |             [#{} :last-tuple] tx-data)))
174 | 
175 | (defn many-add
176 |   "Net additions for a cardinality many attribute: :db/add puts the tuple into the
177 |    result set, :db/retract takes it out."
178 |   [tx-data tuple-fn]
179 |   (reduce (fn [net [op :as tx-form]]
180 |             (let [tuple (tuple-fn tx-form)
181 |                   change (case op :db/add conj :db/retract disj)] (change net tuple)))
182 |           #{} tx-data))
183 | 
184 | ;ave
185 | (defn net-additions
186 |   "Groups one entity's list forms (the `[id tx-data]` entry) by attribute and returns a
187 |    map of attr -> list of net added tuples (nil when there are none), built with `tuple-fn`."
188 |   [schema [id tx-data] tuple-fn]
189 |   (reduce-kv (fn [r attr forms]
190 |                (let [net-add (if (cardinality-many? schema attr) many-add one-add)]
191 |                  (update r attr #(reduce conj % (net-add forms tuple-fn)))))
192 |              {} (group-by (fn [[op e a v]] a) tx-data)))
193 | 
194 | (defn one-retract
195 |   "Net retractions for a cardinality one attribute: :db/retract puts the tuple into the
196 |    result set, a later :db/add of the same tuple takes it out again."
197 |   [tx-data tuple-fn]
198 |   (reduce (fn [net [op :as tx-form]]
199 |             (let [tuple (tuple-fn tx-form)
200 |                   change (case op :db/add disj :db/retract conj)] (change net tuple)))
201 |           #{} tx-data))
202 | 
203 | (defn many-retract
204 |   "Net retractions for a cardinality many attribute: :db/retract puts the tuple into the
205 |    result set, a later :db/add of the same tuple takes it out again."
206 |   [tx-data tuple-fn]
207 |   (reduce (fn [net [op :as tx-form]]
208 |             (let [tuple (tuple-fn tx-form)
209 |                   change (case op :db/add disj :db/retract conj)] (change net tuple)))
210 |           #{} tx-data))
211 | 
212 | (defn net-retractions
213 |   "Groups one entity's list forms (the `[id tx-data]` entry) by attribute and returns a map
214 |    of attr -> list of net retracted tuples (nil when there are none), built with `tuple-fn`."
215 |   [schema [id tx-data] tuple-fn]
216 |   (reduce-kv (fn [r attr forms]
217 |                (let [net-retract (if (cardinality-many? schema attr) many-retract one-retract)]
218 |                  (update r attr #(reduce conj % (net-retract forms tuple-fn)))))
219 |              {} (group-by (fn [[op e a v]] a) tx-data)))
220 | 
221 | (defn net-ave-updates
222 |   "Returns the [v e] pairs that should update AVE index based on `tx-data`.
223 |    Additions are under top level :db/add key, retractions under :db/retract key.
224 |    Under each key the pairs are grouped by attribute; the per-attribute values
225 |    are sequential collections, not sets."
226 |   [schema tx-data]
227 |   (let [by-id   (group-by second (map->list-form schema tx-data))
228 |         combine (fn [net-fn]
229 |                   (apply merge-with concat (map #(net-fn schema % ve-tuple) by-id)))]
230 |     {:db/add     (combine net-additions)
231 |      :db/retract (combine net-retractions)}))
232 | 
233 | (defn ave-attr->ve-set
234 |   "Returns `ave` index under `attr` key represented as a set of [v e] tuples.
235 |    Returns nil when `attr` is neither unique, a reference, nor indexed."
236 |   [ave schema attr]
237 |   (let [ve-map (get ave attr)]
238 |     (cond
239 |       (unique? schema attr)
240 |       (set ve-map)
241 |       ;; otherwise each value maps to a collection of entity ids
242 |       (or (ref-type? schema attr) (index? schema attr))
243 |       (set (for [[v e-set] ve-map, e e-set] [v e]))
244 |       :else nil)))
245 | 
246 | (defn ave->ave-set
247 |   "Flattens the `ave` index into a set of [a v e] tuples, covering every
248 |    attribute key present in `ave`."
249 |   [ave schema]
250 |   (set (for [attr (keys ave)
251 |              [v e] (ave-attr->ve-set ave schema attr)]
252 |          [attr v e])))
253 | 
254 | (defn ave->eav-set
255 |   "Flattens the `ave` index into a set of [e a v] tuples, covering every
256 |    attribute key present in `ave`."
257 |   [ave schema]
258 |   (set (for [attr (keys ave)
259 |              [v e] (ave-attr->ve-set ave schema attr)]
260 |          [e attr v])))
261 | 
262 | (defn process-ve-map
263 |   "Converts `ve-map`, the value -> entity map stored in AVE under `attr`, into a vector
264 |    of [e a v] tuples; stays empty when `attr` is neither unique, a reference, nor indexed."
265 |   [ve-map schema attr]
266 |   (reduce-kv (fn [tuples v e]
267 |                (cond
268 |                  (unique? schema attr) (conj tuples [e attr v])
269 |                  (or (ref-type? schema attr) (index? schema attr))
270 |                  (into tuples (map (fn [id] [id attr v])) e)
271 |                  :else tuples))
272 |              [] ve-map))
273 | 
274 | (defn ave->eav-set2
275 |   "Builds a set of [e a v] tuples from `ave` by running `process-ve-map` on each entry."
276 |   [ave schema]
277 |   (reduce-kv (fn [eav-set attr ve-map] (into eav-set (process-ve-map ve-map schema attr)))
278 |              #{} ave))
279 | 
280 | (defn ave-updates-attr
281 |   "Returns the set of [v e] pairs expected in AVE under `attr`: the pairs of `ave-before`
282 |    plus the net additions and minus the net retractions found in `net-updates`."
283 |   [ave-before schema net-updates attr]
284 |   (let [pairs-for (fn [op] (set (attr (op net-updates))))
285 |         before    (ave-attr->ve-set ave-before schema attr)]
286 |     ;; add/retract order doesn't matter
287 |     (difference (union before (pairs-for :db/add)) (pairs-for :db/retract))))
288 | 
289 | ;; =========
290 | ;; EAV net ops
291 | 
292 | ;; `last-tuples` keeps track of the last assertion for cardinality/one attrs, so they can be removed when "overwritten"
293 | (defn eav-add1
294 |   "Reducing step for net EAV additions. Takes the `[tuple-set last-tuples]` accumulator
295 |    and one list form; returns the updated accumulator pair."
296 |   [[tuple-set last-tuples] schema [op e a v :as tx-form]]
297 |   (let [tuple [e a v], many? (cardinality-many? schema a)]
298 |     (case op
299 |       :db/retract [(disj tuple-set tuple) last-tuples]
300 |       :db/add (if many?
301 |                 [(conj tuple-set tuple) last-tuples]
302 |                 ;; cardinality one: the new assertion replaces the previous one for [e a]
303 |                 [(-> tuple-set (disj (last-tuples [e a])) (conj tuple)) (assoc last-tuples [e a] tuple)]))))
304 | 
305 | (defn eav-retract1
306 |   "Reducing step for net EAV retractions: :db/retract adds [e a v] to `tuple-set`,
307 |    :db/add removes it. The rule is the same for every cardinality, so `schema`
308 |    is unused and only kept for a signature parallel to `eav-add1`."
309 |   [tuple-set schema [op e a v :as tx-form]]
310 |   ;; A single `case` suffices: the former cardinality-many branch was an identical
311 |   ;; `case` inside a one-armed `if` whose value was discarded.
312 |   (case op
313 |     :db/add (disj tuple-set [e a v])
314 |     :db/retract (conj tuple-set [e a v])))
315 | 
316 | (defn net-additions-eav
317 |   "Folds `eav-add1` over the list forms in `tx-data-list`; returns the set of net added [e a v] tuples."
318 |   [schema tx-data-list]
319 |   (first (reduce #(eav-add1 %1 schema %2) [#{} {}] tx-data-list)))
320 | 
321 | (defn net-retractions-eav
322 |   "Folds `eav-retract1` over `list-form`; returns the set of net retracted [e a v] tuples."
323 |   [schema list-form] (reduce (fn [acc tx-form] (eav-retract1 acc schema tx-form)) #{} list-form))
324 | 
325 | (defn net-eav-updates
326 |   "Returns a map with the net added [e a v] tuples of map form `tx-data` under :db/add and the net retracted ones under :db/retract."
327 |   [schema tx-data]
328 |   (let [forms (map->list-form schema tx-data)]
329 |     {:db/add (net-additions-eav schema forms), :db/retract (net-retractions-eav schema forms)}))
330 | 
331 | (defn eav->eav-set
332 |   "Returns `eav` index represented as a set of [e a v] tuples."
333 |   [eav schema]
334 |   ;; entities -> list forms [op e a v]; `rest` then drops the leading op
335 |   (set (map rest (map->list-form schema (vals eav)))))
336 | 
337 | (defn expected-eav
338 |   "Expected EAV tuple set after transacting `tx-data`: tuples of `eav-before` plus net additions, minus net retractions."
339 |   [eav-before schema tx-data]
340 |   (let [net (net-eav-updates schema tx-data)]
341 |     (difference (union (eav->eav-set eav-before schema) (:db/add net)) (:db/retract net))))
342 | 
343 | ;; todo: maybe `net-eav-updates` can return a seq instead of set
344 | (defn expected-ave
345 |   "Expected AVE content, as a set of [e a v] tuples, after transacting `tx-data`; only unique, reference or indexed attributes count."
346 |   [ave-before schema tx-data]
347 |   (let [net     (net-eav-updates schema tx-data)
348 |         in-ave? (fn [[_ a _]] (or (unique? schema a) (ref-type? schema a) (index? schema a)))
349 |         net-set (fn [op] (set (filter in-ave? (op net))))]
350 |     (-> (ave->eav-set ave-before schema) (union (net-set :db/add)) (difference (net-set :db/retract)))))
351 | 
352 | ;;;;;;;
353 | 
354 | ;; retractions (transactx db (map #(assoc % :db/op :db/retract) persons))
355 | (deftest test1
356 |   ;; Add some persons
357 |   (let [{:keys [tx-data db-before db-after tempids] :as r} (transact db-empty persons)
358 |         {eav-before :eav ave-before :ave} db-before
359 |         {eav-after :eav ave-after :ave} db-after
360 |         net-updates-ave (net-ave-updates schema tx-data)]
361 | 
362 |     (is (inc (:db/tx-count db-before)) (:db/tx-count db-after))
363 | 
364 |     ;; EAV/AVE general
365 |     (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema)))
366 |     (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema)))
367 |     (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema)))
368 |     (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema)))
369 |     ;; todo generalize to look for all keys in tx-data
370 |     (is (= (set (keys ave-after)) #{:person/email :person/salary :person/past-salaries
371 |                                     :person/city :person/past-cities :person/drivers-license
372 |                                     :person/best-friend :person/friends}))
373 |     ;; AVE by attribute...
374 | 
375 |     ;; :person/email - :db.unique/identity
376 |     (is (= (ave-updates-attr ave-before schema net-updates-ave :person/email)
377 |            (ave-attr->ve-set ave-after schema :person/email)))
378 | 
379 |     ;; :person/city - :db.index/unsorted, :db.cardinality/one
380 |     ;; make sure the non-unique ave indexes are sets
381 |     (is (empty? (remove set? (-> ave-after :person/city vals))))
382 |     (is (= (ave-updates-attr ave-before schema net-updates-ave :person/city)
383 |            (ave-attr->ve-set ave-after schema :person/city)))
384 | 
385 |     ;; :person/past-cities - :db.index/unsorted, :db.cardinality/many
386 |     (is (= (ave-updates-attr ave-before schema net-updates-ave :person/past-cities)
387 |            (ave-attr->ve-set ave-after schema :person/past-cities)))
388 | 
389 |     ;; :person/salary - :db.index/sorted, :db.cardinality/one
390 |     ;; ensure the map is sorted
391 |     (is (sorted? (:person/salary ave-after)))
392 |     ;; ensure all vals are sets
393 |     (is (empty? (remove set? (-> ave-after :person/salary vals))))
394 |     (is (= (ave-updates-attr ave-before schema net-updates-ave :person/salary)
395 |            (ave-attr->ve-set ave-after schema :person/salary)))
396 | 
397 |     ;; :person/past-salaries - :db.index/sorted, :db.cardinality/many
398 |     (is (sorted? (:person/past-salaries ave-after)))
399 |     (is (= (ave-updates-attr ave-before schema net-updates-ave :person/past-salaries)
400 |            (ave-attr->ve-set ave-after schema :person/past-salaries)))
401 | 
402 |     ;; Add best friend references
403 |     (let [
404 |           ;persons (->> (ave-after :person/email) (vals) (select-keys eav-after))
405 |           persons (select-keys eav-after (eg/ids-by-attr-unique ave-after :person/email))
406 |           best-friend-tx (mk-best-friend-tx-data persons)
407 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after best-friend-tx)
408 |           {eav-before :eav ave-before :ave} db-before
409 |           {eav-after :eav ave-after :ave} db-after]
410 |       (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema)))
411 |       (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema)))
412 |       (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema)))
413 |       (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema)))
414 |       ;; TODO: id is arbitrary
415 |       (pprint (eav-after 8))
416 |       (pprint (:person/best-friend ave-after))
417 |       (pprint (pull db-after [:person/best-friend] 8))
418 |       (pprint (pull db-after [:person/_best-friend] 8))
419 |       (pprint (pull db-after [{:person/_best-friend [:person/name]}] 8))
420 |       (pprint tx-data)
421 | 
422 |       ;; eav check
423 |       (is (= (set (pull-many db-after [:db/id :person/best-friend] (map :db/id best-friend-tx)))
424 |              (set best-friend-tx)))
425 | 
426 |       ;; not very generic
427 |       #_(is (= (->> (map :db/id best-friend-tx)
428 |                     (map eav-after)
429 |                     (map select-keys [:db/id :person/best-friend])
430 |                     (set))
431 |                (set best-friend-tx)))
432 |       )
433 |     ;; Add friends references
434 |     (let [
435 |           persons (select-keys eav-after (vals (ave-after :person/email)))
436 |           ;persons (select-keys eav-after (ids-by-attr-ave db-after :person/email))
437 |           friends-tx (mk-friends-tx-data persons)
438 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after friends-tx)
439 |           {eav-before :eav ave-before :ave} db-before
440 |           {eav-after :eav ave-after :ave} db-after]
441 |       (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema)))
442 |       (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema)))
443 |       (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema)))
444 |       (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema)))
445 |       ;; TODO: testing ranges here
446 |       (println ">>>> AVE SALARIES: " (:person/salary ave-after))
447 |       (println ">>> RESULT: " (get-ids db-after :person/salary [[>= 30000] [< 30004]]))
448 |       (pprint (eav-after 7))
449 |       (pprint (:person/friends ave-after))
450 |       (pprint (pull db-after [:person/friends] 7))
451 |       (pprint (pull db-after [:person/_friends] 7))
452 |       (pprint (pull db-after [{:person/_friends [:person/name]}] 7))
453 |       (println ">>> NON 1")
454 |       (pprint (pull db-after [:person/hui] 7))
455 |       (println ">>> NON 2")
456 |       (pprint (pull db-after [:person/name {:person/friends [:person/hui]}] 7))
457 |       (println ">>> NON 3")
458 |       (pprint (pull db-after [{:person/_friends [:person/hui]}] 7))
459 | 
460 |       ;; "chaining"
461 |       (println "111111")
462 |       (let [many
463 |             (eg/get-ids-multi db-after
464 |                               :person/past-cities #{"Moscow" "Berlin"}
465 |                               :person/salary #(> % 30000))
466 |             one
467 |             (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"})
468 |                  (get-ids db-after :person/salary #(> % 30000)))]
469 |         (is (= many one)))
470 |       (pprint
471 |         (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"})
472 | 
473 |              ;; ids of ppl who HAVE friends
474 |              ;(get-ids db-after :person/friends true)
475 |              ;; ids of ppl who are friends with intersect-ids
476 |              ;; todo: works, but loses grouping by 'original' :db/id of person entity
477 |              ;; could keep the grouping by doing each 'original' person id separately => use pull-many!
478 |              (get-ids db-after :person/friends)
479 | 
480 |              ;(get-ids db-after :person/salary #(> % 30000))
481 |              (union (get-ids db-after :person/name #{"John"}))
482 |              ;(map eav-after)
483 |              ;(map #(select-keys % [:db/id :person/name :person/salary :person/friends :person/past-cities]))
484 |              ;; alternative to get entityes
485 |              (eg/get-entities-eav db-after)
486 |              ;; or select specific keys
487 |              ;(eg/get-entities-eav db-after [:db/id :person/name :person/salary :person/friends :person/past-cities])
488 |              ))
489 | 
490 |       ;(println "222222")
491 |       (pprint
492 |         (map eav-after
493 |              (->> (get-ids db-after :person/past-cities #{"Moscow" "Berlin"})
494 |                   (get-ids db-after :person/salary #(> % 30000))
495 |                   (get-ids db-after :person/friends)
496 |                   )))
497 | 
498 |       #_(do
499 |           (println "=== EAV TUPLES ===")
500 |           (pprint
501 |             (get-eav-tuples db-after :person/past-cities #{"Moscow" "Berlin"}))
502 |           (pprint
503 |             (get-eav-tuples db-after :person/salary #(> % 30000))))
504 | 
505 |       #_(do
506 |           (println "=== AVE INVERT ===")
507 |           (pprint (eg/invert-ave-a-non-unique (get-in db-after [:ave :person/past-cities]))))
508 | 
509 |       (is (= (set (pull-many db-after [:db/id :person/friends] (map :db/id friends-tx)))
510 |              (set tx-data))))
511 | 
512 |     #_(let [persons (map eav-after (eg/ids-by-attr-unique db-after :person/email))
513 |             drivers-licenses-tx (mk-drivers-licenses-tx-data (vals persons))
514 |             ;persons (eav-by-attr db-after :person/email)
515 |             ;drivers-licenses-tx (mk-drivers-licenses-tx-data (vals persons))
516 |             {:keys [tx-data db-before db-after tempids] :as r} (transact db-after drivers-licenses-tx)
517 |             {eav-before :eav ave-before :ave} db-before
518 |             {eav-after :eav ave-after :ave} db-after
519 |             ;; TODO: relying on tx-data rather than original tx with tempids
520 |             ;; had to remove nils since not every person was "issued" a drivers license
521 |             expected-drivers-licenses-freqs (frequencies (remove nil? (map :person/drivers-license tx-data)))
522 |             ;; for eav:
523 |             persons-tx-data (filter #(contains? % :person/drivers-license) tx-data)
524 |             drivers-license-tx-data (filter #(contains? % :drivers-license/number) tx-data)]
525 |         (is (= (expected-eav eav-before schema tx-data) (eav->eav-set eav-after schema)))
526 |         (is (subset? (ave->eav-set ave-after schema) (eav->eav-set eav-after schema)))
527 |         (is (= (expected-ave ave-before schema tx-data) (ave->eav-set ave-after schema)))
528 |         (is (= (expected-ave ave-before schema tx-data) (ave->eav-set2 ave-after schema)))
529 |         ;; TODO: id is arbitrary
530 |         ;(pprint eav)
531 |         (pprint (eav-after 14))
532 |         (pprint (:person/drivers-license ave-after))
533 |         (pprint (pull db-after [:person/drivers-license] 7))
534 |         (pprint (pull db-after [:person/_drivers-license] 14))
535 |         (pprint (pull db-after [{:person/_drivers-license [:person/name :db/id]}] 14))
536 | 
537 |         ;; eav check
538 |         (is (= (set (pull-many db-after
539 |                                [:db/id :drivers-license/number :drivers-license/state]
540 |                                (map :db/id drivers-license-tx-data)))
541 |                (set drivers-license-tx-data)))
542 | 
543 |         ;; TODO: returning :db/id for refs breaks this test
544 |         (is (= (set (pull-many db-after
545 |                                [:db/id :person/drivers-license]
546 |                                (map :db/id persons-tx-data)))
547 |                (set persons-tx-data))))))
548 | 
549 | (deftest test-queries
550 |   ;; Smoke test without assertions: transacts the sample persons into an empty db, then prints
551 |   ;; the EAV index and the result of one set-predicate query for manual inspection.
552 |   (let [result (transact db-empty persons)
553 |         db-after (:db-after result)
554 |         eav-after (:eav db-after)]
555 |     (pprint eav-after)
556 |     (println "=================================")
557 |     ;(pprint (eg/get-tuples db-after :person/past-cities #{"Moscow" "Berlin"}))
558 |     (pprint (eg/get-ids2 db-after :person/past-cities #{"Moscow" "Berlin"}))
559 |     #_(pprint (map eav-after (intersection (get-ids db-after :person/past-cities #{"Moscow" "Berlin"})
560 |                                            (get-ids db-after :person/salary #(> % 30000))
561 |                                            ;(ids-by-attr db-after :person/best-friend)
562 |                                            )))
563 |     #_(pprint (map eav-after (union (get-ids db-after :person/past-cities #{"Moscow" "Berlin"})
564 |                                     (get-ids db-after :person/salary #(> % 30000)))))
565 |     ))
566 | 
567 | ;; =========
568 | ;; Low Level Specs
569 | 
570 | (s/def ::kw-id #{:kw-id1 :kw-id2 :kw-id3 :kw-id4}) ; keyword id: one of four fixed keywords
571 | 
572 | (s/def ::string-id #{"string-id1" "string-id2" "string-id3" "string-id4"}) ; string id: one of four fixed strings
573 | 
574 | (s/def ::proper-id (s/or :pos-int pos-int? :kw ::kw-id)) ; positive int or keyword id
575 | 
576 | (s/def ::tempid (s/or :neg-int neg-int? :string ::string-id)) ; negative int or string id
577 | 
578 | (s/def ::attribute #{:person/name :person/aliases}) ; only these two attributes are covered
579 | 
580 | (s/def ::value (s/or :number number? :string string?)) ; any number or any string
581 | 
582 | (s/def ::lookup-ref (s/cat :attribute ::attribute :value ::value)) ; sequence of attribute then value
583 | 
584 | (s/def ::db-id (s/or :proper-id ::proper-id ; the three accepted id forms
585 |                      :tempid ::tempid
586 |                      :lookup-ref ::lookup-ref))
587 | 
588 | ;; TODO nested map form
589 | (s/def ::map-form-tx
590 |   (s/keys :req [] :opt-un [::db-id])) ; NOTE(review): :opt-un [::db-id] checks the key :db-id, not :db/id -- confirm intent
591 | 
592 | (s/def ::op #{:db/add :db/retract}) ; the two list-form operations
593 | 
594 | (s/def ::list-form-tx ; [op id attribute value], or [:db/retract id attribute] without a value
595 |   (s/or :with-value (s/cat :op ::op :id ::db-id :attribute ::attribute :value ::value)
596 |         :without-value (s/cat :op #{:db/retract} :id ::db-id :attribute ::attribute)))
597 | 
598 | (s/def ::tx-form (s/or :map-form ::map-form-tx :list-form ::list-form-tx)) ; one tx item, map or list form
599 | 
600 | (s/def ::tx-data (s/* ::tx-form)) ; zero or more tx items
601 | 
602 | (comment ; REPL scratch: generate one sample value for each spec
603 |   (gen/generate (s/gen ::db-id))
604 |   (gen/generate (s/gen ::lookup-ref))
605 |   (gen/generate (s/gen ::map-form-tx))
606 |   (gen/generate (s/gen ::list-form-tx))
607 |   (gen/generate (s/gen ::tx-form))
608 |   (gen/generate (s/gen ::tx-data))
609 |   )
610 | 
611 | ;; =========
612 | ;; Create DB Test
613 | 
614 | (deftest test-create-db ; no assertions: only checks that create-db runs on test-schema without throwing
615 |   (create-db test-schema))
616 | 
617 | ;; =========
618 | ;; Transact Test
619 | 
620 | ;; To Test
621 | ;; add/retract
622 | ;; different :db/id forms: proper-id, :db/id not specified, tempid, lookup-ref
623 | ;; mix of map-forms and list-forms
624 | 
625 | ;; ensure there are no uniqueness violations after txs
626 | ;; ensure indexes are properly updated
627 | ;; ensure assertions are fired when there are violations (e.g. "invalid op")
628 | ;; ensure tempids resolve properly and entities with same tempid in tx have the same db/id
629 | ;; ensure blank :db/id in tx get a db/id
630 | 
631 | ;; nested maps
632 | 
633 | (deftest test-transactx
634 |   ;; A map-form tx without :db/id: the result has the four expected keys and the entity got a :db/id.
635 |   (let [result (transact db-empty [{:person/name "Ivan"}])
636 |         first-entity (first (:tx-data result))]
637 |     (is (= (set (keys result)) #{:db-before :db-after :tx-data :tempids}))
638 |     (is (contains? first-entity :db/id))))
639 | 
640 | ;; (transactx db [{:db/id -1 :person/name "Ivan"} {:db/id -1 :person/name "Vasil"}])
641 | (deftest test-replacing-in-ave
642 |   ;; Non-unique attr: add an initial value; the AVE entry is asserted to map value -> set of ids
643 |   (let [tx-d [{:person/name "Ivan"}]
644 |         {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)
645 |         id (-> tx-data first :db/id)]
646 |     (is (= (-> db-after :ave :person/name (get "Ivan")) #{id}))
647 |     ;; replace value (map form): exactly one value may remain in the AVE entry, the new one
648 |     (let [tx-d [{:db/id id :person/name "Vasil"}]
649 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
650 |       (is (= 1 (count (-> db-after :ave :person/name))))
651 |       (is (= (-> db-after :ave :person/name (get "Vasil")) #{id})))
652 |     ;; replace value (list form); `db-after` here is again the db that holds the initial value
653 |     (let [tx-d [[:db/add id :person/name "Vasil"]]
654 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
655 |       (is (= 1 (count (-> db-after :ave :person/name))))
656 |       (is (= (-> db-after :ave :person/name (get "Vasil")) #{id}))))
657 | 
658 |   ;; SAME for UNIQUE: here the AVE entry is asserted to map value -> a single id (no set)
659 |   ;; add initial value
660 |   (let [tx-d [{:person/email "a@a.com"}]
661 |         {:keys [tx-data db-before db-after tempids] :as r} (transact db-empty tx-d)
662 |         id (-> tx-data first :db/id)]
663 |     (is (= (-> db-after :ave :person/email (get "a@a.com")) id))
664 |     ;; replace value (map form): exactly one value may remain in the AVE entry, the new one
665 |     (let [tx-d [{:db/id id :person/email "b@b.com"}]
666 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
667 |       (is (= 1 (count (-> db-after :ave :person/email))))
668 |       (is (= (-> db-after :ave :person/email (get "b@b.com")) id)))
669 |     ;; replace value (list form); `db-after` here is again the db that holds the initial value
670 |     (let [tx-d [[:db/add id :person/email "b@b.com"]]
671 |           {:keys [tx-data db-before db-after tempids] :as r} (transact db-after tx-d)]
672 |       (is (= 1 (count (-> db-after :ave :person/email))))
673 |       (is (= (-> db-after :ave :person/email (get "b@b.com")) id)))))
674 | 
675 | (deftest test-retract-list
676 |   ;; Adds two aliases, retracts both with one list-form op, and checks the shape of BOTH results
677 |   (let [{:keys [tx-data db-after] :as r} (transact db-empty [{:person/name "Ivan" :person/aliases #{"Goga" "Gosha"}}])
678 |         id (-> tx-data first :db/id)
679 |         retracted (transact db-after [[:db/retract id :person/aliases ["Goga" "Gosha"]]])] ; result used to be discarded
680 |     (is (= (set (keys r)) (set (keys retracted)) #{:db-before :db-after :tx-data :tempids}))
681 |     ;; TODO(review): also assert that the aliases are gone from (:db-after retracted)
682 |     (is (contains? (first tx-data) :db/id))))
683 | 
684 | #_(transact db-empty [{:db/id "ivan" :person/name "ivan"}
685 |                       {:person/name "vasil" :person/friends ["ivan"]}])
686 | 
687 | ;; Test lookup ref in value position for :db.type/ref attribute
688 | #_(let [{:keys [tx-data db-before db-after tempids]} (transact db-empty [{:db/id "ivan" :person/name "ivan"
689 |                                                                           :person/email "a@a.com"}])]
690 |     (transact db-after [{:person/name "vasil" :person/friends [[:person/email "a@a.com"]]}]))
691 | 


--------------------------------------------------------------------------------
/drafts/entity_graph/macros.clj:
--------------------------------------------------------------------------------
 1 | (ns entity-graph.macros)
 2 | 
 3 | (defmacro assert-fail? [form] ;; expands to (is (thrown? <platform assertion error class> form))
 4 |   (let [error-class (if (:ns &env) 'js/Error 'java.lang.AssertionError)] ;; (:ns &env) only exists when expanding CLJS code
 5 |     ;; was 'js/Error. -- the trailing dot is constructor-call syntax, not a class name that thrown? can catch
 6 |     (list 'is (list 'thrown? error-class form))))
 7 | 
 8 | (defmacro assert-fail-with-msg? [re form] ;; expands to (is (thrown-with-msg? <platform assertion error class> re form))
 9 |   (let [error-class (if (:ns &env) 'js/Error 'java.lang.AssertionError)] ;; (:ns &env) only exists when expanding CLJS code
10 |     ;; was 'js/Error. -- the trailing dot is constructor-call syntax, not a class name that thrown-with-msg? can catch
11 |     (list 'is (list 'thrown-with-msg? error-class re form))))
12 | 


--------------------------------------------------------------------------------
/drafts/entity_graph/query.cljc:
--------------------------------------------------------------------------------
   1 | (ns entity-graph.query
   2 |   (:require
   3 |     [clojure.data.avl :as avl]
   4 |     [clojure.set :refer [intersection difference union]]
   5 |     [entity-graph.core :refer [pull-many cardinality-many?] :refer :all]))
   6 | 
   7 | ;; How to implement sorted to-many relationships in Datomic?
   8 | ;; https://stackoverflow.com/questions/44645938/how-to-implement-sorted-to-many-relationships-in-datomic
   9 | ;; https://stackoverflow.com/questions/33682064/properties-on-datomic-ref-relationships/61406767#61406767
  10 | 
  11 | ;; NOTE: Query system can be a separate lib, much like pull...
  12 | ;; should we even have :db.index/sorted? the only advantage is i.e. sorted :manufacturer/name
  13 | ;;   - easy to offer, but even sorting manufacturers in re-frame only happens when data changes
  14 | ;; (map eav id-seq) to retain (potentially sorted) order - MUST be seq, or (select-keys eav id-set) to get eav subset
  15 | ;; RANGE QUERIES
  16 | ;; avl trees: https://github.com/clojure/data.avl
  17 | ;; hash-map: linear filter of v's, linear select from eav, results unsorted
  18 | ;; sorted-map: linear filter of v's (can't do log(n) because subvec and nth are O(n)), linear select from eav, results sorted
  19 | ;; avl/sorted-map: logarithmic filter of v's, linear select from eav, results sorted
  20 | ;; covering ave index would eliminate linear selects from eav
  21 | ;; but for unions/intersections would have to operate on entities or would need to (map :db/id)
  22 | 
  23 | ;; todo: sort order lost on set union/intersection
  24 | ;; -> start with [> 35000] (desired sorting seq), `intersection-seq` with #{"Berlin"} set
  25 | ;; ... but it may be faster to start with #{"Berlin"}...
  26 | ;; -> only makes sense when desired sorting is also optimal way to start query
  27 | ;; datomic: tuple for each cardinality/many [> 35000], we just identify id, trim later if needed
  28 | ;; keeping sorted entity order during query doesn't really help on cardinality/many attrs -> may be duplicate ids in seq...
  29 | ;; only helps to start with sorted cardinality/one, then use `intersection-seq` for additional constraints
  30 | ;; then can sort-entities or sort-tuples (after pulling and trimming, if desired)
  31 | 
  32 | ;; linear in size of s1-seq
  33 | ;; force s1 to be the sequential or permit either s1 or s2 to be the sequential?
  34 | (defn intersection-seq
  35 |   "Returns the items of `s1-seq` that are also in `s2-set`, as a vector.
  36 |   Order and duplicates of `s1-seq` are preserved; membership is tested
  37 |   with `contains?` on `s2-set`."
  38 |   [s1-seq s2-set]
  39 |   (let [member? (fn [item] (contains? s2-set item))]
  40 |     ;; single pass over `s1-seq`, so still linear in its size
  41 |     (filterv member? s1-seq)))
  42 | 
  43 | ;; tuples can be sorted in any way desired, including for individual cardinality/many attr/vals
  44 | ;; can start with trimmed entity, then convert to tuple or can convert pulled-entity to tuples and filter tuples
  45 | ;; todo: support :db/id (nested entities can have db/id? - PULL)
  46 | (defn pulled-entity->tuple
  47 |   "Returns a tuple (vector) with the values found at `paths` in `pulled-entity`.
  48 |   Each path is a keyword or a vector of keys; the last key of a path is the attribute
  49 |   looked up in `schema`, and the values of a cardinality/many attribute are spliced in.
  50 |   `paths` may contain underscored keywords for reverse navigation."
  51 |   [schema paths pulled-entity]
  52 |   (reduce (fn [result path]
  53 |             ;; a bare keyword is a one-step path; calling `last` on the keyword itself throws
  54 |             (let [ks (if (keyword? path) [path] path)
  55 |                   v (get-in pulled-entity ks)]
  56 |               ;; todo: should return multiple tuples for cardinality/many; should work for reverse attr
  57 |               (if (cardinality-many? schema (last ks))
  58 |                 (into result v)
  59 |                 (conj result v))))
  60 |           [] paths))
  61 | 
  62 | ;; use built-in sort to sort tuples
  63 | (defn nested-entities->tuples
  64 |   "Lazily maps each entity in `entities` to its tuple of `paths` values via `pulled-entity->tuple`."
  65 |   [schema entities paths]
  66 |   (map (fn [entity] (pulled-entity->tuple schema paths entity)) entities)) ;; map or reduce into single coll?
  67 | 
  68 | (defn attr-comparator-seq->comparator
  69 |   "Returns a single comparator fn based on a sequence of attribute comparator pairs, tried in order.
  70 |   Each comparator may return a number (like `compare`) or a boolean (like `<`); no pairs means equal."
  71 |   [attr-comparator-seq]
  72 |   (let [pairs (partition 2 attr-comparator-seq)
  73 |         ->int (fn [f a b] (let [r (f a b)] (cond (number? r) r, r -1, (f b a) 1, :else 0)))]
  74 |     (fn [entity1 entity2]
  75 |       (loop [[[attr f] & more] pairs] ; `&` was missing: only the second pair got bound and recur broke
  76 |         (let [r (if f (->int f (entity1 attr) (entity2 attr)) 0)]
  77 |           (if (and (zero? r) more) (recur more) r))))))
  78 | 
  79 | ;; sorting entities on cardinality/many attrs doesn't make sense... need to reduce to single val (min, max, etc)
  80 | ;; attr-comparator-seq example:
  81 | ;; [:person/salary > :person/city < :person/past-salaries (fn [v-set1 v-set2] (> (apply max v-set1) (apply max v-set2)))]
  82 | (defn sort-entities
  83 |   "Returns the entities for `ids` (looked up in the db's :db/eav map), sorted by `attr-comparator-seq`."
  84 |   [{:keys [db/eav] :as db} ids & attr-comparator-seq]
  85 |   (->> (map eav ids)
  86 |        (sort (attr-comparator-seq->comparator attr-comparator-seq))))
  87 | 
  88 | ;;;;;
  89 | 
  90 | ;; "RAW" preds that don't account for cardinality/many - pred has to account for it
  91 | (defn filter-entities
  92 |   [{:keys [db/eav] :as db} pred] ;; lazy seq of the entity maps (vals of :db/eav) for which `pred` is truthy
  93 |   (->> eav vals (filter pred)))
  94 | 
  95 | (defn filter-eav
  96 |   "Returns the sub-map of the db's :db/eav index (id -> entity) whose entities satisfy `pred`."
  97 |   [{:keys [db/eav] :as db} pred]
  98 |   (reduce (fn [kept [id entity]]
  99 |             (cond-> kept
 100 |               (pred entity) (assoc id entity)))
 101 |           {} eav))
 102 | 
 103 | ;; ref values are possible, but it's questionable to apply preds to them
 104 | ;; ref values for reverse attrs also possible
 105 | (defn trim-entity
 106 |   "Returns `entity` with only the `attr` values that match `pred`.
 107 |   A cardinality/many value is replaced by a lazy seq of its matching values; `attr` is
 108 |   removed when nothing matches (or, for cardinality/many, when it was absent)."
 109 |   [schema attr pred entity]
 110 |   (let [v (entity attr)]
 111 |     (if (cardinality-many? schema attr)
 112 |       (let [matches (filter pred v)]
 113 |         (if (seq matches)
 114 |           (assoc entity attr matches)
 115 |           (dissoc entity attr)))
 116 |       ;; cardinality/one: `pred` sees the raw value, which is nil when `attr` is absent
 117 |       (cond-> entity
 118 |         (not (pred v)) (dissoc attr)))))
 119 | 
 120 | ;; supports reverse paths with no extra effort
 121 | ;; `entity` is a pulled-entity
 122 | (defn trim-path
 123 |   "Returns `entity` with only the `path` values that match `pred`.
 124 |   `path` is a sequence of keys: every key but the last navigates into a nested entity (a map)
 125 |   or a collection of nested entities; the last key is the attr trimmed with `trim-entity`."
 126 |   ;; `& rest-path` is required: without `&`, rest-path was only the second key and recursion got a bare keyword
 127 |   [schema [first-path & rest-path :as path] pred entity]
 128 |   (if rest-path
 129 |     (let [nested (entity first-path)
 130 |           trimmed (if (map? nested)
 131 |                     (trim-path schema rest-path pred nested)
 132 |                     (map #(trim-path schema rest-path pred %) nested))]
 133 |       (if (empty? trimmed)
 134 |         (dissoc entity first-path)
 135 |         (assoc entity first-path trimmed)))
 136 |     ;; no rest-path, assume first-path is the attr
 137 |     (trim-entity schema first-path pred entity)))
 138 | 
 139 | ;; possible: {:some-attr 'some-pred [:some-attr] 'some-pred}
 140 | ;; supply [path pred] pairs as map to avoid traversing same path more than once
 141 | ;; able to handle multiple preds for same path -> not reduce-kv
 142 | ;; support for vector form range preds? to make reusing `attr-pred-pairs` more convenient?
 143 | (defn trim
 144 |   "Returns `entity` after applying each [path pred] pair of `path-pred-pairs` in order (a path is a keyword or a seq of keys)."
 145 |   [schema path-pred-pairs entity]
 146 |   (reduce (fn [trimmed [path pred]]
 147 |             (cond
 148 |               (keyword? path) (trim-entity schema path pred trimmed)
 149 |               (= 1 (count path)) (trim-entity schema (first path) pred trimmed)
 150 |               :else (trim-path schema path pred trimmed)))
 151 |           entity path-pred-pairs))
 152 | 
 153 | ;;;; TRUE PRED
 154 | 
 155 | ;; same code in ve-map->id-set
 156 | ;; returns set
 157 | (defn ids-by-attr-unique
 158 |   "Returns the set of all vals (entity ids) of the `attr` entry in `ave`; #{} when `attr` is absent."
 159 |   [ave attr]
 160 |   (->> (get ave attr) vals set)) ;; do we have to call set? guaranteed to be unique...
 161 | 
 162 | ;; returns set
 163 | ;; might use concat for returning seq: ~2x faster
 164 | (defn ids-by-attr-non-unique
 165 |   [ave attr] ;; union of all id-sets stored under `attr` in `ave`; #{} when `attr` is absent
 166 |   (->> (get ave attr) vals (reduce union)))
 167 | 
 168 | ;; returns set
 169 | (defn ids-by-attr-eav
 170 |   [eav attr] ;; set of the :db/id of every entity in `eav` that contains the key `attr`
 171 |   (into #{} (comp (filter #(contains? % attr)) (map :db/id)) (vals eav)))
 172 | 
 173 | ;;; SET PRED
 174 | 
 175 | ;; linear time in the size of pred
 176 | ;; returns set
 177 | (defn filter-ave-unique-set
 178 |   "Returns the set of ids that `ave` maps the values in `pred` (a collection of values) to
 179 |   under `attr`. Values without an entry are skipped."
 180 |   [ave attr pred]
 181 |   (let [ve (get ave attr)]
 182 |     ;; one index lookup per value in `pred`
 183 |     (into #{} (comp (map #(get ve %)) (filter identity)) pred)))
 184 | 
 185 | ;; returns set
 186 | (defn filter-ave-non-unique-set
 187 |   "Returns the union of the id-sets that `ave` holds under `attr` for the values in `pred`
 188 |   (a collection of values). Values without an entry are skipped."
 189 |   [ave attr pred]
 190 |   (let [ve (get ave attr)]
 191 |     ;; `mapcat` contributes nothing for a value that has no id-set
 192 |     (into #{} (mapcat #(get ve %)) pred)))
 193 | 
 194 | ;; unused
 195 | (defn eav-pred
 196 |   "Applies `pred` to the `attr` value of `entity`; for a cardinality/many attr returns the first truthy result or nil."
 197 |   [{:keys [db/schema] :as db} attr pred entity]
 198 |   (let [v (entity attr)]
 199 |     ;; both branches now call `pred` with the value only; the cardinality/one branch used to call (pred db v)
 200 |     (if (cardinality-many? schema attr) (some pred v) (pred v))))
 201 | 
 202 | (defn entity-attr-pred
 203 |   "Tests the value `entity` holds under `attr` against `pred`.
 204 |   :db.cardinality/one  -> the result of calling `pred` on the value.
 205 |   :db.cardinality/many -> the first truthy result of `pred` over the values, else nil.
 206 |   Returns nil without calling `pred` when the value is missing (nil) or false."
 207 |   [schema attr pred entity]
 208 |   ;; don't use nil? pred since nil is not a valid db value
 209 |   (let [v (get entity attr)]
 210 |     (when v
 211 |       (if (cardinality-many? schema attr) (some pred v) (pred v)))))
 212 | 
 213 | ;; linear time in size of eav
 214 | ;; returns seq/set
 215 | (defn filter-eav-attr-pred
 216 |   "Ids of the entities whose `attr` value satisfies `pred` (as decided by `entity-attr-pred`).
 217 |   Without `xids`: scans every entity in `eav` and returns a lazy seq of their :db/id.
 218 |   With `xids`: checks only those ids and returns a set."
 219 |   ([eav schema attr pred]
 220 |    (for [entity (vals eav)
 221 |          :when (entity-attr-pred schema attr pred entity)]
 222 |      (:db/id entity)))
 223 |   ;; linear time in size of xids
 224 |   ([eav schema attr pred xids]
 225 |    (let [match? (fn [id] (entity-attr-pred schema attr pred (eav id)))]
 226 |      (into #{} (filter match?) xids))))
 227 | 
 228 | ;;; RANGE PRED
 229 | 
 230 | ;; what other ops make sense here? ones that are more efficient with avl-map:
 231 | ;; min/max -> also work with sorted-map
 232 | ;; median - yes
 233 | ;; rank queries, lookups of "nearest entries" = [`nth` 1], but also avl/rank-of [some-val] -> returns value
 234 | ;; [:rank> 3] [:percentile 97.55] [:median] [:average] [:nearest 3]
 235 | ;; rank queries -> percentile calculations -> rank/total
 236 | ;; "nearest entries"
 237 | (defn avl-op
 238 |   "Returns the entries of `avl-map` whose keys sort below/at/above `val` as asked by `op` (one of the fns < <= > >=); nil for any other `op`."
 239 |   [avl-map op val]
 240 |   (let [[l m r] (when (#{< <= > >=} op) (avl/split-key val avl-map)) ; [left entry-for-val-or-nil right]
 241 |         with-m (fn [part] (if m (apply assoc part m) part))]
 242 |     (condp = op
 243 |       < l, <= (with-m l), > r, >= (with-m r), nil)))
 244 | 
 245 | ;; returns ave-a subset
 246 | (defn eval-range-pred
 247 |   "Returns the subset of the `attr` index in `ave` selected by `pred`: either a single
 248 |   [op val] clause, or a vector of one or two such clauses where the second narrows the first."
 249 |   [ave attr [a b :as pred]]
 250 |   (let [ave-a (ave attr)
 251 |         narrow (fn [m [op val]] (avl-op m op val))]
 252 |     (cond
 253 |       ;; single clause: `a` is the op fn itself, so `pred` as a whole is the clause
 254 |       (not (vector? a)) (narrow ave-a pred)
 255 |       b (narrow (narrow ave-a a) b)
 256 |       :else (narrow ave-a a))))
 257 | 
 258 | ;; TODO: use Logarithmic time slicing for >= < etc!!!
 259 | ;; change syntax from [[< 34] [> 8]] to [> 8 < 34] (avl/subrange takes the lower bound first)
 260 | ;; NOTE: when passing > within vector, it is evaled by clojure
 261 | ;; returns ave-a subset just like eval-range-pred
 262 | ;; MAYBE should return ids
 263 | (defn eval-range-pred2
 264 |   [ave attr pred] ;; `pred` is spliced in as the remaining arguments of avl/subrange on the `attr` index
 265 |   (let [ave-a (ave attr)] (apply avl/subrange ave-a pred)))
 266 | 
 267 | ;;; GENERIC PRED
 268 | 
 269 | ;; linear time in ave-a size
 270 | ;; still better than traversing eav because only looking at entities that contain `attr`
 271 | ;; returns seq
 272 | (defn filter-ave-unique
 273 |   [ave attr pred] ;; lazy seq of the ids whose value (a key of the value -> id map under `attr`) satisfies `pred`
 274 |   (for [[v e] (get ave attr) :when (pred v)] e))
 275 | 
 276 | ;; returns seq
 277 | (defn filter-ave-non-unique
 278 |   [ave attr pred] ;; lazy seq (duplicates possible) of all ids in the id-sets whose value satisfies `pred`
 279 |   (for [[v e-set] (get ave attr) :when (pred v), e e-set] e))
 280 | 
 281 | ;;; GET IDS
 282 | 
 283 | ;; always used with OR `ref-type?` => `ave-form-single-e?` `ave-form-eset?`
 284 | (defn index? [schema attr] ;; calls the schema's :db/index entry (presumably a set of attrs) with `attr`
 285 |   (let [indexed (schema :db/index)] (indexed attr)))
 286 | 
 287 | (defn index-avl-map? [schema attr] ;; calls the schema's :db.index/avl-map entry (presumably a set of attrs) with `attr`
 288 |   (let [avl-indexed (schema :db.index/avl-map)] (avl-indexed attr)))
 289 | 
 290 | (defn get-ids-false-pred
 291 |   "Returns the set of ids of the entities that have no value for `attr`.
 292 |   One-arg form: all ids in `eav` minus the ids found in the AVE index when `attr` is unique,
 293 |   indexed or a ref; otherwise prints a warning and scans `eav`.
 294 |   With `xids`: returns `xids` without the ids whose entity contains `attr`."
 295 |   ([{:keys [db/schema db/eav db/ave]} attr]
 296 |    ;; linear (keys eav), linear ids-with-attr, linear difference
 297 |    ;; faster to always filter eav?
 298 |    (let [all-ids-except (fn [ids-with-attr] (difference (set (keys eav)) ids-with-attr))]
 299 |      (if (unique? schema attr)
 300 |        (all-ids-except (ids-by-attr-unique ave attr))
 301 |        (if (or (index? schema attr) (ref-type? schema attr))
 302 |          (all-ids-except (ids-by-attr-non-unique ave attr))
 303 |          (do (println "Warning! get-ids-false-pred for attr not in AVE index: " attr)
 304 |              (into #{} (comp (remove #(contains? % attr)) (map :db/id)) (vals eav)))))))
 305 |   ([{:keys [db/schema db/eav db/ave] :as db} attr xids]
 306 |    ;; don't bother relying on ave index?
 307 |    ;; start from all of `xids` and drop every id whose entity has `attr`
 308 |    (reduce (fn [kept xid]
 309 |              (cond-> kept
 310 |                (contains? (eav xid) attr) (disj xid)))
 311 |            xids xids)))
 312 | 
 313 | ;; returns set
 314 | (defn get-ids-true-pred
 315 |   "Returns the set of ids of the entities that have any value for `attr`.
 316 |   One-arg form: reads the ids from the AVE index when `attr` is unique, indexed or a ref;
 317 |   otherwise prints a warning and scans `eav`.
 318 |   With `xids` (ids are removed with `disj`, so a set is expected): only the members that have `attr`."
 319 |   ([{:keys [db/schema db/eav db/ave] :as db} attr]
 320 |    (if (unique? schema attr)
 321 |      (ids-by-attr-unique ave attr)
 322 |      (if (or (index? schema attr) (ref-type? schema attr))
 323 |        (ids-by-attr-non-unique ave attr)
 324 |        (do (println "Warning! get-ids-true-pred for attr not in AVE index: " attr)
 325 |            (ids-by-attr-eav eav attr)))))
 326 |   ([{:keys [db/schema db/eav db/ave] :as db} attr xids]
 327 |    ;; `intersection` is linear in size of smaller set
 328 |    ;; `get-ids-true-pred` is linear in size of ave-a vals concatted
 329 |    (if-not (< (count xids) (count (ave attr)))
 330 |      (intersection (get-ids-true-pred db attr) xids)
 331 |      (reduce (fn [kept xid]
 332 |                (cond-> kept
 333 |                  (not (contains? (eav xid) attr)) (disj xid)))
 334 |              xids xids))))
 335 | 
 336 | ;; returns set
 337 | (defn get-ids-set-pred
 338 |   "Returns the ids whose `attr` value is a member of `pred` (a set of values).
 339 |   Uses the AVE index when `attr` is unique, indexed or a ref; otherwise prints a warning
 340 |   and scans `eav`. With `xids`, the result is restricted to those ids."
 341 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
 342 |    (if (unique? schema attr)
 343 |      (filter-ave-unique-set ave attr pred)
 344 |      (if (or (index? schema attr) (ref-type? schema attr))
 345 |        (filter-ave-non-unique-set ave attr pred)
 346 |        (do (println "Warning! get-ids-set-pred for attr not in AVE index: " attr)
 347 |            (set (filter-eav-attr-pred eav schema attr pred))))))
 348 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
 349 |    ;; set lookups are linear in size of pred, so with fewer xids than pred values test each xid instead
 350 |    ;; but filter-eav does more work per step... or maybe similar -> test
 351 |    (if-not (< (count xids) (count pred))
 352 |      (intersection (get-ids-set-pred db attr pred) xids)
 353 |      (filter-eav-attr-pred eav schema attr pred xids))))
 354 | 
 355 | (defn entity-attr-pred2
 356 |   "Values of `attr` in `entity` that match `pred` (a seq for cardinality/many, else the value itself), or nil."
 357 |   [schema attr pred entity]
 358 |   (let [v (get entity attr)]
 359 |     (cond (not v) nil
 360 |           (cardinality-many? schema attr) (seq (filter pred v))
 361 |           :else (when (pred v) v))))
 362 | 
 363 | (defn get-maps-set-pred
 364 |   "Returns a map of id -> {attr matches} for the entities whose `attr` value is in `pred`
 365 |   (a collection of values):
 366 |   - unique attr: `matches` is the matching value;
 367 |   - other attrs in the AVE index: the matching values, conj-ed one by one;
 368 |   - attr not in the AVE index: a warning is printed and `matches` is whatever
 369 |     `entity-attr-pred2` returns for each entity in `eav`."
 370 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 371 |   (let [ave-a (get ave attr)]
 372 |     (if (unique? schema attr)
 373 |       (reduce (fn [ret v]
 374 |                 (if-let [e (get ave-a v)] (assoc-in ret [e attr] v) ret))
 375 |               {} pred)
 376 |       (if (or (index? schema attr) (ref-type? schema attr))
 377 |         (reduce (fn [ret [v id]] (update-in ret [id attr] conj v))
 378 |                 {}
 379 |                 (for [v pred, id (get ave-a v)] [v id]))
 380 |         (do
 381 |           (println "Warning! get-maps-set-pred for attr not in AVE index: " attr)
 382 |           ;; here we filter the cardinality/many attr ourselves, so might as well save it for the end?
 383 |           ;; so then we shouldn't use get-maps when no ave index... defeats the purpose?
 384 |           (reduce (fn [result entity]
 385 |                     (if-let [v (entity-attr-pred2 schema attr pred entity)]
 386 |                       (assoc-in result [(:db/id entity) attr] v)
 387 |                       result))
 388 |                   {} (vals eav)))))))
 389 | 
 390 | ;; returns seq (to preserve order; may have duplicates)
 391 | #_(defn get-ids-set-pred2
 392 |     [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 393 |     (let [ave-a (get-ave-a-set-pred db attr pred)]
 394 |       (cond
 395 |         (unique? schema attr)
 396 |         (vals ave-a)
 397 |         (or (index? schema attr) (ref-type? schema attr))
 398 |         (apply concat (vals ave-a))
 399 |         :default
 400 |         (set (filter-eav-attr-pred eav schema attr pred))))
 401 |     )
 402 | 
 403 | (defn log32 [n] ;; base-32 logarithm of `n`, via change of base from the natural log
 404 |   (let [ln-32 (Math/log 32)] (/ (Math/log n) ln-32)))
 405 | 
 406 | ;; TODO: returning sets means losing order...
 407 | ;; when sorted ave-a, can return seq and preserve order - maybe better to return ave-a subset
 408 | ;; - duplicate ids possible for cardinality/many, distinct keeps first id (lowest sorted)
 409 | ;; 1. pull [val id-set] from ave-a
 410 | ;; preserve sorting order when eventual result is desired sorted by same attr in the same order as ave-a (asc/desc)
 411 | ;; ... and no additional sorting (like no sorting by city after sorted by salary)
 412 | ;; if sorting on attr and sorted ave-a available, can use it as the last pred as a potential optimization
 413 | ;; do pred on ave-a, then rm-xids on ave-a => faster than sorting after pulling from eav? yes for larger subsets of ave-a
 414 | ;; returns set
 415 | (defn get-ids-range-pred
 416 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
 417 |    (if (index-avl-map? schema attr)
 418 |      (let [r (eval-range-pred ave attr pred)]
 419 |        (if (unique? schema attr)
 420 |          (set (vals r))
 421 |          (apply union (vals r))))
 422 |      (println "Warning! get-ids-range-pred for attr not in AVE index or not an AVL index: " attr)))
 423 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
 424 |    ;; range q is log32(size ave-a)
 425 |    ;; for few xids might be faster to scan eav index
 426 |    ;; (if (< (count xids) (log32 (count (ave attr))))))
 427 |    ;; also need to convert pred from vector to generic to filter-eav
 428 |    (intersection (get-ids-range-pred db attr pred) xids)))
 429 | 
 430 | ;; returns set
 431 | (defn get-ids-generic-pred
 432 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
 433 |    (cond
 434 |      (unique? schema attr)
 435 |      (set (filter-ave-unique ave attr pred))
 436 |      (or (index? schema attr) (ref-type? schema attr))
 437 |      ;; convert seq (with possible duplicate ids) to set
 438 |      (set (filter-ave-non-unique ave attr pred))
 439 |      :not-in-ave-index
 440 |      (do
 441 |        (println "Warning! get-ids-generic-pred for attr not in AVE index: " attr)
 442 |        (set (filter-eav-attr-pred eav schema attr pred)))))
 443 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
 444 |    (if (contains? ave attr)
 445 |      (if (< (count xids) (count (ave attr)))
 446 |        (set (filter-eav-attr-pred eav schema attr pred xids))
 447 |        (intersection xids (get-ids-generic-pred db attr pred)))
 448 |      (filter-eav-attr-pred eav schema attr pred xids))))
 449 | 
 450 | ;; with reverse attr support
 451 | ;; can modify to return all matching vals rather than just yes/no (slower, but returns more info)
 452 | (defn some-path
 453 |   "Returns logical true if some `path` satisfies `pred`, else returns nil.
 454 |    Assumes everything in `path` is ref attr or reverse ref attr except the final attr in path cannot be a reverse attr."
 455 |   [{:keys [db/schema db/eav db/ave] :as db} id [attr more-attrs :as path] pred]
 456 |   (let [entity (eav id)]
 457 |     (if more-attrs
 458 |       (if (reverse-reference? attr)
 459 |         (let [pointing-attr (reverse->attr-name attr)]
 460 |           (if (unique? schema pointing-attr)
 461 |             (when-let [pointing-id (get-in ave [pointing-attr id])]
 462 |               (some-path db pointing-id more-attrs pred))
 463 |             (when-let [pointing-ids (get-in ave [pointing-attr id])]
 464 |               (some #(some-path db % more-attrs pred) pointing-ids))))
 465 |         ;; forward attribute
 466 |         (if (cardinality-many? schema attr)
 467 |           (when-let [next-ids (entity attr)]
 468 |             (some #(some-path db % more-attrs pred) next-ids))
 469 |           (when-let [next-id (entity attr)]
 470 |             (some-path db next-id more-attrs pred))))
 471 |       (when-let [v (entity attr)]
 472 |         (if (cardinality-many? schema attr)
 473 |           (some pred v)
 474 |           (pred v))))))
 475 | 
 476 | (declare ve-map->id-set)
 477 | 
 478 | #_(let [pids (get-ids db :person/city #{"Moscow"})
 479 |         pids (get-ids db [:person/license :dl/year] #(>= % 2020))])
 480 | ;; ensure everything along path is a ref (except last attr)? No, just assume
 481 | ;; apply pred to ref attr? questionable, but is there a downside?
 482 | ;; non-generic preds don't make sense (except sets); ranges? nope -> would have to start with range query and link back
 483 | ;; and what about applying the pred to all cardinality/many links at once?
 484 | ;; -> maybe an extra kw arg to signal to some-path to apply pred to entire set?
 485 | ;; what about pred applied to paths? like "get the user with best-friend with largest salary"
 486 | ;; returns set
 487 | (defn get-ids-path-attr
 488 |   ([{:keys [db/schema db/eav db/ave] :as db} [attr more-attrs :as path] pred]
 489 |    (if-let [ave-attr (ave attr)]
 490 |      ;; could use `get-ids-true-pred`? -> it may fallback to scanning eav index
 491 |      (let [ids (ve-map->id-set schema attr ave-attr)]
 492 |        (get-ids-path-attr db path pred ids))
 493 |      (get-ids-path-attr db path pred (keys eav))))
 494 |   ([db [attr more-attrs :as path] pred xids]
 495 |    (reduce (fn [result id]
 496 |              (if (some-path db id path pred)
 497 |                (conj result id)
 498 |                result))
 499 |            #{} xids)))
 500 | 
 501 | ;; pred is applied to the set of cardinality/many values (not to individual values)
 502 | (defn filter-eav-many
 503 |   ([eav attr pred]
 504 |    (->> (vals eav)
 505 |         (filter (fn [entity] (pred (entity attr))))
 506 |         (map :db/id)
 507 |         (set)))
 508 |   ([eav attr pred xids]
 509 |    ;; xids don't need to be sets
 510 |    (filter-eav-many (select-keys eav xids) attr pred)))
 511 | 
 512 | (defn get-ids-many-pred
 513 |   "Applies pred to the entire set of values for :db.cardinality/many `attr`."
 514 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred]
 515 |    (if (cardinality-many? schema attr)
 516 |      (if (contains? ave attr)
 517 |        (filter-eav-many eav attr pred (ids-by-attr-non-unique ave attr))
 518 |        (filter-eav-many eav attr pred))
 519 |      (assert false (str "Attribute must be :db.cardinality/many: " attr))))
 520 |   ([{:keys [db/schema db/eav db/ave] :as db} attr pred xids]
 521 |    (if (cardinality-many? schema attr)
 522 |      (if (< (count xids) (count (ave attr)))
 523 |        (filter-eav-many eav attr pred xids)
 524 |        (if (contains? ave attr)
 525 |          (filter-eav-many eav attr pred (ids-by-attr-non-unique ave attr))
 526 |          (filter-eav-many eav attr pred xids)))
 527 |      (assert false (str "Attribute must be :db.cardinality/many: " attr)))))
 528 | 
 529 | ;;;;;; GET AVE_A
 530 | 
 531 | (defn get-ve-map-true-pred
 532 |   [{:keys [db/schema db/eav db/ave] :as db} attr]
 533 |   (ave attr))
 534 | 
 535 | (defn get-ve-map-set-pred
 536 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 537 |   (select-keys (ave attr) pred))
 538 | 
 539 | (defn get-ve-map-range-pred
 540 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 541 |   (eval-range-pred ave attr pred))
 542 | 
 543 | ;; may be less efficient than get-ids because of (into {})
 544 | (defn get-ve-map-generic-pred
 545 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 546 |   ;; what if (ave attr) is nil?
 547 |   (->> (ave attr)
 548 |        (filter (fn [[v e]] (pred v)))
 549 |        (into {})))
 550 | 
 551 | ;; alternative approach: test performance
 552 | (defn get-ve-map-generic-pred2
 553 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 554 |   (reduce-kv (fn [r v e]
 555 |                (if (pred v) r (dissoc r v)))
 556 |              (ave attr) (ave attr)))
 557 | 
 558 | ;; Aggregates like (max, count, etc) operate on ave-a, so returning just ids precludes aggregate operations
 559 | ;; for aggregations ave-a can transform e.g :person/salary
 560 | ;; {10000 #{2 3 9}, 20000 #{4 6 7}} => {{:val 10000 :count 3} #{2 3 9}, {:val 20000 :count 2} #{4 6}}
 561 | ;; or {10000 {:ids #{2 3 9} :count 3}, 20000 {:ids #{4 6} :count 2}} => or a separate map?
 562 | ;; aggregates on: ids (count), values individual (length), values aggregate (sum, average, max)
 563 | ;; also supports the case where you want to preserve the sorting ave-a index
 564 | ;; TODO: maybe easier to accomplish aggregation by going through EAV index?
 565 | (defn get-ve-map
 566 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 567 |   (cond
 568 |     ;; support false for lack of attribute
 569 |     (true? pred)
 570 |     (get-ve-map-true-pred db attr)
 571 |     (set? pred)
 572 |     ;; set pred can get v's from ave-a directly - faster
 573 |     (get-ve-map-set-pred db attr pred)
 574 |     ;; range pred: take advantage of avl index
 575 |     (vector? pred)
 576 |     (get-ve-map-range-pred db attr pred)
 577 |     :default ;; just a regular pred, have to go through all values in ave-a
 578 |     (get-ve-map-generic-pred db attr pred)))
 579 | ;; for optimizations like get-ids would need:
 580 | ;; 1. keep track of all ids -> need that for `intersect-ve-map`
 581 | ;; TODO: don't know what to keep until you got all the ids...
 582 | ;; faster to `rm-xids` from desired ave-a after you have all the ids via `get-ids`?
 583 | ;; `get-ids` will rely on eav index when that's faster, so doesn't always rely on ave index...
 584 | ;; lose the advantage with e.g. set preds of selecting just the v's that you want...
 585 | ;; Q: when do I want ve-map? for aggregates? ever? is eav index based aggregation sufficient?
 586 | ;; if you NEED ve-map, then could apply pred first, then do rm-x-ids (kind of duplicating the work for that pred)
 587 | 
 588 | (defn ve-map->id-set
 589 |   [schema attr ve-map]
 590 |   (if (unique? schema attr)
 591 |     (set (vals ve-map))
 592 |     (apply union (vals ve-map))))
 593 | 
 594 | (defn ve-map->id-seq
 595 |   [schema attr ve-map]
 596 |   (if (unique? schema attr)
 597 |     (vals ve-map)
 598 |     (apply concat (vals ve-map))))
 599 | 
 600 | ;; removes ids from ve-map that are not in xids
 601 | ;; removes v's whose id-set doesn't intersect with any xids
 602 | ;; returns ve-map
 603 | (defn rm-xids
 604 |   [schema attr xids ve-map]
 605 |   (reduce-kv (fn [result v e]
 606 |                (if (unique? schema attr)
 607 |                  (if (contains? xids e)
 608 |                    (assoc result v e)
 609 |                    result)
 610 |                  (if-let [v-ids (intersection e xids)]
 611 |                    (assoc result v v-ids)
 612 |                    result)))
 613 |              {} ve-map))
 614 | 
 615 | ;; returns ave-sub
 616 | ;; todo: same attr diff preds
 617 | ;; at each step, can get-ve-map based on ids or based on pred
 618 | ;; when is it faster based on ids?
 619 | (defn get-ve-map-many
 620 |   [{:keys [db/schema db/eav db/ave] :as db} & attr-pred-pairs]
 621 |   (let [ave-sub
 622 |         (reduce (fn [ave-sub [attr pred]]
 623 |                   (assoc ave-sub attr (get-ve-map db attr pred)))
 624 |                 {} (partition 2 attr-pred-pairs))
 625 |         id-sets (map (fn [[attr ve-map]] (ve-map->id-set schema attr ve-map)) ave-sub)
 626 |         ids (apply intersection id-sets)]
 627 |     (reduce-kv (fn [ave-sub attr ve-map]
 628 |                  (assoc ave-sub attr (rm-xids schema attr ids ve-map)))
 629 |                {} ave-sub)))
 630 | 
 631 | ;; can also select ids from eav, but can we speed it up here or give matching vals?
 632 | ;; matching vals help for aggregating count by val
 633 | ;; returns ave-sub with each attr ve-map containing only x-ids
 634 | (defn intersect-ave-sub
 635 |   [schema ave-sub]
 636 |   (let [id-sets (map (fn [[attr ve-map]] (ve-map->id-set schema attr ve-map)) ave-sub)
 637 |         x-ids (apply intersection id-sets)]
 638 |     (reduce-kv (fn [result attr ve-map]
 639 |                  (assoc result attr (rm-xids schema attr x-ids ve-map)))
 640 |                {} ave-sub)))
 641 | 
 642 | ;; returns a seq of 1 or more (for cardinality/many) [e v] tuples based on [v e] tuple
 643 | (defn invert-ave-a-entry
 644 |   [schema attr [v e]]
 645 |   (if (unique? schema attr)
 646 |     [[e v]]
 647 |     (reduce (fn [r [v single-e]]
 648 |               (conj r [single-e v]))
 649 |             [] e)))
 650 | 
 651 | ;; returns map of {id v-set}
 652 | (defn invert-ave-a-non-unique
 653 |   [ave-a]
 654 |   (reduce (fn [a [k v]]
 655 |             (assoc a k (conj (get a k #{}) v)))
 656 |           {} (for [[k s] ave-a v s] [v k])))
 657 | 
 658 | ;; returns [e a v] tuples in same order as ave-a
 659 | (defn ave-a->eav-tuples
 660 |   [schema attr ave-a]
 661 |   (if (unique? schema attr)
 662 |     (reduce (fn [r [v e]]
 663 |               conj r [e attr v])
 664 |             [] ave-a)
 665 |     (reduce (fn [r [v e-set]]
 666 |               (reduce (fn [r e]
 667 |                         (conj r [e attr v]))
 668 |                       r e-set))
 669 |             [] ave-a)))
 670 | 
 671 | (defn get-eav-tuples
 672 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 673 |   (let [ave-a (get-ve-map db attr pred)]
 674 |     (ave-a->eav-tuples schema attr ave-a)))
 675 | 
 676 | ;; TODO: this is an alternative implementation of get-ids that relies on `get-ave-a` first
 677 | ;; is it as efficient to get ave-a first, and then convert to id sets?
 678 | ;; might be slower for set preds because constructing intermediate map with select-keys
 679 | ;; NOTE: predicates support tuple bindings and collection bindings, but relations bindings need a bit extra work
 680 | ;; https://docs.datomic.com/cloud/query/query-data-reference.html#relation-binding
 681 | ;; todo: assumes ave exists - what about eav fallback? (if relying on this as first step for get-ids)
 682 | (defn get-ids2
 683 |   [{:keys [db/schema db/eav db/ave] :as db} attr pred]
 684 |   (let [ave-a (get-ve-map db attr pred)]
 685 |     (ve-map->id-set schema attr ave-a)))
 686 | 
 687 | ;; OTHER
 688 | ;; fns to implement: get-else, get-some
 689 | ;; missing? = false pred in get-ids for lack of attr
 690 | ;; not clause implemented with arbitrary pred, but maybe a better way?
 691 | 
 692 | ;; TODO: another option is to build eav-sub as we go along
 693 | ;; can select ids from eav at each step, but replacing attr with just the matching pred from ve-map
 694 | ;; then for next pred, we would dissoc the difference between pred1 ids and pred2 ids from eav-sub
 695 | ;; this is potentially a lot of wasted processing, since subsequent preds may shrink ids-set by a lot
 696 | ;; todo: should `pull` support preds for cardinality/many attrs? -> better as separate step
 697 | 
 698 | ;; TUPLES allow to get pred #{Moscow}, :person/name :person/building-number (ref) => pull
 699 | 
 700 | ;; this is supposed to support selecting from eav based on a ave-sub
 701 | ;; which had been built up as preds matched in ve-maps, and "thinned" with subsequent preds -> could be a LOT of "thinning"
 702 | ;; and the thinning process is costly -> more efficient if invert to ev-map? slightly; plus inverting itself costly
 703 | ;; returns eav map for all ids in ve-map, excludes `next-attrs` from entities
 704 | (defn select-entities1
 705 |   [result schema attr ve-map eav next-attrs]
 706 |   (if (unique? schema attr)
 707 |     (reduce-kv (fn [result v id]
 708 |                  (assoc result id (eav id)))
 709 |                {} ve-map)
 710 |     (if (cardinality-many? schema attr)
 711 |       (reduce-kv (fn [result v e]
 712 |                    (reduce (fn [result id]
 713 |                              (if (contains? result id)
 714 |                                (update-in result [id attr] conj v)
 715 |                                (let [entity (-> (apply dissoc (eav id) next-attrs)
 716 |                                                 (assoc attr #{v}))]
 717 |                                  (assoc result id entity))))
 718 |                            result e))
 719 |                  {} ve-map)
 720 |       ;; cardinality/one
 721 |       (reduce-kv (fn [result v e]
 722 |                    (reduce (fn [result id]
 723 |                              (assoc result id (eav id)))
 724 |                            result e))
 725 |                  result ve-map))))
 726 | 
 727 | ;; todo: if there are two ve-maps for same attr... ave-sub can only have one key for each attr...
 728 | ;; returns entities with pred-matching cardinality/many values only
 729 | (defn select-entities
 730 |   [schema ave-sub eav]
 731 |   (loop [result {} ave-sub ave-sub]
 732 |     (if-let [[attr ve-map] (first ave-sub)]
 733 |       (let [next-ave-sub (next ave-sub)
 734 |             next-attrs (map first next-ave-sub)
 735 |             result (select-entities1 result schema attr ve-map eav next-attrs)]
 736 |         (recur result next-ave-sub))
 737 |       result)))
 738 | 
 739 | ;; todo: returns id if at least one cardinality/many value matches pred, but not the specific value(s)
 740 | ;; how to only include desired values? return partial entities from eav (filter cardinality/many)?
 741 | ;; todo: key questions is what do you want returned in the end?? think about the final step of displaying on the screen...
 742 | ;; do you want them as tuples or as entity-maps with filtered cardinality/many values?
 743 | ;; how about if I want ppl who have ONLY lived in #{Moscow Berlin} and nowhere else? NOT clause?
 744 | ;; could re-filter, but is there a way to do it "along the way" -> get-ids would have to return some sort of tuple
 745 | ;; if returning tuples, then is it still possible to compose intersections of ids? maybe if we return distinct ids alongside
 746 | ;; -> would have to filter-eav... or filter-ave and then filter-eav... or return ave-a
 747 | ;; do we want preds that operate on entire set of values for attr? like "two or more past-cities in germany"
 748 | ;; maybe return ave-a submap? how to intersect/union on submaps? => they are on different attrs!
 749 | ;; TODO: how to express "not equal"? [!= 4], [!= #{3 4 5}], [not= 8], (
 750 | ;; how to express multiple preds for one attr...
 751 | (defn get-ids
 752 |   ([db attr pred]
 753 |    (if (vector? attr)
 754 |      (get-ids-path-attr db attr pred)
 755 |      (cond
 756 |        (false? pred)
 757 |        (get-ids-false-pred db attr)
 758 |        (true? pred)
 759 |        (get-ids-true-pred db attr)
 760 |        (set? pred)
 761 |        (get-ids-set-pred db attr pred)
 762 |        (vector? pred)
 763 |        (get-ids-range-pred db attr pred)
 764 |        (fn? pred)
 765 |        (get-ids-generic-pred db attr pred)
 766 |        :default
 767 |        (assert false (str "Invalid predicate: " pred)))))
 768 |   ([db attr pred xids]
 769 |    ;; here must have sets, while above a seq might do to preserve sort order
 770 |    (if (vector? attr)
 771 |      (get-ids-path-attr db attr pred xids)
 772 |      (cond
 773 |        (false? pred)
 774 |        (get-ids-false-pred db attr xids)
 775 |        (true? pred)
 776 |        (get-ids-true-pred db attr xids)
 777 |        (set? pred)
 778 |        (get-ids-set-pred db attr pred xids)
 779 |        (vector? pred)
 780 |        (intersection (get-ids-range-pred db attr pred) xids)
 781 |        (fn? pred)
 782 |        (get-ids-generic-pred db attr pred xids)
 783 |        :default
 784 |        (assert false (str "Invalid predicate: " pred))))))
 785 | 
 786 | ;; TODO: build up datoms/tuples instead of ids? ultimately would return... tuples or nested entities
 787 | ;; in order to avoid trimming large cardinality/many attrs, especially repeatedly
 788 | ;; construct id to datoms map as we go along... -> would be better if we stored datoms in ave index
 789 | ;; build up entities via list-form indexing like for eav index? works for path preds too
 790 | ;; build up nested-entities as you go along with cardinality/many attrs (and cardinality/one attrs?)
 791 | ;; tradeoff: the work of building up nested-entities is wasted if many ids subsequently discarded
 792 | ;; TODO: which is the greater waste: re-filtering cardinality-many attr or building up nested-entities and later discarding them?
 793 | ;; also constraining-paths aren't necessarily returning-paths, so building up constraining-paths may be wasted
 794 | ;; if building up nested-entities along the way: only do it for returning-path cardinality-many to minimize potential waste
 795 | ;; -> solution: specify returning-paths upfront
 796 | ;; -> end up with partial entity the satisfies query, but then need to "enrich" with additional attrs/refs
 797 | ;; can figure out the "enrich-pattern" by subtracting from full pattern what is available
 798 | ;; what about "if person ever made more than 100K, return all his salaries"
 799 | ;; - constraint applies to set of vals, but returning all vals for cardinality-many attr - HOW TO in datomic?
 800 | ;; ENRICHING: pull returning-paths that haven't been built up yet, merge with existing nested-entity built up results
 801 | 
 802 | ;; edge case: constraint-path: ref-type attr 'return only keyword ids';
 803 | ;; return-path: follow those keyword ids and get more
 804 | 
 805 | ;; returning-paths spec
 806 | ;; get-entities returns a partially built up entity
 807 | ;; todo: maybe only build up nested entity when cardinality-many attr and in return-path
 808 | #_(get-ids-spec db
 809 |                 ;; constraint paths
 810 |                 {:person/city #{"Berlin" "moscow"}
 811 |                  [:person/dl :dl/year] [> 2009]}
 812 |                 ;; if return-paths not specified, don't know when to keep and when to discard vals via get-ids/get-entities
 813 |                 ;; e.g. if :person/city not in return-paths, then only get-ids, not get-entities
 814 |                 ;; return paths: how to specify? `path->join-pattern` is available
 815 |                 [:person/city :person/name [:person/dl :dl/year] [:person/dl :dl/city-issued]]
 816 |                 ;[:person/city {:person/dl [:dl/year :dl/city-issued]} :person/name]
 817 |                 )
 818 | 
 819 | 
 820 | ;; xsect fns for maintaining a built up result map and interoperating with id sets
 821 | (defn xect-maps
 822 |   [m1 m2]
 823 |   (if (< (count m2) (count m1))
 824 |     (recur m2 m1)
 825 |     (reduce-kv (fn [result id m]
 826 |                  (if (contains? m2 id)
 827 |                    (update result id merge (m2 id))
 828 |                    (dissoc result id)))
 829 |                m1 m2)))
 830 | 
 831 | (defn xsect-map-set
 832 |   [m ids]
 833 |   (if (map? ids)
 834 |     (recur ids m)
 835 |     (reduce-kv (fn [result id _]
 836 |                  (if (contains? ids id)
 837 |                    result
 838 |                    (dissoc result id)))
 839 |                m m)))
 840 | 
 841 | ;; s1 and s2 can both be sets of ids or maps built up eav entities
 842 | (defn intersection2
 843 |   [s1 s2]
 844 |   (cond
 845 |     (and (set? s1) (set? s2))
 846 |     (intersection s1 s2)
 847 |     (and (map? s1) (map? s2))
 848 |     (xect-maps s1 s2)
 849 |     :else
 850 |     (xsect-map-set s1 s2)))
 851 | 
 852 | (defn merge-netsted
 853 |   [schema m1 m2]
 854 |   (reduce-kv (fn [result attr v]
 855 |                (if (ref-type? schema attr)
 856 | 
 857 |                  ;; not ref type
 858 |                  (assoc result attr v)))
 859 |              m1 m2))
 860 | 
 861 | (defn get-ids-multi
 862 |   "Returns ids that satisfy all of `attr-pred-pairs`. Supports any attr pred pair that `get-ids` can handle."
 863 |   [db & attr-pred-pairs]
 864 |   (reduce (fn [ids [attr pred]]
 865 |             (get-ids db attr pred ids))
 866 |           #{} (partition 2 attr-pred-pairs)))
 867 | 
 868 | ;; [:person/license :dl/year] => {:person/license [:dl/year]}
 869 | ;; [:person/license :issuing-dmv :dmv-city] => {:person/license {:issuing-dmv [:dmv-city]}}
 870 | (defn path->join-pattern
 871 |   [path]
 872 |   (let [[final-attr reverse-path] (reverse path)]
 873 |     (reduce (fn [r reverse-path]
 874 |               (assoc {} (first reverse-path) r))
 875 |             [final-attr] reverse-path)))
 876 | 
 877 | ;; [[>= 30000] [< 30004]] => (fn [x] (and (>= x 30000) (< x 30004)))
 878 | (defn range-pred->fn-pred
 879 |   [range-pred]
 880 |   (if (vector? (first range-pred))
 881 |     (let [[[op1 val1] [op2 val2]] range-pred]
 882 |       (fn [x] (and (op1 x val1) (op2 x val2))))
 883 |     (let [[op val] range-pred]
 884 |       (fn [x] (op x val)))))
 885 | 
 886 | ;; can handle multiple preds for one attr? -> pattern w/ dups, no prob for trim?
 887 | ;; specify whether to do the trimming step? probably not
 888 | ;; convenience: reuse of attr-pred-pairs for get-ids and trim; and pull?
 889 | (defn get-pull-trim
 890 |   "For every id that satisfies all of `path-pred-pairs`.
 891 |    Pulls all attrs/paths, joining on vector paths.
 892 |    Trims cardinality/many vals of each pulled (nested) entity to match preds from `path-pred-pairs`."
 893 |   [{:keys [db/schema] :as db} & path-pred-pairs]
 894 |   (let [ids (apply get-ids-multi db path-pred-pairs)
 895 |         pattern (->> path-pred-pairs
 896 |                      (map (fn [path pred]
 897 |                             (if (vector? path)
 898 |                               (path->join-pattern path)
 899 |                               path)))
 900 |                      distinct)
 901 |         pulled-entities (pull-many db pattern ids)
 902 |         ;; keep only cardinality/many attrs for trimming
 903 |         path-pred-pairs-trim
 904 |         (filter (fn [[path pred]]
 905 |                   (if (vector? path)
 906 |                     (cardinality-many? schema (last path))
 907 |                     (cardinality-many? schema path)))
 908 |                 path-pred-pairs)
 909 |         ;; convert range preds to fn; remove true and false preds
 910 |         path-pred-pairs-trim
 911 |         (reduce (fn [result [path pred :as pair]]
 912 |                   (cond
 913 |                     (vector? pred)
 914 |                     (conj result [path (range-pred->fn-pred pred)])
 915 |                     (or (true? pred) (false? pred))
 916 |                     result
 917 |                     :default
 918 |                     (conj result pair)))
 919 |                 [] path-pred-pairs-trim)]
 920 |     (map (fn [entity]
 921 |            (trim schema path-pred-pairs-trim entity))
 922 |          pulled-entities)))
 923 | 
 924 | ;; instead of filtering by pred, it fetches vals
 925 | ;; this is a more streamlined/limited pull-many
 926 | (defn get-entities-eav
 927 |   ([{:keys [db/schema db/eav db/ave] :as db} id-set]
 928 |    (map eav id-set))
 929 |   ([{:keys [db/schema db/eav db/ave] :as db} ks id-set]
 930 |    (->> (map eav id-set)
 931 |         (map #(select-keys % ks)))))
 932 | 
 933 | ;; =========
 934 | ;; Reverse Txs
 935 | 
 936 | ;; interim transactions?
 937 | ;;  can we specify when updates depend on previous successes (update chains) -> compare and swap or similar?
 938 | ;;  add last1 to man1, add last2 to man1, last1 add fail -> reverse last1 add, last2 add still valid => independent
 939 | ;;  add 20 to acct, add 30 to acct, add 20 fail -> reverse add 20, add 30 still valid => independent
 940 | ;;  add last1 to man1, add man1/last1 to shoe1, add last1 fail -> shoe1 invalid -> reverse add man1/last1 shoe1
 941 | ;;     specify tx-id dependency? keep coll of tx deps? = tx level granularity; [e a] level granularity possible?
 942 | ;;  types of deps: 1) ref attrs pointing to failed entities
 943 | ;;   (note difference between existence/non-existence of entity and change in entity data)
 944 | ;;   2) [e a] depends on past [e a]?
 945 | ;; only one optimistic update at a time?
 946 | ;; can't make any changes (including local) until tx completes
 947 | ;; everything else gets requed until previous tx succeeds or fails...
 948 | ;; if it fails, cancel?
 949 | ;; maybe more granular - like [e a] level add? FAILURE are detect at tx level, so maybe better to keep it at tx level
 950 | ;; CREATE: tx dependency graph, if pre-req tx fails reverse dependent tx, if pre-req tx succeeds remove dependency
 951 | ;; responses can arrive out of order! can txs arrive on backend out of order?
 952 | ;; need to indicate that tx in flight by :remote/id :pending, but also :remote/tx :pending for existing entities?
 953 | ;; in the mean time local changes are allowed
 954 | ;; (meaning stuff that doesn't touch entities w/ remote/id attrs?, doesn't require mutation)
 955 | ;; three categories: UI changes (selected items etc), local data changes <--> remote data changes
 956 | 
 957 | ;; just keep db-before reference, only makes sense "more than one transaction ahead"
 958 | ;; relying only on db-before ref allows for one timeline, can't have "independent" tx succeed
 959 | ;; example: add man1, add man2; if man1 fails go back to db-before add man1,
 960 | ;;   which means add man2 also fails (but it actually succeeded on backend! so front and back out of sync)
 961 | ;; TODO: order of add/retract  matters
 962 | ;; [nil -> sub,add = val; add,sub = nil] ok, [nil -> add,sub = nil; sub,add = val] fail
 963 | 
 964 | ;; seems we have to generate reverse tx on the basis of db-before because cardinality/one attrs are "overwritten"
 965 | (defn reverse-tx-list
 966 |   [{:keys [db/eav db/schema] :as db-before} [op tx-e tx-a tx-v]]
 967 |   (case op
 968 |     :db/add
 969 |     (if (cardinality-many? schema tx-a)
 970 |       (if-some [db-before-v (get-in eav [tx-e tx-a])]
 971 |         [:db/retract tx-e tx-a tx-v]
 972 |         ;; can optimize by dissoc attr directly? (no old-v means no attr existed originally)
 973 |         [:db/retract tx-e tx-a tx-v])
 974 |       (if-some [db-before-v (get-in eav [tx-e tx-a])]
 975 |         [:db/add tx-e tx-a db-before-v]
 976 |         [:db/retract tx-e tx-a tx-v]))
 977 |     :db/retract
 978 |     (when-some [db-before-v (get-in eav [tx-e tx-a])]
 979 |       (if (cardinality-many? schema tx-a)
 980 |         (if (nil? tx-v)
 981 |           ;; no `v` was specified, so all values of `a` were retracted, add them back in - optimize?
 982 |           (map #(vector :db/add tx-e tx-a %) db-before-v)
 983 |           (when (contains? db-before-v tx-v)
 984 |             [:db/add tx-e tx-a tx-v]))
 985 |         (if (nil? tx-v)
 986 |           ;; nil `v` means `a` was retracted, add it back in
 987 |           [:db/add tx-e tx-a db-before-v]
 988 |           ;; only add tx-v back in only if db-before-v=tx-v meaning it was actually retracted
 989 |           ;; -> should this be captured in tx-data?
 990 |           ;; if nothing had changed tx-data would reflect that by not containing a retract datom
 991 |           (when (= db-before-v tx-v)
 992 |             [:db/add tx-e tx-a tx-v]))))))
 993 | 
 994 | (defn reverse-tx-map
 995 |   [{:keys [db/schema] :as db-before} {:keys [db/id db/op] :as tx-form}]
 996 |   (reduce-kv
 997 |     (fn [tx-data a v]
 998 |       (if (cardinality-many? schema a)
 999 |         (->> v
1000 |              (map #(reverse-tx-list db-before [(or op :db/add) id a %]))
1001 |              (reduce conj tx-data))
1002 |         (conj tx-data (reverse-tx-list db-before [(or op :db/add) id a v]))))
1003 |     [] (dissoc tx-form :db/id)))
1004 | 
(defn reverse-tx-form
  "Reverses a single tx form against `db-before`: map forms go through `reverse-tx-map`,
   everything else through `reverse-tx-list`."
  [db-before tx-form]
  (let [reverse-fn (if (map? tx-form) reverse-tx-map reverse-tx-list)]
    (reverse-fn db-before tx-form)))
1010 | 
(defn reverse-tx-data
  "Generates a \"reverse transaction\": a flat seq of list-form txs that undo `tx-data`,
   computed against `db-before`. Forms with nothing to undo are left out.
   Prints the result with `prn` before returning it."
  [db-before tx-data]
  (let [reversed
        (reduce
          (fn [acc tx-form]
            (if (map? tx-form)
              (into acc (reverse-tx-map db-before tx-form))
              (let [r (reverse-tx-list db-before tx-form)]
                ;; a single tx form starts with its op keyword; anything else is nil or a
                ;; seq of forms (whole-attribute retract of a cardinality/many attr), which
                ;; must be spliced in rather than nested as one element
                (if (keyword? (first r))
                  (conj acc r)
                  (into acc r)))))
          [] tx-data)
        reversed (remove nil? reversed)]
    (prn :reverse-tx-data reversed)
    reversed))


--------------------------------------------------------------------------------
/drafts/entity_graph/scratch.cljc:
--------------------------------------------------------------------------------
  1 | (ns entity-graph.scratch)
  2 | 
  3 | ;; =========
  4 | ;; Indexing Helpers
  5 | 
  6 | ;; EAV index - currently unused
  7 | 
  8 | (defn index-eav-one
  9 |   "Adds [e a v] to eav index. `a` must be a `:db.cardinality/one` attribute.
 10 |    An entity not yet in the index starts out as {:db/id e}. `eav` is updated with `assoc!`."
 11 |   [eav e a v]
 12 |   (let [entity (get eav e {:db/id e})]
 13 |     (assoc! eav e (assoc entity a v))))
 14 | 
 15 | ;; existence of (eav e) must be (is) checked up the stack, else end up (assoc! eav e eav-e[=nil])
 16 | (defn unindex-eav-one
 17 |   "Removes [e a] from eav index. `a` must be a `:db.cardinality/one` attribute.
 18 |    Does not check that `e` is present in the index."
 19 |   [eav e a]
 20 |   (assoc! eav e (dissoc (get eav e) a)))
 21 | 
 22 | (defn index-eav-many
 23 |   "Adds [e a v] to eav index. `a` must be a `:db.cardinality/many` attribute;
 24 |    its values are kept in a set, created on first use."
 25 |   [eav e a v]
 26 |   (let [entity (get eav e {:db/id e})
 27 |         values (get entity a #{})]
 28 |     (assoc! eav e (assoc entity a (conj values v)))))
 29 | 
 30 | ;; existence of (eav e) must be (is) checked up the stack, else end up (assoc! eav e eav-e[=nil])
 31 | (defn unindex-eav-many
 32 |   "Removes [e a v] from eav index. `a` must be a `:db.cardinality/many` attribute.
 33 |    If an empty set of values remains after unindexing, removes the attribute."
 34 |   [eav e a v]
 35 |   (let [entity    (get eav e)
 36 |         remaining (disj (get entity a) v)]
 37 |     (assoc! eav e (if (seq remaining)
 38 |                     (assoc entity a remaining)
 39 |                     (dissoc entity a)))))
 40 | 
 41 | ;; =========
 42 | ;; Alternative random-tempid
 43 | 
 44 | ;; Probability of NO collisions when a random int below 1,000,000 is assigned 1000 times:
 45 | ;; (Math/pow (/ 999999 1000000) (reduce + (range 1000)))
 46 | ;; Probability of no collisions with 4 1000000 nums:
 47 | #_(Math/pow (/ (- (* 1000000 1000000 1000000 1000000) 1) (* 1000000 1000000 1000000 1000000))
 48 |             (reduce + (range 1000)))
 49 | ;; could just use negative integer counter? yes if disallow negative integer tempids
 50 | ;; datomic cloud reference says only string is accepted
 51 | ;; datomic-dev-local accepts negative integers, but doesn't report them in :tempids key after transaction
 52 | (defn random-tempid
 53 |       "Returns a random string tempid \"db.temp-N-N-N-N\", each N a random int in [0, 1000000). Uses core fns only: this ns does not require clojure.string."
 54 |       [] (apply str "db.temp-" (interpose "-" (repeatedly 4 #(rand-int 1000000)))))
 55 | 
 56 | ;; =========
 57 | ;; EAV Map Form Ops
 58 | 
 59 | ;; The following functions support adding/retracting to indexes with map form tx-forms
 60 | ;; The indexes are expected to be passed in as transients and the functions return transients
 61 | 
 62 | (defn merge-by-key
 63 |       "Key-aware variant of `merge-with`: on a key collision the merged value is
 64 |        `(f k existing-val new-val)`. Returns nil when no map in `maps` is truthy."
 65 |       [f & maps]
 66 |       (when (some identity maps)
 67 |             (letfn [(merge-pair [acc other]
 68 |                       ;; fold the entries of `other` into `acc`, resolving collisions with `f`
 69 |                       (reduce (fn [m [k v]]
 70 |                                 (assoc m k (if (contains? m k)
 71 |                                              (f k (get m k) v)
 72 |                                              v)))
 73 |                               (or acc {}) (seq other)))]
 74 |                    (reduce merge-pair maps))))
 75 | 
 76 | (defn merge-entity-vals
 77 |       "Resolves a collision between existing value `v1` and incoming value `v2` of `attr`:
 78 |       `:db.cardinality/many` values are unioned as sets, anything else is overwritten by `v2`."
 79 |       [schema attr v1 v2]
 80 |       ;; :db/id is not :db.cardinality/many, so it takes the overwrite branch
 81 |       (cond
 82 |         (cardinality-many? schema attr) (union v1 v2)
 83 |         :else v2))
 84 | 
 85 | (defn add-map-eav
 86 |       "Stores `tx-form` under its :db/id in the eav index, merged per attribute into `ex-entity` when one is given (`:db.cardinality/many` values as sets)."
 87 |       [schema eav {:keys [db/id] :as tx-form} ex-entity]
 88 |       ;; :db/id in tx-form is fine; :db/op is presumably already dissoced by the caller - confirm
 89 |       (assoc! eav id (if ex-entity
 90 |                        (merge-by-key (partial merge-entity-vals schema) ex-entity tx-form)
 91 |                        tx-form)))
 92 | 
 93 | (defn entity-diff
 94 |       "Returns `ex-entity` minus what `tx-form` retracts. A nil value drops the attribute; a `:db.cardinality/many`
 95 |        value is set-subtracted (attribute dropped once empty); any other value drops the attribute only on a match."
 96 |       [schema ex-entity tx-form]
 97 |       (let [retractions (dissoc tx-form :db/id :db/op)] ;; control keys are not attributes
 98 |            (reduce-kv
 99 |              (fn [entity a v]
100 |                  (cond
101 |                    (nil? v) (dissoc entity a)
102 |                    (cardinality-many? schema a)
103 |                    (let [kept (difference (entity a) v)]
104 |                         (if (seq kept) (assoc entity a kept) (dissoc entity a)))
105 |                    (= (entity a) v) (dissoc entity a)
106 |                    :else entity))
107 |              ex-entity
108 |              retractions)))
109 | 
110 | ;; existence of ex-entity => checked up the stack
111 | ;; existence of attribute value in ex-entity -> entity-diff
112 | ;; nil value of attribute in tx-form -> entity-diff
113 | (defn retract-map-eav
114 |       "Replaces the eav index entry for `tx-form`'s :db/id with `ex-entity` minus the retractions in `tx-form`.
115 |        Values of `:db.cardinality/many` attributes are treated as sets.
116 |        A nil value for an attribute in `tx-form` removes the entire attribute regardless of cardinality.
117 |        Potentially leaves \"empty entry\" in eav index: {id {:db/id id}}
118 |        Existence of `ex-entity` is not checked here."
119 |       [schema eav {:keys [db/id] :as tx-form} ex-entity]
120 |       (assoc! eav id (entity-diff schema ex-entity tx-form)))
121 | 
122 | ;; =========
123 | ;; Transaction Functions
124 | 
125 | ;; note: functions called via handle-tx-fns must not return further :db.fn/call forms - the result is not expanded again
126 | (defn handle-tx-fns
127 |       "Expands `[:db.fn/call f & args]` forms in `tx-data`: each is replaced, in place, by the non-nil
128 |        tx-forms returned from `(apply f db args)`. All other forms pass through. Returns a vector."
129 |       [db tx-data]
130 |       (letfn [(expand [[op f & args :as tx-form]]
131 |                 (if (= op :db.fn/call)
132 |                   (remove nil? (apply f db args))
133 |                   [tx-form]))]
134 |              ;; expansion is single-pass: forms returned by `f` are not expanded again
135 |              (into [] (mapcat expand) tx-data)))
136 | 
137 | ;; =========
138 | ;; Checking that entity id exists in db
139 | 
140 | (defn check-id-existence-list
141 |     "Asserts that every integer entity id used in list form `tx-forms` is a key of `eav`.
142 |      Non-integer ids (e.g. keywords) are not checked. Returns `tx-forms` unchanged."
143 |     [tx-forms eav]
144 |     (doseq [[_ id] tx-forms
145 |             :when (int? id)] ;; only integer ids are looked up
146 |       (assert (contains? eav id) (str ":db.error/invalid-entity-id Invalid entity id: " id)))
147 |     tx-forms)
148 | 
149 | (defn check-id-existence-map
150 |     "Asserts that every integer :db/id found in map form `tx-forms` is a key of `eav`.
151 |      Non-integer ids (e.g. keywords) are not checked. Returns `tx-forms` unchanged."
152 |     [tx-forms eav]
153 |     (doseq [{id :db/id} tx-forms
154 |             :when (int? id)] ;; only integer ids are looked up
155 |       (assert (contains? eav id) (str ":db.error/invalid-entity-id Invalid entity id: " id)))
156 |     tx-forms)
157 | 
158 | ;; =========
159 | ;; Leverage `retraction-set` for constraint checks
160 | 
161 | ;; this version does not rely on indexes reflecting all retractions in tx, leverages retraction-set instead
162 | (when (component? schema a)
163 |   (doseq [attr (:db/isComponent schema)]
164 |     (when-let [held-by-id (get-in ave' [attr v])]
165 |       ;; DISALLOW entity to hold same component under different attrs
166 |       (assert (or (and (= e held-by-id) (= a attr))
167 |                   (contains? retraction-set [held-by-id attr v]) (contains? entity-retractions held-by-id))
168 |               (str ":db.error/component-conflict Component conflict: "
169 |                    "Entity with id: " v " already component of: " held-by-id " under attribute " attr
170 |                    ", asserted for: " e " under attribute " a)))))
171 | ;; TODO: this version does not rely on indexes reflecting all retractions in tx, leverages retraction-set instead
172 | (when (unique? schema a)
173 |   (when-let [held-by-id (get-in ave' [a v])]
174 |     (assert (or (= e held-by-id)
175 |                 (contains? retraction-set [held-by-id a v]) (contains? entity-retractions held-by-id))
176 |             (str ":db.error/unique-conflict Unique conflict: " a ", value: " v " already held by: " held-by-id
177 |                  " asserted for: " e))))
178 | 
179 | ;; REMOVED FROM: `check-db-constraints-many`, since dangling refs are ok and this is not the only way to end up with dangling refs
180 | (when (ref-type? schema a)
181 |   (assert (not (contains? entity-retractions v))
182 |           (str ":db.error/retracted-entity-conflict Can't point to a retracted entity.
183 |                   Attempting to assert " [e a v])))
184 | 
185 | ;; REMOVED FROM: `check-db-constraints-one`, since dangling refs are ok and this is not the only way to end up with dangling refs
186 | (when (ref-type? schema a)
187 |   (assert (not (contains? entity-retractions v))
188 |           (str ":db.error/retracted-entity-conflict Can't point to a retracted entity.
189 |                   Attempting to assert " [e a v])))
190 | 
191 | ;; =========
192 | ;; Checking for dangling refs after transaction completed
193 | 
194 | (defn check-for-dangling-refs1
195 |   "Asserts that `v` is a key of `eav` when `a` is a reference attribute; does nothing for any other attribute."
196 |   [schema eav e a v]
197 |   (let [error-msg ":db.error/dangling-ref A reference attribute points to a non-existent entity: "]
198 |     (when (ref-type? schema a) (assert (contains? eav v) (str error-msg [e a v])))))
199 | 
200 | (defn check-for-dangling-refs-list
201 |   "Runs `check-for-dangling-refs1` on the [e a v] of every list form in `list-assertion-forms`."
202 |   [schema eav list-assertion-forms]
203 |   (run! (fn [[_ e a v]] (check-for-dangling-refs1 schema eav e a v)) list-assertion-forms))
204 | 
205 | (defn check-for-dangling-refs-map
206 |   "Runs `check-for-dangling-refs1` on every attribute value of each map form;
207 |    values of `:db.cardinality/many` attributes are checked one by one."
208 |   [schema eav map-assertion-forms]
209 |   (doseq [{:keys [db/id] :as map-form} map-assertion-forms
210 |           [a v] (dissoc map-form :db/id)
211 |           single-v (if (cardinality-many? schema a) v [v])]
212 |     (check-for-dangling-refs1 schema eav id a single-v)))
213 | 
214 | ;; this misses the cases where `retract` has left an entity with no attributes, and it was therefore removed from EAV index
215 | (defn check-for-dangling-refs
216 |   "Throws if any reference attribute asserted in `tx-data` points to a non-existent entity.
217 |    Must wait until tx completes to do this check. NOTE: a later 1-arg def of the same name in this file replaces this var."
218 |   [schema eav tx-data]
219 |   (let [forms-in (fn [group kinds] (mapcat #(get-in tx-data [group %]) kinds))]
220 |     ;; list and map assertion forms are bucketed under tx-data by the kind of id they carry
221 |     (check-for-dangling-refs-list schema eav (forms-in :list-add [:entity-id :tempid]))
222 |     (check-for-dangling-refs-map schema eav (forms-in :map-add [:entity-id :tempid :no-id]))))
223 | 
224 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
225 | ;; TODO: include these?
226 | ;; TODO: these should take tx-data from transact:
227 | ;; - check only entities
228 | 
229 | (defn find-dangling-refs
230 |   "Returns a seq of all [e a v] tuples in db where attribute `a` is a reference attribute pointing to an
231 |    entity that does not exist in db. Only refs present in the AVE index are seen (value -> id or coll of ids)."
232 |   [{:keys [db/schema db/eav db/ave]}]
233 |   (for [ref-attr (:db/isRef schema)
234 |         [target-id held-by] (get ave ref-attr) ;; one AVE entry per referenced value
235 |         :when (and held-by (not (contains? eav target-id)))
236 |         pointing-id (if (coll? held-by) held-by [held-by])]
237 |     [pointing-id ref-attr target-id]))
238 | 
239 | (defn check-for-dangling-refs [db] ;; asserts that `find-dangling-refs` finds nothing in `db`
240 |   (let [dangling-refs (vec (find-dangling-refs db))] ;; realized: on the JVM `str` of a lazy seq does not print its elements
241 |     (assert (empty? dangling-refs)
242 |             (str ":db.error/dangling-refs Database contains dangling refs: " dangling-refs))))
243 | 
244 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
245 | ;; Support for string version of wildcard (["*"]) in pull to return string keys in pull results
246 | ;; what about non-wildcard patterns? Also want string attributes?
247 | 
248 | (defn ->string-attrs
249 |   "Returns `entity` with every string key replaced by `(keyword k)`. NOTE(review): despite the name this yields keyword keys - confirm intent."
250 |   [entity] (reduce-kv (fn [m k v] (assoc m (if (string? k) (keyword k) k) v)) {} entity))
251 | 
252 | ;; NOTE: would need to ensure (non-entity) map values are not converted
253 | (defn stringify-keys-namespaced
254 |   "Recursively transforms all map keys from keywords to strings. Keys are converted with `str`, so the
255 |    namespace (and the leading colon) is kept: :person/name -> \":person/name\". `schema` is currently unused."
256 |   [schema pull-tree]
257 |   (let [f (fn [[k v]] (if (keyword? k) [(str k) v] [k v]))]
258 |     (clojure.walk/postwalk (fn [x] (if (map? x) (into {} (map f) x) x)) pull-tree))) ;; only maps are rewritten
259 | 
260 | ;; =========
261 | ;; Integrate `find-reverse-refs` into pull?
262 | 
263 | ;; In datomic if you remove all attributes from an entity, pull wildcard (or [:db/id] pattern)
264 | ;; returns {:db/id 101155069755575, :shoe-owned/_shoe ...}
265 | ;; EntityDB has the function `find-reverse-refs`, which could be integrated in pull
266 | ;; option 1: with '_* pull result can include :db/reverse-refs key which returns the result of this fn - must also apply to components
267 | ;; option 2: special :db/reverse-refs attribute that will call this fn


--------------------------------------------------------------------------------
/shadow-cljs.edn:
--------------------------------------------------------------------------------
 1 | {:deps {:aliases [:cljs :test :drafts]}
 2 |  :builds       {:dev
 3 |                 {:target     :browser
 4 |                  :output-dir "resources/public/js/dev"
 5 |                  :modules    {:main
 6 |                               {:entries [entity-graph.core]}}
 7 |                  :devtools   {:repl-pprint true}}}
 8 |  :nrepl        {:port 9000}
 9 |  ;; For ClojureScript REPL with nrepl:
10 |  ;; 0. Configure the nREPL client with localhost and the :nrepl port above (9000)
11 |  ;; 1. Command line: `npx shadow-cljs server`
12 |  ;; 2. Start nREPL and in the repl: (shadow/watch :dev)
13 |  ;; 3. (shadow/browser-repl)
14 |  :socket-repl  {:port 9001}}
15 | 


--------------------------------------------------------------------------------
/test/entity_graph/benchmark.cljc:
--------------------------------------------------------------------------------
  1 | (ns entity-graph.benchmark
  2 |   (:require
  3 |     #?(:clj [clojure.pprint :refer [pprint]]
  4 |        :cljs [cljs.pprint :refer [pprint]])
  5 |     [entity-graph.core :refer [create-db transact cardinality-many? pull] :as eg]))
  6 | 
  7 | (def cardinality-many-values-to-sets #'entity-graph.core/cardinality-many-values-to-sets)
  8 | (def prepare-tx-data #'entity-graph.core/prepare-tx-data)
  9 | 
 10 | ;; =========
 11 | ;; Schema
 12 | 
 13 | (def bench-schema
 14 |   {:person/name
 15 |    {:db/index {:db/map-type :db.map-type/hash-map}}
 16 |    :person/last-name
 17 |    {:db/index {:db/map-type :db.map-type/hash-map}}
 18 |    :person/alias
 19 |    {:db/cardinality :db.cardinality/many
 20 |     :db/index       {:db/map-type :db.map-type/hash-map}}
 21 |    :person/sex
 22 |    {:db/index {:db/map-type :db.map-type/hash-map}}
 23 |    :person/age
 24 |    {:db/index {:db/map-type :db.map-type/hash-map}}
 25 |    :person/salary
 26 |    {:db/index {:db/map-type :db.map-type/hash-map}}})
 27 | 
 28 | (def bench-schema-sorted-ave
 29 |   (reduce (fn [new-schema attr]
 30 |             (assoc-in new-schema [attr :db/index :db/map-type] :db.map-type/sorted-map))
 31 |           bench-schema (keys bench-schema)))
 32 | 
 33 | (def bench-schema-avl-ave
 34 |   (reduce (fn [new-schema attr]
 35 |             (assoc-in new-schema [attr :db/index :db/map-type] :db.map-type/avl-map))
 36 |           bench-schema (keys bench-schema)))
 37 | 
 38 | (def db-empty (create-db bench-schema))
 39 | (def db-sorted (create-db bench-schema-sorted-ave))
 40 | (def db-avl (create-db bench-schema-avl-ave))
 41 | 
 42 | ;; same schema can be reused for both dbs above
 43 | (def schema (:db/schema db-empty))
 44 | 
 45 | (comment
 46 |   (let [bench-schema (assoc bench-schema :nums {:db/cardinality :db.cardinality/many
 47 |                                                 :db/sort        {:db/set-type :db.set-type/sorted-set
 48 |                                                                  :db/comparator <}})
 49 |         db-empty (create-db bench-schema)
 50 |         people10-map-tempid (map (fn [m] (assoc m :nums (take 3 (repeatedly #(rand-int 100))))) people10-map-tempid)
 51 |        {:keys [db-after]} (transact db-empty people10-map-tempid)
 52 |        ]
 53 |     ;(pprint people10-map-tempid)
 54 |    (pprint (take 3 (:db/eav db-after)))
 55 |    ))
 56 | 
 57 | ;; =========
 58 | ;; Benchmark Data Functions
 59 | 
 60 | (let [id (atom 0)] ;; private counter behind `gen-id`
 61 |   (defn gen-id [] (swap! id inc))) ;; `swap!` already returns the new value; a separate deref could observe another caller's increment
 62 | 
 63 | (defn random-person []
 64 |   {:db/id            (str (gen-id))
 65 |    :person/name      (rand-nth ["Ivan" "Petr" "Sergei" "Oleg" "Yuri" "Dmitry" "Fedor" "Denis"])
 66 |    :person/last-name (rand-nth ["Ivanov" "Petrov" "Sidorov" "Kovalev" "Kuznetsov" "Voronoi"])
 67 |    :person/alias     (set (repeatedly (rand-int 10) #(rand-nth ["A. C. Q. W." "A. J. Finn" "A.A. Fair" "Aapeli"
 68 |                                                                 "Aaron Wolfe" "Abigail Van Buren" "Jeanne Phillips"
 69 |                                                                 "Abram Tertz" "Abu Nuwas" "Acton Bell" "Adunis"])))
 70 |    :person/sex       (rand-nth [:sex/male :sex/female])
 71 |    :person/age       (rand-int 100)
 72 |    :person/salary    (rand-int 100000)})
 73 | 
 74 | (def random-persons (repeatedly random-person))
 75 | 
 76 | (defn map->list1
 77 |   "Returns a vector of `[:db/add e a v]` list forms equivalent to `map-form`; one form per value for `:db.cardinality/many` attributes."
 78 |   [schema map-form]
 79 |   (let [e     (:db/id map-form)
 80 |         attrs (cardinality-many-values-to-sets schema (dissoc map-form :db/id))]
 81 |     (reduce-kv
 82 |       (fn [list-forms a v]
 83 |         (into list-forms (map #(vector :db/add e a %)) (if (cardinality-many? schema a) v [v])))
 84 |       [] attrs)))
 85 | 
 86 | (defn map->list
 87 |   "Returns a lazy seq of the list forms of every map form in `map-forms`, concatenated in order."
 88 |   [schema map-forms]
 89 |   (mapcat (partial map->list1 schema) map-forms))
 90 | 
 91 | (defn list-assertions->retractions
 92 |   "Turns each list form [_ e a v] into [:db/retract id a v], where id is `e` looked up in `tempids`."
 93 |   [tempids list-assertions] (for [[_ e a v] list-assertions] [:db/retract (get tempids e) a v]))
 94 | 
 95 | (defn list-assertions->entity-retractions
 96 |   "Returns one [:db/retractEntity id] form per distinct entity in `list-assertions`, ids looked up in `tempids`."
 97 |   [tempids list-assertions]
 98 |   (for [e (into #{} (map second) list-assertions)] [:db/retractEntity (get tempids e)]))
 99 | 
100 | ;; =========
101 | ;; Benchmark Data
102 | 
103 | (def people20k-map-tempid (shuffle (take 20000 random-persons)))
104 | (def people10-map-tempid (shuffle (take 10 random-persons)))
105 | 
106 | (def people10-map-noid (map #(dissoc % :db/id) (shuffle (take 10 random-persons))))
107 | (def people20k-map-noid (map #(dissoc % :db/id) people20k-map-tempid))
108 | 
109 | (def people10-list-tempid (map->list schema people10-map-tempid))
110 | (def people20k-list-tempid (map->list schema people20k-map-tempid))
111 | 
112 | ;; =========
113 | ;; Benchmarks CLJS
114 | 
115 | (comment
116 |   ;;;;;; Prepare Data
117 | 
118 |   ;; Map Form Assertions Prepare Data
119 |   (simple-benchmark [] (prepare-tx-data db-empty people10-map-noid) 10000)
120 |   (simple-benchmark [] (prepare-tx-data db-empty people20k-map-tempid) 5)
121 |   (simple-benchmark [] (prepare-tx-data db-sorted people20k-map-tempid) 5)
122 |   (simple-benchmark [] (prepare-tx-data db-avl people20k-map-tempid) 5)
123 | 
124 |   ;;;;;; Transact
125 | 
126 |   ;; Map Form Assertions
127 | 
128 |   ;; people10-map-noid
129 |   (simple-benchmark [] (transact db-empty people10-map-noid) 10000)
130 |   (simple-benchmark [] (transact db-sorted people10-map-noid) 10000)
131 |   (simple-benchmark [] (transact db-avl people10-map-noid) 10000)
132 | 
133 |   ;; people10-map-tempid
134 |   (simple-benchmark [] (transact db-empty people10-map-tempid) 10000)
135 | 
136 |   ;; people20k-map-noid
137 |   (simple-benchmark [] (transact db-empty people20k-map-noid) 5)
138 |   (simple-benchmark [] (transact db-sorted people20k-map-noid) 5)
139 |   (simple-benchmark [] (transact db-avl people20k-map-noid) 5)
140 | 
141 |   ;; people20k-map-tempid
142 |   (simple-benchmark [] (transact db-empty people20k-map-tempid) 5)
143 | 
144 |   ;; Map Form Assertions - Overwrite - faster than writing to empty db
145 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-map-tempid)
146 |                      people10-map-entity-id (eg/replace-tempids-map people10-map-tempid tempids)]
147 |                     (transact db-after people10-map-entity-id) 10000)
148 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-map-tempid)
149 |                      people20k-map-entity-id (eg/replace-tempids-map people20k-map-tempid tempids)]
150 |                     (transact db-after people20k-map-entity-id) 5)
151 | 
152 |   ;; Map Form Assertions for component checks:
153 |   (simple-benchmark [schema (assoc schema :component1 {:db/valueType :db.type/ref}
154 |                                           :component2 {:db/valueType :db.type/ref}
155 |                                           :component3 {:db/valueType :db.type/ref})
156 |                      people10-map-tempid (map #(assoc % :component1 (str (inc (js/parseInt (:db/id %)))))
157 |                                               (butlast people10-map-tempid))]
158 |                     (transact db-empty people10-map-tempid) 10000)
159 |   (simple-benchmark [schema (assoc schema :component1 {:db/valueType :db.type/ref}
160 |                                           :component2 {:db/valueType :db.type/ref}
161 |                                           :component3 {:db/valueType :db.type/ref})
162 |                      people20k-map-tempid (map #(assoc % :component1 (str (inc (js/parseInt (:db/id %)))))
163 |                                                (butlast people20k-map-tempid))]
164 |                     (transact db-empty people20k-map-tempid) 5)
165 | 
166 |   ;; List Form Assertions
167 |   (simple-benchmark [] (transact db-empty people10-list-tempid) 10000)
168 |   (simple-benchmark [] (transact db-empty people20k-list-tempid) 5)
169 | 
170 |   ;; List Form Assertions - Overwrite
171 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
172 |                      people10-list-entity-id (eg/replace-tempids-list people10-list-tempid tempids)]
173 |                     (transact db-after people10-list-entity-id) 10000)
174 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
175 |                      people20k-list-entity-id (eg/replace-tempids-list people20k-list-tempid tempids)]
176 |                     (transact db-after people20k-list-entity-id) 5)
177 | 
178 |   ;; Retractions
179 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
180 |                      people10-retract (list-assertions->retractions tempids people10-list-tempid)]
181 |                     (transact db-after people10-retract) 10000)
182 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
183 |                      people20k-retract (list-assertions->retractions tempids people20k-list-tempid)]
184 |                     (transact db-after people20k-retract) 5)
185 | 
186 |   ;; retractEntity
187 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
188 |                      people10-retract-entity (list-assertions->entity-retractions tempids people10-list-tempid)]
189 |                     (transact db-after people10-retract-entity) 10000)
190 |   (simple-benchmark [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
191 |                      people20k-retract-entity (list-assertions->entity-retractions tempids people20k-list-tempid)]
192 |                     (transact db-after people20k-retract-entity) 5)
193 | 
194 |   ;;;;;; Misc
195 | 
196 |   ;; pull
197 |   (simple-benchmark [{:keys [db-after]} (transact db-empty people10-map-noid)]
198 |                     (pull db-after '[*] 1) 10000)
199 | 
200 |   ;; expand-nested-entities
201 |   ;; no nested entities
202 |   (simple-benchmark [] (eg/expand-nested-entities (:db/schema db-empty) people20k-map-tempid) 1)
203 | 
204 |   ;; resolve-temp-ids-unique-identity
205 |   ;; no :db.unique/identity attrs in schema
206 |   (simple-benchmark [{:keys [db-after]} (transact db-empty people20k-map-noid)]
207 |                     (eg/resolve-tempids db-after people20k-map-tempid) 1)
208 |   )
209 | 
210 | ;; =========
211 | ;; Benchmarks CLJ
212 | 
213 | (comment
214 |   ;;;;;; Prepare Data
215 | 
216 |   ;; Map Form Assertions Prepare Data
217 |   (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-map-noid)))
218 |   (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-map-tempid)))
219 |   (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-map-noid)))
220 |   (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-map-tempid)))
221 | 
222 |   ;; List Form Assertions Prepare Data
223 |   (time (dotimes [_ 40000] (prepare-tx-data db-empty people10-list-tempid)))
224 |   (time (dotimes [_ 20] (prepare-tx-data db-empty people20k-list-tempid)))
225 | 
226 |   ;;;;;; Transact
227 | 
228 |   ;; Map Form Assertions
229 |   (time (dotimes [_ 40000] (transact db-empty people10-map-noid)))
230 |   (time (dotimes [_ 40000] (transact db-empty people10-map-tempid)))
231 |   (time (dotimes [_ 20] (transact db-empty people20k-map-noid)))
232 |   (time (dotimes [_ 20] (transact db-empty people20k-map-tempid)))
233 | 
234 |   ;; List Form Assertions
235 |   (time (dotimes [_ 40000] (transact db-empty people10-list-tempid)))
236 |   (time (dotimes [_ 20] (transact db-empty people20k-list-tempid)))
237 | 
238 |   ;; Retractions
239 |   (let [{:keys [db-after tempids]} (transact db-empty people10-list-tempid)
240 |         people10-retract (list-assertions->retractions tempids people10-list-tempid)]
241 |     (time (dotimes [_ 40000] (transact db-after people10-retract))))
242 |   (let [{:keys [db-after tempids]} (transact db-empty people20k-list-tempid)
243 |         people20k-retract (list-assertions->retractions tempids people20k-list-tempid)]
244 |     (time (dotimes [_ 20] (transact db-after people20k-retract)))) ;; 20 runs, like the other 20k CLJ benchmarks (was 40000, copied from the 10-row case)
245 |   )
246 | 


--------------------------------------------------------------------------------
/test/entity_graph/benchmark_vs.cljc:
--------------------------------------------------------------------------------
  1 | (ns entity-graph.benchmark-vs
  2 |   "Benchmark entity-graph against datascript and asami."
  3 |   (:require #?(:clj [clojure.pprint :refer [pprint]]
  4 |                :cljs [cljs.pprint :refer [pprint]])
  5 |             [asami.core :as asami]
  6 |             [datascript.core :as ds]
  7 |             [entity-graph.core :as eg]
  8 |             [entity-graph.benchmark :refer [db-sorted people20k-map-noid]]))
  9 | 
 10 | ;; =========
 11 | ;; Transact
 12 | 
 13 | ;; NOTE: All attributes in Asami are multi-cardinality
 14 | 
 15 | ;; NOTE: The schema for `db-sorted` indexes all attributes in AVE,
 16 | ;; so that its `transact` performance can be compared fairly
 17 | 
 18 | (defn tx
 19 |   "Transacts `tx-data` into `db` and returns only the resulting db value, so it can be used with `swap!` on a db atom."
 20 |   [db tx-data] (-> (eg/transact db tx-data) :db-after))
 21 | 
 22 | (comment
 23 |   ;; 2970 msecs
 24 |   (simple-benchmark []
 25 |                     (eg/transact db-sorted people20k-map-noid) 5)
 26 |   ;; 4370 msecs
 27 |   (simple-benchmark [conn (atom db-sorted)]
 28 |                     (swap! conn tx people20k-map-noid) 5)
 29 |   ;; 8710 msecs
 30 |   (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))]
 31 |                     (ds/transact ds-conn people20k-map-noid) 5)
 32 |   ;; 10683 msecs
 33 |   (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))]
 34 |                     (asami/transact asami-conn {:tx-data people20k-map-noid}) 5)
 35 |   )
 36 | 
 37 | ;; CLJ
 38 | (comment
 39 |   ;; 949 msecs
 40 |   (let [conn (atom db-sorted)]
 41 |     (time (dotimes [_ 5] (swap! conn tx people20k-map-noid))))
 42 |   ;; 1831 msecs
 43 |   (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))]
 44 |     (time (dotimes [_ 5] (ds/transact ds-conn people20k-map-noid))))
 45 |   ;; 1810 msecs
 46 |   (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))]
 47 |     (time (dotimes [_ 5] (asami/transact asami-conn {:tx-data people20k-map-noid}))))
 48 |   )
 49 | 
 50 | ;; =========
 51 | ;; Pull
 52 | 
 53 | ;; NOTE: asami doesn't support pull
 54 | 
 55 | (comment
 56 |   ;; 32 msecs
 57 |   (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
 58 |                      _ (pprint (eg/pull db-after '[*] 1))]
 59 |                     (eg/pull db-after '[*] 1) 10000)
 60 | 
 61 |   ;; 118 msecs
 62 |   (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
 63 |                      _ (ds/transact ds-conn people20k-map-noid)
 64 |                      _ (pprint (ds/pull (ds/db ds-conn) '[*] 1))]
 65 |                     (ds/pull (ds/db ds-conn) '[*] 1) 10000)
 66 | 
 67 |   ;; 15 msecs
 68 |   (simple-benchmark [db-after (tx db-sorted people20k-map-noid)]
 69 |                     (eg/pull db-after '[:person/name] 1) 10000)
 70 |   ;; 34 msecs
 71 |   (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
 72 |                      _ (ds/transact ds-conn people20k-map-noid)]
 73 |                     (ds/pull (ds/db ds-conn) '[:person/name] 1) 10000)
 74 |   )
 75 | 
 76 | ;; CLJ
 77 | (comment
 78 |   ;; 9 msecs
 79 |   (let [db-after (tx db-sorted people20k-map-noid)]
 80 |     (time (dotimes [_ 10000] (eg/pull db-after '[*] 1))))
 81 |   ;; 66 msecs
 82 |   (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
 83 |         _ (ds/transact ds-conn people20k-map-noid)]
 84 |     (time (dotimes [_ 10000] (ds/pull (ds/db ds-conn) '[*] 1))))
 85 | 
 86 |   ;; 3.45 msecs
 87 |   (let [db-after (tx db-sorted people20k-map-noid)]
 88 |     (time (dotimes [_ 10000] (eg/pull db-after '[:person/name] 1))))
 89 |   ;; 17.3 msecs
 90 |   (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
 91 |         _ (ds/transact ds-conn people20k-map-noid)]
 92 |     (time (dotimes [_ 10000] (ds/pull (ds/db ds-conn) '[:person/name] 1))))
 93 |   )
 94 | 
 95 | ;; =========
 96 | ;; Query
 97 | 
 98 | (def q1 '[:find ?e
 99 |           :where [?e :person/name "Ivan"]])
100 | 
101 | (def q2 '[:find ?e ?l ?a
102 |           :where
103 |           [?e :person/name "Ivan"]
104 |           [?e :person/last-name ?l]
105 |           [?e :person/age ?a]
106 |           [?e :person/sex :sex/male]])
107 | 
108 | ;; NOTE: entity-graph doesn't support datalog style queries
109 | ;; these are entity db analogs of the same queries:
110 | 
111 | (defn q1-edb
112 |   "Analog of `q1`: returns whatever `db` stores at [:db/ave :person/name \"Ivan\"], or nil."
113 |   [db] (-> db :db/ave :person/name (get "Ivan")))
114 | 
115 | (defn q2-edb
116 |   "Analog of `q2`: for ids under both the \"Ivan\" and :sex/male :db/ave entries, returns a vector of id/last-name/age maps."
117 |   [db]
118 |   (let [ids (clojure.set/intersection (get-in db [:db/ave :person/name "Ivan"])
119 |                                       (get-in db [:db/ave :person/sex :sex/male]))]
120 |     (mapv #(select-keys (get-in db [:db/eav %]) [:db/id :person/last-name :person/age]) ids))) ; eager: a lazy `map` would never be realized by the timing loops
121 | 
122 | (comment ; REPL-only query timings via simple-benchmark, 1000 iterations each; every setup prints the result count first
123 |   ;; q1: single where clause, single item in tuple
124 |   ;; entity-graph (times the inline get-in, the same lookup q1-edb performs): 1 msecs
125 |   (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
126 |                      _ (println (count (q1-edb db-after)))]
127 |                     (get-in db-after [:db/ave :person/name "Ivan"]) 1000)
128 |   ;; ds/q with q1 (the timed expression also calls ds/db each iteration): 2999 msecs
129 |   (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
130 |                      _ (ds/transact ds-conn people20k-map-noid)
131 |                      _ (println (count (ds/q q1 (ds/db ds-conn))))]
132 |                     (ds/q q1 (ds/db ds-conn)) 1000)
133 |   ;; asami/q with q1 (connection URL made unique with random-uuid): 425 msecs
134 |   (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
135 |                      _ (asami/transact asami-conn {:tx-data people20k-map-noid})
136 |                      _ (println (count (asami/q q1 (asami/db asami-conn))))]
137 |                     (asami/q q1 (asami/db asami-conn)) 1000)
138 | 
139 |   ;; q2: multiple where clauses, multiple items in tuple
140 |   ;; entity-graph via q2-edb: 873 msecs
141 |   (simple-benchmark [db-after (tx db-sorted people20k-map-noid)
142 |                      _ (println (count (q2-edb db-after)))]
143 |                     (q2-edb db-after) 1000)
144 |   ;; ds/q with q2: 14029 msecs
145 |   (simple-benchmark [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
146 |                      _ (ds/transact ds-conn people20k-map-noid)
147 |                      _ (println (count (ds/q q2 (ds/db ds-conn))))]
148 |                     (ds/q q2 (ds/db ds-conn)) 1000)
149 |   ;; asami/q with q2: 286 msecs
150 |   (simple-benchmark [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
151 |                      _ (asami/transact asami-conn {:tx-data people20k-map-noid})
152 |                      _ (println (count (asami/q q2 (asami/db asami-conn))))]
153 |                     (asami/q q2 (asami/db asami-conn)) 1000)
154 |   )
155 | 
156 | ;; CLJ
157 | (comment ; REPL-only query timings via time/dotimes, 10000 iterations each; every setup prints the result count first
158 |   ;; q1: single where clause, single item in tuple
159 |   ;; entity-graph (times the inline get-in, the same lookup q1-edb performs): 1.21 msecs
160 |   (let [db-after (tx db-sorted people20k-map-noid)
161 |         _ (println (count (q1-edb db-after)))]
162 |     (time (dotimes [_ 10000] (get-in db-after [:db/ave :person/name "Ivan"]))))
163 |   ;; ds/q with q1 (the timed loop also calls ds/db each iteration): 6269 msecs
164 |   (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
165 |         _ (ds/transact ds-conn people20k-map-noid)
166 |         _ (println (count (ds/q q1 (ds/db ds-conn))))]
167 |     (time (dotimes [_ 10000] (ds/q q1 (ds/db ds-conn)))))
168 |   ;; asami/q with q1 (connection URL made unique with random-uuid): 293 msecs
169 |   (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
170 |         _ (asami/transact asami-conn {:tx-data people20k-map-noid})
171 |         _ (println (count (asami/q q1 (asami/db asami-conn))))]
172 |     (time (dotimes [_ 10000] (asami/q q1 (asami/db asami-conn)))))
173 | 
174 |   ;; q2: multiple where clauses, multiple items in tuple
175 |   ;; entity-graph via q2-edb: 2532 msecs
176 |   (let [db-after (tx db-sorted people20k-map-noid)
177 |         _ (println (count (q2-edb db-after)))]
178 |     (time (dotimes [_ 10000] (q2-edb db-after))))
179 |   ;; ds/q with q2: 32046 msecs
180 |   (let [ds-conn (ds/conn-from-db (ds/empty-db {:person/alias {:db/cardinality :db.cardinality/many}}))
181 |         _ (ds/transact ds-conn people20k-map-noid)
182 |         _ (println (count (ds/q q2 (ds/db ds-conn))))]
183 |     (time (dotimes [_ 10000] (ds/q q2 (ds/db ds-conn)))))
184 |   ;; asami/q with q2: 734 msecs
185 |   (let [asami-conn (asami/connect (str "asami:mem://" (random-uuid)))
186 |         _ (asami/transact asami-conn {:tx-data people20k-map-noid})
187 |         _ (println (count (asami/q q2 (asami/db asami-conn))))]
188 |     (time (dotimes [_ 10000] (asami/q q2 (asami/db asami-conn)))))
189 |   )
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------