├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── doc ├── LL3-collections-enumerators.md ├── LL3-collections-enumerators.txt ├── LL3-collections-talk.pdf ├── adams.pdf ├── hirai-yamamoto.pdf ├── making-data-structures-persistent.pdf └── scheme-wttree.txt ├── project.clj ├── resources └── rdfs.edn ├── src └── wbtree │ ├── tree.clj │ ├── types.clj │ └── util.clj └── test └── wbtree ├── clojure_set.txt └── tree_test.clj /.gitignore: -------------------------------------------------------------------------------- 1 | pom.xml 2 | pom.xml.asc 3 | *jar 4 | /lib/ 5 | /classes/ 6 | /target/ 7 | /checkouts/ 8 | .lein-deps-sum 9 | .lein-repl-history 10 | .lein-plugins/ 11 | .lein-failures 12 | *~ 13 | .nrepl-port -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: clojure 2 | script: lein test 3 | jdk: 4 | - oraclejdk8 5 | - openjdk7 6 | 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of Washington and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clj-wbtree 2 | 3 | #### _Weight Balanced Functional Binary Tree (Hirai-Yamamoto Tree)_ 4 | ------ 5 | [![Build Status](https://travis-ci.org/danlentz/clj-wbtree.svg)](https://travis-ci.org/danlentz/clj-wbtree)[![Dependency Status](https://www.versioneye.com/user/projects/54d1e1b73ca0840b19000070/badge.svg?style=flat)](https://www.versioneye.com/user/projects/54d1e1b73ca0840b19000070) 6 | 7 | 8 | ## Overview 9 | 10 | 11 | This is an implementation of a weight-balanced binary tree data 12 | structure based on the following references: 13 | 14 | - Adams (1992) 15 | 'Implementing Sets Efficiently in a Functional Language' 16 | Technical Report CSTR 92-10, University of Southampton. 17 | 18 | - Hirai and Yamamoto (2011) 19 | 'Balancing Weight-Balanced Trees' 20 | Journal of Functional Programming / 21 (3): 21 | Pages 287-307 22 | 23 | - Oleg Kiselyov 24 | 'Towards the best collection API, A design of the overall optimal 25 | collection traversal interface' 26 | 27 | 28 | - Nievergelt and Reingold (1972) 29 | 'Binary Search Trees of Bounded Balance' 30 | STOC '72 Proceedings 31 | 4th Annual ACM symposium on Theory of Computing 32 | Pages 137-142 33 | 34 | - Driscoll, Sarnak, Sleator, and Tarjan (1989) 35 | 'Making Data Structures Persistent' 36 | Journal of Computer and System Sciences Volume 38 Issue 1, February 1989 37 | 18th Annual ACM Symposium on Theory of Computing 38 | Pages 86-124 39 | 40 | - MIT Scheme weight balanced tree as reimplemented by Yoichi Hirai 41 | and Kazuhiko Yamamoto using the revised non-variant algorithm recommended 42 | integer balance parameters from (Hirai/Yamomoto 2011). 43 | 44 | ## Features 45 | 46 | Some unique features of a weight-balanced binary-tree as compared with 47 | other binary tree algorithms: 48 | 49 | 50 | - Less frequent rebalancing as compared to height-balanced 51 | implementations such as red-black or avl trees. 52 | 53 | - Logarithmic rank/at-rank indexed element access. 54 | 55 | 56 | This particular implementation also provides additional useful 57 | qualities such as lazy traversal, partial enumeration, universal order, 58 | and search for a given key in only d comparisons (where d is depth of 59 | tree) rather than the traditional compare/low compare/high which takes 60 | on average (* 1.5 (- d 1)) comparisons. In addition, a comprehensive 61 | functional binary tree api provides a rich collection of tools 62 | for the creation of efficient higher-order data structures. 63 | 64 | 65 | ## Usage 66 | 67 | ### Leiningen 68 | 69 | [![Clojars Project](http://clojars.org/danlentz/clj-wbtree/latest-version.svg)](http://clojars.org/danlentz/clj-wbtree) 70 | 71 | ## Examples 72 | 73 | ## Credits 74 | 75 | Warm appreciation and thanks for the skill and effort of Jason Wolfe and 76 | Zach Tellman whose collaborative help tuning this tree for Clojure were 77 | indespensible. 78 | 79 | ## License 80 | 81 | Copyright © 2014 FIXME 82 | 83 | Distributed under the Eclipse Public License either version 1.0 or (at 84 | your option) any later version. 85 | -------------------------------------------------------------------------------- /doc/LL3-collections-enumerators.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Towards the best collection API 4 | 5 | ## Abstract 6 | 7 | Most programming languages support collections, represented by an 8 | in-memory data structure, a file, a database, or a generating 9 | function. A programming language system gives us typically one of the 10 | two interfaces to systematically access elements of a collection. One 11 | traversal API is based on enumerators -- e.g., for-each, map, filter 12 | higher-order procedures -- of which the most general is fold. The 13 | second approach relies on streams, a.k.a. cursors, lazy 14 | lists. Generators such as the ones in Icon, Ruby and Python are a 15 | hybrid approach. 16 | 17 | This article presents a design of the overall optimal collection 18 | traversal interface, which is based on a left-fold-like combinator 19 | with premature termination. We argue from the practical standpoint 20 | that such an interface is superior: in efficiency; in ease of 21 | programming; in more predictable resource utilization and avoidance of 22 | resource leaks. 23 | 24 | We also demonstrate a procedure that converts _any_ left-fold-like 25 | enumerator for any collection into the corresponding 26 | cursor. Therefore, the stream-based interface, when it is truly needed 27 | or preferred, can be automatically and generically derived from the 28 | proposed interface. 29 | 30 | We present two variants of the left-fold enumerator: for languages 31 | with and without first-class continuations. The proposed traversal 32 | interface has been implemented in a Scheme database access API used in 33 | a production environment. The generic-left-fold-based interface 34 | underlies the Collection SRFI (Scheme Request for Implementation) and 35 | is being considered for an Oracle database interface in Haskell. 36 | 37 | 38 | ## Introduction and terminology 39 | 40 | Access to collections cuts through many programming languages. By a 41 | collection we mean a hash table; a file as a collection of lines, 42 | words, or bytes; a file system directory; an XML document; a suffix 43 | tree; or a "dynamic" collection produced by a database query or any 44 | other generating function. 45 | 46 | There are two basic approaches to systematically access collection 47 | elements. One approach relies on enumerators such as 'map', 'for-each' 48 | and 'filter'. The most general of those is fold, which lets us 49 | maintain our state during the traversal. The other traversal API is 50 | based on lazy lists (a.k.a. cursors, or streams). 51 | 52 | A word about terminology is in order, as it is rather confusing. We 53 | will call a higher-order for-each-type procedure an enumerator. It is 54 | this procedure that takes a collection and a handler, and applies the 55 | handler to each element of the collection in turn. The handler itself 56 | will be called an iteratee, because it is being applied -- by an 57 | active entity, the enumeratOR. An enumerator is presumably privy to 58 | the internals of a collection; it knows how to fetch elements in some 59 | (specified) order, and knows when to stop. A handler does not have 60 | this knowledge, nor is it supposed to. Throughout the article we 61 | eschew the commonly used term 'iterator': in C++ it means an accessor, 62 | which does not iterate over a collection. Rather, it is _being_ 63 | iterated. On the other hand, languages like OCaml provide a procedure 64 | 'iter' that actively traverses a collection, applying a user-defined 65 | function to elements. 66 | 67 | The next section argues from the practical standpoint against cursors 68 | and for enumerators. We contend that an enumerator such as left fold 69 | should be the primary means of traversing and obtaining all values of 70 | a collection. Enumerators are superior to cursors: 71 | 72 | - in efficiency 73 | - in ease of programming 74 | - in more predictable resource utilization and avoidance of resource leaks. 75 | 76 | The latter is of special importance for collections backed by external 77 | resources such as file handles and database connections. 78 | 79 | Section 3 proposes the overall optimal enumeration interface, which 80 | adds premature termination and explicit multiple state variables to 81 | the left fold. Section 4 discusses generators. 82 | 83 | Given a stream, we can always construct the corresponding 84 | enumerator. It is perhaps less appreciated that given a left fold 85 | enumerator, we can always construct the corresponding stream: we can 86 | invert an enumerator inside out. Section 5 describes the inversion 87 | procedure, which is fully generic and works for any 88 | collection. Because the enumerator interface subsumes the cursor-based 89 | one and because the former is more efficient and less error-prone, a 90 | collection API should offer enumerators as the primary, native 91 | traversal interface. 92 | 93 | The automatical enumerator inversion procedure in Section 5 relies on 94 | first-class (delimited) continuations. However, we can write a similar 95 | enumerator inversion even in a language that lacks first-class 96 | continuations. Section 6 demonstrates a non-recursive left-fold-like 97 | combinator that can be instantiated into the ordinary left fold or into 98 | a stream. Both instantiation procedures are generic and do not depend 99 | on the type of the collection. 100 | 101 | In conclusion, we recap our recommendations for the optimal collection 102 | traversal API and point out to the existing implementations of the 103 | API. 104 | 105 | ## Why an enumerator is better than a cursor 106 | 107 | There are many fundamental reasons why enumerators such as fold are 108 | profoundly better than cursors. One such reason is the ease of program 109 | analysis and optimizations [Sheard, Bananas, Hutton]. This 110 | article will not discuss them. Instead, we concentrate on utterly 111 | practical considerations: performance, ease of programming, and 112 | resource utilization. 113 | 114 | Writing a procedure that traverses, e.g., an AVL tree is far easier 115 | than writing a cursor that remembers the current position and can 116 | advance to the next one. The latter is especially tricky if the tree 117 | in question does not have parent pointers (which waste space and lead 118 | to sharing and resource leakage problems). One look at the C++ STL 119 | should convince one how wretched programming of iterators could be. 120 | 121 | The "current element" of a cursor is a state, which can be altered by 122 | advancing the cursor. This state introduces an implicit dependency 123 | among all expressions that use the cursor, similar to the dependency 124 | imposed by global mutable variables. Therefore, programming with 125 | cursors is error-prone. In contrast, an enumerator does not expose its 126 | traversal state to an iteratee and does not permit any alteration to 127 | that state by an iteratee. The difference between the leaky and the 128 | perfect encapsulations of the traversal state is well explained in 129 | [ULLMAN] when comparing a cursor-based network database query language 130 | DBTG with SQL. Incidentally, streams, another cursor-based interface, 131 | expose the current element and hence eliminate the implicit state 132 | dependency. 133 | 134 | Another problem of cursors is telling the user that there are no more 135 | values to access. When a collection is exhausted, cursor operations 136 | typically return an "out-of-band" value such as EOF (cf. getc() in C) 137 | or raise an exception (cf. iterator.next() in Python). None of these 138 | approaches appear entirely satisfactory. Unlike cursors, an enumerator 139 | does not need to do anything special to tell the iteratee that the 140 | traversal is finished: the enumerator will simply return. There is no 141 | longer a problem with out-of-band values and user's disregarding such 142 | values. 143 | 144 | Cursors are simply less efficient. A cursor must maintain the state of 145 | the traversal. That state may be invalid. Each operation on a cursor, 146 | therefore, must first verify that the cursor is in the valid 147 | state. The cost of the checks quickly adds up. 148 | 149 | This issue of the inefficiency of cursors is well known. For example, 150 | it is highly advisable to use "native" operations to move large 151 | sections of an array (ArrayCopy in Java) rather than copy 152 | element-by-element. The cost of an array bound check on each access to 153 | an element is one of the considerations. 154 | 155 | Enumerators can do more than just eliminate redundant bound 156 | checks. Enumerators, if implemented as staged functions (e.g., C++ 157 | templates) can tile or unroll loops and thus partially or completely 158 | remove the traversal iteration overhead [Veldhuizen]. The perfect 159 | encapsulation of the traversal state makes enumerators particularly 160 | adaptable for multi-stage programming. 161 | 162 | Databases give another example of the efficiency of enumerators: "The 163 | performance of cursors is horrible in almost all systems. One of us 164 | (Shasha) once had an experience of re-writing an eight-hour query 165 | having nested cursors into a cursor-free query that took 15 seconds." 166 | [Shasha] 167 | 168 | It is often said that the key to the best performance is to 169 | do more on the server. To find the maximum value of a database table 170 | column, it's far faster to evaluate 171 | `select MAX(col) from collection` 172 | than to open a cursor on the collection and retrieve all values from 173 | 'col' searching for the maximum. Stored procedure languages (some of 174 | which are quite sophisticated) were introduced to make the server-side 175 | processing easier and powerful. We should stress that 176 | `select MAX(col) from collection` 177 | is precisely equivalent to 178 | `foldl max lower_bound collection` 179 | 180 | 181 | Traversing a collection often requires resources, e.g., a stack space, 182 | a connection to a database, a DIR handle, a DOM tree of an XML 183 | document, etc. An enumerator takes care of allocating these resources. 184 | When iteratee explicitly tells the enumerator to stop -- or when the 185 | collection is exhausted -- the enumerator can orderly free the 186 | resources. An enumerator can be programmed as 187 | 188 | ```scheme 189 | (define (enum proc collection) 190 | (let ((database-connection #f)) 191 | (dynamic-wind 192 | (lambda () 193 | (set! database-connection 194 | (open-connection collection))) 195 | (lambda () (iterate proc)) 196 | (lambda () 197 | (set! database-connection 198 | (close-connection database-connection)))))) 199 | ``` 200 | 201 | If 'proc' does not capture its continuation [Footnote-1] -- as it is 202 | often the case -- the database connection is opened (taken from the 203 | pool) at the beginning of the iteration and is returned to the pool at 204 | the end. If we were to provide access to our collection in the form of 205 | a cursor, we would have to place the variable 'database-connection' 206 | into that cursor. We cannot close the database until the cursor is 207 | alive. But how do we know when there are no alive cursors and 208 | therefore it is safe to recycle the database connection? We must rely 209 | either on the programmer's explicitly closing the connection, or on a 210 | finalizer. The sheer number of internet security advisories concerning 211 | memory allocation problems indicates that manual management of 212 | resources is greatly error-prone. The finalizer solution is not 213 | satisfactory either: finalizers are rarely supported and when they 214 | are, they are an unreliable tool to manage precious resources. The 215 | execution of finalizers is unpredictable and is generally beyond 216 | programmer's control. 217 | 218 | [Footnote-1] 219 | An iteratee may call a continuation captured before the enumerator was 220 | entered. For that reason, an enumerator should employ a dynamic-wind 221 | to detect such an attempt to escape, shed excessive resources and 222 | switch to a "low-power" mode while the iteratee is on the run. 223 | 224 | 225 | ## The most general enumeration interface 226 | 227 | We propose the following general enumeration interface. It is based on 228 | a left-fold enumerator and explicitly supports multiple state 229 | variables (i.e., seeds) and a premature termination of the 230 | iteration. In this Section we describe the interface in pseudo-code, 231 | which can be instantiated in Scheme [SRFI-44], Haskell ([Daume], 232 | Section 6) or other concrete language. 233 | 234 | The enumeration procedure 235 | coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ] 236 | 237 | traverses the collection denoted by COLL. The procedure takes one or 238 | more state arguments denoted by SEED, and returns just as many. PROC 239 | is an iteratee procedure: 240 | 241 | PROC VAL SEED SEED ... -> [INDIC SEED SEED ...] 242 | 243 | It takes n+1 arguments and returns n+1 values. The first argument is 244 | the current value retrieved from the collection. The other n arguments 245 | are the seeds. The first return value is a boolean indicator, whose 246 | false value causes the premature termination of the iteration. The 247 | other return values from PROC are the new values of the seeds. The 248 | procedure coll-fold-left enumerates the collection in some order and 249 | invokes PROC passing it the current value of the collection and the 250 | current seeds. The first invocation of PROC receives SEEDs that were 251 | the arguments of coll-fold-left. The further invocations of PROC 252 | receive SEEDs that were produced by the previous invocation of 253 | PROC. When the collection is exhausted or when the PROC procedure 254 | returns the indicator value of false, coll-fold-left terminates the 255 | iterations, disposes of allocated resources, and returns the current 256 | SEEDs. If the collection COLL is empty, coll-fold-left does not invoke 257 | PROC and returns its argument SEEDs. 258 | 259 | 260 | 261 | ## Enumerators and generators 262 | 263 | A programming language Icon popularized generators as a way to 264 | traverse actual and virtual collections. Generators are also supported 265 | in Ruby and Python. The documentation of Icon defines a generator as 266 | an expression that can produce several values, on demand. 267 | Multiple-valued expressions can also be implemented with shift and 268 | reset [SHIFT]. The paper [SHIFT] gives many examples of their use. 269 | 270 | Generators occupy an intermediate place between enumerators and 271 | cursors. A generator is just as easy to write as an enumerator. It 272 | traverses a collection, fetches the current element and _yields_ it, 273 | by passing the element to a dedicated procedure or syntax form. When 274 | the latter returns, the traversal continues. On the other hand, 275 | generators are trivially related to streams [ENUM-CC]. The latter 276 | article discusses generators and enumerators in more 277 | detail. Incidentally, the article demonstrates that a generator-based 278 | code in Python can be translated into Scheme almost 279 | verbatim. Generators give the first hint that enumerators and cursors 280 | are related via first-class continuations. 281 | 282 | Like cursors and streams, generators are demand-driven. A user must 283 | explicitly request a new value to advance the traversal. Therefore, 284 | like cursors generators leak resources: it is not clear when the 285 | iteration should be assumed terminated and the associated resources 286 | can be safely disposed of. 287 | 288 | ## How to invert an enumerator in a language with first-class continuations 289 | 290 | Sometimes we indeed need to traverse a collection via a cursor. 291 | Reasons may include moving data from one collection to another, or 292 | interfacing legacy code. If a collection API provides enumerators, we 293 | obtain cursors for free. We pass an enumerator to a generic 294 | translation procedure, which inverts the enumerator "inside out" and 295 | returns a cursor. 296 | 297 | The following code illustrates the conversion in Scheme, a language 298 | with first-class continuations. The procedure lfold->lazy-list is a 299 | fully generic translation procedure: it takes a left-fold enumerator 300 | for _any_ collection, and converts the enumerator to a stream (lazy 301 | list). The latter is a realization of a cursor. 302 | 303 | ```scheme 304 | (define (lfold->lazy-list lfold collection) 305 | (delay 306 | (call-with-current-continuation 307 | (lambda (k-main) 308 | (lfold collection 309 | (lambda (val seed) 310 | (values 311 | (call-with-current-continuation 312 | (lambda (k-reenter) 313 | (k-main 314 | (cons val 315 | (delay 316 | (call-with-current-continuation 317 | (lambda (k-new-main) 318 | (set! k-main k-new-main) 319 | (k-reenter #t)))))))) 320 | seed)) 321 | '()) ; Initial seed 322 | (k-main '()))))) 323 | 324 | ``` 325 | 326 | The present article is an abstract. Code in a functional language is 327 | the best abstract. The discussion is delegated to the talk and the 328 | full paper. The article [ENUM-CC] discusses the inversion procedure in 329 | more detail and points out to the complete code and the test cases. 330 | 331 | 332 | ## How to invert an enumerator in a language without first-class continuations 333 | 334 | If a programming language lacks first-class continuations, the 335 | conversion from an enumerator to a cursor is still possible. However, 336 | we need to generalize the enumerator interface and make it 337 | non-recursive. For concreteness, this section demonstrates our 338 | approach for one particular language without first-class 339 | continuations: Haskell. Applications to other languages are 340 | straightforward. 341 | 342 | In Haskell, the general left-fold enumerator has the following 343 | interface [HINV]: 344 | 345 | > type CFoldLeft coll val m seed = coll -> CollEnumerator val m seed 346 | > type CollEnumerator val m seed = 347 | > Iteratee val seed 348 | > -> seed -- the initial seed 349 | > -> m seed 350 | > type Iteratee val seed = seed -> val -> Either seed seed 351 | 352 | where 'coll' is the type of a collection with elements of the type 353 | 'val', and 'm' is an arbitrary monad. The type 'seed' is the type of a 354 | state variable or variables (if the seed is a tuple). If an iteratee 355 | returns Right seed', the iteration continues with seed' as the new 356 | seed. If the iteratee returns Left seed'', the enumerator immediately 357 | stops further iterations, frees all the resources, and returns seed'' 358 | as the final result. 359 | 360 | Incidentally, Hal Daume III mentioned [DAUME] that such left-fold 361 | enumerator is indeed useful in practice. It is his preferred method of 362 | iterating over a file considered as a collection of characters, lines, 363 | or words. 364 | 365 | To make the enumerator non-recursive, we need to add an additional 366 | argument -- self: 367 | 368 | > type CFoldLeft' val m seed = 369 | > Self (Iteratee val seed) m seed 370 | > -> CollEnumerator val m seed 371 | > type Self iter m seed = iter -> seed -> m seed 372 | > type CFoldLeft1Maker coll val m seed = coll -> m (CFoldLeft' val m seed) 373 | 374 | A function of the type CFoldLeft' is also an enumerator. However, that 375 | enumerator does not recurse to advance the traversal. It invokes Self 376 | instead. Given CFoldLeft1Maker, we can obtain either the CFoldLeft 377 | enumerator, or a stream. The former translation procedure amounts to 378 | taking a fixpoint: 379 | 380 | > hfold_nonrec_to_rec:: (Monad m) => 381 | > coll -> (CFoldLeft1Maker coll val m seed) 382 | > -> m (CollEnumerator val m seed) 383 | > hfold_nonrec_to_rec coll hfold1_maker = do 384 | > hfold_left' <- hfold1_maker coll 385 | > return $ fix hfold_left' 386 | > fix f = f g where g = f g 387 | 388 | Converting CFoldLeft' into a stream is equally simple: 389 | 390 | > data MyStream m a = MyNil (Maybe a) | MyCons a (m (MyStream m a)) 391 | 392 | > hfold_nonrec_to_stream:: 393 | > (Monad m) => CFoldLeft' val m (MyStream m val) 394 | > -> m (MyStream m val) 395 | > hfold_nonrec_to_stream hfold_left' = do 396 | > let k fn (MyNil Nothing) = return $ MyNil Nothing 397 | > k fn (MyNil (Just c)) 398 | > = return $ MyCons c (hfold_left' k fn (MyNil Nothing)) 399 | > hfold_left' k (\_ c -> Right $ MyNil $ Just c) (MyNil Nothing) 400 | 401 | The polymorphic types of both conversion procedures indicate that the 402 | procedures are generic and apply to any collection and any traversal. 403 | 404 | 405 | The article [HINV] demonstrates both translations of CFoldLeft' on a 406 | concrete example of a file taken as a collection of 407 | characters. Haskell provides a cursor interface to that collection: 408 | hGetChar. We implement a left fold enumerator CFoldLeft'. We then show 409 | how to turn that enumerator back to a stream: how to express functions 410 | myhgetchar and myhiseof only in terms of the left fold enumerator. The 411 | derivation of these functions is independent of the precise nature of 412 | the enumerator. Incidentally, if we turn two enumerators into streams, 413 | we can safely interleave these streams. 414 | 415 | 416 | ## Conclusions 417 | 418 | In a language with first-class continuations, we propose 419 | coll-fold-left as the overall optimal interface to systematically 420 | access values of a collection (Section 3): 421 | 422 | coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ] 423 | PROC VAL SEED SEED ... -> [INDIC SEED SEED ...] 424 | 425 | In a language without first-class continuations, we propose 426 | CFoldLeft1Maker (Section 6) as such optimal interface. 427 | 428 | The enumerator-based interface is optimal because enumerators: are 429 | easier to write; are less error-prone to use; are more efficient; 430 | provide a better encapsulation of the state of the traversal; avoid 431 | resource leaks. 432 | 433 | We have presented generic conversion procedures that turn enumerators 434 | into cursors. The existence of these procedures demonstrates that 435 | enumerator- and cursor-based interfaces are inter-convertible. We have 436 | argued however that the enumerator interface should be considered 437 | primary and offered natively in a collection API. It is far more 438 | efficient and easy for a programmer to implement cursors via 439 | enumerators, than the other way around. 440 | 441 | The coll-fold-left interface has indeed been implemented and tested in 442 | practice. We have written a relational database interface for Scheme 443 | [DBINTF], which we have been using in the production environment. We 444 | have also implemented coll-fold-left to enumerate entries in a TIFF 445 | image tag directory. The enumerator coll-fold-left has been chosen to 446 | be the primary traversal interface in Scheme Collections SRFI 447 | [SRFI-44]. A similar interface is being considered for an Oracle RDBMS 448 | binding in Haskell. 449 | 450 | 451 | ## References 452 | 453 | [SHIFT] Olivier Danvy and Andrzej Filinski. Abstracting Control. 454 | Proc. 1990 ACM Conf. on LISP and Functional Programming, pp. 151-160, 455 | Nice, France, June 1990. 456 | 457 | [Daume] Hal Daume III. Re: From enumerators to cursors: turning the 458 | left fold inside out. 459 | A message posted on the Haskell mailing list on 460 | 24 Sep 2003 07:47:23 -0700. 461 | 462 | [Hutton] Graham Hutton. A tutorial on the universality and 463 | expressiveness of fold. 464 | Journal of Functional Programming, 9(4):355-372, July 1999. 465 | 466 | [ENUM-CC] Oleg Kiselyov. General ways to traverse collections. 467 | January 1, 2004. 468 | http://pobox.com/~oleg/ftp/Scheme/enumerators-callcc.html 469 | 470 | [DBINTF] Oleg Kiselyov. Scheme database access tools. May 10, 2003. 471 | http://pobox.com/~oleg/ftp/Scheme/lib/db-util1.scm 472 | http://pobox.com/~oleg/ftp/Scheme/tests/vdbaccess.scm 473 | 474 | [HINV] Oleg Kiselyov. From enumerators to cursors: turning the left 475 | fold inside out. January 1, 2004. 476 | http://pobox.com/~oleg/ftp/Haskell/misc.html#fold-stream 477 | The first draft was posted on the Haskell mailing list on 478 | 23 Sep 2003 23:59:45 -0700. 479 | 480 | [Bananas] Erik Meijer, Maarten M. Fokkinga, and Ross Paterson. 481 | Functional programming with bananas, lenses, envelopes, and barbed wire. 482 | In J. Hughes, editor, FPCA'91: Functional Programming Languages and 483 | Computer Architecture, volume 523 of LNCS, pp. 124-144. 484 | Springer-Verlag, 1991. 485 | 486 | [SRFI-44] Scott G. Miller. Collections. 487 | Scheme Request for Implementation SRFI-44. October 2003. 488 | http://srfi.schemers.org/srfi-44/srfi-44.html 489 | 490 | [Shasha] Dennis E. Shasha and Philippe Bonnet. Smooth Talking Your Databases. 491 | Dr.Dobbs Journal, July 2002, pp. 46-54. 492 | 493 | [Sheard] Tim Sheard and Leonidas Fegaras. A fold for all seasons. 494 | Proc. Conf. on Functional Programming and Computer Architecture 495 | (FPCA'93), pp. 233-242, Copenhagen, Denmark, June 1993. 496 | 497 | [ULLMAN] Jeffrey Ullman. Principles of Database Systems. 498 | Second Edition, 484 pp. Computer Science Press, 1982. 499 | 500 | [Veldhuizen] 501 | T. Veldhuizen. Expression Templates. 502 | C++ Report, Vol. 7 No. 5 June 1995, pp. 26-31 503 | http://osl.iu.edu/~tveldhui/papers/Expression-Templates/exprtmpl.html 504 | -------------------------------------------------------------------------------- /doc/LL3-collections-enumerators.txt: -------------------------------------------------------------------------------- 1 | ;; -*-mode: Outline; -*- 2 | 3 | Towards the best collection API 4 | 5 | * Abstract 6 | Most programming languages support collections, represented by an 7 | in-memory data structure, a file, a database, or a generating 8 | function. A programming language system gives us typically one of the 9 | two interfaces to systematically access elements of a collection. One 10 | traversal API is based on enumerators -- e.g., for-each, map, filter 11 | higher-order procedures -- of which the most general is fold. The 12 | second approach relies on streams, a.k.a. cursors, lazy 13 | lists. Generators such as the ones in Icon, Ruby and Python are a 14 | hybrid approach. 15 | 16 | This article presents a design of the overall optimal collection 17 | traversal interface, which is based on a left-fold-like combinator 18 | with premature termination. We argue from the practical standpoint 19 | that such an interface is superior: in efficiency; in ease of 20 | programming; in more predictable resource utilization and avoidance of 21 | resource leaks. 22 | 23 | We also demonstrate a procedure that converts _any_ left-fold-like 24 | enumerator for any collection into the corresponding 25 | cursor. Therefore, the stream-based interface, when it is truly needed 26 | or preferred, can be automatically and generically derived from the 27 | proposed interface. 28 | 29 | We present two variants of the left-fold enumerator: for languages 30 | with and without first-class continuations. The proposed traversal 31 | interface has been implemented in a Scheme database access API used in 32 | a production environment. The generic-left-fold-based interface 33 | underlies the Collection SRFI (Scheme Request for Implementation) and 34 | is being considered for an Oracle database interface in Haskell. 35 | 36 | 37 | * Introduction and terminology 38 | Access to collections cuts through many programming languages. By a 39 | collection we mean a hash table; a file as a collection of lines, 40 | words, or bytes; a file system directory; an XML document; a suffix 41 | tree; or a "dynamic" collection produced by a database query or any 42 | other generating function. 43 | 44 | There are two basic approaches to systematically access collection 45 | elements. One approach relies on enumerators such as 'map', 'for-each' 46 | and 'filter'. The most general of those is fold, which lets us 47 | maintain our state during the traversal. The other traversal API is 48 | based on lazy lists (a.k.a. cursors, or streams). 49 | 50 | A word about terminology is in order, as it is rather confusing. We 51 | will call a higher-order for-each-type procedure an enumerator. It is 52 | this procedure that takes a collection and a handler, and applies the 53 | handler to each element of the collection in turn. The handler itself 54 | will be called an iteratee, because it is being applied -- by an 55 | active entity, the enumeratOR. An enumerator is presumably privy to 56 | the internals of a collection; it knows how to fetch elements in some 57 | (specified) order, and knows when to stop. A handler does not have 58 | this knowledge, nor is it supposed to. Throughout the article we 59 | eschew the commonly used term 'iterator': in C++ it means an accessor, 60 | which does not iterate over a collection. Rather, it is _being_ 61 | iterated. On the other hand, languages like OCaml provide a procedure 62 | 'iter' that actively traverses a collection, applying a user-defined 63 | function to elements. 64 | 65 | The next section argues from the practical standpoint against cursors 66 | and for enumerators. We contend that an enumerator such as left fold 67 | should be the primary means of traversing and obtaining all values of 68 | a collection. Enumerators are superior to cursors: 69 | - in efficiency 70 | - in ease of programming 71 | - in more predictable resource utilization and 72 | avoidance of resource leaks. 73 | The latter is of special importance for collections backed by external 74 | resources such as file handles and database connections. 75 | 76 | Section 3 proposes the overall optimal enumeration interface, which 77 | adds premature termination and explicit multiple state variables to 78 | the left fold. Section 4 discusses generators. 79 | 80 | Given a stream, we can always construct the corresponding 81 | enumerator. It is perhaps less appreciated that given a left fold 82 | enumerator, we can always construct the corresponding stream: we can 83 | invert an enumerator inside out. Section 5 describes the inversion 84 | procedure, which is fully generic and works for any 85 | collection. Because the enumerator interface subsumes the cursor-based 86 | one and because the former is more efficient and less error-prone, a 87 | collection API should offer enumerators as the primary, native 88 | traversal interface. 89 | 90 | The automatical enumerator inversion procedure in Section 5 relies on 91 | first-class (delimited) continuations. However, we can write a similar 92 | enumerator inversion even in a language that lacks first-class 93 | continuations. Section 6 demonstrates a non-recursive left-fold-like 94 | combinator that can be instantiated into the ordinary left fold or into 95 | a stream. Both instantiation procedures are generic and do not depend 96 | on the type of the collection. 97 | 98 | In conclusion, we recap our recommendations for the optimal collection 99 | traversal API and point out to the existing implementations of the 100 | API. 101 | 102 | 103 | 104 | * Why an enumerator is better than a cursor 105 | 106 | There are many fundamental reasons why enumerators such as fold are 107 | profoundly better than cursors. One such reason is the ease of program 108 | analysis and optimizations [Sheard, Bananas, Hutton]. This 109 | article will not discuss them. Instead, we concentrate on utterly 110 | practical considerations: performance, ease of programming, and 111 | resource utilization. 112 | 113 | Writing a procedure that traverses, e.g., an AVL tree is far easier 114 | than writing a cursor that remembers the current position and can 115 | advance to the next one. The latter is especially tricky if the tree 116 | in question does not have parent pointers (which waste space and lead 117 | to sharing and resource leakage problems). One look at the C++ STL 118 | should convince one how wretched programming of iterators could be. 119 | 120 | The "current element" of a cursor is a state, which can be altered by 121 | advancing the cursor. This state introduces an implicit dependency 122 | among all expressions that use the cursor, similar to the dependency 123 | imposed by global mutable variables. Therefore, programming with 124 | cursors is error-prone. In contrast, an enumerator does not expose its 125 | traversal state to an iteratee and does not permit any alteration to 126 | that state by an iteratee. The difference between the leaky and the 127 | perfect encapsulations of the traversal state is well explained in 128 | [ULLMAN] when comparing a cursor-based network database query language 129 | DBTG with SQL. Incidentally, streams, another cursor-based interface, 130 | expose the current element and hence eliminate the implicit state 131 | dependency. 132 | 133 | Another problem of cursors is telling the user that there are no more 134 | values to access. When a collection is exhausted, cursor operations 135 | typically return an "out-of-band" value such as EOF (cf. getc() in C) 136 | or raise an exception (cf. iterator.next() in Python). None of these 137 | approaches appear entirely satisfactory. Unlike cursors, an enumerator 138 | does not need to do anything special to tell the iteratee that the 139 | traversal is finished: the enumerator will simply return. There is no 140 | longer a problem with out-of-band values and user's disregarding such 141 | values. 142 | 143 | Cursors are simply less efficient. A cursor must maintain the state of 144 | the traversal. That state may be invalid. Each operation on a cursor, 145 | therefore, must first verify that the cursor is in the valid 146 | state. The cost of the checks quickly adds up. 147 | 148 | This issue of the inefficiency of cursors is well known. For example, 149 | it is highly advisable to use "native" operations to move large 150 | sections of an array (ArrayCopy in Java) rather than copy 151 | element-by-element. The cost of an array bound check on each access to 152 | an element is one of the considerations. 153 | 154 | Enumerators can do more than just eliminate redundant bound 155 | checks. Enumerators, if implemented as staged functions (e.g., C++ 156 | templates) can tile or unroll loops and thus partially or completely 157 | remove the traversal iteration overhead [Veldhuizen]. The perfect 158 | encapsulation of the traversal state makes enumerators particularly 159 | adaptable for multi-stage programming. 160 | 161 | Databases give another example of the efficiency of enumerators: "The 162 | performance of cursors is horrible in almost all systems. One of us 163 | (Shasha) once had an experience of re-writing an eight-hour query 164 | having nested cursors into a cursor-free query that took 15 seconds." 165 | [Shasha] 166 | 167 | It is often said that the key to the best performance is to 168 | do more on the server. To find the maximum value of a database table 169 | column, it's far faster to evaluate 170 | select MAX(col) from collection 171 | than to open a cursor on the collection and retrieve all values from 172 | 'col' searching for the maximum. Stored procedure languages (some of 173 | which are quite sophisticated) were introduced to make the server-side 174 | processing easier and powerful. We should stress that 175 | select MAX(col) from collection 176 | is precisely equivalent to 177 | foldl max lower_bound collection 178 | 179 | 180 | Traversing a collection often requires resources, e.g., a stack space, 181 | a connection to a database, a DIR handle, a DOM tree of an XML 182 | document, etc. An enumerator takes care of allocating these resources. 183 | When iteratee explicitly tells the enumerator to stop -- or when the 184 | collection is exhausted -- the enumerator can orderly free the 185 | resources. An enumerator can be programmed as 186 | 187 | (define (enum proc collection) 188 | (let ((database-connection #f)) 189 | (dynamic-wind 190 | (lambda () 191 | (set! database-connection 192 | (open-connection collection))) 193 | (lambda () (iterate proc)) 194 | (lambda () 195 | (set! database-connection 196 | (close-connection database-connection)))))) 197 | 198 | If 'proc' does not capture its continuation [Footnote-1] -- as it is 199 | often the case -- the database connection is opened (taken from the 200 | pool) at the beginning of the iteration and is returned to the pool at 201 | the end. If we were to provide access to our collection in the form of 202 | a cursor, we would have to place the variable 'database-connection' 203 | into that cursor. We cannot close the database until the cursor is 204 | alive. But how do we know when there are no alive cursors and 205 | therefore it is safe to recycle the database connection? We must rely 206 | either on the programmer's explicitly closing the connection, or on a 207 | finalizer. The sheer number of internet security advisories concerning 208 | memory allocation problems indicates that manual management of 209 | resources is greatly error-prone. The finalizer solution is not 210 | satisfactory either: finalizers are rarely supported and when they 211 | are, they are an unreliable tool to manage precious resources. The 212 | execution of finalizers is unpredictable and is generally beyond 213 | programmer's control. 214 | 215 | [Footnote-1] 216 | An iteratee may call a continuation captured before the enumerator was 217 | entered. For that reason, an enumerator should employ a dynamic-wind 218 | to detect such an attempt to escape, shed excessive resources and 219 | switch to a "low-power" mode while the iteratee is on the run. 220 | 221 | 222 | * The most general enumeration interface 223 | 224 | We propose the following general enumeration interface. It is based on 225 | a left-fold enumerator and explicitly supports multiple state 226 | variables (i.e., seeds) and a premature termination of the 227 | iteration. In this Section we describe the interface in pseudo-code, 228 | which can be instantiated in Scheme [SRFI-44], Haskell ([Daume], 229 | Section 6) or other concrete language. 230 | 231 | The enumeration procedure 232 | coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ] 233 | 234 | traverses the collection denoted by COLL. The procedure takes one or 235 | more state arguments denoted by SEED, and returns just as many. PROC 236 | is an iteratee procedure: 237 | PROC VAL SEED SEED ... -> [INDIC SEED SEED ...] 238 | 239 | It takes n+1 arguments and returns n+1 values. The first argument is 240 | the current value retrieved from the collection. The other n arguments 241 | are the seeds. The first return value is a boolean indicator, whose 242 | false value causes the premature termination of the iteration. The 243 | other return values from PROC are the new values of the seeds. The 244 | procedure coll-fold-left enumerates the collection in some order and 245 | invokes PROC passing it the current value of the collection and the 246 | current seeds. The first invocation of PROC receives SEEDs that were 247 | the arguments of coll-fold-left. The further invocations of PROC 248 | receive SEEDs that were produced by the previous invocation of 249 | PROC. When the collection is exhausted or when the PROC procedure 250 | returns the indicator value of false, coll-fold-left terminates the 251 | iterations, disposes of allocated resources, and returns the current 252 | SEEDs. If the collection COLL is empty, coll-fold-left does not invoke 253 | PROC and returns its argument SEEDs. 254 | 255 | 256 | 257 | * Enumerators and generators 258 | 259 | A programming language Icon popularized generators as a way to 260 | traverse actual and virtual collections. Generators are also supported 261 | in Ruby and Python. The documentation of Icon defines a generator as 262 | an expression that can produce several values, on demand. 263 | Multiple-valued expressions can also be implemented with shift and 264 | reset [SHIFT]. The paper [SHIFT] gives many examples of their use. 265 | 266 | Generators occupy an intermediate place between enumerators and 267 | cursors. A generator is just as easy to write as an enumerator. It 268 | traverses a collection, fetches the current element and _yields_ it, 269 | by passing the element to a dedicated procedure or syntax form. When 270 | the latter returns, the traversal continues. On the other hand, 271 | generators are trivially related to streams [ENUM-CC]. The latter 272 | article discusses generators and enumerators in more 273 | detail. Incidentally, the article demonstrates that a generator-based 274 | code in Python can be translated into Scheme almost 275 | verbatim. Generators give the first hint that enumerators and cursors 276 | are related via first-class continuations. 277 | 278 | Like cursors and streams, generators are demand-driven. A user must 279 | explicitly request a new value to advance the traversal. Therefore, 280 | like cursors generators leak resources: it is not clear when the 281 | iteration should be assumed terminated and the associated resources 282 | can be safely disposed of. 283 | 284 | 285 | * How to invert an enumerator in a language with first-class continuations 286 | 287 | Sometimes we indeed need to traverse a collection via a cursor. 288 | Reasons may include moving data from one collection to another, or 289 | interfacing legacy code. If a collection API provides enumerators, we 290 | obtain cursors for free. We pass an enumerator to a generic 291 | translation procedure, which inverts the enumerator "inside out" and 292 | returns a cursor. 293 | 294 | The following code illustrates the conversion in Scheme, a language 295 | with first-class continuations. The procedure lfold->lazy-list is a 296 | fully generic translation procedure: it takes a left-fold enumerator 297 | for _any_ collection, and converts the enumerator to a stream (lazy 298 | list). The latter is a realization of a cursor. 299 | 300 | (define (lfold->lazy-list lfold collection) 301 | (delay 302 | (call-with-current-continuation 303 | (lambda (k-main) 304 | (lfold collection 305 | (lambda (val seed) 306 | (values 307 | (call-with-current-continuation 308 | (lambda (k-reenter) 309 | (k-main 310 | (cons val 311 | (delay 312 | (call-with-current-continuation 313 | (lambda (k-new-main) 314 | (set! k-main k-new-main) 315 | (k-reenter #t)))))))) 316 | seed)) 317 | '()) ; Initial seed 318 | (k-main '()))))) 319 | 320 | 321 | The present article is an abstract. Code in a functional language is 322 | the best abstract. The discussion is delegated to the talk and the 323 | full paper. The article [ENUM-CC] discusses the inversion procedure in 324 | more detail and points out to the complete code and the test cases. 325 | 326 | 327 | * How to invert an enumerator in a language without first-class continuations 328 | 329 | If a programming language lacks first-class continuations, the 330 | conversion from an enumerator to a cursor is still possible. However, 331 | we need to generalize the enumerator interface and make it 332 | non-recursive. For concreteness, this section demonstrates our 333 | approach for one particular language without first-class 334 | continuations: Haskell. Applications to other languages are 335 | straightforward. 336 | 337 | In Haskell, the general left-fold enumerator has the following 338 | interface [HINV]: 339 | 340 | > type CFoldLeft coll val m seed = coll -> CollEnumerator val m seed 341 | > type CollEnumerator val m seed = 342 | > Iteratee val seed 343 | > -> seed -- the initial seed 344 | > -> m seed 345 | > type Iteratee val seed = seed -> val -> Either seed seed 346 | 347 | where 'coll' is the type of a collection with elements of the type 348 | 'val', and 'm' is an arbitrary monad. The type 'seed' is the type of a 349 | state variable or variables (if the seed is a tuple). If an iteratee 350 | returns Right seed', the iteration continues with seed' as the new 351 | seed. If the iteratee returns Left seed'', the enumerator immediately 352 | stops further iterations, frees all the resources, and returns seed'' 353 | as the final result. 354 | 355 | Incidentally, Hal Daume III mentioned [DAUME] that such left-fold 356 | enumerator is indeed useful in practice. It is his preferred method of 357 | iterating over a file considered as a collection of characters, lines, 358 | or words. 359 | 360 | To make the enumerator non-recursive, we need to add an additional 361 | argument -- self: 362 | 363 | > type CFoldLeft' val m seed = 364 | > Self (Iteratee val seed) m seed 365 | > -> CollEnumerator val m seed 366 | > type Self iter m seed = iter -> seed -> m seed 367 | > type CFoldLeft1Maker coll val m seed = coll -> m (CFoldLeft' val m seed) 368 | 369 | A function of the type CFoldLeft' is also an enumerator. However, that 370 | enumerator does not recurse to advance the traversal. It invokes Self 371 | instead. Given CFoldLeft1Maker, we can obtain either the CFoldLeft 372 | enumerator, or a stream. The former translation procedure amounts to 373 | taking a fixpoint: 374 | 375 | > hfold_nonrec_to_rec:: (Monad m) => 376 | > coll -> (CFoldLeft1Maker coll val m seed) 377 | > -> m (CollEnumerator val m seed) 378 | > hfold_nonrec_to_rec coll hfold1_maker = do 379 | > hfold_left' <- hfold1_maker coll 380 | > return $ fix hfold_left' 381 | > fix f = f g where g = f g 382 | 383 | Converting CFoldLeft' into a stream is equally simple: 384 | 385 | > data MyStream m a = MyNil (Maybe a) | MyCons a (m (MyStream m a)) 386 | 387 | > hfold_nonrec_to_stream:: 388 | > (Monad m) => CFoldLeft' val m (MyStream m val) 389 | > -> m (MyStream m val) 390 | > hfold_nonrec_to_stream hfold_left' = do 391 | > let k fn (MyNil Nothing) = return $ MyNil Nothing 392 | > k fn (MyNil (Just c)) 393 | > = return $ MyCons c (hfold_left' k fn (MyNil Nothing)) 394 | > hfold_left' k (\_ c -> Right $ MyNil $ Just c) (MyNil Nothing) 395 | 396 | The polymorphic types of both conversion procedures indicate that the 397 | procedures are generic and apply to any collection and any traversal. 398 | 399 | 400 | The article [HINV] demonstrates both translations of CFoldLeft' on a 401 | concrete example of a file taken as a collection of 402 | characters. Haskell provides a cursor interface to that collection: 403 | hGetChar. We implement a left fold enumerator CFoldLeft'. We then show 404 | how to turn that enumerator back to a stream: how to express functions 405 | myhgetchar and myhiseof only in terms of the left fold enumerator. The 406 | derivation of these functions is independent of the precise nature of 407 | the enumerator. Incidentally, if we turn two enumerators into streams, 408 | we can safely interleave these streams. 409 | 410 | 411 | * Conclusions 412 | 413 | In a language with first-class continuations, we propose 414 | coll-fold-left as the overall optimal interface to systematically 415 | access values of a collection (Section 3): 416 | 417 | coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ] 418 | PROC VAL SEED SEED ... -> [INDIC SEED SEED ...] 419 | 420 | In a language without first-class continuations, we propose 421 | CFoldLeft1Maker (Section 6) as such optimal interface. 422 | 423 | The enumerator-based interface is optimal because enumerators: are 424 | easier to write; are less error-prone to use; are more efficient; 425 | provide a better encapsulation of the state of the traversal; avoid 426 | resource leaks. 427 | 428 | We have presented generic conversion procedures that turn enumerators 429 | into cursors. The existence of these procedures demonstrates that 430 | enumerator- and cursor-based interfaces are inter-convertible. We have 431 | argued however that the enumerator interface should be considered 432 | primary and offered natively in a collection API. It is far more 433 | efficient and easy for a programmer to implement cursors via 434 | enumerators, than the other way around. 435 | 436 | The coll-fold-left interface has indeed been implemented and tested in 437 | practice. We have written a relational database interface for Scheme 438 | [DBINTF], which we have been using in the production environment. We 439 | have also implemented coll-fold-left to enumerate entries in a TIFF 440 | image tag directory. The enumerator coll-fold-left has been chosen to 441 | be the primary traversal interface in Scheme Collections SRFI 442 | [SRFI-44]. A similar interface is being considered for an Oracle RDBMS 443 | binding in Haskell. 444 | 445 | 446 | * References 447 | 448 | [SHIFT] Olivier Danvy and Andrzej Filinski. Abstracting Control. 449 | Proc. 1990 ACM Conf. on LISP and Functional Programming, pp. 151-160, 450 | Nice, France, June 1990. 451 | 452 | [Daume] Hal Daume III. Re: From enumerators to cursors: turning the 453 | left fold inside out. 454 | A message posted on the Haskell mailing list on 455 | 24 Sep 2003 07:47:23 -0700. 456 | 457 | [Hutton] Graham Hutton. A tutorial on the universality and 458 | expressiveness of fold. 459 | Journal of Functional Programming, 9(4):355-372, July 1999. 460 | 461 | [ENUM-CC] Oleg Kiselyov. General ways to traverse collections. 462 | January 1, 2004. 463 | http://pobox.com/~oleg/ftp/Scheme/enumerators-callcc.html 464 | 465 | [DBINTF] Oleg Kiselyov. Scheme database access tools. May 10, 2003. 466 | http://pobox.com/~oleg/ftp/Scheme/lib/db-util1.scm 467 | http://pobox.com/~oleg/ftp/Scheme/tests/vdbaccess.scm 468 | 469 | [HINV] Oleg Kiselyov. From enumerators to cursors: turning the left 470 | fold inside out. January 1, 2004. 471 | http://pobox.com/~oleg/ftp/Haskell/misc.html#fold-stream 472 | The first draft was posted on the Haskell mailing list on 473 | 23 Sep 2003 23:59:45 -0700. 474 | 475 | [Bananas] Erik Meijer, Maarten M. Fokkinga, and Ross Paterson. 476 | Functional programming with bananas, lenses, envelopes, and barbed wire. 477 | In J. Hughes, editor, FPCA'91: Functional Programming Languages and 478 | Computer Architecture, volume 523 of LNCS, pp. 124-144. 479 | Springer-Verlag, 1991. 480 | 481 | [SRFI-44] Scott G. Miller. Collections. 482 | Scheme Request for Implementation SRFI-44. October 2003. 483 | http://srfi.schemers.org/srfi-44/srfi-44.html 484 | 485 | [Shasha] Dennis E. Shasha and Philippe Bonnet. Smooth Talking Your Databases. 486 | Dr.Dobbs Journal, July 2002, pp. 46-54. 487 | 488 | [Sheard] Tim Sheard and Leonidas Fegaras. A fold for all seasons. 489 | Proc. Conf. on Functional Programming and Computer Architecture 490 | (FPCA'93), pp. 233-242, Copenhagen, Denmark, June 1993. 491 | 492 | [ULLMAN] Jeffrey Ullman. Principles of Database Systems. 493 | Second Edition, 484 pp. Computer Science Press, 1982. 494 | 495 | [Veldhuizen] 496 | T. Veldhuizen. Expression Templates. 497 | C++ Report, Vol. 7 No. 5 June 1995, pp. 26-31 498 | http://osl.iu.edu/~tveldhui/papers/Expression-Templates/exprtmpl.html 499 | -------------------------------------------------------------------------------- /doc/LL3-collections-talk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/LL3-collections-talk.pdf -------------------------------------------------------------------------------- /doc/adams.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/adams.pdf -------------------------------------------------------------------------------- /doc/hirai-yamamoto.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/hirai-yamamoto.pdf -------------------------------------------------------------------------------- /doc/making-data-structures-persistent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/making-data-structures-persistent.pdf -------------------------------------------------------------------------------- /doc/scheme-wttree.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | A Scheme Implementation of Weight-Balanced Binary Trees. 5 | 6 | 7 | Copyright (C) 1993-1994 Stephen Adams 8 | 9 | 10 | This document describes the interface to an R4RS Scheme implementation 11 | of weight-balanced binary trees. The original design and 12 | implementation used the language ML and is described in: 13 | 14 | Stephen Adams, Implemeting Sets Efficiently in a Functional 15 | Language, CSTR 92-10, Department of Electronics and Computer 16 | Science, University of Southampton, 1992 17 | 18 | The body of the file was derived from the MIT Scheme system 19 | documentation, which contains the following notice: 20 | 21 | 22 | 23 | 24 | Weight-Balanced Trees 25 | ===================== 26 | 27 | Balanced binary trees are a useful data structure for maintaining 28 | large sets of ordered objects or sets of associations whose keys are 29 | ordered. MIT Scheme has an comprehensive implementation of 30 | weight-balanced binary trees which has several advantages over the 31 | other data structures for large aggregates: 32 | 33 | * In addition to the usual element-level operations like insertion, 34 | deletion and lookup, there is a full complement of 35 | collection-level operations, like set intersection, set union and 36 | subset test, all of which are implemented with good orders of 37 | growth in time and space. This makes weight balanced trees ideal 38 | for rapid prototyping of functionally derived specifications. 39 | 40 | * An element in a tree may be indexed by its position under the 41 | ordering of the keys, and the position of an element may be 42 | determined, both with reasonable efficiency. 43 | 44 | * Operations to find and remove minimum element make weight 45 | balanced trees simple to use for priority queues. 46 | 47 | * The implementation is *functional* rather than *imperative*. This 48 | means that operations like `inserting' an association in a tree 49 | do not destroy the old tree, in much the same way that `(+ 1 x)' 50 | modifies neither the constant 1 nor the value bound to `x'. The 51 | trees are referentially transparent thus the programmer need not 52 | worry about copying the trees. Referential transparency allows 53 | space efficiency to be achieved by sharing subtrees. 54 | 55 | These features make weight-balanced trees suitable for a wide range 56 | of applications, especially those that require large numbers of sets or 57 | discrete maps. Applications that have a few global databases and/or 58 | concentrate on element-level operations like insertion and lookup are 59 | probably better off using hash-tables or red-black trees. 60 | 61 | The *size* of a tree is the number of associations that it contains. 62 | Weight balanced binary trees are balanced to keep the sizes of the 63 | subtrees of each node within a constant factor of each other. This 64 | ensures logarithmic times for single-path operations (like lookup and 65 | insertion). A weight balanced tree takes space that is proportional to 66 | the number of associations in the tree. For the current 67 | implementation, the constant of proportionality is six words per 68 | association. 69 | 70 | Weight balanced trees can be used as an implementation for either 71 | discrete sets or discrete maps (associations). Sets are implemented by 72 | ignoring the datum that is associated with the key. Under this scheme 73 | if an associations exists in the tree this indicates that the key of the 74 | association is a member of the set. Typically a value such as `()', 75 | `#t' or `#f' is associated with the key. 76 | 77 | Many operations can be viewed as computing a result that, depending 78 | on whether the tree arguments are thought of as sets or maps, is known 79 | by two different names. An example is `wt-tree/member?', which, when 80 | regarding the tree argument as a set, computes the set membership 81 | operation, but, when regarding the tree as a discrete map, 82 | `wt-tree/member?' is the predicate testing if the map is defined at an 83 | element in its domain. Most names in this package have been chosen 84 | based on interpreting the trees as sets, hence the name 85 | `wt-tree/member?' rather than `wt-tree/defined-at?'. 86 | 87 | The weight balanced tree implementation is a run-time-loadable 88 | option. To use weight balanced trees, execute 89 | 90 | (load-option 'wt-tree) 91 | 92 | once before calling any of the procedures defined here. 93 | 94 | 95 | 96 | 97 | Construction of Weight-Balanced Trees 98 | ------------------------------------- 99 | 100 | Binary trees require there to be a total order on the keys used to 101 | arrange the elements in the tree. Weight balanced trees are organized 102 | by *types*, where the type is an object encapsulating the ordering 103 | relation. Creating a tree is a two-stage process. First a tree type 104 | must be created from the predicate which gives the ordering. The tree 105 | type is then used for making trees, either empty or singleton trees or 106 | trees from other aggregate structures like association lists. Once 107 | created, a tree `knows' its type and the type is used to test 108 | compatibility between trees in operations taking two trees. Usually a 109 | small number of tree types are created at the beginning of a program 110 | and used many times throughout the program's execution. 111 | 112 | -- procedure+: make-wt-tree-type KEY #f 118 | (and (key #f 119 | (if (and (key #t 122 | 123 | Two key values are assumed to be equal if neither is less than the 124 | other by KEYwt-tree TREE-TYPE ALIST 158 | Returns a newly allocated weight-balanced tree that contains the 159 | same associations as ALIST. This procedure is equivalent to: 160 | 161 | (lambda (type alist) 162 | (let ((tree (make-wt-tree type))) 163 | (for-each (lambda (association) 164 | (wt-tree/add! tree 165 | (car association) 166 | (cdr association))) 167 | alist) 168 | tree)) 169 | 170 | 171 | 172 | 173 | Basic Operations on Weight-Balanced Trees 174 | ----------------------------------------- 175 | 176 | This section describes the basic tree operations on weight balanced 177 | trees. These operations are the usual tree operations for insertion, 178 | deletion and lookup, some predicates and a procedure for determining the 179 | number of associations in a tree. 180 | 181 | -- procedure+: wt-tree? OBJECT 182 | Returns `#t' if OBJECT is a weight-balanced tree, otherwise 183 | returns `#f'. 184 | 185 | -- procedure+: wt-tree/empty? WT-TREE 186 | Returns `#t' if WT-TREE contains no associations, otherwise 187 | returns `#f'. 188 | 189 | -- procedure+: wt-tree/size WT-TREE 190 | Returns the number of associations in WT-TREE, an exact 191 | non-negative integer. This operation takes constant time. 192 | 193 | -- procedure+: wt-tree/add WT-TREE KEY DATUM 194 | Returns a new tree containing all the associations in WT-TREE and 195 | the association of DATUM with KEY. If WT-TREE already had an 196 | association for KEY, the new association overrides the old. The 197 | average and worst-case times required by this operation are 198 | proportional to the logarithm of the number of associations in 199 | WT-TREE. 200 | 201 | -- procedure+: wt-tree/add! WT-TREE KEY DATUM 202 | Associates DATUM with KEY in WT-TREE and returns an unspecified 203 | value. If WT-TREE already has an association for KEY, that 204 | association is replaced. The average and worst-case times 205 | required by this operation are proportional to the logarithm of 206 | the number of associations in WT-TREE. 207 | 208 | -- procedure+: wt-tree/member? KEY WT-TREE 209 | Returns `#t' if WT-TREE contains an association for KEY, otherwise 210 | returns `#f'. The average and worst-case times required by this 211 | operation are proportional to the logarithm of the number of 212 | associations in WT-TREE. 213 | 214 | -- procedure+: wt-tree/lookup WT-TREE KEY DEFAULT 215 | Returns the datum associated with KEY in WT-TREE. If WT-TREE 216 | doesn't contain an association for KEY, DEFAULT is returned. The 217 | average and worst-case times required by this operation are 218 | proportional to the logarithm of the number of associations in 219 | WT-TREE. 220 | 221 | -- procedure+: wt-tree/delete WT-TREE KEY 222 | Returns a new tree containing all the associations in WT-TREE, 223 | except that if WT-TREE contains an association for KEY, it is 224 | removed from the result. The average and worst-case times required 225 | by this operation are proportional to the logarithm of the number 226 | of associations in WT-TREE. 227 | 228 | -- procedure+: wt-tree/delete! WT-TREE KEY 229 | If WT-TREE contains an association for KEY the association is 230 | removed. Returns an unspecified value. The average and worst-case 231 | times required by this operation are proportional to the logarithm 232 | of the number of associations in WT-TREE. 233 | 234 | 235 | 236 | 237 | Advanced Operations on Weight-Balanced Trees 238 | -------------------------------------------- 239 | 240 | In the following the *size* of a tree is the number of associations 241 | that the tree contains, and a *smaller* tree contains fewer 242 | associations. 243 | 244 | -- procedure+: wt-tree/split< WT-TREE BOUND 245 | Returns a new tree containing all and only the associations in 246 | WT-TREE which have a key that is less than BOUND in the ordering 247 | relation of the tree type of WT-TREE. The average and worst-case 248 | times required by this operation are proportional to the logarithm 249 | of the size of WT-TREE. 250 | 251 | -- procedure+: wt-tree/split> WT-TREE BOUND 252 | Returns a new tree containing all and only the associations in 253 | WT-TREE which have a key that is greater than BOUND in the 254 | ordering relation of the tree type of WT-TREE. The average and 255 | worst-case times required by this operation are proportional to the 256 | logarithm of size of WT-TREE. 257 | 258 | -- procedure+: wt-tree/union WT-TREE-1 WT-TREE-2 259 | Returns a new tree containing all the associations from both trees. 260 | This operation is asymmetric: when both trees have an association 261 | for the same key, the returned tree associates the datum from 262 | WT-TREE-2 with the key. Thus if the trees are viewed as discrete 263 | maps then `wt-tree/union' computes the map override of WT-TREE-1 by 264 | WT-TREE-2. If the trees are viewed as sets the result is the set 265 | union of the arguments. The worst-case time required by this 266 | operation is proportional to the sum of the sizes of both trees. 267 | If the minimum key of one tree is greater than the maximum key of 268 | the other tree then the time required is at worst proportional to 269 | the logarithm of the size of the larger tree. 270 | 271 | -- procedure+: wt-tree/intersection WT-TREE-1 WT-TREE-2 272 | Returns a new tree containing all and only those associations from 273 | WT-TREE-1 which have keys appearing as the key of an association 274 | in WT-TREE-2. Thus the associated data in the result are those 275 | from WT-TREE-1. If the trees are being used as sets the result is 276 | the set intersection of the arguments. As a discrete map 277 | operation, `wt-tree/intersection' computes the domain restriction 278 | of WT-TREE-1 to (the domain of) WT-TREE-2. The time required by 279 | this operation is never worse that proportional to the sum of the 280 | sizes of the trees. 281 | 282 | -- procedure+: wt-tree/difference WT-TREE-1 WT-TREE-2 283 | Returns a new tree containing all and only those associations from 284 | WT-TREE-1 which have keys that *do not* appear as the key of an 285 | association in WT-TREE-2. If the trees are viewed as sets the 286 | result is the asymmetric set difference of the arguments. As a 287 | discrete map operation, it computes the domain restriction of 288 | WT-TREE-1 to the complement of (the domain of) WT-TREE-2. The time 289 | required by this operation is never worse that proportional to the 290 | sum of the sizes of the trees. 291 | 292 | -- procedure+: wt-tree/subset? WT-TREE-1 WT-TREE-2 293 | Returns `#t' iff the key of each association in WT-TREE-1 is the 294 | key of some association in WT-TREE-2, otherwise returns `#f'. 295 | Viewed as a set operation, `wt-tree/subset?' is the improper subset 296 | predicate. A proper subset predicate can be constructed: 297 | 298 | (define (proper-subset? s1 s2) 299 | (and (wt-tree/subset? s1 s2) 300 | (< (wt-tree/size s1) (wt-tree/size s2)))) 301 | 302 | As a discrete map operation, `wt-tree/subset?' is the subset test 303 | on the domain(s) of the map(s). In the worst-case the time 304 | required by this operation is proportional to the size of 305 | WT-TREE-1. 306 | 307 | -- procedure+: wt-tree/set-equal? WT-TREE-1 WT-TREE-2 308 | Returns `#t' iff for every association in WT-TREE-1 there is an 309 | association in WT-TREE-2 that has the same key, and *vice versa*. 310 | 311 | Viewing the arguments as sets `wt-tree/set-equal?' is the set 312 | equality predicate. As a map operation it determines if two maps 313 | are defined on the same domain. 314 | 315 | This procedure is equivalent to 316 | 317 | (lambda (wt-tree-1 wt-tree-2) 318 | (and (wt-tree/subset? wt-tree-1 wt-tree-2 319 | (wt-tree/subset? wt-tree-2 wt-tree-1))) 320 | 321 | In the worst-case the time required by this operation is 322 | proportional to the size of the smaller tree. 323 | 324 | -- procedure+: wt-tree/fold COMBINER INITIAL WT-TREE 325 | This procedure reduces WT-TREE by combining all the associations, 326 | using an reverse in-order traversal, so the associations are 327 | visited in reverse order. COMBINER is a procedure of three 328 | arguments: a key, a datum and the accumulated result so far. 329 | Provided COMBINER takes time bounded by a constant, `wt-tree/fold' 330 | takes time proportional to the size of WT-TREE. 331 | 332 | A sorted association list can be derived simply: 333 | 334 | (wt-tree/fold (lambda (key datum list) 335 | (cons (cons key datum) list)) 336 | '() 337 | WT-TREE)) 338 | 339 | The data in the associations can be summed like this: 340 | 341 | (wt-tree/fold (lambda (key datum sum) (+ sum datum)) 342 | 0 343 | WT-TREE) 344 | 345 | -- procedure+: wt-tree/for-each ACTION WT-TREE 346 | This procedure traverses the tree in-order, applying ACTION to 347 | each association. The associations are processed in increasing 348 | order of their keys. ACTION is a procedure of two arguments which 349 | take the key and datum respectively of the association. Provided 350 | ACTION takes time bounded by a constant, `wt-tree/for-each' takes 351 | time proportional to in the size of WT-TREE. The example prints 352 | the tree: 353 | 354 | (wt-tree/for-each (lambda (key value) 355 | (display (list key value))) 356 | WT-TREE)) 357 | 358 | 359 | 360 | 361 | Indexing Operations on Weight-Balanced Trees 362 | -------------------------------------------- 363 | 364 | Weight balanced trees support operations that view the tree as sorted 365 | sequence of associations. Elements of the sequence can be accessed by 366 | position, and the position of an element in the sequence can be 367 | determined, both in logarthmic time. 368 | 369 | -- procedure+: wt-tree/index WT-TREE INDEX 370 | -- procedure+: wt-tree/index-datum WT-TREE INDEX 371 | -- procedure+: wt-tree/index-pair WT-TREE INDEX 372 | Returns the 0-based INDEXth association of WT-TREE in the sorted 373 | sequence under the tree's ordering relation on the keys. 374 | `wt-tree/index' returns the INDEXth key, `wt-tree/index-datum' 375 | returns the datum associated with the INDEXth key and 376 | `wt-tree/index-pair' returns a new pair `(KEY . DATUM)' which is 377 | the `cons' of the INDEXth key and its datum. The average and 378 | worst-case times required by this operation are proportional to 379 | the logarithm of the number of associations in the tree. 380 | 381 | These operations signal an error if the tree is empty, if 382 | INDEX`<0', or if INDEX is greater than or equal to the number of 383 | associations in the tree. 384 | 385 | Indexing can be used to find the median and maximum keys in the 386 | tree as follows: 387 | 388 | median: (wt-tree/index WT-TREE (quotient (wt-tree/size WT-TREE) 2)) 389 | 390 | maximum: (wt-tree/index WT-TREE (-1+ (wt-tree/size WT-TREE))) 391 | 392 | -- procedure+: wt-tree/rank WT-TREE KEY 393 | Determines the 0-based position of KEY in the sorted sequence of 394 | the keys under the tree's ordering relation, or `#f' if the tree 395 | has no association with for KEY. This procedure returns either an 396 | exact non-negative integer or `#f'. The average and worst-case 397 | times required by this operation are proportional to the logarithm 398 | of the number of associations in the tree. 399 | 400 | -- procedure+: wt-tree/min WT-TREE 401 | -- procedure+: wt-tree/min-datum WT-TREE 402 | -- procedure+: wt-tree/min-pair WT-TREE 403 | Returns the association of WT-TREE that has the least key under 404 | the tree's ordering relation. `wt-tree/min' returns the least key, 405 | `wt-tree/min-datum' returns the datum associated with the least 406 | key and `wt-tree/min-pair' returns a new pair `(key . datum)' 407 | which is the `cons' of the minimum key and its datum. The average 408 | and worst-case times required by this operation are proportional 409 | to the logarithm of the number of associations in the tree. 410 | 411 | These operations signal an error if the tree is empty. They could 412 | be written 413 | (define (wt-tree/min tree) (wt-tree/index tree 0)) 414 | (define (wt-tree/min-datum tree) (wt-tree/index-datum tree 0)) 415 | (define (wt-tree/min-pair tree) (wt-tree/index-pair tree 0)) 416 | 417 | -- procedure+: wt-tree/delete-min WT-TREE 418 | Returns a new tree containing all of the associations in WT-TREE 419 | except the association with the least key under the WT-TREE's 420 | ordering relation. An error is signalled if the tree is empty. 421 | The average and worst-case times required by this operation are 422 | proportional to the logarithm of the number of associations in the 423 | tree. This operation is equivalent to 424 | 425 | (wt-tree/delete WT-TREE (wt-tree/min WT-TREE)) 426 | 427 | -- procedure+: wt-tree/delete-min! WT-TREE 428 | Removes the association with the least key under the WT-TREE's 429 | ordering relation. An error is signalled if the tree is empty. 430 | The average and worst-case times required by this operation are 431 | proportional to the logarithm of the number of associations in the 432 | tree. This operation is equivalent to 433 | 434 | (wt-tree/delete! WT-TREE (wt-tree/min WT-TREE)) 435 | 436 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject danlentz/clj-wbtree "0.1.3-SNAPSHOT" 2 | :description "A 'Purely Functional' Weight Balanced Binary Tree" 3 | :url "http://github.com/danlentz/clj-wbtree" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"]]) 7 | 8 | -------------------------------------------------------------------------------- /resources/rdfs.edn: -------------------------------------------------------------------------------- 1 | ;;; 2 | ;;; Minimal RDF & RDFS schema containing the axiomatic tuples. 3 | ;;; 4 | 5 | #{ 6 | [:rdfs/Class :rdf/type :rdfs/Class] 7 | [:rdfs/Resource :rdf/type :rdfs/Class] 8 | [:rdfs/Literal :rdf/type :rdfs/Class] 9 | [:rdfs/Property :rdf/type :rdfs/Class] 10 | [:rdfs/Datatype :rdf/type :rdfs/Class] 11 | [:rdfs/Datatype :rdfs/subClassOf :rdfs/Class] 12 | [:rdfs/Container :rdf/type :rdfs/Class] 13 | [:rdf/XMLLiteral :rdf/type :rdfs/Datatype] 14 | [:rdf/XMLLiteral :rdfs/subClassOf :rdfs/Literal] 15 | [:rdf/EDNLiteral :rdf/type :rdfs/Datatype] 16 | [:rdf/EDNLiteral :rdfs/subClassOf :rdfs/Literal] 17 | [:rdf/Seq :rdf/type :rdfs/Class] 18 | [:rdf/Seq :rdfs/subClassOf :rdfs/Container] 19 | [:rdf/Bag :rdf/type :rdfs/Class] 20 | [:rdf/Bag :rdfs/subClassOf :rdfs/Container] 21 | [:rdf/Alt :rdf/type :rdfs/Class] 22 | [:rdf/Alt :rdfs/subClassOf :rdfs/Container] 23 | [:rdf/Statement :rdf/type :rdfs/Class] 24 | [:rdf/List :rdf/type :rdfs/Class] 25 | [:rdf/type :rdf/type :rdf/Property] 26 | [:rdf/type :rdfs/domain :rdfs/Resource] 27 | [:rdf/type :rdfs/range :rdfs/Class] 28 | [:rdfs/domain :rdf/type :rdf/Property] 29 | [:rdfs/domain :rdfs/domain :rdfs/Resource] 30 | [:rdfs/domain :rdfs/range :rdfs/Class] 31 | [:rdfs/range :rdf/type :rdf/Property] 32 | [:rdfs/range :rdfs/domain :rdf/Property] 33 | [:rdfs/range :rdfs/range :rdfs/Class] 34 | [:rdfs/subPropertyOf :rdf/type :rdf/Property] 35 | [:rdfs/subPropertyOf :rdfs/domain :rdf/Property] 36 | [:rdfs/subPropertyOf :rdfs/range :rdf/Property] 37 | [:rdfs/subClassOf :rdf/type :rdf/Property] 38 | [:rdfs/subClassOf :rdfs/domain :rdfs/Class] 39 | [:rdfs/subClassOf :rdfs/range :rdfs/Class] 40 | [:rdf/subject :rdf/type :rdf/Property] 41 | [:rdf/subject :rdfs/domain :rdf/Statement] 42 | [:rdf/subject :rdfs/range :rdfs/Resource] 43 | [:rdf/predicate :rdf/type :rdf/Property] 44 | [:rdf/predicate :rdfs/domain :rdf/Statement] 45 | [:rdf/predicate :rdfs/range :rdfs/Resource] 46 | [:rdf/object :rdf/type :rdf/Property] 47 | [:rdf/object :rdfs/domain :rdf/Statement] 48 | [:rdf/object :rdfs/range :rdfs/Resource] 49 | [:rdfs/member :rdf/type :rdf/Property] 50 | [:rdfs/member :rdfs/domain :rdfs/Resource] 51 | [:rdfs/member :rdfs/range :rdfs/Resource] 52 | [:rdf/first :rdf/type :rdf/Property] 53 | [:rdf/first :rdfs/domain :rdf/List] 54 | [:rdf/first :rdfs/range :rdfs/Resource] 55 | [:rdf/rest :rdf/type :rdf/Property] 56 | [:rdf/rest :rdfs/domain :rdf/List] 57 | [:rdf/rest :rdfs/range :rdf/List] 58 | [:rdf/nil :rdf/type :rdf/List] 59 | [:rdf/value :rdf/type :rdf/Property] 60 | [:rdf/value :rdfs/domain :rdfs/Resource] 61 | [:rdf/value :rdfs/range :rdfs/Resource] 62 | [:rdfs/ContainerMembershipProperty :rdf/type :rdfs/Class] 63 | [:rdfs/ContainerMembershipProperty :rdfs/subClassOf :rdf/Property] 64 | [:rdfs/seeAlso :rdf/type :rdf/Property] 65 | [:rdfs/seeAlso :rdfs/domain :rdfs/Resource] 66 | [:rdfs/seeAlso :rdfs/range :rdfs/Resource] 67 | [:rdfs/isDefinedBy :rdf/type :rdf/Property] 68 | [:rdfs/isDefinedBy :rdfs/subPropertyOf :rdfs/seeAlso] 69 | [:rdfs/isDefinedBy :rdfs/domain :rdfs/Resource] 70 | [:rdfs/isDefinedBy :rdfs/range :rdfs/Resource] 71 | [:rdfs/comment :rdf/type :rdf/Property] 72 | [:rdfs/comment :rdfs/domain :rdfs/Resource] 73 | [:rdfs/comment :rdfs/range :rdfs/Literal] 74 | [:rdfs/label :rdf/type :rdf/Property] 75 | [:rdfs/label :rdfs/domain :rdfs/Resource] 76 | [:rdfs/label :rdfs/range :rdfs/Literal] 77 | } 78 | 79 | -------------------------------------------------------------------------------- /src/wbtree/tree.clj: -------------------------------------------------------------------------------- 1 | (ns wbtree.tree 2 | (:require [wbtree.util :as util])) 3 | 4 | 5 | 6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7 | ;; Weight Balanced Functional Binary Tree (Hirai-Yamamoto Tree) 8 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9 | ;; 10 | ;; This is an implementation of a weight-balanced binary tree data 11 | ;; structure based on the following references: 12 | ;; 13 | ;; -- Adams (1992) 14 | ;; 'Implementing Sets Efficiently in a Functional Language' 15 | ;; Technical Report CSTR 92-10, University of Southampton. 16 | ;; 17 | ;; -- Hirai and Yamamoto (2011) 18 | ;; 'Balancing Weight-Balanced Trees' 19 | ;; Journal of Functional Programming / 21 (3): 20 | ;; Pages 287-307 21 | ;; 22 | ;; -- Oleg Kiselyov 23 | ;; 'Towards the best collection API, A design of the overall optimal 24 | ;; collection traversal interface' 25 | ;; 26 | ;; 27 | ;; -- Nievergelt and Reingold (1972) 28 | ;; 'Binary Search Trees of Bounded Balance' 29 | ;; STOC '72 Proceedings 30 | ;; 4th Annual ACM symposium on Theory of Computing 31 | ;; Pages 137-142 32 | ;; 33 | ;; -- Driscoll, Sarnak, Sleator, and Tarjan (1989) 34 | ;; 'Making Data Structures Persistent' 35 | ;; Journal of Computer and System Sciences Volume 38 Issue 1, February 1989 36 | ;; 18th Annual ACM Symposium on Theory of Computing 37 | ;; Pages 86-124 38 | ;; 39 | ;; -- MIT Scheme weight balanced tree as reimplemented by Yoichi Hirai 40 | ;; and Kazuhiko Yamamoto using the revised non-variant algorithm recommended 41 | ;; integer balance parameters from (Hirai/Yamomoto 2011). 42 | ;; 43 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 44 | 45 | (set! *warn-on-reflection* true) 46 | 47 | 48 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 49 | ;; Data Collection and Metrics 50 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 51 | 52 | (def stats (atom {})) 53 | 54 | (defn reset-stats!! [] 55 | (reset! stats {})) 56 | 57 | (defn inc-stat! [stat] 58 | #_ (swap! stats assoc stat (inc (get @stats stat 1)))) 59 | 60 | 61 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 62 | ;; Weight Balancing Constants 63 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64 | 65 | 66 | ;; +delta+: 67 | ;; The primary balancing rotation parameter that is used for the 68 | ;; determination whether two subtrees of a node are in balance or 69 | ;; require adjustment by means of a rotation operation. The specific 70 | ;; rotation to be performed is determined by +gamma+. 71 | 72 | (def ^:const +delta+ 3) 73 | 74 | ;; +gamma+: 75 | ;; The secondary balancing rotation parameter that is used for the 76 | ;; determination of whether a single or double rotation operation should 77 | ;; occur, once it has been decided based on +delta+ that a rotation is 78 | ;; indeed required. 79 | 80 | (def ^:const +gamma+ 2) 81 | 82 | 83 | 84 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 85 | ;; Universal Comparator 86 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 87 | 88 | 89 | (defn xcompare [x y] 90 | (inc-stat! :comparisons) 91 | (if (= (class x) (class y)) 92 | (if (instance? Comparable x) 93 | (.compareTo ^Comparable x y) 94 | (.compareTo ^Integer (hash x) (hash y))) 95 | (.compareTo ^Integer (hash (class x)) (hash (class y))))) 96 | 97 | 98 | (defn xcompare< [x y] 99 | (neg? (xcompare x y))) 100 | 101 | (defn xcompare> [x y] 102 | (pos? (xcompare x y))) 103 | 104 | (defn xcompare= [x y] 105 | (zero? (xcompare x y))) 106 | 107 | 108 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 109 | ;; Storage Model 110 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 111 | 112 | 113 | 114 | (defn- leaf [] nil) 115 | 116 | 117 | (defn null 118 | "a value which satisfies null?" 119 | [] 120 | (leaf)) 121 | 122 | (defn null? [n] 123 | (nil? n)) 124 | 125 | 126 | 127 | (deftype Node [k v l r ^long x]) 128 | 129 | (defn node [k v l r x] 130 | (Node. k v l r x)) 131 | 132 | 133 | (defn node? [thing] 134 | (instance? Node thing)) 135 | 136 | 137 | ;; (util/avg-timing 10 138 | ;; (count (repeatedly 1000000 #(apply tuple (range 5))))) 139 | 140 | ;; ;; 513.698 141 | 142 | 143 | 144 | 145 | ;; (util/avg-timing 10 146 | ;; (count (repeatedly 1000000 #(apply node (range 5))))) 147 | 148 | 149 | ;; 382.1553 150 | 151 | 152 | 153 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 154 | ;; Constituent Accessors 155 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 156 | 157 | 158 | (defn -k [^Node n] 159 | (.k n)) 160 | 161 | (defn -v [^Node n] 162 | (.v n)) 163 | 164 | (defn -l [^Node n] 165 | (.l n)) 166 | 167 | (defn -r [^Node n] 168 | (.r n)) 169 | 170 | (defn -x [^Node n] 171 | (.x n)) 172 | 173 | (defn -kv [^Node n] 174 | (new clojure.lang.MapEntry (.k n) (.v n))) 175 | 176 | (defn -lr [^Node n] 177 | (list (.l n) (.r n))) 178 | 179 | (defn -kvlr [^Node n] 180 | (list (.k n) (.v n) (.l n) (.r n))) 181 | 182 | 183 | 184 | 185 | 186 | (defmacro kv 187 | "destructure node n: key value" 188 | [[ksym vsym] n & body] 189 | `(let [^Node n# ~n 190 | ~ksym (.k n#) ~vsym (.v n#)] 191 | ~@body)) 192 | 193 | 194 | (defmacro kvlr 195 | "destructure node n: key value left right" 196 | [[ksym vsym lsym rsym] n & body] 197 | `(let [^Node n# ~n 198 | ~ksym (.k n#) ~vsym (.v n#) 199 | ~lsym (.l n#) ~rsym (.r n#)] 200 | ~@body)) 201 | 202 | 203 | (defmacro kvlrx 204 | "destructure node n: key value left right size" 205 | [[ksym vsym lsym rsym xsym] n & body] 206 | `(let [^Node n# ~n 207 | ~ksym (.k n#) ~vsym (.v n#) 208 | ~lsym (.l n#) ~rsym (.r n#) 209 | ~xsym (.x n#)] 210 | ~@body)) 211 | 212 | 213 | (defn node-call 214 | "apply f to the destructured constituent values of n. 215 | f is a function taking four parameters: K, V, L, and R, 216 | where K is the key of NODE, V is the value of NODE, L is the left 217 | subtree of NODE, and R is the right subtree of NODE." 218 | [n f] 219 | (apply f (-kvlr n))) 220 | 221 | 222 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 223 | ;; Fundamental Node Operations 224 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 225 | 226 | 227 | (defn node-size ^long [n] 228 | (if (null? n) 229 | 0 230 | (-x n))) 231 | 232 | 233 | (defn node-weight 234 | "returns node weight as appropriate for rotation calculations using 235 | the 'revised non-variant algorithm' for weight balanced binary tree." 236 | ^long [n] 237 | (inc (node-size n))) 238 | 239 | 240 | (defn node-enumerator 241 | "Efficient mechanism to accomplish partial enumeration of 242 | tree-structure into a seq representation without incurring the 243 | overhead of operating over the entire tree. Used internally for 244 | implementation of higher-level collection api routines" 245 | ([n] (node-enumerator n nil)) 246 | ([n enum] 247 | (if (null? n) 248 | enum 249 | (kvlr [k v l r] n 250 | (recur l (list n r enum)))))) 251 | 252 | 253 | (defn node-enumerator-reverse 254 | ([n] (node-enumerator-reverse n nil)) 255 | ([n enum] 256 | (if (null? n) 257 | enum 258 | (kvlr [k v l r] n 259 | (recur r (list n l enum)))))) 260 | 261 | 262 | (defn node-enum-first [enum] 263 | (when (seq enum) 264 | (first enum))) 265 | 266 | 267 | (defn node-enum-rest [enum] 268 | (when (seq enum) 269 | (let [[x1 x2 x3] enum] 270 | (when-not (and (nil? x2) (nil? x3)) 271 | (node-enumerator x2 x3))))) 272 | 273 | 274 | (defn node-enum-prior [enum] 275 | (when (seq enum) 276 | (let [[x1 x2 x3] enum] 277 | (when-not (and (nil? x2) (nil? x3)) 278 | (node-enumerator-reverse x2 x3))))) 279 | 280 | 281 | (defn node-create 282 | "Join left and right subtrees at root k/v. 283 | Assumes all keys in l < k < all keys in r." 284 | [k v ^Node l ^Node r] 285 | (Node. k v l r (+ 1 (if l (.x l) 0) (if r (.x r) 0)))) 286 | 287 | 288 | (defn node-singleton 289 | "Create and return a newly allocated weight balanced 290 | tree containing a single association, that value V with key K." 291 | [k v] 292 | (node-create k v (null) (null))) 293 | 294 | 295 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 296 | ;; Tree Rotations 297 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 298 | 299 | 300 | (defn rotate-single-left 301 | "Perform a single left rotation, moving Y, the left subtree of the 302 | right subtree of A, into the left subtree (shown below). This must 303 | occur in order to restore proper balance when the weight of the left 304 | subtree of node A is less then the weight of the right subtree of 305 | node A multiplied by rotation coefficient +delta+ and the weight of 306 | the left subtree of node B is less than the weight of the right subtree 307 | of node B multiplied by rotation coefficient +gamma+ 308 | 309 | ,---, ,---, 310 | | A | | B | 311 | :---: :---: 312 | : : : : 313 | ,---: :---, ,---: :---, 314 | | X | | B | => | A | | Z | 315 | '---' :---: :---: '---' 316 | ,---: :---, ,---: :---, 317 | | Y | | Z | | X | | Y | 318 | '---' '---' '---' '---' 319 | " 320 | [ak av x b] 321 | (inc-stat! :single-left) 322 | (kvlr [bk bv y z] b 323 | (node-create bk bv 324 | (node-create ak av x y) z))) 325 | 326 | 327 | 328 | (defn rotate-double-left 329 | "Perform a double left rotation, moving Y1, the left subtree of the 330 | left subtree of the right subtree of A, into the left subtree (shown 331 | below). This must occur in order to restore proper balance when the 332 | weight of the left subtree of node A is less then the weight of the 333 | right subtree of node A multiplied by rotation coefficient +delta+ 334 | and the weight of the left subtree of node B is greater than or equal 335 | to the weight of the right subtree of node B multiplied by rotation 336 | coefficient +gamma+. 337 | 338 | ,---, ,---, 339 | | A | | B | 340 | ___:---:___ ____:---:____ 341 | ,---: :---, ,---: :---, 342 | | X | | C | | A | | C | 343 | '---' :---: => :---: :---: 344 | ,---: :---, ,---: :---, ,---: :---, 345 | | B | | Z | | X | | y1| | y2| | Z | 346 | :---: '---' '---' '---' '---' '---' 347 | ,---: :---, 348 | | y1| | y2| 349 | '---' '---' 350 | " 351 | [ak av x c] 352 | (inc-stat! :double-left) 353 | (kvlr [ck cv b z] c 354 | (kvlr [bk bv y1 y2] b 355 | (node-create bk bv 356 | (node-create ak av x y1) 357 | (node-create ck cv y2 z))))) 358 | 359 | 360 | (defn rotate-single-right 361 | "Perform a single right rotation, moving Y, the right subtree of the 362 | left subtree of B, into the right subtree (shown below). This must 363 | occur in order to restore proper balance when the weight of the right 364 | subtree of node B is less then the weight of the left subtree of 365 | node B multiplied by rotation coefficient +delta+ and the weight of the 366 | right subtree of node A is less than the weight of the left subtree 367 | of node A multiplied by rotation coefficient +gamma+. 368 | 369 | ,---, ,---, 370 | | B | | A | 371 | :---: :---: 372 | : : : : 373 | ,---: :---, ,---: :---, 374 | | A | | Z | => | X | | B | 375 | :---: '---' '---' :---: 376 | ,---: :---, ,---: :---, 377 | | X | | Y | | Y | | Z | 378 | '---' '---' '---' '---' 379 | " 380 | [bk bv a z] 381 | (inc-stat! :single-right) 382 | (kvlr [ak av x y] a 383 | (node-create ak av x (node-create bk bv y z)))) 384 | 385 | 386 | (defn rotate-double-right 387 | "Perform a double right rotation, moving Y2, the right subtree of 388 | the right subtree of the left subtree of C, into the right 389 | subtree (shown below). This must occur in order to restore proper 390 | balance when the weight of the right subtree of node C is less then 391 | the weight of the left subtree of node C multiplied by rotation 392 | coefficient +delta+ and the weight of the right subtree of node B 393 | is greater than or equal to the weight of the left subtree of node B 394 | multiplied by rotation coefficient +gamma+. 395 | 396 | ,---, ,---, 397 | | C | | B | 398 | ___:---:___ ____:---:____ 399 | ,---: :---, ,---: :---, 400 | | A | | Z | | A | | C | 401 | :---: '---' => :---: :---: 402 | ,---: :---, ,---: :---, ,---: :---, 403 | | X | | B | | X | | y1| | y2| | Z | 404 | '---' :---: '---' '---' '---' '---' 405 | ,---: :---, 406 | | y1| | y2| 407 | '---' '---' 408 | " 409 | [ck cv a z] 410 | (inc-stat! :double-right) 411 | (kvlr [ak av x b] a 412 | (kvlr [bk bv y1 y2] b 413 | (node-create bk bv 414 | (node-create ak av x y1) 415 | (node-create ck cv y2 z))))) 416 | 417 | 418 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 419 | ;; Fundamental Tree Operations 420 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 421 | 422 | 423 | (defn node-stitch 424 | "Join left and right subtrees at root k/v, performing a single or 425 | double rotation to balance the resulting tree, if needed. Assumes 426 | all keys in l < k < all keys in r, and the relative weight balance 427 | of the left and right subtrees is such that no more than one 428 | single/double rotation will result in each subtree being less than 429 | +delta+ times the weight of the other." 430 | [k v l r] 431 | (let [lw (node-weight l) 432 | rw (node-weight r)] 433 | (cond 434 | (> rw (* +delta+ lw)) (let [rlw (node-weight (-l r)) 435 | rrw (node-weight (-r r))] 436 | (if (< rlw (* +gamma+ rrw)) 437 | (rotate-single-left k v l r) 438 | (rotate-double-left k v l r))) 439 | (> lw (* +delta+ rw)) (let [llw (node-weight (-l l)) 440 | lrw (node-weight (-r l))] 441 | (if (< lrw (* +gamma+ llw)) 442 | (rotate-single-right k v l r) 443 | (rotate-double-right k v l r))) 444 | true (node-create k v l r)))) 445 | 446 | 447 | (defn node-add 448 | ([n k] (node-add n k k)) 449 | ([n k v] 450 | (if (null? n) 451 | (node-singleton k v) 452 | (kvlr [key val l r] n 453 | (let [c (xcompare k key)] 454 | (cond 455 | (neg? c) (node-stitch key val (node-add l k v) r) 456 | (pos? c) (node-stitch key val l (node-add r k v)) 457 | true (node-create key v l r))))))) 458 | 459 | 460 | (defn node-concat3 [k v l r] 461 | (cond 462 | (null? l) (node-add r k v) 463 | (null? r) (node-add l k v) 464 | true (let [lw (node-weight l) 465 | rw (node-weight r)] 466 | (cond 467 | (< (* +delta+ lw) rw) (kvlr [k2 v2 l2 r2] r 468 | (node-stitch k2 v2 469 | (node-concat3 k v l l2) r2)) 470 | (< (* +delta+ rw) lw) (kvlr [k1 v1 l1 r1] l 471 | (node-stitch k1 v1 l1 472 | (node-concat3 k v r1 r))) 473 | true (node-create k v l r))))) 474 | 475 | 476 | (defn node-least 477 | "Return the node containing the minimum key of the tree rooted at n" 478 | [n] 479 | (cond 480 | (null? n) (util/exception "least: empty tree") 481 | (null? (-l n)) n 482 | true (recur (-l n)))) 483 | 484 | 485 | (defn node-greatest 486 | "Return the node containing the minimum key of the tree rooted at n" 487 | [n] 488 | (cond 489 | (null? n) (util/exception "greatest: empty tree") 490 | (null? (-r n)) n 491 | true (recur (-r n)))) 492 | 493 | 494 | (defn node-remove-least 495 | "Return a tree the same as the one rooted at n, with the node 496 | containing the minimum key removed. See node-least." 497 | [n] 498 | (cond 499 | (null? n) (util/exception "remove-least: empty tree") 500 | (null? (-l n)) (-r n) 501 | true (node-stitch (-k n) (-v n) 502 | (node-remove-least (-l n)) (-r n)))) 503 | 504 | 505 | (defn node-remove-greatest 506 | "Return a tree the same as the one rooted at n, with the node 507 | containing the maximum key removed. See node-greatest." 508 | [n] 509 | (cond 510 | (null? n) (util/exception "remove-greatest: empty tree") 511 | (null? (-r n)) (-l n) 512 | true (node-stitch (-k n) (-v n) (-l n) 513 | (node-remove-greatest (-r n))))) 514 | 515 | 516 | (defn node-concat2 517 | "Join two trees, the left rooted at l, and the right at r, 518 | performing a single balancing operation on the resulting tree, if 519 | needed. Assumes all keys in l are smaller than all keys in r, and 520 | the relative balance of l and r is such that no more than one rotation 521 | operation will be required to balance the resulting tree" 522 | [l r] 523 | (cond 524 | (null? l) r 525 | (null? r) l 526 | true (kvlr [k v _ _] (node-least r) 527 | (node-stitch k v l (node-remove-least r))))) 528 | 529 | 530 | (defn node-concat [n1 n2] 531 | (cond 532 | (null? n1) n2 533 | (null? n2) n1 534 | true (let [minimum (node-least n2)] 535 | (node-concat3 (-k minimum) (-v minimum) n1 536 | (node-remove-least n2))))) 537 | 538 | 539 | (defn node-remove [n k] 540 | (if (null? n) 541 | (null) 542 | (kvlr [key val l r] n 543 | (let [c (xcompare k key)] 544 | (cond 545 | (neg? c) (node-stitch key val (node-remove l k) r) 546 | (pos? c) (node-stitch key val l (node-remove r k)) 547 | true (node-concat2 l r)))))) 548 | 549 | 550 | (defn node-find 551 | "Find k (if exists) in only d comparisons (d is depth of tree) 552 | rather than the traditional compare/low compare/high which takes on 553 | avg (* 1.5 (- d 1))" 554 | [n k] 555 | (letfn [(srch [this best] 556 | (cond 557 | (null? this) best 558 | (xcompare< k (-k this)) (recur (-l this) best) 559 | true (recur (-r this) this)))] 560 | (let [best (srch n nil)] 561 | (when best 562 | (when-not (xcompare< (-k best) k) 563 | best))))) 564 | 565 | 566 | (defn node-fold-left 567 | "Fold-left (reduce) the collection from least to greatest." 568 | [f base n] 569 | (loop [e (node-enumerator n) acc base] 570 | (if (nil? e) 571 | acc 572 | (recur (node-enum-rest e) 573 | (f acc (node-enum-first e)))))) 574 | 575 | 576 | (defn node-fold-right 577 | "Fold-right (reduce) the collection from greatest to least." 578 | [f base n] 579 | (loop [e (node-enumerator-reverse n) acc base] 580 | (if (nil? e) 581 | acc 582 | (recur (node-enum-prior e) 583 | (f acc (node-enum-first e)))))) 584 | 585 | 586 | (defn node-filter [p n] 587 | (node-fold-left (fn [x y] 588 | (if (p y) 589 | x 590 | (node-remove x (-k y)))) 591 | n n)) 592 | 593 | 594 | (defn node-invert [n] 595 | (node-fold-left (fn [acc x] 596 | (node-add acc (-v x) (-k x))) 597 | (null) n)) 598 | 599 | 600 | (defn node-iter 601 | "For the side-effect, apply f to each node of the tree rooted at n" 602 | [n f] 603 | (if (null? n) 604 | nil 605 | (kvlr [_ _ l r] n 606 | (node-iter l f) 607 | (f n) 608 | (node-iter r f)))) 609 | 610 | 611 | (defn for-all-nodes 612 | "For the side-effect, apply f to each node of the tree rooted at 613 | n for which the predicate function returns a logically true value" 614 | [n p f] 615 | (node-iter n (fn [n] (when (p n) (f n))))) 616 | 617 | 618 | (defn node-split-lesser [n k] 619 | (cond 620 | (null? n) (null) 621 | (xcompare< k (-k n)) (node-split-lesser (-l n) k) 622 | (xcompare> k (-k n)) (node-concat3 (-k n) (-v n) (-l n) 623 | (node-split-lesser (-r n) k)) 624 | true (-l n))) 625 | 626 | 627 | (defn node-split-greater [n k] 628 | (cond 629 | (null? n) (null) 630 | (xcompare> k (-k n)) (node-split-greater (-r n) k) 631 | (xcompare< k (-k n)) (node-concat3 (-k n) (-v n) 632 | (node-split-greater (-l n) k) (-r n)) 633 | true (-r n))) 634 | 635 | 636 | (defn node-split 637 | "returns a triple (l present r) where: l is the set of elements of 638 | n that are < k, r is the set of elements of n that are > k, present 639 | is false if n contains no element equal to k, or (k v) if n contains 640 | an element with key equal to k" 641 | [n k] 642 | (if (null? n) 643 | (list nil nil nil) 644 | (kvlr [ak v l r] n 645 | (let [c (xcompare k ak)] 646 | (cond 647 | (zero? c) [l (list k v) r] 648 | (neg? c) (let [[ll pres rl] (node-split l k)] 649 | [ll pres (node-concat3 ak v rl r)]) 650 | (pos? c) (let [[lr pres rr] (node-split r k)] 651 | [(node-concat3 ak v l lr) pres rr])))))) 652 | 653 | 654 | 655 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 656 | ;; Fundamental Set Operations 657 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 658 | 659 | 660 | (defn node-set-union [n1 n2] 661 | (cond 662 | (null? n1) n2 663 | (null? n2) n1 664 | true (kvlr [ak av l r] n2 665 | (let [[l1 _ r1] (node-split n1 ak)] 666 | (node-concat3 ak av 667 | (node-set-union l1 l) 668 | (node-set-union r1 r)))))) 669 | 670 | 671 | 672 | (defn node-set-intersection [n1 n2] 673 | (cond 674 | (null? n1) (null) 675 | (null? n2) (null) 676 | true (kvlr [ak av l r] n2 677 | (let [[l1 x r1] (node-split n1 ak)] 678 | (if x 679 | (node-concat3 ak av 680 | (node-set-intersection l1 l) 681 | (node-set-intersection r1 r)) 682 | (node-concat 683 | (node-set-intersection l1 l) 684 | (node-set-intersection r1 r))))))) 685 | 686 | 687 | 688 | (defn node-set-difference [n1 n2] 689 | (cond 690 | (null? n1) (null) 691 | (null? n2) n1 692 | true (kvlr [ak _ l r] n2 693 | (let [[l1 _ r1] (node-split n1 ak)] 694 | (node-concat 695 | (node-set-difference l1 l) 696 | (node-set-difference r1 r)))))) 697 | 698 | 699 | 700 | (defn node-subset? [super sub] 701 | (letfn [(subset? [n1 n2] 702 | (or (null? n1) 703 | (and (<= (node-size n1) (node-size n2)) 704 | (kvlr [k1 _ l1 r1] n1 705 | (kvlr [k2 _ l2 r2] n2 706 | (let [c (xcompare k1 k2)] 707 | (cond 708 | (neg? c) (and 709 | (subset? l1 l2) 710 | (node-find n2 k1) 711 | (subset? r1 n2)) 712 | (pos? c) (and 713 | (subset? r1 r2) 714 | (node-find n2 k1) 715 | (subset? l1 n2)) 716 | true (and 717 | (subset? l1 l2) 718 | (subset? r1 r2)))))))))] 719 | (let [res (or (null? sub) (subset? sub super))] 720 | (if res true false)))) 721 | 722 | 723 | 724 | (defn node-set-compare 725 | "return 3-way ordinal comparison of the trees n1 and n2 with the following 726 | return-value semantics: 727 | 728 | -1 -> n1 is LESS-THAN n2 729 | 0 -> n1 is EQAL-TO n2 730 | +1 -> n1 is GREATER-THAN n2" 731 | [n1 n2] 732 | (loop [e1 (node-enumerator n1 nil) 733 | e2 (node-enumerator n2 nil)] 734 | (cond 735 | (and (nil? e1) (nil? e2)) 0 736 | (nil? e1) -1 737 | (nil? e2) 1 738 | true (let [[v1 r1 ee1] e1 739 | [v2 r2 ee2] e2 740 | c (xcompare (-k v1) (-k v2))] 741 | (if-not (zero? c) 742 | c 743 | (recur 744 | (node-enumerator r1 ee1) 745 | (node-enumerator r2 ee2))))))) 746 | 747 | 748 | 749 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 750 | ;; Fundamental Map Operations 751 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 752 | 753 | 754 | (defn node-merge 755 | "Merge two maps in worst case linear time" 756 | [n1 n2 merge-fn] 757 | (cond 758 | (null? n1) n2 759 | (null? n2) n1 760 | true (kvlr [ak av l r] n2 761 | (let [[l1 x r1] (node-split n1 ak) 762 | val (if x 763 | (merge-fn ak av (-v x)) 764 | av)] 765 | (node-concat3 ak val 766 | (node-merge l1 l) 767 | (node-merge r1 r)))))) 768 | 769 | 770 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 771 | ;; Fundamental Vector Operations 772 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 773 | 774 | 775 | (defn node-nth 776 | "Return nth node from the beginning of the ordered tree rooted at n." 777 | [n index] 778 | (letfn [(srch [n index] 779 | (kvlr [_ _ l r] n 780 | (let [lsize (node-size l)] 781 | (cond 782 | (< index lsize) (recur l index) 783 | (> index lsize) (recur r (- index (inc lsize))) 784 | true n))))] 785 | (if-not (and (<= 0 index) (< index (node-size n))) 786 | (util/exception "index out of range") 787 | (srch n index)))) 788 | 789 | 790 | (defn node-rank 791 | "Return the rank (sequential position) of a given KEY within the 792 | ordered tree rooted at n." 793 | ([n k] (node-rank n k 0)) 794 | ([n k rank] 795 | (cond 796 | (null? n) nil 797 | (xcompare< k (-k n)) (recur (-l n) k rank) 798 | (xcompare> k (-k n)) (recur (-r n) k 799 | (+ 1 rank (node-size (-l n)))) 800 | true (+ rank (node-size (-l n)))))) 801 | 802 | 803 | (defn node-vec 804 | "Return a (lon-lazy) vector of all nodes in tree rooted at n in 805 | the order they occur." 806 | [n] 807 | (node-fold-left conj [] n)) 808 | 809 | 810 | (defn node-vec-reverse 811 | "Return a (lon-lazy) vector of all nodes in tree rooted at n in 812 | reverse order." 813 | [n] 814 | (node-fold-right conj [] n)) 815 | 816 | 817 | (defn node-key-vec 818 | "Return a (lon-lazy) vector of all keys in tree rooted at n 819 | in the order they occur." 820 | [n] 821 | (node-fold-left #(conj %1 (-k %2)) [] n)) 822 | 823 | 824 | (defn node-key-vec-reverse 825 | "Return a (lon-lazy) vector of all keys in tree rooted at n 826 | in reverse-order." 827 | [n] 828 | (node-fold-right #(conj %1 (-k %2)) [] n)) 829 | 830 | 831 | (defn node-value-vec 832 | "Return a (lon-lazy) vector of all values in tree rooted at n 833 | in the order they occur." 834 | [n] 835 | (node-fold-left #(conj %1 (-v %2)) [] n)) 836 | 837 | 838 | (defn node-value-vec-reverse 839 | "Return a (lon-lazy) vector of all values in tree rooted at n 840 | in reverse-order." 841 | [n] 842 | (node-fold-right #(conj %1 (-v %2)) [] n)) 843 | 844 | 845 | 846 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 847 | ;; Fundamental Seq Operations 848 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 849 | 850 | 851 | (defn node-seq 852 | "Return a (lazy) seq of nodes in tree rooted at n in the order they occur." 853 | [n] 854 | (letfn [(node-enum-seq [enum] 855 | (lazy-seq 856 | (when-not (nil? enum) 857 | (cons (node-enum-first enum) 858 | (node-enum-seq (node-enum-rest enum))))))] 859 | (node-enum-seq (node-enumerator n)))) 860 | 861 | 862 | (defn node-seq-reverse 863 | "Return a (lazy) seq of nodes in tree rooted at n in reverse order." 864 | [n] 865 | (letfn [(node-enum-seq-reverse [enum] 866 | (lazy-seq 867 | (when-not (nil? enum) 868 | (cons (node-enum-first enum) 869 | (node-enum-seq-reverse (node-enum-prior enum))))))] 870 | (node-enum-seq-reverse (node-enumerator-reverse n)))) 871 | 872 | 873 | 874 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 875 | ;; Misc 876 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 877 | 878 | 879 | (defn- make-integer-tree 880 | ([size] (reduce node-add (null) (shuffle (range size)))) 881 | ([start end] (reduce node-add (null) (shuffle (range start end)))) 882 | ([start end step] (reduce node-add (null) (shuffle (range start end step))))) 883 | 884 | 885 | (defn- make-integer-set 886 | ([size] (reduce conj #{} (range size))) 887 | ([start end] (reduce conj #{} (range start end))) 888 | ([start end step] (reduce conj #{} (range start end step)))) 889 | 890 | 891 | (defn- make-integer-sorted-set 892 | ([size] (reduce conj (sorted-set) (shuffle (range size)))) 893 | ([start end] (reduce conj (sorted-set) (shuffle (range start end)))) 894 | ([start end step] (reduce conj (sorted-set) (shuffle (range start end step))))) 895 | 896 | 897 | (set! *warn-on-reflection* false) 898 | 899 | 900 | ;; (def i10 (make-integer-tree 10)) 901 | ;; (def i30 (make-integer-tree 20 30)) 902 | ;; (def i (make-integer-tree 30)) 903 | 904 | ;; (map -k (node-seq i)) 905 | ;; => (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 906 | ;; 25 26 27 28 29) 907 | 908 | ;; (map -k (node-seq-reverse i)) 909 | ;; => (29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 910 | ;; 7 6 5 4 3 2 1 0) 911 | 912 | 913 | 914 | ;; (map -k (node-seq (node-filter (comp even? -k) i))) 915 | ;; => (0 2 4 6 8 10 12 14 16 18 20 22 24 26 28) 916 | 917 | ;; (def m (reduce #(apply node-add %1 %2) nil 918 | ;; [[1 :a][2 :b][3 :c][4 :d][5 :e]])) 919 | 920 | ;; (map -k (node-seq (node-invert m))) 921 | ;; => (:a :b :c :d :e) 922 | 923 | 924 | ;; (def a (make-integer-tree 0 100)) 925 | ;; (def b (make-integer-tree 0 100 2)) 926 | ;; (def c (make-integer-tree 0 100 3)) 927 | ;; (def d (make-integer-tree 0 100 4)) 928 | ;; (def e (make-integer-tree 0 100 5)) 929 | 930 | ;; (map -k (node-seq (node-set-intersection b c))) 931 | ;; => (0 6 12 18 24 30 36 42 48 54 60 66 72 78 84 90 96) 932 | 933 | ;; (map -k (node-seq (node-set-union b c))) 934 | ;; => (0 2 3 4 6 8 9 10 12 14 15 16 18 20 21 22 24 26 27 28 30 32 33 34 36 38 935 | ;; 39 40 42 44 45 46 48 50 51 52 54 56 57 58 60 62 63 64 66 68 69 70 72 936 | ;; 74 75 76 78 80 81 82 84 86 87 88 90 92 93 94 96 98 99) 937 | 938 | ;; (map -k (node-seq (node-set-difference b c))) 939 | ;; => (2 4 8 10 14 16 20 22 26 28 32 34 38 40 44 46 50 52 56 58 62 64 68 70 940 | ;; 74 76 80 82 86 88 92 94 98) 941 | 942 | ;; (map -k (node-seq (node-set-difference c b))) 943 | ;; => (3 9 15 21 27 33 39 45 51 57 63 69 75 81 87 93 99) 944 | 945 | 946 | 947 | 948 | ;; (util/avg-timing 10 (class (make-integer-sorted-set 100000))) 949 | ;; 131.3212 950 | 951 | 952 | ;; (util/avg-timing 10 (class (make-integer-tree 100000))) 953 | ;; 148.5932 954 | 955 | 956 | -------------------------------------------------------------------------------- /src/wbtree/types.clj: -------------------------------------------------------------------------------- 1 | (ns wbtree.types 2 | (:use [clj-tuple]) 3 | (:require [wbtree.tree :as tree]) 4 | (:require [wbtree.util :as util])) 5 | 6 | 7 | 8 | 9 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10 | ;; Common Interface 11 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 12 | 13 | (definterface IOrderedCollection 14 | (getRoot [])) 15 | 16 | 17 | 18 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 19 | ;; Ordered Set Collection 20 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 21 | 22 | 23 | (deftype OrderedSet [root _meta] 24 | 25 | IOrderedCollection 26 | (getRoot [_] 27 | root) 28 | 29 | clojure.lang.IMeta 30 | (meta [_] 31 | _meta) 32 | 33 | clojure.lang.IObj 34 | (withMeta [_ m] 35 | (new OrderedSet root m)) 36 | 37 | clojure.lang.Indexed 38 | (nth [_ i] 39 | (tree/-k 40 | (tree/node-nth root i))) 41 | 42 | clojure.lang.Seqable 43 | (seq [_] 44 | (map tree/-k (tree/node-seq root))) 45 | 46 | clojure.lang.Reversible 47 | (rseq [_] 48 | (map tree/-k (tree/node-seq-reverse root))) 49 | 50 | clojure.lang.ILookup 51 | (valAt [_ k not-found] 52 | (if-let [found (tree/node-find root k)] 53 | (tree/-k found) 54 | not-found)) 55 | (valAt [this k] 56 | (.valAt this k nil)) 57 | 58 | clojure.lang.IFn 59 | (invoke [this k not-found] 60 | (.valAt this k not-found)) 61 | (invoke [this k] 62 | (.valAt this k nil)) 63 | 64 | java.lang.Comparable 65 | (compareTo [this o] 66 | (if (identical? this o) 67 | 0 68 | (if (instance? IOrderedCollection o) 69 | (tree/node-set-compare root (.getRoot o)) 70 | (util/exception "unsupported comparison: " this o)))) 71 | 72 | java.util.Collection 73 | (toArray [_] 74 | (object-array (tree/node-key-vec root))) 75 | (add [_ o] 76 | (util/exception UnsupportedOperationException)) 77 | (addAll [_ o] 78 | (util/exception UnsupportedOperationException)) 79 | (remove [_ o] 80 | (util/exception UnsupportedOperationException)) 81 | (removeAll [_ o] 82 | (util/exception UnsupportedOperationException)) 83 | (retainAll [_ o] 84 | (util/exception UnsupportedOperationException)) 85 | 86 | java.util.Set 87 | (size [_] 88 | (tree/node-size root)) 89 | (isEmpty [_] 90 | (tree/null? root)) 91 | (iterator [this] 92 | (clojure.lang.SeqIterator. (seq this))) 93 | (containsAll [this s] 94 | (every? #(.contains this %) s)) 95 | 96 | 97 | clojure.lang.IPersistentSet 98 | (equiv [this o] 99 | (if (identical? this o) 100 | 0 101 | (if (instance? IOrderedCollection o) 102 | (zero? (tree/node-set-compare root (.getRoot o))) 103 | (if (set? o) 104 | (zero? (.equiv (set (tree/node-key-vec root)) o)) 105 | (util/exception "unsupported comparison: " this o))))) 106 | (count [_] 107 | (tree/node-size root)) 108 | (empty [_] 109 | (new OrderedSet (tree/null) {})) 110 | (contains [_ k] 111 | (if (tree/node-find root k) 112 | true 113 | false)) 114 | (disjoin [this k] 115 | (new OrderedSet (tree/node-remove root k) _meta)) 116 | (cons [this k] 117 | (new OrderedSet (tree/node-add root k) _meta)) 118 | ) 119 | 120 | 121 | (defmethod print-method OrderedSet [s w] 122 | ((get (methods print-method) clojure.lang.IPersistentSet) s w)) 123 | 124 | 125 | (defn ordered-set 126 | ([] 127 | (ordered-set [])) 128 | ([coll] 129 | (->OrderedSet (reduce tree/node-add (tree/null) coll) {}))) 130 | 131 | 132 | 133 | 134 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 135 | ;; Ordered Map Collection 136 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 137 | 138 | (defn pair [k v] 139 | (new clojure.lang.MapEntry k v)) 140 | 141 | 142 | (deftype OrderedMap [root _meta] 143 | 144 | IOrderedCollection 145 | (getRoot [_] 146 | root) 147 | 148 | clojure.lang.IMeta 149 | (meta [_] 150 | _meta) 151 | 152 | clojure.lang.IObj 153 | (withMeta [_ m] 154 | (new OrderedMap root m)) 155 | 156 | clojure.lang.Indexed 157 | (nth [_ i] 158 | (tree/-kv 159 | (tree/node-nth root i))) 160 | 161 | clojure.lang.MapEquivalence 162 | 163 | clojure.lang.Counted 164 | (count [this] 165 | (tree/node-size root)) 166 | 167 | clojure.lang.Seqable 168 | (seq [_] 169 | (map tree/-kv (tree/node-seq root))) 170 | 171 | clojure.lang.Reversible 172 | (rseq [_] 173 | (map tree/-kv (tree/node-seq-reverse root))) 174 | 175 | clojure.lang.ILookup 176 | (valAt [_ k not-found] 177 | (if-let [found (tree/node-find root k)] 178 | (tree/-v found) 179 | not-found)) 180 | (valAt [this k] 181 | (.valAt this k nil)) 182 | 183 | clojure.lang.IFn 184 | (invoke [this k not-found] 185 | (.valAt this k not-found)) 186 | (invoke [this k] 187 | (.valAt this k nil)) 188 | 189 | java.lang.Comparable 190 | (compareTo [this o] 191 | (if (identical? this o) 192 | 0 193 | (if (instance? IOrderedCollection o) 194 | (tree/node-set-compare root (.getRoot o)) 195 | (util/exception "unsupported comparison: " this o)))) 196 | 197 | clojure.lang.Associative 198 | (containsKey [_ k] 199 | (not (nil? (tree/node-find root k)))) 200 | (entryAt [_ k] 201 | (when-let [x (tree/node-find root k)] 202 | (tree/-kv x))) 203 | (assoc [_ k v] 204 | (new OrderedMap (tree/node-add root k v) _meta)) 205 | (empty [this] 206 | (new OrderedMap (tree/null) {})) 207 | 208 | java.util.Map 209 | (get [this k] 210 | (.valAt this k)) 211 | (isEmpty [_] 212 | (tree/null? root)) 213 | (size [_] 214 | (tree/node-size root)) 215 | (keySet [_] 216 | (set (tree/node-key-vec root))) 217 | (put [_ _ _] 218 | (throw (UnsupportedOperationException.))) 219 | (putAll [_ _] 220 | (throw (UnsupportedOperationException.))) 221 | (clear [_] 222 | (throw (UnsupportedOperationException.))) 223 | (values [_] 224 | (map tree/-v (tree/node-seq root))) 225 | (entrySet [this] 226 | (set (seq this))) 227 | (iterator [this] 228 | (clojure.lang.SeqIterator. (seq this))) 229 | 230 | clojure.lang.IPersistentCollection 231 | (equiv [this x] 232 | (and (map? x) (= x (into {} this)))) 233 | 234 | (cons [this o] 235 | (if (map? o) 236 | (reduce #(apply assoc %1 %2) this o) 237 | (.assoc this (nth o 0) (nth o 1)))) 238 | 239 | clojure.lang.IPersistentMap 240 | (assocEx [this k v] 241 | (if (contains? this k) 242 | (throw (Exception. "Key or value already present")) 243 | (assoc this k v))) 244 | (without [_ k] 245 | (new OrderedMap (tree/node-remove root k) _meta))) 246 | 247 | 248 | 249 | 250 | (defmethod print-method OrderedMap [m w] 251 | ((get (methods print-method) clojure.lang.IPersistentMap) m w)) 252 | 253 | 254 | (defn ordered-map 255 | ([] 256 | (ordered-map [])) 257 | ([coll] 258 | (->OrderedMap (reduce (fn [acc [k v]] 259 | (tree/node-add acc k v)) 260 | (tree/null) (seq coll)) 261 | {}))) 262 | 263 | 264 | 265 | 266 | ;; (ordered-map) 267 | ;; => {} 268 | 269 | ;; (seq (ordered-map [[:b "b"] [:c "c"] [:a "a"] [:d "d"]])) 270 | ;; => ([:a "a"] [:b "b"] [:c "c"] [:d "d"]) 271 | 272 | ;; (ordered-map {:a "a", :b "b", :c "c", :d "d"}) 273 | ;; => {:a "a", :b "b", :c "c", :d "d"} 274 | 275 | ;; (-> (ordered-map) (assoc :b "b") (assoc :a "a") (assoc :c "c")) 276 | ;; => {:a "a", :b "b", :c "c"} 277 | 278 | ;; ((ordered-map {:a "a", :b "b", :c "c", :d "d"}) :c) 279 | ;; => "c" 280 | 281 | ;; ((ordered-map {:a "a", :b "b", :c "c", :d "d"}) :z ::not-found) 282 | ;; => :wbtree.types/not-found 283 | -------------------------------------------------------------------------------- /src/wbtree/util.clj: -------------------------------------------------------------------------------- 1 | (ns wbtree.util 2 | (:require [clojure.pprint :as pp]) 3 | (:require [clojure.repl])) 4 | 5 | 6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7 | ;; Control Flow 8 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9 | 10 | (defmacro returning 11 | "Compute a return value, then execute other forms for side effects. 12 | Like prog1 in common lisp, or a (do) that returns the first form." 13 | [value & forms] 14 | `(let [value# ~value] 15 | ~@forms 16 | value#)) 17 | 18 | (defmacro returning-bind 19 | "Compute a return value, bind that value to provided sym, then 20 | execute other forms for side effects within the lexical scope of 21 | that binding. The return value of a returning-bind block will be 22 | the value computed by retn-form. Similar in concept to Paul 23 | Graham's APROG1, or what is commonly found in CL libraries as 24 | PROG1-BIND. This macro is especially handy when one needs to 25 | interact with stateful resources such as io. 26 | 27 | Example: 28 | 29 | (returning-bind [x (inc 41)] 30 | (println :returning x) 31 | (println 3.141592654)) 32 | 33 | PRINTS: :returning 42 34 | 3.141592654 35 | RETURNS: 42" 36 | [[sym retn-form] & body] 37 | `(let [val# ~retn-form 38 | ~sym val#] 39 | ~@body 40 | val#)) 41 | 42 | 43 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 44 | ;; Collections 45 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 46 | 47 | (defn indexed 48 | "Returns a lazy sequence of [index, item] pairs, where items come 49 | from 's' and indexes count up from zero. 50 | (indexed '(a b c d)) => ([0 a] [1 b] [2 c] [3 d])" 51 | [s] 52 | (map vector (range) s)) 53 | 54 | (defn positions 55 | "Returns a lazy sequence containing the positions at which pred 56 | is true for items in coll." 57 | [pred coll] 58 | (for [[idx elt] (indexed coll) :when (pred elt)] idx)) 59 | 60 | (defn split-vec 61 | "Split the given vector at the provided offsets using subvec. Supports 62 | negative offsets." 63 | [v & ns] 64 | (let [ns (map #(if (neg? %) (+ % (count v)) %) ns)] 65 | (lazy-seq 66 | (if-let [n (first ns)] 67 | (cons (subvec v 0 n) 68 | (apply split-vec 69 | (subvec v n) 70 | (map #(- % n) (rest ns)))) 71 | (list v))))) 72 | 73 | (defn knit 74 | "Takes a list of functions (f1 f2 ... fn) and returns a new function F. 75 | F takes a collection of size n (x1 x2 ... xn) and returns a vector 76 | [(f1 x1) (f2 x2) ... (fn xn)]." 77 | [& fs] 78 | (fn [arg-coll] split-vec (vec (map #(% %2) fs arg-coll)))) 79 | 80 | (defn rmerge 81 | "Recursive merge of the provided maps." 82 | [& maps] 83 | (if (every? map? maps) 84 | (apply merge-with rmerge maps) 85 | (last maps))) 86 | 87 | (defn mappend 88 | "maps elements in list and finally appends all resulted lists." 89 | [f & seqs] 90 | (apply concat (apply map f seqs))) 91 | 92 | 93 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 94 | ;; Namespace Introspection 95 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 96 | 97 | (defn ns-docs 98 | "Prints docs for all public symbols in given namespace 99 | http://blog.twonegatives.com/post/42435179639/ns-docs 100 | https://gist.github.com/timvisher/4728530" 101 | [ns-symbol] 102 | (dorun 103 | (map (comp #'clojure.repl/print-doc meta) 104 | (->> ns-symbol 105 | ns-publics 106 | sort 107 | vals)))) 108 | 109 | (defn symbolic-name-from-var 110 | [var] 111 | (clojure.string/join "/" ((juxt (comp str :ns) :name) (meta var)))) 112 | 113 | 114 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 115 | ;; Timing and Performance Metric 116 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 117 | 118 | 119 | (defmacro with-timing [& body] 120 | `(let [start# (System/nanoTime) ret# ~(cons 'do body)] 121 | [ret# (/ (double (- (System/nanoTime) start#)) 1000000.0)])) 122 | 123 | (defmacro avg-timing [iter & body] 124 | `(let [iter# ~iter] 125 | (/ (reduce + 126 | (map second 127 | (repeatedly iter# 128 | #(let [start# (System/nanoTime) ret# ~(cons 'do body)] 129 | [ret# (/ (double (- (System/nanoTime) start#)) 130 | 1000000.0)])))) 131 | iter#))) 132 | 133 | (defmacro run-and-measure-timing [expr] 134 | `(let [start-time# (System/currentTimeMillis) 135 | response# ~expr 136 | end-time# (System/currentTimeMillis)] 137 | {:time-taken (- end-time# start-time#) 138 | :response response# 139 | :start-time start-time# 140 | :end-time end-time#})) 141 | 142 | 143 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 144 | ;; Debugging 145 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 146 | 147 | (defmacro wrap-fn [name args & body] 148 | `(let [old-fn# (var-get (var ~name)) 149 | new-fn# (fn [& p#] 150 | (let [~args p#] 151 | (do ~@body))) 152 | wrapper# (fn [& params#] 153 | (if (= ~(count args) (count params#)) 154 | (apply new-fn# params#) 155 | (apply old-fn# params#)))] 156 | (alter-var-root (var ~name) (constantly wrapper#)))) 157 | 158 | 159 | (defmacro ppmx [form] 160 | `(do 161 | (pp/cl-format *out* ";;; Macroexpansion:~%~% ~S~%~%;;; First Step~%~%" 162 | '~form) 163 | (pp/pprint (macroexpand-1 '~form)) 164 | (pp/cl-format *out* "~%;;; Full expansion:~%~%") 165 | (pp/pprint (macroexpand '~form)) 166 | (println ""))) 167 | 168 | 169 | ;;; 170 | ;;; From: The Joy of Clojure 171 | ;;; 172 | 173 | (defn contextual-eval [ctx expr] 174 | (eval 175 | `(let [~@(mapcat (fn [[k v]] [k `'~v]) ctx)] 176 | ~expr))) 177 | 178 | (defmacro local-context [] 179 | (let [symbols (keys &env)] 180 | (zipmap (map (fn [sym] `(quote ~sym)) symbols) symbols))) 181 | 182 | (defn readr [prompt exit-code] 183 | (let [input (clojure.main/repl-read prompt exit-code)] 184 | (if (= input ::r) 185 | exit-code 186 | input))) 187 | 188 | (defmacro break [] 189 | `(clojure.main/repl 190 | :prompt #(print "debug=> ") 191 | :read readr 192 | :eval (partial contextual-eval (local-context)))) 193 | 194 | 195 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 196 | ;; IO 197 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 198 | 199 | (defn lines-of-file [file-name] 200 | (line-seq 201 | (java.io.BufferedReader. 202 | (java.io.InputStreamReader. 203 | (java.io.FileInputStream. file-name))))) 204 | 205 | 206 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 207 | ;; Exceptions 208 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 209 | 210 | 211 | (defmacro exception [& [param & more :as params]] 212 | (if (class? param) 213 | `(throw (new ~param (str ~@(interpose " " more)))) 214 | `(throw (Exception. (str ~@(interpose " " params)))))) 215 | 216 | 217 | (defmacro ignore-exceptions [& body] 218 | `(try 219 | ~@body 220 | (catch Exception e# nil))) 221 | 222 | 223 | 224 | 225 | 226 | ;; (defn to-byte-array [x] 227 | ;; (let [baos (ByteArrayOutputStream.) 228 | ;; oos (ObjectOutputStream. baos)] 229 | ;; (pr oos x) 230 | ;; (.close oos) 231 | ;; (.toByteArray baos))) 232 | -------------------------------------------------------------------------------- /test/wbtree/clojure_set.txt: -------------------------------------------------------------------------------- 1 | ; Copyright (c) Rich Hickey. All rights reserved. 2 | ; The use and distribution terms for this software are covered by the 3 | ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | ; which can be found in the file epl-v10.html at the root of this distribution. 5 | ; By using this software in any fashion, you are agreeing to be bound by 6 | ; the terms of this license. 7 | ; You must not remove this notice, or any other, from this software. 8 | 9 | ;; Author: Frantisek Sodomka 10 | 11 | 12 | (ns clojure.test-clojure.clojure-set 13 | (:use clojure.test) 14 | (:require [clojure.set :as set])) 15 | 16 | (deftest test-union 17 | (are [x y] (= x y) 18 | (set/union) #{} 19 | 20 | ; identity 21 | (set/union #{}) #{} 22 | (set/union #{1}) #{1} 23 | (set/union #{1 2 3}) #{1 2 3} 24 | 25 | ; 2 sets, at least one is empty 26 | (set/union #{} #{}) #{} 27 | (set/union #{} #{1}) #{1} 28 | (set/union #{} #{1 2 3}) #{1 2 3} 29 | (set/union #{1} #{}) #{1} 30 | (set/union #{1 2 3} #{}) #{1 2 3} 31 | 32 | ; 2 sets 33 | (set/union #{1} #{2}) #{1 2} 34 | (set/union #{1} #{1 2}) #{1 2} 35 | (set/union #{2} #{1 2}) #{1 2} 36 | (set/union #{1 2} #{3}) #{1 2 3} 37 | (set/union #{1 2} #{2 3}) #{1 2 3} 38 | 39 | ; 3 sets, some are empty 40 | (set/union #{} #{} #{}) #{} 41 | (set/union #{1} #{} #{}) #{1} 42 | (set/union #{} #{1} #{}) #{1} 43 | (set/union #{} #{} #{1}) #{1} 44 | (set/union #{1 2} #{2 3} #{}) #{1 2 3} 45 | 46 | ; 3 sets 47 | (set/union #{1 2} #{3 4} #{5 6}) #{1 2 3 4 5 6} 48 | (set/union #{1 2} #{2 3} #{1 3 4}) #{1 2 3 4} 49 | 50 | ; different data types 51 | (set/union #{1 2} #{:a :b} #{nil} #{false true} #{\c "abc"} #{[] [1 2]} 52 | #{{} {:a 1}} #{#{} #{1 2}}) 53 | #{1 2 :a :b nil false true \c "abc" [] [1 2] {} {:a 1} #{} #{1 2}} 54 | 55 | ; different types of sets 56 | (set/union (hash-set) (hash-set 1 2) (hash-set 2 3)) 57 | (hash-set 1 2 3) 58 | (set/union (sorted-set) (sorted-set 1 2) (sorted-set 2 3)) 59 | (sorted-set 1 2 3) 60 | (set/union (hash-set) (hash-set 1 2) (hash-set 2 3) 61 | (sorted-set) (sorted-set 4 5) (sorted-set 5 6)) 62 | (hash-set 1 2 3 4 5 6) ; also equals (sorted-set 1 2 3 4 5 6) 63 | )) 64 | 65 | (deftest test-intersection 66 | ; at least one argument is needed 67 | (is (thrown? IllegalArgumentException (set/intersection))) 68 | 69 | (are [x y] (= x y) 70 | ; identity 71 | (set/intersection #{}) #{} 72 | (set/intersection #{1}) #{1} 73 | (set/intersection #{1 2 3}) #{1 2 3} 74 | 75 | ; 2 sets, at least one is empty 76 | (set/intersection #{} #{}) #{} 77 | (set/intersection #{} #{1}) #{} 78 | (set/intersection #{} #{1 2 3}) #{} 79 | (set/intersection #{1} #{}) #{} 80 | (set/intersection #{1 2 3} #{}) #{} 81 | 82 | ; 2 sets 83 | (set/intersection #{1 2} #{1 2}) #{1 2} 84 | (set/intersection #{1 2} #{3 4}) #{} 85 | (set/intersection #{1 2} #{1}) #{1} 86 | (set/intersection #{1 2} #{2}) #{2} 87 | (set/intersection #{1 2 4} #{2 3 4 5}) #{2 4} 88 | 89 | ; 3 sets, some are empty 90 | (set/intersection #{} #{} #{}) #{} 91 | (set/intersection #{1} #{} #{}) #{} 92 | (set/intersection #{1} #{1} #{}) #{} 93 | (set/intersection #{1} #{} #{1}) #{} 94 | (set/intersection #{1 2} #{2 3} #{}) #{} 95 | 96 | ; 3 sets 97 | (set/intersection #{1 2} #{2 3} #{5 2}) #{2} 98 | (set/intersection #{1 2 3} #{1 3 4} #{1 3}) #{1 3} 99 | (set/intersection #{1 2 3} #{3 4 5} #{8 2 3}) #{3} 100 | 101 | ; different types of sets 102 | (set/intersection (hash-set 1 2) (hash-set 2 3)) #{2} 103 | (set/intersection (sorted-set 1 2) (sorted-set 2 3)) #{2} 104 | (set/intersection 105 | (hash-set 1 2) (hash-set 2 3) 106 | (sorted-set 1 2) (sorted-set 2 3)) #{2} )) 107 | 108 | (deftest test-difference 109 | (are [x y] (= x y) 110 | ; identity 111 | (set/difference #{}) #{} 112 | (set/difference #{1}) #{1} 113 | (set/difference #{1 2 3}) #{1 2 3} 114 | 115 | ; 2 sets 116 | (set/difference #{1 2} #{1 2}) #{} 117 | (set/difference #{1 2} #{3 4}) #{1 2} 118 | (set/difference #{1 2} #{1}) #{2} 119 | (set/difference #{1 2} #{2}) #{1} 120 | (set/difference #{1 2 4} #{2 3 4 5}) #{1} 121 | 122 | ; 3 sets 123 | (set/difference #{1 2} #{2 3} #{5 2}) #{1} 124 | (set/difference #{1 2 3} #{1 3 4} #{1 3}) #{2} 125 | (set/difference #{1 2 3} #{3 4 5} #{8 2 3}) #{1} )) 126 | 127 | (deftest test-select 128 | (are [x y] (= x y) 129 | (set/select integer? #{}) #{} 130 | (set/select integer? #{1 2}) #{1 2} 131 | (set/select integer? #{1 2 :a :b :c}) #{1 2} 132 | (set/select integer? #{:a :b :c}) #{}) ) 133 | 134 | (def compositions 135 | #{{:name "Art of the Fugue" :composer "J. S. Bach"} 136 | {:name "Musical Offering" :composer "J. S. Bach"} 137 | {:name "Requiem" :composer "Giuseppe Verdi"} 138 | {:name "Requiem" :composer "W. A. Mozart"}}) 139 | 140 | (deftest test-project 141 | (are [x y] (= x y) 142 | (set/project compositions [:name]) #{{:name "Art of the Fugue"} 143 | {:name "Requiem"} 144 | {:name "Musical Offering"}} 145 | (set/project compositions [:composer]) #{{:composer "W. A. Mozart"} 146 | {:composer "Giuseppe Verdi"} 147 | {:composer "J. S. Bach"}} 148 | (set/project compositions [:year]) #{{}} 149 | (set/project #{{}} [:name]) #{{}} )) 150 | 151 | (deftest test-rename 152 | (are [x y] (= x y) 153 | (set/rename compositions {:name :title}) #{{:title "Art of the Fugue" :composer "J. S. Bach"} 154 | {:title "Musical Offering" :composer "J. S. Bach"} 155 | {:title "Requiem" :composer "Giuseppe Verdi"} 156 | {:title "Requiem" :composer "W. A. Mozart"}} 157 | (set/rename compositions {:year :decade}) #{{:name "Art of the Fugue" :composer "J. S. Bach"} 158 | {:name "Musical Offering" :composer "J. S. Bach"} 159 | {:name "Requiem" :composer "Giuseppe Verdi"} 160 | {:name "Requiem" :composer "W. A. Mozart"}} 161 | (set/rename #{{}} {:year :decade}) #{{}})) 162 | 163 | (deftest test-rename-keys 164 | (are [x y] (= x y) 165 | (set/rename-keys {:a "one" :b "two"} {:a :z}) {:z "one" :b "two"} 166 | (set/rename-keys {:a "one" :b "two"} {:a :z :c :y}) {:z "one" :b "two"} 167 | (set/rename-keys {:a "one" :b "two" :c "three"} {:a :b :b :a}) {:a "two" :b "one" :c "three"})) 168 | 169 | (deftest test-index 170 | (are [x y] (= x y) 171 | (set/index #{{:c 2} {:b 1} {:a 1 :b 2}} [:b]) {{:b 2} #{{:a 1 :b 2}}, {:b 1} #{{:b 1}} {} #{{:c 2}}} 172 | )) 173 | 174 | (deftest test-join 175 | (are [x y] (= x y) 176 | (set/join compositions compositions) compositions 177 | (set/join compositions #{{:name "Art of the Fugue" :genre "Classical"}}) 178 | #{{:name "Art of the Fugue" :composer "J. S. Bach" :genre "Classical"}} 179 | )) 180 | 181 | (deftest test-map-invert 182 | (are [x y] (= x y) 183 | (set/map-invert {:a "one" :b "two"}) {"one" :a "two" :b})) 184 | 185 | (deftest test-subset? 186 | (are [sub super] (set/subset? sub super) 187 | #{} #{} 188 | #{} #{1} 189 | #{1} #{1} 190 | #{1 2} #{1 2} 191 | #{1 2} #{1 2 42} 192 | #{false} #{false} 193 | #{nil} #{nil} 194 | #{nil} #{nil false} 195 | #{1 2 nil} #{1 2 nil 4}) 196 | (are [notsub super] (not (set/subset? notsub super)) 197 | #{1} #{} 198 | #{2} #{1} 199 | #{1 3} #{1} 200 | #{nil} #{false} 201 | #{false} #{nil} 202 | #{false nil} #{nil} 203 | #{1 2 nil} #{1 2})) 204 | 205 | (deftest test-superset? 206 | (are [super sub] (set/superset? super sub) 207 | #{} #{} 208 | #{1} #{} 209 | #{1} #{1} 210 | #{1 2} #{1 2} 211 | #{1 2 42} #{1 2} 212 | #{false} #{false} 213 | #{nil} #{nil} 214 | #{false nil} #{false} 215 | #{1 2 4 nil false} #{1 2 nil}) 216 | (are [notsuper sub] (not (set/superset? notsuper sub)) 217 | #{} #{1} 218 | #{2} #{1} 219 | #{1} #{1 3} 220 | #{nil} #{false} 221 | #{false} #{nil} 222 | #{nil} #{false nil} 223 | #{nil 2 3} #{false nil 2 3})) 224 | 225 | -------------------------------------------------------------------------------- /test/wbtree/tree_test.clj: -------------------------------------------------------------------------------- 1 | (ns wbtree.tree-test 2 | (:require [clojure.test :refer :all] 3 | [wbtree.tree :refer :all])) 4 | 5 | (deftest a-test 6 | (testing "FIXME." 7 | (is (= 0 0)))) 8 | --------------------------------------------------------------------------------