├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── doc
    ├── LL3-collections-enumerators.md
    ├── LL3-collections-enumerators.txt
    ├── LL3-collections-talk.pdf
    ├── adams.pdf
    ├── hirai-yamamoto.pdf
    ├── making-data-structures-persistent.pdf
    └── scheme-wttree.txt
├── project.clj
├── resources
    └── rdfs.edn
├── src
    └── wbtree
    │   ├── tree.clj
    │   ├── types.clj
    │   └── util.clj
└── test
    └── wbtree
        ├── clojure_set.txt
        └── tree_test.clj


/.gitignore:
--------------------------------------------------------------------------------
 1 | pom.xml
 2 | pom.xml.asc
 3 | *jar
 4 | /lib/
 5 | /classes/
 6 | /target/
 7 | /checkouts/
 8 | .lein-deps-sum
 9 | .lein-repl-history
10 | .lein-plugins/
11 | .lein-failures
12 | *~
13 | .nrepl-port


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: clojure
2 | script: lein test
3 | jdk:
4 |   - oraclejdk8
5 |   - openjdk7
6 | 
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor tocontrol, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of Washington and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # clj-wbtree
 2 | 
 3 | #### _Weight Balanced Functional Binary Tree (Hirai-Yamamoto Tree)_
 4 | ------
 5 | [![Build Status](https://travis-ci.org/danlentz/clj-wbtree.svg)](https://travis-ci.org/danlentz/clj-wbtree)[![Dependency Status](https://www.versioneye.com/user/projects/54d1e1b73ca0840b19000070/badge.svg?style=flat)](https://www.versioneye.com/user/projects/54d1e1b73ca0840b19000070)
 6 | 
 7 | 
 8 | ## Overview
 9 | 
10 | 
11 | This is an implementation of a weight-balanced binary tree data
12 |  structure based on the following references:
13 | 
14 | -  Adams (1992)
15 |     'Implementing Sets Efficiently in a Functional Language'
16 |     Technical Report CSTR 92-10, University of Southampton.
17 | 
18 | -  Hirai and Yamamoto (2011)
19 |     'Balancing Weight-Balanced Trees'
20 |     Journal of Functional Programming / 21 (3):
21 |     Pages 287-307
22 | 
23 | -  Oleg Kiselyov
24 |     'Towards the best collection API, A design of the overall optimal
25 |     collection traversal interface'
26 |     <http://pobox.com/~oleg/ftp/papers/LL3-collections-enumerators.txt>
27 | 
28 | -  Nievergelt and Reingold (1972)
29 |     'Binary Search Trees of Bounded Balance'
30 |     STOC '72 Proceedings
31 |     4th Annual ACM symposium on Theory of Computing
32 |     Pages 137-142 
33 | 
34 | -  Driscoll, Sarnak, Sleator, and Tarjan (1989)
35 |     'Making Data Structures Persistent'
36 |     Journal of Computer and System Sciences Volume 38 Issue 1, February 1989
37 |     18th Annual ACM Symposium on Theory of Computing
38 |     Pages 86-124
39 | 
40 | -  MIT Scheme weight balanced tree as reimplemented by Yoichi Hirai
41 |     and Kazuhiko Yamamoto using the revised non-variant algorithm recommended
42 |     integer balance parameters from (Hirai/Yamomoto 2011).
43 | 
44 | ## Features
45 | 
46 | Some unique features of a weight-balanced binary-tree as compared with
47 | other binary tree algorithms:
48 | 
49 | 
50 | - Less frequent rebalancing as compared to height-balanced
51 |   implementations such as red-black or avl trees.
52 | 
53 | - Logarithmic rank/at-rank indexed element access.
54 | 
55 | 
56 | This particular implementation also provides additional useful
57 | qualities such as lazy traversal, partial enumeration, universal order,
58 | and search for a given key in only d comparisons (where d is depth of
59 | tree) rather than the traditional compare/low compare/high which takes
60 | on average (* 1.5 (- d 1)) comparisons.  In addition, a comprehensive
61 | functional binary tree api provides a rich collection of tools
62 | for the creation of efficient higher-order data structures.
63 | 
64 | 
65 | ## Usage
66 | 
67 | ### Leiningen
68 | 
69 | [![Clojars Project](http://clojars.org/danlentz/clj-wbtree/latest-version.svg)](http://clojars.org/danlentz/clj-wbtree)
70 | 
71 | ## Examples
72 | 
73 | ## Credits
74 | 
75 | Warm appreciation and thanks for the skill and effort of Jason Wolfe and
76 | Zach Tellman whose collaborative help tuning this tree for Clojure were
77 | indespensible.
78 | 
79 | ## License
80 | 
81 | Copyright © 2014 FIXME
82 | 
83 | Distributed under the Eclipse Public License either version 1.0 or (at
84 | your option) any later version.
85 | 


--------------------------------------------------------------------------------
/doc/LL3-collections-enumerators.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # Towards the best collection API
  4 | 
  5 | ## Abstract
  6 | 
  7 | Most programming languages support collections, represented by an
  8 | in-memory data structure, a file, a database, or a generating
  9 | function. A programming language system gives us typically one of the
 10 | two interfaces to systematically access elements of a collection. One
 11 | traversal API is based on enumerators -- e.g., for-each, map, filter
 12 | higher-order procedures -- of which the most general is fold. The
 13 | second approach relies on streams, a.k.a. cursors, lazy
 14 | lists. Generators such as the ones in Icon, Ruby and Python are a
 15 | hybrid approach.
 16 | 
 17 | This article presents a design of the overall optimal collection
 18 | traversal interface, which is based on a left-fold-like combinator
 19 | with premature termination. We argue from the practical standpoint
 20 | that such an interface is superior: in efficiency; in ease of
 21 | programming; in more predictable resource utilization and avoidance of
 22 | resource leaks.
 23 | 
 24 | We also demonstrate a procedure that converts _any_ left-fold-like
 25 | enumerator for any collection into the corresponding
 26 | cursor. Therefore, the stream-based interface, when it is truly needed
 27 | or preferred, can be automatically and generically derived from the
 28 | proposed interface.
 29 | 
 30 | We present two variants of the left-fold enumerator: for languages
 31 | with and without first-class continuations. The proposed traversal
 32 | interface has been implemented in a Scheme database access API used in
 33 | a production environment. The generic-left-fold-based interface
 34 | underlies the Collection SRFI (Scheme Request for Implementation) and
 35 | is being considered for an Oracle database interface in Haskell.
 36 | 
 37 | 
 38 | ## Introduction and terminology
 39 | 
 40 | Access to collections cuts through many programming languages. By a
 41 | collection we mean a hash table; a file as a collection of lines,
 42 | words, or bytes; a file system directory; an XML document; a suffix
 43 | tree; or a "dynamic" collection produced by a database query or any
 44 | other generating function.
 45 | 
 46 | There are two basic approaches to systematically access collection
 47 | elements. One approach relies on enumerators such as 'map', 'for-each'
 48 | and 'filter'. The most general of those is fold, which lets us
 49 | maintain our state during the traversal. The other traversal API is
 50 | based on lazy lists (a.k.a. cursors, or streams).
 51 | 
 52 | A word about terminology is in order, as it is rather confusing.  We
 53 | will call a higher-order for-each-type procedure an enumerator. It is
 54 | this procedure that takes a collection and a handler, and applies the
 55 | handler to each element of the collection in turn. The handler itself
 56 | will be called an iteratee, because it is being applied -- by an
 57 | active entity, the enumeratOR.  An enumerator is presumably privy to
 58 | the internals of a collection; it knows how to fetch elements in some
 59 | (specified) order, and knows when to stop. A handler does not have
 60 | this knowledge, nor is it supposed to. Throughout the article we
 61 | eschew the commonly used term 'iterator': in C++ it means an accessor,
 62 | which does not iterate over a collection. Rather, it is _being_
 63 | iterated. On the other hand, languages like OCaml provide a procedure
 64 | 'iter' that actively traverses a collection, applying a user-defined
 65 | function to elements.
 66 | 
 67 | The next section argues from the practical standpoint against cursors
 68 | and for enumerators. We contend that an enumerator such as left fold
 69 | should be the primary means of traversing and obtaining all values of
 70 | a collection. Enumerators are superior to cursors:
 71 | 
 72 | - in efficiency
 73 | - in ease of programming
 74 | - in more predictable resource utilization and avoidance of resource leaks.
 75 | 
 76 | The latter is of special importance for collections backed by external
 77 | resources such as file handles and database connections.
 78 | 
 79 | Section 3 proposes the overall optimal enumeration interface, which
 80 | adds premature termination and explicit multiple state variables to
 81 | the left fold. Section 4 discusses generators.
 82 | 
 83 | Given a stream, we can always construct the corresponding
 84 | enumerator. It is perhaps less appreciated that given a left fold
 85 | enumerator, we can always construct the corresponding stream: we can
 86 | invert an enumerator inside out. Section 5 describes the inversion
 87 | procedure, which is fully generic and works for any
 88 | collection. Because the enumerator interface subsumes the cursor-based
 89 | one and because the former is more efficient and less error-prone, a
 90 | collection API should offer enumerators as the primary, native
 91 | traversal interface.
 92 | 
 93 | The automatical enumerator inversion procedure in Section 5 relies on
 94 | first-class (delimited) continuations. However, we can write a similar
 95 | enumerator inversion even in a language that lacks first-class
 96 | continuations.  Section 6 demonstrates a non-recursive left-fold-like
 97 | combinator that can be instantiated into the ordinary left fold or into
 98 | a stream. Both instantiation procedures are generic and do not depend
 99 | on the type of the collection.
100 | 
101 | In conclusion, we recap our recommendations for the optimal collection
102 | traversal API and point out to the existing implementations of the
103 | API.
104 | 
105 | ## Why an enumerator is better than a cursor
106 | 
107 | There are many fundamental reasons why enumerators such as fold are
108 | profoundly better than cursors. One such reason is the ease of program
109 | analysis and optimizations [Sheard, Bananas, Hutton]. This
110 | article will not discuss them. Instead, we concentrate on utterly
111 | practical considerations: performance, ease of programming, and
112 | resource utilization.
113 | 
114 | Writing a procedure that traverses, e.g., an AVL tree is far easier
115 | than writing a cursor that remembers the current position and can
116 | advance to the next one. The latter is especially tricky if the tree
117 | in question does not have parent pointers (which waste space and lead
118 | to sharing and resource leakage problems). One look at the C++ STL
119 | should convince one how wretched programming of iterators could be.
120 | 
121 | The "current element" of a cursor is a state, which can be altered by
122 | advancing the cursor. This state introduces an implicit dependency
123 | among all expressions that use the cursor, similar to the dependency
124 | imposed by global mutable variables. Therefore, programming with
125 | cursors is error-prone. In contrast, an enumerator does not expose its
126 | traversal state to an iteratee and does not permit any alteration to
127 | that state by an iteratee. The difference between the leaky and the
128 | perfect encapsulations of the traversal state is well explained in
129 | [ULLMAN] when comparing a cursor-based network database query language
130 | DBTG with SQL. Incidentally, streams, another cursor-based interface,
131 | expose the current element and hence eliminate the implicit state
132 | dependency.
133 | 
134 | Another problem of cursors is telling the user that there are no more
135 | values to access. When a collection is exhausted, cursor operations
136 | typically return an "out-of-band" value such as EOF (cf.  getc() in C)
137 | or raise an exception (cf. iterator.next() in Python). None of these
138 | approaches appear entirely satisfactory. Unlike cursors, an enumerator
139 | does not need to do anything special to tell the iteratee that the
140 | traversal is finished: the enumerator will simply return. There is no
141 | longer a problem with out-of-band values and user's disregarding such
142 | values.
143 | 
144 | Cursors are simply less efficient. A cursor must maintain the state of
145 | the traversal. That state may be invalid. Each operation on a cursor,
146 | therefore, must first verify that the cursor is in the valid
147 | state. The cost of the checks quickly adds up.
148 | 
149 | This issue of the inefficiency of cursors is well known. For example,
150 | it is highly advisable to use "native" operations to move large
151 | sections of an array (ArrayCopy in Java) rather than copy
152 | element-by-element. The cost of an array bound check on each access to
153 | an element is one of the considerations.
154 | 
155 | Enumerators can do more than just eliminate redundant bound
156 | checks. Enumerators, if implemented as staged functions (e.g., C++
157 | templates) can tile or unroll loops and thus partially or completely
158 | remove the traversal iteration overhead [Veldhuizen]. The perfect
159 | encapsulation of the traversal state makes enumerators particularly
160 | adaptable for multi-stage programming.
161 | 
162 | Databases give another example of the efficiency of enumerators: "The
163 | performance of cursors is horrible in almost all systems. One of us
164 | (Shasha) once had an experience of re-writing an eight-hour query
165 | having nested cursors into a cursor-free query that took 15 seconds."
166 | [Shasha]
167 | 
168 | It is often said that the key to the best performance is to 
169 | do more on the server. To find the maximum value of a database table
170 | column, it's far faster to evaluate
171 |        `select MAX(col) from collection`
172 | than to open a cursor on the collection and retrieve all values from
173 | 'col' searching for the maximum. Stored procedure languages (some of
174 | which are quite sophisticated) were introduced to make the server-side
175 | processing easier and powerful.  We should stress that 
176 | 	`select MAX(col) from collection`
177 | is precisely equivalent to
178 | 	`foldl max lower_bound collection`
179 | 
180 | 
181 | Traversing a collection often requires resources, e.g., a stack space,
182 | a connection to a database, a DIR handle, a DOM tree of an XML
183 | document, etc. An enumerator takes care of allocating these resources.
184 | When iteratee explicitly tells the enumerator to stop -- or when the
185 | collection is exhausted -- the enumerator can orderly free the
186 | resources. An enumerator can be programmed as
187 | 
188 | ```scheme
189 | 	   (define (enum proc collection)
190 | 	       (let ((database-connection #f))
191 | 		(dynamic-wind
192 | 		   (lambda () 
193 | 		      (set! database-connection
194 | 			    (open-connection collection)))
195 | 		   (lambda () (iterate proc))
196 | 		   (lambda ()
197 | 		      (set! database-connection
198 | 		            (close-connection database-connection))))))
199 | ```
200 | 
201 | If 'proc' does not capture its continuation [Footnote-1] -- as it is
202 | often the case -- the database connection is opened (taken from the
203 | pool) at the beginning of the iteration and is returned to the pool at
204 | the end.  If we were to provide access to our collection in the form of
205 | a cursor, we would have to place the variable 'database-connection'
206 | into that cursor. We cannot close the database until the cursor is
207 | alive. But how do we know when there are no alive cursors and
208 | therefore it is safe to recycle the database connection? We must rely
209 | either on the programmer's explicitly closing the connection, or on a
210 | finalizer. The sheer number of internet security advisories concerning
211 | memory allocation problems indicates that manual management of
212 | resources is greatly error-prone. The finalizer solution is not
213 | satisfactory either: finalizers are rarely supported and when they
214 | are, they are an unreliable tool to manage precious resources. The
215 | execution of finalizers is unpredictable and is generally beyond
216 | programmer's control.
217 | 
218 | [Footnote-1]
219 | An iteratee may call a continuation captured before the enumerator was
220 | entered. For that reason, an enumerator should employ a dynamic-wind
221 | to detect such an attempt to escape, shed excessive resources and
222 | switch to a "low-power" mode while the iteratee is on the run.
223 | 
224 | 
225 | ## The most general enumeration interface
226 | 
227 | We propose the following general enumeration interface. It is based on
228 | a left-fold enumerator and explicitly supports multiple state
229 | variables (i.e., seeds) and a premature termination of the
230 | iteration. In this Section we describe the interface in pseudo-code,
231 | which can be instantiated in Scheme [SRFI-44], Haskell ([Daume],
232 | Section 6) or other concrete language.
233 | 
234 |     The enumeration procedure
235 |        coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ]
236 | 
237 | traverses the collection denoted by COLL. The procedure takes one or
238 | more state arguments denoted by SEED, and returns just as many. PROC
239 | is an iteratee procedure:
240 | 
241 |    PROC VAL SEED SEED ... -> [INDIC SEED SEED ...]
242 | 
243 | It takes n+1 arguments and returns n+1 values. The first argument is
244 | the current value retrieved from the collection. The other n arguments
245 | are the seeds. The first return value is a boolean indicator, whose
246 | false value causes the premature termination of the iteration.  The
247 | other return values from PROC are the new values of the seeds. The
248 | procedure coll-fold-left enumerates the collection in some order and
249 | invokes PROC passing it the current value of the collection and the
250 | current seeds. The first invocation of PROC receives SEEDs that were
251 | the arguments of coll-fold-left. The further invocations of PROC
252 | receive SEEDs that were produced by the previous invocation of
253 | PROC. When the collection is exhausted or when the PROC procedure
254 | returns the indicator value of false, coll-fold-left terminates the
255 | iterations, disposes of allocated resources, and returns the current
256 | SEEDs. If the collection COLL is empty, coll-fold-left does not invoke
257 | PROC and returns its argument SEEDs.
258 | 
259 | 
260 | 
261 | ## Enumerators and generators
262 | 
263 | A programming language Icon popularized generators as a way to
264 | traverse actual and virtual collections. Generators are also supported
265 | in Ruby and Python. The documentation of Icon defines a generator as
266 | an expression that can produce several values, on demand.
267 | Multiple-valued expressions can also be implemented with shift and
268 | reset [SHIFT]. The paper [SHIFT] gives many examples of their use.
269 | 
270 | Generators occupy an intermediate place between enumerators and
271 | cursors. A generator is just as easy to write as an enumerator. It
272 | traverses a collection, fetches the current element and _yields_ it,
273 | by passing the element to a dedicated procedure or syntax form. When
274 | the latter returns, the traversal continues. On the other hand,
275 | generators are trivially related to streams [ENUM-CC]. The latter
276 | article discusses generators and enumerators in more
277 | detail. Incidentally, the article demonstrates that a generator-based
278 | code in Python can be translated into Scheme almost
279 | verbatim. Generators give the first hint that enumerators and cursors
280 | are related via first-class continuations.
281 | 
282 | Like cursors and streams, generators are demand-driven. A user must
283 | explicitly request a new value to advance the traversal. Therefore,
284 | like cursors generators leak resources: it is not clear when the
285 | iteration should be assumed terminated and the associated resources
286 | can be safely disposed of.
287 | 
288 | ## How to invert an enumerator in a language with first-class continuations
289 | 
290 | Sometimes we indeed need to traverse a collection via a cursor.
291 | Reasons may include moving data from one collection to another, or
292 | interfacing legacy code. If a collection API provides enumerators, we
293 | obtain cursors for free. We pass an enumerator to a generic
294 | translation procedure, which inverts the enumerator "inside out" and
295 | returns a cursor.
296 | 
297 | The following code illustrates the conversion in Scheme, a language
298 | with first-class continuations. The procedure lfold->lazy-list is a
299 | fully generic translation procedure: it takes a left-fold enumerator
300 | for _any_ collection, and converts the enumerator to a stream (lazy
301 | list). The latter is a realization of a cursor.
302 | 
303 | ```scheme
304 | (define (lfold->lazy-list lfold collection)
305 |   (delay
306 |     (call-with-current-continuation
307 |       (lambda (k-main)
308 | 	(lfold collection
309 | 	  (lambda (val seed)
310 | 	    (values
311 | 	      (call-with-current-continuation
312 | 		(lambda (k-reenter)
313 | 		  (k-main
314 | 		    (cons val
315 | 		      (delay
316 | 			(call-with-current-continuation
317 | 			  (lambda (k-new-main)
318 | 			    (set! k-main k-new-main)
319 | 			    (k-reenter #t))))))))
320 | 	      seed))
321 | 	  '()) 				; Initial seed
322 | 	(k-main '())))))
323 | 
324 | ```
325 | 
326 | The present article is an abstract. Code in a functional language is
327 | the best abstract. The discussion is delegated to the talk and the
328 | full paper. The article [ENUM-CC] discusses the inversion procedure in
329 | more detail and points out to the complete code and the test cases.
330 | 
331 | 
332 | ## How to invert an enumerator in a language without first-class continuations
333 | 
334 | If a programming language lacks first-class continuations, the
335 | conversion from an enumerator to a cursor is still possible. However,
336 | we need to generalize the enumerator interface and make it
337 | non-recursive. For concreteness, this section demonstrates our
338 | approach for one particular language without first-class
339 | continuations: Haskell. Applications to other languages are
340 | straightforward.
341 | 
342 | In Haskell, the general left-fold enumerator has the following
343 | interface [HINV]:
344 | 
345 | > type CFoldLeft coll val m seed = coll -> CollEnumerator val m seed
346 | > type CollEnumerator val m seed =
347 | >        Iteratee val seed
348 | >        -> seed      -- the initial seed
349 | >        -> m seed
350 | > type Iteratee val seed = seed -> val -> Either seed seed
351 | 
352 | where 'coll' is the type of a collection with elements of the type
353 | 'val', and 'm' is an arbitrary monad. The type 'seed' is the type of a
354 | state variable or variables (if the seed is a tuple). If an iteratee
355 | returns Right seed', the iteration continues with seed' as the new
356 | seed. If the iteratee returns Left seed'', the enumerator immediately
357 | stops further iterations, frees all the resources, and returns seed''
358 | as the final result.
359 | 
360 | Incidentally, Hal Daume III mentioned [DAUME] that such left-fold
361 | enumerator is indeed useful in practice. It is his preferred method of
362 | iterating over a file considered as a collection of characters, lines,
363 | or words.
364 | 
365 | To make the enumerator non-recursive, we need to add an additional
366 | argument -- self:
367 | 
368 | > type CFoldLeft' val m seed = 
369 | >        Self (Iteratee val seed) m seed
370 | >        -> CollEnumerator val m seed
371 | > type Self iter m seed = iter -> seed -> m seed
372 | > type CFoldLeft1Maker coll val m seed = coll -> m (CFoldLeft' val m seed)
373 | 
374 | A function of the type CFoldLeft' is also an enumerator. However, that
375 | enumerator does not recurse to advance the traversal. It invokes Self
376 | instead. Given CFoldLeft1Maker, we can obtain either the CFoldLeft
377 | enumerator, or a stream.  The former translation procedure amounts to
378 | taking a fixpoint:
379 | 
380 | > hfold_nonrec_to_rec:: (Monad m) => 
381 | >        coll -> (CFoldLeft1Maker coll val m seed)
382 | >             -> m (CollEnumerator val m seed)
383 | > hfold_nonrec_to_rec coll hfold1_maker = do
384 | >    hfold_left' <- hfold1_maker coll
385 | >    return $ fix hfold_left'
386 | > fix f = f g where g = f g
387 | 
388 | Converting CFoldLeft' into a stream is equally simple:
389 | 
390 | > data MyStream m a = MyNil (Maybe a) | MyCons a (m (MyStream m a))
391 | 
392 | > hfold_nonrec_to_stream:: 
393 | >    (Monad m) => CFoldLeft' val m (MyStream m val) 
394 | >                 -> m (MyStream m val)
395 | > hfold_nonrec_to_stream hfold_left' = do
396 | >   let k fn (MyNil Nothing)  = return $ MyNil Nothing
397 | >       k fn (MyNil (Just c)) 
398 | >         = return $ MyCons c (hfold_left' k fn (MyNil Nothing))
399 | >   hfold_left' k (\_ c -> Right $ MyNil $ Just c) (MyNil Nothing)
400 | 
401 | The polymorphic types of both conversion procedures indicate that the
402 | procedures are generic and apply to any collection and any traversal.
403 | 
404 | 
405 | The article [HINV] demonstrates both translations of CFoldLeft' on a
406 | concrete example of a file taken as a collection of
407 | characters. Haskell provides a cursor interface to that collection:
408 | hGetChar. We implement a left fold enumerator CFoldLeft'. We then show
409 | how to turn that enumerator back to a stream: how to express functions
410 | myhgetchar and myhiseof only in terms of the left fold enumerator. The
411 | derivation of these functions is independent of the precise nature of
412 | the enumerator. Incidentally, if we turn two enumerators into streams,
413 | we can safely interleave these streams.
414 | 
415 | 
416 | ## Conclusions
417 | 
418 | In a language with first-class continuations, we propose
419 | coll-fold-left as the overall optimal interface to systematically
420 | access values of a collection (Section 3):
421 | 
422 |    coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ]
423 |    PROC VAL SEED SEED ... -> [INDIC SEED SEED ...]
424 | 
425 | In a language without first-class continuations, we propose
426 | CFoldLeft1Maker (Section 6) as such optimal interface. 
427 | 
428 | The enumerator-based interface is optimal because enumerators: are
429 | easier to write; are less error-prone to use; are more efficient;
430 | provide a better encapsulation of the state of the traversal; avoid
431 | resource leaks.
432 | 
433 | We have presented generic conversion procedures that turn enumerators
434 | into cursors. The existence of these procedures demonstrates that
435 | enumerator- and cursor-based interfaces are inter-convertible. We have
436 | argued however that the enumerator interface should be considered
437 | primary and offered natively in a collection API. It is far more
438 | efficient and easy for a programmer to implement cursors via
439 | enumerators, than the other way around.
440 | 
441 | The coll-fold-left interface has indeed been implemented and tested in
442 | practice. We have written a relational database interface for Scheme
443 | [DBINTF], which we have been using in the production environment. We
444 | have also implemented coll-fold-left to enumerate entries in a TIFF
445 | image tag directory. The enumerator coll-fold-left has been chosen to
446 | be the primary traversal interface in Scheme Collections SRFI
447 | [SRFI-44]. A similar interface is being considered for an Oracle RDBMS
448 | binding in Haskell.
449 | 
450 | 
451 | ## References
452 | 
453 | [SHIFT] Olivier Danvy and Andrzej Filinski. Abstracting Control.
454 | Proc. 1990 ACM Conf. on LISP and Functional Programming, pp. 151-160,
455 | Nice, France, June 1990.
456 | 
457 | [Daume] Hal Daume III.  Re: From enumerators to cursors: turning the 
458 | left fold inside out.
459 | A message posted on the Haskell mailing list on
460 | 24 Sep 2003 07:47:23 -0700.
461 | 
462 | [Hutton] Graham Hutton. A tutorial on the universality and 
463 | expressiveness of fold.
464 | Journal of Functional Programming, 9(4):355-372, July 1999.
465 | 
466 | [ENUM-CC] Oleg Kiselyov. General ways to traverse collections.
467 | January 1, 2004.
468 | http://pobox.com/~oleg/ftp/Scheme/enumerators-callcc.html
469 | 
470 | [DBINTF] Oleg Kiselyov. Scheme database access tools. May 10, 2003.
471 | http://pobox.com/~oleg/ftp/Scheme/lib/db-util1.scm
472 | http://pobox.com/~oleg/ftp/Scheme/tests/vdbaccess.scm 
473 | 
474 | [HINV] Oleg Kiselyov. From enumerators to cursors: turning the left
475 | fold inside out. January 1, 2004.
476 | http://pobox.com/~oleg/ftp/Haskell/misc.html#fold-stream
477 | The first draft was posted on the Haskell mailing list on
478 | 23 Sep 2003 23:59:45 -0700.
479 | 
480 | [Bananas] Erik Meijer, Maarten M. Fokkinga, and Ross Paterson. 
481 | Functional programming with bananas, lenses, envelopes, and barbed wire. 
482 | In J. Hughes, editor, FPCA'91: Functional Programming Languages and
483 | Computer Architecture, volume 523 of LNCS, pp. 124-144. 
484 | Springer-Verlag, 1991.                              
485 | 
486 | [SRFI-44] Scott G. Miller. Collections.
487 | Scheme Request for Implementation SRFI-44. October 2003.
488 | http://srfi.schemers.org/srfi-44/srfi-44.html
489 | 
490 | [Shasha] Dennis E. Shasha and Philippe Bonnet. Smooth Talking Your Databases.
491 | Dr.Dobbs Journal, July 2002, pp. 46-54.
492 | 
493 | [Sheard] Tim Sheard and Leonidas Fegaras. A fold for all seasons. 
494 | Proc. Conf. on Functional Programming and Computer Architecture
495 | (FPCA'93), pp. 233-242, Copenhagen, Denmark, June 1993.
496 | 
497 | [ULLMAN] Jeffrey Ullman. Principles of Database Systems.
498 | Second Edition, 484 pp. Computer Science Press, 1982.
499 | 
500 | [Veldhuizen]
501 | T. Veldhuizen. Expression Templates.
502 | C++ Report, Vol. 7 No. 5 June 1995, pp. 26-31
503 | http://osl.iu.edu/~tveldhui/papers/Expression-Templates/exprtmpl.html
504 | 


--------------------------------------------------------------------------------
/doc/LL3-collections-enumerators.txt:
--------------------------------------------------------------------------------
  1 | ;; -*-mode: Outline;  -*-
  2 | 
  3 | Towards the best collection API
  4 | 
  5 | * Abstract
  6 | Most programming languages support collections, represented by an
  7 | in-memory data structure, a file, a database, or a generating
  8 | function. A programming language system gives us typically one of the
  9 | two interfaces to systematically access elements of a collection. One
 10 | traversal API is based on enumerators -- e.g., for-each, map, filter
 11 | higher-order procedures -- of which the most general is fold. The
 12 | second approach relies on streams, a.k.a. cursors, lazy
 13 | lists. Generators such as the ones in Icon, Ruby and Python are a
 14 | hybrid approach.
 15 | 
 16 | This article presents a design of the overall optimal collection
 17 | traversal interface, which is based on a left-fold-like combinator
 18 | with premature termination. We argue from the practical standpoint
 19 | that such an interface is superior: in efficiency; in ease of
 20 | programming; in more predictable resource utilization and avoidance of
 21 | resource leaks.
 22 | 
 23 | We also demonstrate a procedure that converts _any_ left-fold-like
 24 | enumerator for any collection into the corresponding
 25 | cursor. Therefore, the stream-based interface, when it is truly needed
 26 | or preferred, can be automatically and generically derived from the
 27 | proposed interface.
 28 | 
 29 | We present two variants of the left-fold enumerator: for languages
 30 | with and without first-class continuations. The proposed traversal
 31 | interface has been implemented in a Scheme database access API used in
 32 | a production environment. The generic-left-fold-based interface
 33 | underlies the Collection SRFI (Scheme Request for Implementation) and
 34 | is being considered for an Oracle database interface in Haskell.
 35 | 
 36 | 
 37 | * Introduction and terminology
 38 | Access to collections cuts through many programming languages. By a
 39 | collection we mean a hash table; a file as a collection of lines,
 40 | words, or bytes; a file system directory; an XML document; a suffix
 41 | tree; or a "dynamic" collection produced by a database query or any
 42 | other generating function.
 43 | 
 44 | There are two basic approaches to systematically access collection
 45 | elements. One approach relies on enumerators such as 'map', 'for-each'
 46 | and 'filter'. The most general of those is fold, which lets us
 47 | maintain our state during the traversal. The other traversal API is
 48 | based on lazy lists (a.k.a. cursors, or streams).
 49 | 
 50 | A word about terminology is in order, as it is rather confusing.  We
 51 | will call a higher-order for-each-type procedure an enumerator. It is
 52 | this procedure that takes a collection and a handler, and applies the
 53 | handler to each element of the collection in turn. The handler itself
 54 | will be called an iteratee, because it is being applied -- by an
 55 | active entity, the enumeratOR.  An enumerator is presumably privy to
 56 | the internals of a collection; it knows how to fetch elements in some
 57 | (specified) order, and knows when to stop. A handler does not have
 58 | this knowledge, nor is it supposed to. Throughout the article we
 59 | eschew the commonly used term 'iterator': in C++ it means an accessor,
 60 | which does not iterate over a collection. Rather, it is _being_
 61 | iterated. On the other hand, languages like OCaml provide a procedure
 62 | 'iter' that actively traverses a collection, applying a user-defined
 63 | function to elements.
 64 | 
 65 | The next section argues from the practical standpoint against cursors
 66 | and for enumerators. We contend that an enumerator such as left fold
 67 | should be the primary means of traversing and obtaining all values of
 68 | a collection. Enumerators are superior to cursors:
 69 |       - in efficiency
 70 |       - in ease of programming
 71 |       - in more predictable resource utilization and
 72 |       avoidance of resource leaks.
 73 | The latter is of special importance for collections backed by external
 74 | resources such as file handles and database connections.
 75 | 
 76 | Section 3 proposes the overall optimal enumeration interface, which
 77 | adds premature termination and explicit multiple state variables to
 78 | the left fold. Section 4 discusses generators.
 79 | 
 80 | Given a stream, we can always construct the corresponding
 81 | enumerator. It is perhaps less appreciated that given a left fold
 82 | enumerator, we can always construct the corresponding stream: we can
 83 | invert an enumerator inside out. Section 5 describes the inversion
 84 | procedure, which is fully generic and works for any
 85 | collection. Because the enumerator interface subsumes the cursor-based
 86 | one and because the former is more efficient and less error-prone, a
 87 | collection API should offer enumerators as the primary, native
 88 | traversal interface.
 89 | 
 90 | The automatical enumerator inversion procedure in Section 5 relies on
 91 | first-class (delimited) continuations. However, we can write a similar
 92 | enumerator inversion even in a language that lacks first-class
 93 | continuations.  Section 6 demonstrates a non-recursive left-fold-like
 94 | combinator that can be instantiated into the ordinary left fold or into
 95 | a stream. Both instantiation procedures are generic and do not depend
 96 | on the type of the collection.
 97 | 
 98 | In conclusion, we recap our recommendations for the optimal collection
 99 | traversal API and point out to the existing implementations of the
100 | API.
101 | 
102 | 
103 | 
104 | * Why an enumerator is better than a cursor
105 | 
106 | There are many fundamental reasons why enumerators such as fold are
107 | profoundly better than cursors. One such reason is the ease of program
108 | analysis and optimizations [Sheard, Bananas, Hutton]. This
109 | article will not discuss them. Instead, we concentrate on utterly
110 | practical considerations: performance, ease of programming, and
111 | resource utilization.
112 | 
113 | Writing a procedure that traverses, e.g., an AVL tree is far easier
114 | than writing a cursor that remembers the current position and can
115 | advance to the next one. The latter is especially tricky if the tree
116 | in question does not have parent pointers (which waste space and lead
117 | to sharing and resource leakage problems). One look at the C++ STL
118 | should convince one how wretched programming of iterators could be.
119 | 
120 | The "current element" of a cursor is a state, which can be altered by
121 | advancing the cursor. This state introduces an implicit dependency
122 | among all expressions that use the cursor, similar to the dependency
123 | imposed by global mutable variables. Therefore, programming with
124 | cursors is error-prone. In contrast, an enumerator does not expose its
125 | traversal state to an iteratee and does not permit any alteration to
126 | that state by an iteratee. The difference between the leaky and the
127 | perfect encapsulations of the traversal state is well explained in
128 | [ULLMAN] when comparing a cursor-based network database query language
129 | DBTG with SQL. Incidentally, streams, another cursor-based interface,
130 | expose the current element and hence eliminate the implicit state
131 | dependency.
132 | 
133 | Another problem of cursors is telling the user that there are no more
134 | values to access. When a collection is exhausted, cursor operations
135 | typically return an "out-of-band" value such as EOF (cf.  getc() in C)
136 | or raise an exception (cf. iterator.next() in Python). None of these
137 | approaches appear entirely satisfactory. Unlike cursors, an enumerator
138 | does not need to do anything special to tell the iteratee that the
139 | traversal is finished: the enumerator will simply return. There is no
140 | longer a problem with out-of-band values and user's disregarding such
141 | values.
142 | 
143 | Cursors are simply less efficient. A cursor must maintain the state of
144 | the traversal. That state may be invalid. Each operation on a cursor,
145 | therefore, must first verify that the cursor is in the valid
146 | state. The cost of the checks quickly adds up.
147 | 
148 | This issue of the inefficiency of cursors is well known. For example,
149 | it is highly advisable to use "native" operations to move large
150 | sections of an array (ArrayCopy in Java) rather than copy
151 | element-by-element. The cost of an array bound check on each access to
152 | an element is one of the considerations.
153 | 
154 | Enumerators can do more than just eliminate redundant bound
155 | checks. Enumerators, if implemented as staged functions (e.g., C++
156 | templates) can tile or unroll loops and thus partially or completely
157 | remove the traversal iteration overhead [Veldhuizen]. The perfect
158 | encapsulation of the traversal state makes enumerators particularly
159 | adaptable for multi-stage programming.
160 | 
161 | Databases give another example of the efficiency of enumerators: "The
162 | performance of cursors is horrible in almost all systems. One of us
163 | (Shasha) once had an experience of re-writing an eight-hour query
164 | having nested cursors into a cursor-free query that took 15 seconds."
165 | [Shasha]
166 | 
167 | It is often said that the key to the best performance is to 
168 | do more on the server. To find the maximum value of a database table
169 | column, it's far faster to evaluate
170 |        select MAX(col) from collection
171 | than to open a cursor on the collection and retrieve all values from
172 | 'col' searching for the maximum. Stored procedure languages (some of
173 | which are quite sophisticated) were introduced to make the server-side
174 | processing easier and powerful.  We should stress that 
175 | 	select MAX(col) from collection
176 | is precisely equivalent to
177 | 	foldl max lower_bound collection
178 | 
179 | 
180 | Traversing a collection often requires resources, e.g., a stack space,
181 | a connection to a database, a DIR handle, a DOM tree of an XML
182 | document, etc. An enumerator takes care of allocating these resources.
183 | When iteratee explicitly tells the enumerator to stop -- or when the
184 | collection is exhausted -- the enumerator can orderly free the
185 | resources. An enumerator can be programmed as
186 | 
187 | 	   (define (enum proc collection)
188 | 	       (let ((database-connection #f))
189 | 		(dynamic-wind
190 | 		   (lambda () 
191 | 		      (set! database-connection
192 | 			    (open-connection collection)))
193 | 		   (lambda () (iterate proc))
194 | 		   (lambda ()
195 | 		      (set! database-connection
196 | 		            (close-connection database-connection))))))
197 | 
198 | If 'proc' does not capture its continuation [Footnote-1] -- as it is
199 | often the case -- the database connection is opened (taken from the
200 | pool) at the beginning of the iteration and is returned to the pool at
201 | the end.  If we were to provide access to our collection in the form of
202 | a cursor, we would have to place the variable 'database-connection'
203 | into that cursor. We cannot close the database until the cursor is
204 | alive. But how do we know when there are no alive cursors and
205 | therefore it is safe to recycle the database connection? We must rely
206 | either on the programmer's explicitly closing the connection, or on a
207 | finalizer. The sheer number of internet security advisories concerning
208 | memory allocation problems indicates that manual management of
209 | resources is greatly error-prone. The finalizer solution is not
210 | satisfactory either: finalizers are rarely supported and when they
211 | are, they are an unreliable tool to manage precious resources. The
212 | execution of finalizers is unpredictable and is generally beyond
213 | programmer's control.
214 | 
215 | [Footnote-1]
216 | An iteratee may call a continuation captured before the enumerator was
217 | entered. For that reason, an enumerator should employ a dynamic-wind
218 | to detect such an attempt to escape, shed excessive resources and
219 | switch to a "low-power" mode while the iteratee is on the run.
220 | 
221 | 
222 | * The most general enumeration interface
223 | 
224 | We propose the following general enumeration interface. It is based on
225 | a left-fold enumerator and explicitly supports multiple state
226 | variables (i.e., seeds) and a premature termination of the
227 | iteration. In this Section we describe the interface in pseudo-code,
228 | which can be instantiated in Scheme [SRFI-44], Haskell ([Daume],
229 | Section 6) or other concrete language.
230 | 
231 | The enumeration procedure
232 |    coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ]
233 | 
234 | traverses the collection denoted by COLL. The procedure takes one or
235 | more state arguments denoted by SEED, and returns just as many. PROC
236 | is an iteratee procedure:
237 |    PROC VAL SEED SEED ... -> [INDIC SEED SEED ...]
238 | 
239 | It takes n+1 arguments and returns n+1 values. The first argument is
240 | the current value retrieved from the collection. The other n arguments
241 | are the seeds. The first return value is a boolean indicator, whose
242 | false value causes the premature termination of the iteration.  The
243 | other return values from PROC are the new values of the seeds. The
244 | procedure coll-fold-left enumerates the collection in some order and
245 | invokes PROC passing it the current value of the collection and the
246 | current seeds. The first invocation of PROC receives SEEDs that were
247 | the arguments of coll-fold-left. The further invocations of PROC
248 | receive SEEDs that were produced by the previous invocation of
249 | PROC. When the collection is exhausted or when the PROC procedure
250 | returns the indicator value of false, coll-fold-left terminates the
251 | iterations, disposes of allocated resources, and returns the current
252 | SEEDs. If the collection COLL is empty, coll-fold-left does not invoke
253 | PROC and returns its argument SEEDs.
254 | 
255 | 
256 | 
257 | * Enumerators and generators
258 | 
259 | A programming language Icon popularized generators as a way to
260 | traverse actual and virtual collections. Generators are also supported
261 | in Ruby and Python. The documentation of Icon defines a generator as
262 | an expression that can produce several values, on demand.
263 | Multiple-valued expressions can also be implemented with shift and
264 | reset [SHIFT]. The paper [SHIFT] gives many examples of their use.
265 | 
266 | Generators occupy an intermediate place between enumerators and
267 | cursors. A generator is just as easy to write as an enumerator. It
268 | traverses a collection, fetches the current element and _yields_ it,
269 | by passing the element to a dedicated procedure or syntax form. When
270 | the latter returns, the traversal continues. On the other hand,
271 | generators are trivially related to streams [ENUM-CC]. The latter
272 | article discusses generators and enumerators in more
273 | detail. Incidentally, the article demonstrates that a generator-based
274 | code in Python can be translated into Scheme almost
275 | verbatim. Generators give the first hint that enumerators and cursors
276 | are related via first-class continuations.
277 | 
278 | Like cursors and streams, generators are demand-driven. A user must
279 | explicitly request a new value to advance the traversal. Therefore,
280 | like cursors generators leak resources: it is not clear when the
281 | iteration should be assumed terminated and the associated resources
282 | can be safely disposed of.
283 | 
284 | 
285 | * How to invert an enumerator in a language with first-class continuations
286 | 
287 | Sometimes we indeed need to traverse a collection via a cursor.
288 | Reasons may include moving data from one collection to another, or
289 | interfacing legacy code. If a collection API provides enumerators, we
290 | obtain cursors for free. We pass an enumerator to a generic
291 | translation procedure, which inverts the enumerator "inside out" and
292 | returns a cursor.
293 | 
294 | The following code illustrates the conversion in Scheme, a language
295 | with first-class continuations. The procedure lfold->lazy-list is a
296 | fully generic translation procedure: it takes a left-fold enumerator
297 | for _any_ collection, and converts the enumerator to a stream (lazy
298 | list). The latter is a realization of a cursor.
299 | 
300 | (define (lfold->lazy-list lfold collection)
301 |   (delay
302 |     (call-with-current-continuation
303 |       (lambda (k-main)
304 | 	(lfold collection
305 | 	  (lambda (val seed)
306 | 	    (values
307 | 	      (call-with-current-continuation
308 | 		(lambda (k-reenter)
309 | 		  (k-main
310 | 		    (cons val
311 | 		      (delay
312 | 			(call-with-current-continuation
313 | 			  (lambda (k-new-main)
314 | 			    (set! k-main k-new-main)
315 | 			    (k-reenter #t))))))))
316 | 	      seed))
317 | 	  '()) 				; Initial seed
318 | 	(k-main '())))))
319 | 
320 | 
321 | The present article is an abstract. Code in a functional language is
322 | the best abstract. The discussion is delegated to the talk and the
323 | full paper. The article [ENUM-CC] discusses the inversion procedure in
324 | more detail and points out to the complete code and the test cases.
325 | 
326 | 
327 | * How to invert an enumerator in a language without first-class continuations
328 | 
329 | If a programming language lacks first-class continuations, the
330 | conversion from an enumerator to a cursor is still possible. However,
331 | we need to generalize the enumerator interface and make it
332 | non-recursive. For concreteness, this section demonstrates our
333 | approach for one particular language without first-class
334 | continuations: Haskell. Applications to other languages are
335 | straightforward.
336 | 
337 | In Haskell, the general left-fold enumerator has the following
338 | interface [HINV]:
339 | 
340 | > type CFoldLeft coll val m seed = coll -> CollEnumerator val m seed
341 | > type CollEnumerator val m seed =
342 | >        Iteratee val seed
343 | >        -> seed      -- the initial seed
344 | >        -> m seed
345 | > type Iteratee val seed = seed -> val -> Either seed seed
346 | 
347 | where 'coll' is the type of a collection with elements of the type
348 | 'val', and 'm' is an arbitrary monad. The type 'seed' is the type of a
349 | state variable or variables (if the seed is a tuple). If an iteratee
350 | returns Right seed', the iteration continues with seed' as the new
351 | seed. If the iteratee returns Left seed'', the enumerator immediately
352 | stops further iterations, frees all the resources, and returns seed''
353 | as the final result.
354 | 
355 | Incidentally, Hal Daume III mentioned [DAUME] that such left-fold
356 | enumerator is indeed useful in practice. It is his preferred method of
357 | iterating over a file considered as a collection of characters, lines,
358 | or words.
359 | 
360 | To make the enumerator non-recursive, we need to add an additional
361 | argument -- self:
362 | 
363 | > type CFoldLeft' val m seed = 
364 | >        Self (Iteratee val seed) m seed
365 | >        -> CollEnumerator val m seed
366 | > type Self iter m seed = iter -> seed -> m seed
367 | > type CFoldLeft1Maker coll val m seed = coll -> m (CFoldLeft' val m seed)
368 | 
369 | A function of the type CFoldLeft' is also an enumerator. However, that
370 | enumerator does not recurse to advance the traversal. It invokes Self
371 | instead. Given CFoldLeft1Maker, we can obtain either the CFoldLeft
372 | enumerator, or a stream.  The former translation procedure amounts to
373 | taking a fixpoint:
374 | 
375 | > hfold_nonrec_to_rec:: (Monad m) => 
376 | >        coll -> (CFoldLeft1Maker coll val m seed)
377 | >             -> m (CollEnumerator val m seed)
378 | > hfold_nonrec_to_rec coll hfold1_maker = do
379 | >    hfold_left' <- hfold1_maker coll
380 | >    return $ fix hfold_left'
381 | > fix f = f g where g = f g
382 | 
383 | Converting CFoldLeft' into a stream is equally simple:
384 | 
385 | > data MyStream m a = MyNil (Maybe a) | MyCons a (m (MyStream m a))
386 | 
387 | > hfold_nonrec_to_stream:: 
388 | >    (Monad m) => CFoldLeft' val m (MyStream m val) 
389 | >                 -> m (MyStream m val)
390 | > hfold_nonrec_to_stream hfold_left' = do
391 | >   let k fn (MyNil Nothing)  = return $ MyNil Nothing
392 | >       k fn (MyNil (Just c)) 
393 | >         = return $ MyCons c (hfold_left' k fn (MyNil Nothing))
394 | >   hfold_left' k (\_ c -> Right $ MyNil $ Just c) (MyNil Nothing)
395 | 
396 | The polymorphic types of both conversion procedures indicate that the
397 | procedures are generic and apply to any collection and any traversal.
398 | 
399 | 
400 | The article [HINV] demonstrates both translations of CFoldLeft' on a
401 | concrete example of a file taken as a collection of
402 | characters. Haskell provides a cursor interface to that collection:
403 | hGetChar. We implement a left fold enumerator CFoldLeft'. We then show
404 | how to turn that enumerator back to a stream: how to express functions
405 | myhgetchar and myhiseof only in terms of the left fold enumerator. The
406 | derivation of these functions is independent of the precise nature of
407 | the enumerator. Incidentally, if we turn two enumerators into streams,
408 | we can safely interleave these streams.
409 | 
410 | 
411 | * Conclusions
412 | 
413 | In a language with first-class continuations, we propose
414 | coll-fold-left as the overall optimal interface to systematically
415 | access values of a collection (Section 3):
416 | 
417 |    coll-fold-left COLL PROC SEED SEED ... -> [SEED ... ]
418 |    PROC VAL SEED SEED ... -> [INDIC SEED SEED ...]
419 | 
420 | In a language without first-class continuations, we propose
421 | CFoldLeft1Maker (Section 6) as such optimal interface. 
422 | 
423 | The enumerator-based interface is optimal because enumerators: are
424 | easier to write; are less error-prone to use; are more efficient;
425 | provide a better encapsulation of the state of the traversal; avoid
426 | resource leaks.
427 | 
428 | We have presented generic conversion procedures that turn enumerators
429 | into cursors. The existence of these procedures demonstrates that
430 | enumerator- and cursor-based interfaces are inter-convertible. We have
431 | argued however that the enumerator interface should be considered
432 | primary and offered natively in a collection API. It is far more
433 | efficient and easy for a programmer to implement cursors via
434 | enumerators, than the other way around.
435 | 
436 | The coll-fold-left interface has indeed been implemented and tested in
437 | practice. We have written a relational database interface for Scheme
438 | [DBINTF], which we have been using in the production environment. We
439 | have also implemented coll-fold-left to enumerate entries in a TIFF
440 | image tag directory. The enumerator coll-fold-left has been chosen to
441 | be the primary traversal interface in Scheme Collections SRFI
442 | [SRFI-44]. A similar interface is being considered for an Oracle RDBMS
443 | binding in Haskell.
444 | 
445 | 
446 | * References
447 | 
448 | [SHIFT] Olivier Danvy and Andrzej Filinski. Abstracting Control.
449 | Proc. 1990 ACM Conf. on LISP and Functional Programming, pp. 151-160,
450 | Nice, France, June 1990.
451 | 
452 | [Daume] Hal Daume III.  Re: From enumerators to cursors: turning the 
453 | left fold inside out.
454 | A message posted on the Haskell mailing list on
455 | 24 Sep 2003 07:47:23 -0700.
456 | 
457 | [Hutton] Graham Hutton. A tutorial on the universality and 
458 | expressiveness of fold.
459 | Journal of Functional Programming, 9(4):355-372, July 1999.
460 | 
461 | [ENUM-CC] Oleg Kiselyov. General ways to traverse collections.
462 | January 1, 2004.
463 | http://pobox.com/~oleg/ftp/Scheme/enumerators-callcc.html
464 | 
465 | [DBINTF] Oleg Kiselyov. Scheme database access tools. May 10, 2003.
466 | http://pobox.com/~oleg/ftp/Scheme/lib/db-util1.scm
467 | http://pobox.com/~oleg/ftp/Scheme/tests/vdbaccess.scm 
468 | 
469 | [HINV] Oleg Kiselyov. From enumerators to cursors: turning the left
470 | fold inside out. January 1, 2004.
471 | http://pobox.com/~oleg/ftp/Haskell/misc.html#fold-stream
472 | The first draft was posted on the Haskell mailing list on
473 | 23 Sep 2003 23:59:45 -0700.
474 | 
475 | [Bananas] Erik Meijer, Maarten M. Fokkinga, and Ross Paterson. 
476 | Functional programming with bananas, lenses, envelopes, and barbed wire. 
477 | In J. Hughes, editor, FPCA'91: Functional Programming Languages and
478 | Computer Architecture, volume 523 of LNCS, pp. 124-144. 
479 | Springer-Verlag, 1991.                              
480 | 
481 | [SRFI-44] Scott G. Miller. Collections.
482 | Scheme Request for Implementation SRFI-44. October 2003.
483 | http://srfi.schemers.org/srfi-44/srfi-44.html
484 | 
485 | [Shasha] Dennis E. Shasha and Philippe Bonnet. Smooth Talking Your Databases.
486 | Dr.Dobbs Journal, July 2002, pp. 46-54.
487 | 
488 | [Sheard] Tim Sheard and Leonidas Fegaras. A fold for all seasons. 
489 | Proc. Conf. on Functional Programming and Computer Architecture
490 | (FPCA'93), pp. 233-242, Copenhagen, Denmark, June 1993.
491 | 
492 | [ULLMAN] Jeffrey Ullman. Principles of Database Systems.
493 | Second Edition, 484 pp. Computer Science Press, 1982.
494 | 
495 | [Veldhuizen]
496 | T. Veldhuizen. Expression Templates.
497 | C++ Report, Vol. 7 No. 5 June 1995, pp. 26-31
498 | http://osl.iu.edu/~tveldhui/papers/Expression-Templates/exprtmpl.html
499 | 


--------------------------------------------------------------------------------
/doc/LL3-collections-talk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/LL3-collections-talk.pdf


--------------------------------------------------------------------------------
/doc/adams.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/adams.pdf


--------------------------------------------------------------------------------
/doc/hirai-yamamoto.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/hirai-yamamoto.pdf


--------------------------------------------------------------------------------
/doc/making-data-structures-persistent.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danlentz/clj-wbtree/ec7bf2bbc156fce5da8044fa9f2fd4eb7e5f1238/doc/making-data-structures-persistent.pdf


--------------------------------------------------------------------------------
/doc/scheme-wttree.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 |        A Scheme Implementation of Weight-Balanced Binary Trees.
  5 | 				   
  6 | 				   
  7 | 		Copyright (C) 1993-1994 Stephen Adams
  8 | 				   
  9 | 
 10 | This document describes the interface to an R4RS Scheme implementation
 11 | of weight-balanced binary trees.  The original design and
 12 | implementation used the language ML and is described in:
 13 | 
 14 |   Stephen Adams, Implemeting Sets Efficiently in a Functional
 15 |     Language, CSTR 92-10, Department of Electronics and Computer
 16 |     Science, University of Southampton, 1992
 17 | 
 18 | The body of the file was derived from the MIT Scheme system
 19 | documentation, which contains the following notice:
 20 | 
 21 | 
 22 | 
 23 | 
 24 | Weight-Balanced Trees
 25 | =====================
 26 | 
 27 |    Balanced binary trees are a useful data structure for maintaining
 28 | large sets of ordered objects or sets of associations whose keys are
 29 | ordered. MIT Scheme has an comprehensive implementation of
 30 | weight-balanced binary trees which has several advantages over the
 31 | other data structures for large aggregates:
 32 | 
 33 |    * In addition to the usual element-level operations like insertion,
 34 |      deletion and lookup, there is a full complement of
 35 |      collection-level operations, like set intersection, set union and
 36 |      subset test, all of which are implemented with good orders of
 37 |      growth in time and space. This makes weight balanced trees ideal
 38 |      for rapid prototyping of functionally derived specifications.
 39 | 
 40 |    * An element in a tree may be indexed by its position under the
 41 |      ordering of the keys, and the position of an element may be
 42 |      determined, both with reasonable efficiency.
 43 | 
 44 |    * Operations to find and remove minimum element make weight
 45 |      balanced trees simple to use for priority queues.
 46 | 
 47 |    * The implementation is *functional* rather than *imperative*. This
 48 |      means that operations like `inserting' an association in a tree
 49 |      do not destroy the old tree, in much the same way that `(+ 1 x)'
 50 |      modifies neither the constant 1 nor the value bound to `x'.  The
 51 |      trees are referentially transparent thus the programmer need not
 52 |      worry about copying the trees.  Referential transparency allows
 53 |      space efficiency to be achieved by sharing subtrees.
 54 | 
 55 |    These features make weight-balanced trees suitable for a wide range
 56 | of applications, especially those that require large numbers of sets or
 57 | discrete maps.  Applications that have a few global databases and/or
 58 | concentrate on element-level operations like insertion and lookup are
 59 | probably better off using hash-tables or red-black trees.
 60 | 
 61 |    The *size* of a tree is the number of associations that it contains.
 62 |  Weight balanced binary trees are balanced to keep the sizes of the
 63 | subtrees of each node within a constant factor of each other. This
 64 | ensures logarithmic times for single-path operations (like lookup and
 65 | insertion).  A weight balanced tree takes space that is proportional to
 66 | the number of associations in the tree.  For the current
 67 | implementation, the constant of proportionality is six words per
 68 | association.
 69 | 
 70 |    Weight balanced trees can be used as an implementation for either
 71 | discrete sets or discrete maps (associations).  Sets are implemented by
 72 | ignoring the datum that is associated with the key.  Under this scheme
 73 | if an associations exists in the tree this indicates that the key of the
 74 | association is a member of the set.  Typically a value such as `()',
 75 | `#t' or `#f' is associated with the key.
 76 | 
 77 |    Many operations can be viewed as computing a result that, depending
 78 | on whether the tree arguments are thought of as sets or maps, is known
 79 | by two different names. An example is `wt-tree/member?', which, when
 80 | regarding the tree argument as a set, computes the set membership
 81 | operation, but, when regarding the tree as a discrete map,
 82 | `wt-tree/member?' is the predicate testing if the map is defined at an
 83 | element in its domain. Most names in this package have been chosen
 84 | based on interpreting the trees as sets, hence the name
 85 | `wt-tree/member?' rather than `wt-tree/defined-at?'.
 86 | 
 87 |    The weight balanced tree implementation is a run-time-loadable
 88 | option. To use weight balanced trees, execute
 89 | 
 90 |      (load-option 'wt-tree)
 91 | 
 92 | once before calling any of the procedures defined here.
 93 | 
 94 | 
 95 | 
 96 | 
 97 | Construction of Weight-Balanced Trees
 98 | -------------------------------------
 99 | 
100 |    Binary trees require there to be a total order on the keys used to
101 | arrange the elements in the tree.  Weight balanced trees are organized
102 | by *types*, where the type is an object encapsulating the ordering
103 | relation.  Creating a tree is a two-stage process.  First a tree type
104 | must be created from the predicate which gives the ordering.  The tree
105 | type is then used for making trees, either empty or singleton trees or
106 | trees from other aggregate structures like association lists.  Once
107 | created, a tree `knows' its type and the type is used to test
108 | compatibility between trees in operations taking two trees.  Usually a
109 | small number of tree types are created at the beginning of a program
110 | and used many times throughout the program's execution.
111 | 
112 |  -- procedure+: make-wt-tree-type KEY<?
113 |      This procedure creates and returns a new tree type based on the
114 |      ordering predicate KEY<?. KEY<? must be a total ordering, having
115 |      the property that for all key values `a', `b' and `c':
116 | 
117 |           (key<? a a)                         => #f
118 |           (and (key<? a b) (key<? b a))       => #f
119 |           (if (and (key<? a b) (key<? b c))
120 |               (key<? a c)
121 |               #t)                             => #t
122 | 
123 |      Two key values are assumed to be equal if neither is less than the
124 |      other by KEY<?.
125 | 
126 |      Each call to `make-wt-tree-type' returns a distinct value, and
127 |      trees are only compatible if their tree types are `eq?'. A
128 |      consequence is that trees that are intended to be used in binary
129 |      tree operations must all be created with a tree type originating
130 |      from the same call to `make-wt-tree-type'.
131 | 
132 |  -- variable+: number-wt-type
133 |      A standard tree type for trees with numeric keys.  `Number-wt-type'
134 |      could have been defined by
135 | 
136 |           (define number-wt-type (make-wt-tree-type  <))
137 | 
138 |  -- variable+: string-wt-type
139 |      A standard tree type for trees with string keys.  `String-wt-type'
140 |      could have been defined by
141 | 
142 |           (define string-wt-type (make-wt-tree-type  string<?))
143 | 
144 |  -- procedure+: make-wt-tree WT-TREE-TYPE
145 |      This procedure creates and returns a newly allocated weight
146 |      balanced tree.  The tree is empty, i.e. it contains no
147 |      associations. WT-TREE-TYPE is a weight balanced tree type obtained
148 |      by calling `make-wt-tree-type'; the returned tree has this type.
149 | 
150 |  -- procedure+: singleton-wt-tree WT-TREE-TYPE KEY DATUM
151 |      This procedure creates and returns a newly allocated weight
152 |      balanced tree.  The tree contains a single association, that of
153 |      DATUM with KEY.  WT-TREE-TYPE is a weight balanced tree type
154 |      obtained by calling `make-wt-tree-type'; the returned tree has
155 |      this type.
156 | 
157 |  -- procedure+: alist->wt-tree TREE-TYPE ALIST
158 |      Returns a newly allocated weight-balanced tree that contains the
159 |      same associations as ALIST.  This procedure is equivalent to:
160 | 
161 |           (lambda (type alist)
162 |             (let ((tree (make-wt-tree type)))
163 |               (for-each (lambda (association)
164 |                           (wt-tree/add! tree
165 |                                         (car association)
166 |                                         (cdr association)))
167 |                         alist)
168 |               tree))
169 | 
170 | 
171 | 
172 | 
173 | Basic Operations on Weight-Balanced Trees
174 | -----------------------------------------
175 | 
176 |    This section describes the basic tree operations on weight balanced
177 | trees.  These operations are the usual tree operations for insertion,
178 | deletion and lookup, some predicates and a procedure for determining the
179 | number of associations in a tree.
180 | 
181 |  -- procedure+: wt-tree? OBJECT
182 |      Returns `#t' if OBJECT is a weight-balanced tree, otherwise
183 |      returns `#f'.
184 | 
185 |  -- procedure+: wt-tree/empty? WT-TREE
186 |      Returns `#t' if WT-TREE contains no associations, otherwise
187 |      returns `#f'.
188 | 
189 |  -- procedure+: wt-tree/size WT-TREE
190 |      Returns the number of associations in WT-TREE, an exact
191 |      non-negative integer.  This operation takes constant time.
192 | 
193 |  -- procedure+: wt-tree/add WT-TREE KEY DATUM
194 |      Returns a new tree containing all the associations in WT-TREE and
195 |      the association of DATUM with KEY.  If WT-TREE already had an
196 |      association for KEY, the new association overrides the old. The
197 |      average and worst-case times required by this operation are
198 |      proportional to the logarithm of the number of associations in
199 |      WT-TREE.
200 | 
201 |  -- procedure+: wt-tree/add! WT-TREE KEY DATUM
202 |      Associates DATUM with KEY in WT-TREE and returns an unspecified
203 |      value.  If WT-TREE already has an association for KEY, that
204 |      association is replaced.  The average and worst-case times
205 |      required by this operation are proportional to the logarithm of
206 |      the number of associations in WT-TREE.
207 | 
208 |  -- procedure+: wt-tree/member? KEY WT-TREE
209 |      Returns `#t' if WT-TREE contains an association for KEY, otherwise
210 |      returns `#f'.  The average and worst-case times required by this
211 |      operation are proportional to the logarithm of the number of
212 |      associations in WT-TREE.
213 | 
214 |  -- procedure+: wt-tree/lookup WT-TREE KEY DEFAULT
215 |      Returns the datum associated with KEY in WT-TREE.  If WT-TREE
216 |      doesn't contain an association for KEY, DEFAULT is returned.  The
217 |      average and worst-case times required by this operation are
218 |      proportional to the logarithm of the number of associations in
219 |      WT-TREE.
220 | 
221 |  -- procedure+: wt-tree/delete WT-TREE KEY
222 |      Returns a new tree containing all the associations in WT-TREE,
223 |      except that if WT-TREE contains an association for KEY, it is
224 |      removed from the result.  The average and worst-case times required
225 |      by this operation are proportional to the logarithm of the number
226 |      of associations in WT-TREE.
227 | 
228 |  -- procedure+: wt-tree/delete! WT-TREE KEY
229 |      If WT-TREE contains an association for KEY the association is
230 |      removed.  Returns an unspecified value.  The average and worst-case
231 |      times required by this operation are proportional to the logarithm
232 |      of the number of associations in WT-TREE.
233 | 
234 | 
235 | 
236 | 
237 | Advanced Operations on Weight-Balanced Trees
238 | --------------------------------------------
239 | 
240 |    In the following the *size* of a tree is the number of associations
241 | that the tree contains, and a *smaller* tree contains fewer
242 | associations.
243 | 
244 |  -- procedure+: wt-tree/split< WT-TREE BOUND
245 |      Returns a new tree containing all and only the associations in
246 |      WT-TREE which have a key that is less than BOUND in the ordering
247 |      relation of the tree type of WT-TREE.  The average and worst-case
248 |      times required by this operation are proportional to the logarithm
249 |      of the size of WT-TREE.
250 | 
251 |  -- procedure+: wt-tree/split> WT-TREE BOUND
252 |      Returns a new tree containing all and only the associations in
253 |      WT-TREE which have a key that is greater than BOUND in the
254 |      ordering relation of the tree type of WT-TREE.  The average and
255 |      worst-case times required by this operation are proportional to the
256 |      logarithm of size of WT-TREE.
257 | 
258 |  -- procedure+: wt-tree/union WT-TREE-1 WT-TREE-2
259 |      Returns a new tree containing all the associations from both trees.
260 |      This operation is asymmetric: when both trees have an association
261 |      for the same key, the returned tree associates the datum from
262 |      WT-TREE-2 with the key.  Thus if the trees are viewed as discrete
263 |      maps then `wt-tree/union' computes the map override of WT-TREE-1 by
264 |      WT-TREE-2.  If the trees are viewed as sets the result is the set
265 |      union of the arguments. The worst-case time required by this
266 |      operation is proportional to the sum of the sizes of both trees.
267 |      If the minimum key of one tree is greater than the maximum key of
268 |      the other tree then the time required is at worst proportional to
269 |      the logarithm of the size of the larger tree.
270 | 
271 |  -- procedure+: wt-tree/intersection WT-TREE-1 WT-TREE-2
272 |      Returns a new tree containing all and only those associations from
273 |      WT-TREE-1 which have keys appearing as the key of an association
274 |      in WT-TREE-2.  Thus the associated data in the result are those
275 |      from WT-TREE-1.  If the trees are being used as sets the result is
276 |      the set intersection of the arguments.  As a discrete map
277 |      operation, `wt-tree/intersection' computes the domain restriction
278 |      of WT-TREE-1 to (the domain of) WT-TREE-2. The time required by
279 |      this operation is never worse that proportional to the sum of the
280 |      sizes of the trees.
281 | 
282 |  -- procedure+: wt-tree/difference WT-TREE-1 WT-TREE-2
283 |      Returns a new tree containing all and only those associations from
284 |      WT-TREE-1 which have keys that *do not* appear as the key of an
285 |      association in WT-TREE-2.  If the trees are viewed as sets the
286 |      result is the asymmetric set difference of the arguments.  As a
287 |      discrete map operation, it computes the domain restriction of
288 |      WT-TREE-1 to the complement of (the domain of) WT-TREE-2. The time
289 |      required by this operation is never worse that proportional to the
290 |      sum of the sizes of the trees.
291 | 
292 |  -- procedure+: wt-tree/subset? WT-TREE-1 WT-TREE-2
293 |      Returns `#t' iff the key of each association in WT-TREE-1 is the
294 |      key of some association in WT-TREE-2, otherwise returns `#f'.
295 |      Viewed as a set operation, `wt-tree/subset?' is the improper subset
296 |      predicate. A proper subset predicate can be constructed:
297 | 
298 |           (define (proper-subset? s1 s2)
299 |             (and (wt-tree/subset? s1 s2)
300 |                  (< (wt-tree/size s1) (wt-tree/size s2))))
301 | 
302 |      As a discrete map operation, `wt-tree/subset?' is the subset test
303 |      on the domain(s) of the map(s).  In the worst-case the time
304 |      required by this operation is proportional to the size of
305 |      WT-TREE-1.
306 | 
307 |  -- procedure+: wt-tree/set-equal? WT-TREE-1 WT-TREE-2
308 |      Returns `#t' iff for every association in WT-TREE-1 there is an
309 |      association in WT-TREE-2 that has the same key, and *vice versa*.
310 | 
311 |      Viewing the arguments as sets `wt-tree/set-equal?' is the set
312 |      equality predicate.  As a map operation it determines if two maps
313 |      are defined on the same domain.
314 | 
315 |      This procedure is equivalent to
316 | 
317 |           (lambda (wt-tree-1 wt-tree-2)
318 |             (and (wt-tree/subset? wt-tree-1 wt-tree-2
319 |                  (wt-tree/subset? wt-tree-2 wt-tree-1)))
320 | 
321 |      In the worst-case the time required by this operation is
322 |      proportional to the size of the smaller tree.
323 | 
324 |  -- procedure+: wt-tree/fold COMBINER INITIAL WT-TREE
325 |      This procedure reduces WT-TREE by combining all the associations,
326 |      using an reverse in-order traversal, so the associations are
327 |      visited in reverse order.  COMBINER is a procedure of three
328 |      arguments: a key, a datum and the accumulated result so far. 
329 |      Provided COMBINER takes time bounded by a constant, `wt-tree/fold'
330 |      takes time proportional to the size of WT-TREE.
331 | 
332 |      A sorted association list can be derived simply:
333 | 
334 |           (wt-tree/fold  (lambda (key datum list)
335 |                            (cons (cons key datum) list))
336 |                          '()
337 |                          WT-TREE))
338 | 
339 |      The data in the associations can be summed like this:
340 | 
341 |           (wt-tree/fold  (lambda (key datum sum) (+ sum datum))
342 |                          0
343 |                          WT-TREE)
344 | 
345 |  -- procedure+: wt-tree/for-each ACTION WT-TREE
346 |      This procedure traverses the tree in-order, applying ACTION to
347 |      each association. The associations are processed in increasing
348 |      order of their keys. ACTION is a procedure of two arguments which
349 |      take the key and datum respectively of the association. Provided
350 |      ACTION takes time bounded by a constant, `wt-tree/for-each' takes
351 |      time proportional to in the size of WT-TREE. The example prints
352 |      the tree:
353 | 
354 |           (wt-tree/for-each (lambda (key value)
355 |                               (display (list key value)))
356 |                             WT-TREE))
357 | 
358 | 
359 | 
360 | 
361 | Indexing Operations on Weight-Balanced Trees
362 | --------------------------------------------
363 | 
364 |    Weight balanced trees support operations that view the tree as sorted
365 | sequence of associations.  Elements of the sequence can be accessed by
366 | position, and the position of an element in the sequence can be
367 | determined, both in logarthmic time.
368 | 
369 |  -- procedure+: wt-tree/index WT-TREE INDEX
370 |  -- procedure+: wt-tree/index-datum WT-TREE INDEX
371 |  -- procedure+: wt-tree/index-pair WT-TREE INDEX
372 |      Returns the 0-based INDEXth association of WT-TREE in the sorted
373 |      sequence under the tree's ordering relation on the keys.
374 |      `wt-tree/index' returns the INDEXth key, `wt-tree/index-datum'
375 |      returns the datum associated with the INDEXth key and
376 |      `wt-tree/index-pair' returns a new pair `(KEY . DATUM)' which is
377 |      the `cons' of the INDEXth key and its datum.  The average and
378 |      worst-case times required by this operation are proportional to
379 |      the logarithm of the number of associations in the tree.
380 | 
381 |      These operations signal an error if the tree is empty, if
382 |      INDEX`<0', or if INDEX is greater than or equal to the number of
383 |      associations in the tree.
384 | 
385 |      Indexing can be used to find the median and maximum keys in the
386 |      tree as follows:
387 | 
388 |           median:   (wt-tree/index WT-TREE (quotient (wt-tree/size WT-TREE) 2))
389 |           
390 |           maximum:  (wt-tree/index WT-TREE (-1+ (wt-tree/size WT-TREE)))
391 | 
392 |  -- procedure+: wt-tree/rank WT-TREE KEY
393 |      Determines the 0-based position of KEY in the sorted sequence of
394 |      the keys under the tree's ordering relation, or `#f' if the tree
395 |      has no association with for KEY.  This procedure returns either an
396 |      exact non-negative integer or `#f'.  The average and worst-case
397 |      times required by this operation are proportional to the logarithm
398 |      of the number of associations in the tree.
399 | 
400 |  -- procedure+: wt-tree/min WT-TREE
401 |  -- procedure+: wt-tree/min-datum WT-TREE
402 |  -- procedure+: wt-tree/min-pair WT-TREE
403 |      Returns the association of WT-TREE that has the least key under
404 |      the tree's ordering relation. `wt-tree/min' returns the least key,
405 |      `wt-tree/min-datum' returns the datum associated with the least
406 |      key and `wt-tree/min-pair' returns a new pair `(key . datum)'
407 |      which is the `cons' of the minimum key and its datum. The average
408 |      and worst-case times required by this operation are proportional
409 |      to the logarithm of the number of associations in the tree.
410 | 
411 |      These operations signal an error if the tree is empty. They could
412 |      be written
413 |           (define (wt-tree/min tree)        (wt-tree/index tree 0))
414 |           (define (wt-tree/min-datum tree)  (wt-tree/index-datum tree 0))
415 |           (define (wt-tree/min-pair tree)   (wt-tree/index-pair tree 0))
416 | 
417 |  -- procedure+: wt-tree/delete-min WT-TREE
418 |      Returns a new tree containing all of the associations in WT-TREE
419 |      except the association with the least key under the WT-TREE's
420 |      ordering relation.  An error is signalled if the tree is empty. 
421 |      The average and worst-case times required by this operation are
422 |      proportional to the logarithm of the number of associations in the
423 |      tree.  This operation is equivalent to
424 | 
425 |           (wt-tree/delete WT-TREE (wt-tree/min WT-TREE))
426 | 
427 |  -- procedure+: wt-tree/delete-min! WT-TREE
428 |      Removes the association with the least key under the WT-TREE's
429 |      ordering relation.  An error is signalled if the tree is empty. 
430 |      The average and worst-case times required by this operation are
431 |      proportional to the logarithm of the number of associations in the
432 |      tree.  This operation is equivalent to
433 | 
434 |           (wt-tree/delete! WT-TREE (wt-tree/min WT-TREE))
435 | 
436 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject danlentz/clj-wbtree "0.1.3-SNAPSHOT"
2 |   :description "A 'Purely Functional' Weight Balanced Binary Tree"
3 |   :url "http://github.com/danlentz/clj-wbtree"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure  "1.6.0"]])
7 | 
8 | 


--------------------------------------------------------------------------------
/resources/rdfs.edn:
--------------------------------------------------------------------------------
 1 | ;;;
 2 | ;;; Minimal RDF & RDFS schema containing the axiomatic tuples.
 3 | ;;;
 4 | 
 5 | #{
 6 |   [:rdfs/Class           :rdf/type           :rdfs/Class]
 7 |   [:rdfs/Resource        :rdf/type           :rdfs/Class]
 8 |   [:rdfs/Literal         :rdf/type           :rdfs/Class]
 9 |   [:rdfs/Property        :rdf/type           :rdfs/Class]
10 |   [:rdfs/Datatype        :rdf/type           :rdfs/Class]
11 |   [:rdfs/Datatype        :rdfs/subClassOf    :rdfs/Class]
12 |   [:rdfs/Container       :rdf/type           :rdfs/Class]
13 |   [:rdf/XMLLiteral       :rdf/type           :rdfs/Datatype]
14 |   [:rdf/XMLLiteral       :rdfs/subClassOf    :rdfs/Literal]
15 |   [:rdf/EDNLiteral       :rdf/type           :rdfs/Datatype]
16 |   [:rdf/EDNLiteral       :rdfs/subClassOf    :rdfs/Literal]
17 |   [:rdf/Seq              :rdf/type           :rdfs/Class]
18 |   [:rdf/Seq              :rdfs/subClassOf    :rdfs/Container]
19 |   [:rdf/Bag              :rdf/type           :rdfs/Class]
20 |   [:rdf/Bag              :rdfs/subClassOf    :rdfs/Container]
21 |   [:rdf/Alt              :rdf/type           :rdfs/Class]
22 |   [:rdf/Alt              :rdfs/subClassOf    :rdfs/Container]
23 |   [:rdf/Statement        :rdf/type           :rdfs/Class]
24 |   [:rdf/List             :rdf/type           :rdfs/Class]
25 |   [:rdf/type             :rdf/type           :rdf/Property]
26 |   [:rdf/type             :rdfs/domain        :rdfs/Resource]
27 |   [:rdf/type             :rdfs/range         :rdfs/Class]
28 |   [:rdfs/domain          :rdf/type           :rdf/Property]
29 |   [:rdfs/domain          :rdfs/domain        :rdfs/Resource]
30 |   [:rdfs/domain          :rdfs/range         :rdfs/Class]
31 |   [:rdfs/range           :rdf/type           :rdf/Property]
32 |   [:rdfs/range           :rdfs/domain        :rdf/Property]
33 |   [:rdfs/range           :rdfs/range         :rdfs/Class]
34 |   [:rdfs/subPropertyOf   :rdf/type           :rdf/Property]
35 |   [:rdfs/subPropertyOf   :rdfs/domain        :rdf/Property]
36 |   [:rdfs/subPropertyOf   :rdfs/range         :rdf/Property]
37 |   [:rdfs/subClassOf      :rdf/type           :rdf/Property]
38 |   [:rdfs/subClassOf      :rdfs/domain        :rdfs/Class]
39 |   [:rdfs/subClassOf      :rdfs/range         :rdfs/Class]
40 |   [:rdf/subject          :rdf/type           :rdf/Property]
41 |   [:rdf/subject          :rdfs/domain        :rdf/Statement]
42 |   [:rdf/subject          :rdfs/range         :rdfs/Resource]
43 |   [:rdf/predicate        :rdf/type           :rdf/Property]
44 |   [:rdf/predicate        :rdfs/domain        :rdf/Statement]
45 |   [:rdf/predicate        :rdfs/range         :rdfs/Resource]
46 |   [:rdf/object           :rdf/type           :rdf/Property]
47 |   [:rdf/object           :rdfs/domain        :rdf/Statement]
48 |   [:rdf/object           :rdfs/range         :rdfs/Resource]
49 |   [:rdfs/member          :rdf/type           :rdf/Property]
50 |   [:rdfs/member          :rdfs/domain        :rdfs/Resource]
51 |   [:rdfs/member          :rdfs/range         :rdfs/Resource]
52 |   [:rdf/first            :rdf/type           :rdf/Property]
53 |   [:rdf/first            :rdfs/domain        :rdf/List]
54 |   [:rdf/first            :rdfs/range         :rdfs/Resource]
55 |   [:rdf/rest             :rdf/type           :rdf/Property]
56 |   [:rdf/rest             :rdfs/domain        :rdf/List]
57 |   [:rdf/rest             :rdfs/range         :rdf/List]
58 |   [:rdf/nil              :rdf/type           :rdf/List]
59 |   [:rdf/value            :rdf/type           :rdf/Property]
60 |   [:rdf/value            :rdfs/domain        :rdfs/Resource]
61 |   [:rdf/value            :rdfs/range         :rdfs/Resource]
62 |   [:rdfs/ContainerMembershipProperty :rdf/type        :rdfs/Class]
63 |   [:rdfs/ContainerMembershipProperty :rdfs/subClassOf :rdf/Property]
64 |   [:rdfs/seeAlso         :rdf/type           :rdf/Property]
65 |   [:rdfs/seeAlso         :rdfs/domain        :rdfs/Resource]
66 |   [:rdfs/seeAlso         :rdfs/range         :rdfs/Resource]
67 |   [:rdfs/isDefinedBy     :rdf/type           :rdf/Property]
68 |   [:rdfs/isDefinedBy     :rdfs/subPropertyOf :rdfs/seeAlso]
69 |   [:rdfs/isDefinedBy     :rdfs/domain        :rdfs/Resource]
70 |   [:rdfs/isDefinedBy     :rdfs/range         :rdfs/Resource]
71 |   [:rdfs/comment         :rdf/type           :rdf/Property]
72 |   [:rdfs/comment         :rdfs/domain        :rdfs/Resource]
73 |   [:rdfs/comment         :rdfs/range         :rdfs/Literal]
74 |   [:rdfs/label           :rdf/type           :rdf/Property]
75 |   [:rdfs/label           :rdfs/domain        :rdfs/Resource]
76 |   [:rdfs/label           :rdfs/range         :rdfs/Literal]
77 |   }
78 |   
79 | 


--------------------------------------------------------------------------------
/src/wbtree/tree.clj:
--------------------------------------------------------------------------------
  1 | (ns wbtree.tree
  2 |   (:require [wbtree.util :as util]))
  3 | 
  4 | 
  5 | 
  6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  7 | ;; Weight Balanced Functional Binary Tree (Hirai-Yamamoto Tree)
  8 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  9 | ;;
 10 | ;; This is an implementation of a weight-balanced binary tree data
 11 | ;;  structure based on the following references:
 12 | ;;
 13 | ;; --  Adams (1992)
 14 | ;;     'Implementing Sets Efficiently in a Functional Language'
 15 | ;;     Technical Report CSTR 92-10, University of Southampton.
 16 | ;;
 17 | ;; --  Hirai and Yamamoto (2011)
 18 | ;;     'Balancing Weight-Balanced Trees'
 19 | ;;     Journal of Functional Programming / 21 (3):
 20 | ;;     Pages 287-307
 21 | ;;
 22 | ;; --  Oleg Kiselyov
 23 | ;;     'Towards the best collection API, A design of the overall optimal
 24 | ;;     collection traversal interface'
 25 | ;;     <http://pobox.com/~oleg/ftp/papers/LL3-collections-enumerators.txt>
 26 | ;;
 27 | ;; --  Nievergelt and Reingold (1972)
 28 | ;;     'Binary Search Trees of Bounded Balance'
 29 | ;;     STOC '72 Proceedings
 30 | ;;     4th Annual ACM symposium on Theory of Computing
 31 | ;;     Pages 137-142 
 32 | ;;
 33 | ;; --  Driscoll, Sarnak, Sleator, and Tarjan (1989)
 34 | ;;     'Making Data Structures Persistent'
 35 | ;;     Journal of Computer and System Sciences Volume 38 Issue 1, February 1989
 36 | ;;     18th Annual ACM Symposium on Theory of Computing
 37 | ;;     Pages 86-124
 38 | ;;
 39 | ;; --  MIT Scheme weight balanced tree as reimplemented by Yoichi Hirai
 40 | ;;     and Kazuhiko Yamamoto using the revised non-variant algorithm recommended
 41 | ;;     integer balance parameters from (Hirai/Yamomoto 2011).
 42 | ;;
 43 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 44 | 
 45 | (set! *warn-on-reflection* true)
 46 | 
 47 | 
 48 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 49 | ;; Data Collection and Metrics
 50 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 51 | 
 52 | (def stats (atom {}))
 53 | 
 54 | (defn reset-stats!! []
 55 |   (reset! stats {}))
 56 | 
 57 | (defn inc-stat! [stat]
 58 |  #_ (swap! stats assoc stat (inc (get @stats stat 1)))) 
 59 | 
 60 | 
 61 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 62 | ;; Weight Balancing Constants
 63 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 64 | 
 65 |   
 66 | ;; +delta+:
 67 | ;;   The primary balancing rotation parameter that is used for the
 68 | ;;   determination whether two subtrees of a node are in balance or
 69 | ;;   require adjustment by means of a rotation operation.  The specific
 70 | ;;   rotation to be performed is determined by +gamma+.
 71 | 
 72 | (def ^:const +delta+ 3)
 73 | 
 74 | ;; +gamma+:
 75 | ;;   The secondary balancing rotation parameter that is used for the
 76 | ;;   determination of whether a single or double rotation operation should
 77 | ;;   occur, once it has been decided based on +delta+ that a rotation is
 78 | ;;   indeed required.
 79 | 
 80 | (def ^:const +gamma+ 2)
 81 | 
 82 | 
 83 | 
 84 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 85 | ;; Universal Comparator
 86 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 87 | 
 88 | 
 89 | (defn xcompare [x y]
 90 |   (inc-stat! :comparisons)
 91 |   (if (= (class x) (class y))
 92 |     (if (instance? Comparable x)
 93 |       (.compareTo ^Comparable x y)
 94 |       (.compareTo ^Integer (hash x) (hash y)))
 95 |     (.compareTo ^Integer (hash (class x)) (hash (class y)))))
 96 | 
 97 | 
 98 | (defn xcompare< [x y]
 99 |   (neg? (xcompare x y)))
100 | 
101 | (defn xcompare> [x y]
102 |   (pos? (xcompare x y)))
103 | 
104 | (defn xcompare= [x y]
105 |   (zero? (xcompare x y)))
106 | 
107 | 
108 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
109 | ;; Storage Model
110 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
111 | 
112 | 
113 | 
114 | (defn- leaf [] nil)
115 | 
116 | 
117 | (defn null
118 |   "a value which satisfies null?"
119 |   []
120 |   (leaf))
121 | 
122 | (defn null? [n]
123 |   (nil? n))
124 | 
125 | 
126 | 
127 | (deftype Node [k v l r ^long x])
128 |  
129 | (defn node [k v l r x]
130 |   (Node. k v l r x))
131 |  
132 |  
133 | (defn node? [thing]
134 |   (instance? Node thing))
135 |  
136 | 
137 | ;;  (util/avg-timing 10
138 | ;;    (count (repeatedly 1000000 #(apply tuple (range 5)))))
139 | 
140 | ;; ;; 513.698
141 | 
142 | 
143 | 
144 | 
145 | ;;   (util/avg-timing 10
146 | ;;     (count (repeatedly 1000000 #(apply node (range 5)))))
147 | 
148 | 
149 | ;; 382.1553
150 | 
151 | 
152 | 
153 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
154 | ;; Constituent Accessors
155 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
156 | 
157 | 
158 | (defn -k [^Node n]
159 |   (.k n))
160 | 
161 | (defn -v [^Node n]
162 |   (.v n))
163 | 
164 | (defn -l [^Node n]
165 |   (.l n))
166 | 
167 | (defn -r [^Node n]
168 |   (.r n))
169 | 
170 | (defn -x [^Node n]
171 |   (.x n))
172 | 
173 | (defn -kv [^Node n]
174 |   (new clojure.lang.MapEntry (.k n) (.v n)))
175 | 
176 | (defn -lr [^Node n]
177 |   (list (.l n) (.r n)))
178 | 
179 | (defn -kvlr [^Node n]
180 |   (list (.k n) (.v n) (.l n) (.r n)))
181 | 
182 | 
183 | 
184 | 
185 | 
186 | (defmacro kv
187 |   "destructure node n: key value"
188 |   [[ksym vsym] n & body]
189 |   `(let [^Node n# ~n
190 |          ~ksym (.k n#) ~vsym (.v n#)]
191 |      ~@body))
192 | 
193 | 
194 | (defmacro kvlr
195 |   "destructure node n: key value left right"
196 |   [[ksym vsym lsym rsym] n & body]
197 |   `(let [^Node n# ~n
198 |          ~ksym (.k n#) ~vsym (.v n#)
199 |          ~lsym (.l n#) ~rsym (.r n#)]
200 |      ~@body))
201 | 
202 | 
203 | (defmacro kvlrx
204 |   "destructure node n: key value left right size"
205 |   [[ksym vsym lsym rsym xsym] n & body]
206 |   `(let [^Node n# ~n
207 |          ~ksym (.k n#) ~vsym (.v n#)
208 |          ~lsym (.l n#) ~rsym (.r n#)
209 |          ~xsym (.x n#)]
210 |      ~@body))
211 | 
212 | 
213 | (defn node-call 
214 |   "apply f to the destructured constituent values of n.
215 |    f is a function taking four parameters: K, V, L, and R,
216 |    where K is the key of NODE, V is the value of NODE, L is the left
217 |    subtree of NODE, and R is the right subtree of NODE."
218 |   [n f]
219 |   (apply f (-kvlr n)))
220 | 
221 | 
222 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
223 | ;; Fundamental Node Operations
224 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
225 | 
226 | 
227 | (defn node-size ^long [n]
228 |   (if (null? n)
229 |     0
230 |     (-x n)))
231 | 
232 | 
233 | (defn node-weight
234 |   "returns node weight as appropriate for rotation calculations using
235 |    the 'revised non-variant algorithm' for weight balanced binary tree."
236 |   ^long [n]
237 |   (inc (node-size n)))
238 | 
239 | 
240 | (defn node-enumerator
241 |   "Efficient mechanism to accomplish partial enumeration of
242 |    tree-structure into a seq representation without incurring the
243 |    overhead of operating over the entire tree.  Used internally for
244 |    implementation of higher-level collection api routines"
245 |   ([n] (node-enumerator n nil))
246 |   ([n enum]
247 |      (if (null? n)
248 |        enum
249 |        (kvlr [k v l r] n
250 |          (recur l (list n r enum))))))
251 | 
252 | 
253 | (defn node-enumerator-reverse
254 |   ([n] (node-enumerator-reverse n nil))
255 |   ([n enum]
256 |      (if (null? n)
257 |        enum
258 |        (kvlr [k v l r] n
259 |          (recur r (list n l enum))))))
260 | 
261 | 
262 | (defn node-enum-first [enum]
263 |   (when (seq enum)
264 |     (first enum)))
265 | 
266 | 
267 | (defn node-enum-rest  [enum]
268 |   (when (seq enum)
269 |     (let [[x1 x2 x3] enum]
270 |       (when-not (and (nil? x2) (nil? x3))
271 |         (node-enumerator x2 x3)))))
272 | 
273 | 
274 | (defn node-enum-prior [enum]
275 |   (when (seq enum)
276 |     (let [[x1 x2 x3] enum]
277 |       (when-not (and (nil? x2) (nil? x3))
278 |         (node-enumerator-reverse x2 x3)))))
279 | 
280 | 
281 | (defn node-create
282 |   "Join left and right subtrees at root k/v.
283 |   Assumes all keys in l < k < all keys in r."
284 |   [k v ^Node l ^Node r]
285 |   (Node. k v l r (+ 1 (if l (.x l) 0) (if r (.x r) 0))))
286 | 
287 | 
288 | (defn node-singleton 
289 |   "Create and return a newly allocated weight balanced
290 |   tree containing a single association, that value V with key K."
291 |   [k v]
292 |   (node-create k v (null) (null)))
293 | 
294 | 
295 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
296 | ;; Tree Rotations
297 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
298 | 
299 | 
300 | (defn rotate-single-left
301 |   "Perform a single left rotation, moving Y, the left subtree of the
302 |   right subtree of A, into the left subtree (shown below).  This must
303 |   occur in order to restore proper balance when the weight of the left
304 |   subtree of node A is less then the weight of the right subtree of
305 |   node A multiplied by rotation coefficient +delta+ and the weight of
306 |   the left subtree of node B is less than the weight of the right subtree
307 |   of node B multiplied by rotation coefficient +gamma+
308 |  
309 |                 ,---,                                  ,---,
310 |                 | A |                                  | B |
311 |                 :---:                                  :---:
312 |                :     :                                :     :
313 |           ,---:       :---,                      ,---:       :---, 
314 |           | X |       | B |           =>         | A |       | Z | 
315 |           '---'       :---:                      :---:       '---'
316 |                  ,---:     :---,            ,---:     :---,
317 |                  | Y |     | Z |            | X |     | Y |
318 |                  '---'     '---'            '---'     '---'
319 |   "
320 |   [ak av x b]
321 |   (inc-stat! :single-left)
322 |   (kvlr [bk bv y z] b
323 |     (node-create bk bv
324 |       (node-create ak av x y) z)))
325 | 
326 | 
327 | 
328 | (defn rotate-double-left
329 |   "Perform a double left rotation, moving Y1, the left subtree of the
330 |   left subtree of the right subtree of A, into the left subtree (shown
331 |   below).  This must occur in order to restore proper balance when the
332 |   weight of the left subtree of node A is less then the weight of the
333 |   right subtree of node A multiplied by rotation coefficient +delta+
334 |   and the weight of the left subtree of node B is greater than or equal
335 |   to the weight of the right subtree of node B multiplied by rotation
336 |   coefficient +gamma+.
337 |   
338 |                 ,---,                                    ,---,             
339 |                 | A |                                    | B |             
340 |              ___:---:___                             ____:---:____          
341 |         ,---:           :---,                   ,---:             :---,       
342 |         | X |           | C |                   | A |             | C |       
343 |         '---'           :---:         =>        :---:             :---:
344 |                    ,---:     :---,         ,---:     :---,   ,---:     :---,  
345 |                    | B |     | Z |         | X |     | y1|   | y2|     | Z |  
346 |                    :---:     '---'         '---'     '---'   '---'     '---'
347 |               ,---:     :---,   
348 |               | y1|     | y2|  
349 |               '---'     '---'
350 |   "
351 |   [ak av x c]
352 |   (inc-stat! :double-left)
353 |   (kvlr [ck cv b z] c
354 |     (kvlr [bk bv y1 y2] b
355 |       (node-create bk bv
356 |         (node-create ak av x y1)
357 |         (node-create ck cv y2 z)))))
358 | 
359 | 
360 | (defn rotate-single-right
361 |   "Perform a single right rotation, moving Y, the right subtree of the
362 |   left subtree of B, into the right subtree (shown below).  This must
363 |   occur in order to restore proper balance when the weight of the right
364 |   subtree of node B is less then the weight of the left subtree of
365 |   node B multiplied by rotation coefficient +delta+ and the weight of the
366 |   right subtree of node A is less than the weight of the left subtree
367 |   of node A multiplied by rotation coefficient +gamma+.
368 |   
369 |                 ,---,                                  ,---,             
370 |                 | B |                                  | A |             
371 |                 :---:                                  :---:             
372 |                :     :                                :     :            
373 |           ,---:       :---,                      ,---:       :---,       
374 |           | A |       | Z |          =>          | X |       | B |       
375 |           :---:       '---'                      '---'       :---:       
376 |      ,---:     :---,                                    ,---:     :---,  
377 |      | X |     | Y |                                    | Y |     | Z |  
378 |      '---'     '---'                                    '---'     '---'  
379 |   "
380 |   [bk bv a z]
381 |   (inc-stat! :single-right)
382 |   (kvlr [ak av x y] a
383 |     (node-create ak av x (node-create bk bv y z))))
384 | 
385 | 
386 | (defn rotate-double-right
387 |   "Perform a double right rotation, moving Y2, the right subtree of
388 |   the right subtree of the left subtree of C, into the right
389 |   subtree (shown below).  This must occur in order to restore proper
390 |   balance when the weight of the right subtree of node C is less then
391 |   the weight of the left subtree of node C multiplied by rotation
392 |   coefficient +delta+ and the weight of the right subtree of node B
393 |   is greater than or equal to the weight of the left subtree of node B
394 |   multiplied by rotation coefficient +gamma+.
395 |   
396 |                 ,---,                                    ,---,             
397 |                 | C |                                    | B |             
398 |              ___:---:___                             ____:---:____          
399 |         ,---:           :---,                   ,---:             :---,       
400 |         | A |           | Z |                   | A |             | C |       
401 |         :---:           '---'        =>         :---:             :---:
402 |    ,---:     :---,                         ,---:     :---,   ,---:     :---,  
403 |    | X |     | B |                         | X |     | y1|   | y2|     | Z |  
404 |    '---'     :---:                         '---'     '---'   '---'     '---'
405 |         ,---:     :---,   
406 |         | y1|     | y2|  
407 |         '---'     '---'
408 |   "
409 |   [ck cv a z]
410 |   (inc-stat! :double-right)
411 |   (kvlr [ak av x b] a
412 |     (kvlr [bk bv y1 y2] b
413 |       (node-create bk bv
414 |         (node-create ak av x y1)
415 |         (node-create ck cv y2 z)))))
416 | 
417 | 
418 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
419 | ;; Fundamental Tree Operations
420 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
421 | 
422 | 
423 | (defn node-stitch
424 |   "Join left and right subtrees at root k/v, performing a single or
425 |   double rotation to balance the resulting tree, if needed.  Assumes
426 |   all keys in l < k < all keys in r, and the relative weight balance
427 |   of the left and right subtrees is such that no more than one
428 |   single/double rotation will result in each subtree being less than
429 |   +delta+ times the weight of the other."
430 |   [k v l r]
431 |   (let [lw (node-weight l)
432 |         rw (node-weight r)]
433 |     (cond
434 |       (> rw (* +delta+ lw)) (let [rlw (node-weight (-l r))
435 |                                   rrw (node-weight (-r r))]
436 |                               (if (< rlw (* +gamma+ rrw))
437 |                                 (rotate-single-left k v l r)
438 |                                 (rotate-double-left k v l r)))
439 |       (> lw (* +delta+ rw)) (let [llw (node-weight (-l l))
440 |                                   lrw (node-weight (-r l))]
441 |                               (if (< lrw (* +gamma+ llw))
442 |                                 (rotate-single-right k v l r)
443 |                                 (rotate-double-right k v l r)))
444 |       true                  (node-create k v l r))))
445 | 
446 | 
447 | (defn node-add
448 |   ([n k] (node-add n k k))
449 |   ([n k v]
450 |      (if (null? n)
451 |        (node-singleton k v)
452 |        (kvlr [key val l r] n
453 |          (let [c (xcompare k key)]
454 |            (cond
455 |              (neg? c) (node-stitch key val (node-add l k v) r)
456 |              (pos? c) (node-stitch key val l (node-add r k v))
457 |              true     (node-create key v l r)))))))
458 | 
459 | 
460 | (defn node-concat3 [k v l r]
461 |   (cond
462 |     (null? l) (node-add r k v)
463 |     (null? r) (node-add l k v)
464 |     true      (let [lw (node-weight l)
465 |                     rw (node-weight r)]
466 |                 (cond
467 |                   (< (* +delta+ lw) rw) (kvlr [k2 v2 l2 r2] r
468 |                                           (node-stitch k2 v2
469 |                                             (node-concat3 k v l l2) r2))
470 |                   (< (* +delta+ rw) lw) (kvlr [k1 v1 l1 r1] l
471 |                                           (node-stitch k1 v1 l1
472 |                                             (node-concat3 k v r1 r)))
473 |                   true                  (node-create k v l r)))))
474 | 
475 | 
476 | (defn node-least 
477 |   "Return the node containing the minimum key of the tree rooted at n"
478 |   [n]
479 |   (cond
480 |     (null? n)   (util/exception "least: empty tree")
481 |     (null? (-l n)) n
482 |     true           (recur (-l n))))
483 | 
484 | 
485 | (defn node-greatest
486 |   "Return the node containing the minimum key of the tree rooted at n"
487 |   [n]
488 |   (cond
489 |     (null? n)   (util/exception "greatest: empty tree")
490 |     (null? (-r n)) n
491 |     true           (recur (-r n))))
492 | 
493 | 
494 | (defn node-remove-least
495 |   "Return a tree the same as the one rooted at n, with the node
496 |   containing the minimum key removed. See node-least."
497 |   [n]
498 |   (cond
499 |     (null? n)       (util/exception "remove-least: empty tree")
500 |     (null? (-l n))  (-r n)
501 |     true            (node-stitch (-k n) (-v n)
502 |                       (node-remove-least (-l n)) (-r n))))
503 | 
504 | 
505 | (defn node-remove-greatest
506 |   "Return a tree the same as the one rooted at n, with the node
507 |   containing the maximum key removed. See node-greatest."
508 |   [n]
509 |   (cond
510 |     (null? n)       (util/exception "remove-greatest: empty tree")
511 |     (null? (-r n))  (-l n)
512 |     true            (node-stitch (-k n) (-v n) (-l n)
513 |                       (node-remove-greatest (-r n)))))
514 | 
515 | 
516 | (defn node-concat2
517 |   "Join two trees, the left rooted at l, and the right at r,
518 |   performing a single balancing operation on the resulting tree, if
519 |   needed. Assumes all keys in l are smaller than all keys in r, and
520 |   the relative balance of l and r is such that no more than one rotation
521 |   operation will be required to balance the resulting tree"
522 |   [l r]
523 |   (cond
524 |     (null? l) r
525 |     (null? r) l
526 |     true      (kvlr [k v _ _] (node-least r)
527 |                 (node-stitch k v l (node-remove-least r)))))
528 | 
529 | 
530 | (defn node-concat [n1 n2]
531 |   (cond
532 |     (null? n1) n2
533 |     (null? n2) n1
534 |     true       (let [minimum (node-least n2)]
535 |                  (node-concat3 (-k minimum) (-v minimum) n1
536 |                    (node-remove-least n2)))))
537 | 
538 | 
539 | (defn node-remove [n k]
540 |   (if (null? n)
541 |     (null)
542 |     (kvlr [key val l r] n
543 |       (let [c (xcompare k key)]
544 |         (cond
545 |           (neg? c) (node-stitch key val (node-remove l k) r)
546 |           (pos? c) (node-stitch key val l (node-remove r k))
547 |           true     (node-concat2 l r))))))
548 | 
549 | 
550 | (defn node-find
551 |   "Find k (if exists) in only d comparisons (d is depth of tree)
552 |    rather than the traditional compare/low compare/high which takes on
553 |    avg (* 1.5 (- d 1))"
554 |   [n k]
555 |   (letfn [(srch [this best]
556 |             (cond
557 |               (null? this)            best
558 |               (xcompare< k (-k this)) (recur (-l this) best)
559 |               true                    (recur (-r this) this)))]
560 |     (let [best (srch n nil)]
561 |       (when best
562 |         (when-not (xcompare< (-k best) k)
563 |           best)))))
564 | 
565 | 
566 | (defn node-fold-left
567 |   "Fold-left (reduce) the collection from least to greatest."
568 |   [f base n]
569 |   (loop [e (node-enumerator n) acc base]
570 |     (if (nil? e)
571 |       acc
572 |       (recur (node-enum-rest e)
573 |         (f acc (node-enum-first e))))))
574 | 
575 | 
576 | (defn node-fold-right
577 |   "Fold-right (reduce) the collection from greatest to least."
578 |   [f base n]
579 |   (loop [e (node-enumerator-reverse n) acc base]
580 |     (if (nil? e)
581 |       acc
582 |       (recur (node-enum-prior e)
583 |         (f acc (node-enum-first e))))))
584 | 
585 | 
586 | (defn node-filter [p n]
587 |   (node-fold-left (fn [x y]
588 |                     (if (p y)
589 |                       x
590 |                       (node-remove x (-k y))))
591 |     n n))
592 | 
593 | 
594 | (defn node-invert [n]
595 |   (node-fold-left (fn [acc x]
596 |                     (node-add acc (-v x) (-k x)))
597 |     (null) n))
598 | 
599 | 
600 | (defn node-iter
601 |   "For the side-effect, apply f to each node of the tree rooted at n"
602 |   [n f]
603 |   (if (null? n)
604 |     nil
605 |     (kvlr [_ _ l r] n
606 |       (node-iter l f)
607 |       (f n)
608 |       (node-iter r f))))
609 | 
610 | 
611 | (defn for-all-nodes
612 |   "For the side-effect, apply f to each node of the tree rooted at
613 |   n for which the predicate function returns a logically true value"
614 |   [n p f]
615 |   (node-iter n (fn [n] (when (p n) (f n)))))
616 | 
617 | 
618 | (defn node-split-lesser [n k]
619 |   (cond
620 |     (null? n)            (null)
621 |     (xcompare< k (-k n)) (node-split-lesser (-l n) k)
622 |     (xcompare> k (-k n)) (node-concat3 (-k n) (-v n) (-l n)
623 |                            (node-split-lesser (-r n) k))
624 |     true                 (-l n)))
625 | 
626 | 
627 | (defn node-split-greater [n k]
628 |   (cond
629 |     (null? n)            (null)
630 |     (xcompare> k (-k n)) (node-split-greater (-r n) k)
631 |     (xcompare< k (-k n)) (node-concat3 (-k n) (-v n)
632 |                            (node-split-greater (-l n) k) (-r n))
633 |     true                 (-r n)))
634 | 
635 | 
636 | (defn node-split
637 |   "returns a triple (l present r) where: l is the set of elements of
638 |   n that are < k, r is the set of elements of n that are > k, present
639 |   is false if n contains no element equal to k, or (k v) if n contains
640 |   an element with key equal to k"
641 |   [n k]
642 |   (if (null? n)
643 |     (list nil nil nil)
644 |     (kvlr [ak v l r] n
645 |       (let [c (xcompare k ak)]
646 |         (cond
647 |           (zero? c) [l (list k v) r]
648 |           (neg?  c) (let [[ll pres rl] (node-split l k)]
649 |                       [ll pres (node-concat3 ak v rl r)])
650 |           (pos?  c) (let [[lr pres rr] (node-split r k)]
651 |                       [(node-concat3 ak v l lr) pres rr]))))))
652 | 
653 | 
654 | 
655 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
656 | ;; Fundamental Set Operations
657 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
658 | 
659 | 
660 | (defn node-set-union [n1 n2]
661 |   (cond
662 |     (null? n1) n2
663 |     (null? n2) n1
664 |     true       (kvlr [ak av l r] n2
665 |                  (let [[l1 _ r1] (node-split n1 ak)]
666 |                    (node-concat3 ak av
667 |                      (node-set-union l1 l)
668 |                      (node-set-union r1 r))))))
669 | 
670 | 
671 | 
672 | (defn node-set-intersection [n1 n2]
673 |   (cond
674 |     (null? n1) (null)
675 |     (null? n2) (null)
676 |     true       (kvlr [ak av l r] n2
677 |                  (let [[l1 x r1] (node-split n1 ak)]
678 |                    (if x 
679 |                      (node-concat3 ak av
680 |                        (node-set-intersection l1 l)
681 |                        (node-set-intersection r1 r))
682 |                      (node-concat
683 |                        (node-set-intersection l1 l)
684 |                        (node-set-intersection r1 r)))))))
685 | 
686 | 
687 | 
688 | (defn node-set-difference [n1 n2]
689 |   (cond
690 |     (null? n1) (null)
691 |     (null? n2) n1
692 |     true       (kvlr [ak _ l r] n2
693 |                  (let  [[l1 _ r1] (node-split n1 ak)]
694 |                    (node-concat
695 |                      (node-set-difference l1 l)
696 |                      (node-set-difference r1 r))))))
697 | 
698 | 
699 | 
700 | (defn node-subset? [super sub]
701 |   (letfn [(subset? [n1 n2]
702 |             (or (null? n1)
703 |               (and (<= (node-size n1) (node-size n2))
704 |                 (kvlr [k1 _ l1 r1] n1
705 |                   (kvlr [k2 _ l2 r2] n2
706 |                     (let [c (xcompare k1 k2)]
707 |                       (cond
708 |                         (neg? c) (and
709 |                                    (subset?   l1 l2)
710 |                                    (node-find n2 k1)
711 |                                    (subset?   r1 n2))
712 |                         (pos? c) (and
713 |                                    (subset?   r1 r2)
714 |                                    (node-find n2 k1)
715 |                                    (subset?   l1 n2))
716 |                         true     (and
717 |                                    (subset? l1 l2)
718 |                                    (subset? r1 r2)))))))))]
719 |     (let [res (or (null? sub) (subset? sub super))]
720 |       (if res true false))))
721 | 
722 | 
723 | 
724 | (defn node-set-compare 
725 |   "return 3-way ordinal comparison of the trees n1 and n2 with the following
726 |   return-value semantics:
727 | 
728 |    -1  -> n1 is LESS-THAN    n2
729 |     0  -> n1 is EQAL-TO      n2
730 |    +1  -> n1 is GREATER-THAN n2"
731 |   [n1 n2] 
732 |   (loop [e1 (node-enumerator n1 nil)
733 |          e2 (node-enumerator n2 nil)]      
734 |     (cond
735 |       (and (nil? e1) (nil? e2))  0
736 |       (nil? e1)                 -1
737 |       (nil? e2)                  1
738 |       true                       (let [[v1 r1 ee1] e1
739 |                                        [v2 r2 ee2] e2
740 |                                        c (xcompare (-k v1) (-k v2))]
741 |                                    (if-not (zero? c)
742 |                                      c
743 |                                      (recur
744 |                                        (node-enumerator r1 ee1)
745 |                                        (node-enumerator r2 ee2)))))))
746 |                                      
747 | 
748 | 
749 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
750 | ;; Fundamental Map Operations
751 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
752 | 
753 | 
754 | (defn node-merge 
755 |   "Merge two maps in worst case linear time"
756 |   [n1 n2 merge-fn]
757 |   (cond
758 |     (null? n1) n2
759 |     (null? n2) n1
760 |     true       (kvlr [ak av l r] n2
761 |                  (let [[l1 x r1] (node-split n1 ak)
762 |                        val       (if x
763 |                                    (merge-fn ak av (-v x))
764 |                                    av)]                      
765 |                    (node-concat3 ak val
766 |                      (node-merge l1 l)
767 |                      (node-merge r1 r))))))
768 | 
769 | 
770 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
771 | ;; Fundamental Vector Operations
772 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
773 | 
774 | 
775 | (defn node-nth 
776 |   "Return nth node from the beginning of the ordered tree rooted at n."
777 |   [n index]
778 |   (letfn [(srch [n index]
779 |             (kvlr [_ _ l r] n
780 |               (let [lsize (node-size l)]
781 |                 (cond
782 |                   (< index lsize) (recur l index)
783 |                   (> index lsize) (recur r (- index (inc lsize)))
784 |                   true            n))))]
785 |     (if-not (and (<= 0 index) (< index (node-size n)))
786 |       (util/exception "index out of range")
787 |       (srch n index))))
788 | 
789 | 
790 | (defn node-rank
791 |   "Return the rank (sequential position) of a given KEY within the
792 |   ordered tree rooted at n."
793 |   ([n k] (node-rank n k 0))
794 |   ([n k rank]
795 |      (cond
796 |        (null? n)            nil
797 |        (xcompare< k (-k n)) (recur (-l n) k rank)
798 |        (xcompare> k (-k n)) (recur (-r n) k
799 |                               (+ 1 rank (node-size (-l n))))
800 |        true                 (+ rank (node-size (-l n))))))
801 |   
802 |     
803 | (defn node-vec
804 |   "Return a (lon-lazy) vector of all nodes in tree rooted at n in
805 |   the order they occur."
806 |   [n]
807 |   (node-fold-left conj [] n))
808 | 
809 |     
810 | (defn node-vec-reverse
811 |   "Return a (lon-lazy) vector of all nodes in tree rooted at n in
812 |   reverse order."
813 |   [n]
814 |   (node-fold-right conj [] n))
815 | 
816 | 
817 | (defn node-key-vec
818 |   "Return a (lon-lazy) vector of all keys in tree rooted at n
819 |   in the order they occur."
820 |   [n]
821 |   (node-fold-left #(conj %1 (-k %2)) [] n))
822 | 
823 | 
824 | (defn node-key-vec-reverse
825 |   "Return a (lon-lazy) vector of all keys in tree rooted at n
826 |   in reverse-order."
827 |   [n]
828 |   (node-fold-right #(conj %1 (-k %2)) [] n))
829 | 
830 | 
831 | (defn node-value-vec
832 |   "Return a (lon-lazy) vector of all values in tree rooted at n
833 |   in the order they occur."
834 |   [n]
835 |   (node-fold-left #(conj %1 (-v %2)) [] n))
836 | 
837 | 
838 | (defn node-value-vec-reverse
839 |   "Return a (lon-lazy) vector of all values in tree rooted at n
840 |   in reverse-order."
841 |   [n]
842 |   (node-fold-right #(conj %1 (-v %2)) [] n))
843 | 
844 | 
845 | 
846 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
847 | ;; Fundamental Seq Operations
848 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
849 | 
850 | 
851 | (defn node-seq
852 |   "Return a (lazy) seq of nodes in tree rooted at n in the order they occur."
853 |   [n]
854 |   (letfn [(node-enum-seq [enum]
855 |             (lazy-seq
856 |               (when-not (nil? enum)
857 |                 (cons (node-enum-first enum)
858 |                   (node-enum-seq (node-enum-rest enum))))))]
859 |     (node-enum-seq (node-enumerator n))))
860 | 
861 | 
862 | (defn node-seq-reverse
863 |   "Return a (lazy) seq of nodes in tree rooted at n in reverse order."
864 |   [n]
865 |   (letfn [(node-enum-seq-reverse [enum]
866 |             (lazy-seq
867 |               (when-not (nil? enum)
868 |                 (cons (node-enum-first enum)
869 |                   (node-enum-seq-reverse (node-enum-prior enum))))))] 
870 |     (node-enum-seq-reverse (node-enumerator-reverse n))))
871 | 
872 | 
873 | 
874 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
875 | ;; Misc
876 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
877 | 
878 | 
879 | (defn- make-integer-tree
880 |   ([size]           (reduce node-add (null) (shuffle (range size))))
881 |   ([start end]      (reduce node-add (null) (shuffle (range start end))))
882 |   ([start end step] (reduce node-add (null) (shuffle (range start end step)))))
883 | 
884 | 
885 | (defn- make-integer-set
886 |   ([size]           (reduce conj #{} (range size)))
887 |   ([start end]      (reduce conj #{} (range start end)))
888 |   ([start end step] (reduce conj #{} (range start end step))))
889 | 
890 | 
891 | (defn- make-integer-sorted-set
892 |   ([size]           (reduce conj (sorted-set) (shuffle (range size))))
893 |   ([start end]      (reduce conj (sorted-set) (shuffle (range start end))))
894 |   ([start end step] (reduce conj (sorted-set) (shuffle (range start end step)))))
895 | 
896 | 
897 | (set! *warn-on-reflection* false)
898 | 
899 | 
900 | ;; (def i10 (make-integer-tree 10))
901 | ;; (def i30 (make-integer-tree 20 30))
902 | ;; (def i   (make-integer-tree 30))
903 | 
904 | ;; (map -k (node-seq i))
905 | ;;   => (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
906 | ;;       25 26 27 28 29)
907 | 
908 | ;; (map -k (node-seq-reverse i))
909 | ;;   => (29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8
910 | ;;       7 6 5 4 3 2 1 0)
911 | 
912 | 
913 | 
914 | ;; (map -k (node-seq (node-filter (comp even? -k) i)))
915 | ;;   => (0 2 4 6 8 10 12 14 16 18 20 22 24 26 28)
916 | 
917 | ;; (def m (reduce #(apply node-add %1 %2) nil
918 | ;;          [[1 :a][2 :b][3 :c][4 :d][5 :e]]))
919 | 
920 | ;; (map -k (node-seq (node-invert m)))
921 | ;;   => (:a :b :c :d :e)
922 | 
923 | 
924 | ;; (def a (make-integer-tree 0 100))
925 | ;; (def b (make-integer-tree 0 100 2))
926 | ;; (def c (make-integer-tree 0 100 3))
927 | ;; (def d (make-integer-tree 0 100 4))
928 | ;; (def e (make-integer-tree 0 100 5))
929 | 
930 | ;; (map -k (node-seq (node-set-intersection b c)))
931 | ;;   => (0 6 12 18 24 30 36 42 48 54 60 66 72 78 84 90 96)
932 | 
933 | ;; (map -k (node-seq (node-set-union b c)))
934 | ;;   => (0 2 3 4 6 8 9 10 12 14 15 16 18 20 21 22 24 26 27 28 30 32 33 34 36 38
935 | ;;       39 40 42 44 45 46 48 50 51 52 54 56 57 58 60 62 63 64 66 68 69 70 72
936 | ;;       74 75 76 78 80 81 82 84 86 87 88 90 92 93 94 96 98 99)
937 | 
938 | ;; (map -k (node-seq (node-set-difference b c)))
939 | ;;   => (2 4 8 10 14 16 20 22 26 28 32 34 38 40 44 46 50 52 56 58 62 64 68 70
940 | ;;       74 76 80 82 86 88 92 94 98)
941 | 
942 | ;; (map -k (node-seq (node-set-difference c b)))
943 | ;;   => (3 9 15 21 27 33 39 45 51 57 63 69 75 81 87 93 99)
944 | 
945 | 
946 | 
947 | 
948 | ;; (util/avg-timing 10 (class (make-integer-sorted-set 100000)))
949 | ;; 131.3212
950 | 
951 | 
952 | ;; (util/avg-timing 10 (class (make-integer-tree 100000)))
953 | ;; 148.5932
954 | 
955 | 
956 | 


--------------------------------------------------------------------------------
/src/wbtree/types.clj:
--------------------------------------------------------------------------------
  1 | (ns wbtree.types
  2 |   (:use     [clj-tuple])
  3 |   (:require [wbtree.tree :as tree])
  4 |   (:require [wbtree.util :as util]))
  5 | 
  6 | 
  7 | 
  8 | 
  9 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 10 | ;; Common Interface
 11 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 12 | 
 13 | (definterface IOrderedCollection
 14 |   (getRoot          []))
 15 | 
 16 | 
 17 | 
 18 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 19 | ;; Ordered Set Collection
 20 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 21 | 
 22 | 
 23 | (deftype OrderedSet [root _meta]
 24 | 
 25 |   IOrderedCollection
 26 |   (getRoot [_]
 27 |     root)
 28 | 
 29 |   clojure.lang.IMeta
 30 |   (meta [_]
 31 |     _meta)
 32 | 
 33 |   clojure.lang.IObj
 34 |   (withMeta [_ m]
 35 |     (new OrderedSet root m))
 36 |   
 37 |   clojure.lang.Indexed
 38 |   (nth [_ i]
 39 |     (tree/-k
 40 |       (tree/node-nth root i)))
 41 | 
 42 |   clojure.lang.Seqable
 43 |   (seq [_]
 44 |     (map tree/-k (tree/node-seq root)))
 45 | 
 46 |   clojure.lang.Reversible
 47 |   (rseq [_]
 48 |     (map tree/-k (tree/node-seq-reverse root)))
 49 | 
 50 |   clojure.lang.ILookup
 51 |   (valAt [_ k not-found]
 52 |     (if-let [found (tree/node-find root k)]
 53 |       (tree/-k found)
 54 |       not-found))
 55 |   (valAt [this k]
 56 |     (.valAt this k nil))
 57 | 
 58 |   clojure.lang.IFn
 59 |   (invoke [this k not-found]
 60 |     (.valAt this k not-found))
 61 |   (invoke [this k]
 62 |     (.valAt this k nil))
 63 | 
 64 |   java.lang.Comparable
 65 |   (compareTo [this o]
 66 |     (if (identical? this o)
 67 |       0
 68 |       (if (instance? IOrderedCollection o)                      
 69 |         (tree/node-set-compare root (.getRoot o))
 70 |         (util/exception "unsupported comparison: " this o))))
 71 | 
 72 |   java.util.Collection
 73 |   (toArray [_]
 74 |     (object-array (tree/node-key-vec root)))
 75 |   (add [_ o]
 76 |     (util/exception UnsupportedOperationException))
 77 |   (addAll [_ o]
 78 |     (util/exception UnsupportedOperationException))
 79 |   (remove [_ o]
 80 |     (util/exception UnsupportedOperationException))
 81 |   (removeAll [_ o]
 82 |     (util/exception UnsupportedOperationException))
 83 |   (retainAll [_ o]
 84 |     (util/exception UnsupportedOperationException))
 85 | 
 86 |   java.util.Set
 87 |   (size [_]
 88 |     (tree/node-size root))
 89 |   (isEmpty [_]
 90 |     (tree/null? root))
 91 |   (iterator [this]
 92 |     (clojure.lang.SeqIterator. (seq this)))
 93 |   (containsAll [this s]
 94 |     (every? #(.contains this %) s))
 95 | 
 96 |   
 97 |   clojure.lang.IPersistentSet
 98 |   (equiv [this o]
 99 |     (if (identical? this o)
100 |       0
101 |       (if (instance? IOrderedCollection o)                      
102 |         (zero? (tree/node-set-compare root (.getRoot o)))
103 |         (if (set? o)
104 |           (zero? (.equiv (set (tree/node-key-vec root)) o))
105 |           (util/exception "unsupported comparison: " this o)))))
106 |   (count [_]
107 |     (tree/node-size root))
108 |   (empty [_]
109 |     (new OrderedSet (tree/null) {}))
110 |   (contains [_ k]
111 |     (if (tree/node-find root k)
112 |       true
113 |       false))
114 |   (disjoin [this k]
115 |     (new OrderedSet (tree/node-remove root k) _meta))
116 |   (cons [this k]
117 |     (new OrderedSet (tree/node-add root k) _meta))
118 |   )
119 | 
120 | 
121 | (defmethod print-method OrderedSet [s w]
122 |   ((get (methods print-method) clojure.lang.IPersistentSet) s w))
123 | 
124 | 
125 | (defn ordered-set
126 |   ([]
127 |      (ordered-set []))
128 |   ([coll]
129 |      (->OrderedSet (reduce tree/node-add (tree/null) coll) {})))
130 | 
131 | 
132 | 
133 | 
134 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
135 | ;; Ordered Map Collection
136 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
137 | 
138 | (defn pair [k v]
139 |   (new clojure.lang.MapEntry k v))
140 | 
141 | 
142 | (deftype OrderedMap [root _meta]
143 | 
144 |   IOrderedCollection
145 |   (getRoot [_]
146 |     root)
147 | 
148 |   clojure.lang.IMeta
149 |   (meta [_]
150 |     _meta)
151 | 
152 |   clojure.lang.IObj
153 |   (withMeta [_ m]
154 |     (new OrderedMap root m))
155 |   
156 |   clojure.lang.Indexed
157 |   (nth [_ i]
158 |     (tree/-kv
159 |       (tree/node-nth root i)))
160 |   
161 |   clojure.lang.MapEquivalence
162 | 
163 |   clojure.lang.Counted
164 |   (count [this]
165 |     (tree/node-size root))
166 |   
167 |   clojure.lang.Seqable
168 |   (seq [_]
169 |     (map tree/-kv (tree/node-seq root)))
170 | 
171 |   clojure.lang.Reversible
172 |   (rseq [_]
173 |     (map tree/-kv (tree/node-seq-reverse root)))
174 | 
175 |   clojure.lang.ILookup
176 |   (valAt [_ k not-found]
177 |     (if-let [found (tree/node-find root k)]
178 |       (tree/-v found)
179 |       not-found))
180 |   (valAt [this k]
181 |     (.valAt this k nil))
182 | 
183 |   clojure.lang.IFn
184 |   (invoke [this k not-found]
185 |     (.valAt this k not-found))
186 |   (invoke [this k]
187 |     (.valAt this k nil))
188 | 
189 |   java.lang.Comparable
190 |   (compareTo [this o]
191 |     (if (identical? this o)
192 |       0
193 |       (if (instance? IOrderedCollection o)                      
194 |         (tree/node-set-compare root (.getRoot o))
195 |         (util/exception "unsupported comparison: " this o))))
196 | 
197 |   clojure.lang.Associative
198 |   (containsKey [_ k]
199 |     (not (nil? (tree/node-find root k))))
200 |   (entryAt [_ k]
201 |     (when-let [x (tree/node-find root k)]
202 |       (tree/-kv x)))
203 |   (assoc [_ k v]
204 |     (new OrderedMap (tree/node-add root k v) _meta))
205 |   (empty [this]
206 |     (new OrderedMap (tree/null) {}))
207 | 
208 |   java.util.Map
209 |   (get [this k]
210 |     (.valAt this k))
211 |   (isEmpty [_]
212 |     (tree/null? root))
213 |   (size [_]
214 |     (tree/node-size root))
215 |   (keySet [_]
216 |     (set (tree/node-key-vec root)))
217 |   (put [_ _ _]
218 |     (throw (UnsupportedOperationException.)))
219 |   (putAll [_ _]
220 |     (throw (UnsupportedOperationException.)))
221 |   (clear [_]
222 |     (throw (UnsupportedOperationException.)))
223 |   (values [_]
224 |     (map tree/-v (tree/node-seq root)))
225 |   (entrySet [this]
226 |     (set (seq this)))
227 |   (iterator [this]
228 |     (clojure.lang.SeqIterator. (seq this)))
229 | 
230 |   clojure.lang.IPersistentCollection
231 |   (equiv [this x]
232 |     (and (map? x) (= x (into {} this))))
233 | 
234 |   (cons [this o]
235 |     (if (map? o)
236 |       (reduce #(apply assoc %1 %2) this o)
237 |       (.assoc this (nth o 0) (nth o 1))))
238 |   
239 |   clojure.lang.IPersistentMap
240 |   (assocEx [this k v]
241 |     (if (contains? this k)
242 |       (throw (Exception. "Key or value already present"))
243 |       (assoc this k v)))
244 |   (without [_ k]
245 |     (new OrderedMap (tree/node-remove root k) _meta)))
246 | 
247 | 
248 | 
249 | 
250 | (defmethod print-method OrderedMap [m w]
251 |   ((get (methods print-method) clojure.lang.IPersistentMap) m w))
252 | 
253 | 
254 | (defn ordered-map
255 |   ([]
256 |      (ordered-map []))
257 |   ([coll]
258 |      (->OrderedMap (reduce (fn [acc [k v]]
259 |                              (tree/node-add acc k v))
260 |                      (tree/null) (seq coll))
261 |        {})))
262 | 
263 | 
264 | 
265 | 
266 | ;; (ordered-map)
267 | ;;  => {}
268 | 
269 | ;; (seq (ordered-map [[:b "b"] [:c "c"] [:a "a"] [:d "d"]]))
270 | ;;  => ([:a "a"] [:b "b"] [:c "c"] [:d "d"])
271 | 
272 | ;; (ordered-map {:a "a", :b "b", :c "c", :d "d"})
273 | ;;  => {:a "a", :b "b", :c "c", :d "d"}
274 | 
275 | ;; (-> (ordered-map) (assoc :b "b") (assoc :a "a") (assoc :c "c"))
276 | ;;  => {:a "a", :b "b", :c "c"}
277 | 
278 | ;; ((ordered-map {:a "a", :b "b", :c "c", :d "d"}) :c)
279 | ;;  => "c"
280 | 
281 | ;; ((ordered-map {:a "a", :b "b", :c "c", :d "d"}) :z ::not-found)
282 | ;;  => :wbtree.types/not-found
283 | 


--------------------------------------------------------------------------------
/src/wbtree/util.clj:
--------------------------------------------------------------------------------
  1 | (ns wbtree.util
  2 |   (:require [clojure.pprint :as pp])
  3 |   (:require [clojure.repl]))
  4 | 
  5 | 
  6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  7 | ;; Control Flow
  8 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  9 |   
 10 | (defmacro returning
 11 |   "Compute a return value, then execute other forms for side effects.
 12 |   Like prog1 in common lisp, or a (do) that returns the first form."
 13 |   [value & forms]
 14 |   `(let [value# ~value]
 15 |      ~@forms
 16 |      value#))
 17 | 
 18 | (defmacro returning-bind 
 19 |   "Compute a return value, bind that value to provided sym, then
 20 |   execute other forms for side effects within the lexical scope of
 21 |   that binding.  The return value of a returning-bind block will be
 22 |   the value computed by retn-form.  Similar in concept to Paul
 23 |   Graham's APROG1, or what is commonly found in CL libraries as
 24 |   PROG1-BIND.  This macro is especially handy when one needs to
 25 |   interact with stateful resources such as io.
 26 | 
 27 |   Example:
 28 | 
 29 |     (returning-bind [x (inc 41)]
 30 |       (println :returning x)
 31 |       (println 3.141592654))
 32 | 
 33 |   PRINTS:   :returning 42
 34 |             3.141592654
 35 |   RETURNS:  42"
 36 |   [[sym retn-form] & body]
 37 |       `(let [val# ~retn-form
 38 |              ~sym val#]
 39 |          ~@body
 40 |          val#))
 41 | 
 42 | 
 43 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 44 | ;; Collections
 45 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 46 | 
 47 | (defn indexed
 48 |   "Returns a lazy sequence of [index, item] pairs, where items come
 49 |   from 's' and indexes count up from zero.
 50 |   (indexed '(a b c d))  =>  ([0 a] [1 b] [2 c] [3 d])"
 51 |   [s]
 52 |   (map vector (range) s))
 53 | 
 54 | (defn positions
 55 |   "Returns a lazy sequence containing the positions at which pred
 56 |    is true for items in coll."
 57 |   [pred coll]
 58 |   (for [[idx elt] (indexed coll) :when (pred elt)] idx))
 59 | 
 60 | (defn split-vec
 61 |   "Split the given vector at the provided offsets using subvec. Supports
 62 |   negative offsets."
 63 |   [v & ns]
 64 |   (let [ns (map #(if (neg? %) (+ % (count v)) %) ns)]
 65 |     (lazy-seq
 66 |      (if-let [n (first ns)]
 67 |        (cons (subvec v 0 n)
 68 |              (apply split-vec
 69 |                     (subvec v n)
 70 |                     (map #(- % n) (rest ns))))
 71 |        (list v)))))
 72 | 
 73 | (defn knit
 74 |   "Takes a list of functions (f1 f2 ... fn) and returns a new function F.
 75 |   F takes a collection of size n (x1 x2 ... xn) and returns a vector
 76 |       [(f1 x1) (f2 x2) ... (fn xn)]."
 77 |   [& fs]
 78 |   (fn [arg-coll] split-vec    (vec (map #(% %2) fs arg-coll))))
 79 | 
 80 | (defn rmerge
 81 |   "Recursive merge of the provided maps."
 82 |   [& maps]
 83 |   (if (every? map? maps)
 84 |     (apply merge-with rmerge maps)
 85 |     (last maps)))
 86 | 
 87 | (defn mappend
 88 |   "maps elements in list and finally appends all resulted lists."
 89 |   [f & seqs]
 90 |   (apply concat (apply map f seqs)))
 91 | 
 92 | 
 93 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 94 | ;; Namespace Introspection
 95 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 96 | 
 97 | (defn ns-docs
 98 |   "Prints docs for all public symbols in given namespace
 99 |    http://blog.twonegatives.com/post/42435179639/ns-docs
100 |    https://gist.github.com/timvisher/4728530"
101 |   [ns-symbol]
102 |   (dorun 
103 |    (map (comp #'clojure.repl/print-doc meta)
104 |         (->> ns-symbol 
105 |              ns-publics 
106 |              sort 
107 |              vals))))
108 | 
109 | (defn symbolic-name-from-var
110 |   [var]
111 |   (clojure.string/join "/" ((juxt (comp str :ns) :name) (meta var))))
112 | 
113 | 
114 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
115 | ;; Timing and Performance Metric
116 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
117 | 
118 | 
119 | (defmacro with-timing [& body]
120 |   `(let [start# (System/nanoTime)  ret# ~(cons 'do body)]
121 |      [ret# (/ (double (- (System/nanoTime) start#)) 1000000.0)]))
122 | 
123 | (defmacro avg-timing [iter & body]
124 |   `(let [iter# ~iter]
125 |       (/ (reduce +
126 |             (map second
127 |               (repeatedly iter#
128 |                  #(let [start# (System/nanoTime) ret# ~(cons 'do body)]
129 |                     [ret# (/ (double (- (System/nanoTime) start#))
130 |                             1000000.0)]))))
131 |            iter#)))
132 | 
133 | (defmacro run-and-measure-timing [expr]
134 |   `(let [start-time# (System/currentTimeMillis)
135 |          response# ~expr
136 |          end-time# (System/currentTimeMillis)]
137 |      {:time-taken (- end-time# start-time#)
138 |       :response response#
139 |       :start-time start-time#
140 |       :end-time end-time#}))
141 | 
142 | 
143 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
144 | ;; Debugging
145 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
146 | 
147 | (defmacro wrap-fn [name args & body]
148 |   `(let [old-fn# (var-get (var ~name))
149 |          new-fn# (fn [& p#] 
150 |                    (let [~args p#] 
151 |                      (do ~@body)))
152 |          wrapper# (fn [& params#]
153 |                     (if (= ~(count args) (count params#))
154 |                       (apply new-fn# params#)
155 |                       (apply old-fn# params#)))] 
156 |      (alter-var-root (var ~name) (constantly wrapper#))))
157 | 
158 | 
159 | (defmacro ppmx [form]
160 |   `(do
161 |      (pp/cl-format *out*  ";;; Macroexpansion:~%~% ~S~%~%;;; First Step~%~%"
162 |        '~form)
163 |      (pp/pprint (macroexpand-1 '~form))
164 |      (pp/cl-format *out*  "~%;;; Full expansion:~%~%")
165 |      (pp/pprint (macroexpand '~form))
166 |      (println "")))
167 | 
168 | 
169 | ;;;
170 | ;;; From: The Joy of Clojure
171 | ;;;
172 | 
173 | (defn contextual-eval [ctx expr]
174 |   (eval
175 |     `(let [~@(mapcat (fn [[k v]] [k `'~v]) ctx)]
176 |        ~expr)))
177 | 
178 | (defmacro local-context []
179 |   (let [symbols (keys &env)]
180 |     (zipmap (map (fn [sym] `(quote ~sym)) symbols) symbols)))
181 | 
182 | (defn readr [prompt exit-code]
183 |   (let [input (clojure.main/repl-read prompt exit-code)]
184 |     (if (= input ::r)
185 |       exit-code
186 |       input)))
187 | 
188 | (defmacro break []
189 |   `(clojure.main/repl
190 |      :prompt #(print "debug=> ")
191 |      :read   readr
192 |      :eval   (partial contextual-eval (local-context))))
193 | 
194 | 
195 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
196 | ;; IO
197 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
198 | 
199 | (defn lines-of-file [file-name]
200 |   (line-seq
201 |     (java.io.BufferedReader.
202 |       (java.io.InputStreamReader.
203 |         (java.io.FileInputStream. file-name)))))
204 | 
205 | 
206 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
207 | ;; Exceptions
208 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
209 | 
210 | 
211 | (defmacro exception [& [param & more :as params]] 
212 |   (if (class? param) 
213 |     `(throw (new ~param (str ~@(interpose " " more)))) 
214 |     `(throw (Exception. (str ~@(interpose " " params))))))
215 | 
216 | 
217 | (defmacro ignore-exceptions [& body]
218 |   `(try
219 |      ~@body
220 |      (catch Exception e# nil)))
221 | 
222 | 
223 | 
224 | 
225 | 
226 | ;; (defn to-byte-array [x]
227 | ;;   (let [baos (ByteArrayOutputStream.)
228 | ;;         oos (ObjectOutputStream. baos)]
229 | ;;     (pr oos x)
230 | ;;     (.close oos)
231 | ;;     (.toByteArray baos)))  
232 | 


--------------------------------------------------------------------------------
/test/wbtree/clojure_set.txt:
--------------------------------------------------------------------------------
  1 | ;   Copyright (c) Rich Hickey. All rights reserved.
  2 | ;   The use and distribution terms for this software are covered by the
  3 | ;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
  4 | ;   which can be found in the file epl-v10.html at the root of this distribution.
  5 | ;   By using this software in any fashion, you are agreeing to be bound by
  6 | ;   the terms of this license.
  7 | ;   You must not remove this notice, or any other, from this software.
  8 | 
  9 | ;; Author: Frantisek Sodomka
 10 | 
 11 | 
 12 | (ns clojure.test-clojure.clojure-set
 13 |   (:use clojure.test)
 14 |   (:require [clojure.set :as set]))
 15 | 
 16 | (deftest test-union
 17 |   (are [x y] (= x y)
 18 |       (set/union) #{}
 19 | 
 20 |       ; identity
 21 |       (set/union #{}) #{}
 22 |       (set/union #{1}) #{1}
 23 |       (set/union #{1 2 3}) #{1 2 3}
 24 | 
 25 |       ; 2 sets, at least one is empty
 26 |       (set/union #{} #{}) #{}
 27 |       (set/union #{} #{1}) #{1}
 28 |       (set/union #{} #{1 2 3}) #{1 2 3}
 29 |       (set/union #{1} #{}) #{1}
 30 |       (set/union #{1 2 3} #{}) #{1 2 3}
 31 | 
 32 |       ; 2 sets
 33 |       (set/union #{1} #{2}) #{1 2}
 34 |       (set/union #{1} #{1 2}) #{1 2}
 35 |       (set/union #{2} #{1 2}) #{1 2}
 36 |       (set/union #{1 2} #{3}) #{1 2 3}
 37 |       (set/union #{1 2} #{2 3}) #{1 2 3}
 38 | 
 39 |       ; 3 sets, some are empty
 40 |       (set/union #{} #{} #{}) #{}
 41 |       (set/union #{1} #{} #{}) #{1}
 42 |       (set/union #{} #{1} #{}) #{1}
 43 |       (set/union #{} #{} #{1}) #{1}
 44 |       (set/union #{1 2} #{2 3} #{}) #{1 2 3}
 45 | 
 46 |       ; 3 sets
 47 |       (set/union #{1 2} #{3 4} #{5 6}) #{1 2 3 4 5 6}
 48 |       (set/union #{1 2} #{2 3} #{1 3 4}) #{1 2 3 4}
 49 | 
 50 |       ; different data types
 51 |       (set/union #{1 2} #{:a :b} #{nil} #{false true} #{\c "abc"} #{[] [1 2]}
 52 |         #{{} {:a 1}} #{#{} #{1 2}})
 53 |           #{1 2 :a :b nil false true \c "abc" [] [1 2] {} {:a 1} #{} #{1 2}}
 54 | 
 55 |       ; different types of sets
 56 |       (set/union (hash-set) (hash-set 1 2) (hash-set 2 3))
 57 |           (hash-set 1 2 3)
 58 |       (set/union (sorted-set) (sorted-set 1 2) (sorted-set 2 3))
 59 |           (sorted-set 1 2 3)
 60 |       (set/union (hash-set) (hash-set 1 2) (hash-set 2 3)
 61 |         (sorted-set) (sorted-set 4 5) (sorted-set 5 6))
 62 |           (hash-set 1 2 3 4 5 6)  ; also equals (sorted-set 1 2 3 4 5 6)
 63 | ))
 64 | 
 65 | (deftest test-intersection
 66 |   ; at least one argument is needed
 67 |   (is (thrown? IllegalArgumentException (set/intersection)))
 68 |   
 69 |   (are [x y] (= x y)
 70 |       ; identity
 71 |       (set/intersection #{}) #{}
 72 |       (set/intersection #{1}) #{1}
 73 |       (set/intersection #{1 2 3}) #{1 2 3}
 74 |       
 75 |       ; 2 sets, at least one is empty
 76 |       (set/intersection #{} #{}) #{}
 77 |       (set/intersection #{} #{1}) #{}
 78 |       (set/intersection #{} #{1 2 3}) #{}
 79 |       (set/intersection #{1} #{}) #{}
 80 |       (set/intersection #{1 2 3} #{}) #{}
 81 | 
 82 |       ; 2 sets
 83 |       (set/intersection #{1 2} #{1 2}) #{1 2}
 84 |       (set/intersection #{1 2} #{3 4}) #{}
 85 |       (set/intersection #{1 2} #{1}) #{1}
 86 |       (set/intersection #{1 2} #{2}) #{2}
 87 |       (set/intersection #{1 2 4} #{2 3 4 5}) #{2 4}
 88 | 
 89 |       ; 3 sets, some are empty
 90 |       (set/intersection #{} #{} #{}) #{}
 91 |       (set/intersection #{1} #{} #{}) #{}
 92 |       (set/intersection #{1} #{1} #{}) #{}
 93 |       (set/intersection #{1} #{} #{1}) #{}
 94 |       (set/intersection #{1 2} #{2 3} #{}) #{}
 95 | 
 96 |       ; 3 sets
 97 |       (set/intersection #{1 2} #{2 3} #{5 2}) #{2}
 98 |       (set/intersection #{1 2 3} #{1 3 4} #{1 3}) #{1 3}
 99 |       (set/intersection #{1 2 3} #{3 4 5} #{8 2 3}) #{3}
100 | 
101 |       ; different types of sets
102 |       (set/intersection (hash-set 1 2) (hash-set 2 3)) #{2}
103 |       (set/intersection (sorted-set 1 2) (sorted-set 2 3)) #{2}
104 |       (set/intersection
105 |         (hash-set 1 2) (hash-set 2 3)
106 |         (sorted-set 1 2) (sorted-set 2 3)) #{2} ))
107 | 
108 | (deftest test-difference
109 |   (are [x y] (= x y)
110 |       ; identity
111 |       (set/difference #{}) #{}
112 |       (set/difference #{1}) #{1}
113 |       (set/difference #{1 2 3}) #{1 2 3}
114 | 
115 |       ; 2 sets
116 |       (set/difference #{1 2} #{1 2}) #{}
117 |       (set/difference #{1 2} #{3 4}) #{1 2}
118 |       (set/difference #{1 2} #{1}) #{2}
119 |       (set/difference #{1 2} #{2}) #{1}
120 |       (set/difference #{1 2 4} #{2 3 4 5}) #{1}
121 | 
122 |        ; 3 sets
123 |       (set/difference #{1 2} #{2 3} #{5 2}) #{1}
124 |       (set/difference #{1 2 3} #{1 3 4} #{1 3}) #{2}
125 |       (set/difference #{1 2 3} #{3 4 5} #{8 2 3}) #{1} ))
126 | 
127 | (deftest test-select
128 |   (are [x y] (= x y)
129 |     (set/select integer? #{}) #{}
130 |     (set/select integer? #{1 2}) #{1 2}
131 |     (set/select integer? #{1 2 :a :b :c}) #{1 2}
132 |     (set/select integer? #{:a :b :c}) #{}) )
133 | 
134 | (def compositions
135 |   #{{:name "Art of the Fugue" :composer "J. S. Bach"}
136 |     {:name "Musical Offering" :composer "J. S. Bach"}
137 |     {:name "Requiem" :composer "Giuseppe Verdi"}
138 |     {:name "Requiem" :composer "W. A. Mozart"}})
139 | 
140 | (deftest test-project
141 |   (are [x y] (= x y)
142 |     (set/project compositions [:name]) #{{:name "Art of the Fugue"}
143 |                                          {:name "Requiem"}
144 |                                          {:name "Musical Offering"}}
145 |     (set/project compositions [:composer]) #{{:composer "W. A. Mozart"}
146 |                                              {:composer "Giuseppe Verdi"}
147 |                                              {:composer "J. S. Bach"}}
148 |     (set/project compositions [:year]) #{{}}
149 |     (set/project #{{}} [:name]) #{{}} ))
150 | 
151 | (deftest test-rename
152 |   (are [x y] (= x y)
153 |     (set/rename compositions {:name :title}) #{{:title "Art of the Fugue" :composer "J. S. Bach"}
154 |                                                {:title "Musical Offering" :composer "J. S. Bach"}
155 |                                                {:title "Requiem" :composer "Giuseppe Verdi"}
156 |                                                {:title "Requiem" :composer "W. A. Mozart"}}
157 |     (set/rename compositions {:year :decade}) #{{:name "Art of the Fugue" :composer "J. S. Bach"}
158 |                                                 {:name "Musical Offering" :composer "J. S. Bach"}
159 |                                                 {:name "Requiem" :composer "Giuseppe Verdi"}
160 |                                                 {:name "Requiem" :composer "W. A. Mozart"}}
161 |     (set/rename #{{}} {:year :decade}) #{{}}))
162 | 
163 | (deftest test-rename-keys
164 |   (are [x y] (= x y)
165 |        (set/rename-keys {:a "one" :b "two"} {:a :z}) {:z "one" :b "two"}
166 |        (set/rename-keys {:a "one" :b "two"} {:a :z :c :y}) {:z "one" :b "two"}
167 |        (set/rename-keys {:a "one" :b "two" :c "three"} {:a :b :b :a}) {:a "two" :b "one" :c "three"}))
168 | 
169 | (deftest test-index
170 |   (are [x y] (= x y)
171 |     (set/index  #{{:c 2} {:b 1} {:a 1 :b 2}} [:b]) {{:b 2} #{{:a 1 :b 2}}, {:b 1} #{{:b 1}} {} #{{:c 2}}}
172 |   ))
173 | 
174 | (deftest test-join
175 |   (are [x y] (= x y)
176 |     (set/join compositions compositions) compositions
177 |     (set/join compositions #{{:name "Art of the Fugue" :genre "Classical"}})
178 |                            #{{:name "Art of the Fugue" :composer "J. S. Bach" :genre "Classical"}}
179 |     ))
180 | 
181 | (deftest test-map-invert
182 |   (are [x y] (= x y)
183 |        (set/map-invert {:a "one" :b "two"}) {"one" :a "two" :b}))
184 | 
185 | (deftest test-subset?
186 |   (are [sub super] (set/subset? sub super)
187 |        #{} #{}
188 |        #{} #{1}
189 |        #{1} #{1}
190 |        #{1 2} #{1 2}
191 |        #{1 2} #{1 2 42}
192 |        #{false} #{false}
193 |        #{nil}   #{nil}
194 |        #{nil}   #{nil false}
195 |        #{1 2 nil} #{1 2 nil 4})
196 |   (are [notsub super] (not (set/subset? notsub super))
197 |        #{1} #{}
198 |        #{2} #{1}
199 |        #{1 3} #{1}
200 |        #{nil} #{false}
201 |        #{false} #{nil}
202 |        #{false nil} #{nil}
203 |        #{1 2 nil}   #{1 2}))
204 | 
205 | (deftest test-superset?
206 |   (are [super sub] (set/superset? super sub)
207 |        #{} #{}
208 |        #{1} #{}
209 |        #{1} #{1}
210 |        #{1 2} #{1 2}
211 |        #{1 2 42} #{1 2}
212 |        #{false}  #{false}
213 |        #{nil}    #{nil}
214 |        #{false nil} #{false}
215 |        #{1 2 4 nil false} #{1 2 nil})
216 |   (are [notsuper sub] (not (set/superset? notsuper sub))
217 |        #{} #{1}
218 |        #{2} #{1}
219 |        #{1} #{1 3}
220 |        #{nil} #{false}
221 |        #{false} #{nil}
222 |        #{nil}   #{false nil}
223 |        #{nil 2 3} #{false nil 2 3}))
224 | 
225 | 


--------------------------------------------------------------------------------
/test/wbtree/tree_test.clj:
--------------------------------------------------------------------------------
1 | (ns wbtree.tree-test
2 |   (:require [clojure.test :refer :all]
3 |             [wbtree.tree  :refer :all]))
4 | 
5 | (deftest a-test
6 |   (testing "FIXME."
7 |     (is (= 0 0))))
8 | 


--------------------------------------------------------------------------------