nearest = nnl.getHighest();
193 | double dist_sqd;
194 |
195 | if (!nnl.isCapacityReached()) {
196 | dist_sqd = Double.MAX_VALUE;
197 | } else {
198 | dist_sqd = nnl.getMaxPriority();
199 | }
200 |
201 | // 9. max-dist-sqd := minimum of max-dist-sqd and dist-sqd
202 | max_dist_sqd = Math.min(max_dist_sqd, dist_sqd);
203 |
204 | // 10. A nearer point could only lie in further-kd if there were some
205 | // part of further-hr within distance max-dist-sqd of
206 | // target.
207 | final HPoint closest = further_hr.closest(target);
208 | if (HPoint.sqrdist(closest, target) < max_dist_sqd) {
209 |
210 | // 10.1 if (pivot-target)^2 < dist-sqd then
211 | if (pivot_to_target < dist_sqd) {
212 |
213 | // 10.1.1 nearest := (pivot, range-elt field of kd)
214 | nearest = kd;
215 |
216 | // 10.1.2 dist-sqd = (pivot-target)^2
217 | dist_sqd = pivot_to_target;
218 |
219 | // add to nnl
220 | if (!kd.deleted && ((checker == null) || checker.usable(kd.v))) {
221 | nnl.insert(kd, dist_sqd);
222 | }
223 |
224 | // 10.1.3 max-dist-sqd = dist-sqd
225 | // max_dist_sqd = dist_sqd;
226 | if (nnl.isCapacityReached()) {
227 | max_dist_sqd = nnl.getMaxPriority();
228 | } else {
229 | max_dist_sqd = Double.MAX_VALUE;
230 | }
231 | }
232 |
233 | // 10.2 Recursively call Nearest Neighbor with parameters
234 | // (further-kd, target, further-hr, max-dist_sqd),
235 | // storing results in temp-nearest and temp-dist-sqd
236 | nnbr(further_kd, target, further_hr, max_dist_sqd, lev + 1, K, nnl, checker, timeout);
237 | }
238 | }
239 |
240 | // constructor is used only by class; other methods are static
/**
 * Builds a node that holds the given key and value, has no children and is
 * not marked as deleted.
 *
 * @param key point stored as this node's key
 * @param val value stored alongside the key
 */
private KDNode(final HPoint key, final T val) {
    this.k = key;
    this.v = val;
    this.left = null;
    this.right = null;
    this.deleted = false;
}
249 |
250 | protected String toString(final int depth) {
251 | String s = k + " " + v + (deleted ? "*" : "");
252 | if (left != null) {
253 | s = s + "\n" + pad(depth) + "L " + left.toString(depth + 1);
254 | }
255 | if (right != null) {
256 | s = s + "\n" + pad(depth) + "R " + right.toString(depth + 1);
257 | }
258 | return s;
259 | }
260 |
261 | private static String pad(final int n) {
262 | String s = "";
263 | for (int i = 0; i < n; ++i) {
264 | s += " ";
265 | }
266 | return s;
267 | }
268 |
269 | private static void hrcopy(final HRect hr_src, final HRect hr_dst) {
270 | hpcopy(hr_src.min, hr_dst.min);
271 | hpcopy(hr_src.max, hr_dst.max);
272 | }
273 |
274 | private static void hpcopy(final HPoint hp_src, final HPoint hp_dst) {
275 | for (int i = 0; i < hp_dst.coord.length; ++i) {
276 | hp_dst.coord[i] = hp_src.coord[i];
277 | }
278 | }
279 | }
280 |
--------------------------------------------------------------------------------
/src/java/edu/wlu/cs/levy/CG/KDTree.java:
--------------------------------------------------------------------------------
1 | package edu.wlu.cs.levy.CG;
2 |
3 | import java.io.Serializable;
4 | import java.util.List;
5 | import java.util.LinkedList;
6 | import java.util.Stack;
7 |
8 | /**
9 | * KDTree is a class supporting KD-tree insertion, deletion, equality search,
10 | * range search, and nearest neighbor(s) using double-precision floating-point
11 | * keys. Splitting dimension is chosen naively, by depth modulo K. Semantics are
12 | * as follows:
13 | *
14 | *
15 | * - Two different keys containing identical numbers should retrieve the same
16 | * value from a given KD-tree. Therefore keys are cloned when a node is
17 | * inserted.
18 | *
19 | * - As with Hashtables, values inserted into a KD-tree are not cloned.
20 | * Modifying a value between insertion and retrieval will therefore modify the
21 | * value stored in the tree.
22 | *
23 | *
24 | * Implements the Nearest Neighbor algorithm (Table 6.4) of
25 | *
26 | *
27 | * &#064;techreport{AndrewMooreNearestNeighbor,
28 | * author = {Andrew Moore},
29 | * title = {An introductory tutorial on kd-trees},
30 | * institution = {Robotics Institute, Carnegie Mellon University},
31 | * year = {1991},
32 | * number = {Technical Report No. 209, Computer Laboratory,
33 | * University of Cambridge},
34 | * address = {Pittsburgh, PA}
35 | * }
36 | *
37 | *
38 | * Copyright (C) Simon D. Levy and Bjoern Heckel 2014
39 | *
40 | * This code is free software: you can redistribute it and/or modify it under
41 | * the terms of the GNU Lesser General Public License as published by the Free
42 | * Software Foundation, either version 3 of the License, or (at your option) any
43 | * later version.
44 | *
45 | * This code is distributed in the hope that it will be useful, but WITHOUT ANY
46 | * WARRANTY without even the implied warranty of MERCHANTABILITY or FITNESS FOR
47 | * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
48 | *
49 | * You should have received a copy of the GNU Lesser General Public License
50 | * along with this code. If not, see . You should
51 | * also have received a copy of the Parrot Parrot AR.Drone Development License
52 | * and Parrot AR.Drone copyright notice and disclaimer and If not, see
53 | * and
54 | * .
56 | */
57 | public class KDTree implements Serializable {
58 | // number of milliseconds
59 | final long m_timeout;
60 |
61 | // K = number of dimensions
62 | final private int m_K;
63 |
64 | // root of KD-tree
65 | private KDNode m_root;
66 |
67 | // count of nodes
68 | private int m_count;
69 |
70 | /**
71 | * Creates a KD-tree with specified number of dimensions.
72 | *
73 | * @param k
74 | * number of dimensions
75 | */
76 |
77 | public KDTree(final int k) {
78 | this(k, 0);
79 | }
80 | public KDTree(final int k, final long timeout) {
81 | this.m_timeout = timeout;
82 | m_K = k;
83 | m_root = null;
84 | }
85 |
86 | /**
87 | * Insert a node in a KD-tree. Uses algorithm translated from 352.ins.c of
88 | *
89 | *
90 | * &*064;Book{GonnetBaezaYates1991,
91 | * author = {G.H. Gonnet and R. Baeza-Yates},
92 | * title = {Handbook of Algorithms and Data Structures},
93 | * publisher = {Addison-Wesley},
94 | * year = {1991}
95 | * }
96 | *
97 | *
98 | * @param key
99 | * key for KD-tree node
100 | * @param value
101 | * value at that key
102 | *
103 | * @throws KeySizeException
104 | * if key.length mismatches K
105 | * @throws KeyDuplicateException
106 | * if key already in tree
107 | */
108 | public void insert(final double[] key, final T value) throws KeySizeException, KeyDuplicateException {
109 | this.edit(key, new Editor.Inserter(value));
110 | }
111 |
112 | /**
113 | * Edit a node in a KD-tree
114 | *
115 | * @param key
116 | * key for KD-tree node
117 | * @param editor
118 | * object to edit the value at that key
119 | *
120 | * @throws KeySizeException
121 | * if key.length mismatches K
122 | * @throws KeyDuplicateException
123 | * if key already in tree
124 | */
125 |
126 | public void edit(final double[] key, final Editor editor) throws KeySizeException, KeyDuplicateException {
127 |
128 | if (key.length != m_K) {
129 | throw new KeySizeException();
130 | }
131 |
132 | synchronized (this) {
133 | // the first insert has to be synchronized
134 | if (null == m_root) {
135 | m_root = KDNode.create(new HPoint(key), editor);
136 | m_count = m_root.deleted ? 0 : 1;
137 | return;
138 | }
139 | }
140 |
141 | m_count += KDNode.edit(new HPoint(key), editor, m_root, 0, m_K);
142 | }
143 |
144 | /**
145 | * Find KD-tree node whose key is identical to key. Uses algorithm translated
146 | * from 352.srch.c of Gonnet & Baeza-Yates.
147 | *
148 | * @param key
149 | * key for KD-tree node
150 | *
151 | * @return object at key, or null if not found
152 | *
153 | * @throws KeySizeException
154 | * if key.length mismatches K
155 | */
156 | public T search(final double[] key) throws KeySizeException {
157 |
158 | if (key.length != m_K) {
159 | throw new KeySizeException();
160 | }
161 |
162 | final KDNode kd = KDNode.srch(new HPoint(key), m_root, m_K);
163 |
164 | return (kd == null ? null : kd.v);
165 | }
166 |
167 | public void delete(final double[] key) throws KeySizeException, KeyMissingException {
168 | delete(key, false);
169 | }
170 | /**
171 | * Delete a node from a KD-tree. Instead of actually deleting node and
172 | * rebuilding tree, marks node as deleted. Hence, it is up to the caller to
173 | * rebuild the tree as needed for efficiency.
174 | *
175 | * @param key
176 | * key for KD-tree node
177 | * @param optional
178 | * if false and node not found, throw an exception
179 | *
180 | * @throws KeySizeException
181 | * if key.length mismatches K
182 | * @throws KeyMissingException
183 | * if no node in tree has key
184 | */
185 | public void delete(final double[] key, final boolean optional) throws KeySizeException, KeyMissingException {
186 |
187 | if (key.length != m_K) {
188 | throw new KeySizeException();
189 | }
190 | final KDNode t = KDNode.srch(new HPoint(key), m_root, m_K);
191 | if (t == null) {
192 | if (optional == false) {
193 | throw new KeyMissingException();
194 | }
195 | } else {
196 | if (KDNode.del(t)) {
197 | m_count--;
198 | }
199 | }
200 | }
201 |
202 | /**
203 | * Find KD-tree node whose key is nearest neighbor to key.
204 | *
205 | * @param key
206 | * key for KD-tree node
207 | *
208 | * @return object at node nearest to key, or null on failure
209 | *
210 | * @throws KeySizeException
211 | * if key.length mismatches K
212 | */
213 | public T nearest(final double[] key) throws KeySizeException {
214 |
215 | final List nbrs = nearest(key, 1, null);
216 | return nbrs.get(0);
217 | }
218 |
219 | /**
220 | * Find KD-tree nodes whose keys are n nearest neighbors to key.
221 | *
222 | * @param key
223 | * key for KD-tree node
224 | * @param n
225 | * number of nodes to return
226 | *
227 | * @return objects at nodes nearest to key, or null on failure
228 | *
229 | * @throws KeySizeException
230 | * if key.length mismatches K
231 | */
232 | public List nearest(final double[] key, final int n) throws KeySizeException, IllegalArgumentException {
233 | return nearest(key, n, null);
234 | }
235 |
236 | /**
237 | * Find KD-tree nodes whose keys are within a given Euclidean distance of a
238 | * given key.
239 | *
240 | * @param key
241 | * key for KD-tree node
242 | * @param d
243 | * Euclidean distance
244 | *
245 | * @return objects at nodes with distance of key, or null on failure
246 | *
247 | * @throws KeySizeException
248 | * if key.length mismatches K
249 | */
250 | public List nearestEuclidean(final double[] key, final double dist) throws KeySizeException {
251 | return nearestDistance(key, dist, new EuclideanDistance());
252 | }
253 |
254 | /**
255 | * Find KD-tree nodes whose keys are within a given Hamming distance of a
256 | * given key.
257 | *
258 | * @param key
259 | * key for KD-tree node
260 | * @param d
261 | * Hamming distance
262 | *
263 | * @return objects at nodes with distance of key, or null on failure
264 | *
265 | * @throws KeySizeException
266 | * if key.length mismatches K
267 | */
268 | public List nearestHamming(final double[] key, final double dist) throws KeySizeException {
269 |
270 | return nearestDistance(key, dist, new HammingDistance());
271 | }
272 |
273 | /**
274 | * Find KD-tree nodes whose keys are n nearest neighbors to key. Uses
275 | * algorithm above. Neighbors are returned in ascending order of distance to
276 | * key.
277 | *
278 | * @param key
279 | * key for KD-tree node
280 | * @param n
281 | * how many neighbors to find
282 | * @param checker
283 | * an optional object to filter matches
284 | *
285 | * @return objects at node nearest to key, or null on failure
286 | *
287 | * @throws KeySizeException
288 | * if key.length mismatches K
289 | * @throws IllegalArgumentException
290 | * if n is negative or exceeds tree size
291 | */
292 | public List nearest(final double[] key, int n, final Checker checker) throws KeySizeException, IllegalArgumentException {
293 |
294 | if (n <= 0) {
295 | return new LinkedList();
296 | }
297 |
298 | final NearestNeighborList> nnl = getnbrs(key, n, checker);
299 |
300 | n = nnl.getSize();
301 | final Stack nbrs = new Stack();
302 |
303 | for (int i = 0; i < n; ++i) {
304 | final KDNode kd = nnl.removeHighest();
305 | nbrs.push(kd.v);
306 | }
307 |
308 | return nbrs;
309 | }
310 |
311 | /**
312 | * Range search in a KD-tree. Uses algorithm translated from 352.range.c of
313 | * Gonnet & Baeza-Yates.
314 | *
315 | * @param lowk
316 | * lower-bounds for key
317 | * @param uppk
318 | * upper-bounds for key
319 | *
320 | * @return array of Objects whose keys fall in range [lowk,uppk]
321 | *
322 | * @throws KeySizeException
323 | * on mismatch among lowk.length, uppk.length, or K
324 | */
325 | public List range(final double[] lowk, final double[] uppk) throws KeySizeException {
326 |
327 | if (lowk.length != uppk.length) {
328 | throw new KeySizeException();
329 | }
330 |
331 | else if (lowk.length != m_K) {
332 | throw new KeySizeException();
333 | }
334 |
335 | else {
336 | final List> found = new LinkedList>();
337 | KDNode.rsearch(new HPoint(lowk), new HPoint(uppk), m_root, 0, m_K, found);
338 | final List o = new LinkedList();
339 | for (final KDNode node : found) {
340 | o.add(node.v);
341 | }
342 | return o;
343 | }
344 | }
345 |
346 | public int size() { /* added by MSL */
347 | return m_count;
348 | }
349 |
350 | public String toString() {
351 | return m_root != null ? m_root.toString(0) : "";
352 | }
353 |
354 | private NearestNeighborList> getnbrs(final double[] key) throws KeySizeException {
355 | return getnbrs(key, m_count, null);
356 | }
357 |
358 | private NearestNeighborList> getnbrs(final double[] key, final int n, final Checker checker) throws KeySizeException {
359 |
360 | if (key.length != m_K) {
361 | throw new KeySizeException();
362 | }
363 |
364 | final NearestNeighborList> nnl = new NearestNeighborList>(n);
365 |
366 | // initial call is with infinite hyper-rectangle and max distance
367 | final HRect hr = HRect.infiniteHRect(key.length);
368 | final double max_dist_sqd = Double.MAX_VALUE;
369 | final HPoint keyp = new HPoint(key);
370 |
371 | if (m_count > 0) {
372 | final long timeout = (this.m_timeout > 0) ? (System.currentTimeMillis() + this.m_timeout) : 0;
373 | KDNode.nnbr(m_root, keyp, hr, max_dist_sqd, 0, m_K, nnl, checker, timeout);
374 | }
375 |
376 | return nnl;
377 |
378 | }
379 |
380 | private List nearestDistance(final double[] key, final double dist, final DistanceMetric metric) throws KeySizeException {
381 |
382 | final NearestNeighborList> nnl = getnbrs(key);
383 | final int n = nnl.getSize();
384 | final Stack nbrs = new Stack();
385 |
386 | for (int i = 0; i < n; ++i) {
387 | final KDNode kd = nnl.removeHighest();
388 | final HPoint p = kd.k;
389 | if (metric.distance(kd.k.coord, key) < dist) {
390 | nbrs.push(kd.v);
391 | }
392 | }
393 |
394 | return nbrs;
395 | }
396 |
397 | }
398 |
--------------------------------------------------------------------------------
/src/java/edu/wlu/cs/levy/CG/KeyDuplicateException.java:
--------------------------------------------------------------------------------
1 | package edu.wlu.cs.levy.CG;
2 |
3 | /**
4 | * KeyDuplicateException is thrown when the KDTree.insert method is
5 | * invoked on a key already in the KDTree.
6 | *
7 | *
8 | * Copyright (C) Simon D. Levy 2014
9 | *
10 | * This code is free software: you can redistribute it and/or modify it under
11 | * the terms of the GNU Lesser General Public License as published by the Free
12 | * Software Foundation, either version 3 of the License, or (at your option) any
13 | * later version.
14 | *
15 | * This code is distributed in the hope that it will be useful, but WITHOUT ANY
16 | * WARRANTY without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 | * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 | *
19 | * You should have received a copy of the GNU Lesser General Public License
20 | * along with this code. If not, see . You should
21 | * also have received a copy of the Parrot Parrot AR.Drone Development License
22 | * and Parrot AR.Drone copyright notice and disclaimer and If not, see
23 | * and
24 | * .
26 | */
27 | public class KeyDuplicateException extends KDException {
28 |
29 | protected KeyDuplicateException() {
30 | super("Key already in tree");
31 | }
32 |
33 | // arbitrary; every serializable class has to have one of these
34 | public static final long serialVersionUID = 1L;
35 | }
36 |
--------------------------------------------------------------------------------
/src/java/edu/wlu/cs/levy/CG/KeyMissingException.java:
--------------------------------------------------------------------------------
1 | // KeyMissingException.java : Key-missing exception supporting KDTree class
2 | //
3 | // Copyright (C) Simon D. Levy 2014
4 | //
5 | // This code is free software: you can redistribute it and/or modify
6 | // it under the terms of the GNU Lesser General Public License as
7 | // published by the Free Software Foundation, either version 3 of the
8 | // License, or (at your option) any later version.
9 | //
10 | // This code is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | // GNU General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public License
16 | // along with this code. If not, see .
17 | // You should also have received a copy of the Parrot Parrot AR.Drone
18 | // Development License and Parrot AR.Drone copyright notice and disclaimer
19 | // and If not, see
20 | //
21 | // and
22 | // .
23 |
24 | package edu.wlu.cs.levy.CG;
25 |
26 | public class KeyMissingException extends KDException { /* made public by MSL */
27 |
28 | public KeyMissingException() {
29 | super("Key not found");
30 | }
31 |
32 | // arbitrary; every serializable class has to have one of these
33 | public static final long serialVersionUID = 3L;
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/src/java/edu/wlu/cs/levy/CG/KeySizeException.java:
--------------------------------------------------------------------------------
1 | package edu.wlu.cs.levy.CG;
2 |
3 | /**
4 | * KeySizeException is thrown when a KDTree method is invoked on a key whose
5 | * size (array length) mismatches the one used in that KDTree's constructor.
6 | *
7 | * Copyright (C) Simon D. Levy 2014
8 | *
9 | * This code is free software: you can redistribute it and/or modify it under
10 | * the terms of the GNU Lesser General Public License as published by the Free
11 | * Software Foundation, either version 3 of the License, or (at your option) any
12 | * later version.
13 | *
14 | * This code is distributed in the hope that it will be useful, but WITHOUT ANY
15 | * WARRANTY without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 | * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 | *
18 | * You should have received a copy of the GNU Lesser General Public License
19 | * along with this code. If not, see . You should
20 | * also have received a copy of the Parrot Parrot AR.Drone Development License
21 | * and Parrot AR.Drone copyright notice and disclaimer and If not, see
22 | * and
23 | * .
25 | *
26 | */
27 | public class KeySizeException extends KDException {
28 |
29 | protected KeySizeException() {
30 | super("Key size mismatch");
31 | }
32 |
33 | // arbitrary; every serializable class has to have one of these
34 | public static final long serialVersionUID = 2L;
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/java/edu/wlu/cs/levy/CG/NearestNeighborList.java:
--------------------------------------------------------------------------------
1 | // NearestNeighborList.java : A solution to the KD-Tree n-nearest-neighbor problem
2 | //
3 | // Copyright (C) Bjoern Heckel and Simon D. Levy 2014
4 | //
5 | // This code is free software: you can redistribute it and/or modify
6 | // it under the terms of the GNU Lesser General Public License as
7 | // published by the Free Software Foundation, either version 3 of the
8 | // License, or (at your option) any later version.
9 | //
10 | // This code is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | // GNU General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public License
16 | // along with this code. If not, see .
17 | // You should also have received a copy of the Parrot Parrot AR.Drone
18 | // Development License and Parrot AR.Drone copyright notice and disclaimer
19 | // and If not, see
20 | //
21 | // and
22 | // .
23 | //
24 |
25 | package edu.wlu.cs.levy.CG;
26 |
27 | import java.util.*;
28 |
/**
 * Bounded collection of candidate neighbors, each tagged with a numeric
 * priority (a distance in the KD-tree search). It keeps at most
 * {@code capacity} entries with the smallest priorities seen so far; the
 * "highest" entry is the one with the largest priority currently kept.
 *
 * @param <T> type of the stored objects
 */
class NearestNeighborList<T> {

    /** An object paired with its priority. */
    static class NeighborEntry<T> implements Comparable<NeighborEntry<T>> {
        final T data;
        final double value;

        public NeighborEntry(final T data, final double value) {
            this.data = data;
            this.value = value;
        }

        @Override
        public int compareTo(final NeighborEntry<T> t) {
            // note that the positions are reversed! This puts the entry
            // with the LARGEST value at the head of the priority queue.
            return Double.compare(t.value, this.value);
        }
    }

    java.util.PriorityQueue<NeighborEntry<T>> m_Queue;
    int m_Capacity = 0;

    /**
     * Creates an empty list.
     *
     * @param capacity maximum number of entries to keep; with zero or less
     *                 the list never keeps anything
     */
    public NearestNeighborList(final int capacity) {
        m_Capacity = capacity;
        // PriorityQueue throws IllegalArgumentException for an initial
        // capacity below 1, so clamp only the backing queue's size hint.
        m_Queue = new java.util.PriorityQueue<NeighborEntry<T>>(Math.max(1, m_Capacity));
    }

    /**
     * @return the largest priority currently kept, or positive infinity when
     *         the list is empty
     */
    public double getMaxPriority() {
        final NeighborEntry<T> p = m_Queue.peek();
        return (p == null) ? Double.POSITIVE_INFINITY : p.value;
    }

    /**
     * Offers an object to the list.
     *
     * @param object   object to store
     * @param priority its priority; smaller values are preferred
     * @return false if the object was rejected because the list is full and
     *         every kept entry has a lower priority, or because the list has
     *         no capacity at all; true otherwise
     */
    public boolean insert(final T object, final double priority) {
        if (m_Capacity <= 0) {
            // nothing can ever be kept, so do not report a successful insert
            return false;
        }
        if (!isCapacityReached()) {
            m_Queue.add(new NeighborEntry<T>(object, priority));
            return true;
        }
        if (priority > getMaxPriority()) {
            // do not insert - all elements in queue have lower priority
            return false;
        }
        m_Queue.add(new NeighborEntry<T>(object, priority));
        // remove object with highest priority
        m_Queue.poll();
        return true;
    }

    /** @return true once the number of kept entries has reached the capacity */
    public boolean isCapacityReached() {
        return m_Queue.size() >= m_Capacity;
    }

    /** @return the kept object with the largest priority, or null when empty */
    public T getHighest() {
        final NeighborEntry<T> p = m_Queue.peek();
        return (p == null) ? null : p.data;
    }

    /** @return true when no entries are kept */
    public boolean isEmpty() {
        return m_Queue.size() == 0;
    }

    /** @return number of entries currently kept */
    public int getSize() {
        return m_Queue.size();
    }

    /**
     * Removes and returns the kept object with the largest priority.
     *
     * @return that object, or null when the list is empty
     */
    public T removeHighest() {
        // remove object with highest priority
        final NeighborEntry<T> p = m_Queue.poll();
        return (p == null) ? null : p.data;
    }
}
98 |
--------------------------------------------------------------------------------
/test/clj_similar/benchmark.clj:
--------------------------------------------------------------------------------
1 | (ns clj-similar.benchmark
2 | (:require [clojure.test :refer :all]
3 | [criterium.core :refer [quick-bench bench with-progress-reporting]]
4 | [clj-similar.core :refer :all]))
5 |
6 |
(def dict
  ;; The ASCII letters as one-character strings: "A".."Z" followed by "a".."z"
  (for [code-point (concat (range 65 91) (range 97 123))]
    (str (char code-point))))
10 |
(defn random-set
  "Builds a set from between 1 and max-size random draws out of `dict`.
  Repeated draws collapse, so the set can be smaller than the number of draws."
  [max-size]
  (let [draws (inc (rand-int max-size))]
    (into #{} (repeatedly draws #(rand-nth dict)))))
14 |
(defn generate-random
  "Returns a lazy sequence of `count` random sets, each produced by
  `(random-set max-size)`."
  [count max-size]
  (map (fn [_] (random-set max-size))
       (range count)))
19 |
(defn omit-random
  "Returns set `s` with up to `n` randomly chosen elements removed."
  [s n]
  (let [victims (take n (shuffle s))]
    (reduce disj s victims)))
24 |
(deftest benchmark
  ;; Builds a large random corpus, times the construction of the similarity
  ;; structure, then prints sample lookups. Console output only; the test
  ;; contains no assertions.
  (let [n-sets 1E6
        max-size 10
        corpus (do
                 (println "Generating" (long n-sets) "random sets with max-size" max-size)
                 (generate-random n-sets max-size))
        index (do
                (println "Generating similar data structure")
                (time (similar corpus 10 3)))]
    (println "Testing speed of nearest neighbor retrieval")
    (println "Sample output for random target sets")
    (dotimes [_ 10]
      (let [query (random-set max-size)
            hits (nearest index query 2 :exact? true)]
        (println "in" query "out" hits "exact" (map meta hits))))

    (println "Sample output for existing sets")
    (doseq [original (take 10 (random-sample 0.25 corpus))]
      (let [query (omit-random original 2)
            hits (nearest index query 2 :exact? true)]
        (println "in" query "original" original "out" hits "exact" (map meta hits))))
    #_(bench (nearest index (random-set max-size)))))
48 |
--------------------------------------------------------------------------------
/test/clj_similar/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clj-similar.core-test
2 | (:require [clojure.test :refer :all]
3 | [clj-similar.core :refer :all]))
4 |
(defn in?
  "Returns true when some element of coll equals el, nil otherwise."
  [coll el]
  (some (partial = el) coll))
9 |
(defn all-in?
  "Returns true when every element of els is found in coll (per `in?`)."
  [coll els]
  (every? #(in? coll %) els))
13 |
(deftest simple-test
  ;; Only the exact-match case is active; the remaining cases are disabled
  ;; with the #_ reader macro.
  (let [sets [#{"a" "b" "c"} #{"d" "e" "c"} #{"f" "e" "a" "b"}]
        index (similar sets)]
    (testing "Return the nearest set when exact match"
      (is (all-in? (nearest index #{"f" "e" "a" "b"}) #{"f" "e" "a" "b"})))
    #_(testing "Return nil if element is unseen"
        (is (= (nearest index #{"x"}) nil)))
    #_(testing "Return the nearest set when fuzzy match with extra element"
        (is (all-in? (nearest index #{"f" "e" "a" "b" "x"}) #{"f" "e" "a" "b"})))
    #_(testing "Return the nearest set when fuzzy match with omitted element"
        (is (all-in? (nearest index #{"a"}) #{"a" "b" "c"})))
    #_(testing "Return the nearest two sets"
        (is (all-in? (nearest index #{"a" "b"} 2) '(#{"a" "b" "c"} #{"f" "e" "a" "b"}))))))
27 |
(deftest threshold-test
  ;; Exercises the :threshold option of `nearest`; the second case is
  ;; disabled with the #_ reader macro.
  (let [sets [#{"a" "b" "c"} #{"d" "e" "c"} #{"f" "e" "a" "b"}]
        index (similar sets)]
    (testing "omit too values with a too low jaccard-index"
      (is (all-in? (nearest index #{"x"} 1 :threshold 0.8) '())))
    #_(testing "omit too values with a too low jaccard-index"
        (is (all-in? (nearest index #{"a" "b"} 3 :threshold 0.4) '(#{"a" "b" "c"} #{"f" "e" "a" "b"}))))))
36 |
--------------------------------------------------------------------------------