├── .gitignore ├── LICENSE ├── README.md ├── build.sbt ├── project └── assembly.sbt ├── scripts ├── convert.sh ├── gpx_parse.py ├── parse.sh ├── ptos.py └── stat_generate.py └── src └── main ├── java └── mtree │ ├── ComposedSplitFunction.java │ ├── DistanceFunction.java │ ├── DistanceFunctions.java │ ├── MTree.java │ ├── PartitionFunction.java │ ├── PartitionFunctions.java │ ├── PromotionFunction.java │ ├── PromotionFunctions.java │ ├── SplitFunction.java │ └── utils │ ├── Pair.java │ └── Utils.java └── scala └── edu └── utah └── cs ├── generator └── RandomTrajGenerator.scala ├── index ├── Index.scala ├── RTree.scala ├── VPTree.scala └── VPTreeTest.scala ├── index_bf └── RTreeWithBF.scala ├── index_bm ├── RTreeWithBM.scala └── RTreeWithBMTest.scala ├── index_rr └── RTreeWithRR.scala ├── partitioner ├── IDPartitioner.scala ├── STRMBRPartitioner.scala ├── STRSegPartitioner.scala └── STRTrajPartition.scala ├── spatial ├── Circle.scala ├── Dist.scala ├── DistanceUtil.scala ├── LineSegment.scala ├── LineString.scala ├── MBR.scala ├── Point.scala ├── Polygon.scala ├── Shape.scala └── ZValue.scala ├── trajectory ├── BFDISolution.scala ├── BaseLine.scala ├── BaseLineST.scala ├── BitMapSolution.scala ├── BloomFilterSolution.scala ├── DataSampling.scala ├── DualIndexingSolution.scala ├── LineSegmentClustering.scala ├── MTreeSolution.scala ├── RRSolution.scala ├── Relabel.scala ├── SpatialSpanClustering.scala ├── SpatialSpanFiltering.scala ├── SpatialSpanStat.scala ├── TrajIndexing.scala ├── TrajObjects.scala ├── TrajSampling.scala ├── VPTreeST.scala └── VPTreeSolution.scala └── util ├── BitArray.scala ├── BitMap.scala ├── BloomFilter.scala └── MetricObject.scala /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | project 4 | data 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 |    appropriateness of using or redistributing the Work and assume any
151 |    risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |    whether in tort (including negligence), contract, or otherwise,
155 |    unless required by applicable law (such as deliberate and grossly
156 |    negligent acts) or agreed to in writing, shall any Contributor be
157 |    liable to You for damages, including any direct, indirect, special,
158 |    incidental, or consequential damages of any character arising as a
159 |    result of this License or out of the use or inability to use the
160 |    Work (including but not limited to damages for loss of goodwill,
161 |    work stoppage, computer failure or malfunction, or any and all
162 |    other commercial damages or losses), even if such Contributor
163 |    has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |    the Work or Derivative Works thereof, You may choose to offer,
167 |    and charge a fee for, acceptance of support, warranty, indemnity,
168 |    or other liability obligations and/or rights consistent with this
169 |    License. However, in accepting such obligations, You may act only
170 |    on Your own behalf and on Your sole responsibility, not on behalf
171 |    of any other Contributor, and only if You agree to indemnify,
172 |    defend, and hold each Contributor harmless for any liability
173 |    incurred by, or claims asserted against, such Contributor by reason
174 |    of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!) The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Trajectory Similarity Search in Apache Spark
2 | ============================================
3 | This project implements the trajectory similarity search algorithm and all of its competitors described in [this paper](http://www.vldb.org/pvldb/vol10/p1478-xie.pdf).
4 | 
5 | Implemented algorithms and variants include:
6 | - **DualIndexingSolution**: Roaring Bitmap DFT w/ Dual Indexing
7 | - **RRSolution**: Roaring Bitmap DFT w/o Dual Indexing
8 | - **BFDISolution**: Bloom Filter DFT w/ Dual Indexing
9 | - **BloomFilterSolution**: Bloom Filter DFT w/o Dual Indexing
10 | - **BitMapSolution**: Raw Bitmap DFT
11 | - **TrajIndexingSolution**: Distributed R-Tree on Bounding Boxes
12 | - **VPTreeSolution**: Distributed VP-Tree over Trajectories
13 | - **MTreeSolution**: Distributed M-Tree over Trajectories
14 | - **BaseLine**: Brute Force Top-k
15 | 
16 | Build
17 | -----
18 | Call `sbt assembly` and you will get the compiled package at `target/scala-2.11/traj-sim-assembly-1.0.jar`.
19 | 
20 | Run
21 | ---
22 | Run it by feeding the package to `spark-submit`. The entry points of the different algorithms (listed above) and other utilities are located under `edu.utah.cs.trajectory`.
23 | 
24 | Contributor
25 | -----------
26 | - Dong Xie: dongx [at] cs [dot] utah [dot] edu
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | name := "traj-sim"
2 | 
3 | version := "1.0"
4 | 
5 | scalaVersion := "2.11.8"
6 | 
7 | libraryDependencies += "org.apache.spark" % "spark-core_2.11" % "2.1.0" % "provided"
8 | libraryDependencies += "org.roaringbitmap" % "RoaringBitmap" % "0.6.28"
9 | 
10 | libraryDependencies ++= Seq(
11 |   "org.geotools" % "gt-geojson" % "15.2"
12 | )
13 | 
14 | resolvers ++= Seq(
15 |   "geosolutions" at "http://maven.geo-solutions.it/",
16 |   "osgeo" at "http://download.osgeo.org/webdav/geotools/"
17 | )
--------------------------------------------------------------------------------
/project/assembly.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2")
--------------------------------------------------------------------------------
/scripts/convert.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | if [ -z "$1" ]
4 | then
5 |     echo 'You should specify an input folder....'
6 |     exit $E_MISSING_POS_PARAM
7 | fi
8 | 
9 | mkdir -p output/seg
10 | 
11 | for x in $( find "$1" -type f );
12 | do
13 |     python ptos.py "$x" output/seg
14 | done
--------------------------------------------------------------------------------
/scripts/gpx_parse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import gpxpy
4 | import gpxpy.gpx
5 | import sys
6 | 
7 | if len(sys.argv) < 3:
8 |     print "usage:", sys.argv[0], "<gpx_file> <output_dir>"
9 |     sys.exit(1)
10 | 
11 | gpx_file_name = sys.argv[1]
12 | trace_id = gpx_file_name[-13:-4]
13 | if sys.argv[2] == "-":
14 |     output_file = sys.stdout
15 | else:
16 |     output_path = sys.argv[2] + '/' + trace_id
17 |     output_file = open(output_path, 'w')
18 | 
19 | print >> sys.stderr, 'parsing', gpx_file_name, '...'
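# Each track in the GPX file becomes its own trace: every track point is
# emitted as one tab-separated row of the form "<trace_id>-<track_index> <lat> <lon>",
# matching the format string in the write() call below.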
20 | 
21 | gpx_file = open(gpx_file_name, 'r')
22 | gpx = gpxpy.parse(gpx_file)
23 | 
24 | i = 0
25 | for track in gpx.tracks:
26 |     cur_trace_id = trace_id + '-' + str(i)
27 |     for segment in track.segments:
28 |         for point in segment.points:
29 |             output_file.write('{0}\t{1:.6f}\t{2:.6f}\n'.format(cur_trace_id, point.latitude, point.longitude))
30 |     i = i + 1
31 | 
32 | gpx_file.close()
33 | if sys.argv[2] != "-":
34 |     output_file.close()
35 | #print 'GPX:', gpx.to_xml()
--------------------------------------------------------------------------------
/scripts/parse.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | mkdir -p output
4 | 
5 | if [ -z "$1" ]
6 | then
7 |     echo 'You should specify an input folder....'
8 |     exit $E_MISSING_POS_PARAM
9 | fi
10 | 
11 | mkdir -p output/$1
12 | 
13 | for x in $( find "$1" -name "*.gpx" );
14 | do
15 |     python gpx_parse.py "$x" output/$1
16 | done
--------------------------------------------------------------------------------
/scripts/ptos.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import sys
4 | 
5 | if len(sys.argv) < 3:
6 |     print "usage:", sys.argv[0], '<sample_point_file> <output_dir>'
7 |     sys.exit(1)
8 | 
9 | sample_point_csv = sys.argv[1]
10 | trace_id = sample_point_csv[-9:]
11 | 
12 | if sys.argv[2] == '-':
13 |     output_file = sys.stdout
14 | else:
15 |     output_path = sys.argv[2] + "/" + trace_id
16 |     output_file = open(output_path, 'w')
17 | 
18 | print >> sys.stderr, 'converting', sample_point_csv, '...'
19 | 
20 | sample_point_file = open(sample_point_csv, 'r')
21 | prev_traj_id = ''
22 | for line in sample_point_file:
23 |     elements = line.rstrip('\n').split('\t')
24 |     if elements[0] != prev_traj_id:
25 |         prev_traj_id = elements[0]
26 |         prev_point = (float(elements[1]), float(elements[2]))
27 |         seg_id = 0
28 |     else:
29 |         output_file.write('%s\t%.6f\t%.6f\t%.6f\t%.6f\t%d\n' % (prev_traj_id, prev_point[0], prev_point[1], float(elements[1]), float(elements[2]), seg_id))
30 |         prev_point = (float(elements[1]), float(elements[2]))
31 |         seg_id += 1
32 | sample_point_file.close()
33 | 
34 | if sys.argv[2] != '-':
35 |     output_file.close()
--------------------------------------------------------------------------------
/scripts/stat_generate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | #import matplotlib.pyplot as plt
3 | #import seaborn as sns
4 | 
5 | #sns.set(color_codes=True)
6 | 
7 | dis_arr = []
8 | pts_arr = []
9 | span_arr = []
10 | 
11 | f = open('osm_de_stat.txt', 'r')
12 | 
13 | for line in f:
14 |     splitted = line.split()
15 |     pts_arr.append(int(splitted[1]))
16 |     dis_arr.append(float(splitted[2]))
17 |     span_arr.append(float(splitted[3]))
18 | 
19 | # Filters for BJTaxi...
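# Each zipped tuple below is (distance, #points, span), so x[1] filters on
# the number of sample points and x[2] on the spatial span of a trajectory.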
20 | #data = filter(lambda x: x[1] < 129 and x[1] > 13 and x[2] > 0.01, zip(dis_arr, pts_arr, span_arr))
21 | # Filters for gen_traj_10M
22 | #data = filter(lambda x: x[1] < 523 and x[1] > 20 and x[2] > 0.1, zip(dis_arr, pts_arr, span_arr))
23 | # Filters for OSM_traj
24 | #data = filter(lambda x: x[2] > 0.001 and x[2] < 0.50 and x[1] > 50 and x[1] < 4670, zip(dis_arr, pts_arr, span_arr))
25 | 
26 | data = filter(lambda x: x[1] > 20 and x[2] > 0.001 and x[2] < 0.5080, zip(dis_arr, pts_arr, span_arr))
27 | print "# of Traj after filter:", len(data)
28 | 
29 | pts_arr = map(lambda x: x[1], data)
30 | span_arr = map(lambda x: x[2], data)
31 | dis_arr = map(lambda x: x[0], data)
32 | 
33 | print "variances (pts, dis, span):"
34 | print np.var(pts_arr)
35 | print np.var(dis_arr)
36 | print np.var(span_arr)
37 | print "max min (pts, dis, span):"
38 | print max(pts_arr), ' ', min(pts_arr)
39 | print max(dis_arr), ' ', min(dis_arr)
40 | print max(span_arr), ' ', min(span_arr)
41 | print "standard deviation (pts, dis, span):"
42 | print np.std(pts_arr)
43 | print np.std(dis_arr)
44 | print np.std(span_arr)
45 | print "average (pts, dis, span):"
46 | print np.average(pts_arr)
47 | print np.average(dis_arr)
48 | print np.average(span_arr)
49 | print "median (pts, dis, span):"
50 | print np.median(pts_arr)
51 | print np.median(dis_arr)
52 | print np.median(span_arr)
53 | print "5% 95% percentile (pts, dis, span):"
54 | print np.percentile(pts_arr, 5), ' ', np.percentile(pts_arr, 95)
55 | print np.percentile(dis_arr, 5), ' ', np.percentile(dis_arr, 95)
56 | print np.percentile(span_arr, 5), ' ', np.percentile(span_arr, 95)
57 | 
58 | #print map(lambda x: x[1], data)
59 | 
60 | #span_plot = sns.distplot(span_arr)
61 | #span_plot.get_figure().savefig("osm_traj_span.png")
62 | #pts_plot = sns.distplot(pts_arr)
63 | #pts_plot.get_figure().savefig("osm_traj_pts.png")
64 | #dis_plot = sns.distplot(dis_arr)
65 | #dis_plot.get_figure().savefig("osm_traj_dis.png")
66 | 
--------------------------------------------------------------------------------
/src/main/java/mtree/ComposedSplitFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * A {@linkplain SplitFunction split function} that is defined by composing
9 |  * a {@linkplain PromotionFunction promotion function} and a
10 |  * {@linkplain PartitionFunction partition function}.
11 |  *
12 |  * @param <DATA> The type of the data objects.
13 |  */
14 | public class ComposedSplitFunction<DATA> implements SplitFunction<DATA> {
15 | 
16 |     private PromotionFunction<DATA> promotionFunction;
17 |     private PartitionFunction<DATA> partitionFunction;
18 | 
19 |     /**
20 |      * The constructor of a {@link SplitFunction} composed by a
21 |      * {@link PromotionFunction} and a {@link PartitionFunction}.
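     *
     * @param promotionFunction The function used to choose the pair of
     *        promoted data objects.
     * @param partitionFunction The function used to distribute the remaining
     *        data objects between the two promoted ones.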
22 |      */
23 |     public ComposedSplitFunction(
24 |             PromotionFunction<DATA> promotionFunction,
25 |             PartitionFunction<DATA> partitionFunction
26 |         )
27 |     {
28 |         this.promotionFunction = promotionFunction;
29 |         this.partitionFunction = partitionFunction;
30 |     }
31 | 
32 | 
33 |     @Override
34 |     public SplitResult<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction) {
35 |         Pair<DATA> promoted = promotionFunction.process(dataSet, distanceFunction);
36 |         Pair<Set<DATA>> partitions = partitionFunction.process(promoted, dataSet, distanceFunction);
37 |         return new SplitResult<DATA>(promoted, partitions);
38 |     }
39 | 
40 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/DistanceFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | /**
4 |  * An object that can calculate the distance between two data objects.
5 |  *
6 |  * @param <DATA> The type of the data objects.
7 |  */
8 | public interface DistanceFunction<DATA> {
9 | 
10 |     double calculate(DATA data1, DATA data2);
11 | 
12 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/DistanceFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.HashMap;
4 | import java.util.List;
5 | import java.util.Map;
6 | 
7 | /**
8 |  * Some pre-defined implementations of {@linkplain DistanceFunction distance
9 |  * functions}.
10 |  */
11 | public final class DistanceFunctions {
12 | 
13 |     /**
14 |      * Don't let anyone instantiate this class.
15 |      */
16 |     private DistanceFunctions() {}
17 | 
18 | 
19 |     /**
20 |      * Creates a cached version of a {@linkplain DistanceFunction distance
21 |      * function}. This method is used internally by {@link MTree} to create
22 |      * a cached distance function to pass to the {@linkplain SplitFunction split
23 |      * function}.
24 |      * @param distanceFunction The distance function to create a cached version
25 |      *        of.
26 |      * @return The cached distance function.
27 |      */
28 |     public static <Data> DistanceFunction<Data> cached(final DistanceFunction<Data> distanceFunction) {
29 |         return new DistanceFunction<Data>() {
30 |             class Pair {
31 |                 Data data1;
32 |                 Data data2;
33 | 
34 |                 public Pair(Data data1, Data data2) {
35 |                     this.data1 = data1;
36 |                     this.data2 = data2;
37 |                 }
38 | 
39 |                 @Override
40 |                 public int hashCode() {
41 |                     return data1.hashCode() ^ data2.hashCode();
42 |                 }
43 | 
44 |                 @Override
45 |                 public boolean equals(Object arg0) {
46 |                     if(arg0 instanceof Pair) {
47 |                         Pair that = (Pair) arg0;
48 |                         return this.data1.equals(that.data1)
49 |                             && this.data2.equals(that.data2);
50 |                     } else {
51 |                         return false;
52 |                     }
53 |                 }
54 |             }
55 | 
56 |             private final Map<Pair, Double> cache = new HashMap<Pair, Double>();
57 | 
58 |             @Override
59 |             public double calculate(Data data1, Data data2) {
60 |                 Pair pair1 = new Pair(data1, data2);
61 |                 Double distance = cache.get(pair1);
62 |                 if(distance != null) {
63 |                     return distance;
64 |                 }
65 | 
66 |                 Pair pair2 = new Pair(data2, data1);
67 |                 distance = cache.get(pair2);
68 |                 if(distance != null) {
69 |                     return distance;
70 |                 }
71 | 
72 |                 distance = distanceFunction.calculate(data1, data2);
73 |                 cache.put(pair1, distance);
74 |                 cache.put(pair2, distance);
75 |                 return distance;
76 |             }
77 |         };
78 |     }
79 | 
80 | 
81 | 
82 |     /**
83 |      * An interface to represent coordinates in Euclidean spaces.
84 |      * @see <a href="http://en.wikipedia.org/wiki/Euclidean_space">"Euclidean
85 |      *      Space" article at Wikipedia</a>
86 |      */
87 |     public interface EuclideanCoordinate {
88 |         /**
89 |          * The number of dimensions.
90 |          */
91 |         int dimensions();
92 | 
93 |         /**
94 |          * A method to access the {@code index}-th component of the coordinate.
95 |          *
96 |          * @param index The index of the component. Must be less than {@link
97 |          *        #dimensions()}.
98 |          */
99 |         double get(int index);
100 |     }
101 | 
102 | 
103 |     /**
104 |      * Calculates the distance between two {@linkplain EuclideanCoordinate
105 |      * euclidean coordinates}.
106 |      */
107 |     public static double euclidean(EuclideanCoordinate coord1, EuclideanCoordinate coord2) {
108 |         int size = Math.min(coord1.dimensions(), coord2.dimensions());
109 |         double distance = 0;
110 |         for(int i = 0; i < size; i++) {
111 |             double diff = coord1.get(i) - coord2.get(i);
112 |             distance += diff * diff;
113 |         }
114 |         distance = Math.sqrt(distance);
115 |         return distance;
116 |     }
117 | 
118 | 
119 |     /**
120 |      * A {@linkplain DistanceFunction distance function} object that calculates
121 |      * the distance between two {@linkplain EuclideanCoordinate euclidean
122 |      * coordinates}.
123 |      */
124 |     public static final DistanceFunction<EuclideanCoordinate> EUCLIDEAN = new DistanceFunction<EuclideanCoordinate>() {
125 |         @Override
126 |         public double calculate(EuclideanCoordinate coord1, EuclideanCoordinate coord2) {
127 |             return DistanceFunctions.euclidean(coord1, coord2);
128 |         }
129 |     };
130 | 
131 | 
132 |     /**
133 |      * A {@linkplain DistanceFunction distance function} object that calculates
134 |      * the distance between two coordinates represented by {@linkplain
135 |      * java.util.List lists} of {@link java.lang.Integer}s.
136 |      */
137 |     public static final DistanceFunction<List<Integer>> EUCLIDEAN_INTEGER_LIST = new DistanceFunction<List<Integer>>() {
138 |         @Override
139 |         public double calculate(List<Integer> data1, List<Integer> data2) {
140 |             class IntegerListEuclideanCoordinate implements EuclideanCoordinate {
141 |                 List<Integer> list;
142 |                 public IntegerListEuclideanCoordinate(List<Integer> list) { this.list = list; }
143 |                 @Override public int dimensions() { return list.size(); }
144 |                 @Override public double get(int index) { return list.get(index); }
145 |             };
146 |             IntegerListEuclideanCoordinate coord1 = new IntegerListEuclideanCoordinate(data1);
147 |             IntegerListEuclideanCoordinate coord2 = new IntegerListEuclideanCoordinate(data2);
148 |             return DistanceFunctions.euclidean(coord1, coord2);
149 |         }
150 |     };
151 | 
152 |     /**
153 |      * A {@linkplain DistanceFunction distance function} object that calculates
154 |      * the distance between two coordinates represented by {@linkplain
155 |      * java.util.List lists} of {@link java.lang.Double}s.
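     * <p>For example, {@code EUCLIDEAN_DOUBLE_LIST.calculate(Arrays.asList(0.0, 0.0),
     * Arrays.asList(3.0, 4.0))} returns {@code 5.0} (a 3-4-5 right triangle).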
156 |      */
157 |     public static final DistanceFunction<List<Double>> EUCLIDEAN_DOUBLE_LIST = new DistanceFunction<List<Double>>() {
158 |         @Override
159 |         public double calculate(List<Double> data1, List<Double> data2) {
160 |             class DoubleListEuclideanCoordinate implements EuclideanCoordinate {
161 |                 List<Double> list;
162 |                 public DoubleListEuclideanCoordinate(List<Double> list) { this.list = list; }
163 |                 @Override public int dimensions() { return list.size(); }
164 |                 @Override public double get(int index) { return list.get(index); }
165 |             };
166 |             DoubleListEuclideanCoordinate coord1 = new DoubleListEuclideanCoordinate(data1);
167 |             DoubleListEuclideanCoordinate coord2 = new DoubleListEuclideanCoordinate(data2);
168 |             return DistanceFunctions.euclidean(coord1, coord2);
169 |         }
170 |     };
171 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PartitionFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * An object that partitions a set of data into two sub-sets.
9 |  *
10 |  * @param <DATA> The type of the data on the sets.
11 |  */
12 | public interface PartitionFunction<DATA> {
13 | 
14 |     /**
15 |      * Executes the partitioning.
16 |      *
17 |      * @param promoted The pair of data objects that will guide the partition
18 |      *        process.
19 |      * @param dataSet The original set of data objects to be partitioned.
20 |      * @param distanceFunction A {@linkplain DistanceFunction distance function}
21 |      *        to be used on the partitioning.
22 |      * @return A pair of partition sub-sets. Each sub-set must correspond to one
23 |      *         of the {@code promoted} data objects.
24 |      */
25 |     Pair<Set<DATA>> process(Pair<DATA> promoted, Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
26 | 
27 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PartitionFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.Comparator;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Set;
9 | 
10 | import mtree.utils.Pair;
11 | 
12 | /**
13 |  * Some pre-defined implementations of {@linkplain PartitionFunction partition
14 |  * functions}.
15 |  */
16 | public final class PartitionFunctions {
17 | 
18 |     /**
19 |      * Don't let anyone instantiate this class.
20 |      */
21 |     private PartitionFunctions() {}
22 | 
23 | 
24 |     /**
25 |      * A {@linkplain PartitionFunction partition function} that tries to
26 |      * distribute the data objects equally between the promoted data objects,
27 |      * associating to each promoted data object the nearest data objects.
28 |      *
29 |      * @param <DATA> The type of the data objects.
30 |      */
31 |     public static class BalancedPartition<DATA> implements PartitionFunction<DATA> {
32 | 
33 |         /**
34 |          * Processes the balanced partition.
35 |          *
36 |          * <p>The algorithm is roughly equivalent to this:
37 | 		 * <pre>
 38 | 		 *     While dataSet is not Empty:
 39 | 		 *         X := The object in dataSet which is nearest to promoted.first
 40 | 		 *         Remove X from dataSet
 41 | 		 *         Add X to result.first
 42 | 		 *         
 43 | 		 *         Y := The object in dataSet which is nearest to promoted.second
 44 | 		 *         Remove Y from dataSet
 45 | 		 *         Add Y to result.second
 46 | 		 *         
 47 | 		 *     Return result
48 | 		 * </pre>
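		 *
		 * <p>Since an object already claimed by one partition is skipped by
		 * the other, the two resulting partitions differ in size by at most one.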
49 | 		 *
50 | 		 * @see mtree.PartitionFunction#process(mtree.utils.Pair, java.util.Set, mtree.DistanceFunction)
51 | 		 */
52 |         @Override
53 |         public Pair<Set<DATA>> process(
54 |                 final Pair<DATA> promoted,
55 |                 Set<DATA> dataSet,
56 |                 final DistanceFunction<? super DATA> distanceFunction
57 |             )
58 |         {
59 |             List<DATA> queue1 = new ArrayList<DATA>(dataSet);
60 |             // Sort by distance to the first promoted data
61 |             Collections.sort(queue1, new Comparator<DATA>() {
62 |                 @Override
63 |                 public int compare(DATA data1, DATA data2) {
64 |                     double distance1 = distanceFunction.calculate(data1, promoted.first);
65 |                     double distance2 = distanceFunction.calculate(data2, promoted.first);
66 |                     return Double.compare(distance1, distance2);
67 |                 }
68 |             });
69 | 
70 |             List<DATA> queue2 = new ArrayList<DATA>(dataSet);
71 |             // Sort by distance to the second promoted data
72 |             Collections.sort(queue2, new Comparator<DATA>() {
73 |                 @Override
74 |                 public int compare(DATA data1, DATA data2) {
75 |                     double distance1 = distanceFunction.calculate(data1, promoted.second);
76 |                     double distance2 = distanceFunction.calculate(data2, promoted.second);
77 |                     return Double.compare(distance1, distance2);
78 |                 }
79 |             });
80 | 
81 |             Pair<Set<DATA>> partitions = new Pair<Set<DATA>>(new HashSet<DATA>(), new HashSet<DATA>());
82 | 
83 |             int index1 = 0;
84 |             int index2 = 0;
85 | 
86 |             while(index1 < queue1.size() || index2 < queue2.size()) {
87 |                 while(index1 < queue1.size()) {
88 |                     DATA data = queue1.get(index1++);
89 |                     if(!partitions.second.contains(data)) {
90 |                         partitions.first.add(data);
91 |                         break;
92 |                     }
93 |                 }
94 | 
95 |                 while(index2 < queue2.size()) {
96 |                     DATA data = queue2.get(index2++);
97 |                     if(!partitions.first.contains(data)) {
98 |                         partitions.second.add(data);
99 |                         break;
100 |                     }
101 |                 }
102 |             }
103 | 
104 |             return partitions;
105 |         }
106 |     }
107 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PromotionFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * An object that chooses a pair from a set of data objects.
9 |  *
10 |  * @param <DATA> The type of the data objects.
11 |  */
12 | public interface PromotionFunction<DATA> {
13 | 
14 |     /**
15 |      * Chooses (promotes) a pair of objects according to some criteria that is
16 |      * suitable for the application using the M-Tree.
17 |      *
18 |      * @param dataSet The set of objects to choose a pair from.
19 |      * @param distanceFunction A function that can be used for choosing the
20 |      *        promoted objects.
21 |      * @return A pair of chosen objects.
22 |      */
23 |     Pair<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
24 | 
25 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PromotionFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.List;
4 | import java.util.Set;
5 | 
6 | import mtree.utils.Pair;
7 | import mtree.utils.Utils;
8 | 
9 | /**
10 |  * Some pre-defined implementations of {@linkplain PromotionFunction promotion
11 |  * functions}.
12 |  */
13 | public final class PromotionFunctions {
14 | 
15 |     /**
16 |      * Don't let anyone instantiate this class.
17 |      */
18 |     private PromotionFunctions() {}
19 | 
20 | 
21 |     /**
22 |      * A {@linkplain PromotionFunction promotion function} object that randomly
23 |      * chooses ("promotes") two data objects.
24 |      *
25 |      * @param <DATA> The type of the data objects.
26 |      */
27 |     public static class RandomPromotion<DATA> implements PromotionFunction<DATA> {
28 |         @Override
29 |         public Pair<DATA> process(Set<DATA> dataSet,
30 |                 DistanceFunction<? super DATA> distanceFunction)
31 |         {
32 |             List<DATA> promotedList = Utils.randomSample(dataSet, 2);
33 |             return new Pair<DATA>(promotedList.get(0), promotedList.get(1));
34 |         }
35 |     }
36 | 
37 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/SplitFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * Defines an object to be used to split a node in an M-Tree. A node must be
9 |  * split when it has reached its maximum capacity and a new child node would be
10 |  * added to it.
11 |  *
12 |  * <p>The splitting consists in choosing a pair of "promoted" data objects from
13 |  * the children and then partitioning the set of children into two partitions
14 |  * corresponding to the two promoted data objects.
15 |  *
16 |  * @param <DATA> The type of the data objects.
17 |  */
18 | public interface SplitFunction<DATA> {
19 | 
20 |     /**
21 |      * Processes the splitting of a node.
22 |      *
23 |      * @param dataSet A set of data that are keys to the children of the node
24 |      *        to be split.
25 |      * @param distanceFunction A {@linkplain DistanceFunction distance function}
26 |      *        that can be used to help splitting the node.
27 |      * @return A {@link SplitResult} object with a pair of promoted data objects
28 |      *         and a pair of corresponding partitions of the data objects.
29 |      */
30 |     SplitResult<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
31 | 
32 | 
33 |     /**
34 |      * An object used as the result for the
35 |      * {@link SplitFunction#process(Set, DistanceFunction)} method.
36 |      *
37 |      * @param <DATA> The type of the data objects.
38 |      */
39 |     public static class SplitResult<DATA> {
40 | 
41 |         /**
42 |          * A pair of promoted data objects.
43 |          */
44 |         public Pair<DATA> promoted;
45 | 
46 |         /**
47 |          * A pair of partitions corresponding to the {@code promoted} data
48 |          * objects.
49 |          */
50 |         public Pair<Set<DATA>> partitions;
51 | 
52 |         /**
53 |          * The constructor for a {@link SplitResult} object.
54 |          */
55 |         public SplitResult(Pair<DATA> promoted, Pair<Set<DATA>> partitions) {
56 |             this.promoted = promoted;
57 |             this.partitions = partitions;
58 |         }
59 | 
60 |     }
61 | 
62 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/utils/Pair.java:
--------------------------------------------------------------------------------
1 | package mtree.utils;
2 | 
3 | /**
4 |  * A pair of objects of the same type.
5 |  *
6 |  * @param <T> The type of the objects.
7 |  */
8 | public class Pair<T> {
9 | 
10 |     /**
11 |      * The first object.
12 |      */
13 |     public T first;
14 | 
15 | 
16 |     /**
17 |      * The second object.
18 |      */
19 |     public T second;
20 | 
21 |     /**
22 |      * Creates a pair of {@code null} objects.
23 |      */
24 |     public Pair() {}
25 | 
26 |     /**
27 |      * Creates a pair with the objects specified in the arguments.
28 |      * @param first The first object.
29 |      * @param second The second object.
30 |      */
31 |     public Pair(T first, T second) {
32 |         this.first = first;
33 |         this.second = second;
34 |     }
35 | 
36 |     /**
37 |      * Accesses an object by its index. The {@link #first} object has index
38 |      * {@code 0} and the {@link #second} object has index {@code 1}.
39 |      * @param index The index of the object to be accessed.
40 |      * @return The {@link #first} object if {@code index} is {@code 0}; the
41 |      *         {@link #second} object if {@code index} is {@code 1}.
42 |      * @throws IllegalArgumentException If {@code index} is neither {@code 0}
43 |      *         nor {@code 1}.
44 |      */
45 |     public T get(int index) throws IllegalArgumentException {
46 |         switch(index) {
47 |             case 0: return first;
48 |             case 1: return second;
49 |             default: throw new IllegalArgumentException();
50 |         }
51 |     }
52 | 
53 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/utils/Utils.java:
--------------------------------------------------------------------------------
1 | package mtree.utils;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.Iterator;
6 | import java.util.List;
7 | import java.util.Random;
8 | 
9 | /**
10 |  * Some utilities.
11 |  */
12 | public final class Utils {
13 | 
14 |     /**
15 |      * Don't let anyone instantiate this class.
16 |      */
17 |     private Utils() {}
18 | 
19 | 
20 |     /**
21 |      * Identifies the minimum and maximum elements from an iterable, according
22 |      * to the natural ordering of the elements.
23 |      * @param items An {@link Iterable} object with the elements.
24 |      * @param <T> The type of the elements.
25 |      * @return A pair with the minimum and maximum elements.
26 |      */
27 |     public static <T extends Comparable<T>> Pair<T> minMax(Iterable<T> items) {
28 |         Iterator<T> iterator = items.iterator();
29 |         if(!iterator.hasNext()) {
30 |             return null;
31 |         }
32 | 
33 |         T min = iterator.next();
34 |         T max = min;
35 | 
36 |         while(iterator.hasNext()) {
37 |             T item = iterator.next();
38 |             if(item.compareTo(min) < 0) {
39 |                 min = item;
40 |             }
41 |             if(item.compareTo(max) > 0) {
42 |                 max = item;
43 |             }
44 |         }
45 | 
46 |         return new Pair<T>(min, max);
47 |     }
48 | 
49 | 
50 |     /**
51 |      * Randomly chooses elements from the collection.
52 |      * @param collection The collection.
53 |      * @param n The number of elements to choose.
54 |      * @param <T> The type of the elements.
55 |      * @return A list with the chosen elements.
56 |      */
57 |     public static <T> List<T> randomSample(Collection<T> collection, int n) {
58 |         List<T> list = new ArrayList<T>(collection);
59 |         List<T> sample = new ArrayList<T>(n);
60 |         Random random = new Random();
61 |         while(n > 0 && !list.isEmpty()) {
62 |             int index = random.nextInt(list.size());
63 |             sample.add(list.get(index));
64 |             int indexLast = list.size() - 1;
65 |             T last = list.remove(indexLast);
66 |             if(index < indexLast) {
67 |                 list.set(index, last);
68 |             }
69 |             n--;
70 |         }
71 |         return sample;
72 |     }
73 | 
74 | }
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/generator/RandomTrajGenerator.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.generator
2 | 
3 | import edu.utah.cs.spatial.Point
4 | import org.apache.spark.{SparkConf, SparkContext}
5 | 
6 | import scala.collection.mutable
7 | import scala.util.Random
8 | 
9 | /**
10 |   * Created by dongx on 1/18/17.
11 |   * This generator should work in such manner:
12 |   *  - Generate a uniform random point within the starting scope defined by (low_x, low_y) -- (high_x, high_y)
13 |   *  - Generate the number of steps following a normal distribution with parameters (steps_avg and steps_dev)
14 |   *  - Iterate steps times:
15 |   *    + generate two random numbers (dx, dy) with normal distribution defined by range_dev (mean is 0)
16 |   *    + move the object to (x - dx, y - dy)
17 |   */
18 | object RandomTrajGenerator {
19 |   def rnd(low: Double, high: Double): Double = Random.nextDouble() * (high - low) + low
20 |   def flip_coin(p: Double): Int = if (Random.nextDouble() > p) 1 else -1
21 |   def gaussianRnd(mean: Double, scale: Double): Double = Random.nextGaussian() * scale + mean
22 |   def gaussianRnd(scale: Double): Double = Random.nextGaussian() * scale
23 | 
24 |   def main(args: Array[String]): Unit = {
25 | //    if (args.length != 9) {
26 | //      println("Usage: RandomTrajGenerator <n> <low_x> <low_y> <high_x> <high_y> <steps_avg> <steps_dev> <range_dev> <output_path>")
27 | //      System.exit(1)
28 | //    }
29 | 
30 |     //val sparkConf = new SparkConf().setAppName("TrajSampling")
31 |     //val sc = new SparkContext(sparkConf)
32 | 
33 | //    Thread.sleep(3000)
34 | 
35 | //    val n = args(0).toInt
36 | //    val low_x = args(1).toDouble
37 | //    val low_y = args(2).toDouble
38 | //    val high_x = args(3).toDouble
39 | //    val high_y = args(4).toDouble
40 | //    val steps_avg = args(5).toInt
41 | //    val steps_dev = args(6).toDouble
42 | //    val range_dev = args(7).toDouble
43 | //    val output_file_path = args(8)
44 | //    val n = args(0).toInt
45 |     val low_x = 0.0
46 |     val low_y = 0.0
47 |     val high_x = 100.0
48 |     val high_y = 100.0
49 |     val steps_avg = 20
50 |     val steps_dev = 10
51 |     val range_dev = 8.0
52 | //    val output_file_path = args(8)
53 | 
54 | //    sc.parallelize(0 until n, sc.defaultParallelism)
55 | //      .flatMap(x => {
56 | //        val ans = mutable.ListBuffer[String]()
57 | //        val last_x = rnd(low_x, high_x)
58 | //        val last_y = rnd(low_y, high_y)
59 | //        val steps = gaussianRnd(steps_avg, steps_dev).toInt
60 | //        for (i <- 0 until steps) {
61 | //          val cur_x = last_x + gaussianRnd(range_dev)
62 | //          val cur_y = last_y + gaussianRnd(range_dev)
63 | //          ans += s"$x\t$last_x\t$last_y\t$cur_x\t$cur_y\t$i"
64 | //        }
65 | //        ans.iterator
66 | //      }).saveAsTextFile(output_file_path)
67 | 
68 |     val res = mutable.ListBuffer[Point]()
69 |     var x1 = rnd(low_x, high_x)
70 |     var y1 = rnd(low_y, high_y)
71 |     val basic_step_x = gaussianRnd(5.0)
72 |     val basic_step_y = gaussianRnd(5.0)
73 |     var x2 = x1 + gaussianRnd(basic_step_x, basic_step_x * 0.5)
74 |     var y2 = y1 + gaussianRnd(basic_step_y, basic_step_y * 0.5)
75 |     res += Point(Array(x1, y1))
76 |     res += Point(Array(x2, y2))
77 |     val steps = gaussianRnd(steps_avg, steps_dev).toInt
78 |     assert(steps > 10)
79 |     for (i <- 0 until steps) {
80 |       val cur_x = ((x1 + x2) / 2.0) + flip_coin(0.95) * gaussianRnd(basic_step_x, basic_step_x * 0.3)
81 |       val cur_y = ((y1 + y2) / 2.0) + flip_coin(0.95) * gaussianRnd(basic_step_y, basic_step_y * 0.3)
82 |       res += Point(Array(cur_x, cur_y))
83 |       x1 = x2
84 |       y1 = y2
85 |       x2 = cur_x
86 |       y2 = cur_y
87 |     }
88 | 
89 |     println("X = [")
90 |     res.foreach(x => println(s"${x.coord(0)},"))
91 |     println("]")
92 |     println()
93 |     println("Y = [")
94 |     res.foreach(x => println(s"${x.coord(1)},"))
95 |     println("]")
96 | 
97 |     //sc.stop()
98 |   }
99 | 
100 | }
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/index/Index.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.index
2 | 
3 | /**
4 |   * Created by dongx on 8/30/16.
5 | * Abstract trait for Index 6 | */ 7 | trait Index 8 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/RTree.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Shape} 4 | import edu.utah.cs.util._ 5 | 6 | import scala.collection.mutable 7 | import scala.util.control.Breaks 8 | 9 | /** 10 | * Created by dong on 1/15/16. 11 | * Static Multi-Dimensional R-Tree Index for LineSegments 12 | */ 13 | abstract class RTreeEntry { 14 | def minDist(x: Shape): Double 15 | 16 | def intersects(x: Shape): Boolean 17 | } 18 | 19 | case class RTreeLeafEntry(shape: Shape, m_data: Int, size: Int, traj_id: Int) extends RTreeEntry { 20 | override def minDist(x: Shape): Double = shape.minDist(x) 21 | override def intersects(x: Shape): Boolean = x.intersects(shape) 22 | } 23 | 24 | case class RTreeInternalEntry(mbr: MBR, node: RTreeNode) extends RTreeEntry { 25 | override def minDist(x: Shape): Double = mbr.minDist(x) 26 | override def intersects(x: Shape): Boolean = x.intersects(mbr) 27 | } 28 | 29 | case class RTreeNode(m_mbr: MBR, m_child: Array[RTreeEntry], isLeaf: Boolean) { 30 | def this(m_mbr: MBR, children: Array[(MBR, RTreeNode)]) = { 31 | this(m_mbr, children.map(x => RTreeInternalEntry(x._1, x._2)), false) 32 | } 33 | 34 | // XX Interesting Trick! Overriding same function 35 | def this(m_mbr: MBR, children: => Array[(Shape, Int, Int)]) = { 36 | this(m_mbr, children.map(x => RTreeLeafEntry(x._1, x._2, 1, x._3)), true) 37 | } 38 | 39 | def this(m_mbr: MBR, children: Array[(MBR, Int, Int)]) = { 40 | this(m_mbr, children.map(x => RTreeLeafEntry(x._1, x._2, x._3, 1)), true) 41 | } 42 | 43 | val size: Long = { 44 | if (isLeaf) m_child.map(x => x.asInstanceOf[RTreeLeafEntry].size).sum 45 | else m_child.map(x => x.asInstanceOf[RTreeInternalEntry].node.size).sum 46 | } 47 | } 48 | 49 | class NNOrdering() extends Ordering[(_, Double)] { 50 | def compare(a: (_, Double), b: (_, Double)): Int = -a._2.compare(b._2) 51 | } 52 | 53 | case class RTree(root: RTreeNode) extends Index with Serializable { 54 | def range(query: MBR): Array[(Shape, Int)] = { 55 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 56 | val st = new mutable.Stack[RTreeNode]() 57 | if (root.m_mbr.intersects(query) && root.m_child.nonEmpty) st.push(root) 58 | while (st.nonEmpty) { 59 | val now = st.pop() 60 | if (!now.isLeaf) { 61 | now.m_child.foreach { 62 | case RTreeInternalEntry(mbr, node) => 63 | if (query.intersects(mbr)) st.push(node) 64 | } 65 | } else { 66 | now.m_child.foreach { 67 | case RTreeLeafEntry(shape, m_data, _, _) => 68 | if (query.intersects(shape)) ans += ((shape, m_data)) 69 | } 70 | } 71 | } 72 | ans.toArray 73 | } 74 | 75 | def range(query: MBR, level_limit: Int, s_threshold: Double): Option[Array[(Shape, Int)]] = { 76 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 77 | val q = new mutable.Queue[(RTreeNode, Int)]() 78 | if (root.m_mbr.intersects(query) && root.m_child.nonEmpty) q.enqueue((root, 1)) 79 | var estimate: Double = 0 80 | val loop = new Breaks 81 | import loop.{break, breakable} 82 | breakable { 83 | while (q.nonEmpty) { 84 | val now = q.dequeue 85 | val cur_node = now._1 86 | val cur_level = now._2 87 | if (cur_node.isLeaf) { 88 | cur_node.m_child.foreach { 89 | case RTreeLeafEntry(shape, m_data, _, _) => 90 | if (query.intersects(shape)) ans += ((shape, m_data)) 91 | } 92 | } else if (cur_level < level_limit) { 93 | 
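          // still above the level limit: keep descending without contributing to the estimate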
          cur_node.m_child.foreach {
94 |             case RTreeInternalEntry(mbr, node) =>
95 |               if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
96 |           }
97 |         } else if (cur_level == level_limit) {
98 |           estimate += cur_node.m_mbr.calcRatio(query) * cur_node.size
99 |           cur_node.m_child.foreach {
100 |             case RTreeInternalEntry(mbr, node) =>
101 |               if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
102 |           }
103 |         } else break
104 |       }
105 |     }
106 |     if (ans.nonEmpty) return Some(ans.toArray)
107 |     else if (estimate / root.size > s_threshold) return None
108 |     while (q.nonEmpty) {
109 |       val now = q.dequeue
110 |       val cur_node = now._1
111 |       val cur_level = now._2
112 |       if (cur_node.isLeaf) {
113 |         cur_node.m_child.foreach {
114 |           case RTreeLeafEntry(shape, m_data, _, _) =>
115 |             if (query.intersects(shape)) ans += ((shape, m_data))
116 |         }
117 |       } else {
118 |         cur_node.m_child.foreach {
119 |           case RTreeInternalEntry(mbr, node) =>
120 |             if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
121 |         }
122 |       }
123 |     }
124 |     Some(ans.toArray)
125 |   }
126 | 
127 |   def circleRange(origin: Shape, r: Double): Array[(Shape, Int)] = {
128 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
129 |     val st = new mutable.Stack[RTreeNode]()
130 |     if (root.m_mbr.minDist(origin) <= r && root.m_child.nonEmpty) st.push(root)
131 |     while (st.nonEmpty) {
132 |       val now = st.pop()
133 |       if (!now.isLeaf) {
134 |         now.m_child.foreach{
135 |           case RTreeInternalEntry(mbr, node) =>
136 |             if (origin.minDist(mbr) <= r) st.push(node)
137 |         }
138 |       } else {
139 |         now.m_child.foreach {
140 |           case RTreeLeafEntry(shape, m_data, _, _) =>
141 |             if (origin.minDist(shape) <= r) ans += ((shape, m_data))
142 |         }
143 |       }
144 |     }
145 |     ans.toArray
146 |   }
147 | 
148 |   def antiCircleRange(query: Array[LineSegment], r: Double): Array[(Shape, Int)] = {
149 |     def check(now: Shape) : Boolean = {
150 |       for (i <- query.indices)
151 |         if (now.minDist(query(i)) <= r) return false
152 |       true
153 |     }
154 | 
155 |     def checkCovered(now: MBR): Boolean = {
156 |       for (i <- query.indices)
157 |         if (query(i).cover(now, r)) return true
158 |       false
159 |     }
160 | 
161 |     val ans = mutable.ListBuffer[(Shape, Int)]()
162 |     val st = new mutable.Stack[RTreeNode]()
163 |     if (!checkCovered(root.m_mbr)) st.push(root)
164 |     while (st.nonEmpty) {
165 |       val now = st.pop()
166 |       if (!now.isLeaf) {
167 |         now.m_child.foreach {
168 |           case RTreeInternalEntry(mbr, node) =>
169 |             if (!checkCovered(mbr)) st.push(node)
170 |         }
171 |       } else {
172 |         now.m_child.foreach {
173 |           case RTreeLeafEntry(shape, m_data, _, _) =>
174 |             if (check(shape)) ans += ((shape, m_data))
175 |         }
176 |       }
177 |     }
178 |     ans.toArray
179 |   }
180 | 
181 |   def circleRange(origin: LineSegment, r: Double, distFunc: (LineSegment, MBR) => Double): Array[(Shape, Int)] = {
182 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
183 |     val st = new mutable.Stack[RTreeNode]()
184 |     if (distFunc(origin, root.m_mbr) <= r && root.m_child.nonEmpty) st.push(root)
185 |     while (st.nonEmpty) {
186 |       val now = st.pop()
187 |       if (!now.isLeaf) {
188 |         now.m_child.foreach{
189 |           case RTreeInternalEntry(mbr, node) =>
190 |             if (distFunc(origin, mbr) <= r) st.push(node)
191 |         }
192 |       } else {
193 |         now.m_child.foreach {
194 |           case RTreeLeafEntry(shape, m_data, _, _) =>
195 |             if (distFunc(origin, shape.asInstanceOf[MBR]) <= r) ans += ((shape, m_data))
196 |         }
197 |       }
198 |     }
199 |     ans.toArray
200 |   }
201 | 
202 |   def circleRange(query: Array[LineSegment], r: Double): Array[(Shape, Int)] = {
203 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
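    // disjunctive range query: a shape qualifies if it lies within distance r
    // of at least one query segment (see check() below)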
val st = new mutable.Stack[RTreeNode]() 205 | 206 | def check(now: Shape) : Boolean = { 207 | for (i <- query.indices) 208 | if (now.minDist(query(i)) <= r) return true 209 | false 210 | } 211 | 212 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 213 | while (st.nonEmpty) { 214 | val now = st.pop() 215 | if (!now.isLeaf) now.m_child.foreach { 216 | case RTreeInternalEntry(mbr, node) => 217 | if (check(mbr)) st.push(node) 218 | } else { 219 | now.m_child.foreach { 220 | case RTreeLeafEntry(shape, m_data, _, _) => 221 | if (check(shape)) ans += ((shape, m_data)) 222 | } 223 | } 224 | } 225 | ans.toArray 226 | } 227 | 228 | def circleRange(query: Array[MBR], r: Double): Array[(Shape, Int)] = { 229 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 230 | val st = new mutable.Stack[RTreeNode]() 231 | 232 | def check(now: Shape) : Boolean = { 233 | for (i <- query.indices) 234 | if (now.minDist(query(i)) <= r) return true 235 | false 236 | } 237 | 238 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 239 | while (st.nonEmpty) { 240 | val now = st.pop() 241 | if (!now.isLeaf) now.m_child.foreach { 242 | case RTreeInternalEntry(mbr, node) => 243 | if (check(mbr)) st.push(node) 244 | } else { 245 | now.m_child.foreach { 246 | case RTreeLeafEntry(shape, m_data, _, _) => 247 | if (check(shape)) ans += ((shape, m_data)) 248 | } 249 | } 250 | } 251 | ans.toArray 252 | } 253 | 254 | 255 | def circleRangeConj(queries: Array[(Point, Double)]): Array[(Shape, Int)] = { 256 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 257 | val st = new mutable.Stack[RTreeNode]() 258 | 259 | def check(now: Shape) : Boolean = { 260 | for (i <- queries.indices) 261 | if (now.minDist(queries(i)._1) > queries(i)._2) return false 262 | true 263 | } 264 | 265 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 266 | while (st.nonEmpty) { 267 | val now = st.pop() 268 | if (!now.isLeaf) now.m_child.foreach { 269 | case RTreeInternalEntry(mbr, node) => 270 | if (check(mbr)) st.push(node) 271 | } else { 272 | now.m_child.foreach { 273 | case RTreeLeafEntry(shape, m_data, _, _) => 274 | if (check(shape)) ans += ((shape, m_data)) 275 | } 276 | } 277 | } 278 | ans.toArray 279 | } 280 | 281 | def kNN(query: Point, k: Int, keepSame: Boolean = false): Array[(Shape, Int)] = { 282 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 283 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 284 | var cnt = 0 285 | var kNN_dis = 0.0 286 | pq.enqueue((root, 0.0)) 287 | 288 | val loop = new Breaks 289 | import loop.{break, breakable} 290 | breakable { 291 | while (pq.nonEmpty) { 292 | val now = pq.dequeue() 293 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 294 | 295 | now._1 match { 296 | case RTreeNode(_, m_child, isLeaf) => 297 | m_child.foreach(entry => 298 | if (isLeaf) pq.enqueue((entry, entry.minDist(query))) 299 | else pq.enqueue((entry.asInstanceOf[RTreeInternalEntry].node, entry.minDist(query))) 300 | ) 301 | case RTreeLeafEntry(p, m_data, size, _) => 302 | cnt += size 303 | kNN_dis = now._2 304 | ans += ((p, m_data)) 305 | } 306 | } 307 | } 308 | 309 | ans.toArray 310 | } 311 | 312 | def kNN(query: Point, distFunc: (Point, MBR) => Double, 313 | k: Int, keepSame: Boolean): Array[(Shape, Int)] = { 314 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 315 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 316 | var cnt = 0 317 | var kNN_dis = 0.0 318 | pq.enqueue((root, 0.0)) 319 | 320 | val loop = new Breaks 321 | import loop.{break, breakable} 322 | 
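    // best-first search: expand the nearest pending entry first; stop once k
    // results are collected and the next candidate is farther than the current
    // k-th nearest distance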
breakable { 323 | while (pq.nonEmpty) { 324 | val now = pq.dequeue() 325 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 326 | 327 | now._1 match { 328 | case RTreeNode(_, m_child, _) => 329 | m_child.foreach { 330 | case RTreeInternalEntry(mbr, node) => 331 | pq.enqueue((node, distFunc(query, mbr))) 332 | case entry @ RTreeLeafEntry(mbr, _, _, _) => 333 | require(mbr.isInstanceOf[MBR]) 334 | pq.enqueue((entry, distFunc(query, mbr.asInstanceOf[MBR]))) 335 | } 336 | case RTreeLeafEntry(mbr, m_data, size, _) => 337 | cnt += size 338 | kNN_dis = now._2 339 | ans += ((mbr, m_data)) 340 | } 341 | } 342 | } 343 | 344 | ans.toArray 345 | } 346 | 347 | def kNN(query: MBR, distFunc: (MBR, MBR) => Double, 348 | k: Int, keepSame: Boolean): Array[(Shape, Int)] = { 349 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 350 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 351 | var cnt = 0 352 | var kNN_dis = 0.0 353 | pq.enqueue((root, 0.0)) 354 | 355 | val loop = new Breaks 356 | import loop.{break, breakable} 357 | breakable { 358 | while (pq.nonEmpty) { 359 | val now = pq.dequeue() 360 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 361 | 362 | now._1 match { 363 | case RTreeNode(_, m_child, _) => 364 | m_child.foreach { 365 | case RTreeInternalEntry(mbr, node) => 366 | pq.enqueue((node, distFunc(query, mbr))) 367 | case entry @ RTreeLeafEntry(mbr, _, _, _) => 368 | require(mbr.isInstanceOf[MBR]) 369 | pq.enqueue((entry, distFunc(query, mbr.asInstanceOf[MBR]))) 370 | } 371 | case RTreeLeafEntry(mbr, m_data, size, _) => 372 | cnt += size 373 | kNN_dis = now._2 374 | ans += ((mbr, m_data)) 375 | } 376 | } 377 | } 378 | ans.toArray 379 | } 380 | } 381 | 382 | object RTree { 383 | def apply(entries: Array[(MBR, Int, Int)], max_entries_per_node: Int): RTree = { 384 | val dimension = entries(0)._1.low.coord.length 385 | val entries_len = entries.length.toDouble 386 | val dim = new Array[Int](dimension) 387 | var remaining = entries_len / max_entries_per_node 388 | for (i <- 0 until dimension) { 389 | dim(i) = Math.ceil(Math.pow(remaining, 1.0/(dimension - i))).toInt 390 | remaining /= dim(i) 391 | } 392 | 393 | def recursiveGroupSegments(entries: Array[(MBR, Int, Int)], 394 | cur_dim: Int, until_dim: Int): Array[Array[(MBR, Int, Int)]] = { 395 | val len = entries.length.toDouble 396 | val grouped = entries.sortWith(_._1.centroid.coord(cur_dim) < _._1.centroid.coord(cur_dim)) 397 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 398 | if (cur_dim < until_dim) 399 | grouped.flatMap(now => recursiveGroupSegments(now, cur_dim + 1, until_dim)) 400 | else grouped 401 | } 402 | 403 | val grouped = recursiveGroupSegments(entries, 0, dimension - 1) 404 | val rtree_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 405 | grouped.foreach(list => { 406 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 407 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 408 | list.foreach(now => { 409 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 410 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 411 | }) 412 | val mbr = MBR(Point(min), Point(max)) 413 | rtree_nodes += ((mbr, new RTreeNode(mbr, list.map(x => (x._1.asInstanceOf[Shape], x._2, x._3))))) 414 | }) 415 | 416 | var cur_rtree_nodes = rtree_nodes.toArray 417 | var cur_len = cur_rtree_nodes.length.toDouble 418 | remaining = cur_len / max_entries_per_node 419 | for (i <- 0 until dimension) { 420 | dim(i) = Math.ceil(Math.pow(remaining, 
1.0 / (dimension - i))).toInt 421 | remaining /= dim(i) 422 | } 423 | 424 | def over(dim: Array[Int]): Boolean = { 425 | for (i <- dim.indices) 426 | if (dim(i) != 1) return false 427 | true 428 | } 429 | 430 | def comp(dim: Int)(left: (MBR, RTreeNode), right: (MBR, RTreeNode)): Boolean = { 431 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 432 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 433 | left_center < right_center 434 | } 435 | 436 | def recursiveGroupRTreeNode(entries: Array[(MBR, RTreeNode)], cur_dim: Int, until_dim: Int) 437 | : Array[Array[(MBR, RTreeNode)]] = { 438 | val len = entries.length.toDouble 439 | val grouped = entries.sortWith(comp(cur_dim)) 440 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 441 | if (cur_dim < until_dim) 442 | grouped.flatMap(now => recursiveGroupRTreeNode(now, cur_dim + 1, until_dim)) 443 | else grouped 444 | } 445 | 446 | while (!over(dim)) { 447 | val grouped = recursiveGroupRTreeNode(cur_rtree_nodes, 0, dimension - 1) 448 | var tmp_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 449 | grouped.foreach(list => { 450 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 451 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 452 | list.foreach(now => { 453 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 454 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 455 | }) 456 | val mbr = MBR(Point(min), Point(max)) 457 | tmp_nodes += ((mbr, new RTreeNode(mbr, list))) 458 | }) 459 | cur_rtree_nodes = tmp_nodes.toArray 460 | cur_len = cur_rtree_nodes.length.toDouble 461 | remaining = cur_len / max_entries_per_node 462 | for (i <- 0 until dimension) { 463 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 464 | remaining /= dim(i) 465 | } 466 | } 467 | 468 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 469 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 470 | cur_rtree_nodes.foreach(now => { 471 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 472 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 473 | }) 474 | 475 | val mbr = MBR(Point(min), Point(max)) 476 | val root = new RTreeNode(mbr, cur_rtree_nodes) 477 | new RTree(root) 478 | } 479 | 480 | def applyMBR(entries: Array[(MBR, Int, Int)], max_entries_per_node: Int): RTree = { 481 | val dimension = entries(0)._1.low.coord.length 482 | val entries_len = entries.length.toDouble 483 | val dim = new Array[Int](dimension) 484 | var remaining = entries_len / max_entries_per_node 485 | for (i <- 0 until dimension) { 486 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 487 | remaining /= dim(i) 488 | } 489 | 490 | def compMBR(dim: Int)(left: (MBR, Int, Int), right: (MBR, Int, Int)): Boolean = { 491 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 492 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 493 | left_center < right_center 494 | } 495 | 496 | def recursiveGroupMBR(entries: Array[(MBR, Int, Int)], cur_dim: Int, until_dim: Int) 497 | : Array[Array[(MBR, Int, Int)]] = { 498 | val len = entries.length.toDouble 499 | val grouped = entries.sortWith(compMBR(cur_dim)) 500 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 501 | if (cur_dim < until_dim) 502 | grouped.flatMap(now => recursiveGroupMBR(now, cur_dim + 1, until_dim)) 503 | else grouped 504 | } 505 | 506 | val grouped = 
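/* (Editor's note) compMBR orders entries by low + high per axis, i.e. by twice the center coordinate, so this performs the same center-based STR tiling as in apply() above: */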
recursiveGroupMBR(entries, 0, dimension - 1) 507 | val rtree_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 508 | grouped.foreach(list => { 509 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 510 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 511 | list.foreach(now => { 512 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 513 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 514 | }) 515 | val mbr = MBR(Point(min), Point(max)) 516 | rtree_nodes += ((mbr, new RTreeNode(mbr, list))) 517 | }) 518 | 519 | var cur_rtree_nodes = rtree_nodes.toArray 520 | var cur_len = cur_rtree_nodes.length.toDouble 521 | remaining = cur_len / max_entries_per_node 522 | for (i <- 0 until dimension) { 523 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 524 | remaining /= dim(i) 525 | } 526 | 527 | def over(dim : Array[Int]) : Boolean = { 528 | for (i <- dim.indices) 529 | if (dim(i) != 1) return false 530 | true 531 | } 532 | 533 | def comp(dim: Int)(left : (MBR, RTreeNode), right : (MBR, RTreeNode)) : Boolean = { 534 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 535 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 536 | left_center < right_center 537 | } 538 | 539 | def recursiveGroupRTreeNode(entries: Array[(MBR, RTreeNode)], 540 | cur_dim : Int, until_dim : Int) : Array[Array[(MBR, RTreeNode)]] = { 541 | val len = entries.length.toDouble 542 | val grouped = entries.sortWith(comp(cur_dim)) 543 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 544 | if (cur_dim < until_dim) 545 | grouped.flatMap(now => { 546 | recursiveGroupRTreeNode(now, cur_dim + 1, until_dim) 547 | }) 548 | else grouped 549 | } 550 | 551 | while (!over(dim)) { 552 | val grouped = recursiveGroupRTreeNode(cur_rtree_nodes, 0, dimension - 1) 553 | var tmp_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 554 | grouped.foreach(list => { 555 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 556 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 557 | list.foreach(now => { 558 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 559 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 560 | }) 561 | val mbr = MBR(Point(min), Point(max)) 562 | tmp_nodes += ((mbr, new RTreeNode(mbr, list))) 563 | }) 564 | cur_rtree_nodes = tmp_nodes.toArray 565 | cur_len = cur_rtree_nodes.length.toDouble 566 | remaining = cur_len / max_entries_per_node 567 | for (i <- 0 until dimension) { 568 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 569 | remaining /= dim(i) 570 | } 571 | } 572 | 573 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 574 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 575 | cur_rtree_nodes.foreach(now => { 576 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 577 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 578 | }) 579 | 580 | val mbr = MBR(Point(min), Point(max)) 581 | val root = new RTreeNode(mbr, cur_rtree_nodes) 582 | new RTree(root) 583 | } 584 | } 585 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/VPTree.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.util.MetricObject 4 | 5 | import 
scala.collection.mutable 6 | import scala.reflect.ClassTag 7 | import scala.util.Random 8 | 9 | /** 10 | * Created by dongx on 2/3/17. 11 | */ 12 | abstract class VPTreeNode[T <: MetricObject: ClassTag] 13 | 14 | case class VPTreeInternalNode[T <: MetricObject: ClassTag](vp: T, threshold: Double, 15 | left: VPTreeNode[T], right: VPTreeNode[T]) extends VPTreeNode[T] 16 | 17 | case class VPTreeLeafNode[T <: MetricObject: ClassTag](points: Array[T]) extends VPTreeNode[T] 18 | 19 | case class VPTree[T <: MetricObject: ClassTag](root: VPTreeNode[T]) extends Index with Serializable { 20 | private[cs] case class HeapItem(point: T, dis: Double) extends Ordered[HeapItem] { 21 | override def compare(that: HeapItem): Int = dis.compare(that.dis) 22 | } 23 | 24 | def knn(query: T, k: Int, dis_threshold: Double = Double.MaxValue): (Array[(T, Double)], Int) = { 25 | val pq = mutable.PriorityQueue[HeapItem]() 26 | var tau = dis_threshold 27 | var checked = 0 28 | 29 | def offer(x: HeapItem) = { 30 | if (pq.size == k) pq.dequeue() 31 | pq.enqueue(x) 32 | if (pq.size == k) tau = pq.head.dis 33 | } 34 | 35 | def recursive_knn(node: VPTreeNode[T]) : Unit = { 36 | if (node != null) { 37 | node match { 38 | case VPTreeLeafNode(ps) => 39 | checked += ps.length 40 | ps.foreach(x => { 41 | val dis = query.distance(x) 42 | if (dis < tau) offer(HeapItem(x, dis)) 43 | }) 44 | case VPTreeInternalNode(vp, th, left, right) => 45 | val vp_dis = query.distance(vp) 46 | checked += 1 47 | if (vp_dis < tau) offer(HeapItem(vp, vp_dis)) 48 | if (vp_dis < th) { 49 | if (vp_dis - tau <= th) recursive_knn(left) 50 | if (vp_dis + tau >= th) recursive_knn(right) 51 | } else { 52 | if (vp_dis + tau >= th) recursive_knn(right) 53 | if (vp_dis - tau <= th) recursive_knn(left) 54 | } 55 | } 56 | } 57 | } 58 | recursive_knn(root) 59 | 60 | (pq.dequeueAll.map(x => (x.point, x.dis)).toArray.reverse, checked) 61 | } 62 | 63 | } 64 | 65 | object VPTree { 66 | def buildNode[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int): VPTreeNode[T] = { 67 | if (points.isEmpty) { 68 | null 69 | } else if (points.length < leaf_capacity) { 70 | VPTreeLeafNode(points) 71 | } else { 72 | val n = points.length 73 | val vp_id = Random.nextInt(n) 74 | val t = points(vp_id) 75 | points(vp_id) = points(0) 76 | points(0) = t 77 | val vp = points.head 78 | val ps_with_dis = points.slice(1, n).map(x => (vp.distance(x), x)).sortBy(_._1) 79 | val median = Math.ceil((n - 1) / 2.0).toInt - 1 80 | val threshold = ps_with_dis(median)._1 81 | VPTreeInternalNode(vp, threshold, 82 | buildNode(ps_with_dis.slice(0, median + 1).map(_._2), leaf_capacity), 83 | buildNode(ps_with_dis.slice(median + 1, n).map(_._2), leaf_capacity)) 84 | } 85 | } 86 | 87 | def apply[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int = 25): VPTree[T] = { 88 | VPTree(buildNode(points, leaf_capacity)) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/VPTreeTest.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.spatial.Point 4 | import edu.utah.cs.util.{BloomFilter, MetricObject} 5 | 6 | /** 7 | * Created by dongx on 2/3/17. 
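 * Usage sketch (editor's addition, mirroring the commented-out test in main() below; VPPoint is the MetricObject wrapper defined in this object): * {{{ * val tree = VPTree((0 until 1000).map(x => VPPoint(Point(Array(x - 1, x + 1)), x + 1)).toArray) * val (neighbors, checked) = tree.knn(VPPoint(Point(Array(3, 3)), 0), k = 10) * neighbors.foreach(println) // (point, distance) pairs, nearest first * }}}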
8 | */ 9 | object VPTreeTest { 10 | private case class VPPoint(data: Point, id: Int) extends MetricObject { 11 | override def distance(o: MetricObject): Double = { 12 | data.minDist(o.asInstanceOf[VPPoint].data) 13 | } 14 | } 15 | 16 | 17 | def main(args: Array[String]): Unit = { 18 | // val tree = VPTree((0 until 1000).map(x => VPPoint(Point(Array(x - 1, x + 1)), x + 1)).toArray) 19 | // tree.knn(VPPoint(Point(Array(3, 3)), 0), 10, 5)._1.foreach(println) 20 | val optimal_num_bits = BloomFilter.optimalNumBits(10000, 0.1) 21 | val optimal_num_hashes = BloomFilter.optimalNumHashes(10000, optimal_num_bits) 22 | println(optimal_num_bits, optimal_num_hashes) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index_bm/RTreeWithBMTest.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index_bm 2 | 3 | import edu.utah.cs.spatial._ 4 | import edu.utah.cs.util._ 5 | import org.roaringbitmap.RoaringBitmap 6 | 7 | /** 8 | * Created by dongx on 10/7/16. 9 | */ 10 | object RTreeWithBMTest { 11 | def main(args: Array[String]) = { 12 | // val bm_meta = BitMapMeta(100) 13 | // BitMap.meta = bm_meta 14 | // val data = (0 until 100).map(x => (LineSegment(Point(Array(x - 1, x)), Point(Array(x, x))), x, x)).toArray 15 | // val rt = RTreeWithBM(data, 10, bm_meta) 16 | // val res = rt.circleRangeBF(LineSegment(Point(Array(2, 2)), Point(Array(1, 2))), 1000) 17 | // println(BitArray.count(res)) 18 | // println(BitArray.count(rt.root.bf)) 19 | // rt.root.bf.foreach(x => println(x.toBinaryString)) 20 | val bitmap1 = RoaringBitmap.bitmapOf(1, 2, 3, 4) 21 | val bitmap2 = RoaringBitmap.bitmapOf(2, 3, 6, 7) 22 | println(RoaringBitmap.andNot(bitmap1, bitmap2)) 23 | println(RoaringBitmap.andNot(bitmap2, bitmap1)) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/IDPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import org.apache.spark.Partitioner 4 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 5 | 6 | 7 | /** 8 | * Created by dongx on 2/1/17. 9 | */ 10 | object IDPartition { 11 | def apply(origin: RDD[_ <: Product2[Int, Any]], n_part: Int) 12 | : RDD[_ <: Product2[Int, Any]] = { 13 | val part = new IDPartitioner(n_part) 14 | val shuffled = new ShuffledRDD[Int, Any, Any](origin, part) 15 | shuffled 16 | } 17 | } 18 | 19 | class IDPartitioner(n_part: Int) extends Partitioner { 20 | override def numPartitions: Int = n_part 21 | 22 | override def getPartition(key: Any): Int = { 23 | key.asInstanceOf[Int] 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRMBRPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.spatial.{MBR, Point} 5 | import org.apache.spark.Partitioner 6 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 7 | 8 | import scala.collection.mutable 9 | 10 | /** 11 | * Created by Dong Xie on 10/24/2016. 
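 * STR (Sort-Tile-Recursive) partitioner over MBRs (editor's summary): a sample of the input is sorted by centroid along each axis in turn and tiled into roughly ceil(est_partition^(1/2)) slabs per axis, e.g. a 4 x 4 grid of partition bounds for est_partition = 16; getPartition then routes each key to the bound containing its centroid via an R-tree built over the bounds.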
12 | */ 13 | object STRMBRPartition { 14 | def apply(origin: RDD[(MBR, Int)], est_partition: Int, 15 | sample_rate: Double, max_entries_per_node: Int): RDD[(MBR, Int)] = { 16 | val part = new STRMBRPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 17 | val shuffled = new ShuffledRDD[MBR, Int, Int](origin, part) 18 | shuffled 19 | } 20 | } 21 | 22 | 23 | class STRMBRPartitioner(est_partition: Int, 24 | sample_rate: Double, 25 | max_entries_per_node: Int, 26 | rdd: RDD[_ <: Product2[MBR, Any]]) 27 | extends Partitioner { 28 | 29 | def numPartitions: Int = partitions 30 | 31 | private case class Bounds(min: Array[Double], max: Array[Double]) 32 | 33 | var (partBound, partitions) = { 34 | val data_bounds = { 35 | rdd.aggregate[Bounds](null)((bound, data) => { 36 | if (bound == null) { 37 | Bounds(data._1.low.coord, data._1.high.coord) 38 | } else { 39 | Bounds(bound.min.zip(data._1.low.coord).map(x => Math.min(x._1, x._2)), 40 | bound.max.zip(data._1.high.coord).map(x => Math.max(x._1, x._2))) 41 | } 42 | }, (left, right) => { 43 | if (left == null) right 44 | else if (right == null) left 45 | else { 46 | Bounds(left.min.zip(right.min).map(x => Math.min(x._1, x._2)), 47 | left.max.zip(right.max).map(x => Math.max(x._1, x._2))) 48 | } 49 | }) 50 | } 51 | 52 | val seed = System.currentTimeMillis() 53 | val sampled = rdd.sample(withReplacement = false, sample_rate, seed).map(_._1).collect() 54 | 55 | val dim = new Array[Int](2) 56 | var remaining = est_partition.toDouble 57 | for (i <- 0 until 2) { 58 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (2 - i))).toInt 59 | remaining /= dim(i) 60 | } 61 | 62 | def recursiveGroupSegment(entries: Array[MBR], now_min: Array[Double], 63 | now_max: Array[Double], cur_dim: Int, until_dim: Int): Array[MBR] = { 64 | val len = entries.length.toDouble 65 | val grouped = entries.sortWith(_.centroid.coord(cur_dim) < _.centroid.coord(cur_dim)) 66 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 67 | val flag = 1 << cur_dim 68 | var ans = mutable.ArrayBuffer[MBR]() 69 | if (cur_dim < until_dim) { 70 | for (i <- grouped.indices) { 71 | val cur_min = now_min 72 | val cur_max = now_max 73 | if (i == 0 && i == grouped.length - 1) { 74 | cur_min(cur_dim) = data_bounds.min(cur_dim) 75 | cur_max(cur_dim) = data_bounds.max(cur_dim) 76 | } else if (i == 0) { 77 | cur_min(cur_dim) = data_bounds.min(cur_dim) 78 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 79 | } else if (i == grouped.length - 1) { 80 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 81 | cur_max(cur_dim) = data_bounds.max(cur_dim) 82 | } else { 83 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 84 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 85 | } 86 | ans ++= recursiveGroupSegment(grouped(i), cur_min, cur_max, cur_dim + 1, until_dim) 87 | } 88 | ans.toArray 89 | } else { 90 | for (i <- grouped.indices) { 91 | if (i == 0 && i == grouped.length - 1) { 92 | now_min(cur_dim) = data_bounds.min(cur_dim) 93 | now_max(cur_dim) = data_bounds.max(cur_dim) 94 | } else if (i == 0) { 95 | now_min(cur_dim) = data_bounds.min(cur_dim) 96 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 97 | } else if (i == grouped.length - 1) { 98 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 99 | now_max(cur_dim) = data_bounds.max(cur_dim) 100 | } else { 101 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 102 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 103 | } 104 | ans += 
MBR(Point(now_min.clone()), Point(now_max.clone())) 105 | } 106 | ans.toArray 107 | } 108 | } 109 | 110 | val cur_min = new Array[Double](2) 111 | val cur_max = new Array[Double](2) 112 | val mbrs = recursiveGroupSegment(sampled, cur_min, cur_max, 0, 1) 113 | 114 | (mbrs.zipWithIndex, mbrs.length) 115 | } 116 | 117 | private val rt = RTree.applyMBR(partBound.map(x => (x._1, x._2, 1)), max_entries_per_node) 118 | 119 | def getPartition(key: Any): Int = { 120 | val k = key.asInstanceOf[MBR] 121 | 122 | rt.circleRange(k.centroid, 0.0).head._2 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRSegPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 4 | import edu.utah.cs.trajectory.TrajMeta 5 | import edu.utah.cs.index.RTree 6 | import org.apache.spark.Partitioner 7 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 8 | 9 | import scala.collection.mutable 10 | 11 | /** 12 | * Created by dongx on 8/30/16. 13 | * STRPartitioner for two-dimensional Line Segments 14 | */ 15 | 16 | object STRSegPartition { 17 | def apply(origin: RDD[(LineSegment, TrajMeta)], est_partition: Int, 18 | sample_rate: Double, max_entries_per_node: Int) 19 | : (RDD[(LineSegment, TrajMeta)], Array[(MBR, Int)]) = { 20 | val part = new STRSegPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 21 | val shuffled = new ShuffledRDD[LineSegment, TrajMeta, TrajMeta](origin, part) 22 | (shuffled, part.partBound) 23 | } 24 | } 25 | 26 | 27 | class STRSegPartitioner(est_partition: Int, 28 | sample_rate: Double, 29 | max_entries_per_node: Int, 30 | rdd: RDD[_ <: Product2[LineSegment, Any]]) 31 | extends Partitioner { 32 | 33 | def numPartitions: Int = partitions 34 | 35 | private case class Bounds(min: Array[Double], max: Array[Double]) 36 | 37 | var (partBound, partitions) = { 38 | val data_bounds = { 39 | rdd.aggregate[Bounds](null)((bound, data) => { 40 | if (bound == null) { 41 | val tmp_mbr = data._1.getMBR 42 | Bounds(tmp_mbr.low.coord, tmp_mbr.high.coord) 43 | } else { 44 | val tmp_mbr = data._1.getMBR 45 | Bounds(bound.min.zip(tmp_mbr.low.coord).map(x => Math.min(x._1, x._2)), 46 | bound.max.zip(tmp_mbr.high.coord).map(x => Math.max(x._1, x._2))) 47 | } 48 | }, (left, right) => { 49 | if (left == null) right 50 | else if (right == null) left 51 | else { 52 | Bounds(left.min.zip(right.min).map(x => Math.min(x._1, x._2)), 53 | left.max.zip(right.max).map(x => Math.max(x._1, x._2))) 54 | } 55 | }) 56 | } 57 | 58 | val seed = System.currentTimeMillis() 59 | val sampled = rdd.sample(withReplacement = false, sample_rate, seed).map(_._1).collect() 60 | 61 | val dim = new Array[Int](2) 62 | var remaining = est_partition.toDouble 63 | for (i <- 0 until 2) { 64 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (2 - i))).toInt 65 | remaining /= dim(i) 66 | } 67 | 68 | def recursiveGroupSegment(entries: Array[LineSegment], now_min: Array[Double], 69 | now_max: Array[Double], cur_dim: Int, until_dim: Int): Array[MBR] = { 70 | val len = entries.length.toDouble 71 | val grouped = entries.sortWith(_.centroid.coord(cur_dim) < _.centroid.coord(cur_dim)) 72 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 73 | var ans = mutable.ArrayBuffer[MBR]() 74 | if (cur_dim < until_dim) { 75 | for (i <- grouped.indices) { 76 | val cur_min = now_min 77 | val cur_max = now_max 78 | if (i == 0 && i == 
grouped.length - 1) { 79 | cur_min(cur_dim) = data_bounds.min(cur_dim) 80 | cur_max(cur_dim) = data_bounds.max(cur_dim) 81 | } else if (i == 0) { 82 | cur_min(cur_dim) = data_bounds.min(cur_dim) 83 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 84 | } else if (i == grouped.length - 1) { 85 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 86 | cur_max(cur_dim) = data_bounds.max(cur_dim) 87 | } else { 88 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 89 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 90 | } 91 | ans ++= recursiveGroupSegment(grouped(i), cur_min, cur_max, cur_dim + 1, until_dim) 92 | } 93 | ans.toArray 94 | } else { 95 | for (i <- grouped.indices) { 96 | if (i == 0 && i == grouped.length - 1) { 97 | now_min(cur_dim) = data_bounds.min(cur_dim) 98 | now_max(cur_dim) = data_bounds.max(cur_dim) 99 | } else if (i == 0) { 100 | now_min(cur_dim) = data_bounds.min(cur_dim) 101 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 102 | } else if (i == grouped.length - 1) { 103 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 104 | now_max(cur_dim) = data_bounds.max(cur_dim) 105 | } else { 106 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 107 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 108 | } 109 | ans += MBR(Point(now_min.clone()), Point(now_max.clone())) 110 | } 111 | ans.toArray 112 | } 113 | } 114 | 115 | val cur_min = new Array[Double](2) 116 | val cur_max = new Array[Double](2) 117 | val mbrs = recursiveGroupSegment(sampled, cur_min, cur_max, 0, 1) 118 | 119 | (mbrs.zipWithIndex, mbrs.length) 120 | } 121 | 122 | private val rt = RTree.applyMBR(partBound.map(x => (x._1, x._2, 1)), max_entries_per_node) 123 | 124 | def getPartition(key: Any): Int = { 125 | val k = key.asInstanceOf[LineSegment] 126 | 127 | rt.circleRange(k.centroid, 0.0).head._2 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRTrajPartition.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR} 4 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 5 | 6 | import scala.collection.mutable 7 | 8 | /** 9 | * Created by dongx on 1/16/2017. 10 | */ 11 | object STRTrajPartition { 12 | def apply(origin: RDD[(MBR, (Int, Array[LineSegment]))], est_partition: Int, 13 | sample_rate: Double, max_entries_per_node: Int) 14 | : RDD[(MBR, (Int, Array[LineSegment]))] = { 15 | val part = new STRMBRPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 16 | val shuffled = new ShuffledRDD[MBR, (Int, Array[LineSegment]), (Int, Array[LineSegment])](origin, part) 17 | shuffled 18 | } 19 | } -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Circle.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 3/16/16. 21 | */ 22 | case class Circle(center: Point, radius: Double) extends Shape { 23 | override def intersects(other: Shape): Boolean = { 24 | other match { 25 | case p: Point => contains(p) 26 | case mbr: MBR => intersects(mbr) 27 | case cir: Circle => intersects(cir) 28 | case poly: Polygon => poly.intersects(this) 29 | case seg: LineSegment => seg.intersects(this) 30 | } 31 | } 32 | 33 | override def minDist(other: Shape): Double = { 34 | other match { 35 | case p: Point => minDist(p) 36 | case mbr: MBR => minDist(mbr) 37 | case cir: Circle => minDist(cir) 38 | case poly: Polygon => poly.minDist(this) 39 | case seg: LineSegment => seg.minDist(this) 40 | } 41 | } 42 | 43 | def minDist(other: Point): Double = { 44 | require(center.coord.length == other.coord.length) 45 | if (contains(other)) 0.0 46 | else other.minDist(center) - radius 47 | } 48 | 49 | def minDist(other: MBR): Double = { 50 | require(center.coord.length == other.low.coord.length) 51 | if (intersects(other)) 0.0 52 | else center.minDist(other) - radius 53 | } 54 | 55 | def minDist(other: Circle): Double = { 56 | require(center.coord.length == other.center.coord.length) 57 | if (intersects(other)) 0.0 58 | else center.minDist(other.center) - radius - other.radius 59 | } 60 | 61 | def contains(p: Point): Boolean = p.minDist(center) <= radius 62 | 63 | def intersects(other: MBR): Boolean = center.minDist(other) <= radius 64 | 65 | def intersects(other: Circle): Boolean = other.center.minDist(center) <= other.radius + radius 66 | 67 | def getMBR: MBR = new MBR(center.shift(-radius), center.shift(radius)) 68 | 69 | override def toString: String = "CIRCLE(" + center.toString + "," + radius + ")" 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Dist.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 
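 * furthest(a, b) below (editor's note) is the maximum distance from point a to any point of box b, taking the farther box face per axis: sqrt(sum_i max((a_i - b.low_i)^2, (a_i - b.high_i)^2)).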
21 | * Distance Utilities 22 | */ 23 | object Dist { 24 | def furthest(a: Point, b: MBR) : Double = { 25 | require(a.coord.length == b.low.coord.length) 26 | var ans = 0.0 27 | for (i <- a.coord.indices) { 28 | ans += Math.max((a.coord(i) - b.low.coord(i)) * (a.coord(i) - b.low.coord(i)), 29 | (a.coord(i) - b.high.coord(i)) * (a.coord(i) - b.high.coord(i))) 30 | } 31 | Math.sqrt(ans) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/DistanceUtil.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.spatial 2 | 3 | import org.apache.commons.math3.util.FastMath 4 | 5 | object DistanceUtil { 6 | def computeGPSCoordDis(lat0: Double, lon0: Double, lat1: Double, lon1: Double): Double = { 7 | val R = 6371e3 8 | val phi0 = lat0.toRadians 9 | val phi1 = lat1.toRadians 10 | val delta_phi = (lat1 - lat0).toRadians 11 | val delta_lambda = (lon1 - lon0).toRadians 12 | 13 | val a = FastMath.sin(delta_phi / 2) * FastMath.sin(delta_phi / 2) + 14 | FastMath.cos(phi0) * FastMath.cos(phi1) * 15 | FastMath.sin(delta_lambda / 2) * Math.sin(delta_lambda / 2) 16 | val c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)) 17 | 18 | R * c 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/LineSegment.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | package edu.utah.cs.spatial 19 | 20 | /** 21 | * Created by dongx on 5/31/16. 22 | * This is a simple implementation for Line Segment. 23 | * Note: Currently, we only support 2D line segments. 
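 * minDist(p: Point) below uses the standard clamped projection (editor's note): with t = ((p - start) . (end - start)) / |end - start|^2 clamped to [0, 1], the answer is the distance from p to start + t * (end - start).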
24 | */ 25 | case class LineSegment(start: Point, end: Point) extends Shape { 26 | require(start.coord.length == 2 && end.coord.length == 2) 27 | 28 | val centroid = new Point(start.coord.zip(end.coord).map(x => (x._1 + x._2) / 2.0)) 29 | 30 | override def intersects(other: Shape): Boolean = { 31 | other match { 32 | case p: Point => contains(p) 33 | case mbr: MBR => intersects(mbr) 34 | case cir: Circle => intersects(cir) 35 | case poly: Polygon => poly.intersects(this) 36 | case seg: LineSegment => intersects(seg) 37 | } 38 | } 39 | 40 | override def minDist(other: Shape): Double = { 41 | other match { 42 | case p: Point => minDist(p) 43 | case mbr: MBR => minDist(mbr) 44 | case cir: Circle => minDist(cir) 45 | case poly: Polygon => poly.minDist(this) 46 | case seg: LineSegment => minDist(seg) 47 | } 48 | } 49 | 50 | def matchDist(other: LineSegment): Double = { 51 | Math.max(Math.max(start.minDist(other), end.minDist(other)), 52 | Math.max(other.start.minDist(this), other.end.minDist(this))) 53 | } 54 | 55 | private def orientation(p: Point, q: Point, r: Point): Int = { 56 | val cross = (q.coord(1) - p.coord(1)) * (r.coord(0) - q.coord(0)) - 57 | (q.coord(0) - p.coord(0)) * (r.coord(1) - q.coord(1)) 58 | if (cross == 0) 0 59 | else if (cross > 0) 1 60 | else -1 61 | } 62 | 63 | private def withinBox(check: Point, start: Point, end: Point): Boolean = { 64 | if (check.coord(0) >= Math.min(start.coord(0), end.coord(0)) && 65 | check.coord(0) <= Math.max(start.coord(0), end.coord(0)) && 66 | check.coord(1) >= Math.min(start.coord(1), end.coord(1)) && 67 | check.coord(1) <= Math.max(start.coord(1), end.coord(1))) { 68 | true 69 | } else false 70 | } 71 | 72 | def intersects(l: LineSegment): Boolean = intersects(l.start, l.end) 73 | 74 | private def intersects(p: Point, q: Point): Boolean = { 75 | val o1 = orientation(start, end, p) 76 | val o2 = orientation(start, end, q) 77 | val o3 = orientation(p, q, start) 78 | val o4 = orientation(p, q, end) 79 | if (o1 != o2 && o3 != o4) true 80 | else if (o1 == 0 && withinBox(p, start, end)) true 81 | else if (o2 == 0 && withinBox(q, start, end)) true 82 | else if (o3 == 0 && withinBox(start, p, q)) true 83 | else if (o4 == 0 && withinBox(end, p, q)) true 84 | else false 85 | } 86 | 87 | def contains(l: Point): Boolean = orientation(start, l, end) == 0 && withinBox(l, start, end) 88 | 89 | def intersects(cir: Circle): Boolean = { 90 | minDist(cir.center) <= cir.radius 91 | } 92 | 93 | def intersects(mbr: MBR): Boolean = { 94 | assert(mbr.low.coord.length == 2) 95 | if (mbr.contains(start) && mbr.contains(end)) true 96 | else if (intersects(mbr.low, Point(Array(mbr.high.coord(0), mbr.low.coord(1))))) true 97 | else if (intersects(mbr.low, Point(Array(mbr.low.coord(0), mbr.high.coord(1))))) true 98 | else if (intersects(mbr.high, Point(Array(mbr.high.coord(0), mbr.low.coord(1))))) true 99 | else if (intersects(mbr.high, Point(Array(mbr.low.coord(0), mbr.high.coord(1))))) true 100 | else false 101 | } 102 | 103 | def minDist(p: Point): Double = { 104 | require(p.coord.length == 2) 105 | val len = start.minDist(end) 106 | if (len == 0) return p.minDist(start) 107 | var t = ((p.coord(0) - start.coord(0)) * (end.coord(0) - start.coord(0)) 108 | + (p.coord(1) - start.coord(1)) * (end.coord(1) - start.coord(1))) / (len * len) 109 | t = Math.max(0, Math.min(1, t)) 110 | val proj_x = start.coord(0) + t * (end.coord(0) - start.coord(0)) 111 | val proj_y = start.coord(1) + t * (end.coord(1) - start.coord(1)) 112 | p.minDist(Point(Array(proj_x, proj_y))) 113 | } 
} 114 | 115 | def minDist(cir: Circle): Double = { 116 | val central_dis = minDist(cir.center) 117 | if (central_dis <= cir.radius) 0.0 118 | else central_dis - cir.radius 119 | } 120 | 121 | def minDist(l: LineSegment): Double = { 122 | if (intersects(l)) 0.0 123 | else { 124 | Math.min(Math.min(minDist(l.start), minDist(l.end)), 125 | Math.min(l.minDist(start), l.minDist(end))) 126 | } 127 | } 128 | 129 | def minDist(mbr: MBR): Double = { 130 | if (mbr.contains(start) && mbr.contains(end)) return 0.0 131 | val s1 = LineSegment(mbr.low, Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) 132 | val s2 = LineSegment(mbr.low, Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) 133 | val s3 = LineSegment(mbr.high, Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) 134 | val s4 = LineSegment(mbr.high, Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) 135 | Math.min(Math.min(minDist(s1), minDist(s2)), Math.min(minDist(s3), minDist(s4))) 136 | } 137 | 138 | def cover(mbr: MBR, r: Double): Boolean = { 139 | if (minDist(mbr.low) > r) false 140 | else if (minDist(mbr.high) > r) false 141 | else if (minDist(Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) > r) false 142 | else if (minDist(Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) > r) false 143 | else true 144 | } 145 | 146 | override def getMBR: MBR = { 147 | val (low_x, high_x) = if (start.coord(0) < end.coord(0)) { 148 | (start.coord(0), end.coord(0)) 149 | } else { 150 | (end.coord(0), start.coord(0)) 151 | } 152 | 153 | val (low_y, high_y) = if (start.coord(1) < end.coord(1)) { 154 | (start.coord(1), end.coord(1)) 155 | } else { 156 | (end.coord(1), start.coord(1)) 157 | } 158 | 159 | MBR(Point(Array(low_x, low_y)), Point(Array(high_x, high_y))) 160 | } 161 | 162 | def length: Double = start.minDist(end) 163 | 164 | override def toString: String = "SEG(" + start.toString + "->" + end.toString + ")" 165 | 166 | def toTSV: String = start.coord(0) + "\t" + start.coord(1) + "\t" + end.coord(0) + "\t" + end.coord(1) 167 | } 168 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/LineString.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.spatial 2 | 3 | /** 4 | * Created by dongx on 1/16/2017. 5 | */ 6 | case class LineString(segs: Array[LineSegment]) extends Shape { 7 | private val mbr: MBR = segs.foldLeft(segs(0).getMBR)((now, seg) => now.union(seg.getMBR)) 8 | 9 | override def minDist(other: Shape): Double = segs.map(x => x.minDist(other)).min 10 | 11 | override def intersects(other: Shape): Boolean = segs.exists(x => x.intersects(other)) 12 | 13 | def hausdorff(other: LineString): Double = 14 | Math.max(segs.map(now_x => other.segs.map(now_y => now_x.minDist(now_y)).min).max, 15 | other.segs.map(now_x => segs.map(now_y => now_x.minDist(now_y)).min).max) 16 | 17 | override def getMBR: MBR = mbr 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/MBR.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 21 | * Multi-Dimensional Minimum Bounding Box 22 | */ 23 | case class MBR(low: Point, high: Point) extends Shape { 24 | require(low.coord.length == high.coord.length) 25 | require(low <= high) 26 | 27 | override def intersects(other: Shape): Boolean = { 28 | other match { 29 | case p: Point => contains(p) 30 | case mbr: MBR => intersects(mbr) 31 | case cir: Circle => cir.intersects(this) 32 | case poly: Polygon => poly.intersects(this) 33 | case seg: LineSegment => seg.intersects(this) 34 | } 35 | } 36 | 37 | override def minDist(other: Shape): Double = { 38 | other match { 39 | case p: Point => minDist(p) 40 | case mbr: MBR => minDist(mbr) 41 | case cir: Circle => cir.minDist(this) 42 | case poly: Polygon => poly.minDist(this) 43 | case seg: LineSegment => seg.minDist(this) 44 | } 45 | } 46 | 47 | def this(low_x: Double, low_y: Double, high_x: Double, high_y: Double) { 48 | this(Point(Array(low_x, low_y)), Point(Array(high_x, high_y))) 49 | } 50 | 51 | val centroid = Point(low.coord.zip(high.coord).map(x => (x._1 + x._2) / 2.0)) 52 | 53 | def union(other: MBR): MBR = { 54 | val new_low = low.coord.zip(other.low.coord).map(x => Math.min(x._1, x._2)) 55 | val new_high = high.coord.zip(other.high.coord).map(x => Math.max(x._1, x._2)) 56 | MBR(Point(new_low), Point(new_high)) 57 | } 58 | 59 | def intersects(other: MBR): Boolean = { 60 | require(low.coord.length == other.low.coord.length) 61 | for (i <- low.coord.indices) 62 | if (low.coord(i) > other.high.coord(i) || high.coord(i) < other.low.coord(i)) { 63 | return false 64 | } 65 | true 66 | } 67 | 68 | def contains(p: Point): Boolean = { 69 | require(low.coord.length == p.coord.length) 70 | for (i <- p.coord.indices) 71 | if (low.coord(i) > p.coord(i) || high.coord(i) < p.coord(i)) { 72 | return false 73 | } 74 | true 75 | } 76 | 77 | def minDist(p: Point): Double = { 78 | require(low.coord.length == p.coord.length) 79 | var ans = 0.0 80 | for (i <- p.coord.indices) { 81 | if (p.coord(i) < low.coord(i)) { 82 | ans += (low.coord(i) - p.coord(i)) * (low.coord(i) - p.coord(i)) 83 | } else if (p.coord(i) > high.coord(i)) { 84 | ans += (p.coord(i) - high.coord(i)) * (p.coord(i) - high.coord(i)) 85 | } 86 | } 87 | Math.sqrt(ans) 88 | } 89 | 90 | def maxDist(p: Point): Double = { 91 | require(low.coord.length == p.coord.length) 92 | var ans = 0.0 93 | for (i <- p.coord.indices) { 94 | ans += Math.max((p.coord(i) - low.coord(i)) * (p.coord(i) - low.coord(i)), 95 | (p.coord(i) - high.coord(i)) * (p.coord(i) - high.coord(i))) 96 | } 97 | Math.sqrt(ans) 98 | } 99 | 100 | def minDist(other: MBR): Double = { 101 | require(low.coord.length == other.low.coord.length) 102 | var ans = 0.0 103 | for (i <- low.coord.indices) { 104 | var x = 0.0 105 | if (other.high.coord(i) < low.coord(i)) { 106 | x = Math.abs(other.high.coord(i) - low.coord(i)) 107 | } else if (high.coord(i) < other.low.coord(i)) { 108 | x = Math.abs(other.low.coord(i) - high.coord(i)) 109 | } 110 | ans += x * x 111 | } 112 | Math.sqrt(ans) 113 | } 114 | 115 
| def area: Double = low.coord.zip(high.coord).map(x => x._2 - x._1).product 116 | 117 | def calcRatio(query: MBR): Double = { 118 | val intersect_low = low.coord.zip(query.low.coord).map(x => Math.max(x._1, x._2)) 119 | val intersect_high = high.coord.zip(query.high.coord).map(x => Math.min(x._1, x._2)) 120 | val diff_intersect = intersect_low.zip(intersect_high).map(x => x._2 - x._1) 121 | if (diff_intersect.forall(_ > 0)) 1.0 * diff_intersect.product / area 122 | else 0.0 123 | } 124 | 125 | override def toString: String = "MBR(" + low.toString + "," + high.toString + ")" 126 | 127 | def getMBR: MBR = this.copy() 128 | } 129 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Point.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 21 | * Multi-Dimensional Point 22 | */ 23 | final case class Point(coord: Array[Double]) extends Shape { 24 | def this() = this(Array()) 25 | 26 | override def intersects(other: Shape): Boolean = { 27 | other match { 28 | case p: Point => p == this 29 | case mbr: MBR => mbr.contains(this) 30 | case cir: Circle => cir.contains(this) 31 | case poly: Polygon => poly.contains(this) 32 | case seg: LineSegment => seg.contains(this) 33 | } 34 | } 35 | 36 | override def minDist(other: Shape): Double = { 37 | other match { 38 | case p: Point => minDist(p) 39 | case mbr: MBR => mbr.minDist(this) 40 | case cir: Circle => cir.minDist(this) 41 | case poly: Polygon => poly.minDist(this) 42 | case seg: LineSegment => seg.minDist(this) 43 | } 44 | } 45 | 46 | def minDist(other: Point): Double = { 47 | require(coord.length == other.coord.length) 48 | var ans = 0.0 49 | for (i <- coord.indices) 50 | ans += (coord(i) - other.coord(i)) * (coord(i) - other.coord(i)) 51 | Math.sqrt(ans) 52 | } 53 | 54 | def ==(other: Point): Boolean = other match { 55 | case p: Point => 56 | if (p.coord.length != coord.length) false 57 | else { 58 | for (i <- coord.indices) 59 | if (coord(i) != p.coord(i)) return false 60 | true 61 | } 62 | case _ => false 63 | } 64 | 65 | def <=(other: Point): Boolean = { 66 | for (i <- coord.indices) 67 | if (coord(i) > other.coord(i)) return false 68 | true 69 | } 70 | 71 | def shift(d: Double): Point = Point(coord.map(x => x + d)) 72 | 73 | override def toString: String = { 74 | var s = "POINT(" 75 | s += coord(0).toString 76 | for (i <- 1 until coord.length) s += "," + coord(i) 77 | s + ")" 78 | } 79 | 80 | def getMBR: MBR = new MBR(this, this) 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Polygon.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under 
the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | import com.vividsolutions.jts.geom.{Coordinate, Envelope, GeometryFactory, Polygon => JTSPolygon} 20 | import com.vividsolutions.jts.io.{WKBReader, WKBWriter, WKTWriter} 21 | 22 | /** 23 | * Created by Dong Xie on 3/16/2016. 24 | * Light wrapper of JTS Polygon 25 | * Note: Only supports up to 2 dimensions 26 | */ 27 | case class Polygon(content: JTSPolygon) extends Shape { 28 | def this() = { 29 | this(null) 30 | } 31 | 32 | val gf = new GeometryFactory() 33 | 34 | override def minDist(other: Shape): Double = { 35 | other match { 36 | case p: Point => minDist(p) 37 | case mbr: MBR => minDist(mbr) 38 | case cir: Circle => minDist(cir) 39 | case poly: Polygon => minDist(poly) case seg: LineSegment => minDist(seg) 40 | } 41 | } 42 | 43 | override def intersects(other: Shape): Boolean = { 44 | other match { 45 | case p: Point => contains(p) 46 | case mbr: MBR => intersects(mbr) 47 | case cir: Circle => intersects(cir) 48 | case poly: Polygon => intersects(poly) case seg: LineSegment => intersects(seg) 49 | } 50 | } 51 | 52 | def contains(p: Point): Boolean = { 53 | require(p.coord.length == 2) 54 | content.contains(gf.createPoint(new Coordinate(p.coord(0), p.coord(1)))) 55 | } 56 | 57 | def intersects(mbr: MBR): Boolean = { 58 | require(mbr.low.coord.length == 2) 59 | val low = new Coordinate(mbr.low.coord(0), mbr.low.coord(1)) 60 | val high = new Coordinate(mbr.high.coord(0), mbr.high.coord(1)) 61 | content.intersects(gf.toGeometry(new Envelope(low, high))) 62 | } 63 | 64 | def intersects(cir: Circle): Boolean = minDist(cir.center) <= cir.radius 65 | 66 | def intersects(poly: Polygon): Boolean = content.intersects(poly.content) 67 | 68 | def intersects(seg: LineSegment): Boolean = { 69 | val start = new Coordinate(seg.start.coord(0), seg.start.coord(1)) 70 | val end = new Coordinate(seg.end.coord(0), seg.end.coord(1)) 71 | content.intersects(gf.createLineString(Array(start, end))) 72 | } 73 | 74 | def minDist(p: Point): Double = { 75 | require(p.coord.length == 2) 76 | content.distance(gf.createPoint(new Coordinate(p.coord(0), p.coord(1)))) 77 | } 78 | 79 | def minDist(mbr: MBR): Double = { 80 | require(mbr.low.coord.length == 2) 81 | val low = new Coordinate(mbr.low.coord(0), mbr.low.coord(1)) 82 | val high = new Coordinate(mbr.high.coord(0), mbr.high.coord(1)) 83 | content.distance(gf.toGeometry(new Envelope(low, high))) 84 | } 85 | 86 | def minDist(cir: Circle): Double = { 87 | val res = minDist(cir.center) - cir.radius 88 | if (res <= 0) 0 89 | else res 90 | } 91 | 92 | 93 | def minDist(poly: Polygon): Double = content.distance(poly.content) 94 | 95 | def minDist(seg: LineSegment): Double = { 96 | val start = new Coordinate(seg.start.coord(0), seg.start.coord(1)) 97 | val end = new Coordinate(seg.end.coord(0), seg.end.coord(1)) 98 | content.distance(gf.createLineString(Array(start, end))) 99 | } 100 | 101 | override def toString: String = new WKTWriter().write(content) 102 | def toWKB: Array[Byte] = new WKBWriter().write(content) 103 | 104 | def getMBR: MBR
= { 105 | val envelope = content.getEnvelopeInternal 106 | new MBR(envelope.getMinX, envelope.getMinY, envelope.getMaxX, envelope.getMaxY) 107 | } 108 | } 109 | 110 | object Polygon { 111 | def apply(points: Array[Point]): Polygon = { 112 | require(points.length > 2 && points(0).coord.length == 2) 113 | val gf = new GeometryFactory() 114 | Polygon(gf.createPolygon(points.map(x => new Coordinate(x.coord(0), x.coord(1))))) 115 | } 116 | def fromJTSPolygon(polygon: JTSPolygon): Polygon = new Polygon(polygon) 117 | def fromWKB(bytes: Array[Byte]): Polygon = 118 | new Polygon(new WKBReader().read(bytes).asInstanceOf[JTSPolygon]) 119 | } 120 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Shape.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | import com.vividsolutions.jts.geom.{Geometry, Polygon => JTSPolygon} 20 | 21 | /** 22 | * Created by dong on 3/16/16. 23 | */ 24 | abstract class Shape extends Serializable { 25 | def minDist(other: Shape): Double 26 | 27 | def intersects(other: Shape): Boolean 28 | 29 | def getMBR: MBR 30 | } 31 | 32 | object Shape { 33 | final def apply(g: Geometry): Shape = g match { 34 | case jtsPolygon : JTSPolygon => new Polygon(jtsPolygon) 35 | case _ => null 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/ZValue.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 
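 * Worked example (editor's addition): interleaving the bits of (1, 2) = (01, 10) one bit per dimension per round yields 0110 in binary, so ZValue(Array(1, 2)) == 6L, and ZValue.unapply(6L, 2) recovers Array(1, 2).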
21 | * Utilities for Z-Value Curves 22 | */ 23 | object ZValue { 24 | def paddingBinaryBits(source: Int, digits: Int): String = { 25 | val pd_length = digits - source.toBinaryString.length 26 | "0" * pd_length + source.toBinaryString 27 | } 28 | 29 | // TODO shift Long to BigInt for supporting bigger Z-Values 30 | def apply(point: Array[Int]): Long = { 31 | var maxBit = 0 32 | for (i <- point.indices) 33 | if (point(i).toBinaryString.length > maxBit) { 34 | maxBit = point(i).toBinaryString.length 35 | } 36 | 37 | var ans = "" 38 | val pointStrs = point.map(x => paddingBinaryBits(x, maxBit)) 39 | 40 | for (i <- 0 until maxBit) 41 | for (j <- point.indices) 42 | ans += pointStrs(j)(i) 43 | 44 | java.lang.Long.parseLong(ans, 2) 45 | } 46 | 47 | def unapply(value: Long, dimension: Int): Option[Array[Int]] = { 48 | val ans = new Array[Int](dimension) 49 | val binaryZValue = value.toBinaryString 50 | var currentBit = binaryZValue.length - 1 51 | var shiftBase = 1 52 | while (currentBit >= 0) { 53 | for (i <- 0 until dimension) 54 | if (currentBit - dimension + 1 + i >= 0) { 55 | ans(i) += shiftBase * binaryZValue(currentBit - dimension + 1 + i).toString.toInt 56 | } 57 | 58 | currentBit -= dimension 59 | shiftBase *= 2 60 | } 61 | Some(ans) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BFDISolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | import edu.utah.cs.index.RTree 3 | import edu.utah.cs.index_bf.RTreeWithBF 4 | import edu.utah.cs.partitioner.{STRSegPartition, STRTrajPartition} 5 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 6 | import edu.utah.cs.util._ 7 | import org.apache.spark.rdd.PartitionPruningRDD 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | 11 | import scala.collection.mutable 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16.
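 * Pipeline sketch (editor's summary of main() below): (1) take c * k random trajectories overlapping the query's partitions and use the k-th smallest discrete Frechet distance as a pruning bound; (2) run a range query with that bound over the per-partition R-trees with Bloom filters to collect candidate trajectory ids; (3) refine the surviving trajectories with the exact distance and return the top k.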
16 | * Line Segment Trajectory Storage 17 | */ 18 | object BFDISolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | 23 | private class ResultOrdering extends Ordering[(Double, Int)] { 24 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 25 | } 26 | 27 | def getMBR(x: Array[LineSegment]): MBR = { 28 | val pts = x.flatMap(p => Array(p.start, p.end)) 29 | var maxx = Double.MinValue 30 | var maxy = Double.MinValue 31 | var minx = Double.MaxValue 32 | var miny = Double.MaxValue 33 | pts.foreach(pt => { 34 | maxx = Math.max(pt.coord(0), maxx) 35 | maxy = Math.max(pt.coord(1), maxy) 36 | minx = Math.min(pt.coord(0), minx) 37 | miny = Math.min(pt.coord(1), miny) 38 | }) 39 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 40 | } 41 | 42 | def main(args: Array[String]): Unit = { 43 | val sparkConf = new SparkConf().setAppName("BFDISolution").set("spark.locality.wait", "0") 44 | .set("spark.driver.maxResultSize", "4g") 45 | val sc = new SparkContext(sparkConf) 46 | 47 | if (args.length != 2) { 48 | println("usage: BFDISolution <query_traj_filename> <traj_data_filename>") 49 | System.exit(1) 50 | } 51 | 52 | Thread.sleep(3000) 53 | 54 | val query_traj_filename = args(0) 55 | val traj_data_filename = args(1) 56 | 57 | val start1 = System.currentTimeMillis() 58 | 59 | val dataRDD = sc.textFile(traj_data_filename) 60 | .map(x => x.split('\t')) 61 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 62 | Point(Array(x(3).toDouble, x(4).toDouble))), 63 | TrajMeta(x(0).toInt, x(5).toInt))) 64 | 65 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 66 | iter.map(x => { 67 | val splitted = x.split("\t") 68 | (splitted(0).toInt, 69 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 70 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 71 | }).toArray.groupBy(_._1).map(now => { 72 | val cur_traj = now._2.sortBy(_._1).map(_._2) 73 | (getMBR(cur_traj), (now._1, cur_traj)) 74 | }).iterator 75 | }) 76 | 77 | val part_traj = STRTrajPartition(trajs, dataRDD.partitions.length, 0.01, max_entries_per_node) 78 | .persist(StorageLevel.MEMORY_AND_DISK_SER) 79 | 80 | println(part_traj.partitions.length) 81 | 82 | val traj_stat = part_traj.mapPartitions(iter => { 83 | Array(iter.aggregate[(MBR, Int)]((null, 0))((res, now) => { 84 | if (res._1 == null) (now._1, 1) 85 | else (res._1.union(now._1), res._2 + 1) 86 | }, (left, right) => { 87 | if (left._1 == null) right 88 | else if (right._1 == null) left 89 | else (left._1.union(right._1), left._2 + right._2) 90 | })).iterator 91 | }).collect() 92 | val traj_global_rtree = 93 | RTree.applyMBR(traj_stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 94 | 95 | val optimal_num_bits = BloomFilter.optimalNumBits(10000, 0.1) 96 | val optimal_num_hashes = BloomFilter.optimalNumHashes(10000, optimal_num_bits) 97 | val bf_meta = BloomFilterMeta(optimal_num_bits, optimal_num_hashes) 98 | val bc_bf_meta = sc.broadcast(bf_meta) 99 | BloomFilter.meta = bf_meta 100 | 101 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 102 | 103 | val indexed_seg_rdd_with_traj_id = partitioned_rdd.mapPartitions(iter => { 104 | BloomFilter.meta = bc_bf_meta.value 105 | val data = iter.toArray 106 | var index: RTreeWithBF = null 107 | if (data.length > 0) { 108 | index = RTreeWithBF(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 109 | max_entries_per_node,
bc_bf_meta.value) 110 | } 111 | Iterator((data.map(_._2.traj_id).distinct, index)) 112 | }) 113 | val indexed_seg_rdd = indexed_seg_rdd_with_traj_id.map(_._2).persist(StorageLevel.MEMORY_AND_DISK_SER) 114 | indexed_seg_rdd.count() 115 | val stat = indexed_seg_rdd_with_traj_id 116 | .mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._2.root.size, x._1))).collect() 117 | 118 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 119 | 120 | val end1 = System.currentTimeMillis() 121 | println("------------------------------------------------------------") 122 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 123 | println("------------------------------------------------------------") 124 | 125 | 126 | val query_traj_file = Source.fromFile(query_traj_filename) 127 | val queries = query_traj_file.getLines().map { line => 128 | val splitted = line.split('\t') 129 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 130 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 131 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 132 | 133 | var tot_time = 0.0 134 | queries.foreach(query_traj => { 135 | val start2 = System.currentTimeMillis() 136 | val bc_query = sc.broadcast(query_traj) 137 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 138 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 139 | val global_intersect_set = global_intersect.map(_._2).toSet 140 | 141 | val sample_base = stat.zipWithIndex.filter(x => global_intersect_set.contains(x._2)).flatMap(_._1._3) 142 | 143 | val cards = sample_base.length 144 | val rnd = scala.util.Random 145 | val set = mutable.HashSet[Int]() 146 | val samples = mutable.HashSet[Int]() 147 | val n_samples = c * k 148 | for (i <- 0 until n_samples) { 149 | var x = rnd.nextInt(cards) 150 | while (set.contains(x)) x = rnd.nextInt(cards) 151 | set += x 152 | samples += sample_base(x) 153 | } 154 | 155 | val bc_samples = sc.broadcast(samples.toSet) 156 | val pruning_bound_filter = traj_global_rtree.circleRange(global_intersect_mbrs, 0.0).map(_._2).toSet 157 | val pruning_bound = new PartitionPruningRDD(part_traj, pruning_bound_filter.contains) 158 | .filter(x => bc_samples.value.contains(x._2._1)) 159 | .repartition(Math.min(samples.size, sc.defaultParallelism)) 160 | .map(x => Trajectory.discreteFrechetDistance(bc_query.value, x._2._2)) 161 | .takeOrdered(k).last 162 | val end2 = System.currentTimeMillis() 163 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 164 | println("The pruning bound is: " + pruning_bound) 165 | 166 | val start3 = System.currentTimeMillis() 167 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 168 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 169 | val global_prune_set = global_prune.map(_._2).toSet 170 | 171 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 172 | 173 | val bc_prunbound = sc.broadcast(pruning_bound) 174 | val saved_trajs = pruned_rdd.map(part => { 175 | BloomFilter.meta = bc_bf_meta.value 176 | part.circleRangeBF(bc_query.value, bc_prunbound.value) 177 | }).reduce((a, b) => BitArray.or(a, b)) 178 | 179 | val end3 = System.currentTimeMillis() 180 | 181 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 182 | 183 | val start4 = System.currentTimeMillis() 184 | val bc_saved_traj = sc.broadcast(saved_trajs) 185 | val 
final_prune_set = traj_global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), 0.0).map(_._2).toSet 186 | val final_filtered = new PartitionPruningRDD(part_traj, final_prune_set.contains) 187 | .mapPartitions(iter => { 188 | BloomFilter.meta = bc_bf_meta.value 189 | iter.filter(now => BloomFilter.mayContains(bc_saved_traj.value, now._2._1)) 190 | }) 191 | 192 | val res = final_filtered.repartition(sc.defaultParallelism) 193 | .mapPartitions(iter => iter.map(x =>(Trajectory.discreteFrechetDistance(x._2._2, bc_query.value), x._2._1))) 194 | .takeOrdered(k)(new ResultOrdering) 195 | 196 | val end4 = System.currentTimeMillis() 197 | tot_time += (end4 - start2) / 1000.0 198 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 199 | println("# of distance calculated: " + (c * k + final_filtered.count())) 200 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 201 | println("The results show as below:") 202 | res.foreach(println) 203 | println("------------------------------------------------------------") 204 | }) 205 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 206 | println("===================================================") 207 | 208 | sc.stop() 209 | } 210 | } 211 | 212 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BaseLine.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | import scala.collection.mutable 7 | import scala.io.Source 8 | 9 | /** 10 | * Created by dongx on 8/22/16. 11 | */ 12 | object BaseLine { 13 | //final val k_values = Array(1, 10, 30, 50, 70, 100) 14 | final val k_values = Array(10) 15 | 16 | private class ResultOrdering extends Ordering[(Double, Int)] { 17 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 18 | } 19 | 20 | def main(args: Array[String]): Unit = { 21 | val sparkConf = new SparkConf().setAppName("BaseLine")//.setMaster("local[*]") 22 | val sc = new SparkContext(sparkConf) 23 | 24 | if (args.length != 2) { 25 | println("usage: BaseLine ") 26 | System.exit(1) 27 | } 28 | 29 | Thread.sleep(3000) 30 | 31 | val query_traj_filename = args(0) 32 | val traj_data_filename = args(1) 33 | 34 | val query_traj_file = Source.fromFile(query_traj_filename) 35 | val queries = query_traj_file.getLines().map { line => 36 | val splitted = line.split('\t') 37 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 38 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 39 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)).slice(0, 20) 40 | 41 | k_values.foreach(k => { 42 | var tot_time = 0.0 43 | queries.foreach(query_traj => { 44 | println("-------------------------------------------------") 45 | 46 | val start = System.currentTimeMillis() 47 | val bc_query = sc.broadcast(query_traj) 48 | 49 | val res = sc.textFile(traj_data_filename).map{ line => 50 | val splitted = line.split('\t') 51 | (splitted(0).toInt, 52 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 53 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))}.mapPartitions(iter => { 54 | val cur_traj = mutable.ListBuffer[LineSegment]() 55 | val ans = mutable.ListBuffer[(Double, Int)]() 56 | var last_traj_id = -1 57 | while (iter.hasNext) { 58 | val now = iter.next 59 | if (now._1 != 
last_traj_id) { 60 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 61 | //if (cur_traj.nonEmpty) ans += ((Trajectory.discreteFrechetDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 62 | last_traj_id = now._1 63 | cur_traj.clear() 64 | } 65 | cur_traj += now._2 66 | } 67 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 68 | //if (cur_traj.nonEmpty) ans += ((Trajectory.discreteFrechetDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 69 | ans.iterator 70 | }).takeOrdered(k)(new ResultOrdering) 71 | 72 | val end = System.currentTimeMillis() 73 | res.foreach(println) 74 | println("Latency: " + ((end - start) / 1000.0)) 75 | println("-------------------------------------------------") 76 | tot_time += (end - start) / 1000.0 77 | }) 78 | 79 | println("Average Latency for k = " + k + " is : " + (tot_time / 20.0)) 80 | println("===================================================") 81 | }) 82 | 83 | 84 | sc.stop() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BaseLineST.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{Point, LineSegment} 4 | 5 | import scala.collection.mutable 6 | import scala.io.Source 7 | 8 | /** 9 | * Created by Dong Xie on 10/23/2016. 10 | */ 11 | object BaseLineST { 12 | final val k = 10 13 | final val N = 34085 14 | 15 | def minmaxtraj(x: Array[LineSegment], y: Array[LineSegment]) = { 16 | x.map(now_x => y.map(now_y => now_x.minDist(now_y)).min).max 17 | } 18 | 19 | def main(args: Array[String]): Unit = { 20 | if (args.length != 2) { 21 | println("usage: BaseLineST <query_traj_filename> <traj_data_filename>") 22 | System.exit(1) 23 | } 24 | 25 | val query_traj_filename = args(0) 26 | val traj_data_filename = args(1) 27 | 28 | val query_traj_file = Source.fromFile(query_traj_filename) 29 | val query_traj = query_traj_file.getLines().map { line => 30 | val splitted = line.split('\t') 31 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 32 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))) 33 | }.toArray 34 | 35 | val traj_data_file = Source.fromFile(traj_data_filename) 36 | val cur_traj = mutable.ListBuffer[LineSegment]() 37 | val ans = mutable.ListBuffer[(Double, Int)]() 38 | var last_traj_id = -1 39 | val new_iter = traj_data_file.getLines().map(cur => { 40 | val x = cur.split("\t") 41 | (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), Point(Array(x(3).toDouble, x(4).toDouble))), 42 | TrajMeta(x(0).toInt, 1)) 43 | }) 44 | var i = 0 45 | while (new_iter.hasNext) { 46 | val now = new_iter.next 47 | if (now._2.traj_id != last_traj_id) { 48 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, query_traj), last_traj_id)) 49 | last_traj_id = now._2.traj_id 50 | i += 1 51 | println("checking trajectory " + i + "...") 52 | cur_traj.clear() 53 | } 54 | cur_traj += now._1 55 | } 56 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, query_traj), last_traj_id)) 57 | //assert(ans.size == N) 58 | ans.sortBy(_._1).take(k).foreach(println) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BitMapSolution.scala: -------------------------------------------------------------------------------- 1 | package 
edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_bm.RTreeWithBM 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import edu.utah.cs.util._ 8 | import org.apache.spark.rdd.PartitionPruningRDD 9 | import org.apache.spark.storage.StorageLevel 10 | import org.apache.spark.{SparkConf, SparkContext} 11 | 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16. 16 | * Line Segment Trajectory Storage 17 | */ 18 | object BitMapSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | final val N = 940698 23 | //final val max_spatial_span = 0.46757 24 | final val max_spatial_span = 2.550598 25 | 26 | private class ResultOrdering extends Ordering[(Double, Int)] { 27 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 28 | } 29 | 30 | def main(args: Array[String]): Unit = { 31 | val sparkConf = new SparkConf().setAppName("BitMapSolution")//.setMaster("local[*]") 32 | val sc = new SparkContext(sparkConf) 33 | 34 | if (args.length != 2) { 35 | println("usage: BitMapSolution ") 36 | System.exit(1) 37 | } 38 | 39 | Thread.sleep(3000) 40 | 41 | val query_traj_filename = args(0) 42 | val traj_data_filename = args(1) 43 | 44 | val start1 = System.currentTimeMillis() 45 | 46 | val dataRDD = sc.textFile(traj_data_filename) 47 | .map(x => x.split('\t')) 48 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 49 | Point(Array(x(3).toDouble, x(4).toDouble))), 50 | TrajMeta(x(0).toInt, x(5).toInt)))//.persist(StorageLevel.MEMORY_AND_DISK_SER) 51 | 52 | //val optimal_num_bits = BloomFilter.optimalNumBits(N, 0.1) 53 | //val optimal_num_hashes = BloomFilter.optimalNumHashes(N, optimal_num_bits) 54 | //println(optimal_num_bits + "\t" + optimal_num_hashes) 55 | val bm_meta = BitMapMeta(N) 56 | val bc_bm_meta = sc.broadcast(bm_meta) 57 | BitMap.meta = bm_meta 58 | 59 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 60 | 61 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 62 | BitMap.meta = bc_bm_meta.value 63 | val data = iter.toArray 64 | var index: RTreeWithBM = null 65 | //var traj_ids: Array[Int] = null 66 | if (data.length > 0) { 67 | index = RTreeWithBM(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 68 | max_entries_per_node, bc_bm_meta.value) 69 | //traj_ids = data.map(_._2.traj_id).distinct 70 | } 71 | Array((data, index)).iterator 72 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 73 | 74 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.bf))).collect() 75 | 76 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 77 | 78 | val end1 = System.currentTimeMillis() 79 | println("------------------------------------------------------------") 80 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 81 | println("------------------------------------------------------------") 82 | 83 | val start2 = System.currentTimeMillis() 84 | val query_traj_file = Source.fromFile(query_traj_filename) 85 | val query_traj = query_traj_file.getLines().map { line => 86 | val splitted = line.split('\t') 87 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 88 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))) 89 | }.toArray 90 | 91 | val bc_query = 
sc.broadcast(query_traj) 92 | 93 | // val sample_set = dataRDD.takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 94 | // 95 | // assert(sample_set.size >= k) 96 | // 97 | // val pruning_bound = dataRDD.filter(x => sample_set.contains(x._2.traj_id)).groupBy(_._2.traj_id) 98 | // .map(x => minmaxtraj(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 99 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 100 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 101 | val global_intersect_set = global_intersect.map(_._2).toSet 102 | 103 | val sample_set = new PartitionPruningRDD(indexed_seg_rdd, global_intersect_set.contains).flatMap(_._1) 104 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 105 | 106 | assert(sample_set.size >= k) 107 | 108 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 109 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 110 | .flatMap(x => x._1.filter(now => sample_set.contains(now._2.traj_id))).groupBy(_._2.traj_id) 111 | .map(x => Trajectory.hausdorffDistance(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 112 | // val pruning_bound = dataRDD.filter(x => sample_set.contains(x._2.traj_id)).groupBy(_._2.traj_id) 113 | // .map(x => minmaxtraj(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 114 | 115 | //val pruning_bound = 8.65080562241333 116 | 117 | val end2 = System.currentTimeMillis() 118 | 119 | println("------------------------------------------------------------") 120 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 121 | println("The pruning bound is: " + pruning_bound) 122 | println("------------------------------------------------------------") 123 | 124 | val start3 = System.currentTimeMillis() 125 | //val global_prune_set = query_traj.map(x => { 126 | // global_rtree.circleRange(x, pruning_bound).map(_._2) 127 | //}).flatMap(list => list).toSet 128 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 129 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 130 | val global_prune_set = global_prune.map(_._2).toSet 131 | 132 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 133 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 134 | .aggregate(BitArray.create(bm_meta.num_bits))((a, b) => BitArray.or(a, b), (a, b) => BitArray.or(a, b)) 135 | 136 | val bc_prunbound = sc.broadcast(pruning_bound) 137 | 138 | val pruned_traj_id2 = pruned_rdd.map(part => { 139 | BitMap.meta = bc_bm_meta.value 140 | part._2.antiCircleRangeBF(bc_query.value, bc_prunbound.value) 141 | }).reduce((a, b) => BitArray.or(a, b)) 142 | 143 | val saved_trajs = BitArray.flip(BitArray.or(pruned_traj_id1, pruned_traj_id2)) 144 | 145 | // val saved_trajs = pruned_rdd.map(part => { 146 | // BloomFilter.meta = bc_bm_meta.value 147 | // part._2.circleRangeBF(bc_query.value, bc_prunbound.value) 148 | // }).reduce((a, b) => BitArray.or(a, b)) 149 | 150 | val end3 = System.currentTimeMillis() 151 | 152 | println("------------------------------------------------------------") 153 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 154 | println("Pruned trajs after global pruning:" + BitArray.count(pruned_traj_id1)) 155 | println("Pruned trajs after local pruning:" + 
BitArray.count(BitArray.or(pruned_traj_id1, pruned_traj_id2))) 156 | println("# of saved trajs: " + BitArray.count(saved_trajs)) 157 | println("------------------------------------------------------------") 158 | 159 | val start4 = System.currentTimeMillis() 160 | val bc_saved_traj = sc.broadcast(saved_trajs) 161 | val final_filter_set = global_rtree.circleRange(global_prune_mbrs, max_spatial_span).map(_._2).toSet 162 | 163 | val res = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 164 | .flatMap(x => { 165 | BitMap.meta = bc_bm_meta.value 166 | x._1.filter(now => BitMap.contains(bc_saved_traj.value, now._2.traj_id)) 167 | }).groupBy(_._2.traj_id).map(x => (Trajectory.hausdorffDistance(x._2.map(_._1).toArray, bc_query.value), x._1)) 168 | .takeOrdered(k)(new ResultOrdering) 169 | 170 | // val res = dataRDD.mapPartitions(iter => { 171 | // BloomFilter.meta = bc_bm_meta.value 172 | // val cur_traj = mutable.ListBuffer[LineSegment]() 173 | // val ans = mutable.ListBuffer[(Double, Int)]() 174 | // var last_traj_id = -1 175 | // val new_iter = iter.filter(x => BloomFilter.mayContains(bc_saved_traj.value, x._2.traj_id)) 176 | // while (new_iter.hasNext) { 177 | // val now = new_iter.next 178 | // if (now._2.traj_id != last_traj_id) { 179 | // if (cur_traj.nonEmpty) ans += ((minmaxtraj(cur_traj.toArray, bc_query.value), last_traj_id)) 180 | // last_traj_id = now._2.traj_id 181 | // cur_traj.clear() 182 | // } 183 | // cur_traj += now._1 184 | // } 185 | // if (cur_traj.nonEmpty) ans += ((minmaxtraj(cur_traj.toArray, bc_query.value), last_traj_id)) 186 | // ans.iterator 187 | // //iter.toArray.groupBy(_._2.traj_id).filter(x => BloomFilter.mayContains(bc_saved_traj.value, x._1)) 188 | // // .map(x => (minmaxtraj(x._2.map(_._1), bc_query.value), x._1)).iterator 189 | // }).takeOrdered(k)(new ResultOrdering) 190 | 191 | val end4 = System.currentTimeMillis() 192 | 193 | println("------------------------------------------------------------") 194 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 195 | println("------------------------------------------------------------") 196 | 197 | println("------------------------------------------------------------") 198 | println("The results show as below:") 199 | res.foreach(println) 200 | println("------------------------------------------------------------") 201 | 202 | sc.stop() 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BloomFilterSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_bf.RTreeWithBF 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import edu.utah.cs.util._ 8 | import org.apache.spark.rdd.PartitionPruningRDD 9 | import org.apache.spark.storage.StorageLevel 10 | import org.apache.spark.{SparkConf, SparkContext} 11 | 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16. 
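 * 
 * Sizing note: `BloomFilter.optimalNumBits(n, p)` and `optimalNumHashes(n, m)` presumably
 * implement the standard Bloom filter sizing formulas; a minimal sketch, assuming the
 * textbook definitions rather than this repo's exact code:
 * {{{
 *   // bits:   m = ceil(-n * ln(p) / (ln 2)^2)   for n items and false-positive rate p
 *   // hashes: h = round(m / n * ln 2)
 *   def optimalNumBits(n: Long, p: Double): Int =
 *     math.ceil(-n * math.log(p) / (math.log(2) * math.log(2))).toInt
 *   def optimalNumHashes(n: Long, m: Long): Int =
 *     math.max(1, math.round(m.toDouble / n * math.log(2)).toInt)
 * }}}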
16 | * Line Segment Trajectory Storage 17 | */ 18 | object BloomFilterSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | final val max_spatial_span = 0.46757 23 | //final val max_spatial_span = 2.550598 24 | 25 | private class ResultOrdering extends Ordering[(Double, Int)] { 26 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 27 | } 28 | 29 | def main(args: Array[String]): Unit = { 30 | val sparkConf = new SparkConf().setAppName("BloomFilterSolution").set("spark.locality.wait", "0") 31 | .set("spark.driver.maxResultSize", "4g") 32 | val sc = new SparkContext(sparkConf) 33 | 34 | if (args.length != 2) { 35 | println("usage: BloomFilterSolution ") 36 | System.exit(1) 37 | } 38 | 39 | Thread.sleep(3000) 40 | 41 | val query_traj_filename = args(0) 42 | val traj_data_filename = args(1) 43 | 44 | val start1 = System.currentTimeMillis() 45 | 46 | val dataRDD = sc.textFile(traj_data_filename) 47 | .map(x => x.split('\t')) 48 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 49 | Point(Array(x(3).toDouble, x(4).toDouble))), 50 | TrajMeta(x(0).toInt, x(5).toInt))) 51 | 52 | val optimal_num_bits = BloomFilter.optimalNumBits(5000, 0.1) 53 | val optimal_num_hashes = BloomFilter.optimalNumHashes(5000, optimal_num_bits) 54 | val bf_meta = BloomFilterMeta(optimal_num_bits, optimal_num_hashes) 55 | val bc_bf_meta = sc.broadcast(bf_meta) 56 | BloomFilter.meta = bf_meta 57 | 58 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 59 | 60 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 61 | BloomFilter.meta = bc_bf_meta.value 62 | val data = iter.toArray 63 | var index: RTreeWithBF = null 64 | if (data.length > 0) { 65 | index = RTreeWithBF(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 66 | max_entries_per_node, bc_bf_meta.value) 67 | } 68 | Array((data, index)).iterator 69 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 70 | 71 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.bf))).collect() 72 | 73 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 74 | 75 | val end1 = System.currentTimeMillis() 76 | println("------------------------------------------------------------") 77 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 78 | println("------------------------------------------------------------") 79 | 80 | 81 | val query_traj_file = Source.fromFile(query_traj_filename) 82 | val queries = query_traj_file.getLines().map { line => 83 | val splitted = line.split('\t') 84 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 85 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 86 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 87 | 88 | var tot_time = 0.0 89 | queries.foreach(query_traj => { 90 | val start2 = System.currentTimeMillis() 91 | val bc_query = sc.broadcast(query_traj) 92 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 93 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 94 | val global_intersect_set = global_intersect.map(_._2).toSet 95 | 96 | val sample_set = new PartitionPruningRDD(indexed_seg_rdd, global_intersect_set.contains).flatMap(_._1) 97 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 98 | 99 | 
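// The pruning bound computed below is the k-th smallest trajectory distance among the
// c * k sampled candidates: any trajectory farther from the query than this bound cannot
// be in the true top-k, so it can be discarded safely. Trajectory.hausdorffDistance itself
// is not shown in this listing; judging from BaseLineST.minmaxtraj above, a segment-based
// symmetric Hausdorff distance would look roughly like this (a sketch, not the repo's exact code):
//   def directed(a: Array[LineSegment], b: Array[LineSegment]): Double =
//     a.map(sa => b.map(sb => sa.minDist(sb)).min).max
//   def hausdorff(a: Array[LineSegment], b: Array[LineSegment]): Double =
//     math.max(directed(a, b), directed(b, a))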
assert(sample_set.size >= k) 100 | 101 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 102 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 103 | .flatMap(x => x._1.filter(now => sample_set.contains(now._2.traj_id))) 104 | .groupBy(_._2.traj_id).repartition(Math.min(sample_set.size, sc.defaultParallelism)) 105 | .map(x => Trajectory.hausdorffDistance(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 106 | 107 | val end2 = System.currentTimeMillis() 108 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 109 | println("The pruning bound is: " + pruning_bound) 110 | 111 | val start3 = System.currentTimeMillis() 112 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 113 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 114 | val global_prune_set = global_prune.map(_._2).toSet 115 | 116 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 117 | 118 | val bc_prunbound = sc.broadcast(pruning_bound) 119 | val saved_trajs = pruned_rdd.map(part => { 120 | BloomFilter.meta = bc_bf_meta.value 121 | part._2.circleRangeBF(bc_query.value, bc_prunbound.value) 122 | }).reduce((a, b) => BitArray.or(a, b)) 123 | 124 | val end3 = System.currentTimeMillis() 125 | 126 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 127 | 128 | val start4 = System.currentTimeMillis() 129 | val bc_saved_traj = sc.broadcast(saved_trajs) 130 | val final_filter_set = global_rtree.circleRange(global_prune_mbrs, max_spatial_span).map(_._2).toSet 131 | 132 | val fianl_filter = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 133 | .flatMap(x => { 134 | BloomFilter.meta = bc_bf_meta.value 135 | x._1.filter(now => BloomFilter.mayContains(bc_saved_traj.value, now._2.traj_id)) 136 | }).groupBy(_._2.traj_id).repartition(sc.defaultParallelism) 137 | 138 | val res = fianl_filter.map(x => (Trajectory.hausdorffDistance(x._2.map(_._1).toArray, bc_query.value), x._1)) 139 | .takeOrdered(k)(new ResultOrdering) 140 | 141 | val end4 = System.currentTimeMillis() 142 | tot_time += (end4 - start2) / 1000.0 143 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 144 | println("# of distance calculated: " + (c * k + fianl_filter.count())) 145 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 146 | println("The results show as below:") 147 | res.foreach(println) 148 | println("------------------------------------------------------------") 149 | }) 150 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 151 | println("===================================================") 152 | 153 | sc.stop() 154 | } 155 | } 156 | 157 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/DataSampling.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | /** 7 | * Created by dongx on 1/27/2017. 
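 * 
 * Input is the segment TSV used throughout this listing: traj_id, x1, y1, x2, y2, seg_id
 * per line. A minimal parsing sketch, assuming that layout:
 * {{{
 *   def parseSegment(line: String): (Int, LineSegment) = {
 *     val f = line.split('\t')
 *     (f(0).toInt, LineSegment(Point(Array(f(1).toDouble, f(2).toDouble)),
 *                              Point(Array(f(3).toDouble, f(4).toDouble))))
 *   }
 * }}}
 * Sampling happens after grouping segments by trajectory id, so whole trajectories are
 * kept or dropped together.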
8 | */ 9 | object DataSampling { 10 | def main(args: Array[String]): Unit = { 11 | val sparkConf = new SparkConf().setAppName("DataSampling") 12 | val sc = new SparkContext(sparkConf) 13 | 14 | Thread.sleep(3000) 15 | 16 | if (args.length < 3) { 17 | println("usage: DataSampling <input_file_path> <output_file_path> <sample_rate>") 18 | System.exit(1) 19 | } 20 | 21 | val input_file_path = args(0) 22 | val output_file_path = args(1) 23 | val sample_rate = args(2).toDouble 24 | 25 | sc.textFile(input_file_path).mapPartitions(iter => { 26 | iter.map(x => { 27 | val splitted = x.split("\t") 28 | (splitted(0).toInt, 29 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 30 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 31 | }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator 32 | }).sample(withReplacement = false, sample_rate, System.currentTimeMillis()).repartition(4096) 33 | .flatMap(x => x._2.zipWithIndex.map(now => x._1 + "\t" + now._1.toTSV + "\t" + now._2)) 34 | .saveAsTextFile(output_file_path) 35 | 36 | sc.stop() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/DualIndexingSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} 4 | import java.util.zip.{GZIPInputStream, GZIPOutputStream} 5 | 6 | import edu.utah.cs.index.RTree 7 | import edu.utah.cs.index_rr.RTreeWithRR 8 | import edu.utah.cs.partitioner.{STRSegPartition, STRTrajPartition} 9 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 10 | import org.apache.spark.rdd.PartitionPruningRDD 11 | import org.apache.spark.storage.StorageLevel 12 | import org.apache.spark.{SparkConf, SparkContext} 13 | import org.roaringbitmap.RoaringBitmap 14 | 15 | import scala.collection.mutable 16 | import scala.io.Source 17 | 18 | /** 19 | * Created by dongx on 12/19/2016. 
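 * 
 * Trajectories are stored gzip-compressed via Java serialization to cut executor memory.
 * The round trip, factored out as a sketch mirroring the inline code below:
 * {{{
 *   def compress(traj: Array[LineSegment]): Array[Byte] = {
 *     val baos = new ByteArrayOutputStream()
 *     val out = new ObjectOutputStream(new GZIPOutputStream(baos))
 *     out.writeObject(traj)
 *     out.close()                    // flushes the gzip trailer before reading the bytes
 *     baos.toByteArray
 *   }
 * 
 *   def decompress(bytes: Array[Byte]): Array[LineSegment] = {
 *     val in = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes)))
 *     try in.readObject().asInstanceOf[Array[LineSegment]] finally in.close()
 *   }
 * }}}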
20 | */ 21 | object DualIndexingSolution { 22 | final val max_entries_per_node = 25 23 | final val k = 10 24 | final val c_values = Array(5) 25 | 26 | def getMBR(x: Array[LineSegment]): MBR = { 27 | val pts = x.flatMap(p => Array(p.start, p.end)) 28 | var maxx = Double.MinValue 29 | var maxy = Double.MinValue 30 | var minx = Double.MaxValue 31 | var miny = Double.MaxValue 32 | pts.foreach(x => { 33 | maxx = Math.max(x.coord(0), maxx) 34 | maxy = Math.max(x.coord(1), maxy) 35 | minx = Math.min(x.coord(0), minx) 36 | miny = Math.min(x.coord(1), miny) 37 | }) 38 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 39 | } 40 | 41 | private class ResultOrdering extends Ordering[(Double, Int)] { 42 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 43 | } 44 | 45 | def main(args: Array[String]): Unit = { 46 | val sparkConf = new SparkConf().setAppName("DualIndexingSolution").set("spark.locality.wait", "0") 47 | .set("spark.driver.maxResultSize", "4g")//.setMaster("local[*]") 48 | val sc = new SparkContext(sparkConf) 49 | 50 | if (args.length != 2) { 51 | println("usage: DualIndexingSolution <query_traj_filename> <traj_data_filename>") 52 | System.exit(1) 53 | } 54 | 55 | Thread.sleep(6000) 56 | 57 | val query_traj_filename = args(0) 58 | val traj_data_filename = args(1) 59 | 60 | val query_traj_file = Source.fromFile(query_traj_filename) 61 | val queries = query_traj_file.getLines().map { line => 62 | val splitted = line.split('\t') 63 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 64 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 65 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 66 | 67 | val start1 = System.currentTimeMillis() 68 | 69 | val dataRDD = sc.textFile(traj_data_filename) 70 | .map(x => x.split('\t')) 71 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 72 | Point(Array(x(3).toDouble, x(4).toDouble))), 73 | TrajMeta(x(0).toInt, x(5).toInt))) 74 | 75 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 76 | iter.map(x => { 77 | val splitted = x.split("\t") 78 | (splitted(0).toInt, 79 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 80 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 81 | }).toArray.groupBy(_._1).map(now => { 82 | val cur_traj = now._2.sortBy(_._1).map(_._2) 83 | (getMBR(cur_traj), (now._1, cur_traj)) 84 | }).iterator 85 | }) 86 | 87 | val part_traj = STRTrajPartition(trajs, dataRDD.partitions.length, 0.01, max_entries_per_node) 88 | 89 | val compressed_traj = part_traj.mapPartitions(iter => iter.map(x => { 90 | val baos = new ByteArrayOutputStream() 91 | val gzipOut = new GZIPOutputStream(baos) 92 | val objectOut = new ObjectOutputStream(gzipOut) 93 | objectOut.writeObject(x._2._2) 94 | objectOut.close() 95 | (x._2._1, baos.toByteArray) 96 | })).persist(StorageLevel.MEMORY_AND_DISK_SER) 97 | 98 | println(compressed_traj.count) 99 | 100 | val traj_stat = part_traj.mapPartitions(iter => { 101 | Array(iter.aggregate[(MBR, Int)]((null, 0))((res, now) => { 102 | if (res._1 == null) (now._1, 1) 103 | else (res._1.union(now._1), res._2 + 1) 104 | }, (left, right) => { 105 | if (left._1 == null) right 106 | else if (right._1 == null) left 107 | else (left._1.union(right._1), left._2 + right._2) 108 | })).iterator 109 | }).collect() 110 | val traj_global_rtree = 111 | RTree.applyMBR(traj_stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 112 | 113 | 114 | val (partitioned_rdd, _) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, 
max_entries_per_node) 115 | 116 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 117 | val data = iter.toArray 118 | var index: RTreeWithRR = if (data.length > 0) { 119 | RTreeWithRR(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.traj_id)), 25) 120 | } else null 121 | Array(index).iterator 122 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 123 | 124 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x.root.m_mbr, x.root.size, x.root.rr))).collect() 125 | 126 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 127 | 128 | val end1 = System.currentTimeMillis() 129 | println("------------------------------------------------------------") 130 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 131 | println("------------------------------------------------------------") 132 | 133 | c_values.foreach(c => { 134 | var tot_time = 0.0 135 | queries.foreach(query_traj => { 136 | val start2 = System.currentTimeMillis() 137 | val bc_query = sc.broadcast(query_traj) 138 | 139 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 140 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 141 | 142 | val sample_base = global_intersect.aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, stat(b._2)._3), 143 | (a, b) => RoaringBitmap.or(a, b)) 144 | 145 | val cards = sample_base.getCardinality 146 | println("Cardinality of intersected Partitions: " + cards) 147 | val n_samples = c * k 148 | println("Going to sample: " + n_samples) 149 | assert(cards >= k) 150 | 151 | val set = mutable.HashSet[Int]() 152 | val rnd = scala.util.Random 153 | 154 | for (i <- 0 until n_samples) { 155 | var x = rnd.nextInt(cards) 156 | while (set.contains(x)) x = rnd.nextInt(cards) 157 | set += x 158 | } 159 | 160 | var i = 0 161 | val samples = mutable.HashSet[Int]() 162 | val iter = sample_base.iterator() 163 | while (iter.hasNext) { 164 | val x = iter.next() 165 | if (set.contains(i)) samples += x 166 | i = i + 1 167 | } 168 | 169 | val bc_samples = sc.broadcast(samples.toSet) 170 | val pruning_bound_filter = traj_global_rtree.circleRange(global_intersect_mbrs, 0.0).map(_._2).toSet 171 | val pruning_bound = new PartitionPruningRDD(compressed_traj, pruning_bound_filter.contains) 172 | .filter(x => bc_samples.value.contains(x._1)) 173 | .repartition(Math.min(samples.size, sc.defaultParallelism)) 174 | .map(x => { 175 | val bais = new ByteArrayInputStream(x._2) 176 | val gzipIn = new GZIPInputStream(bais) 177 | val objectIn = new ObjectInputStream(gzipIn) 178 | val content = objectIn.readObject().asInstanceOf[Array[LineSegment]] 179 | Trajectory.hausdorffDistance(bc_query.value, content) 180 | //Trajectory.discreteFrechetDistance(bc_query.value, content) 181 | }) 182 | .takeOrdered(k).last 183 | val end2 = System.currentTimeMillis() 184 | 185 | println("------------------------------------------------------------") 186 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 187 | println("The pruning bound is: " + pruning_bound) 188 | 189 | val start3 = System.currentTimeMillis() 190 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 191 | val global_prune_set = global_prune.map(_._2).toSet 192 | 193 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 194 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 195 | .aggregate(new RoaringBitmap())((a, b) => 
RoaringBitmap.or(a, b), (a, b) => RoaringBitmap.or(a, b)) 196 | 197 | val bc_pruning_bound = sc.broadcast(pruning_bound) 198 | val pruned_traj_id2 = pruned_rdd.map(part => { 199 | part.antiCircleRangeBF(bc_query.value, bc_pruning_bound.value) 200 | }).reduce((a, b) => RoaringBitmap.or(a, b)) 201 | 202 | val tot_pruned_traj = RoaringBitmap.or(pruned_traj_id1, pruned_traj_id2) 203 | 204 | val end3 = System.currentTimeMillis() 205 | val tot_prune_count = tot_pruned_traj.getCardinality 206 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 207 | 208 | val start4 = System.currentTimeMillis() 209 | val bc_pruned_traj = sc.broadcast(tot_pruned_traj) 210 | 211 | val final_prune_set = traj_global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), 0.0).map(_._2).toSet 212 | val final_filtered = new PartitionPruningRDD(compressed_traj, final_prune_set.contains) 213 | .filter(x => !bc_pruned_traj.value.contains(x._1)) 214 | 215 | val res = final_filtered.repartition(sc.defaultParallelism) 216 | .mapPartitions(iter => iter.map(x =>{ 217 | val bais = new ByteArrayInputStream(x._2) 218 | val gzipIn = new GZIPInputStream(bais) 219 | val objectIn = new ObjectInputStream(gzipIn) 220 | val content = objectIn.readObject().asInstanceOf[Array[LineSegment]] 221 | (Trajectory.hausdorffDistance(bc_query.value, content), x._1) 222 | })) 223 | .takeOrdered(k)(new ResultOrdering) 224 | 225 | val end4 = System.currentTimeMillis() 226 | tot_time += (end4 - start2) / 1000.0 227 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 228 | println("# of distance calculated: " + (c * k + final_filtered.count())) 229 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 230 | println("The results show as below:") 231 | res.foreach(println) 232 | println("------------------------------------------------------------") 233 | bc_query.destroy() 234 | bc_samples.destroy() 235 | bc_pruned_traj.destroy() 236 | bc_pruning_bound.destroy() 237 | }) 238 | 239 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 240 | println("===================================================") 241 | }) 242 | 243 | sc.stop() 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/LineSegmentClustering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{BufferedWriter, File, FileWriter} 4 | 5 | import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} 6 | import edu.utah.cs.partitioner.STRSegPartition 7 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} 8 | import edu.utah.cs.util.{BloomFilter, BloomFilterMeta} 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.geotools.geojson.geom.GeometryJSON 11 | 12 | /** 13 | * Created by dongx on 10/24/16. 
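 * 
 * Emits the partition boundaries as a GeoJSON GeometryCollection for visualization.
 * Each MBR becomes a closed five-point ring; a sketch of the conversion used below:
 * {{{
 *   def mbrToRing(m: MBR): Polygon =
 *     Polygon(Array(m.low, Point(Array(m.low.coord(0), m.high.coord(1))),
 *                   m.high, Point(Array(m.high.coord(0), m.low.coord(1))), m.low))
 * }}}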
14 | */ 15 | object LineSegmentClustering { 16 | final val max_entries_per_node = 25 17 | final val k = 10 18 | final val N = 34085 19 | 20 | def main(args: Array[String]): Unit = { 21 | val sc = new SparkContext(new SparkConf().setAppName("LineSegmentClustering")) 22 | 23 | if (args.length < 2) { 24 | println("usage: LineSegmentClustering <input_file_path> <output_file_path>") 25 | System.exit(1) 26 | } 27 | 28 | val input_file_path = args(0) 29 | val output_file_path = args(1) 30 | 31 | val dataRDD = sc.textFile(input_file_path) 32 | .map(x => x.split('\t')) 33 | .map(x => (LineSegment(Point(Array(x(2).toDouble, x(1).toDouble)), 34 | Point(Array(x(4).toDouble, x(3).toDouble))), 35 | TrajMeta(x(0).toInt, x(5).toInt))) 36 | 37 | val bf_meta = BloomFilterMeta(N, 1) 38 | val bc_bf_meta = sc.broadcast(bf_meta) 39 | BloomFilter.meta = bf_meta 40 | 41 | val num_partitions = dataRDD.getNumPartitions 42 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, num_partitions, 0.01, max_entries_per_node) 43 | 44 | val part_bounds = partitioned_rdd.mapPartitions(iter => { 45 | if (iter.nonEmpty) { 46 | var maxx = Double.MinValue 47 | var maxy = Double.MinValue 48 | var minx = Double.MaxValue 49 | var miny = Double.MaxValue 50 | iter.map(_._1).foreach(x => { 51 | maxx = Math.max(Math.max(x.start.coord(0), x.end.coord(0)), maxx) 52 | maxy = Math.max(Math.max(x.start.coord(1), x.end.coord(1)), maxy) 53 | minx = Math.min(Math.min(x.start.coord(0), x.end.coord(0)), minx) 54 | miny = Math.min(Math.min(x.start.coord(1), x.end.coord(1)), miny) 55 | }) 56 | Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator 57 | } else Array().iterator 58 | }).collect() 59 | 60 | val file = new File(output_file_path) 61 | val bw = new BufferedWriter(new FileWriter(file)) 62 | 63 | val collection = new GeometryCollection(part_bounds.map(x => 64 | Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), 65 | x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) 66 | 67 | new GeometryJSON().writeGeometryCollection(collection, bw) 68 | 69 | bw.close() 70 | 71 | sc.stop() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/MTreeSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.partitioner.IDPartition 4 | import edu.utah.cs.spatial.{LineSegment, Point} 5 | import mtree.{DistanceFunction, MTree} 6 | import org.apache.spark.rdd.PartitionPruningRDD 7 | import org.apache.spark.storage.StorageLevel 8 | import org.apache.spark.{SparkConf, SparkContext} 9 | 10 | import scala.collection.mutable 11 | import scala.io.Source 12 | 13 | /** 14 | * Created by dongx on 4/28/17. 
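 * 
 * Each trajectory is assigned to its nearest pivot; a pivot partition with cover radius r
 * can be skipped whenever dist(query, pivot) - r already exceeds the current k-th-best
 * bound, by the triangle inequality. A sketch of the test used below:
 * {{{
 *   def canPrune(distQueryPivot: Double, coverRadius: Double, bound: Double): Boolean =
 *     distQueryPivot - coverRadius > bound
 * }}}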
15 | */ 16 | object MTreeSolution { 17 | final val k = 10 18 | final val c = 5 19 | 20 | case class MTreeTraj(id: Int, data: Array[LineSegment]) 21 | 22 | class TrajDistanceFunction extends DistanceFunction[MTreeTraj] { 23 | override def calculate(traj1: MTreeTraj, traj2: MTreeTraj): Double = { 24 | Trajectory.hausdorffDistance(traj1.data, traj2.data) 25 | } 26 | } 27 | 28 | def main(args: Array[String]): Unit = { 29 | val sparkConf = new SparkConf().setAppName("MTreeSolution") 30 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g") 31 | val sc = new SparkContext(sparkConf) 32 | 33 | if (args.length != 2) { 34 | println("usage: MTreeSolution ") 35 | System.exit(1) 36 | } 37 | 38 | val query_traj_filename = args(0) 39 | val traj_data_filename = args(1) 40 | 41 | val query_traj_file = Source.fromFile(query_traj_filename) 42 | val queries = query_traj_file.getLines().map { line => 43 | val splitted = line.split('\t') 44 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 45 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 46 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 47 | 48 | Thread.sleep(6000) 49 | 50 | val start1 = System.currentTimeMillis() 51 | 52 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 53 | iter.map(x => { 54 | val splitted = x.split("\t") 55 | (splitted(0).toInt, 56 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 57 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 58 | }).toArray.groupBy(_._1).map(now => MTreeTraj(now._1, now._2.map(_._2))).iterator 59 | }) 60 | 61 | val pivots = trajs.takeSample(withReplacement = false, trajs.partitions.length, System.currentTimeMillis()).map(_.data) 62 | val pivot_mt = new MTree[MTreeTraj](2, new TrajDistanceFunction(), null) 63 | for (i <- pivots.indices) { 64 | pivot_mt.add(MTreeTraj(i, pivots(i))) 65 | } 66 | val bc_pivots = sc.broadcast(pivots) 67 | val bc_pivots_mt = sc.broadcast(pivot_mt) 68 | val traj_with_pivot = trajs.mapPartitions(iter => { 69 | iter.map(x => { 70 | val tmp = bc_pivots_mt.value.getNearest(x) 71 | (tmp.iterator().next().data.id, x) 72 | }) 73 | }) 74 | val parted_by_pivot = IDPartition(traj_with_pivot, pivots.length) 75 | val indexed = parted_by_pivot.mapPartitionsWithIndex((id, iter) => { 76 | val data = iter.map(_._2.asInstanceOf[MTreeTraj]).toArray 77 | val pivot = bc_pivots.value(id) 78 | val cover_radius = data.map(x => Trajectory.hausdorffDistance(x.data, pivot)).max 79 | val m_tree = new MTree[MTreeTraj](2, new TrajDistanceFunction(), null) 80 | data.foreach(x => m_tree.add(x)) 81 | Array((pivot, cover_radius, data.length, m_tree)).iterator 82 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 83 | 84 | val stats = indexed.map(x => (x._1, x._2, x._3)).collect() 85 | .zipWithIndex.map(x => (x._1._1, x._1._2, x._1._3, x._2)) 86 | 87 | val end1 = System.currentTimeMillis() 88 | println("Time to build index: " + ((end1 - start1) / 1000.0)) 89 | 90 | bc_pivots.destroy() 91 | bc_pivots_mt.destroy() 92 | 93 | var tot_time = 0.0 94 | queries.foreach(query => { 95 | val start2 = System.currentTimeMillis() 96 | println("----------------------------------------------") 97 | val sorted_pivots = stats.map(x => (Trajectory.hausdorffDistance(x._1, query), x._2, x._3, x._4)).sortBy(_._1) 98 | var i = 0 99 | var sum = 0 100 | while (sum < k) { 101 | sum += sorted_pivots(i)._3 102 | i += 1 103 | } 104 | 105 | val prune_set = sorted_pivots.slice(0, i).map(_._4).toSet 106 | val bc_query = 
sc.broadcast(query) 107 | val bc_k = sc.broadcast(k) 108 | // val first_filter = new PartitionPruningRDD(indexed, prune_set.contains) 109 | // .flatMap(i_part => { 110 | // i_part._4.knn(VPTraj(0, bc_query.value), bc_k.value)._1.map(x => (x._2, x._1.id)) 111 | // }).takeOrdered(k)(new ResultOrdering) 112 | 113 | val first_filter = new PartitionPruningRDD(indexed, prune_set.contains) 114 | .aggregate((Array[(Double, Int)](), 0))((now, part) => { 115 | val knn_res = part._4.getNearestByLimit(MTreeTraj(0, bc_query.value), bc_k.value) 116 | val knn_iter = knn_res.iterator() 117 | val res = mutable.ListBuffer[(Double, Int)]() 118 | while (knn_iter.hasNext) { 119 | val tmp = knn_iter.next() 120 | res += ((tmp.distance, tmp.data.id)) 121 | } 122 | ((res ++ now._1).sortBy(_._1).take(bc_k.value).toArray, knn_res.cnt + now._2) 123 | }, (left, right) => { 124 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2) 125 | }) 126 | 127 | val tick1 = System.currentTimeMillis() 128 | println("Time for first filter: " + ((tick1 - start2) / 1000.0)) 129 | 130 | val pruning_bound = first_filter._1.last._1 131 | val global_prune_set = 132 | sorted_pivots.filter(x => x._1 - x._2 <= pruning_bound).map(_._4).toSet -- prune_set 133 | val bc_pruning_bound = sc.broadcast(pruning_bound) 134 | 135 | val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains) 136 | .aggregate((Array[(Double, Int)](), 0))((now, part) => { 137 | val knn_res = part._4.getNearestByLimit(MTreeTraj(0, bc_query.value), bc_k.value) 138 | val knn_iter = knn_res.iterator() 139 | val res = mutable.ListBuffer[(Double, Int)]() 140 | while (knn_iter.hasNext) { 141 | val tmp = knn_iter.next() 142 | res += ((tmp.distance, tmp.data.id)) 143 | } 144 | ((res ++ now._1).sortBy(_._1).take(bc_k.value).toArray, knn_res.cnt + now._2) 145 | }, (left, right) => { 146 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2) 147 | }) 148 | 149 | val final_res = (first_filter._1 ++ second_filter._1).sortBy(_._1).take(k) 150 | 151 | val end2 = System.currentTimeMillis() 152 | println("Time for second filter and final merge: " + ((end2 - tick1) / 1000.0)) 153 | println("# of trajs checked distance: " + (first_filter._2 + second_filter._2 + pivots.length)) 154 | println("Total Latency: " + ((end2 - start2) / 1000.0)) 155 | final_res.foreach(println) 156 | tot_time += (end2 - start2) / 1000.0 157 | println("----------------------------------------------") 158 | bc_k.destroy() 159 | bc_query.destroy() 160 | bc_pruning_bound.destroy() 161 | }) 162 | 163 | println("Average Latency: " + (tot_time / 100.0)) 164 | 165 | sc.stop() 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/RRSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_rr.RTreeWithRR 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import org.apache.spark.rdd.PartitionPruningRDD 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.roaringbitmap.RoaringBitmap 11 | 12 | import scala.collection.mutable 13 | import scala.io.Source 14 | 15 | /** 16 | * Created by dongx on 12/19/2016. 
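 * 
 * Query phase in brief: sample c * k candidate trajectories to get an upper bound on the
 * k-NN distance, then drop (a) trajectories confined to partitions entirely outside the
 * bound and (b) those pruned locally inside surviving partitions. RoaringBitmap tracks
 * trajectory ids cheaply; a usage sketch with made-up ids:
 * {{{
 *   val prunedGlobal = RoaringBitmap.bitmapOf(1, 2)
 *   val prunedLocal  = RoaringBitmap.bitmapOf(2, 3)
 *   val allPruned    = RoaringBitmap.or(prunedGlobal, prunedLocal)    // {1, 2, 3}
 *   val candidates   = RoaringBitmap.bitmapOf(1, 2, 3, 4, 5)
 *   val survivors    = RoaringBitmap.andNot(candidates, allPruned)    // {4, 5}
 * }}}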
17 | */ 18 | object RRSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | //final val max_spatial_span = 2.550598 23 | //final val max_spatial_span = 0.46757 24 | final val max_spatial_span = 0.5080 25 | 26 | def getMBR(x: Array[LineSegment]): MBR = { 27 | val pts = x.flatMap(p => Array(p.start, p.end)) 28 | var maxx = Double.MinValue 29 | var maxy = Double.MinValue 30 | var minx = Double.MaxValue 31 | var miny = Double.MaxValue 32 | pts.foreach(x => { 33 | maxx = Math.max(x.coord(0), maxx) 34 | maxy = Math.max(x.coord(1), maxy) 35 | minx = Math.min(x.coord(0), minx) 36 | miny = Math.min(x.coord(1), miny) 37 | }) 38 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 39 | } 40 | 41 | private class ResultOrdering extends Ordering[(Double, Int)] { 42 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 43 | } 44 | 45 | def main(args: Array[String]): Unit = { 46 | val sparkConf = new SparkConf().setAppName("RRSolution") 47 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g")//.setMaster("local[*]") 48 | val sc = new SparkContext(sparkConf) 49 | 50 | if (args.length != 2) { 51 | println("usage: RRSolution ") 52 | System.exit(1) 53 | } 54 | 55 | Thread.sleep(6000) 56 | 57 | val query_traj_filename = args(0) 58 | val traj_data_filename = args(1) 59 | 60 | val query_traj_file = Source.fromFile(query_traj_filename) 61 | val queries = query_traj_file.getLines().map { line => 62 | val splitted = line.split('\t') 63 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 64 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 65 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 66 | 67 | val start1 = System.currentTimeMillis() 68 | 69 | val dataRDD = sc.textFile(traj_data_filename) 70 | .map(x => x.split('\t')) 71 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 72 | Point(Array(x(3).toDouble, x(4).toDouble))), 73 | TrajMeta(x(0).toInt, x(5).toInt))) 74 | 75 | val (partitioned_rdd, _) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 76 | 77 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 78 | val data = iter.toArray 79 | var index: RTreeWithRR = null 80 | if (data.length > 0) { 81 | index = RTreeWithRR(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.traj_id)), max_entries_per_node) 82 | } 83 | Array((data, index)).iterator 84 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 85 | 86 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.rr))).collect() 87 | 88 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 89 | 90 | val end1 = System.currentTimeMillis() 91 | println("------------------------------------------------------------") 92 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 93 | println("------------------------------------------------------------") 94 | 95 | var tot_time = 0.0 96 | queries.foreach(query_traj => { 97 | val start2 = System.currentTimeMillis() 98 | val bc_query = sc.broadcast(query_traj) 99 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 100 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 101 | 102 | val sample_base = global_intersect.aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, stat(b._2)._3), 103 | (a, b) => RoaringBitmap.or(a, b)) 104 | 105 | val cards = 
sample_base.getCardinality 106 | println("Cardinality of intersected Partitions: " + cards) 107 | val n_samples = c * k 108 | println("Going to sample: " + n_samples) 109 | assert(cards >= k) 110 | 111 | val set = mutable.HashSet[Int]() 112 | val rnd = scala.util.Random 113 | 114 | for (i <- 0 until n_samples) { 115 | var x = rnd.nextInt(cards) 116 | while (set.contains(x)) x = rnd.nextInt(cards) 117 | set += x 118 | } 119 | 120 | var i = 0 121 | val samples = mutable.HashSet[Int]() 122 | val iter = sample_base.iterator() 123 | while (iter.hasNext) { 124 | val x = iter.next() 125 | if (set.contains(i)) samples += x 126 | i = i + 1 127 | } 128 | 129 | val bc_samples = sc.broadcast(samples.toSet) 130 | 131 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 132 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 133 | .flatMap(x => x._1.filter(now => samples.contains(now._2.traj_id)).map(x => x._2.traj_id -> x._1)) 134 | .groupByKey(Math.min(samples.size, sc.defaultParallelism)) 135 | .map(x => Trajectory.hausdorffDistance(bc_query.value, x._2.toArray)).takeOrdered(k).last 136 | 137 | val end2 = System.currentTimeMillis() 138 | 139 | println("------------------------------------------------------------") 140 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 141 | println("The pruning bound is: " + pruning_bound) 142 | 143 | val start3 = System.currentTimeMillis() 144 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 145 | val global_prune_set = global_prune.map(_._2).toSet 146 | 147 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 148 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 149 | .aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, b), (a, b) => RoaringBitmap.or(a, b)) 150 | 151 | val bc_pruning_bound = sc.broadcast(pruning_bound) 152 | val saved_traj_local = pruned_rdd.map(part => { 153 | RoaringBitmap.andNot(part._2.root.rr, part._2.antiCircleRangeBF(bc_query.value, bc_pruning_bound.value)) 154 | }).reduce((a, b) => RoaringBitmap.or(a, b)) 155 | 156 | val saved_traj = RoaringBitmap.andNot(saved_traj_local, pruned_traj_id1) 157 | 158 | val end3 = System.currentTimeMillis() 159 | 160 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 161 | 162 | val start4 = System.currentTimeMillis() 163 | val bc_saved_traj = sc.broadcast(saved_traj_local.toArray) 164 | 165 | val final_filter_set = global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), max_spatial_span) 166 | .map(_._2).toSet 167 | 168 | val final_filtered = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 169 | .flatMap(x => { 170 | x._1.filter(now => bc_saved_traj.value.contains(now._2.traj_id)).map(x => x._2.traj_id -> x._1) 171 | }) 172 | 173 | val res = final_filtered.groupByKey(sc.defaultParallelism) 174 | .map(x => (Trajectory.hausdorffDistance(bc_query.value, x._2.toArray), x._1)) 175 | .takeOrdered(k)(new ResultOrdering) 176 | 177 | val end4 = System.currentTimeMillis() 178 | tot_time += (end4 - start2) / 1000.0 179 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 180 | println("# of distance calculated: " + (c * k + saved_traj.getCardinality)) 181 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 182 | println("The results show as below:") 183 | res.foreach(println) 184 | 
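// Note on the index sampling above: drawing c * k distinct positions by rejection
// (redrawing on collision) is fine while c * k << cards; an equivalent collision-free
// alternative would be, e.g.:
//   val positions = rnd.shuffle((0 until cards).toVector).take(n_samples).toSet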
println("------------------------------------------------------------") 185 | tot_time += (end4 - start2) / 1000.0 186 | }) 187 | 188 | printf("Average Latency: " + (tot_time / 100.0)) 189 | 190 | sc.stop() 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/Relabel.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | /** 7 | * Created by dongx on 10/5/16. 8 | */ 9 | object Relabel { 10 | case class TrajMeta(traj_id: String, seg_id: Int) 11 | 12 | def main(args: Array[String]): Unit = { 13 | val sparkConf = new SparkConf().setAppName("Relabel") 14 | val sc = new SparkContext(sparkConf) 15 | 16 | if (args.length != 2) { 17 | println("usage: Relabel ") 18 | System.exit(1) 19 | } 20 | 21 | Thread.sleep(3000) 22 | 23 | val input_file_name = args(0) 24 | val output_file_name = args(1) 25 | 26 | sc.textFile(input_file_name, 900).map(x => { 27 | val splitted = x.split('\t') 28 | (splitted(0), 29 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 30 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))), splitted(5)) 31 | }).groupBy(_._1) 32 | .zipWithIndex() 33 | .flatMap(x => x._1._2.map(now => x._2.toString + "\t" + now._2.toTSV + "\t" + now._3)) 34 | .saveAsTextFile(output_file_name) 35 | 36 | sc.stop() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/SpatialSpanClustering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{BufferedWriter, File, FileWriter} 4 | 5 | import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} 6 | import edu.utah.cs.partitioner.STRMBRPartition 7 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} 8 | import edu.utah.cs.util._ 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.geotools.geojson.geom.GeometryJSON 11 | 12 | /** 13 | * Created by Dong Xie on 10/24/2016. 
14 | */ 15 | object SpatialSpanClustering { 16 | final val max_entries_per_node = 25 17 | 18 | def getMBR(x: (Int, Array[(Int, LineSegment)])): (MBR, Int) = { 19 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end)) 20 | var maxx = Double.MinValue 21 | var maxy = Double.MinValue 22 | var minx = Double.MaxValue 23 | var miny = Double.MaxValue 24 | pts.foreach(x => { 25 | maxx = Math.max(x.coord(0), maxx) 26 | maxy = Math.max(x.coord(1), maxy) 27 | minx = Math.min(x.coord(0), minx) 28 | miny = Math.min(x.coord(1), miny) 29 | }) 30 | (MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))), x._1) 31 | } 32 | 33 | def main(args: Array[String]): Unit = { 34 | val sc = new SparkContext(new SparkConf().setAppName("SpatialSpanClustering")) 35 | 36 | if (args.length < 2) { 37 | println("usage: SpatialSpanClustering ") 38 | System.exit(1) 39 | } 40 | 41 | val input_file_path = args(0) 42 | val output_file_path = args(1) 43 | 44 | val bf_meta = BloomFilterMeta(10000, 1) 45 | val bc_bf_meta = sc.broadcast(bf_meta) 46 | BloomFilter.meta = bf_meta 47 | 48 | val mbrs = sc.textFile(input_file_path).mapPartitions(iter => { 49 | iter.map(x => { 50 | val splitted = x.split("\t") 51 | (splitted(0).toInt, 52 | LineSegment(Point(Array(splitted(2).toDouble, splitted(1).toDouble)), 53 | Point(Array(splitted(4).toDouble, splitted(3).toDouble)))) 54 | }).toArray.groupBy(_._1).map(now => getMBR(now)).iterator 55 | }) 56 | 57 | val num_partitions = mbrs.getNumPartitions * 4 58 | 59 | val partitioned_rdd = STRMBRPartition(mbrs, num_partitions, 0.01, max_entries_per_node) 60 | 61 | val part_bounds = partitioned_rdd.mapPartitions(iter => { 62 | if (iter.nonEmpty) { 63 | var maxx = Double.MinValue 64 | var maxy = Double.MinValue 65 | var minx = Double.MaxValue 66 | var miny = Double.MaxValue 67 | iter.map(_._1).foreach(x => { 68 | maxx = Math.max(x.high.coord(0), maxx) 69 | maxy = Math.max(x.high.coord(1), maxy) 70 | minx = Math.min(x.low.coord(0), minx) 71 | miny = Math.min(x.low.coord(1), miny) 72 | }) 73 | Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator 74 | } else Array().iterator 75 | }).collect() 76 | 77 | val file = new File(output_file_path) 78 | val bw = new BufferedWriter(new FileWriter(file)) 79 | 80 | val collection = new GeometryCollection(part_bounds.map(x => 81 | Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), 82 | x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) 83 | 84 | new GeometryJSON().writeGeometryCollection(collection, bw) 85 | 86 | bw.close() 87 | 88 | sc.stop() 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/SpatialSpanFiltering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{Point, LineSegment} 4 | import org.apache.spark.{SparkContext, SparkConf} 5 | 6 | /** 7 | * Created by Dong Xie on 10/23/2016. 
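 * 
 * Keeps only trajectories whose "spatial span" (the diagonal of the bounding box) lies
 * in (0.001, 0.5080) and that have more than 20 segments. The span in isolation,
 * mirroring getStats below:
 * {{{
 *   def spatialSpan(minx: Double, miny: Double, maxx: Double, maxy: Double): Double =
 *     Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy)))
 * }}}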
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/SpatialSpanFiltering.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.spatial.{Point, LineSegment}
4 | import org.apache.spark.{SparkContext, SparkConf}
5 |
6 | /**
7 | * Created by Dong Xie on 10/23/2016.
8 | */
9 | object SpatialSpanFiltering {
10 | def getStats(x: (Int, Array[(Int, LineSegment, Int)])) = {
11 | val num_segs = x._2.length
12 | val tot_dis = x._2.map(p => p._2.length).sum
13 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
14 | var maxx = Double.MinValue
15 | var maxy = Double.MinValue
16 | var minx = Double.MaxValue
17 | var miny = Double.MaxValue
18 | pts.foreach(x => {
19 | maxx = Math.max(x.coord(0), maxx)
20 | maxy = Math.max(x.coord(1), maxy)
21 | minx = Math.min(x.coord(0), minx)
22 | miny = Math.min(x.coord(1), miny)
23 | })
24 | (x._1, num_segs, tot_dis, Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy))))
25 | }
26 |
27 | def main(args: Array[String]): Unit = {
28 | val sparkConf = new SparkConf().setAppName("SpatialSpanFiltering")//.setMaster("local[*]")
29 | val sc = new SparkContext(sparkConf)
30 |
31 | Thread.sleep(3000)
32 |
33 | if (args.length < 2) {
34 | println("usage: SpatialSpanFiltering <input_file_path> <output_file_path>")
35 | System.exit(1)
36 | }
37 |
38 | val input_file_path = args(0)
39 | val output_file_path = args(1)
40 |
41 | sc.textFile(input_file_path).mapPartitions(iter => {
42 | iter.map(x => {
43 | val splitted = x.split("\t")
44 | (splitted(0).toInt,
45 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
46 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))), splitted(5).toInt)
47 | }).toArray.groupBy(_._1).filter(now => {
48 | val stat = getStats(now)
49 | stat._4 > 0.001 && stat._2 > 20 && stat._4 < 0.5080
50 | }).iterator
51 | }).repartition(800)
52 | .flatMap(x => x._2.map(now => now._1 + "\t" + now._2.toTSV + "\t" + now._3))
53 | .saveAsTextFile(output_file_path)
54 |
55 | sc.stop()
56 | }
57 | }
58 |
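The filter above keeps a trajectory only when its spatial span (the diagonal of its MBR, here in coordinate degrees) lies in (0.001, 0.5080) and it has more than 20 segments; the thresholds look dataset-specific. The same rule as a hypothetical standalone predicate over the `getStats` tuple:

```scala
// stat is (traj_id, num_segs, total_distance, span) as produced by getStats.
def keepTrajectory(stat: (Int, Int, Double, Double)): Boolean = {
  val (_, numSegs, _, span) = stat
  span > 0.001 && span < 0.5080 && numSegs > 20
}
```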
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/SpatialSpanStat.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import java.io._
4 |
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 | * Created by dongx on 10/5/16.
10 | */
11 | object SpatialSpanStat {
12 | def getStats(x: (Int, Array[(Int, LineSegment)])) = {
13 | val num_segs = x._2.length
14 | val tot_dis = x._2.map(p => p._2.length).sum
15 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
16 | var maxx = Double.MinValue
17 | var maxy = Double.MinValue
18 | var minx = Double.MaxValue
19 | var miny = Double.MaxValue
20 | pts.foreach(x => {
21 | maxx = Math.max(x.coord(0), maxx)
22 | maxy = Math.max(x.coord(1), maxy)
23 | minx = Math.min(x.coord(0), minx)
24 | miny = Math.min(x.coord(1), miny)
25 | })
26 | (x._1, num_segs, tot_dis, Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy))))
27 | }
28 |
29 | def main(args: Array[String]): Unit = {
30 | val sparkConf = new SparkConf().setAppName("SpatialSpanStat")//.setMaster("local[*]")
31 | val sc = new SparkContext(sparkConf)
32 |
33 | Thread.sleep(3000)
34 |
35 | if (args.length < 2) {
36 | println("usage: SpatialSpanStat <input_file_path> <output_file_path>")
37 | System.exit(1)
38 | }
39 |
40 | val input_file_path = args(0)
41 | val output_file_path = args(1)
42 |
43 | val stats = sc.textFile(input_file_path).mapPartitions(iter => {
44 | iter.map(x => {
45 | val splitted = x.split("\t")
46 | (splitted(0).toInt,
47 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
48 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
49 | }).toArray.groupBy(_._1).map(now => getStats(now)).iterator
50 | }).collect().sortBy(_._1)
51 |
52 | val file = new File(output_file_path)
53 | val bw = new BufferedWriter(new FileWriter(file))
54 |
55 | stats.foreach(x => bw.write(x._1 + "\t" + x._2 + "\t" + "%.6f".format(x._3)
56 | + "\t" + "%.6f".format(x._4) + "\n"))
57 |
58 | bw.close()
59 |
60 | sc.stop()
61 | }
62 | }
63 |
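`SpatialSpanStat` writes one TSV line per trajectory: id, segment count, total length, and span, with the two doubles formatted to six decimals. A small sketch for reading a stat line back, assuming that field order:

```scala
// Parse "id<TAB>num_segs<TAB>total_distance<TAB>span" back into a tuple.
def parseStatLine(line: String): (Int, Int, Double, Double) = {
  val f = line.split('\t')
  (f(0).toInt, f(1).toInt, f(2).toDouble, f(3).toDouble)
}
```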
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/TrajIndexing.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.RTree
4 | import edu.utah.cs.partitioner.STRTrajPartition
5 | import edu.utah.cs.spatial.{LineSegment, MBR, Point}
6 | import org.apache.spark.rdd.PartitionPruningRDD
7 | import org.apache.spark.storage.StorageLevel
8 | import org.apache.spark.{SparkConf, SparkContext}
9 |
10 | import scala.io.Source
11 |
12 | /**
13 | * Created by dongx on 1/16/2017.
14 | */
15 | object TrajIndexing {
16 | final val max_entries_per_node = 25
17 | //final val k_values = Array(1, 10, 30, 50, 70, 100)
18 | final val k_values = Array(10)
19 | //final val k = 10
20 | final val N = 1401138
21 | final val c = 5
22 | //final val c_values = Array(1, 3, 5, 7, 10)
23 | //final val c_values = Array(5)
24 |
25 | private class ResultOrdering extends Ordering[(Double, Int)] {
26 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1)
27 | }
28 |
29 | def getMBR(x: (Int, Array[(Int, LineSegment)])): MBR = {
30 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
31 | var maxx = Double.MinValue
32 | var maxy = Double.MinValue
33 | var minx = Double.MaxValue
34 | var miny = Double.MaxValue
35 | pts.foreach(x => {
36 | maxx = Math.max(x.coord(0), maxx)
37 | maxy = Math.max(x.coord(1), maxy)
38 | minx = Math.min(x.coord(0), minx)
39 | miny = Math.min(x.coord(1), miny)
40 | })
41 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))
42 | }
43 |
44 | def main(args: Array[String]) : Unit = {
45 | val sparkConf = new SparkConf().setAppName("TrajIndexing").set("spark.locality.wait", "0")
46 | .set("spark.driver.maxResultSize", "4g")
47 | val sc = new SparkContext(sparkConf)
48 |
49 | if (args.length != 2) {
50 | println("usage: TrajIndexing <query_traj_file_path> <traj_data_file_path>")
51 | System.exit(1)
52 | }
53 |
54 | val query_traj_filename = args(0)
55 | val traj_data_filename = args(1)
56 |
57 | val query_traj_file = Source.fromFile(query_traj_filename)
58 | val queries = query_traj_file.getLines().map { line =>
59 | val splitted = line.split('\t')
60 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
61 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
62 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2))
63 |
64 | Thread.sleep(6000)
65 |
66 | val start1 = System.currentTimeMillis()
67 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => {
68 | iter.map(x => {
69 | val splitted = x.split("\t")
70 | (splitted(0).toInt,
71 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
72 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
73 | }).toArray.groupBy(_._1).map(now => (getMBR(now), (now._1, now._2.sortBy(_._1).map(_._2)))).iterator
74 | })
75 |
76 | val partitioned_traj = STRTrajPartition(trajs, trajs.partitions.length, 0.01, max_entries_per_node)
77 |
78 |
79 | val indexed_traj = partitioned_traj.mapPartitions(iter => {
80 | val data = iter.toArray
81 | var index: RTree = null
82 | if (data.length > 0) {
83 | index = RTree(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2._1)), 25)
84 | }
85 | Array((data.map(_._2), index)).iterator
86 | }).persist(StorageLevel.MEMORY_AND_DISK_SER)
87 |
88 | val stat = indexed_traj.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length))).collect()
89 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node)
90 |
91 | val end1 = System.currentTimeMillis()
92 | println("------------------------------------------------------------")
93 | println("Time to build indexes: " + (end1 - start1) / 1000.0)
94 | println("------------------------------------------------------------")
95 |
96 | k_values.foreach(k => {
97 | var tot_time = 0.0
98 | queries.foreach(query_traj => {
99 | val start2 = System.currentTimeMillis()
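The per-query block below implements a sample-then-prune kNN: it samples `c * k` candidate trajectories from partitions that intersect the query, takes the k-th smallest distance among them as a pruning bound, and only refines trajectories within that radius. The bound is safe because the k-th smallest distance over any candidate subset can only overestimate the true k-th nearest distance; a toy illustration with made-up distances:

```scala
// Made-up distances from a query to six trajectories, and to a 3-element sample.
val allDists    = Array(0.9, 0.2, 0.7, 0.4, 0.1, 0.6)
val sampleDists = Array(0.9, 0.4, 0.7) // distances for c * k sampled candidates
val k = 2

val bound   = sampleDists.sorted.take(k).last // 0.7: k-th best within the sample
val trueKth = allDists.sorted.take(k).last    // 0.2: k-th best overall

// Every true k-NN lies within `bound`, so range-pruning at `bound` loses nothing.
assert(trueKth <= bound)
```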
println("------------------------------------------------------------") 101 | val bc_query = sc.broadcast(query_traj) 102 | val global_intersect = global_rtree.circleRange(query_traj, 0.0).map(_._2).toSet 103 | //val c = global_intersect.size 104 | println("Going to Sample:" + (c * k)) 105 | val sample_set = new PartitionPruningRDD(indexed_traj, global_intersect.contains).flatMap(_._1) 106 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()) 107 | 108 | val pruning_bound = sc.parallelize(sample_set, Math.min(c * k, sc.defaultParallelism)) 109 | .map(x => Trajectory.discreteFrechetDistance(x._2, bc_query.value)).collect().sorted.take(k).last 110 | //.map(x => Trajectory.hausdorffDistance(x._2, bc_query.value)).collect().sorted.take(k).last 111 | val end2 = System.currentTimeMillis() 112 | 113 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 114 | println("The pruning bound is: " + pruning_bound) 115 | 116 | val start3 = System.currentTimeMillis() 117 | val bc_pruning_bound = sc.broadcast(pruning_bound) 118 | val global_prune_set = global_rtree.circleRange(query_traj, pruning_bound).map(_._2).toSet 119 | 120 | val pruned_rdd = new PartitionPruningRDD(indexed_traj, global_prune_set.contains) 121 | val filtered = pruned_rdd.flatMap(part => part._2.circleRange(bc_query.value, bc_pruning_bound.value) 122 | .map(x => part._1(x._2))) 123 | val res = filtered.repartition(Math.max(sc.defaultParallelism, filtered.partitions.length)) 124 | .map(x => (Trajectory.discreteFrechetDistance(bc_query.value, x._2), x._1)) 125 | //.map(x => (Trajectory.hausdorffDistance(bc_query.value, x._2), x._1)) 126 | .takeOrdered(k)(new ResultOrdering) 127 | 128 | val end3 = System.currentTimeMillis() 129 | println("# distance calculated: " + (filtered.count() + c * k)) 130 | println("Time to calculate Finalize Result: " + (end3 - start3) / 1000.0) 131 | println("Total Latency: " + ((end3 - start2) / 1000.0)) 132 | println("The results show as below:") 133 | res.foreach(println) 134 | println("------------------------------------------------------------") 135 | tot_time += (end3 - start2) / 1000.0 136 | bc_query.destroy() 137 | bc_pruning_bound.destroy() 138 | }) 139 | 140 | println("Average Latency for k = " + k + " is : " + (tot_time / 100.0)) 141 | println("===================================================") 142 | }) 143 | 144 | sc.stop() 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/TrajObjects.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | 5 | case class TrajMeta(traj_id: Int, seg_id: Int) 6 | 7 | case class Trajectory(id: Int, segments: Array[Point]) { 8 | def distanceFrom(otherTraj: Trajectory): Double = { 9 | Math.min(Trajectory.hDistance(this, otherTraj), Trajectory.hDistance(otherTraj, this)) 10 | } 11 | } 12 | 13 | object Trajectory { 14 | def RDPCompress(traj: Array[Point], epsilon: Double): Array[Point] = { 15 | val baseLineSeg = LineSegment(traj.head, traj.last) 16 | val dmax = traj.map(x => x.minDist(baseLineSeg)).zipWithIndex.maxBy(_._1) 17 | if (dmax._1 > epsilon) { 18 | RDPCompress(traj.slice(0, dmax._2 + 1), epsilon) ++ RDPCompress(traj.slice(dmax._2, traj.length), epsilon) 19 | } else { 20 | Array(traj.head, traj.last) 21 | } 22 | } 23 | 24 | def parseLine(line: String): Trajectory = { 25 | val splitted = line.split(" ") 26 | 
26 | Trajectory(splitted(0).toInt, splitted.iterator.drop(1).map(_.toDouble).grouped(2).map(seq => Point(Array(seq(0), seq(1)))).toArray)
27 | }
28 |
29 | def hDistance(traj1: Trajectory, traj2: Trajectory): Double = {
30 | traj1.segments.iterator.take(traj1.segments.length - 1).zip(traj1.segments.iterator.drop(1)).map {
31 | case (q0, q1) =>
32 | val qSegment = LineSegment(q0, q1)
33 | traj2.segments.iterator.take(traj2.segments.length - 1).zip(traj2.segments.iterator.drop(1)).map {
34 | case (p0, p1) => qSegment.minDist(LineSegment(p0, p1))
35 | }.min
36 | }.max
37 | }
38 |
39 | def distanceFrom(seg_iter: Iterable[(Int, LineSegment)],
40 | traj2: Array[LineSegment]): Double = {
41 | seg_iter.map { case (_, seg1) =>
42 | traj2.iterator.map { seg2 =>
43 | seg1.minDist(seg2)
44 | }.min
45 | }.max
46 | }
47 |
48 | def hausdorffDistance(x: Array[LineSegment], y: Array[LineSegment]): Double = {
49 | Math.max(x.map(seg_1 => y.map(seg_2 => seg_1.matchDist(seg_2)).min).max,
50 | y.map(seg_1 => x.map(seg_2 => seg_1.matchDist(seg_2)).min).max)
51 | }
52 |
53 | def discreteFrechetDistance(x: Array[LineSegment], y: Array[LineSegment]): Double = {
54 | val n = x.length
55 | val m = y.length
56 | val ca: Array[Array[Double]] = Array.fill[Double](n, m)(-1.0)
57 | var i = 0
58 | while (i < n) {
59 | var j = 0
60 | while (j < m) {
61 | if (i == 0 && j == 0) ca(i)(j) = x(i).matchDist(y(j))
62 | else if (i == 0) ca(i)(j) = Math.max(ca(i)(j - 1), x(i).matchDist(y(j)))
63 | else if (j == 0) ca(i)(j) = Math.max(ca(i - 1)(j), x(i).matchDist(y(j)))
64 | else ca(i)(j) = Math.max(Math.min(Math.min(ca(i - 1)(j), ca(i)(j - 1)), ca(i - 1)(j - 1)), x(i).matchDist(y(j)))
65 | j += 1
66 | }
67 | i += 1
68 | }
69 | ca.last.last
70 | }
71 | }
72 |
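A tiny usage sketch for the two segment-set distances defined above, on made-up single-segment trajectories. Both are built from the pairwise `matchDist`; for any pair of inputs the discrete Frechet value is at least the Hausdorff value, since Frechet's best coupling still has to touch every segment:

```scala
import edu.utah.cs.spatial.{LineSegment, Point}

def seg(x1: Double, y1: Double, x2: Double, y2: Double): LineSegment =
  LineSegment(Point(Array(x1, y1)), Point(Array(x2, y2)))

// Two parallel one-segment "trajectories", one unit apart.
val a = Array(seg(0.0, 0.0, 1.0, 0.0))
val b = Array(seg(0.0, 1.0, 1.0, 1.0))

val h = Trajectory.hausdorffDistance(a, b)
val f = Trajectory.discreteFrechetDistance(a, b)
assert(h <= f) // Hausdorff never exceeds discrete Frechet on the same pair
```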
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/TrajSampling.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import java.io.{BufferedWriter, File, FileWriter}
4 |
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 | * Created by dongx on 1/17/17.
10 | */
11 | object TrajSampling {
12 | def main(args: Array[String]): Unit = {
13 | val sparkConf = new SparkConf().setAppName("TrajSampling")
14 | val sc = new SparkContext(sparkConf)
15 |
16 | Thread.sleep(3000)
17 |
18 | if (args.length < 3) {
19 | println("usage: TrajSampling <input_file_path> <output_file_path> <sample_count>")
20 | System.exit(1)
21 | }
22 |
23 | val input_file_path = args(0)
24 | val output_file_path = args(1)
25 | val cnt = args(2).toInt
26 |
27 | val sampled_trajs = sc.textFile(input_file_path).mapPartitions(iter => {
28 | iter.map(x => {
29 | val splitted = x.split("\t")
30 | (splitted(0).toInt,
31 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
32 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
33 | }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator
34 | }).takeSample(withReplacement = false, cnt, System.currentTimeMillis())
35 |
36 | val file = new File(output_file_path)
37 | val bw = new BufferedWriter(new FileWriter(file))
38 |
39 | for (i <- sampled_trajs.indices) {
40 | val cur_traj = sampled_trajs(i)._2
41 | cur_traj.foreach(x => bw.write(i + "\t" + x.toTSV + "\n"))
42 | }
43 |
44 | bw.close()
45 |
46 | sc.stop()
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/VPTreeST.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.VPTree
4 | import edu.utah.cs.spatial.{LineSegment, Point}
5 | import edu.utah.cs.util.MetricObject
6 |
7 | import scala.io.Source
8 | import scala.collection.mutable
9 |
10 | /**
11 | * Created by dongx on 2/1/17.
12 | */
13 | object VPTreeST {
14 | //final val k_values = Array(10, 30, 50, 70, 100)
15 | final val k = 10
16 |
17 | private case class VPTraj(id: Int, data: Array[LineSegment]) extends MetricObject {
18 | override def distance(o: MetricObject): Double = {
19 | Trajectory.hausdorffDistance(data, o.asInstanceOf[VPTraj].data)
20 | }
21 | }
22 |
23 | def main(args: Array[String]) : Unit = {
24 | if (args.length != 2) {
25 | println("usage: VPTreeST <query_traj_file_path> <traj_data_file_path>")
26 | System.exit(1)
27 | }
28 |
29 | val query_traj_filename = args(0)
30 | val traj_data_filename = args(1)
31 |
32 | val query_traj_file = Source.fromFile(query_traj_filename)
33 | val query_traj = query_traj_file.getLines().map { line =>
34 | val splitted = line.split('\t')
35 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
36 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))
37 | }.toArray
38 |
39 | val traj_data_file = Source.fromFile(traj_data_filename)
40 | val cur_traj = mutable.ListBuffer[LineSegment]()
41 | val trajs = mutable.ListBuffer[VPTraj]()
42 | val ans = mutable.ListBuffer[(Double, Int)]()
43 | var last_traj_id = -1
44 | val new_iter = traj_data_file.getLines().map(cur => {
45 | val x = cur.split("\t")
46 | (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), Point(Array(x(3).toDouble, x(4).toDouble))), x(0).toInt)
47 | })
48 | var i = 0
49 | while (new_iter.hasNext) {
50 | val now = new_iter.next
51 | if (now._2 != last_traj_id) {
52 | if (cur_traj.nonEmpty) trajs += VPTraj(last_traj_id, cur_traj.toArray)
53 | last_traj_id = now._2
54 | i += 1
55 | //println("checking " + i + " trajectory....")
56 | cur_traj.clear()
57 | }
58 | cur_traj += now._1
59 | }
60 | if (cur_traj.nonEmpty) trajs += VPTraj(last_traj_id, cur_traj.toArray)
61 | //assert(ans.size == N)
62 | val tree = VPTree(trajs.toArray)
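The `tree.knn` call that follows returns, as used throughout this repo, a pair of (neighbor list, number of distance computations). For small inputs it can be sanity-checked against a brute-force scan; a sketch reusing the `trajs` and `query_traj` already in scope:

```scala
// Brute force: rank every loaded trajectory by Hausdorff distance to the
// query and keep the k closest -- the ids should match the VP-tree answer.
val expected = trajs.toArray
  .map(t => (Trajectory.hausdorffDistance(t.data, query_traj), t.id))
  .sortBy(_._1)
  .take(k)
expected.foreach(println)
```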
63 | tree.knn(VPTraj(-1, query_traj), k)._1.map(x => (x._1.id, x._2)).foreach(println)
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/VPTreeSolution.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.VPTree
4 | import edu.utah.cs.partitioner.IDPartition
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import edu.utah.cs.util.MetricObject
7 | import org.apache.spark.rdd.PartitionPruningRDD
8 | import org.apache.spark.storage.StorageLevel
9 | import org.apache.spark.{SparkConf, SparkContext}
10 |
11 | import scala.io.Source
12 | import scala.util.Random
13 |
14 | /**
15 | * Created by dongx on 2/1/17.
16 | */
17 | object VPTreeSolution {
18 | final val max_entries_per_node = 25
19 | //final val k_values = Array(10, 30, 50, 70, 100)
20 | final val k = 10
21 | final val c = 5
22 |
23 | private case class VPTraj(id: Int, data: Array[LineSegment]) extends MetricObject {
24 | override def distance(o: MetricObject): Double = {
25 | Trajectory.hausdorffDistance(data, o.asInstanceOf[VPTraj].data)
26 | }
27 | }
28 |
29 | private class ResultOrdering extends Ordering[(Double, Int)] {
30 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1)
31 | }
32 |
33 | private def shuffle[T](data: Array[T]) = {
34 | var i = 0
35 | val n = data.length
36 | while (i < n - 1) {
37 | val tmp = i + Random.nextInt(n - i)
38 | val t = data(i)
39 | data(i) = data(tmp)
40 | data(tmp) = t // complete the Fisher-Yates swap
41 | i += 1
42 | }
43 | }
44 |
45 | def main(args: Array[String]) : Unit = {
46 | val sparkConf = new SparkConf().setAppName("VPTreeSolution")
47 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g")
48 | val sc = new SparkContext(sparkConf)
49 |
50 | if (args.length != 2) {
51 | println("usage: VPTreeSolution <query_traj_file_path> <traj_data_file_path>")
52 | System.exit(1)
53 | }
54 |
55 | val query_traj_filename = args(0)
56 | val traj_data_filename = args(1)
57 |
58 | val query_traj_file = Source.fromFile(query_traj_filename)
59 | val queries = query_traj_file.getLines().map { line =>
60 | val splitted = line.split('\t')
61 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
62 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
63 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2))
64 |
65 | Thread.sleep(6000)
66 |
67 | val start1 = System.currentTimeMillis()
68 |
69 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => {
70 | iter.map(x => {
71 | val splitted = x.split("\t")
72 | (splitted(0).toInt,
73 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
74 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
75 | }).toArray.groupBy(_._1).map(now => VPTraj(now._1, now._2.map(_._2))).iterator
76 | })
77 |
78 | val pivots = trajs.takeSample(withReplacement = false, trajs.partitions.length, System.currentTimeMillis()).map(_.data)
79 | val bc_pivots = sc.broadcast(pivots)
80 | val bc_pivots_vptree = sc.broadcast(VPTree(pivots.zipWithIndex.map(x => VPTraj(x._2, x._1))))
81 | val traj_with_pivot = trajs.mapPartitions(iter => {
82 | iter.map(x => {
83 | (bc_pivots_vptree.value.knn(x, 1)._1.head._1.id, x)
84 | })
85 | })
86 | val parted_by_pivot = IDPartition(traj_with_pivot, pivots.length)
87 | val indexed = parted_by_pivot.mapPartitionsWithIndex((id, iter) => {
88 | val data = iter.map(_._2.asInstanceOf[VPTraj]).toArray
89 | val pivot = bc_pivots.value(id)
90 | val cover_radius = data.map(x => Trajectory.hausdorffDistance(x.data, pivot)).max
91 | val vp_tree = VPTree(data)
92 | Array((pivot, cover_radius, data.length, vp_tree)).iterator
93 | }).persist(StorageLevel.MEMORY_AND_DISK_SER)
94 |
95 | val stats = indexed.map(x => (x._1, x._2, x._3)).collect()
96 | .zipWithIndex.map(x => (x._1._1, x._1._2, x._1._3, x._2))
97 |
98 | val end1 = System.currentTimeMillis()
99 | println("Time to build index: " + ((end1 - start1) / 1000.0))
100 |
101 |
102 | var tot_time = 0.0
103 | queries.foreach(query => {
104 | val start2 = System.currentTimeMillis()
105 | println("----------------------------------------------")
106 | val sorted_pivots = stats.map(x => (Trajectory.hausdorffDistance(x._1, query), x._2, x._3, x._4)).sortBy(_._1)
107 | var i = 0
108 | var sum = 0
109 | while (sum < k) {
110 | sum += sorted_pivots(i)._3
111 | i += 1
112 | }
113 |
114 | val prune_set = sorted_pivots.slice(0, i).map(_._4).toSet
115 | val bc_query = sc.broadcast(query)
116 | val bc_k = sc.broadcast(k)
117 | // val first_filter = new PartitionPruningRDD(indexed, prune_set.contains)
118 | //   .flatMap(i_part => {
119 | //     i_part._4.knn(VPTraj(0, bc_query.value), bc_k.value)._1.map(x => (x._2, x._1.id))
120 | //   }).takeOrdered(k)(new ResultOrdering)
121 |
122 | val first_filter = new PartitionPruningRDD(indexed, prune_set.contains)
123 | .aggregate((Array[(Double, Int)](), 0))((now, part) => {
124 | val knn_res = part._4.knn(VPTraj(0, bc_query.value), bc_k.value)
125 | ((knn_res._1.map(x => (x._2, x._1.id)) ++ now._1).sortBy(_._1).take(bc_k.value), now._2 + knn_res._2)
126 | }, (left, right) => {
127 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2)
128 | })
129 |
130 | val tick1 = System.currentTimeMillis()
131 | println("Time for first filter: " + ((tick1 - start2) / 1000.0))
132 |
133 | val pruning_bound = first_filter._1.last._1
134 | val global_prune_set =
135 | sorted_pivots.filter(x => x._1 - x._2 <= pruning_bound).map(_._4).toSet -- prune_set
136 | val bc_pruning_bound = sc.broadcast(pruning_bound)
137 |
138 | // val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains)
139 | //   .flatMap(i_part => {
140 | //     i_part._4.knn(VPTraj(0, bc_query.value), k, bc_pruning_bound.value)._1.map(x => (x._2, x._1.id))
141 | //   }).takeOrdered(k)(new ResultOrdering)
142 |
143 | val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains)
144 | .aggregate((Array[(Double, Int)](), 0))((now, part) => {
145 | val knn_res = part._4.knn(VPTraj(0, bc_query.value), bc_k.value, bc_pruning_bound.value)
146 | ((knn_res._1.map(x => (x._2, x._1.id)) ++ now._1).sortBy(_._1).take(bc_k.value), now._2 + knn_res._2)
147 | }, (left, right) => {
148 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2)
149 | })
150 |
151 | val final_res = (first_filter._1 ++ second_filter._1).sortBy(_._1).take(k)
152 |
153 | val end2 = System.currentTimeMillis()
154 | println("Time for second filter and final merge: " + ((end2 - tick1) / 1000.0))
155 | println("# of trajs checked distance: " + (first_filter._2 + second_filter._2 + pivots.length))
156 | println("Total Latency: " + ((end2 - start2) / 1000.0))
157 | final_res.foreach(println)
158 | tot_time += (end2 - start2) / 1000.0
159 | println("----------------------------------------------")
160 | bc_k.destroy()
161 | bc_query.destroy()
162 | bc_pruning_bound.destroy()
163 | })
164 |
165 | println("Average Latency: " + (tot_time / queries.size))
166 |
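Both `aggregate` calls in the loop above maintain a running top-k: each partition's VP-tree contributes its local k best `(distance, id)` pairs plus a distance-computation count, and the combiner merges candidate lists by concatenating, re-sorting, and truncating. The merge step in isolation, as a sketch:

```scala
// Merge two top-k candidate lists into one; sorting k + k pairs is cheap.
def mergeTopK(left: Array[(Double, Int)], right: Array[(Double, Int)],
              k: Int): Array[(Double, Int)] =
  (left ++ right).sortBy(_._1).take(k)

// mergeTopK(Array((0.3, 7)), Array((0.1, 2), (0.5, 9)), 2)
//   == Array((0.1, 2), (0.3, 7))
```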
167 | bc_pivots.destroy()
168 | bc_pivots_vptree.destroy()
169 | sc.stop()
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BitArray.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 10/4/16.
5 | */
6 | object BitArray {
7 | def create(length: Int): Array[Int] = {
8 | Array.fill[Int](math.ceil(length / 32.0).toInt){0}
9 | }
10 |
11 | def get(bytes: Array[Int], id: Int) = {
12 | (bytes(id / 32) & (1 << (id % 32))) != 0
13 | }
14 |
15 | def set(bytes: Array[Int], id: Int) = {
16 | bytes(id / 32) = bytes(id / 32) | (1 << (id % 32))
17 | }
18 |
19 | def or(a: Array[Int], b: Array[Int]) = {
20 | a.zip(b).map(x => x._1 | x._2)
21 | }
22 |
23 | def and(a: Array[Int], b: Array[Int]) = {
24 | a.zip(b).map(x => x._1 & x._2)
25 | }
26 |
27 | def flip(a: Array[Int]) = a.map(~_)
28 |
29 | def count(a: Array[Int]) = {
30 | a.map(x => Integer.bitCount(x)).sum // popcount per 32-bit word
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BitMap.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 1/19/17.
5 | */
6 | case class BitMapMeta(num_bits: Int)
7 |
8 | object BitMap {
9 | var meta: BitMapMeta = null
10 |
11 | def put(bf: Array[Int], key: Int): Unit = BitArray.set(bf, key)
12 |
13 | def contains(bf: Array[Int], key: Int): Boolean = BitArray.get(bf, key)
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BloomFilter.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | import scala.util.Random
4 |
5 | /**
6 | * Created by dongx on 10/4/16.
7 | */
8 | case class BloomFilterMeta(num_bits: Int, num_hashs: Int) {
9 | val seeds = (1 to num_hashs).map(x => (Random.nextInt(Integer.MAX_VALUE), Random.nextInt(Integer.MAX_VALUE)))
10 | }
11 |
12 | object BloomFilter {
13 | var meta: BloomFilterMeta = null
14 |
15 | private def calcHash(seed: (Int, Int), key: Int) =
16 | ((((seed._1.toLong % meta.num_bits) * (key % meta.num_bits) + seed._2 % meta.num_bits) % meta.num_bits + meta.num_bits) % meta.num_bits).toInt // affine hash into [0, num_bits); Long math avoids Int overflow
17 |
18 | def put(bf: Array[Int], key: Int): Unit = {
19 | meta.seeds.foreach(seed => {
20 | BitArray.set(bf, calcHash(seed, key))
21 | })
22 | }
23 |
24 | def mayContains(bf: Array[Int], key: Int): Boolean = {
25 | meta.seeds.foreach(seed => {
26 | if (!BitArray.get(bf, calcHash(seed, key))) return false
27 | })
28 | true
29 | }
30 |
31 | def optimalNumBits(num_items: Long, fp_rate: Double): Int = {
32 | math.ceil(-1 * num_items * math.log(fp_rate) / math.log(2) / math.log(2)).toInt
33 | }
34 |
35 | def optimalNumHashes(num_items: Long, num_bits: Long): Int = {
36 | math.ceil(num_bits.toDouble / num_items * math.log(2)).toInt
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/MetricObject.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 2/3/17.
5 | */
6 | abstract class MetricObject {
7 | def distance(o: MetricObject): Double
8 | }
9 |
--------------------------------------------------------------------------------
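A usage sketch for the bit utilities above: size a Bloom filter for roughly a million keys at a 1% false-positive target, insert a key, and probe it. `mayContains` may report true for absent keys but never false for present ones:

```scala
import edu.utah.cs.util.{BitArray, BloomFilter, BloomFilterMeta}

object BloomFilterDemo {
  def main(args: Array[String]): Unit = {
    val numItems = 1000000L
    val numBits  = BloomFilter.optimalNumBits(numItems, 0.01)      // ~9.6M bits
    val numHashs = BloomFilter.optimalNumHashes(numItems, numBits) // ~7 hash functions

    BloomFilter.meta = BloomFilterMeta(numBits, numHashs)
    val bits = BitArray.create(numBits)

    BloomFilter.put(bits, 42)
    println(BloomFilter.mayContains(bits, 42)) // always true
    println(BloomFilter.mayContains(bits, 43)) // false with high probability
  }
}
```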