├── .gitignore ├── LICENSE ├── README.md ├── build.sbt ├── project └── assembly.sbt ├── scripts ├── convert.sh ├── gpx_parse.py ├── parse.sh ├── ptos.py └── stat_generate.py └── src └── main ├── java └── mtree │ ├── ComposedSplitFunction.java │ ├── DistanceFunction.java │ ├── DistanceFunctions.java │ ├── MTree.java │ ├── PartitionFunction.java │ ├── PartitionFunctions.java │ ├── PromotionFunction.java │ ├── PromotionFunctions.java │ ├── SplitFunction.java │ └── utils │ ├── Pair.java │ └── Utils.java └── scala └── edu └── utah └── cs ├── generator └── RandomTrajGenerator.scala ├── index ├── Index.scala ├── RTree.scala ├── VPTree.scala └── VPTreeTest.scala ├── index_bf └── RTreeWithBF.scala ├── index_bm ├── RTreeWithBM.scala └── RTreeWithBMTest.scala ├── index_rr └── RTreeWithRR.scala ├── partitioner ├── IDPartitioner.scala ├── STRMBRPartitioner.scala ├── STRSegPartitioner.scala └── STRTrajPartition.scala ├── spatial ├── Circle.scala ├── Dist.scala ├── DistanceUtil.scala ├── LineSegment.scala ├── LineString.scala ├── MBR.scala ├── Point.scala ├── Polygon.scala ├── Shape.scala └── ZValue.scala ├── trajectory ├── BFDISolution.scala ├── BaseLine.scala ├── BaseLineST.scala ├── BitMapSolution.scala ├── BloomFilterSolution.scala ├── DataSampling.scala ├── DualIndexingSolution.scala ├── LineSegmentClustering.scala ├── MTreeSolution.scala ├── RRSolution.scala ├── Relabel.scala ├── SpatialSpanClustering.scala ├── SpatialSpanFiltering.scala ├── SpatialSpanStat.scala ├── TrajIndexing.scala ├── TrajObjects.scala ├── TrajSampling.scala ├── VPTreeST.scala └── VPTreeSolution.scala └── util ├── BitArray.scala ├── BitMap.scala ├── BloomFilter.scala └── MetricObject.scala /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | project 4 | data 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 |    appropriateness of using or redistributing the Work and assume any
151 |    risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |    whether in tort (including negligence), contract, or otherwise,
155 |    unless required by applicable law (such as deliberate and grossly
156 |    negligent acts) or agreed to in writing, shall any Contributor be
157 |    liable to You for damages, including any direct, indirect, special,
158 |    incidental, or consequential damages of any character arising as a
159 |    result of this License or out of the use or inability to use the
160 |    Work (including but not limited to damages for loss of goodwill,
161 |    work stoppage, computer failure or malfunction, or any and all
162 |    other commercial damages or losses), even if such Contributor
163 |    has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |    the Work or Derivative Works thereof, You may choose to offer,
167 |    and charge a fee for, acceptance of support, warranty, indemnity,
168 |    or other liability obligations and/or rights consistent with this
169 |    License. However, in accepting such obligations, You may act only
170 |    on Your own behalf and on Your sole responsibility, not on behalf
171 |    of any other Contributor, and only if You agree to indemnify,
172 |    defend, and hold each Contributor harmless for any liability
173 |    incurred by, or claims asserted against, such Contributor by reason
174 |    of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!) The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Trajectory Similarity Search in Apache Spark
2 | ============================================
3 | This project implements the trajectory similarity search algorithm and all of its competitors described in [this paper](http://www.vldb.org/pvldb/vol10/p1478-xie.pdf).
4 | 
5 | Implemented algorithms and variants include:
6 | - **DualIndexingSolution**: Roaring Bitmap DFT w/ Dual Indexing
7 | - **RRSolution**: Roaring Bitmap DFT w/o Dual Indexing
8 | - **BFDISolution**: Bloom Filter DFT w/ Dual Indexing
9 | - **BloomFilterSolution**: Bloom Filter DFT w/o Dual Indexing
10 | - **BitMapSolution**: Raw Bitmap DFT
11 | - **TrajIndexingSolution**: Distributed R-Tree on Bounding Boxes
12 | - **VPTreeSolution**: Distributed VP-Tree over Trajectories
13 | - **MTreeSolution**: Distributed M-Tree over Trajectories
14 | - **BaseLine**: Brute Force Top-k
15 | 
16 | Build
17 | -----
18 | Call `sbt assembly` and you will get the compiled package at `target/scala-2.11/traj-sim-assembly-1.0.jar`.
19 | 
20 | Run
21 | ---
22 | Run it by feeding the package to `spark-submit`. The entry points of the different algorithms (listed above) and other utilities are located under `edu.utah.cs.trajectory`.
23 | 
24 | Contributor
25 | -----------
26 | - Dong Xie: dongx [at] cs [dot] utah [dot] edu
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | name := "traj-sim"
2 | 
3 | version := "1.0"
4 | 
5 | scalaVersion := "2.11.8"
6 | 
7 | libraryDependencies += "org.apache.spark" % "spark-core_2.11" % "2.1.0" % "provided"
8 | libraryDependencies += "org.roaringbitmap" % "RoaringBitmap" % "0.6.28"
9 | 
10 | libraryDependencies ++= Seq(
11 |   "org.geotools" % "gt-geojson" % "15.2"
12 | )
13 | 
14 | resolvers ++= Seq(
15 |   "geosolutions" at "http://maven.geo-solutions.it/",
16 |   "osgeo" at "http://download.osgeo.org/webdav/geotools/"
17 | )
--------------------------------------------------------------------------------
/project/assembly.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2")
--------------------------------------------------------------------------------
/scripts/convert.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | if [ -z "$1" ]
4 | then
5 |     echo 'You should specify an input folder....'
6 |     exit $E_MISSING_POS_PARAM
7 | fi
8 | 
9 | mkdir -p output/seg
10 | 
11 | for x in $( find "$1" -type f );
12 | do
13 |     python ptos.py "$x" output/seg
14 | done
--------------------------------------------------------------------------------
/scripts/gpx_parse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import gpxpy
4 | import gpxpy.gpx
5 | import sys
6 | 
7 | if len(sys.argv) < 3:
8 |     print "usage:", sys.argv[0], "<gpx_file> <output_dir>"
9 |     sys.exit(1)
10 | 
11 | gpx_file_name = sys.argv[1]
12 | trace_id = gpx_file_name[-13:-4]
13 | if sys.argv[2] == "-":
14 |     output_file = sys.stdout
15 | else:
16 |     output_path = sys.argv[2] + '/' + trace_id
17 |     output_file = open(output_path, 'w')
18 | 
19 | print >> sys.stderr, 'parsing', gpx_file_name, '...'
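# Each track in the GPX file becomes its own trace: every track point is
# emitted as one tab-separated row of the form "<trace_id>-<track_index> <lat> <lon>",
# matching the format string in the write() call below.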
20 | 
21 | gpx_file = open(gpx_file_name, 'r')
22 | gpx = gpxpy.parse(gpx_file)
23 | 
24 | i = 0
25 | for track in gpx.tracks:
26 |     cur_trace_id = trace_id + '-' + str(i)
27 |     for segment in track.segments:
28 |         for point in segment.points:
29 |             output_file.write('{0}\t{1:.6f}\t{2:.6f}\n'.format(cur_trace_id, point.latitude, point.longitude))
30 |     i = i + 1
31 | 
32 | gpx_file.close()
33 | if sys.argv[2] != "-":
34 |     output_file.close()
35 | #print 'GPX:', gpx.to_xml()
--------------------------------------------------------------------------------
/scripts/parse.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | mkdir -p output
4 | 
5 | if [ -z "$1" ]
6 | then
7 |     echo 'You should specify an input folder....'
8 |     exit $E_MISSING_POS_PARAM
9 | fi
10 | 
11 | mkdir -p output/$1
12 | 
13 | for x in $( find "$1" -name "*.gpx" );
14 | do
15 |     python gpx_parse.py "$x" output/$1
16 | done
--------------------------------------------------------------------------------
/scripts/ptos.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import sys
4 | 
5 | if len(sys.argv) < 3:
6 |     print "usage:", sys.argv[0], '<sample_point_file> <output_dir>'
7 |     sys.exit(1)
8 | 
9 | sample_point_csv = sys.argv[1]
10 | trace_id = sample_point_csv[-9:]
11 | 
12 | if sys.argv[2] == '-':
13 |     output_file = sys.stdout
14 | else:
15 |     output_path = sys.argv[2] + "/" + trace_id
16 |     output_file = open(output_path, 'w')
17 | 
18 | print >> sys.stderr, 'converting', sample_point_csv, '...'
19 | 
20 | sample_point_file = open(sample_point_csv, 'r')
21 | prev_traj_id = ''
22 | for line in sample_point_file:
23 |     elements = line.rstrip('\n').split('\t')
24 |     if elements[0] != prev_traj_id:
25 |         prev_traj_id = elements[0]
26 |         prev_point = (float(elements[1]), float(elements[2]))
27 |         seg_id = 0
28 |     else:
29 |         output_file.write('%s\t%.6f\t%.6f\t%.6f\t%.6f\t%d\n' % (prev_traj_id, prev_point[0], prev_point[1], float(elements[1]), float(elements[2]), seg_id))
30 |         prev_point = (float(elements[1]), float(elements[2]))
31 |         seg_id += 1
32 | sample_point_file.close()
33 | 
34 | if sys.argv[2] != '-':
35 |     output_file.close()
--------------------------------------------------------------------------------
/scripts/stat_generate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | #import matplotlib.pyplot as plt
3 | #import seaborn as sns
4 | 
5 | #sns.set(color_codes=True)
6 | 
7 | dis_arr = []
8 | pts_arr = []
9 | span_arr = []
10 | 
11 | f = open('osm_de_stat.txt', 'r')
12 | 
13 | for line in f:
14 |     splitted = line.split()
15 |     pts_arr.append(int(splitted[1]))
16 |     dis_arr.append(float(splitted[2]))
17 |     span_arr.append(float(splitted[3]))
18 | 
19 | # Filters for BJTaxi...
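# Each zipped tuple below is (distance, #points, span), so x[1] filters on
# the number of sample points and x[2] on the spatial span of a trajectory.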
20 | #data = filter(lambda x: x[1] < 129 and x[1] > 13 and x[2] > 0.01, zip(dis_arr, pts_arr, span_arr))
21 | # Filters for gen_traj_10M
22 | #data = filter(lambda x: x[1] < 523 and x[1] > 20 and x[2] > 0.1, zip(dis_arr, pts_arr, span_arr))
23 | # Filters for OSM_traj
24 | #data = filter(lambda x: x[2] > 0.001 and x[2] < 0.50 and x[1] > 50 and x[1] < 4670, zip(dis_arr, pts_arr, span_arr))
25 | 
26 | data = filter(lambda x: x[1] > 20 and x[2] > 0.001 and x[2] < 0.5080, zip(dis_arr, pts_arr, span_arr))
27 | print "# of Traj after filter:", len(data)
28 | 
29 | pts_arr = map(lambda x: x[1], data)
30 | span_arr = map(lambda x: x[2], data)
31 | dis_arr = map(lambda x: x[0], data)
32 | 
33 | print "variances (pts, dis, span):"
34 | print np.var(pts_arr)
35 | print np.var(dis_arr)
36 | print np.var(span_arr)
37 | print "max min (pts, dis, span):"
38 | print max(pts_arr), ' ', min(pts_arr)
39 | print max(dis_arr), ' ', min(dis_arr)
40 | print max(span_arr), ' ', min(span_arr)
41 | print "standard deviation (pts, dis, span):"
42 | print np.std(pts_arr)
43 | print np.std(dis_arr)
44 | print np.std(span_arr)
45 | print "average (pts, dis, span):"
46 | print np.average(pts_arr)
47 | print np.average(dis_arr)
48 | print np.average(span_arr)
49 | print "median (pts, dis, span):"
50 | print np.median(pts_arr)
51 | print np.median(dis_arr)
52 | print np.median(span_arr)
53 | print "5% 95% percentile (pts, dis, span):"
54 | print np.percentile(pts_arr, 5), ' ', np.percentile(pts_arr, 95)
55 | print np.percentile(dis_arr, 5), ' ', np.percentile(dis_arr, 95)
56 | print np.percentile(span_arr, 5), ' ', np.percentile(span_arr, 95)
57 | 
58 | #print map(lambda x: x[1], data)
59 | 
60 | #span_plot = sns.distplot(span_arr)
61 | #span_plot.get_figure().savefig("osm_traj_span.png")
62 | #pts_plot = sns.distplot(pts_arr)
63 | #pts_plot.get_figure().savefig("osm_traj_pts.png")
64 | #dis_plot = sns.distplot(dis_arr)
65 | #dis_plot.get_figure().savefig("osm_traj_dis.png")
66 | 
--------------------------------------------------------------------------------
/src/main/java/mtree/ComposedSplitFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * A {@linkplain SplitFunction split function} that is defined by composing
9 |  * a {@linkplain PromotionFunction promotion function} and a
10 |  * {@linkplain PartitionFunction partition function}.
11 |  *
12 |  * @param <DATA> The type of the data objects.
13 |  */
14 | public class ComposedSplitFunction<DATA> implements SplitFunction<DATA> {
15 | 
16 |     private PromotionFunction<DATA> promotionFunction;
17 |     private PartitionFunction<DATA> partitionFunction;
18 | 
19 |     /**
20 |      * The constructor of a {@link SplitFunction} composed by a
21 |      * {@link PromotionFunction} and a {@link PartitionFunction}.
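     *
     * @param promotionFunction The function used to choose the pair of
     *        promoted data objects.
     * @param partitionFunction The function used to distribute the remaining
     *        data objects between the two promoted ones.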
22 |      */
23 |     public ComposedSplitFunction(
24 |             PromotionFunction<DATA> promotionFunction,
25 |             PartitionFunction<DATA> partitionFunction
26 |         )
27 |     {
28 |         this.promotionFunction = promotionFunction;
29 |         this.partitionFunction = partitionFunction;
30 |     }
31 | 
32 | 
33 |     @Override
34 |     public SplitResult<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction) {
35 |         Pair<DATA> promoted = promotionFunction.process(dataSet, distanceFunction);
36 |         Pair<Set<DATA>> partitions = partitionFunction.process(promoted, dataSet, distanceFunction);
37 |         return new SplitResult<DATA>(promoted, partitions);
38 |     }
39 | 
40 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/DistanceFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | /**
4 |  * An object that can calculate the distance between two data objects.
5 |  *
6 |  * @param <DATA> The type of the data objects.
7 |  */
8 | public interface DistanceFunction<DATA> {
9 | 
10 |     double calculate(DATA data1, DATA data2);
11 | 
12 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/DistanceFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.HashMap;
4 | import java.util.List;
5 | import java.util.Map;
6 | 
7 | /**
8 |  * Some pre-defined implementations of {@linkplain DistanceFunction distance
9 |  * functions}.
10 |  */
11 | public final class DistanceFunctions {
12 | 
13 |     /**
14 |      * Don't let anyone instantiate this class.
15 |      */
16 |     private DistanceFunctions() {}
17 | 
18 | 
19 |     /**
20 |      * Creates a cached version of a {@linkplain DistanceFunction distance
21 |      * function}. This method is used internally by {@link MTree} to create
22 |      * a cached distance function to pass to the {@linkplain SplitFunction split
23 |      * function}.
24 |      * @param distanceFunction The distance function to create a cached version
25 |      *        of.
26 |      * @return The cached distance function.
27 |      */
28 |     public static <Data> DistanceFunction<Data> cached(final DistanceFunction<Data> distanceFunction) {
29 |         return new DistanceFunction<Data>() {
30 |             class Pair {
31 |                 Data data1;
32 |                 Data data2;
33 | 
34 |                 public Pair(Data data1, Data data2) {
35 |                     this.data1 = data1;
36 |                     this.data2 = data2;
37 |                 }
38 | 
39 |                 @Override
40 |                 public int hashCode() {
41 |                     return data1.hashCode() ^ data2.hashCode();
42 |                 }
43 | 
44 |                 @Override
45 |                 public boolean equals(Object arg0) {
46 |                     if(arg0 instanceof Pair) {
47 |                         Pair that = (Pair) arg0;
48 |                         return this.data1.equals(that.data1)
49 |                             && this.data2.equals(that.data2);
50 |                     } else {
51 |                         return false;
52 |                     }
53 |                 }
54 |             }
55 | 
56 |             private final Map<Pair, Double> cache = new HashMap<Pair, Double>();
57 | 
58 |             @Override
59 |             public double calculate(Data data1, Data data2) {
60 |                 Pair pair1 = new Pair(data1, data2);
61 |                 Double distance = cache.get(pair1);
62 |                 if(distance != null) {
63 |                     return distance;
64 |                 }
65 | 
66 |                 Pair pair2 = new Pair(data2, data1);
67 |                 distance = cache.get(pair2);
68 |                 if(distance != null) {
69 |                     return distance;
70 |                 }
71 | 
72 |                 distance = distanceFunction.calculate(data1, data2);
73 |                 cache.put(pair1, distance);
74 |                 cache.put(pair2, distance);
75 |                 return distance;
76 |             }
77 |         };
78 |     }
79 | 
80 | 
81 | 
82 |     /**
83 |      * An interface to represent coordinates in Euclidean spaces.
84 |      * @see <a href="http://en.wikipedia.org/wiki/Euclidean_space">"Euclidean
85 |      *      Space" article at Wikipedia</a>
86 |      */
87 |     public interface EuclideanCoordinate {
88 |         /**
89 |          * The number of dimensions.
90 |          */
91 |         int dimensions();
92 | 
93 |         /**
94 |          * A method to access the {@code index}-th component of the coordinate.
95 |          *
96 |          * @param index The index of the component. Must be less than {@link
97 |          *        #dimensions()}.
98 |          */
99 |         double get(int index);
100 |     }
101 | 
102 | 
103 |     /**
104 |      * Calculates the distance between two {@linkplain EuclideanCoordinate
105 |      * euclidean coordinates}.
106 |      */
107 |     public static double euclidean(EuclideanCoordinate coord1, EuclideanCoordinate coord2) {
108 |         int size = Math.min(coord1.dimensions(), coord2.dimensions());
109 |         double distance = 0;
110 |         for(int i = 0; i < size; i++) {
111 |             double diff = coord1.get(i) - coord2.get(i);
112 |             distance += diff * diff;
113 |         }
114 |         distance = Math.sqrt(distance);
115 |         return distance;
116 |     }
117 | 
118 | 
119 |     /**
120 |      * A {@linkplain DistanceFunction distance function} object that calculates
121 |      * the distance between two {@linkplain EuclideanCoordinate euclidean
122 |      * coordinates}.
123 |      */
124 |     public static final DistanceFunction<EuclideanCoordinate> EUCLIDEAN = new DistanceFunction<EuclideanCoordinate>() {
125 |         @Override
126 |         public double calculate(EuclideanCoordinate coord1, EuclideanCoordinate coord2) {
127 |             return DistanceFunctions.euclidean(coord1, coord2);
128 |         }
129 |     };
130 | 
131 | 
132 |     /**
133 |      * A {@linkplain DistanceFunction distance function} object that calculates
134 |      * the distance between two coordinates represented by {@linkplain
135 |      * java.util.List lists} of {@link java.lang.Integer}s.
136 |      */
137 |     public static final DistanceFunction<List<Integer>> EUCLIDEAN_INTEGER_LIST = new DistanceFunction<List<Integer>>() {
138 |         @Override
139 |         public double calculate(List<Integer> data1, List<Integer> data2) {
140 |             class IntegerListEuclideanCoordinate implements EuclideanCoordinate {
141 |                 List<Integer> list;
142 |                 public IntegerListEuclideanCoordinate(List<Integer> list) { this.list = list; }
143 |                 @Override public int dimensions() { return list.size(); }
144 |                 @Override public double get(int index) { return list.get(index); }
145 |             };
146 |             IntegerListEuclideanCoordinate coord1 = new IntegerListEuclideanCoordinate(data1);
147 |             IntegerListEuclideanCoordinate coord2 = new IntegerListEuclideanCoordinate(data2);
148 |             return DistanceFunctions.euclidean(coord1, coord2);
149 |         }
150 |     };
151 | 
152 |     /**
153 |      * A {@linkplain DistanceFunction distance function} object that calculates
154 |      * the distance between two coordinates represented by {@linkplain
155 |      * java.util.List lists} of {@link java.lang.Double}s.
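     * <p>For example, {@code EUCLIDEAN_DOUBLE_LIST.calculate(Arrays.asList(0.0, 0.0),
     * Arrays.asList(3.0, 4.0))} returns {@code 5.0} (a 3-4-5 right triangle).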
156 |      */
157 |     public static final DistanceFunction<List<Double>> EUCLIDEAN_DOUBLE_LIST = new DistanceFunction<List<Double>>() {
158 |         @Override
159 |         public double calculate(List<Double> data1, List<Double> data2) {
160 |             class DoubleListEuclideanCoordinate implements EuclideanCoordinate {
161 |                 List<Double> list;
162 |                 public DoubleListEuclideanCoordinate(List<Double> list) { this.list = list; }
163 |                 @Override public int dimensions() { return list.size(); }
164 |                 @Override public double get(int index) { return list.get(index); }
165 |             };
166 |             DoubleListEuclideanCoordinate coord1 = new DoubleListEuclideanCoordinate(data1);
167 |             DoubleListEuclideanCoordinate coord2 = new DoubleListEuclideanCoordinate(data2);
168 |             return DistanceFunctions.euclidean(coord1, coord2);
169 |         }
170 |     };
171 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PartitionFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * An object that partitions a set of data into two sub-sets.
9 |  *
10 |  * @param <DATA> The type of the data on the sets.
11 |  */
12 | public interface PartitionFunction<DATA> {
13 | 
14 |     /**
15 |      * Executes the partitioning.
16 |      *
17 |      * @param promoted The pair of data objects that will guide the partition
18 |      *        process.
19 |      * @param dataSet The original set of data objects to be partitioned.
20 |      * @param distanceFunction A {@linkplain DistanceFunction distance function}
21 |      *        to be used on the partitioning.
22 |      * @return A pair of partition sub-sets. Each sub-set must correspond to one
23 |      *         of the {@code promoted} data objects.
24 |      */
25 |     Pair<Set<DATA>> process(Pair<DATA> promoted, Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
26 | 
27 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PartitionFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.Comparator;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Set;
9 | 
10 | import mtree.utils.Pair;
11 | 
12 | /**
13 |  * Some pre-defined implementations of {@linkplain PartitionFunction partition
14 |  * functions}.
15 |  */
16 | public final class PartitionFunctions {
17 | 
18 |     /**
19 |      * Don't let anyone instantiate this class.
20 |      */
21 |     private PartitionFunctions() {}
22 | 
23 | 
24 |     /**
25 |      * A {@linkplain PartitionFunction partition function} that tries to
26 |      * distribute the data objects equally between the promoted data objects,
27 |      * associating to each promoted data object the nearest data objects.
28 |      *
29 |      * @param <DATA> The type of the data objects.
30 |      */
31 |     public static class BalancedPartition<DATA> implements PartitionFunction<DATA> {
32 | 
33 |         /**
34 |          * Processes the balanced partition.
35 |          *
36 |          * <p>The algorithm is roughly equivalent to this:
37 | 		 * <pre>
 38 | 		 *     While dataSet is not Empty:
 39 | 		 *         X := The object in dataSet which is nearest to promoted.first
 40 | 		 *         Remove X from dataSet
 41 | 		 *         Add X to result.first
 42 | 		 *         
 43 | 		 *         Y := The object in dataSet which is nearest to promoted.second
 44 | 		 *         Remove Y from dataSet
 45 | 		 *         Add Y to result.second
 46 | 		 *         
 47 | 		 *     Return result
48 | 		 * </pre>
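		 *
		 * <p>Since an object already claimed by one partition is skipped by
		 * the other, the two resulting partitions differ in size by at most one.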
49 | 		 *
50 | 		 * @see mtree.PartitionFunction#process(mtree.utils.Pair, java.util.Set, mtree.DistanceFunction)
51 | 		 */
52 |         @Override
53 |         public Pair<Set<DATA>> process(
54 |                 final Pair<DATA> promoted,
55 |                 Set<DATA> dataSet,
56 |                 final DistanceFunction<? super DATA> distanceFunction
57 |             )
58 |         {
59 |             List<DATA> queue1 = new ArrayList<DATA>(dataSet);
60 |             // Sort by distance to the first promoted data
61 |             Collections.sort(queue1, new Comparator<DATA>() {
62 |                 @Override
63 |                 public int compare(DATA data1, DATA data2) {
64 |                     double distance1 = distanceFunction.calculate(data1, promoted.first);
65 |                     double distance2 = distanceFunction.calculate(data2, promoted.first);
66 |                     return Double.compare(distance1, distance2);
67 |                 }
68 |             });
69 | 
70 |             List<DATA> queue2 = new ArrayList<DATA>(dataSet);
71 |             // Sort by distance to the second promoted data
72 |             Collections.sort(queue2, new Comparator<DATA>() {
73 |                 @Override
74 |                 public int compare(DATA data1, DATA data2) {
75 |                     double distance1 = distanceFunction.calculate(data1, promoted.second);
76 |                     double distance2 = distanceFunction.calculate(data2, promoted.second);
77 |                     return Double.compare(distance1, distance2);
78 |                 }
79 |             });
80 | 
81 |             Pair<Set<DATA>> partitions = new Pair<Set<DATA>>(new HashSet<DATA>(), new HashSet<DATA>());
82 | 
83 |             int index1 = 0;
84 |             int index2 = 0;
85 | 
86 |             while(index1 < queue1.size() || index2 < queue2.size()) {
87 |                 while(index1 < queue1.size()) {
88 |                     DATA data = queue1.get(index1++);
89 |                     if(!partitions.second.contains(data)) {
90 |                         partitions.first.add(data);
91 |                         break;
92 |                     }
93 |                 }
94 | 
95 |                 while(index2 < queue2.size()) {
96 |                     DATA data = queue2.get(index2++);
97 |                     if(!partitions.first.contains(data)) {
98 |                         partitions.second.add(data);
99 |                         break;
100 |                     }
101 |                 }
102 |             }
103 | 
104 |             return partitions;
105 |         }
106 |     }
107 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PromotionFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * An object that chooses a pair from a set of data objects.
9 |  *
10 |  * @param <DATA> The type of the data objects.
11 |  */
12 | public interface PromotionFunction<DATA> {
13 | 
14 |     /**
15 |      * Chooses (promotes) a pair of objects according to some criteria that is
16 |      * suitable for the application using the M-Tree.
17 |      *
18 |      * @param dataSet The set of objects to choose a pair from.
19 |      * @param distanceFunction A function that can be used for choosing the
20 |      *        promoted objects.
21 |      * @return A pair of chosen objects.
22 |      */
23 |     Pair<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
24 | 
25 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/PromotionFunctions.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.List;
4 | import java.util.Set;
5 | 
6 | import mtree.utils.Pair;
7 | import mtree.utils.Utils;
8 | 
9 | /**
10 |  * Some pre-defined implementations of {@linkplain PromotionFunction promotion
11 |  * functions}.
12 |  */
13 | public final class PromotionFunctions {
14 | 
15 |     /**
16 |      * Don't let anyone instantiate this class.
17 |      */
18 |     private PromotionFunctions() {}
19 | 
20 | 
21 |     /**
22 |      * A {@linkplain PromotionFunction promotion function} object that randomly
23 |      * chooses ("promotes") two data objects.
24 |      *
25 |      * @param <DATA> The type of the data objects.
26 |      */
27 |     public static class RandomPromotion<DATA> implements PromotionFunction<DATA> {
28 |         @Override
29 |         public Pair<DATA> process(Set<DATA> dataSet,
30 |                 DistanceFunction<? super DATA> distanceFunction)
31 |         {
32 |             List<DATA> promotedList = Utils.randomSample(dataSet, 2);
33 |             return new Pair<DATA>(promotedList.get(0), promotedList.get(1));
34 |         }
35 |     }
36 | 
37 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/SplitFunction.java:
--------------------------------------------------------------------------------
1 | package mtree;
2 | 
3 | import java.util.Set;
4 | 
5 | import mtree.utils.Pair;
6 | 
7 | /**
8 |  * Defines an object to be used to split a node in an M-Tree. A node must be
9 |  * split when it has reached its maximum capacity and a new child node would be
10 |  * added to it.
11 |  *
12 |  * <p>The splitting consists in choosing a pair of "promoted" data objects from
13 |  * the children and then partitioning the set of children into two partitions
14 |  * corresponding to the two promoted data objects.
15 |  *
16 |  * @param <DATA> The type of the data objects.
17 |  */
18 | public interface SplitFunction<DATA> {
19 | 
20 |     /**
21 |      * Processes the splitting of a node.
22 |      *
23 |      * @param dataSet A set of data that are keys to the children of the node
24 |      *        to be split.
25 |      * @param distanceFunction A {@linkplain DistanceFunction distance function}
26 |      *        that can be used to help splitting the node.
27 |      * @return A {@link SplitResult} object with a pair of promoted data objects
28 |      *         and a pair of corresponding partitions of the data objects.
29 |      */
30 |     SplitResult<DATA> process(Set<DATA> dataSet, DistanceFunction<? super DATA> distanceFunction);
31 | 
32 | 
33 |     /**
34 |      * An object used as the result for the
35 |      * {@link SplitFunction#process(Set, DistanceFunction)} method.
36 |      *
37 |      * @param <DATA> The type of the data objects.
38 |      */
39 |     public static class SplitResult<DATA> {
40 | 
41 |         /**
42 |          * A pair of promoted data objects.
43 |          */
44 |         public Pair<DATA> promoted;
45 | 
46 |         /**
47 |          * A pair of partitions corresponding to the {@code promoted} data
48 |          * objects.
49 |          */
50 |         public Pair<Set<DATA>> partitions;
51 | 
52 |         /**
53 |          * The constructor for a {@link SplitResult} object.
54 |          */
55 |         public SplitResult(Pair<DATA> promoted, Pair<Set<DATA>> partitions) {
56 |             this.promoted = promoted;
57 |             this.partitions = partitions;
58 |         }
59 | 
60 |     }
61 | 
62 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/utils/Pair.java:
--------------------------------------------------------------------------------
1 | package mtree.utils;
2 | 
3 | /**
4 |  * A pair of objects of the same type.
5 |  *
6 |  * @param <T> The type of the objects.
7 |  */
8 | public class Pair<T> {
9 | 
10 |     /**
11 |      * The first object.
12 |      */
13 |     public T first;
14 | 
15 | 
16 |     /**
17 |      * The second object.
18 |      */
19 |     public T second;
20 | 
21 |     /**
22 |      * Creates a pair of {@code null} objects.
23 |      */
24 |     public Pair() {}
25 | 
26 |     /**
27 |      * Creates a pair with the objects specified in the arguments.
28 |      * @param first The first object.
29 |      * @param second The second object.
30 |      */
31 |     public Pair(T first, T second) {
32 |         this.first = first;
33 |         this.second = second;
34 |     }
35 | 
36 |     /**
37 |      * Accesses an object by its index. The {@link #first} object has index
38 |      * {@code 0} and the {@link #second} object has index {@code 1}.
39 |      * @param index The index of the object to be accessed.
40 |      * @return The {@link #first} object if {@code index} is {@code 0}; the
41 |      *         {@link #second} object if {@code index} is {@code 1}.
42 |      * @throws IllegalArgumentException If {@code index} is neither {@code 0}
43 |      *         nor {@code 1}.
44 |      */
45 |     public T get(int index) throws IllegalArgumentException {
46 |         switch(index) {
47 |             case 0: return first;
48 |             case 1: return second;
49 |             default: throw new IllegalArgumentException();
50 |         }
51 |     }
52 | 
53 | }
--------------------------------------------------------------------------------
/src/main/java/mtree/utils/Utils.java:
--------------------------------------------------------------------------------
1 | package mtree.utils;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.Iterator;
6 | import java.util.List;
7 | import java.util.Random;
8 | 
9 | /**
10 |  * Some utilities.
11 |  */
12 | public final class Utils {
13 | 
14 |     /**
15 |      * Don't let anyone instantiate this class.
16 |      */
17 |     private Utils() {}
18 | 
19 | 
20 |     /**
21 |      * Identifies the minimum and maximum elements from an iterable, according
22 |      * to the natural ordering of the elements.
23 |      * @param items An {@link Iterable} object with the elements.
24 |      * @param <T> The type of the elements.
25 |      * @return A pair with the minimum and maximum elements.
26 |      */
27 |     public static <T extends Comparable<T>> Pair<T> minMax(Iterable<T> items) {
28 |         Iterator<T> iterator = items.iterator();
29 |         if(!iterator.hasNext()) {
30 |             return null;
31 |         }
32 | 
33 |         T min = iterator.next();
34 |         T max = min;
35 | 
36 |         while(iterator.hasNext()) {
37 |             T item = iterator.next();
38 |             if(item.compareTo(min) < 0) {
39 |                 min = item;
40 |             }
41 |             if(item.compareTo(max) > 0) {
42 |                 max = item;
43 |             }
44 |         }
45 | 
46 |         return new Pair<T>(min, max);
47 |     }
48 | 
49 | 
50 |     /**
51 |      * Randomly chooses elements from the collection.
52 |      * @param collection The collection.
53 |      * @param n The number of elements to choose.
54 |      * @param <T> The type of the elements.
55 |      * @return A list with the chosen elements.
56 |      */
57 |     public static <T> List<T> randomSample(Collection<T> collection, int n) {
58 |         List<T> list = new ArrayList<T>(collection);
59 |         List<T> sample = new ArrayList<T>(n);
60 |         Random random = new Random();
61 |         while(n > 0 && !list.isEmpty()) {
62 |             int index = random.nextInt(list.size());
63 |             sample.add(list.get(index));
64 |             int indexLast = list.size() - 1;
65 |             T last = list.remove(indexLast);
66 |             if(index < indexLast) {
67 |                 list.set(index, last);
68 |             }
69 |             n--;
70 |         }
71 |         return sample;
72 |     }
73 | 
74 | }
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/generator/RandomTrajGenerator.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.generator
2 | 
3 | import edu.utah.cs.spatial.Point
4 | import org.apache.spark.{SparkConf, SparkContext}
5 | 
6 | import scala.collection.mutable
7 | import scala.util.Random
8 | 
9 | /**
10 |   * Created by dongx on 1/18/17.
11 |   * This generator should work in such manner:
12 |   *  - Generate a uniform random point within the starting scope defined by (low_x, low_y) -- (high_x, high_y)
13 |   *  - Generate the number of steps following a normal distribution with parameters (steps_avg and steps_dev)
14 |   *  - Iterate steps times:
15 |   *    + generate two random numbers (dx, dy) with normal distribution defined by range_dev (mean is 0)
16 |   *    + move the object to (x - dx, y - dy)
17 |   */
18 | object RandomTrajGenerator {
19 |   def rnd(low: Double, high: Double): Double = Random.nextDouble() * (high - low) + low
20 |   def flip_coin(p: Double): Int = if (Random.nextDouble() > p) 1 else -1
21 |   def gaussianRnd(mean: Double, scale: Double): Double = Random.nextGaussian() * scale + mean
22 |   def gaussianRnd(scale: Double): Double = Random.nextGaussian() * scale
23 | 
24 |   def main(args: Array[String]): Unit = {
25 | //    if (args.length != 9) {
26 | //      println("Usage: RandomTrajGenerator <n> <low_x> <low_y> <high_x> <high_y> <steps_avg> <steps_dev> <range_dev> <output_path>")
27 | //      System.exit(1)
28 | //    }
29 | 
30 |     //val sparkConf = new SparkConf().setAppName("TrajSampling")
31 |     //val sc = new SparkContext(sparkConf)
32 | 
33 | //    Thread.sleep(3000)
34 | 
35 | //    val n = args(0).toInt
36 | //    val low_x = args(1).toDouble
37 | //    val low_y = args(2).toDouble
38 | //    val high_x = args(3).toDouble
39 | //    val high_y = args(4).toDouble
40 | //    val steps_avg = args(5).toInt
41 | //    val steps_dev = args(6).toDouble
42 | //    val range_dev = args(7).toDouble
43 | //    val output_file_path = args(8)
44 | //    val n = args(0).toInt
45 |     val low_x = 0.0
46 |     val low_y = 0.0
47 |     val high_x = 100.0
48 |     val high_y = 100.0
49 |     val steps_avg = 20
50 |     val steps_dev = 10
51 |     val range_dev = 8.0
52 | //    val output_file_path = args(8)
53 | 
54 | //    sc.parallelize(0 until n, sc.defaultParallelism)
55 | //      .flatMap(x => {
56 | //        val ans = mutable.ListBuffer[String]()
57 | //        val last_x = rnd(low_x, high_x)
58 | //        val last_y = rnd(low_y, high_y)
59 | //        val steps = gaussianRnd(steps_avg, steps_dev).toInt
60 | //        for (i <- 0 until steps) {
61 | //          val cur_x = last_x + gaussianRnd(range_dev)
62 | //          val cur_y = last_y + gaussianRnd(range_dev)
63 | //          ans += s"$x\t$last_x\t$last_y\t$cur_x\t$cur_y\t$i"
64 | //        }
65 | //        ans.iterator
66 | //      }).saveAsTextFile(output_file_path)
67 | 
68 |     val res = mutable.ListBuffer[Point]()
69 |     var x1 = rnd(low_x, high_x)
70 |     var y1 = rnd(low_y, high_y)
71 |     val basic_step_x = gaussianRnd(5.0)
72 |     val basic_step_y = gaussianRnd(5.0)
73 |     var x2 = x1 + gaussianRnd(basic_step_x, basic_step_x * 0.5)
74 |     var y2 = y1 + gaussianRnd(basic_step_y, basic_step_y * 0.5)
75 |     res += Point(Array(x1, y1))
76 |     res += Point(Array(x2, y2))
77 |     val steps = gaussianRnd(steps_avg, steps_dev).toInt
78 |     assert(steps > 10)
79 |     for (i <- 0 until steps) {
80 |       val cur_x = ((x1 + x2) / 2.0) + flip_coin(0.95) * gaussianRnd(basic_step_x, basic_step_x * 0.3)
81 |       val cur_y = ((y1 + y2) / 2.0) + flip_coin(0.95) * gaussianRnd(basic_step_y, basic_step_y * 0.3)
82 |       res += Point(Array(cur_x, cur_y))
83 |       x1 = x2
84 |       y1 = y2
85 |       x2 = cur_x
86 |       y2 = cur_y
87 |     }
88 | 
89 |     println("X = [")
90 |     res.foreach(x => println(s"${x.coord(0)},"))
91 |     println("]")
92 |     println()
93 |     println("Y = [")
94 |     res.foreach(x => println(s"${x.coord(1)},"))
95 |     println("]")
96 | 
97 |     //sc.stop()
98 |   }
99 | 
100 | }
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/index/Index.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.index
2 | 
3 | /**
4 |   * Created by dongx on 8/30/16.
5 | * Abstract trait for Index 6 | */ 7 | trait Index 8 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/RTree.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Shape} 4 | import edu.utah.cs.util._ 5 | 6 | import scala.collection.mutable 7 | import scala.util.control.Breaks 8 | 9 | /** 10 | * Created by dong on 1/15/16. 11 | * Static Multi-Dimensional R-Tree Index for LineSegments 12 | */ 13 | abstract class RTreeEntry { 14 | def minDist(x: Shape): Double 15 | 16 | def intersects(x: Shape): Boolean 17 | } 18 | 19 | case class RTreeLeafEntry(shape: Shape, m_data: Int, size: Int, traj_id: Int) extends RTreeEntry { 20 | override def minDist(x: Shape): Double = shape.minDist(x) 21 | override def intersects(x: Shape): Boolean = x.intersects(shape) 22 | } 23 | 24 | case class RTreeInternalEntry(mbr: MBR, node: RTreeNode) extends RTreeEntry { 25 | override def minDist(x: Shape): Double = mbr.minDist(x) 26 | override def intersects(x: Shape): Boolean = x.intersects(mbr) 27 | } 28 | 29 | case class RTreeNode(m_mbr: MBR, m_child: Array[RTreeEntry], isLeaf: Boolean) { 30 | def this(m_mbr: MBR, children: Array[(MBR, RTreeNode)]) = { 31 | this(m_mbr, children.map(x => RTreeInternalEntry(x._1, x._2)), false) 32 | } 33 | 34 | // XX Interesting Trick! Overriding same function 35 | def this(m_mbr: MBR, children: => Array[(Shape, Int, Int)]) = { 36 | this(m_mbr, children.map(x => RTreeLeafEntry(x._1, x._2, 1, x._3)), true) 37 | } 38 | 39 | def this(m_mbr: MBR, children: Array[(MBR, Int, Int)]) = { 40 | this(m_mbr, children.map(x => RTreeLeafEntry(x._1, x._2, x._3, 1)), true) 41 | } 42 | 43 | val size: Long = { 44 | if (isLeaf) m_child.map(x => x.asInstanceOf[RTreeLeafEntry].size).sum 45 | else m_child.map(x => x.asInstanceOf[RTreeInternalEntry].node.size).sum 46 | } 47 | } 48 | 49 | class NNOrdering() extends Ordering[(_, Double)] { 50 | def compare(a: (_, Double), b: (_, Double)): Int = -a._2.compare(b._2) 51 | } 52 | 53 | case class RTree(root: RTreeNode) extends Index with Serializable { 54 | def range(query: MBR): Array[(Shape, Int)] = { 55 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 56 | val st = new mutable.Stack[RTreeNode]() 57 | if (root.m_mbr.intersects(query) && root.m_child.nonEmpty) st.push(root) 58 | while (st.nonEmpty) { 59 | val now = st.pop() 60 | if (!now.isLeaf) { 61 | now.m_child.foreach { 62 | case RTreeInternalEntry(mbr, node) => 63 | if (query.intersects(mbr)) st.push(node) 64 | } 65 | } else { 66 | now.m_child.foreach { 67 | case RTreeLeafEntry(shape, m_data, _, _) => 68 | if (query.intersects(shape)) ans += ((shape, m_data)) 69 | } 70 | } 71 | } 72 | ans.toArray 73 | } 74 | 75 | def range(query: MBR, level_limit: Int, s_threshold: Double): Option[Array[(Shape, Int)]] = { 76 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 77 | val q = new mutable.Queue[(RTreeNode, Int)]() 78 | if (root.m_mbr.intersects(query) && root.m_child.nonEmpty) q.enqueue((root, 1)) 79 | var estimate: Double = 0 80 | val loop = new Breaks 81 | import loop.{break, breakable} 82 | breakable { 83 | while (q.nonEmpty) { 84 | val now = q.dequeue 85 | val cur_node = now._1 86 | val cur_level = now._2 87 | if (cur_node.isLeaf) { 88 | cur_node.m_child.foreach { 89 | case RTreeLeafEntry(shape, m_data, _, _) => 90 | if (query.intersects(shape)) ans += ((shape, m_data)) 91 | } 92 | } else if (cur_level < level_limit) { 93 | 
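          // still above the level limit: keep descending without contributing to the estimate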
          cur_node.m_child.foreach {
94 |             case RTreeInternalEntry(mbr, node) =>
95 |               if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
96 |           }
97 |         } else if (cur_level == level_limit) {
98 |           estimate += cur_node.m_mbr.calcRatio(query) * cur_node.size
99 |           cur_node.m_child.foreach {
100 |             case RTreeInternalEntry(mbr, node) =>
101 |               if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
102 |           }
103 |         } else break
104 |       }
105 |     }
106 |     if (ans.nonEmpty) return Some(ans.toArray)
107 |     else if (estimate / root.size > s_threshold) return None
108 |     while (q.nonEmpty) {
109 |       val now = q.dequeue
110 |       val cur_node = now._1
111 |       val cur_level = now._2
112 |       if (cur_node.isLeaf) {
113 |         cur_node.m_child.foreach {
114 |           case RTreeLeafEntry(shape, m_data, _, _) =>
115 |             if (query.intersects(shape)) ans += ((shape, m_data))
116 |         }
117 |       } else {
118 |         cur_node.m_child.foreach {
119 |           case RTreeInternalEntry(mbr, node) =>
120 |             if (query.intersects(mbr)) q.enqueue((node, cur_level + 1))
121 |         }
122 |       }
123 |     }
124 |     Some(ans.toArray)
125 |   }
126 | 
127 |   def circleRange(origin: Shape, r: Double): Array[(Shape, Int)] = {
128 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
129 |     val st = new mutable.Stack[RTreeNode]()
130 |     if (root.m_mbr.minDist(origin) <= r && root.m_child.nonEmpty) st.push(root)
131 |     while (st.nonEmpty) {
132 |       val now = st.pop()
133 |       if (!now.isLeaf) {
134 |         now.m_child.foreach{
135 |           case RTreeInternalEntry(mbr, node) =>
136 |             if (origin.minDist(mbr) <= r) st.push(node)
137 |         }
138 |       } else {
139 |         now.m_child.foreach {
140 |           case RTreeLeafEntry(shape, m_data, _, _) =>
141 |             if (origin.minDist(shape) <= r) ans += ((shape, m_data))
142 |         }
143 |       }
144 |     }
145 |     ans.toArray
146 |   }
147 | 
148 |   def antiCircleRange(query: Array[LineSegment], r: Double): Array[(Shape, Int)] = {
149 |     def check(now: Shape) : Boolean = {
150 |       for (i <- query.indices)
151 |         if (now.minDist(query(i)) <= r) return false
152 |       true
153 |     }
154 | 
155 |     def checkCovered(now: MBR): Boolean = {
156 |       for (i <- query.indices)
157 |         if (query(i).cover(now, r)) return true
158 |       false
159 |     }
160 | 
161 |     val ans = mutable.ListBuffer[(Shape, Int)]()
162 |     val st = new mutable.Stack[RTreeNode]()
163 |     if (!checkCovered(root.m_mbr)) st.push(root)
164 |     while (st.nonEmpty) {
165 |       val now = st.pop()
166 |       if (!now.isLeaf) {
167 |         now.m_child.foreach {
168 |           case RTreeInternalEntry(mbr, node) =>
169 |             if (!checkCovered(mbr)) st.push(node)
170 |         }
171 |       } else {
172 |         now.m_child.foreach {
173 |           case RTreeLeafEntry(shape, m_data, _, _) =>
174 |             if (check(shape)) ans += ((shape, m_data))
175 |         }
176 |       }
177 |     }
178 |     ans.toArray
179 |   }
180 | 
181 |   def circleRange(origin: LineSegment, r: Double, distFunc: (LineSegment, MBR) => Double): Array[(Shape, Int)] = {
182 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
183 |     val st = new mutable.Stack[RTreeNode]()
184 |     if (distFunc(origin, root.m_mbr) <= r && root.m_child.nonEmpty) st.push(root)
185 |     while (st.nonEmpty) {
186 |       val now = st.pop()
187 |       if (!now.isLeaf) {
188 |         now.m_child.foreach{
189 |           case RTreeInternalEntry(mbr, node) =>
190 |             if (distFunc(origin, mbr) <= r) st.push(node)
191 |         }
192 |       } else {
193 |         now.m_child.foreach {
194 |           case RTreeLeafEntry(shape, m_data, _, _) =>
195 |             if (distFunc(origin, shape.asInstanceOf[MBR]) <= r) ans += ((shape, m_data))
196 |         }
197 |       }
198 |     }
199 |     ans.toArray
200 |   }
201 | 
202 |   def circleRange(query: Array[LineSegment], r: Double): Array[(Shape, Int)] = {
203 |     val ans = mutable.ArrayBuffer[(Shape, Int)]()
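    // disjunctive range query: a shape qualifies if it lies within distance r
    // of at least one query segment (see check() below)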
val st = new mutable.Stack[RTreeNode]() 205 | 206 | def check(now: Shape) : Boolean = { 207 | for (i <- query.indices) 208 | if (now.minDist(query(i)) <= r) return true 209 | false 210 | } 211 | 212 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 213 | while (st.nonEmpty) { 214 | val now = st.pop() 215 | if (!now.isLeaf) now.m_child.foreach { 216 | case RTreeInternalEntry(mbr, node) => 217 | if (check(mbr)) st.push(node) 218 | } else { 219 | now.m_child.foreach { 220 | case RTreeLeafEntry(shape, m_data, _, _) => 221 | if (check(shape)) ans += ((shape, m_data)) 222 | } 223 | } 224 | } 225 | ans.toArray 226 | } 227 | 228 | def circleRange(query: Array[MBR], r: Double): Array[(Shape, Int)] = { 229 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 230 | val st = new mutable.Stack[RTreeNode]() 231 | 232 | def check(now: Shape) : Boolean = { 233 | for (i <- query.indices) 234 | if (now.minDist(query(i)) <= r) return true 235 | false 236 | } 237 | 238 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 239 | while (st.nonEmpty) { 240 | val now = st.pop() 241 | if (!now.isLeaf) now.m_child.foreach { 242 | case RTreeInternalEntry(mbr, node) => 243 | if (check(mbr)) st.push(node) 244 | } else { 245 | now.m_child.foreach { 246 | case RTreeLeafEntry(shape, m_data, _, _) => 247 | if (check(shape)) ans += ((shape, m_data)) 248 | } 249 | } 250 | } 251 | ans.toArray 252 | } 253 | 254 | 255 | def circleRangeConj(queries: Array[(Point, Double)]): Array[(Shape, Int)] = { 256 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 257 | val st = new mutable.Stack[RTreeNode]() 258 | 259 | def check(now: Shape) : Boolean = { 260 | for (i <- queries.indices) 261 | if (now.minDist(queries(i)._1) > queries(i)._2) return false 262 | true 263 | } 264 | 265 | if (check(root.m_mbr) && root.m_child.nonEmpty) st.push(root) 266 | while (st.nonEmpty) { 267 | val now = st.pop() 268 | if (!now.isLeaf) now.m_child.foreach { 269 | case RTreeInternalEntry(mbr, node) => 270 | if (check(mbr)) st.push(node) 271 | } else { 272 | now.m_child.foreach { 273 | case RTreeLeafEntry(shape, m_data, _, _) => 274 | if (check(shape)) ans += ((shape, m_data)) 275 | } 276 | } 277 | } 278 | ans.toArray 279 | } 280 | 281 | def kNN(query: Point, k: Int, keepSame: Boolean = false): Array[(Shape, Int)] = { 282 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 283 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 284 | var cnt = 0 285 | var kNN_dis = 0.0 286 | pq.enqueue((root, 0.0)) 287 | 288 | val loop = new Breaks 289 | import loop.{break, breakable} 290 | breakable { 291 | while (pq.nonEmpty) { 292 | val now = pq.dequeue() 293 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 294 | 295 | now._1 match { 296 | case RTreeNode(_, m_child, isLeaf) => 297 | m_child.foreach(entry => 298 | if (isLeaf) pq.enqueue((entry, entry.minDist(query))) 299 | else pq.enqueue((entry.asInstanceOf[RTreeInternalEntry].node, entry.minDist(query))) 300 | ) 301 | case RTreeLeafEntry(p, m_data, size, _) => 302 | cnt += size 303 | kNN_dis = now._2 304 | ans += ((p, m_data)) 305 | } 306 | } 307 | } 308 | 309 | ans.toArray 310 | } 311 | 312 | def kNN(query: Point, distFunc: (Point, MBR) => Double, 313 | k: Int, keepSame: Boolean): Array[(Shape, Int)] = { 314 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 315 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 316 | var cnt = 0 317 | var kNN_dis = 0.0 318 | pq.enqueue((root, 0.0)) 319 | 320 | val loop = new Breaks 321 | import loop.{break, breakable} 322 | 
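    // best-first search: expand the nearest pending entry first; stop once k
    // results are collected and the next candidate is farther than the current
    // k-th nearest distance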
breakable { 323 | while (pq.nonEmpty) { 324 | val now = pq.dequeue() 325 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 326 | 327 | now._1 match { 328 | case RTreeNode(_, m_child, _) => 329 | m_child.foreach { 330 | case RTreeInternalEntry(mbr, node) => 331 | pq.enqueue((node, distFunc(query, mbr))) 332 | case entry @ RTreeLeafEntry(mbr, _, _, _) => 333 | require(mbr.isInstanceOf[MBR]) 334 | pq.enqueue((entry, distFunc(query, mbr.asInstanceOf[MBR]))) 335 | } 336 | case RTreeLeafEntry(mbr, m_data, size, _) => 337 | cnt += size 338 | kNN_dis = now._2 339 | ans += ((mbr, m_data)) 340 | } 341 | } 342 | } 343 | 344 | ans.toArray 345 | } 346 | 347 | def kNN(query: MBR, distFunc: (MBR, MBR) => Double, 348 | k: Int, keepSame: Boolean): Array[(Shape, Int)] = { 349 | val ans = mutable.ArrayBuffer[(Shape, Int)]() 350 | val pq = new mutable.PriorityQueue[(_, Double)]()(new NNOrdering()) 351 | var cnt = 0 352 | var kNN_dis = 0.0 353 | pq.enqueue((root, 0.0)) 354 | 355 | val loop = new Breaks 356 | import loop.{break, breakable} 357 | breakable { 358 | while (pq.nonEmpty) { 359 | val now = pq.dequeue() 360 | if (cnt >= k && (!keepSame || now._2 > kNN_dis)) break() 361 | 362 | now._1 match { 363 | case RTreeNode(_, m_child, _) => 364 | m_child.foreach { 365 | case RTreeInternalEntry(mbr, node) => 366 | pq.enqueue((node, distFunc(query, mbr))) 367 | case entry @ RTreeLeafEntry(mbr, _, _, _) => 368 | require(mbr.isInstanceOf[MBR]) 369 | pq.enqueue((entry, distFunc(query, mbr.asInstanceOf[MBR]))) 370 | } 371 | case RTreeLeafEntry(mbr, m_data, size, _) => 372 | cnt += size 373 | kNN_dis = now._2 374 | ans += ((mbr, m_data)) 375 | } 376 | } 377 | } 378 | ans.toArray 379 | } 380 | } 381 | 382 | object RTree { 383 | def apply(entries: Array[(MBR, Int, Int)], max_entries_per_node: Int): RTree = { 384 | val dimension = entries(0)._1.low.coord.length 385 | val entries_len = entries.length.toDouble 386 | val dim = new Array[Int](dimension) 387 | var remaining = entries_len / max_entries_per_node 388 | for (i <- 0 until dimension) { 389 | dim(i) = Math.ceil(Math.pow(remaining, 1.0/(dimension - i))).toInt 390 | remaining /= dim(i) 391 | } 392 | 393 | def recursiveGroupSegments(entries: Array[(MBR, Int, Int)], 394 | cur_dim: Int, until_dim: Int): Array[Array[(MBR, Int, Int)]] = { 395 | val len = entries.length.toDouble 396 | val grouped = entries.sortWith(_._1.centroid.coord(cur_dim) < _._1.centroid.coord(cur_dim)) 397 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 398 | if (cur_dim < until_dim) 399 | grouped.flatMap(now => recursiveGroupSegments(now, cur_dim + 1, until_dim)) 400 | else grouped 401 | } 402 | 403 | val grouped = recursiveGroupSegments(entries, 0, dimension - 1) 404 | val rtree_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 405 | grouped.foreach(list => { 406 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 407 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 408 | list.foreach(now => { 409 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 410 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 411 | }) 412 | val mbr = MBR(Point(min), Point(max)) 413 | rtree_nodes += ((mbr, new RTreeNode(mbr, list.map(x => (x._1.asInstanceOf[Shape], x._2, x._3))))) 414 | }) 415 | 416 | var cur_rtree_nodes = rtree_nodes.toArray 417 | var cur_len = cur_rtree_nodes.length.toDouble 418 | remaining = cur_len / max_entries_per_node 419 | for (i <- 0 until dimension) { 420 | dim(i) = Math.ceil(Math.pow(remaining, 
1.0 / (dimension - i))).toInt 421 | remaining /= dim(i) 422 | } 423 | 424 | def over(dim: Array[Int]): Boolean = { 425 | for (i <- dim.indices) 426 | if (dim(i) != 1) return false 427 | true 428 | } 429 | 430 | def comp(dim: Int)(left: (MBR, RTreeNode), right: (MBR, RTreeNode)): Boolean = { 431 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 432 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 433 | left_center < right_center 434 | } 435 | 436 | def recursiveGroupRTreeNode(entries: Array[(MBR, RTreeNode)], cur_dim: Int, until_dim: Int) 437 | : Array[Array[(MBR, RTreeNode)]] = { 438 | val len = entries.length.toDouble 439 | val grouped = entries.sortWith(comp(cur_dim)) 440 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 441 | if (cur_dim < until_dim) 442 | grouped.flatMap(now => recursiveGroupRTreeNode(now, cur_dim + 1, until_dim)) 443 | else grouped 444 | } 445 | 446 | while (!over(dim)) { 447 | val grouped = recursiveGroupRTreeNode(cur_rtree_nodes, 0, dimension - 1) 448 | var tmp_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 449 | grouped.foreach(list => { 450 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 451 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 452 | list.foreach(now => { 453 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 454 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 455 | }) 456 | val mbr = MBR(Point(min), Point(max)) 457 | tmp_nodes += ((mbr, new RTreeNode(mbr, list))) 458 | }) 459 | cur_rtree_nodes = tmp_nodes.toArray 460 | cur_len = cur_rtree_nodes.length.toDouble 461 | remaining = cur_len / max_entries_per_node 462 | for (i <- 0 until dimension) { 463 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 464 | remaining /= dim(i) 465 | } 466 | } 467 | 468 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 469 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 470 | cur_rtree_nodes.foreach(now => { 471 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 472 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 473 | }) 474 | 475 | val mbr = MBR(Point(min), Point(max)) 476 | val root = new RTreeNode(mbr, cur_rtree_nodes) 477 | new RTree(root) 478 | } 479 | 480 | def applyMBR(entries: Array[(MBR, Int, Int)], max_entries_per_node: Int): RTree = { 481 | val dimension = entries(0)._1.low.coord.length 482 | val entries_len = entries.length.toDouble 483 | val dim = new Array[Int](dimension) 484 | var remaining = entries_len / max_entries_per_node 485 | for (i <- 0 until dimension) { 486 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 487 | remaining /= dim(i) 488 | } 489 | 490 | def compMBR(dim: Int)(left: (MBR, Int, Int), right: (MBR, Int, Int)): Boolean = { 491 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 492 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 493 | left_center < right_center 494 | } 495 | 496 | def recursiveGroupMBR(entries: Array[(MBR, Int, Int)], cur_dim: Int, until_dim: Int) 497 | : Array[Array[(MBR, Int, Int)]] = { 498 | val len = entries.length.toDouble 499 | val grouped = entries.sortWith(compMBR(cur_dim)) 500 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 501 | if (cur_dim < until_dim) 502 | grouped.flatMap(now => recursiveGroupMBR(now, cur_dim + 1, until_dim)) 503 | else grouped 504 | } 505 | 506 | val grouped = 
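/* (Editor's note) compMBR orders entries by low + high per axis, i.e. by twice the center coordinate, so this performs the same center-based STR tiling as in apply() above: */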
recursiveGroupMBR(entries, 0, dimension - 1) 507 | val rtree_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 508 | grouped.foreach(list => { 509 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 510 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 511 | list.foreach(now => { 512 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 513 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 514 | }) 515 | val mbr = MBR(Point(min), Point(max)) 516 | rtree_nodes += ((mbr, new RTreeNode(mbr, list))) 517 | }) 518 | 519 | var cur_rtree_nodes = rtree_nodes.toArray 520 | var cur_len = cur_rtree_nodes.length.toDouble 521 | remaining = cur_len / max_entries_per_node 522 | for (i <- 0 until dimension) { 523 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 524 | remaining /= dim(i) 525 | } 526 | 527 | def over(dim : Array[Int]) : Boolean = { 528 | for (i <- dim.indices) 529 | if (dim(i) != 1) return false 530 | true 531 | } 532 | 533 | def comp(dim: Int)(left : (MBR, RTreeNode), right : (MBR, RTreeNode)) : Boolean = { 534 | val left_center = left._1.low.coord(dim) + left._1.high.coord(dim) 535 | val right_center = right._1.low.coord(dim) + right._1.high.coord(dim) 536 | left_center < right_center 537 | } 538 | 539 | def recursiveGroupRTreeNode(entries: Array[(MBR, RTreeNode)], 540 | cur_dim : Int, until_dim : Int) : Array[Array[(MBR, RTreeNode)]] = { 541 | val len = entries.length.toDouble 542 | val grouped = entries.sortWith(comp(cur_dim)) 543 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 544 | if (cur_dim < until_dim) 545 | grouped.flatMap(now => { 546 | recursiveGroupRTreeNode(now, cur_dim + 1, until_dim) 547 | }) 548 | else grouped 549 | } 550 | 551 | while (!over(dim)) { 552 | val grouped = recursiveGroupRTreeNode(cur_rtree_nodes, 0, dimension - 1) 553 | var tmp_nodes = mutable.ArrayBuffer[(MBR, RTreeNode)]() 554 | grouped.foreach(list => { 555 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 556 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 557 | list.foreach(now => { 558 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 559 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 560 | }) 561 | val mbr = MBR(Point(min), Point(max)) 562 | tmp_nodes += ((mbr, new RTreeNode(mbr, list))) 563 | }) 564 | cur_rtree_nodes = tmp_nodes.toArray 565 | cur_len = cur_rtree_nodes.length.toDouble 566 | remaining = cur_len / max_entries_per_node 567 | for (i <- 0 until dimension) { 568 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (dimension - i))).toInt 569 | remaining /= dim(i) 570 | } 571 | } 572 | 573 | val min = new Array[Double](dimension).map(_ => Double.MaxValue) 574 | val max = new Array[Double](dimension).map(_ => Double.MinValue) 575 | cur_rtree_nodes.foreach(now => { 576 | for (i <- 0 until dimension) min(i) = Math.min(min(i), now._1.low.coord(i)) 577 | for (i <- 0 until dimension) max(i) = Math.max(max(i), now._1.high.coord(i)) 578 | }) 579 | 580 | val mbr = MBR(Point(min), Point(max)) 581 | val root = new RTreeNode(mbr, cur_rtree_nodes) 582 | new RTree(root) 583 | } 584 | } 585 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/VPTree.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.util.MetricObject 4 | 5 | import 
scala.collection.mutable 6 | import scala.reflect.ClassTag 7 | import scala.util.Random 8 | 9 | /** 10 | * Created by dongx on 2/3/17. 11 | */ 12 | abstract class VPTreeNode[T <: MetricObject: ClassTag] 13 | 14 | case class VPTreeInternalNode[T <: MetricObject: ClassTag](vp: T, threshold: Double, 15 | left: VPTreeNode[T], right: VPTreeNode[T]) extends VPTreeNode[T] 16 | 17 | case class VPTreeLeafNode[T <: MetricObject: ClassTag](points: Array[T]) extends VPTreeNode[T] 18 | 19 | case class VPTree[T <: MetricObject: ClassTag](root: VPTreeNode[T]) extends Index with Serializable { 20 | private[cs] case class HeapItem(point: T, dis: Double) extends Ordered[HeapItem] { 21 | override def compare(that: HeapItem): Int = dis.compare(that.dis) 22 | } 23 | 24 | def knn(query: T, k: Int, dis_threshold: Double = Double.MaxValue): (Array[(T, Double)], Int) = { 25 | val pq = mutable.PriorityQueue[HeapItem]() 26 | var tau = dis_threshold 27 | var checked = 0 28 | 29 | def offer(x: HeapItem) = { 30 | if (pq.size == k) pq.dequeue() 31 | pq.enqueue(x) 32 | if (pq.size == k) tau = pq.head.dis 33 | } 34 | 35 | def recursive_knn(node: VPTreeNode[T]) : Unit = { 36 | if (node != null) { 37 | node match { 38 | case VPTreeLeafNode(ps) => 39 | checked += ps.length 40 | ps.foreach(x => { 41 | val dis = query.distance(x) 42 | if (dis < tau) offer(HeapItem(x, dis)) 43 | }) 44 | case VPTreeInternalNode(vp, th, left, right) => 45 | val vp_dis = query.distance(vp) 46 | checked += 1 47 | if (vp_dis < tau) offer(HeapItem(vp, vp_dis)) 48 | if (vp_dis < th) { 49 | if (vp_dis - tau <= th) recursive_knn(left) 50 | if (vp_dis + tau >= th) recursive_knn(right) 51 | } else { 52 | if (vp_dis + tau >= th) recursive_knn(right) 53 | if (vp_dis - tau <= th) recursive_knn(left) 54 | } 55 | } 56 | } 57 | } 58 | recursive_knn(root) 59 | 60 | (pq.dequeueAll.map(x => (x.point, x.dis)).toArray.reverse, checked) 61 | } 62 | 63 | } 64 | 65 | object VPTree { 66 | def buildNode[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int): VPTreeNode[T] = { 67 | if (points.isEmpty) { 68 | null 69 | } else if (points.length < leaf_capacity) { 70 | VPTreeLeafNode(points) 71 | } else { 72 | val n = points.length 73 | val vp_id = Random.nextInt(n) 74 | val t = points(vp_id) 75 | points(vp_id) = points(0) 76 | points(0) = t 77 | val vp = points.head 78 | val ps_with_dis = points.slice(1, n).map(x => (vp.distance(x), x)).sortBy(_._1) 79 | val median = Math.ceil((n - 1) / 2.0).toInt - 1 80 | val threshold = ps_with_dis(median)._1 81 | VPTreeInternalNode(vp, threshold, 82 | buildNode(ps_with_dis.slice(0, median + 1).map(_._2), leaf_capacity), 83 | buildNode(ps_with_dis.slice(median + 1, n).map(_._2), leaf_capacity)) 84 | } 85 | } 86 | 87 | def apply[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int = 25): VPTree[T] = { 88 | VPTree(buildNode(points, leaf_capacity)) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index/VPTreeTest.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index 2 | 3 | import edu.utah.cs.spatial.Point 4 | import edu.utah.cs.util.{BloomFilter, MetricObject} 5 | 6 | /** 7 | * Created by dongx on 2/3/17. 
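 * Usage sketch (editor's addition, mirroring the commented-out test in main() below; VPPoint is the MetricObject wrapper defined in this object): * {{{ * val tree = VPTree((0 until 1000).map(x => VPPoint(Point(Array(x - 1, x + 1)), x + 1)).toArray) * val (neighbors, checked) = tree.knn(VPPoint(Point(Array(3, 3)), 0), k = 10) * neighbors.foreach(println) // (point, distance) pairs, nearest first * }}}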
8 | */ 9 | object VPTreeTest { 10 | private case class VPPoint(data: Point, id: Int) extends MetricObject { 11 | override def distance(o: MetricObject): Double = { 12 | data.minDist(o.asInstanceOf[VPPoint].data) 13 | } 14 | } 15 | 16 | 17 | def main(args: Array[String]): Unit = { 18 | // val tree = VPTree((0 until 1000).map(x => VPPoint(Point(Array(x - 1, x + 1)), x + 1)).toArray) 19 | // tree.knn(VPPoint(Point(Array(3, 3)), 0), 10, 5)._1.foreach(println) 20 | val optimal_num_bits = BloomFilter.optimalNumBits(10000, 0.1) 21 | val optimal_num_hashes = BloomFilter.optimalNumHashes(10000, optimal_num_bits) 22 | println(optimal_num_bits, optimal_num_hashes) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/index_bm/RTreeWithBMTest.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.index_bm 2 | 3 | import edu.utah.cs.spatial._ 4 | import edu.utah.cs.util._ 5 | import org.roaringbitmap.RoaringBitmap 6 | 7 | /** 8 | * Created by dongx on 10/7/16. 9 | */ 10 | object RTreeWithBMTest { 11 | def main(args: Array[String]) = { 12 | // val bm_meta = BitMapMeta(100) 13 | // BitMap.meta = bm_meta 14 | // val data = (0 until 100).map(x => (LineSegment(Point(Array(x - 1, x)), Point(Array(x, x))), x, x)).toArray 15 | // val rt = RTreeWithBM(data, 10, bm_meta) 16 | // val res = rt.circleRangeBF(LineSegment(Point(Array(2, 2)), Point(Array(1, 2))), 1000) 17 | // println(BitArray.count(res)) 18 | // println(BitArray.count(rt.root.bf)) 19 | // rt.root.bf.foreach(x => println(x.toBinaryString)) 20 | val bitmap1 = RoaringBitmap.bitmapOf(1, 2, 3, 4) 21 | val bitmap2 = RoaringBitmap.bitmapOf(2, 3, 6, 7) 22 | println(RoaringBitmap.andNot(bitmap1, bitmap2)) 23 | println(RoaringBitmap.andNot(bitmap2, bitmap1)) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/IDPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import org.apache.spark.Partitioner 4 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 5 | 6 | 7 | /** 8 | * Created by dongx on 2/1/17. 9 | */ 10 | object IDPartition { 11 | def apply(origin: RDD[_ <: Product2[Int, Any]], n_part: Int) 12 | : RDD[_ <: Product2[Int, Any]] = { 13 | val part = new IDPartitioner(n_part) 14 | val shuffled = new ShuffledRDD[Int, Any, Any](origin, part) 15 | shuffled 16 | } 17 | } 18 | 19 | class IDPartitioner(n_part: Int) extends Partitioner { 20 | override def numPartitions: Int = n_part 21 | 22 | override def getPartition(key: Any): Int = { 23 | key.asInstanceOf[Int] 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRMBRPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.spatial.{MBR, Point} 5 | import org.apache.spark.Partitioner 6 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 7 | 8 | import scala.collection.mutable 9 | 10 | /** 11 | * Created by Dong Xie on 10/24/2016. 
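 * STR (Sort-Tile-Recursive) partitioner over MBRs (editor's summary): a sample of the input is sorted by centroid along each axis in turn and tiled into roughly ceil(est_partition^(1/2)) slabs per axis, e.g. a 4 x 4 grid of partition bounds for est_partition = 16; getPartition then routes each key to the bound containing its centroid via an R-tree built over the bounds.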
12 | */ 13 | object STRMBRPartition { 14 | def apply(origin: RDD[(MBR, Int)], est_partition: Int, 15 | sample_rate: Double, max_entries_per_node: Int): RDD[(MBR, Int)] = { 16 | val part = new STRMBRPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 17 | val shuffled = new ShuffledRDD[MBR, Int, Int](origin, part) 18 | shuffled 19 | } 20 | } 21 | 22 | 23 | class STRMBRPartitioner(est_partition: Int, 24 | sample_rate: Double, 25 | max_entries_per_node: Int, 26 | rdd: RDD[_ <: Product2[MBR, Any]]) 27 | extends Partitioner { 28 | 29 | def numPartitions: Int = partitions 30 | 31 | private case class Bounds(min: Array[Double], max: Array[Double]) 32 | 33 | var (partBound, partitions) = { 34 | val data_bounds = { 35 | rdd.aggregate[Bounds](null)((bound, data) => { 36 | if (bound == null) { 37 | Bounds(data._1.low.coord, data._1.high.coord) 38 | } else { 39 | Bounds(bound.min.zip(data._1.low.coord).map(x => Math.min(x._1, x._2)), 40 | bound.max.zip(data._1.high.coord).map(x => Math.max(x._1, x._2))) 41 | } 42 | }, (left, right) => { 43 | if (left == null) right 44 | else if (right == null) left 45 | else { 46 | Bounds(left.min.zip(right.min).map(x => Math.min(x._1, x._2)), 47 | left.max.zip(right.max).map(x => Math.max(x._1, x._2))) 48 | } 49 | }) 50 | } 51 | 52 | val seed = System.currentTimeMillis() 53 | val sampled = rdd.sample(withReplacement = false, sample_rate, seed).map(_._1).collect() 54 | 55 | val dim = new Array[Int](2) 56 | var remaining = est_partition.toDouble 57 | for (i <- 0 until 2) { 58 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (2 - i))).toInt 59 | remaining /= dim(i) 60 | } 61 | 62 | def recursiveGroupSegment(entries: Array[MBR], now_min: Array[Double], 63 | now_max: Array[Double], cur_dim: Int, until_dim: Int): Array[MBR] = { 64 | val len = entries.length.toDouble 65 | val grouped = entries.sortWith(_.centroid.coord(cur_dim) < _.centroid.coord(cur_dim)) 66 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 67 | val flag = 1 << cur_dim 68 | var ans = mutable.ArrayBuffer[MBR]() 69 | if (cur_dim < until_dim) { 70 | for (i <- grouped.indices) { 71 | val cur_min = now_min 72 | val cur_max = now_max 73 | if (i == 0 && i == grouped.length - 1) { 74 | cur_min(cur_dim) = data_bounds.min(cur_dim) 75 | cur_max(cur_dim) = data_bounds.max(cur_dim) 76 | } else if (i == 0) { 77 | cur_min(cur_dim) = data_bounds.min(cur_dim) 78 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 79 | } else if (i == grouped.length - 1) { 80 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 81 | cur_max(cur_dim) = data_bounds.max(cur_dim) 82 | } else { 83 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 84 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 85 | } 86 | ans ++= recursiveGroupSegment(grouped(i), cur_min, cur_max, cur_dim + 1, until_dim) 87 | } 88 | ans.toArray 89 | } else { 90 | for (i <- grouped.indices) { 91 | if (i == 0 && i == grouped.length - 1) { 92 | now_min(cur_dim) = data_bounds.min(cur_dim) 93 | now_max(cur_dim) = data_bounds.max(cur_dim) 94 | } else if (i == 0) { 95 | now_min(cur_dim) = data_bounds.min(cur_dim) 96 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 97 | } else if (i == grouped.length - 1) { 98 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 99 | now_max(cur_dim) = data_bounds.max(cur_dim) 100 | } else { 101 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 102 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 103 | } 104 | ans += 
MBR(Point(now_min.clone()), Point(now_max.clone())) 105 | } 106 | ans.toArray 107 | } 108 | } 109 | 110 | val cur_min = new Array[Double](2) 111 | val cur_max = new Array[Double](2) 112 | val mbrs = recursiveGroupSegment(sampled, cur_min, cur_max, 0, 1) 113 | 114 | (mbrs.zipWithIndex, mbrs.length) 115 | } 116 | 117 | private val rt = RTree.applyMBR(partBound.map(x => (x._1, x._2, 1)), max_entries_per_node) 118 | 119 | def getPartition(key: Any): Int = { 120 | val k = key.asInstanceOf[MBR] 121 | 122 | rt.circleRange(k.centroid, 0.0).head._2 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRSegPartitioner.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 4 | import edu.utah.cs.trajectory.TrajMeta 5 | import edu.utah.cs.index.RTree 6 | import org.apache.spark.Partitioner 7 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 8 | 9 | import scala.collection.mutable 10 | 11 | /** 12 | * Created by dongx on 8/30/16. 13 | * STRPartitioner for two-dimensional Line Segments 14 | */ 15 | 16 | object STRSegPartition { 17 | def apply(origin: RDD[(LineSegment, TrajMeta)], est_partition: Int, 18 | sample_rate: Double, max_entries_per_node: Int) 19 | : (RDD[(LineSegment, TrajMeta)], Array[(MBR, Int)]) = { 20 | val part = new STRSegPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 21 | val shuffled = new ShuffledRDD[LineSegment, TrajMeta, TrajMeta](origin, part) 22 | (shuffled, part.partBound) 23 | } 24 | } 25 | 26 | 27 | class STRSegPartitioner(est_partition: Int, 28 | sample_rate: Double, 29 | max_entries_per_node: Int, 30 | rdd: RDD[_ <: Product2[LineSegment, Any]]) 31 | extends Partitioner { 32 | 33 | def numPartitions: Int = partitions 34 | 35 | private case class Bounds(min: Array[Double], max: Array[Double]) 36 | 37 | var (partBound, partitions) = { 38 | val data_bounds = { 39 | rdd.aggregate[Bounds](null)((bound, data) => { 40 | if (bound == null) { 41 | val tmp_mbr = data._1.getMBR 42 | Bounds(tmp_mbr.low.coord, tmp_mbr.high.coord) 43 | } else { 44 | val tmp_mbr = data._1.getMBR 45 | Bounds(bound.min.zip(tmp_mbr.low.coord).map(x => Math.min(x._1, x._2)), 46 | bound.max.zip(tmp_mbr.high.coord).map(x => Math.max(x._1, x._2))) 47 | } 48 | }, (left, right) => { 49 | if (left == null) right 50 | else if (right == null) left 51 | else { 52 | Bounds(left.min.zip(right.min).map(x => Math.min(x._1, x._2)), 53 | left.max.zip(right.max).map(x => Math.max(x._1, x._2))) 54 | } 55 | }) 56 | } 57 | 58 | val seed = System.currentTimeMillis() 59 | val sampled = rdd.sample(withReplacement = false, sample_rate, seed).map(_._1).collect() 60 | 61 | val dim = new Array[Int](2) 62 | var remaining = est_partition.toDouble 63 | for (i <- 0 until 2) { 64 | dim(i) = Math.ceil(Math.pow(remaining, 1.0 / (2 - i))).toInt 65 | remaining /= dim(i) 66 | } 67 | 68 | def recursiveGroupSegment(entries: Array[LineSegment], now_min: Array[Double], 69 | now_max: Array[Double], cur_dim: Int, until_dim: Int): Array[MBR] = { 70 | val len = entries.length.toDouble 71 | val grouped = entries.sortWith(_.centroid.coord(cur_dim) < _.centroid.coord(cur_dim)) 72 | .grouped(Math.ceil(len / dim(cur_dim)).toInt).toArray 73 | var ans = mutable.ArrayBuffer[MBR]() 74 | if (cur_dim < until_dim) { 75 | for (i <- grouped.indices) { 76 | val cur_min = now_min 77 | val cur_max = now_max 78 | if (i == 0 && i == 
grouped.length - 1) { 79 | cur_min(cur_dim) = data_bounds.min(cur_dim) 80 | cur_max(cur_dim) = data_bounds.max(cur_dim) 81 | } else if (i == 0) { 82 | cur_min(cur_dim) = data_bounds.min(cur_dim) 83 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 84 | } else if (i == grouped.length - 1) { 85 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 86 | cur_max(cur_dim) = data_bounds.max(cur_dim) 87 | } else { 88 | cur_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 89 | cur_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 90 | } 91 | ans ++= recursiveGroupSegment(grouped(i), cur_min, cur_max, cur_dim + 1, until_dim) 92 | } 93 | ans.toArray 94 | } else { 95 | for (i <- grouped.indices) { 96 | if (i == 0 && i == grouped.length - 1) { 97 | now_min(cur_dim) = data_bounds.min(cur_dim) 98 | now_max(cur_dim) = data_bounds.max(cur_dim) 99 | } else if (i == 0) { 100 | now_min(cur_dim) = data_bounds.min(cur_dim) 101 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 102 | } else if (i == grouped.length - 1) { 103 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 104 | now_max(cur_dim) = data_bounds.max(cur_dim) 105 | } else { 106 | now_min(cur_dim) = grouped(i).head.centroid.coord(cur_dim) 107 | now_max(cur_dim) = grouped(i + 1).head.centroid.coord(cur_dim) 108 | } 109 | ans += MBR(Point(now_min.clone()), Point(now_max.clone())) 110 | } 111 | ans.toArray 112 | } 113 | } 114 | 115 | val cur_min = new Array[Double](2) 116 | val cur_max = new Array[Double](2) 117 | val mbrs = recursiveGroupSegment(sampled, cur_min, cur_max, 0, 1) 118 | 119 | (mbrs.zipWithIndex, mbrs.length) 120 | } 121 | 122 | private val rt = RTree.applyMBR(partBound.map(x => (x._1, x._2, 1)), max_entries_per_node) 123 | 124 | def getPartition(key: Any): Int = { 125 | val k = key.asInstanceOf[LineSegment] 126 | 127 | rt.circleRange(k.centroid, 0.0).head._2 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/partitioner/STRTrajPartition.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.partitioner 2 | 3 | import edu.utah.cs.spatial.{LineSegment, MBR} 4 | import org.apache.spark.rdd.{RDD, ShuffledRDD} 5 | 6 | import scala.collection.mutable 7 | 8 | /** 9 | * Created by dongx on 1/16/2017. 10 | */ 11 | object STRTrajPartition { 12 | def apply(origin: RDD[(MBR, (Int, Array[LineSegment]))], est_partition: Int, 13 | sample_rate: Double, max_entries_per_node: Int) 14 | : RDD[(MBR, (Int, Array[LineSegment]))] = { 15 | val part = new STRMBRPartitioner(est_partition, sample_rate, max_entries_per_node, origin) 16 | val shuffled = new ShuffledRDD[MBR, (Int, Array[LineSegment]), (Int, Array[LineSegment])](origin, part) 17 | shuffled 18 | } 19 | } -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Circle.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 3/16/16. 21 | */ 22 | case class Circle(center: Point, radius: Double) extends Shape { 23 | override def intersects(other: Shape): Boolean = { 24 | other match { 25 | case p: Point => contains(p) 26 | case mbr: MBR => intersects(mbr) 27 | case cir: Circle => intersects(cir) 28 | case poly: Polygon => poly.intersects(this) 29 | case seg: LineSegment => seg.intersects(this) 30 | } 31 | } 32 | 33 | override def minDist(other: Shape): Double = { 34 | other match { 35 | case p: Point => minDist(p) 36 | case mbr: MBR => minDist(mbr) 37 | case cir: Circle => minDist(cir) 38 | case poly: Polygon => poly.minDist(this) 39 | case seg: LineSegment => seg.minDist(this) 40 | } 41 | } 42 | 43 | def minDist(other: Point): Double = { 44 | require(center.coord.length == other.coord.length) 45 | if (contains(other)) 0.0 46 | else other.minDist(center) - radius 47 | } 48 | 49 | def minDist(other: MBR): Double = { 50 | require(center.coord.length == other.low.coord.length) 51 | if (intersects(other)) 0.0 52 | else center.minDist(other) - radius 53 | } 54 | 55 | def minDist(other: Circle): Double = { 56 | require(center.coord.length == other.center.coord.length) 57 | if (intersects(other)) 0.0 58 | else center.minDist(other.center) - radius - other.radius 59 | } 60 | 61 | def contains(p: Point): Boolean = p.minDist(center) <= radius 62 | 63 | def intersects(other: MBR): Boolean = center.minDist(other) <= radius 64 | 65 | def intersects(other: Circle): Boolean = other.center.minDist(center) <= other.radius + radius 66 | 67 | def getMBR: MBR = new MBR(center.shift(-radius), center.shift(radius)) 68 | 69 | override def toString: String = "CIRCLE(" + center.toString + "," + radius + ")" 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Dist.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 
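 * furthest(a, b) below (editor's note) is the maximum distance from point a to any point of box b, taking the farther box face per axis: sqrt(sum_i max((a_i - b.low_i)^2, (a_i - b.high_i)^2)).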
21 | * Distance Utilities 22 | */ 23 | object Dist { 24 | def furthest(a: Point, b: MBR) : Double = { 25 | require(a.coord.length == b.low.coord.length) 26 | var ans = 0.0 27 | for (i <- a.coord.indices) { 28 | ans += Math.max((a.coord(i) - b.low.coord(i)) * (a.coord(i) - b.low.coord(i)), 29 | (a.coord(i) - b.high.coord(i)) * (a.coord(i) - b.high.coord(i))) 30 | } 31 | Math.sqrt(ans) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/DistanceUtil.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.spatial 2 | 3 | import org.apache.commons.math3.util.FastMath 4 | 5 | object DistanceUtil { 6 | def computeGPSCoordDis(lat0: Double, lon0: Double, lat1: Double, lon1: Double): Double = { 7 | val R = 6371e3 8 | val phi0 = lat0.toRadians 9 | val phi1 = lat1.toRadians 10 | val delta_phi = (lat1 - lat0).toRadians 11 | val delta_lambda = (lon1 - lon0).toRadians 12 | 13 | val a = FastMath.sin(delta_phi / 2) * FastMath.sin(delta_phi / 2) + 14 | FastMath.cos(phi0) * FastMath.cos(phi1) * 15 | FastMath.sin(delta_lambda / 2) * Math.sin(delta_lambda / 2) 16 | val c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)) 17 | 18 | R * c 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/LineSegment.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | package edu.utah.cs.spatial 19 | 20 | /** 21 | * Created by dongx on 5/31/16. 22 | * This is a simple implementation for Line Segment. 23 | * Note: Currently, we only support 2D line segments. 
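 * minDist(p: Point) below uses the standard clamped projection (editor's note): with t = ((p - start) . (end - start)) / |end - start|^2 clamped to [0, 1], the answer is the distance from p to start + t * (end - start).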
24 | */ 25 | case class LineSegment(start: Point, end: Point) extends Shape { 26 | require(start.coord.length == 2 && end.coord.length == 2) 27 | 28 | val centroid = new Point(start.coord.zip(end.coord).map(x => (x._1 + x._2) / 2.0)) 29 | 30 | override def intersects(other: Shape): Boolean = { 31 | other match { 32 | case p: Point => contains(p) 33 | case mbr: MBR => intersects(mbr) 34 | case cir: Circle => intersects(cir) 35 | case poly: Polygon => poly.intersects(this) 36 | case seg: LineSegment => intersects(seg) 37 | } 38 | } 39 | 40 | override def minDist(other: Shape): Double = { 41 | other match { 42 | case p: Point => minDist(p) 43 | case mbr: MBR => minDist(mbr) 44 | case cir: Circle => minDist(cir) 45 | case poly: Polygon => poly.minDist(this) 46 | case seg: LineSegment => minDist(seg) 47 | } 48 | } 49 | 50 | def matchDist(other: LineSegment): Double = { 51 | Math.max(Math.max(start.minDist(other), end.minDist(other)), 52 | Math.max(other.start.minDist(this), other.end.minDist(this))) 53 | } 54 | 55 | private def orientation(p: Point, q: Point, r: Point): Int = { 56 | val cross = (q.coord(1) - p.coord(1)) * (r.coord(0) - q.coord(0)) - 57 | (q.coord(0) - p.coord(0)) * (r.coord(1) - q.coord(1)) 58 | if (cross == 0) 0 59 | else if (cross > 0) 1 60 | else -1 61 | } 62 | 63 | private def withinBox(check: Point, start: Point, end: Point): Boolean = { 64 | if (check.coord(0) >= Math.min(start.coord(0), end.coord(0)) && 65 | check.coord(0) <= Math.max(start.coord(0), end.coord(0)) && 66 | check.coord(1) >= Math.min(start.coord(1), end.coord(1)) && 67 | check.coord(1) <= Math.max(start.coord(1), end.coord(1))) { 68 | true 69 | } else false 70 | } 71 | 72 | def intersects(l: LineSegment): Boolean = intersects(l.start, l.end) 73 | 74 | private def intersects(p: Point, q: Point): Boolean = { 75 | val o1 = orientation(start, end, p) 76 | val o2 = orientation(start, end, q) 77 | val o3 = orientation(p, q, start) 78 | val o4 = orientation(p, q, end) 79 | if (o1 != o2 && o3 != o4) true 80 | else if (o1 == 0 && withinBox(p, start, end)) true 81 | else if (o2 == 0 && withinBox(q, start, end)) true 82 | else if (o3 == 0 && withinBox(start, p, q)) true 83 | else if (o4 == 0 && withinBox(end, p, q)) true 84 | else false 85 | } 86 | 87 | def contains(l: Point): Boolean = orientation(start, l, end) == 0 && withinBox(l, start, end) 88 | 89 | def intersects(cir: Circle): Boolean = { 90 | minDist(cir.center) <= cir.radius 91 | } 92 | 93 | def intersects(mbr: MBR): Boolean = { 94 | assert(mbr.low.coord.length == 2) 95 | if (mbr.contains(start) && mbr.contains(end)) true 96 | else if (intersects(mbr.low, Point(Array(mbr.high.coord(0), mbr.low.coord(1))))) true 97 | else if (intersects(mbr.low, Point(Array(mbr.low.coord(0), mbr.high.coord(1))))) true 98 | else if (intersects(mbr.high, Point(Array(mbr.high.coord(0), mbr.low.coord(1))))) true 99 | else if (intersects(mbr.high, Point(Array(mbr.low.coord(0), mbr.high.coord(1))))) true 100 | else false 101 | } 102 | 103 | def minDist(p: Point): Double = { 104 | require(p.coord.length == 2) 105 | val len = start.minDist(end) 106 | if (len == 0) return p.minDist(start) 107 | var t = ((p.coord(0) - start.coord(0)) * (end.coord(0) - start.coord(0)) 108 | + (p.coord(1) - start.coord(1)) * (end.coord(1) - start.coord(1))) / (len * len) 109 | t = Math.max(0, Math.min(1, t)) 110 | val proj_x = start.coord(0) + t * (end.coord(0) - start.coord(0)) 111 | val proj_y = start.coord(1) + t * (end.coord(1) - start.coord(1)) 112 | p.minDist(Point(Array(proj_x, proj_y))) 113 | } 
} 114 | 115 | def minDist(cir: Circle): Double = { 116 | val central_dis = minDist(cir.center) 117 | if (central_dis <= cir.radius) 0.0 118 | else central_dis - cir.radius 119 | } 120 | 121 | def minDist(l: LineSegment): Double = { 122 | if (intersects(l)) 0.0 123 | else { 124 | Math.min(Math.min(minDist(l.start), minDist(l.end)), 125 | Math.min(l.minDist(start), l.minDist(end))) 126 | } 127 | } 128 | 129 | def minDist(mbr: MBR): Double = { 130 | if (mbr.contains(start) && mbr.contains(end)) return 0.0 131 | val s1 = LineSegment(mbr.low, Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) 132 | val s2 = LineSegment(mbr.low, Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) 133 | val s3 = LineSegment(mbr.high, Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) 134 | val s4 = LineSegment(mbr.high, Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) 135 | Math.min(Math.min(minDist(s1), minDist(s2)), Math.min(minDist(s3), minDist(s4))) 136 | } 137 | 138 | def cover(mbr: MBR, r: Double): Boolean = { 139 | if (minDist(mbr.low) > r) false 140 | else if (minDist(mbr.high) > r) false 141 | else if (minDist(Point(Array(mbr.low.coord(0), mbr.high.coord(1)))) > r) false 142 | else if (minDist(Point(Array(mbr.high.coord(0), mbr.low.coord(1)))) > r) false 143 | else true 144 | } 145 | 146 | override def getMBR: MBR = { 147 | val (low_x, high_x) = if (start.coord(0) < end.coord(0)) { 148 | (start.coord(0), end.coord(0)) 149 | } else { 150 | (end.coord(0), start.coord(0)) 151 | } 152 | 153 | val (low_y, high_y) = if (start.coord(1) < end.coord(1)) { 154 | (start.coord(1), end.coord(1)) 155 | } else { 156 | (end.coord(1), start.coord(1)) 157 | } 158 | 159 | MBR(Point(Array(low_x, low_y)), Point(Array(high_x, high_y))) 160 | } 161 | 162 | def length: Double = start.minDist(end) 163 | 164 | override def toString: String = "SEG(" + start.toString + "->" + end.toString + ")" 165 | 166 | def toTSV: String = start.coord(0) + "\t" + start.coord(1) + "\t" + end.coord(0) + "\t" + end.coord(1) 167 | } 168 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/LineString.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.spatial 2 | 3 | /** 4 | * Created by dongx on 1/16/2017. 5 | */ 6 | case class LineString(segs: Array[LineSegment]) extends Shape { 7 | private val mbr: MBR = segs.foldLeft(segs(0).getMBR)((now, seg) => now.union(seg.getMBR)) 8 | 9 | override def minDist(other: Shape): Double = segs.map(x => x.minDist(other)).min 10 | 11 | override def intersects(other: Shape): Boolean = segs.exists(x => x.intersects(other)) 12 | 13 | def hausdorff(other: LineString): Double = 14 | Math.max(segs.map(now_x => other.segs.map(now_y => now_x.minDist(now_y)).min).max, 15 | other.segs.map(now_x => segs.map(now_y => now_x.minDist(now_y)).min).max) 16 | 17 | override def getMBR: MBR = mbr 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/MBR.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 21 | * Multi-Dimensional Minimum Bounding Box 22 | */ 23 | case class MBR(low: Point, high: Point) extends Shape { 24 | require(low.coord.length == high.coord.length) 25 | require(low <= high) 26 | 27 | override def intersects(other: Shape): Boolean = { 28 | other match { 29 | case p: Point => contains(p) 30 | case mbr: MBR => intersects(mbr) 31 | case cir: Circle => cir.intersects(this) 32 | case poly: Polygon => poly.intersects(this) 33 | case seg: LineSegment => seg.intersects(this) 34 | } 35 | } 36 | 37 | override def minDist(other: Shape): Double = { 38 | other match { 39 | case p: Point => minDist(p) 40 | case mbr: MBR => minDist(mbr) 41 | case cir: Circle => cir.minDist(this) 42 | case poly: Polygon => poly.minDist(this) 43 | case seg: LineSegment => seg.minDist(this) 44 | } 45 | } 46 | 47 | def this(low_x: Double, low_y: Double, high_x: Double, high_y: Double) { 48 | this(Point(Array(low_x, low_y)), Point(Array(high_x, high_y))) 49 | } 50 | 51 | val centroid = Point(low.coord.zip(high.coord).map(x => (x._1 + x._2) / 2.0)) 52 | 53 | def union(other: MBR): MBR = { 54 | val new_low = low.coord.zip(other.low.coord).map(x => Math.min(x._1, x._2)) 55 | val new_high = high.coord.zip(other.high.coord).map(x => Math.max(x._1, x._2)) 56 | MBR(Point(new_low), Point(new_high)) 57 | } 58 | 59 | def intersects(other: MBR): Boolean = { 60 | require(low.coord.length == other.low.coord.length) 61 | for (i <- low.coord.indices) 62 | if (low.coord(i) > other.high.coord(i) || high.coord(i) < other.low.coord(i)) { 63 | return false 64 | } 65 | true 66 | } 67 | 68 | def contains(p: Point): Boolean = { 69 | require(low.coord.length == p.coord.length) 70 | for (i <- p.coord.indices) 71 | if (low.coord(i) > p.coord(i) || high.coord(i) < p.coord(i)) { 72 | return false 73 | } 74 | true 75 | } 76 | 77 | def minDist(p: Point): Double = { 78 | require(low.coord.length == p.coord.length) 79 | var ans = 0.0 80 | for (i <- p.coord.indices) { 81 | if (p.coord(i) < low.coord(i)) { 82 | ans += (low.coord(i) - p.coord(i)) * (low.coord(i) - p.coord(i)) 83 | } else if (p.coord(i) > high.coord(i)) { 84 | ans += (p.coord(i) - high.coord(i)) * (p.coord(i) - high.coord(i)) 85 | } 86 | } 87 | Math.sqrt(ans) 88 | } 89 | 90 | def maxDist(p: Point): Double = { 91 | require(low.coord.length == p.coord.length) 92 | var ans = 0.0 93 | for (i <- p.coord.indices) { 94 | ans += Math.max((p.coord(i) - low.coord(i)) * (p.coord(i) - low.coord(i)), 95 | (p.coord(i) - high.coord(i)) * (p.coord(i) - high.coord(i))) 96 | } 97 | Math.sqrt(ans) 98 | } 99 | 100 | def minDist(other: MBR): Double = { 101 | require(low.coord.length == other.low.coord.length) 102 | var ans = 0.0 103 | for (i <- low.coord.indices) { 104 | var x = 0.0 105 | if (other.high.coord(i) < low.coord(i)) { 106 | x = Math.abs(other.high.coord(i) - low.coord(i)) 107 | } else if (high.coord(i) < other.low.coord(i)) { 108 | x = Math.abs(other.low.coord(i) - high.coord(i)) 109 | } 110 | ans += x * x 111 | } 112 | Math.sqrt(ans) 113 | } 114 | 115 
| def area: Double = low.coord.zip(high.coord).map(x => x._2 - x._1).product 116 | 117 | def calcRatio(query: MBR): Double = { 118 | val intersect_low = low.coord.zip(query.low.coord).map(x => Math.max(x._1, x._2)) 119 | val intersect_high = high.coord.zip(query.high.coord).map(x => Math.min(x._1, x._2)) 120 | val diff_intersect = intersect_low.zip(intersect_high).map(x => x._2 - x._1) 121 | if (diff_intersect.forall(_ > 0)) 1.0 * diff_intersect.product / area 122 | else 0.0 123 | } 124 | 125 | override def toString: String = "MBR(" + low.toString + "," + high.toString + ")" 126 | 127 | def getMBR: MBR = this.copy() 128 | } 129 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Point.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 21 | * Multi-Dimensional Point 22 | */ 23 | final case class Point(coord: Array[Double]) extends Shape { 24 | def this() = this(Array()) 25 | 26 | override def intersects(other: Shape): Boolean = { 27 | other match { 28 | case p: Point => p == this 29 | case mbr: MBR => mbr.contains(this) 30 | case cir: Circle => cir.contains(this) 31 | case poly: Polygon => poly.contains(this) 32 | case seg: LineSegment => seg.contains(this) 33 | } 34 | } 35 | 36 | override def minDist(other: Shape): Double = { 37 | other match { 38 | case p: Point => minDist(p) 39 | case mbr: MBR => mbr.minDist(this) 40 | case cir: Circle => cir.minDist(this) 41 | case poly: Polygon => poly.minDist(this) 42 | case seg: LineSegment => seg.minDist(this) 43 | } 44 | } 45 | 46 | def minDist(other: Point): Double = { 47 | require(coord.length == other.coord.length) 48 | var ans = 0.0 49 | for (i <- coord.indices) 50 | ans += (coord(i) - other.coord(i)) * (coord(i) - other.coord(i)) 51 | Math.sqrt(ans) 52 | } 53 | 54 | def ==(other: Point): Boolean = other match { 55 | case p: Point => 56 | if (p.coord.length != coord.length) false 57 | else { 58 | for (i <- coord.indices) 59 | if (coord(i) != p.coord(i)) return false 60 | true 61 | } 62 | case _ => false 63 | } 64 | 65 | def <=(other: Point): Boolean = { 66 | for (i <- coord.indices) 67 | if (coord(i) > other.coord(i)) return false 68 | true 69 | } 70 | 71 | def shift(d: Double): Point = Point(coord.map(x => x + d)) 72 | 73 | override def toString: String = { 74 | var s = "POINT(" 75 | s += coord(0).toString 76 | for (i <- 1 until coord.length) s += "," + coord(i) 77 | s + ")" 78 | } 79 | 80 | def getMBR: MBR = new MBR(this, this) 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Polygon.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under 
the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | import com.vividsolutions.jts.geom.{Coordinate, Envelope, GeometryFactory, Polygon => JTSPolygon} 20 | import com.vividsolutions.jts.io.{WKBReader, WKBWriter, WKTWriter} 21 | 22 | /** 23 | * Created by Dong Xie on 3/16/2016. 24 | * Light wrapper of JTS Polygon 25 | * Note: Only supports up to 2 dimensions 26 | */ 27 | case class Polygon(content: JTSPolygon) extends Shape { 28 | def this() = { 29 | this(null) 30 | } 31 | 32 | val gf = new GeometryFactory() 33 | 34 | override def minDist(other: Shape): Double = { 35 | other match { 36 | case p: Point => minDist(p) 37 | case mbr: MBR => minDist(mbr) 38 | case cir: Circle => minDist(cir) 39 | case poly: Polygon => minDist(poly) case seg: LineSegment => minDist(seg) 40 | } 41 | } 42 | 43 | override def intersects(other: Shape): Boolean = { 44 | other match { 45 | case p: Point => contains(p) 46 | case mbr: MBR => intersects(mbr) 47 | case cir: Circle => intersects(cir) 48 | case poly: Polygon => intersects(poly) case seg: LineSegment => intersects(seg) 49 | } 50 | } 51 | 52 | def contains(p: Point): Boolean = { 53 | require(p.coord.length == 2) 54 | content.contains(gf.createPoint(new Coordinate(p.coord(0), p.coord(1)))) 55 | } 56 | 57 | def intersects(mbr: MBR): Boolean = { 58 | require(mbr.low.coord.length == 2) 59 | val low = new Coordinate(mbr.low.coord(0), mbr.low.coord(1)) 60 | val high = new Coordinate(mbr.high.coord(0), mbr.high.coord(1)) 61 | content.intersects(gf.toGeometry(new Envelope(low, high))) 62 | } 63 | 64 | def intersects(cir: Circle): Boolean = minDist(cir.center) <= cir.radius 65 | 66 | def intersects(poly: Polygon): Boolean = content.intersects(poly.content) 67 | 68 | def intersects(seg: LineSegment): Boolean = { 69 | val start = new Coordinate(seg.start.coord(0), seg.start.coord(1)) 70 | val end = new Coordinate(seg.end.coord(0), seg.end.coord(1)) 71 | content.intersects(gf.createLineString(Array(start, end))) 72 | } 73 | 74 | def minDist(p: Point): Double = { 75 | require(p.coord.length == 2) 76 | content.distance(gf.createPoint(new Coordinate(p.coord(0), p.coord(1)))) 77 | } 78 | 79 | def minDist(mbr: MBR): Double = { 80 | require(mbr.low.coord.length == 2) 81 | val low = new Coordinate(mbr.low.coord(0), mbr.low.coord(1)) 82 | val high = new Coordinate(mbr.high.coord(0), mbr.high.coord(1)) 83 | content.distance(gf.toGeometry(new Envelope(low, high))) 84 | } 85 | 86 | def minDist(cir: Circle): Double = { 87 | val res = minDist(cir.center) - cir.radius 88 | if (res <= 0) 0 89 | else res 90 | } 91 | 92 | 93 | def minDist(poly: Polygon): Double = content.distance(poly.content) 94 | 95 | def minDist(seg: LineSegment): Double = { 96 | val start = new Coordinate(seg.start.coord(0), seg.start.coord(1)) 97 | val end = new Coordinate(seg.end.coord(0), seg.end.coord(1)) 98 | content.distance(gf.createLineString(Array(start, end))) 99 | } 100 | 101 | override def toString: String = new WKTWriter().write(content) 102 | def toWKB: Array[Byte] = new WKBWriter().write(content) 103 | 104 | def getMBR: MBR
= { 105 | val envelope = content.getEnvelopeInternal 106 | new MBR(envelope.getMinX, envelope.getMinY, envelope.getMaxX, envelope.getMaxY) 107 | } 108 | } 109 | 110 | object Polygon { 111 | def apply(points: Array[Point]): Polygon = { 112 | require(points.length > 2 && points(0).coord.length == 2) 113 | val gf = new GeometryFactory() 114 | Polygon(gf.createPolygon(points.map(x => new Coordinate(x.coord(0), x.coord(1))))) 115 | } 116 | def fromJTSPolygon(polygon: JTSPolygon): Polygon = new Polygon(polygon) 117 | def fromWKB(bytes: Array[Byte]): Polygon = 118 | new Polygon(new WKBReader().read(bytes).asInstanceOf[JTSPolygon]) 119 | } 120 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/Shape.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | import com.vividsolutions.jts.geom.{Geometry, Polygon => JTSPolygon} 20 | 21 | /** 22 | * Created by dong on 3/16/16. 23 | */ 24 | abstract class Shape extends Serializable { 25 | def minDist(other: Shape): Double 26 | 27 | def intersects(other: Shape): Boolean 28 | 29 | def getMBR: MBR 30 | } 31 | 32 | object Shape { 33 | final def apply(g: Geometry): Shape = g match { 34 | case jtsPolygon : JTSPolygon => new Polygon(jtsPolygon) 35 | case _ => null 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/spatial/ZValue.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 by Simba Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package edu.utah.cs.spatial 18 | 19 | /** 20 | * Created by dong on 1/15/16. 
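 * Worked example (editor's addition): interleaving the bits of (1, 2) = (01, 10) one bit per dimension per round yields 0110 in binary, so ZValue(Array(1, 2)) == 6L, and ZValue.unapply(6L, 2) recovers Array(1, 2).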
21 | * Utilities for Z-Value Curves 22 | */ 23 | object ZValue { 24 | def paddingBinaryBits(source: Int, digits: Int): String = { 25 | val pd_length = digits - source.toBinaryString.length 26 | "0" * pd_length + source.toBinaryString 27 | } 28 | 29 | // TODO shift Long to BigInt for supporting bigger Z-Values 30 | def apply(point: Array[Int]): Long = { 31 | var maxBit = 0 32 | for (i <- point.indices) 33 | if (point(i).toBinaryString.length > maxBit) { 34 | maxBit = point(i).toBinaryString.length 35 | } 36 | 37 | var ans = "" 38 | val pointStrs = point.map(x => paddingBinaryBits(x, maxBit)) 39 | 40 | for (i <- 0 until maxBit) 41 | for (j <- point.indices) 42 | ans += pointStrs(j)(i) 43 | 44 | java.lang.Long.parseLong(ans, 2) 45 | } 46 | 47 | def unapply(value: Long, dimension: Int): Option[Array[Int]] = { 48 | val ans = new Array[Int](dimension) 49 | val binaryZValue = value.toBinaryString 50 | var currentBit = binaryZValue.length - 1 51 | var shiftBase = 1 52 | while (currentBit >= 0) { 53 | for (i <- 0 until dimension) 54 | if (currentBit - dimension + 1 + i >= 0) { 55 | ans(i) += shiftBase * binaryZValue(currentBit - dimension + 1 + i).toString.toInt 56 | } 57 | 58 | currentBit -= dimension 59 | shiftBase *= 2 60 | } 61 | Some(ans) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BFDISolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | import edu.utah.cs.index.RTree 3 | import edu.utah.cs.index_bf.RTreeWithBF 4 | import edu.utah.cs.partitioner.{STRSegPartition, STRTrajPartition} 5 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 6 | import edu.utah.cs.util._ 7 | import org.apache.spark.rdd.PartitionPruningRDD 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | 11 | import scala.collection.mutable 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16.
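 * Pipeline sketch (editor's summary of main() below): (1) take c * k random trajectories overlapping the query's partitions and use the k-th smallest discrete Frechet distance as a pruning bound; (2) run a range query with that bound over the per-partition R-trees with Bloom filters to collect candidate trajectory ids; (3) refine the surviving trajectories with the exact distance and return the top k.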
16 | * Line Segment Trajectory Storage 17 | */ 18 | object BFDISolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | 23 | private class ResultOrdering extends Ordering[(Double, Int)] { 24 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 25 | } 26 | 27 | def getMBR(x: Array[LineSegment]): MBR = { 28 | val pts = x.flatMap(p => Array(p.start, p.end)) 29 | var maxx = Double.MinValue 30 | var maxy = Double.MinValue 31 | var minx = Double.MaxValue 32 | var miny = Double.MaxValue 33 | pts.foreach(pt => { 34 | maxx = Math.max(pt.coord(0), maxx) 35 | maxy = Math.max(pt.coord(1), maxy) 36 | minx = Math.min(pt.coord(0), minx) 37 | miny = Math.min(pt.coord(1), miny) 38 | }) 39 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 40 | } 41 | 42 | def main(args: Array[String]): Unit = { 43 | val sparkConf = new SparkConf().setAppName("BFDISolution").set("spark.locality.wait", "0") 44 | .set("spark.driver.maxResultSize", "4g") 45 | val sc = new SparkContext(sparkConf) 46 | 47 | if (args.length != 2) { 48 | println("usage: BFDISolution <query_traj_filename> <traj_data_filename>") 49 | System.exit(1) 50 | } 51 | 52 | Thread.sleep(3000) 53 | 54 | val query_traj_filename = args(0) 55 | val traj_data_filename = args(1) 56 | 57 | val start1 = System.currentTimeMillis() 58 | 59 | val dataRDD = sc.textFile(traj_data_filename) 60 | .map(x => x.split('\t')) 61 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 62 | Point(Array(x(3).toDouble, x(4).toDouble))), 63 | TrajMeta(x(0).toInt, x(5).toInt))) 64 | 65 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 66 | iter.map(x => { 67 | val splitted = x.split("\t") 68 | (splitted(0).toInt, 69 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 70 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 71 | }).toArray.groupBy(_._1).map(now => { 72 | val cur_traj = now._2.sortBy(_._1).map(_._2) 73 | (getMBR(cur_traj), (now._1, cur_traj)) 74 | }).iterator 75 | }) 76 | 77 | val part_traj = STRTrajPartition(trajs, dataRDD.partitions.length, 0.01, max_entries_per_node) 78 | .persist(StorageLevel.MEMORY_AND_DISK_SER) 79 | 80 | println(part_traj.partitions.length) 81 | 82 | val traj_stat = part_traj.mapPartitions(iter => { 83 | Array(iter.aggregate[(MBR, Int)]((null, 0))((res, now) => { 84 | if (res._1 == null) (now._1, 1) 85 | else (res._1.union(now._1), res._2 + 1) 86 | }, (left, right) => { 87 | if (left._1 == null) right 88 | else if (right._1 == null) left 89 | else (left._1.union(right._1), left._2 + right._2) 90 | })).iterator 91 | }).collect() 92 | val traj_global_rtree = 93 | RTree.applyMBR(traj_stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 94 | 95 | val optimal_num_bits = BloomFilter.optimalNumBits(10000, 0.1) 96 | val optimal_num_hashes = BloomFilter.optimalNumHashes(10000, optimal_num_bits) 97 | val bf_meta = BloomFilterMeta(optimal_num_bits, optimal_num_hashes) 98 | val bc_bf_meta = sc.broadcast(bf_meta) 99 | BloomFilter.meta = bf_meta 100 | 101 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 102 | 103 | val indexed_seg_rdd_with_traj_id = partitioned_rdd.mapPartitions(iter => { 104 | BloomFilter.meta = bc_bf_meta.value 105 | val data = iter.toArray 106 | var index: RTreeWithBF = null 107 | if (data.length > 0) { 108 | index = RTreeWithBF(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 109 | max_entries_per_node,
bc_bf_meta.value) 110 | } 111 | Iterator((data.map(_._2.traj_id).distinct, index)) 112 | }) 113 | val indexed_seg_rdd = indexed_seg_rdd_with_traj_id.map(_._2).persist(StorageLevel.MEMORY_AND_DISK_SER) 114 | indexed_seg_rdd.count() 115 | val stat = indexed_seg_rdd_with_traj_id 116 | .mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._2.root.size, x._1))).collect() 117 | 118 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 119 | 120 | val end1 = System.currentTimeMillis() 121 | println("------------------------------------------------------------") 122 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 123 | println("------------------------------------------------------------") 124 | 125 | 126 | val query_traj_file = Source.fromFile(query_traj_filename) 127 | val queries = query_traj_file.getLines().map { line => 128 | val splitted = line.split('\t') 129 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 130 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 131 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 132 | 133 | var tot_time = 0.0 134 | queries.foreach(query_traj => { 135 | val start2 = System.currentTimeMillis() 136 | val bc_query = sc.broadcast(query_traj) 137 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 138 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 139 | val global_intersect_set = global_intersect.map(_._2).toSet 140 | 141 | val sample_base = stat.zipWithIndex.filter(x => global_intersect_set.contains(x._2)).flatMap(_._1._3) 142 | 143 | val cards = sample_base.length 144 | val rnd = scala.util.Random 145 | val set = mutable.HashSet[Int]() 146 | val samples = mutable.HashSet[Int]() 147 | val n_samples = c * k 148 | for (i <- 0 until n_samples) { 149 | var x = rnd.nextInt(cards) 150 | while (set.contains(x)) x = rnd.nextInt(cards) 151 | set += x 152 | samples += sample_base(x) 153 | } 154 | 155 | val bc_samples = sc.broadcast(samples.toSet) 156 | val pruning_bound_filter = traj_global_rtree.circleRange(global_intersect_mbrs, 0.0).map(_._2).toSet 157 | val pruning_bound = new PartitionPruningRDD(part_traj, pruning_bound_filter.contains) 158 | .filter(x => bc_samples.value.contains(x._2._1)) 159 | .repartition(Math.min(samples.size, sc.defaultParallelism)) 160 | .map(x => Trajectory.discreteFrechetDistance(bc_query.value, x._2._2)) 161 | .takeOrdered(k).last 162 | val end2 = System.currentTimeMillis() 163 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 164 | println("The pruning bound is: " + pruning_bound) 165 | 166 | val start3 = System.currentTimeMillis() 167 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 168 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 169 | val global_prune_set = global_prune.map(_._2).toSet 170 | 171 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 172 | 173 | val bc_prunbound = sc.broadcast(pruning_bound) 174 | val saved_trajs = pruned_rdd.map(part => { 175 | BloomFilter.meta = bc_bf_meta.value 176 | part.circleRangeBF(bc_query.value, bc_prunbound.value) 177 | }).reduce((a, b) => BitArray.or(a, b)) 178 | 179 | val end3 = System.currentTimeMillis() 180 | 181 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 182 | 183 | val start4 = System.currentTimeMillis() 184 | val bc_saved_traj = sc.broadcast(saved_trajs) 185 | val 
final_prune_set = traj_global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), 0.0).map(_._2).toSet 186 | val final_filtered = new PartitionPruningRDD(part_traj, final_prune_set.contains) 187 | .mapPartitions(iter => { 188 | BloomFilter.meta = bc_bf_meta.value 189 | iter.filter(now => BloomFilter.mayContains(bc_saved_traj.value, now._2._1)) 190 | }) 191 | 192 | val res = final_filtered.repartition(sc.defaultParallelism) 193 | .mapPartitions(iter => iter.map(x =>(Trajectory.discreteFrechetDistance(x._2._2, bc_query.value), x._2._1))) 194 | .takeOrdered(k)(new ResultOrdering) 195 | 196 | val end4 = System.currentTimeMillis() 197 | tot_time += (end4 - start2) / 1000.0 198 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 199 | println("# of distance calculated: " + (c * k + final_filtered.count())) 200 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 201 | println("The results show as below:") 202 | res.foreach(println) 203 | println("------------------------------------------------------------") 204 | }) 205 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 206 | println("===================================================") 207 | 208 | sc.stop() 209 | } 210 | } 211 | 212 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BaseLine.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | import scala.collection.mutable 7 | import scala.io.Source 8 | 9 | /** 10 | * Created by dongx on 8/22/16. 11 | */ 12 | object BaseLine { 13 | //final val k_values = Array(1, 10, 30, 50, 70, 100) 14 | final val k_values = Array(10) 15 | 16 | private class ResultOrdering extends Ordering[(Double, Int)] { 17 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 18 | } 19 | 20 | def main(args: Array[String]): Unit = { 21 | val sparkConf = new SparkConf().setAppName("BaseLine")//.setMaster("local[*]") 22 | val sc = new SparkContext(sparkConf) 23 | 24 | if (args.length != 2) { 25 | println("usage: BaseLine ") 26 | System.exit(1) 27 | } 28 | 29 | Thread.sleep(3000) 30 | 31 | val query_traj_filename = args(0) 32 | val traj_data_filename = args(1) 33 | 34 | val query_traj_file = Source.fromFile(query_traj_filename) 35 | val queries = query_traj_file.getLines().map { line => 36 | val splitted = line.split('\t') 37 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 38 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 39 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)).slice(0, 20) 40 | 41 | k_values.foreach(k => { 42 | var tot_time = 0.0 43 | queries.foreach(query_traj => { 44 | println("-------------------------------------------------") 45 | 46 | val start = System.currentTimeMillis() 47 | val bc_query = sc.broadcast(query_traj) 48 | 49 | val res = sc.textFile(traj_data_filename).map{ line => 50 | val splitted = line.split('\t') 51 | (splitted(0).toInt, 52 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 53 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))}.mapPartitions(iter => { 54 | val cur_traj = mutable.ListBuffer[LineSegment]() 55 | val ans = mutable.ListBuffer[(Double, Int)]() 56 | var last_traj_id = -1 57 | while (iter.hasNext) { 58 | val now = iter.next 59 | if (now._1 != 
last_traj_id) { 60 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 61 | //if (cur_traj.nonEmpty) ans += ((Trajectory.discreteFrechetDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 62 | last_traj_id = now._1 63 | cur_traj.clear() 64 | } 65 | cur_traj += now._2 66 | } 67 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 68 | //if (cur_traj.nonEmpty) ans += ((Trajectory.discreteFrechetDistance(cur_traj.toArray, bc_query.value), last_traj_id)) 69 | ans.iterator 70 | }).takeOrdered(k)(new ResultOrdering) 71 | 72 | val end = System.currentTimeMillis() 73 | res.foreach(println) 74 | println("Latency: " + ((end - start) / 1000.0)) 75 | println("-------------------------------------------------") 76 | tot_time += (end - start) / 1000.0 77 | }) 78 | 79 | println("Average Latency for k = " + k + " is : " + (tot_time / 20.0)) 80 | println("===================================================") 81 | }) 82 | 83 | 84 | sc.stop() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BaseLineST.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{Point, LineSegment} 4 | 5 | import scala.collection.mutable 6 | import scala.io.Source 7 | 8 | /** 9 | * Created by Dong Xie on 10/23/2016. 10 | */ 11 | object BaseLineST { 12 | final val k = 10 13 | final val N = 34085 14 | 15 | def minmaxtraj(x: Array[LineSegment], y: Array[LineSegment]) = { 16 | x.map(now_x => y.map(now_y => now_x.minDist(now_y)).min).max 17 | } 18 | 19 | def main(args: Array[String]): Unit = { 20 | if (args.length != 2) { 21 | println("usage: BaseLineST <query_traj_filename> <traj_data_filename>") 22 | System.exit(1) 23 | } 24 | 25 | val query_traj_filename = args(0) 26 | val traj_data_filename = args(1) 27 | 28 | val query_traj_file = Source.fromFile(query_traj_filename) 29 | val query_traj = query_traj_file.getLines().map { line => 30 | val splitted = line.split('\t') 31 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 32 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))) 33 | }.toArray 34 | 35 | val traj_data_file = Source.fromFile(traj_data_filename) 36 | val cur_traj = mutable.ListBuffer[LineSegment]() 37 | val ans = mutable.ListBuffer[(Double, Int)]() 38 | var last_traj_id = -1 39 | val new_iter = traj_data_file.getLines().map(cur => { 40 | val x = cur.split("\t") 41 | (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), Point(Array(x(3).toDouble, x(4).toDouble))), 42 | TrajMeta(x(0).toInt, 1)) 43 | }) 44 | var i = 0 45 | while (new_iter.hasNext) { 46 | val now = new_iter.next 47 | if (now._2.traj_id != last_traj_id) { 48 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, query_traj), last_traj_id)) 49 | last_traj_id = now._2.traj_id 50 | i += 1 51 | println("checking trajectory " + i + "...") 52 | cur_traj.clear() 53 | } 54 | cur_traj += now._1 55 | } 56 | if (cur_traj.nonEmpty) ans += ((Trajectory.hausdorffDistance(cur_traj.toArray, query_traj), last_traj_id)) 57 | //assert(ans.size == N) 58 | ans.sortBy(_._1).take(k).foreach(println) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BitMapSolution.scala: -------------------------------------------------------------------------------- 1 | package 
edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_bm.RTreeWithBM 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import edu.utah.cs.util._ 8 | import org.apache.spark.rdd.PartitionPruningRDD 9 | import org.apache.spark.storage.StorageLevel 10 | import org.apache.spark.{SparkConf, SparkContext} 11 | 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16. 16 | * Line Segment Trajectory Storage 17 | */ 18 | object BitMapSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | final val N = 940698 23 | //final val max_spatial_span = 0.46757 24 | final val max_spatial_span = 2.550598 25 | 26 | private class ResultOrdering extends Ordering[(Double, Int)] { 27 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 28 | } 29 | 30 | def main(args: Array[String]): Unit = { 31 | val sparkConf = new SparkConf().setAppName("BitMapSolution")//.setMaster("local[*]") 32 | val sc = new SparkContext(sparkConf) 33 | 34 | if (args.length != 2) { 35 | println("usage: BitMapSolution ") 36 | System.exit(1) 37 | } 38 | 39 | Thread.sleep(3000) 40 | 41 | val query_traj_filename = args(0) 42 | val traj_data_filename = args(1) 43 | 44 | val start1 = System.currentTimeMillis() 45 | 46 | val dataRDD = sc.textFile(traj_data_filename) 47 | .map(x => x.split('\t')) 48 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 49 | Point(Array(x(3).toDouble, x(4).toDouble))), 50 | TrajMeta(x(0).toInt, x(5).toInt)))//.persist(StorageLevel.MEMORY_AND_DISK_SER) 51 | 52 | //val optimal_num_bits = BloomFilter.optimalNumBits(N, 0.1) 53 | //val optimal_num_hashes = BloomFilter.optimalNumHashes(N, optimal_num_bits) 54 | //println(optimal_num_bits + "\t" + optimal_num_hashes) 55 | val bm_meta = BitMapMeta(N) 56 | val bc_bm_meta = sc.broadcast(bm_meta) 57 | BitMap.meta = bm_meta 58 | 59 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 60 | 61 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 62 | BitMap.meta = bc_bm_meta.value 63 | val data = iter.toArray 64 | var index: RTreeWithBM = null 65 | //var traj_ids: Array[Int] = null 66 | if (data.length > 0) { 67 | index = RTreeWithBM(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 68 | max_entries_per_node, bc_bm_meta.value) 69 | //traj_ids = data.map(_._2.traj_id).distinct 70 | } 71 | Array((data, index)).iterator 72 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 73 | 74 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.bf))).collect() 75 | 76 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 77 | 78 | val end1 = System.currentTimeMillis() 79 | println("------------------------------------------------------------") 80 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 81 | println("------------------------------------------------------------") 82 | 83 | val start2 = System.currentTimeMillis() 84 | val query_traj_file = Source.fromFile(query_traj_filename) 85 | val query_traj = query_traj_file.getLines().map { line => 86 | val splitted = line.split('\t') 87 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 88 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))) 89 | }.toArray 90 | 91 | val bc_query = 
sc.broadcast(query_traj) 92 | 93 | // val sample_set = dataRDD.takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 94 | // 95 | // assert(sample_set.size >= k) 96 | // 97 | // val pruning_bound = dataRDD.filter(x => sample_set.contains(x._2.traj_id)).groupBy(_._2.traj_id) 98 | // .map(x => minmaxtraj(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 99 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 100 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 101 | val global_intersect_set = global_intersect.map(_._2).toSet 102 | 103 | val sample_set = new PartitionPruningRDD(indexed_seg_rdd, global_intersect_set.contains).flatMap(_._1) 104 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 105 | 106 | assert(sample_set.size >= k) 107 | 108 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 109 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 110 | .flatMap(x => x._1.filter(now => sample_set.contains(now._2.traj_id))).groupBy(_._2.traj_id) 111 | .map(x => Trajectory.hausdorffDistance(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 112 | // val pruning_bound = dataRDD.filter(x => sample_set.contains(x._2.traj_id)).groupBy(_._2.traj_id) 113 | // .map(x => minmaxtraj(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 114 | 115 | //val pruning_bound = 8.65080562241333 116 | 117 | val end2 = System.currentTimeMillis() 118 | 119 | println("------------------------------------------------------------") 120 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 121 | println("The pruning bound is: " + pruning_bound) 122 | println("------------------------------------------------------------") 123 | 124 | val start3 = System.currentTimeMillis() 125 | //val global_prune_set = query_traj.map(x => { 126 | // global_rtree.circleRange(x, pruning_bound).map(_._2) 127 | //}).flatMap(list => list).toSet 128 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 129 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 130 | val global_prune_set = global_prune.map(_._2).toSet 131 | 132 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 133 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 134 | .aggregate(BitArray.create(bm_meta.num_bits))((a, b) => BitArray.or(a, b), (a, b) => BitArray.or(a, b)) 135 | 136 | val bc_prunbound = sc.broadcast(pruning_bound) 137 | 138 | val pruned_traj_id2 = pruned_rdd.map(part => { 139 | BitMap.meta = bc_bm_meta.value 140 | part._2.antiCircleRangeBF(bc_query.value, bc_prunbound.value) 141 | }).reduce((a, b) => BitArray.or(a, b)) 142 | 143 | val saved_trajs = BitArray.flip(BitArray.or(pruned_traj_id1, pruned_traj_id2)) 144 | 145 | // val saved_trajs = pruned_rdd.map(part => { 146 | // BloomFilter.meta = bc_bm_meta.value 147 | // part._2.circleRangeBF(bc_query.value, bc_prunbound.value) 148 | // }).reduce((a, b) => BitArray.or(a, b)) 149 | 150 | val end3 = System.currentTimeMillis() 151 | 152 | println("------------------------------------------------------------") 153 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 154 | println("Pruned trajs after global pruning:" + BitArray.count(pruned_traj_id1)) 155 | println("Pruned trajs after local pruning:" + 
BitArray.count(BitArray.or(pruned_traj_id1, pruned_traj_id2))) 156 | println("# of saved trajs: " + BitArray.count(saved_trajs)) 157 | println("------------------------------------------------------------") 158 | 159 | val start4 = System.currentTimeMillis() 160 | val bc_saved_traj = sc.broadcast(saved_trajs) 161 | val final_filter_set = global_rtree.circleRange(global_prune_mbrs, max_spatial_span).map(_._2).toSet 162 | 163 | val res = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 164 | .flatMap(x => { 165 | BitMap.meta = bc_bm_meta.value 166 | x._1.filter(now => BitMap.contains(bc_saved_traj.value, now._2.traj_id)) 167 | }).groupBy(_._2.traj_id).map(x => (Trajectory.hausdorffDistance(x._2.map(_._1).toArray, bc_query.value), x._1)) 168 | .takeOrdered(k)(new ResultOrdering) 169 | 170 | // val res = dataRDD.mapPartitions(iter => { 171 | // BloomFilter.meta = bc_bm_meta.value 172 | // val cur_traj = mutable.ListBuffer[LineSegment]() 173 | // val ans = mutable.ListBuffer[(Double, Int)]() 174 | // var last_traj_id = -1 175 | // val new_iter = iter.filter(x => BloomFilter.mayContains(bc_saved_traj.value, x._2.traj_id)) 176 | // while (new_iter.hasNext) { 177 | // val now = new_iter.next 178 | // if (now._2.traj_id != last_traj_id) { 179 | // if (cur_traj.nonEmpty) ans += ((minmaxtraj(cur_traj.toArray, bc_query.value), last_traj_id)) 180 | // last_traj_id = now._2.traj_id 181 | // cur_traj.clear() 182 | // } 183 | // cur_traj += now._1 184 | // } 185 | // if (cur_traj.nonEmpty) ans += ((minmaxtraj(cur_traj.toArray, bc_query.value), last_traj_id)) 186 | // ans.iterator 187 | // //iter.toArray.groupBy(_._2.traj_id).filter(x => BloomFilter.mayContains(bc_saved_traj.value, x._1)) 188 | // // .map(x => (minmaxtraj(x._2.map(_._1), bc_query.value), x._1)).iterator 189 | // }).takeOrdered(k)(new ResultOrdering) 190 | 191 | val end4 = System.currentTimeMillis() 192 | 193 | println("------------------------------------------------------------") 194 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 195 | println("------------------------------------------------------------") 196 | 197 | println("------------------------------------------------------------") 198 | println("The results show as below:") 199 | res.foreach(println) 200 | println("------------------------------------------------------------") 201 | 202 | sc.stop() 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/BloomFilterSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_bf.RTreeWithBF 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import edu.utah.cs.util._ 8 | import org.apache.spark.rdd.PartitionPruningRDD 9 | import org.apache.spark.storage.StorageLevel 10 | import org.apache.spark.{SparkConf, SparkContext} 11 | 12 | import scala.io.Source 13 | 14 | /** 15 | * Created by dongx on 9/6/16. 
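 * 
 * Sizing note: `BloomFilter.optimalNumBits(n, p)` and `optimalNumHashes(n, m)` presumably
 * implement the standard Bloom filter sizing formulas; a minimal sketch, assuming the
 * textbook definitions rather than this repo's exact code:
 * {{{
 *   // bits:   m = ceil(-n * ln(p) / (ln 2)^2)   for n items and false-positive rate p
 *   // hashes: h = round(m / n * ln 2)
 *   def optimalNumBits(n: Long, p: Double): Int =
 *     math.ceil(-n * math.log(p) / (math.log(2) * math.log(2))).toInt
 *   def optimalNumHashes(n: Long, m: Long): Int =
 *     math.max(1, math.round(m.toDouble / n * math.log(2)).toInt)
 * }}}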
16 | * Line Segment Trajectory Storage 17 | */ 18 | object BloomFilterSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | final val max_spatial_span = 0.46757 23 | //final val max_spatial_span = 2.550598 24 | 25 | private class ResultOrdering extends Ordering[(Double, Int)] { 26 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 27 | } 28 | 29 | def main(args: Array[String]): Unit = { 30 | val sparkConf = new SparkConf().setAppName("BloomFilterSolution").set("spark.locality.wait", "0") 31 | .set("spark.driver.maxResultSize", "4g") 32 | val sc = new SparkContext(sparkConf) 33 | 34 | if (args.length != 2) { 35 | println("usage: BloomFilterSolution ") 36 | System.exit(1) 37 | } 38 | 39 | Thread.sleep(3000) 40 | 41 | val query_traj_filename = args(0) 42 | val traj_data_filename = args(1) 43 | 44 | val start1 = System.currentTimeMillis() 45 | 46 | val dataRDD = sc.textFile(traj_data_filename) 47 | .map(x => x.split('\t')) 48 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 49 | Point(Array(x(3).toDouble, x(4).toDouble))), 50 | TrajMeta(x(0).toInt, x(5).toInt))) 51 | 52 | val optimal_num_bits = BloomFilter.optimalNumBits(5000, 0.1) 53 | val optimal_num_hashes = BloomFilter.optimalNumHashes(5000, optimal_num_bits) 54 | val bf_meta = BloomFilterMeta(optimal_num_bits, optimal_num_hashes) 55 | val bc_bf_meta = sc.broadcast(bf_meta) 56 | BloomFilter.meta = bf_meta 57 | 58 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 59 | 60 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 61 | BloomFilter.meta = bc_bf_meta.value 62 | val data = iter.toArray 63 | var index: RTreeWithBF = null 64 | if (data.length > 0) { 65 | index = RTreeWithBF(data.map(x => (x._1, x._2.traj_id)).zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), 66 | max_entries_per_node, bc_bf_meta.value) 67 | } 68 | Array((data, index)).iterator 69 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 70 | 71 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.bf))).collect() 72 | 73 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 74 | 75 | val end1 = System.currentTimeMillis() 76 | println("------------------------------------------------------------") 77 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 78 | println("------------------------------------------------------------") 79 | 80 | 81 | val query_traj_file = Source.fromFile(query_traj_filename) 82 | val queries = query_traj_file.getLines().map { line => 83 | val splitted = line.split('\t') 84 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 85 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 86 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 87 | 88 | var tot_time = 0.0 89 | queries.foreach(query_traj => { 90 | val start2 = System.currentTimeMillis() 91 | val bc_query = sc.broadcast(query_traj) 92 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 93 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 94 | val global_intersect_set = global_intersect.map(_._2).toSet 95 | 96 | val sample_set = new PartitionPruningRDD(indexed_seg_rdd, global_intersect_set.contains).flatMap(_._1) 97 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()).map(_._2.traj_id).toSet 98 | 99 | 
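// The pruning bound computed below is the k-th smallest trajectory distance among the
// c * k sampled candidates: any trajectory farther from the query than this bound cannot
// be in the true top-k, so it can be discarded safely. Trajectory.hausdorffDistance itself
// is not shown in this listing; judging from BaseLineST.minmaxtraj above, a segment-based
// symmetric Hausdorff distance would look roughly like this (a sketch, not the repo's exact code):
//   def directed(a: Array[LineSegment], b: Array[LineSegment]): Double =
//     a.map(sa => b.map(sb => sa.minDist(sb)).min).max
//   def hausdorff(a: Array[LineSegment], b: Array[LineSegment]): Double =
//     math.max(directed(a, b), directed(b, a))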
assert(sample_set.size >= k) 100 | 101 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 102 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 103 | .flatMap(x => x._1.filter(now => sample_set.contains(now._2.traj_id))) 104 | .groupBy(_._2.traj_id).repartition(Math.min(sample_set.size, sc.defaultParallelism)) 105 | .map(x => Trajectory.hausdorffDistance(x._2.toArray.map(_._1), bc_query.value)).takeOrdered(k).last 106 | 107 | val end2 = System.currentTimeMillis() 108 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 109 | println("The pruning bound is: " + pruning_bound) 110 | 111 | val start3 = System.currentTimeMillis() 112 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 113 | val global_prune_mbrs = global_prune.map(_._1.asInstanceOf[MBR]) 114 | val global_prune_set = global_prune.map(_._2).toSet 115 | 116 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 117 | 118 | val bc_prunbound = sc.broadcast(pruning_bound) 119 | val saved_trajs = pruned_rdd.map(part => { 120 | BloomFilter.meta = bc_bf_meta.value 121 | part._2.circleRangeBF(bc_query.value, bc_prunbound.value) 122 | }).reduce((a, b) => BitArray.or(a, b)) 123 | 124 | val end3 = System.currentTimeMillis() 125 | 126 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 127 | 128 | val start4 = System.currentTimeMillis() 129 | val bc_saved_traj = sc.broadcast(saved_trajs) 130 | val final_filter_set = global_rtree.circleRange(global_prune_mbrs, max_spatial_span).map(_._2).toSet 131 | 132 | val fianl_filter = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 133 | .flatMap(x => { 134 | BloomFilter.meta = bc_bf_meta.value 135 | x._1.filter(now => BloomFilter.mayContains(bc_saved_traj.value, now._2.traj_id)) 136 | }).groupBy(_._2.traj_id).repartition(sc.defaultParallelism) 137 | 138 | val res = fianl_filter.map(x => (Trajectory.hausdorffDistance(x._2.map(_._1).toArray, bc_query.value), x._1)) 139 | .takeOrdered(k)(new ResultOrdering) 140 | 141 | val end4 = System.currentTimeMillis() 142 | tot_time += (end4 - start2) / 1000.0 143 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 144 | println("# of distance calculated: " + (c * k + fianl_filter.count())) 145 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 146 | println("The results show as below:") 147 | res.foreach(println) 148 | println("------------------------------------------------------------") 149 | }) 150 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 151 | println("===================================================") 152 | 153 | sc.stop() 154 | } 155 | } 156 | 157 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/DataSampling.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | /** 7 | * Created by dongx on 1/27/2017. 
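 * 
 * Input is the segment TSV used throughout this listing: traj_id, x1, y1, x2, y2, seg_id
 * per line. A minimal parsing sketch, assuming that layout:
 * {{{
 *   def parseSegment(line: String): (Int, LineSegment) = {
 *     val f = line.split('\t')
 *     (f(0).toInt, LineSegment(Point(Array(f(1).toDouble, f(2).toDouble)),
 *                              Point(Array(f(3).toDouble, f(4).toDouble))))
 *   }
 * }}}
 * Sampling happens after grouping segments by trajectory id, so whole trajectories are
 * kept or dropped together.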
8 | */ 9 | object DataSampling { 10 | def main(args: Array[String]): Unit = { 11 | val sparkConf = new SparkConf().setAppName("DataSampling") 12 | val sc = new SparkContext(sparkConf) 13 | 14 | Thread.sleep(3000) 15 | 16 | if (args.length < 3) { 17 | println("usage: DataSampling <input_file_path> <output_file_path> <sample_rate>") 18 | System.exit(1) 19 | } 20 | 21 | val input_file_path = args(0) 22 | val output_file_path = args(1) 23 | val sample_rate = args(2).toDouble 24 | 25 | sc.textFile(input_file_path).mapPartitions(iter => { 26 | iter.map(x => { 27 | val splitted = x.split("\t") 28 | (splitted(0).toInt, 29 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 30 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 31 | }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator 32 | }).sample(withReplacement = false, sample_rate, System.currentTimeMillis()).repartition(4096) 33 | .flatMap(x => x._2.zipWithIndex.map(now => x._1 + "\t" + now._1.toTSV + "\t" + now._2)) 34 | .saveAsTextFile(output_file_path) 35 | 36 | sc.stop() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/DualIndexingSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} 4 | import java.util.zip.{GZIPInputStream, GZIPOutputStream} 5 | 6 | import edu.utah.cs.index.RTree 7 | import edu.utah.cs.index_rr.RTreeWithRR 8 | import edu.utah.cs.partitioner.{STRSegPartition, STRTrajPartition} 9 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 10 | import org.apache.spark.rdd.PartitionPruningRDD 11 | import org.apache.spark.storage.StorageLevel 12 | import org.apache.spark.{SparkConf, SparkContext} 13 | import org.roaringbitmap.RoaringBitmap 14 | 15 | import scala.collection.mutable 16 | import scala.io.Source 17 | 18 | /** 19 | * Created by dongx on 12/19/2016. 
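 * 
 * Trajectories are stored gzip-compressed via Java serialization to cut executor memory.
 * The round trip, factored out as a sketch mirroring the inline code below:
 * {{{
 *   def compress(traj: Array[LineSegment]): Array[Byte] = {
 *     val baos = new ByteArrayOutputStream()
 *     val out = new ObjectOutputStream(new GZIPOutputStream(baos))
 *     out.writeObject(traj)
 *     out.close()                    // flushes the gzip trailer before reading the bytes
 *     baos.toByteArray
 *   }
 * 
 *   def decompress(bytes: Array[Byte]): Array[LineSegment] = {
 *     val in = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes)))
 *     try in.readObject().asInstanceOf[Array[LineSegment]] finally in.close()
 *   }
 * }}}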
20 | */ 21 | object DualIndexingSolution { 22 | final val max_entries_per_node = 25 23 | final val k = 10 24 | final val c_values = Array(5) 25 | 26 | def getMBR(x: Array[LineSegment]): MBR = { 27 | val pts = x.flatMap(p => Array(p.start, p.end)) 28 | var maxx = Double.MinValue 29 | var maxy = Double.MinValue 30 | var minx = Double.MaxValue 31 | var miny = Double.MaxValue 32 | pts.foreach(x => { 33 | maxx = Math.max(x.coord(0), maxx) 34 | maxy = Math.max(x.coord(1), maxy) 35 | minx = Math.min(x.coord(0), minx) 36 | miny = Math.min(x.coord(1), miny) 37 | }) 38 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 39 | } 40 | 41 | private class ResultOrdering extends Ordering[(Double, Int)] { 42 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 43 | } 44 | 45 | def main(args: Array[String]): Unit = { 46 | val sparkConf = new SparkConf().setAppName("DualIndexingSolution").set("spark.locality.wait", "0") 47 | .set("spark.driver.maxResultSize", "4g")//.setMaster("local[*]") 48 | val sc = new SparkContext(sparkConf) 49 | 50 | if (args.length != 2) { 51 | println("usage: DualIndexingSolution <query_traj_filename> <traj_data_filename>") 52 | System.exit(1) 53 | } 54 | 55 | Thread.sleep(6000) 56 | 57 | val query_traj_filename = args(0) 58 | val traj_data_filename = args(1) 59 | 60 | val query_traj_file = Source.fromFile(query_traj_filename) 61 | val queries = query_traj_file.getLines().map { line => 62 | val splitted = line.split('\t') 63 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 64 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 65 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 66 | 67 | val start1 = System.currentTimeMillis() 68 | 69 | val dataRDD = sc.textFile(traj_data_filename) 70 | .map(x => x.split('\t')) 71 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 72 | Point(Array(x(3).toDouble, x(4).toDouble))), 73 | TrajMeta(x(0).toInt, x(5).toInt))) 74 | 75 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 76 | iter.map(x => { 77 | val splitted = x.split("\t") 78 | (splitted(0).toInt, 79 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 80 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 81 | }).toArray.groupBy(_._1).map(now => { 82 | val cur_traj = now._2.sortBy(_._1).map(_._2) 83 | (getMBR(cur_traj), (now._1, cur_traj)) 84 | }).iterator 85 | }) 86 | 87 | val part_traj = STRTrajPartition(trajs, dataRDD.partitions.length, 0.01, max_entries_per_node) 88 | 89 | val compressed_traj = part_traj.mapPartitions(iter => iter.map(x => { 90 | val baos = new ByteArrayOutputStream() 91 | val gzipOut = new GZIPOutputStream(baos) 92 | val objectOut = new ObjectOutputStream(gzipOut) 93 | objectOut.writeObject(x._2._2) 94 | objectOut.close() 95 | (x._2._1, baos.toByteArray) 96 | })).persist(StorageLevel.MEMORY_AND_DISK_SER) 97 | 98 | println(compressed_traj.count) 99 | 100 | val traj_stat = part_traj.mapPartitions(iter => { 101 | Array(iter.aggregate[(MBR, Int)]((null, 0))((res, now) => { 102 | if (res._1 == null) (now._1, 1) 103 | else (res._1.union(now._1), res._2 + 1) 104 | }, (left, right) => { 105 | if (left._1 == null) right 106 | else if (right._1 == null) left 107 | else (left._1.union(right._1), left._2 + right._2) 108 | })).iterator 109 | }).collect() 110 | val traj_global_rtree = 111 | RTree.applyMBR(traj_stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node) 112 | 113 | 114 | val (partitioned_rdd, _) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, 
max_entries_per_node) 115 | 116 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 117 | val data = iter.toArray 118 | var index: RTreeWithRR = if (data.length > 0) { 119 | RTreeWithRR(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.traj_id)), 25) 120 | } else null 121 | Array(index).iterator 122 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 123 | 124 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x.root.m_mbr, x.root.size, x.root.rr))).collect() 125 | 126 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 127 | 128 | val end1 = System.currentTimeMillis() 129 | println("------------------------------------------------------------") 130 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 131 | println("------------------------------------------------------------") 132 | 133 | c_values.foreach(c => { 134 | var tot_time = 0.0 135 | queries.foreach(query_traj => { 136 | val start2 = System.currentTimeMillis() 137 | val bc_query = sc.broadcast(query_traj) 138 | 139 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 140 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 141 | 142 | val sample_base = global_intersect.aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, stat(b._2)._3), 143 | (a, b) => RoaringBitmap.or(a, b)) 144 | 145 | val cards = sample_base.getCardinality 146 | println("Cardinality of intersected Partitions: " + cards) 147 | val n_samples = c * k 148 | println("Going to sample: " + n_samples) 149 | assert(cards >= k) 150 | 151 | val set = mutable.HashSet[Int]() 152 | val rnd = scala.util.Random 153 | 154 | for (i <- 0 until n_samples) { 155 | var x = rnd.nextInt(cards) 156 | while (set.contains(x)) x = rnd.nextInt(cards) 157 | set += x 158 | } 159 | 160 | var i = 0 161 | val samples = mutable.HashSet[Int]() 162 | val iter = sample_base.iterator() 163 | while (iter.hasNext) { 164 | val x = iter.next() 165 | if (set.contains(i)) samples += x 166 | i = i + 1 167 | } 168 | 169 | val bc_samples = sc.broadcast(samples.toSet) 170 | val pruning_bound_filter = traj_global_rtree.circleRange(global_intersect_mbrs, 0.0).map(_._2).toSet 171 | val pruning_bound = new PartitionPruningRDD(compressed_traj, pruning_bound_filter.contains) 172 | .filter(x => bc_samples.value.contains(x._1)) 173 | .repartition(Math.min(samples.size, sc.defaultParallelism)) 174 | .map(x => { 175 | val bais = new ByteArrayInputStream(x._2) 176 | val gzipIn = new GZIPInputStream(bais) 177 | val objectIn = new ObjectInputStream(gzipIn) 178 | val content = objectIn.readObject().asInstanceOf[Array[LineSegment]] 179 | Trajectory.hausdorffDistance(bc_query.value, content) 180 | //Trajectory.discreteFrechetDistance(bc_query.value, content) 181 | }) 182 | .takeOrdered(k).last 183 | val end2 = System.currentTimeMillis() 184 | 185 | println("------------------------------------------------------------") 186 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 187 | println("The pruning bound is: " + pruning_bound) 188 | 189 | val start3 = System.currentTimeMillis() 190 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 191 | val global_prune_set = global_prune.map(_._2).toSet 192 | 193 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 194 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 195 | .aggregate(new RoaringBitmap())((a, b) => 
RoaringBitmap.or(a, b), (a, b) => RoaringBitmap.or(a, b)) 196 | 197 | val bc_pruning_bound = sc.broadcast(pruning_bound) 198 | val pruned_traj_id2 = pruned_rdd.map(part => { 199 | part.antiCircleRangeBF(bc_query.value, bc_pruning_bound.value) 200 | }).reduce((a, b) => RoaringBitmap.or(a, b)) 201 | 202 | val tot_pruned_traj = RoaringBitmap.or(pruned_traj_id1, pruned_traj_id2) 203 | 204 | val end3 = System.currentTimeMillis() 205 | val tot_prune_count = tot_pruned_traj.getCardinality 206 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 207 | 208 | val start4 = System.currentTimeMillis() 209 | val bc_pruned_traj = sc.broadcast(tot_pruned_traj) 210 | 211 | val final_prune_set = traj_global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), 0.0).map(_._2).toSet 212 | val final_filtered = new PartitionPruningRDD(compressed_traj, final_prune_set.contains) 213 | .filter(x => !bc_pruned_traj.value.contains(x._1)) 214 | 215 | val res = final_filtered.repartition(sc.defaultParallelism) 216 | .mapPartitions(iter => iter.map(x =>{ 217 | val bais = new ByteArrayInputStream(x._2) 218 | val gzipIn = new GZIPInputStream(bais) 219 | val objectIn = new ObjectInputStream(gzipIn) 220 | val content = objectIn.readObject().asInstanceOf[Array[LineSegment]] 221 | (Trajectory.hausdorffDistance(bc_query.value, content), x._1) 222 | })) 223 | .takeOrdered(k)(new ResultOrdering) 224 | 225 | val end4 = System.currentTimeMillis() 226 | tot_time += (end4 - start2) / 1000.0 227 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 228 | println("# of distance calculated: " + (c * k + final_filtered.count())) 229 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 230 | println("The results show as below:") 231 | res.foreach(println) 232 | println("------------------------------------------------------------") 233 | bc_query.destroy() 234 | bc_samples.destroy() 235 | bc_pruned_traj.destroy() 236 | bc_pruning_bound.destroy() 237 | }) 238 | 239 | println("Average Latency for c = " + c + " is : " + (tot_time / 100.0)) 240 | println("===================================================") 241 | }) 242 | 243 | sc.stop() 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/LineSegmentClustering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{BufferedWriter, File, FileWriter} 4 | 5 | import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} 6 | import edu.utah.cs.partitioner.STRSegPartition 7 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} 8 | import edu.utah.cs.util.{BloomFilter, BloomFilterMeta} 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.geotools.geojson.geom.GeometryJSON 11 | 12 | /** 13 | * Created by dongx on 10/24/16. 
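 * 
 * Emits the partition boundaries as a GeoJSON GeometryCollection for visualization.
 * Each MBR becomes a closed five-point ring; a sketch of the conversion used below:
 * {{{
 *   def mbrToRing(m: MBR): Polygon =
 *     Polygon(Array(m.low, Point(Array(m.low.coord(0), m.high.coord(1))),
 *                   m.high, Point(Array(m.high.coord(0), m.low.coord(1))), m.low))
 * }}}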
14 | */ 15 | object LineSegmentClustering { 16 | final val max_entries_per_node = 25 17 | final val k = 10 18 | final val N = 34085 19 | 20 | def main(args: Array[String]): Unit = { 21 | val sc = new SparkContext(new SparkConf().setAppName("LineSegmentClustering")) 22 | 23 | if (args.length < 2) { 24 | println("usage: LineSegmentClustering <input_file_path> <output_file_path>") 25 | System.exit(1) 26 | } 27 | 28 | val input_file_path = args(0) 29 | val output_file_path = args(1) 30 | 31 | val dataRDD = sc.textFile(input_file_path) 32 | .map(x => x.split('\t')) 33 | .map(x => (LineSegment(Point(Array(x(2).toDouble, x(1).toDouble)), 34 | Point(Array(x(4).toDouble, x(3).toDouble))), 35 | TrajMeta(x(0).toInt, x(5).toInt))) 36 | 37 | val bf_meta = BloomFilterMeta(N, 1) 38 | val bc_bf_meta = sc.broadcast(bf_meta) 39 | BloomFilter.meta = bf_meta 40 | 41 | val num_partitions = dataRDD.getNumPartitions 42 | val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, num_partitions, 0.01, max_entries_per_node) 43 | 44 | val part_bounds = partitioned_rdd.mapPartitions(iter => { 45 | if (iter.nonEmpty) { 46 | var maxx = Double.MinValue 47 | var maxy = Double.MinValue 48 | var minx = Double.MaxValue 49 | var miny = Double.MaxValue 50 | iter.map(_._1).foreach(x => { 51 | maxx = Math.max(Math.max(x.start.coord(0), x.end.coord(0)), maxx) 52 | maxy = Math.max(Math.max(x.start.coord(1), x.end.coord(1)), maxy) 53 | minx = Math.min(Math.min(x.start.coord(0), x.end.coord(0)), minx) 54 | miny = Math.min(Math.min(x.start.coord(1), x.end.coord(1)), miny) 55 | }) 56 | Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator 57 | } else Array().iterator 58 | }).collect() 59 | 60 | val file = new File(output_file_path) 61 | val bw = new BufferedWriter(new FileWriter(file)) 62 | 63 | val collection = new GeometryCollection(part_bounds.map(x => 64 | Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), 65 | x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) 66 | 67 | new GeometryJSON().writeGeometryCollection(collection, bw) 68 | 69 | bw.close() 70 | 71 | sc.stop() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/MTreeSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.partitioner.IDPartition 4 | import edu.utah.cs.spatial.{LineSegment, Point} 5 | import mtree.{DistanceFunction, MTree} 6 | import org.apache.spark.rdd.PartitionPruningRDD 7 | import org.apache.spark.storage.StorageLevel 8 | import org.apache.spark.{SparkConf, SparkContext} 9 | 10 | import scala.collection.mutable 11 | import scala.io.Source 12 | 13 | /** 14 | * Created by dongx on 4/28/17. 
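 * 
 * Each trajectory is assigned to its nearest pivot; a pivot partition with cover radius r
 * can be skipped whenever dist(query, pivot) - r already exceeds the current k-th-best
 * bound, by the triangle inequality. A sketch of the test used below:
 * {{{
 *   def canPrune(distQueryPivot: Double, coverRadius: Double, bound: Double): Boolean =
 *     distQueryPivot - coverRadius > bound
 * }}}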
15 | */ 16 | object MTreeSolution { 17 | final val k = 10 18 | final val c = 5 19 | 20 | case class MTreeTraj(id: Int, data: Array[LineSegment]) 21 | 22 | class TrajDistanceFunction extends DistanceFunction[MTreeTraj] { 23 | override def calculate(traj1: MTreeTraj, traj2: MTreeTraj): Double = { 24 | Trajectory.hausdorffDistance(traj1.data, traj2.data) 25 | } 26 | } 27 | 28 | def main(args: Array[String]): Unit = { 29 | val sparkConf = new SparkConf().setAppName("MTreeSolution") 30 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g") 31 | val sc = new SparkContext(sparkConf) 32 | 33 | if (args.length != 2) { 34 | println("usage: MTreeSolution ") 35 | System.exit(1) 36 | } 37 | 38 | val query_traj_filename = args(0) 39 | val traj_data_filename = args(1) 40 | 41 | val query_traj_file = Source.fromFile(query_traj_filename) 42 | val queries = query_traj_file.getLines().map { line => 43 | val splitted = line.split('\t') 44 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 45 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 46 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 47 | 48 | Thread.sleep(6000) 49 | 50 | val start1 = System.currentTimeMillis() 51 | 52 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => { 53 | iter.map(x => { 54 | val splitted = x.split("\t") 55 | (splitted(0).toInt, 56 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 57 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 58 | }).toArray.groupBy(_._1).map(now => MTreeTraj(now._1, now._2.map(_._2))).iterator 59 | }) 60 | 61 | val pivots = trajs.takeSample(withReplacement = false, trajs.partitions.length, System.currentTimeMillis()).map(_.data) 62 | val pivot_mt = new MTree[MTreeTraj](2, new TrajDistanceFunction(), null) 63 | for (i <- pivots.indices) { 64 | pivot_mt.add(MTreeTraj(i, pivots(i))) 65 | } 66 | val bc_pivots = sc.broadcast(pivots) 67 | val bc_pivots_mt = sc.broadcast(pivot_mt) 68 | val traj_with_pivot = trajs.mapPartitions(iter => { 69 | iter.map(x => { 70 | val tmp = bc_pivots_mt.value.getNearest(x) 71 | (tmp.iterator().next().data.id, x) 72 | }) 73 | }) 74 | val parted_by_pivot = IDPartition(traj_with_pivot, pivots.length) 75 | val indexed = parted_by_pivot.mapPartitionsWithIndex((id, iter) => { 76 | val data = iter.map(_._2.asInstanceOf[MTreeTraj]).toArray 77 | val pivot = bc_pivots.value(id) 78 | val cover_radius = data.map(x => Trajectory.hausdorffDistance(x.data, pivot)).max 79 | val m_tree = new MTree[MTreeTraj](2, new TrajDistanceFunction(), null) 80 | data.foreach(x => m_tree.add(x)) 81 | Array((pivot, cover_radius, data.length, m_tree)).iterator 82 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 83 | 84 | val stats = indexed.map(x => (x._1, x._2, x._3)).collect() 85 | .zipWithIndex.map(x => (x._1._1, x._1._2, x._1._3, x._2)) 86 | 87 | val end1 = System.currentTimeMillis() 88 | println("Time to build index: " + ((end1 - start1) / 1000.0)) 89 | 90 | bc_pivots.destroy() 91 | bc_pivots_mt.destroy() 92 | 93 | var tot_time = 0.0 94 | queries.foreach(query => { 95 | val start2 = System.currentTimeMillis() 96 | println("----------------------------------------------") 97 | val sorted_pivots = stats.map(x => (Trajectory.hausdorffDistance(x._1, query), x._2, x._3, x._4)).sortBy(_._1) 98 | var i = 0 99 | var sum = 0 100 | while (sum < k) { 101 | sum += sorted_pivots(i)._3 102 | i += 1 103 | } 104 | 105 | val prune_set = sorted_pivots.slice(0, i).map(_._4).toSet 106 | val bc_query = 
sc.broadcast(query) 107 | val bc_k = sc.broadcast(k) 108 | // val first_filter = new PartitionPruningRDD(indexed, prune_set.contains) 109 | // .flatMap(i_part => { 110 | // i_part._4.knn(VPTraj(0, bc_query.value), bc_k.value)._1.map(x => (x._2, x._1.id)) 111 | // }).takeOrdered(k)(new ResultOrdering) 112 | 113 | val first_filter = new PartitionPruningRDD(indexed, prune_set.contains) 114 | .aggregate((Array[(Double, Int)](), 0))((now, part) => { 115 | val knn_res = part._4.getNearestByLimit(MTreeTraj(0, bc_query.value), bc_k.value) 116 | val knn_iter = knn_res.iterator() 117 | val res = mutable.ListBuffer[(Double, Int)]() 118 | while (knn_iter.hasNext) { 119 | val tmp = knn_iter.next() 120 | res += ((tmp.distance, tmp.data.id)) 121 | } 122 | ((res ++ now._1).sortBy(_._1).take(bc_k.value).toArray, knn_res.cnt + now._2) 123 | }, (left, right) => { 124 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2) 125 | }) 126 | 127 | val tick1 = System.currentTimeMillis() 128 | println("Time for first filter: " + ((tick1 - start2) / 1000.0)) 129 | 130 | val pruning_bound = first_filter._1.last._1 131 | val global_prune_set = 132 | sorted_pivots.filter(x => x._1 - x._2 <= pruning_bound).map(_._4).toSet -- prune_set 133 | val bc_pruning_bound = sc.broadcast(pruning_bound) 134 | 135 | val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains) 136 | .aggregate((Array[(Double, Int)](), 0))((now, part) => { 137 | val knn_res = part._4.getNearestByLimit(MTreeTraj(0, bc_query.value), bc_k.value) 138 | val knn_iter = knn_res.iterator() 139 | val res = mutable.ListBuffer[(Double, Int)]() 140 | while (knn_iter.hasNext) { 141 | val tmp = knn_iter.next() 142 | res += ((tmp.distance, tmp.data.id)) 143 | } 144 | ((res ++ now._1).sortBy(_._1).take(bc_k.value).toArray, knn_res.cnt + now._2) 145 | }, (left, right) => { 146 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2) 147 | }) 148 | 149 | val final_res = (first_filter._1 ++ second_filter._1).sortBy(_._1).take(k) 150 | 151 | val end2 = System.currentTimeMillis() 152 | println("Time for second filter and final merge: " + ((end2 - tick1) / 1000.0)) 153 | println("# of trajs checked distance: " + (first_filter._2 + second_filter._2 + pivots.length)) 154 | println("Total Latency: " + ((end2 - start2) / 1000.0)) 155 | final_res.foreach(println) 156 | tot_time += (end2 - start2) / 1000.0 157 | println("----------------------------------------------") 158 | bc_k.destroy() 159 | bc_query.destroy() 160 | bc_pruning_bound.destroy() 161 | }) 162 | 163 | println("Average Latency: " + (tot_time / 100.0)) 164 | 165 | sc.stop() 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/RRSolution.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.index.RTree 4 | import edu.utah.cs.index_rr.RTreeWithRR 5 | import edu.utah.cs.partitioner.STRSegPartition 6 | import edu.utah.cs.spatial.{LineSegment, MBR, Point} 7 | import org.apache.spark.rdd.PartitionPruningRDD 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.roaringbitmap.RoaringBitmap 11 | 12 | import scala.collection.mutable 13 | import scala.io.Source 14 | 15 | /** 16 | * Created by dongx on 12/19/2016. 
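 * 
 * Query phase in brief: sample c * k candidate trajectories to get an upper bound on the
 * k-NN distance, then drop (a) trajectories confined to partitions entirely outside the
 * bound and (b) those pruned locally inside surviving partitions. RoaringBitmap tracks
 * trajectory ids cheaply; a usage sketch with made-up ids:
 * {{{
 *   val prunedGlobal = RoaringBitmap.bitmapOf(1, 2)
 *   val prunedLocal  = RoaringBitmap.bitmapOf(2, 3)
 *   val allPruned    = RoaringBitmap.or(prunedGlobal, prunedLocal)    // {1, 2, 3}
 *   val candidates   = RoaringBitmap.bitmapOf(1, 2, 3, 4, 5)
 *   val survivors    = RoaringBitmap.andNot(candidates, allPruned)    // {4, 5}
 * }}}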
17 | */ 18 | object RRSolution { 19 | final val max_entries_per_node = 25 20 | final val k = 10 21 | final val c = 5 22 | //final val max_spatial_span = 2.550598 23 | //final val max_spatial_span = 0.46757 24 | final val max_spatial_span = 0.5080 25 | 26 | def getMBR(x: Array[LineSegment]): MBR = { 27 | val pts = x.flatMap(p => Array(p.start, p.end)) 28 | var maxx = Double.MinValue 29 | var maxy = Double.MinValue 30 | var minx = Double.MaxValue 31 | var miny = Double.MaxValue 32 | pts.foreach(x => { 33 | maxx = Math.max(x.coord(0), maxx) 34 | maxy = Math.max(x.coord(1), maxy) 35 | minx = Math.min(x.coord(0), minx) 36 | miny = Math.min(x.coord(1), miny) 37 | }) 38 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))) 39 | } 40 | 41 | private class ResultOrdering extends Ordering[(Double, Int)] { 42 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1) 43 | } 44 | 45 | def main(args: Array[String]): Unit = { 46 | val sparkConf = new SparkConf().setAppName("RRSolution") 47 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g")//.setMaster("local[*]") 48 | val sc = new SparkContext(sparkConf) 49 | 50 | if (args.length != 2) { 51 | println("usage: RRSolution ") 52 | System.exit(1) 53 | } 54 | 55 | Thread.sleep(6000) 56 | 57 | val query_traj_filename = args(0) 58 | val traj_data_filename = args(1) 59 | 60 | val query_traj_file = Source.fromFile(query_traj_filename) 61 | val queries = query_traj_file.getLines().map { line => 62 | val splitted = line.split('\t') 63 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 64 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) 65 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2)) 66 | 67 | val start1 = System.currentTimeMillis() 68 | 69 | val dataRDD = sc.textFile(traj_data_filename) 70 | .map(x => x.split('\t')) 71 | .map(x => (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), 72 | Point(Array(x(3).toDouble, x(4).toDouble))), 73 | TrajMeta(x(0).toInt, x(5).toInt))) 74 | 75 | val (partitioned_rdd, _) = STRSegPartition(dataRDD, dataRDD.partitions.length, 0.01, max_entries_per_node) 76 | 77 | val indexed_seg_rdd = partitioned_rdd.mapPartitions(iter => { 78 | val data = iter.toArray 79 | var index: RTreeWithRR = null 80 | if (data.length > 0) { 81 | index = RTreeWithRR(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.traj_id)), max_entries_per_node) 82 | } 83 | Array((data, index)).iterator 84 | }).persist(StorageLevel.MEMORY_AND_DISK_SER) 85 | 86 | val stat = indexed_seg_rdd.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length, x._2.root.rr))).collect() 87 | 88 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2.toInt)), max_entries_per_node) 89 | 90 | val end1 = System.currentTimeMillis() 91 | println("------------------------------------------------------------") 92 | println("Time to build indexes: " + (end1 - start1) / 1000.0) 93 | println("------------------------------------------------------------") 94 | 95 | var tot_time = 0.0 96 | queries.foreach(query_traj => { 97 | val start2 = System.currentTimeMillis() 98 | val bc_query = sc.broadcast(query_traj) 99 | val global_intersect = global_rtree.circleRange(query_traj, 0.0) 100 | val global_intersect_mbrs = global_intersect.map(_._1.asInstanceOf[MBR]) 101 | 102 | val sample_base = global_intersect.aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, stat(b._2)._3), 103 | (a, b) => RoaringBitmap.or(a, b)) 104 | 105 | val cards = 
sample_base.getCardinality 106 | println("Cardinality of intersected Partitions: " + cards) 107 | val n_samples = c * k 108 | println("Going to sample: " + n_samples) 109 | assert(cards >= k) 110 | 111 | val set = mutable.HashSet[Int]() 112 | val rnd = scala.util.Random 113 | 114 | for (i <- 0 until n_samples) { 115 | var x = rnd.nextInt(cards) 116 | while (set.contains(x)) x = rnd.nextInt(cards) 117 | set += x 118 | } 119 | 120 | var i = 0 121 | val samples = mutable.HashSet[Int]() 122 | val iter = sample_base.iterator() 123 | while (iter.hasNext) { 124 | val x = iter.next() 125 | if (set.contains(i)) samples += x 126 | i = i + 1 127 | } 128 | 129 | val bc_samples = sc.broadcast(samples.toSet) 130 | 131 | val pruning_bound_filter = global_rtree.circleRange(global_intersect_mbrs, max_spatial_span).map(_._2).toSet 132 | val pruning_bound = new PartitionPruningRDD(indexed_seg_rdd, pruning_bound_filter.contains) 133 | .flatMap(x => x._1.filter(now => samples.contains(now._2.traj_id)).map(x => x._2.traj_id -> x._1)) 134 | .groupByKey(Math.min(samples.size, sc.defaultParallelism)) 135 | .map(x => Trajectory.hausdorffDistance(bc_query.value, x._2.toArray)).takeOrdered(k).last 136 | 137 | val end2 = System.currentTimeMillis() 138 | 139 | println("------------------------------------------------------------") 140 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 141 | println("The pruning bound is: " + pruning_bound) 142 | 143 | val start3 = System.currentTimeMillis() 144 | val global_prune = global_rtree.circleRange(query_traj, pruning_bound) 145 | val global_prune_set = global_prune.map(_._2).toSet 146 | 147 | val pruned_rdd = new PartitionPruningRDD(indexed_seg_rdd, global_prune_set.contains) 148 | val pruned_traj_id1 = stat.zipWithIndex.filter(x => !global_prune_set.contains(x._2)).map(_._1._3) 149 | .aggregate(new RoaringBitmap())((a, b) => RoaringBitmap.or(a, b), (a, b) => RoaringBitmap.or(a, b)) 150 | 151 | val bc_pruning_bound = sc.broadcast(pruning_bound) 152 | val saved_traj_local = pruned_rdd.map(part => { 153 | RoaringBitmap.andNot(part._2.root.rr, part._2.antiCircleRangeBF(bc_query.value, bc_pruning_bound.value)) 154 | }).reduce((a, b) => RoaringBitmap.or(a, b)) 155 | 156 | val saved_traj = RoaringBitmap.andNot(saved_traj_local, pruned_traj_id1) 157 | 158 | val end3 = System.currentTimeMillis() 159 | 160 | println("Time to calculate all saved traj_ids: " + (end3 - start3) / 1000.0) 161 | 162 | val start4 = System.currentTimeMillis() 163 | val bc_saved_traj = sc.broadcast(saved_traj_local.toArray) 164 | 165 | val final_filter_set = global_rtree.circleRange(global_prune.map(_._1.asInstanceOf[MBR]), max_spatial_span) 166 | .map(_._2).toSet 167 | 168 | val final_filtered = new PartitionPruningRDD(indexed_seg_rdd, final_filter_set.contains) 169 | .flatMap(x => { 170 | x._1.filter(now => bc_saved_traj.value.contains(now._2.traj_id)).map(x => x._2.traj_id -> x._1) 171 | }) 172 | 173 | val res = final_filtered.groupByKey(sc.defaultParallelism) 174 | .map(x => (Trajectory.hausdorffDistance(bc_query.value, x._2.toArray), x._1)) 175 | .takeOrdered(k)(new ResultOrdering) 176 | 177 | val end4 = System.currentTimeMillis() 178 | tot_time += (end4 - start2) / 1000.0 179 | println("Time to finish the final filter: " + (end4 - start4) / 1000.0) 180 | println("# of distance calculated: " + (c * k + saved_traj.getCardinality)) 181 | println("Total Latency: " + ((end4 - start2) / 1000.0)) 182 | println("The results show as below:") 183 | res.foreach(println) 184 | 
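// Note on the index sampling above: drawing c * k distinct positions by rejection
// (redrawing on collision) is fine while c * k << cards; an equivalent collision-free
// alternative would be, e.g.:
//   val positions = rnd.shuffle((0 until cards).toVector).take(n_samples).toSet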
println("------------------------------------------------------------") 185 | tot_time += (end4 - start2) / 1000.0 186 | }) 187 | 188 | printf("Average Latency: " + (tot_time / 100.0)) 189 | 190 | sc.stop() 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/Relabel.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | /** 7 | * Created by dongx on 10/5/16. 8 | */ 9 | object Relabel { 10 | case class TrajMeta(traj_id: String, seg_id: Int) 11 | 12 | def main(args: Array[String]): Unit = { 13 | val sparkConf = new SparkConf().setAppName("Relabel") 14 | val sc = new SparkContext(sparkConf) 15 | 16 | if (args.length != 2) { 17 | println("usage: Relabel ") 18 | System.exit(1) 19 | } 20 | 21 | Thread.sleep(3000) 22 | 23 | val input_file_name = args(0) 24 | val output_file_name = args(1) 25 | 26 | sc.textFile(input_file_name, 900).map(x => { 27 | val splitted = x.split('\t') 28 | (splitted(0), 29 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), 30 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))), splitted(5)) 31 | }).groupBy(_._1) 32 | .zipWithIndex() 33 | .flatMap(x => x._1._2.map(now => x._2.toString + "\t" + now._2.toTSV + "\t" + now._3)) 34 | .saveAsTextFile(output_file_name) 35 | 36 | sc.stop() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/SpatialSpanClustering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import java.io.{BufferedWriter, File, FileWriter} 4 | 5 | import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} 6 | import edu.utah.cs.partitioner.STRMBRPartition 7 | import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} 8 | import edu.utah.cs.util._ 9 | import org.apache.spark.{SparkConf, SparkContext} 10 | import org.geotools.geojson.geom.GeometryJSON 11 | 12 | /** 13 | * Created by Dong Xie on 10/24/2016. 
14 | */ 15 | object SpatialSpanClustering { 16 | final val max_entries_per_node = 25 17 | 18 | def getMBR(x: (Int, Array[(Int, LineSegment)])): (MBR, Int) = { 19 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end)) 20 | var maxx = Double.MinValue 21 | var maxy = Double.MinValue 22 | var minx = Double.MaxValue 23 | var miny = Double.MaxValue 24 | pts.foreach(x => { 25 | maxx = Math.max(x.coord(0), maxx) 26 | maxy = Math.max(x.coord(1), maxy) 27 | minx = Math.min(x.coord(0), minx) 28 | miny = Math.min(x.coord(1), miny) 29 | }) 30 | (MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))), x._1) 31 | } 32 | 33 | def main(args: Array[String]): Unit = { 34 | val sc = new SparkContext(new SparkConf().setAppName("SpatialSpanClustering")) 35 | 36 | if (args.length < 2) { 37 | println("usage: SpatialSpanClustering ") 38 | System.exit(1) 39 | } 40 | 41 | val input_file_path = args(0) 42 | val output_file_path = args(1) 43 | 44 | val bf_meta = BloomFilterMeta(10000, 1) 45 | val bc_bf_meta = sc.broadcast(bf_meta) 46 | BloomFilter.meta = bf_meta 47 | 48 | val mbrs = sc.textFile(input_file_path).mapPartitions(iter => { 49 | iter.map(x => { 50 | val splitted = x.split("\t") 51 | (splitted(0).toInt, 52 | LineSegment(Point(Array(splitted(2).toDouble, splitted(1).toDouble)), 53 | Point(Array(splitted(4).toDouble, splitted(3).toDouble)))) 54 | }).toArray.groupBy(_._1).map(now => getMBR(now)).iterator 55 | }) 56 | 57 | val num_partitions = mbrs.getNumPartitions * 4 58 | 59 | val partitioned_rdd = STRMBRPartition(mbrs, num_partitions, 0.01, max_entries_per_node) 60 | 61 | val part_bounds = partitioned_rdd.mapPartitions(iter => { 62 | if (iter.nonEmpty) { 63 | var maxx = Double.MinValue 64 | var maxy = Double.MinValue 65 | var minx = Double.MaxValue 66 | var miny = Double.MaxValue 67 | iter.map(_._1).foreach(x => { 68 | maxx = Math.max(x.high.coord(0), maxx) 69 | maxy = Math.max(x.high.coord(1), maxy) 70 | minx = Math.min(x.low.coord(0), minx) 71 | miny = Math.min(x.low.coord(1), miny) 72 | }) 73 | Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator 74 | } else Array().iterator 75 | }).collect() 76 | 77 | val file = new File(output_file_path) 78 | val bw = new BufferedWriter(new FileWriter(file)) 79 | 80 | val collection = new GeometryCollection(part_bounds.map(x => 81 | Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), 82 | x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) 83 | 84 | new GeometryJSON().writeGeometryCollection(collection, bw) 85 | 86 | bw.close() 87 | 88 | sc.stop() 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/SpatialSpanFiltering.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{Point, LineSegment} 4 | import org.apache.spark.{SparkContext, SparkConf} 5 | 6 | /** 7 | * Created by Dong Xie on 10/23/2016. 
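 * 
 * Keeps only trajectories whose "spatial span" (the diagonal of the bounding box) lies
 * in (0.001, 0.5080) and that have more than 20 segments. The span in isolation,
 * mirroring getStats below:
 * {{{
 *   def spatialSpan(minx: Double, miny: Double, maxx: Double, maxy: Double): Double =
 *     Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy)))
 * }}}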
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/SpatialSpanFiltering.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.spatial.{Point, LineSegment}
4 | import org.apache.spark.{SparkContext, SparkConf}
5 |
6 | /**
7 | * Created by Dong Xie on 10/23/2016.
8 | */
9 | object SpatialSpanFiltering {
10 | def getStats(x: (Int, Array[(Int, LineSegment, Int)])) = {
11 | val num_segs = x._2.length
12 | val tot_dis = x._2.map(p => p._2.length).sum
13 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
14 | var maxx = Double.MinValue
15 | var maxy = Double.MinValue
16 | var minx = Double.MaxValue
17 | var miny = Double.MaxValue
18 | pts.foreach(x => {
19 | maxx = Math.max(x.coord(0), maxx)
20 | maxy = Math.max(x.coord(1), maxy)
21 | minx = Math.min(x.coord(0), minx)
22 | miny = Math.min(x.coord(1), miny)
23 | })
24 | (x._1, num_segs, tot_dis, Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy))))
25 | }
26 |
27 | def main(args: Array[String]): Unit = {
28 | val sparkConf = new SparkConf().setAppName("SpatialSpanFiltering")//.setMaster("local[*]")
29 | val sc = new SparkContext(sparkConf)
30 |
31 | Thread.sleep(3000)
32 |
33 | if (args.length < 2) {
34 | println("usage: SpatialSpanFiltering <input_file_path> <output_file_path>")
35 | System.exit(1)
36 | }
37 |
38 | val input_file_path = args(0)
39 | val output_file_path = args(1)
40 |
41 | sc.textFile(input_file_path).mapPartitions(iter => {
42 | iter.map(x => {
43 | val splitted = x.split("\t")
44 | (splitted(0).toInt,
45 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
46 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))), splitted(5).toInt)
47 | }).toArray.groupBy(_._1).filter(now => {
48 | val stat = getStats(now)
49 | stat._4 > 0.001 && stat._2 > 20 && stat._4 < 0.5080
50 | }).iterator
51 | }).repartition(800)
52 | .flatMap(x => x._2.map(now => now._1 + "\t" + now._2.toTSV + "\t" + now._3))
53 | .saveAsTextFile(output_file_path)
54 |
55 | sc.stop()
56 | }
57 | }
58 |
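The filter above keeps a trajectory only when its spatial span (the diagonal of its MBR, here in coordinate degrees) lies in (0.001, 0.5080) and it has more than 20 segments; the thresholds look dataset-specific. The same rule as a hypothetical standalone predicate over the `getStats` tuple:

```scala
// stat is (traj_id, num_segs, total_distance, span) as produced by getStats.
def keepTrajectory(stat: (Int, Int, Double, Double)): Boolean = {
  val (_, numSegs, _, span) = stat
  span > 0.001 && span < 0.5080 && numSegs > 20
}
```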
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/SpatialSpanStat.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import java.io._
4 |
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 | * Created by dongx on 10/5/16.
10 | */
11 | object SpatialSpanStat {
12 | def getStats(x: (Int, Array[(Int, LineSegment)])) = {
13 | val num_segs = x._2.length
14 | val tot_dis = x._2.map(p => p._2.length).sum
15 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
16 | var maxx = Double.MinValue
17 | var maxy = Double.MinValue
18 | var minx = Double.MaxValue
19 | var miny = Double.MaxValue
20 | pts.foreach(x => {
21 | maxx = Math.max(x.coord(0), maxx)
22 | maxy = Math.max(x.coord(1), maxy)
23 | minx = Math.min(x.coord(0), minx)
24 | miny = Math.min(x.coord(1), miny)
25 | })
26 | (x._1, num_segs, tot_dis, Point(Array(minx, miny)).minDist(Point(Array(maxx, maxy))))
27 | }
28 |
29 | def main(args: Array[String]): Unit = {
30 | val sparkConf = new SparkConf().setAppName("SpatialSpanStat")//.setMaster("local[*]")
31 | val sc = new SparkContext(sparkConf)
32 |
33 | Thread.sleep(3000)
34 |
35 | if (args.length < 2) {
36 | println("usage: SpatialSpanStat <input_file_path> <output_file_path>")
37 | System.exit(1)
38 | }
39 |
40 | val input_file_path = args(0)
41 | val output_file_path = args(1)
42 |
43 | val stats = sc.textFile(input_file_path).mapPartitions(iter => {
44 | iter.map(x => {
45 | val splitted = x.split("\t")
46 | (splitted(0).toInt,
47 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
48 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
49 | }).toArray.groupBy(_._1).map(now => getStats(now)).iterator
50 | }).collect().sortBy(_._1)
51 |
52 | val file = new File(output_file_path)
53 | val bw = new BufferedWriter(new FileWriter(file))
54 |
55 | stats.foreach(x => bw.write(x._1 + "\t" + x._2 + "\t" + "%.6f".format(x._3)
56 | + "\t" + "%.6f".format(x._4) + "\n"))
57 |
58 | bw.close()
59 |
60 | sc.stop()
61 | }
62 | }
63 |
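`SpatialSpanStat` writes one TSV line per trajectory: id, segment count, total length, and span, with the two doubles formatted to six decimals. A small sketch for reading a stat line back, assuming that field order:

```scala
// Parse "id<TAB>num_segs<TAB>total_distance<TAB>span" back into a tuple.
def parseStatLine(line: String): (Int, Int, Double, Double) = {
  val f = line.split('\t')
  (f(0).toInt, f(1).toInt, f(2).toDouble, f(3).toDouble)
}
```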
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/TrajIndexing.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.RTree
4 | import edu.utah.cs.partitioner.STRTrajPartition
5 | import edu.utah.cs.spatial.{LineSegment, MBR, Point}
6 | import org.apache.spark.rdd.PartitionPruningRDD
7 | import org.apache.spark.storage.StorageLevel
8 | import org.apache.spark.{SparkConf, SparkContext}
9 |
10 | import scala.io.Source
11 |
12 | /**
13 | * Created by dongx on 1/16/2017.
14 | */
15 | object TrajIndexing {
16 | final val max_entries_per_node = 25
17 | //final val k_values = Array(1, 10, 30, 50, 70, 100)
18 | final val k_values = Array(10)
19 | //final val k = 10
20 | final val N = 1401138
21 | final val c = 5
22 | //final val c_values = Array(1, 3, 5, 7, 10)
23 | //final val c_values = Array(5)
24 |
25 | private class ResultOrdering extends Ordering[(Double, Int)] {
26 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1)
27 | }
28 |
29 | def getMBR(x: (Int, Array[(Int, LineSegment)])): MBR = {
30 | val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
31 | var maxx = Double.MinValue
32 | var maxy = Double.MinValue
33 | var minx = Double.MaxValue
34 | var miny = Double.MaxValue
35 | pts.foreach(x => {
36 | maxx = Math.max(x.coord(0), maxx)
37 | maxy = Math.max(x.coord(1), maxy)
38 | minx = Math.min(x.coord(0), minx)
39 | miny = Math.min(x.coord(1), miny)
40 | })
41 | MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))
42 | }
43 |
44 | def main(args: Array[String]) : Unit = {
45 | val sparkConf = new SparkConf().setAppName("TrajIndexing").set("spark.locality.wait", "0")
46 | .set("spark.driver.maxResultSize", "4g")
47 | val sc = new SparkContext(sparkConf)
48 |
49 | if (args.length != 2) {
50 | println("usage: TrajIndexing <query_traj_file_path> <traj_data_file_path>")
51 | System.exit(1)
52 | }
53 |
54 | val query_traj_filename = args(0)
55 | val traj_data_filename = args(1)
56 |
57 | val query_traj_file = Source.fromFile(query_traj_filename)
58 | val queries = query_traj_file.getLines().map { line =>
59 | val splitted = line.split('\t')
60 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
61 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
62 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2))
63 |
64 | Thread.sleep(6000)
65 |
66 | val start1 = System.currentTimeMillis()
67 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => {
68 | iter.map(x => {
69 | val splitted = x.split("\t")
70 | (splitted(0).toInt,
71 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
72 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
73 | }).toArray.groupBy(_._1).map(now => (getMBR(now), (now._1, now._2.sortBy(_._1).map(_._2)))).iterator
74 | })
75 |
76 | val partitioned_traj = STRTrajPartition(trajs, trajs.partitions.length, 0.01, max_entries_per_node)
77 |
78 |
79 | val indexed_traj = partitioned_traj.mapPartitions(iter => {
80 | val data = iter.toArray
81 | var index: RTree = null
82 | if (data.length > 0) {
83 | index = RTree(data.zipWithIndex.map(x => (x._1._1, x._2, x._1._2._1)), 25)
84 | }
85 | Array((data.map(_._2), index)).iterator
86 | }).persist(StorageLevel.MEMORY_AND_DISK_SER)
87 |
88 | val stat = indexed_traj.mapPartitions(iter => iter.map(x => (x._2.root.m_mbr, x._1.length))).collect()
89 | val global_rtree = RTree.applyMBR(stat.zipWithIndex.map(x => (x._1._1, x._2, x._1._2)), max_entries_per_node)
90 |
91 | val end1 = System.currentTimeMillis()
92 | println("------------------------------------------------------------")
93 | println("Time to build indexes: " + (end1 - start1) / 1000.0)
94 | println("------------------------------------------------------------")
95 |
96 | k_values.foreach(k => {
97 | var tot_time = 0.0
98 | queries.foreach(query_traj => {
99 | val start2 = System.currentTimeMillis()
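The per-query block below implements a sample-then-prune kNN: it samples `c * k` candidate trajectories from partitions that intersect the query, takes the k-th smallest distance among them as a pruning bound, and only refines trajectories within that radius. The bound is safe because the k-th smallest distance over any candidate subset can only overestimate the true k-th nearest distance; a toy illustration with made-up distances:

```scala
// Made-up distances from a query to six trajectories, and to a 3-element sample.
val allDists    = Array(0.9, 0.2, 0.7, 0.4, 0.1, 0.6)
val sampleDists = Array(0.9, 0.4, 0.7) // distances for c * k sampled candidates
val k = 2

val bound   = sampleDists.sorted.take(k).last // 0.7: k-th best within the sample
val trueKth = allDists.sorted.take(k).last    // 0.2: k-th best overall

// Every true k-NN lies within `bound`, so range-pruning at `bound` loses nothing.
assert(trueKth <= bound)
```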
println("------------------------------------------------------------") 101 | val bc_query = sc.broadcast(query_traj) 102 | val global_intersect = global_rtree.circleRange(query_traj, 0.0).map(_._2).toSet 103 | //val c = global_intersect.size 104 | println("Going to Sample:" + (c * k)) 105 | val sample_set = new PartitionPruningRDD(indexed_traj, global_intersect.contains).flatMap(_._1) 106 | .takeSample(withReplacement = false, c * k, System.currentTimeMillis()) 107 | 108 | val pruning_bound = sc.parallelize(sample_set, Math.min(c * k, sc.defaultParallelism)) 109 | .map(x => Trajectory.discreteFrechetDistance(x._2, bc_query.value)).collect().sorted.take(k).last 110 | //.map(x => Trajectory.hausdorffDistance(x._2, bc_query.value)).collect().sorted.take(k).last 111 | val end2 = System.currentTimeMillis() 112 | 113 | println("Time to calculate pruning bound: " + (end2 - start2) / 1000.0) 114 | println("The pruning bound is: " + pruning_bound) 115 | 116 | val start3 = System.currentTimeMillis() 117 | val bc_pruning_bound = sc.broadcast(pruning_bound) 118 | val global_prune_set = global_rtree.circleRange(query_traj, pruning_bound).map(_._2).toSet 119 | 120 | val pruned_rdd = new PartitionPruningRDD(indexed_traj, global_prune_set.contains) 121 | val filtered = pruned_rdd.flatMap(part => part._2.circleRange(bc_query.value, bc_pruning_bound.value) 122 | .map(x => part._1(x._2))) 123 | val res = filtered.repartition(Math.max(sc.defaultParallelism, filtered.partitions.length)) 124 | .map(x => (Trajectory.discreteFrechetDistance(bc_query.value, x._2), x._1)) 125 | //.map(x => (Trajectory.hausdorffDistance(bc_query.value, x._2), x._1)) 126 | .takeOrdered(k)(new ResultOrdering) 127 | 128 | val end3 = System.currentTimeMillis() 129 | println("# distance calculated: " + (filtered.count() + c * k)) 130 | println("Time to calculate Finalize Result: " + (end3 - start3) / 1000.0) 131 | println("Total Latency: " + ((end3 - start2) / 1000.0)) 132 | println("The results show as below:") 133 | res.foreach(println) 134 | println("------------------------------------------------------------") 135 | tot_time += (end3 - start2) / 1000.0 136 | bc_query.destroy() 137 | bc_pruning_bound.destroy() 138 | }) 139 | 140 | println("Average Latency for k = " + k + " is : " + (tot_time / 100.0)) 141 | println("===================================================") 142 | }) 143 | 144 | sc.stop() 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/main/scala/edu/utah/cs/trajectory/TrajObjects.scala: -------------------------------------------------------------------------------- 1 | package edu.utah.cs.trajectory 2 | 3 | import edu.utah.cs.spatial.{LineSegment, Point} 4 | 5 | case class TrajMeta(traj_id: Int, seg_id: Int) 6 | 7 | case class Trajectory(id: Int, segments: Array[Point]) { 8 | def distanceFrom(otherTraj: Trajectory): Double = { 9 | Math.min(Trajectory.hDistance(this, otherTraj), Trajectory.hDistance(otherTraj, this)) 10 | } 11 | } 12 | 13 | object Trajectory { 14 | def RDPCompress(traj: Array[Point], epsilon: Double): Array[Point] = { 15 | val baseLineSeg = LineSegment(traj.head, traj.last) 16 | val dmax = traj.map(x => x.minDist(baseLineSeg)).zipWithIndex.maxBy(_._1) 17 | if (dmax._1 > epsilon) { 18 | RDPCompress(traj.slice(0, dmax._2 + 1), epsilon) ++ RDPCompress(traj.slice(dmax._2, traj.length), epsilon) 19 | } else { 20 | Array(traj.head, traj.last) 21 | } 22 | } 23 | 24 | def parseLine(line: String): Trajectory = { 25 | val splitted = line.split(" ") 26 | 
26 | Trajectory(splitted(0).toInt, splitted.iterator.drop(1).map(_.toDouble).grouped(2).map(seq => Point(Array(seq(0), seq(1)))).toArray)
27 | }
28 |
29 | def hDistance(traj1: Trajectory, traj2: Trajectory): Double = {
30 | traj1.segments.iterator.take(traj1.segments.length - 1).zip(traj1.segments.iterator.drop(1)).map {
31 | case (q0, q1) =>
32 | val qSegment = LineSegment(q0, q1)
33 | traj2.segments.iterator.take(traj2.segments.length - 1).zip(traj2.segments.iterator.drop(1)).map {
34 | case (p0, p1) => qSegment.minDist(LineSegment(p0, p1))
35 | }.min
36 | }.max
37 | }
38 |
39 | def distanceFrom(seg_iter: Iterable[(Int, LineSegment)],
40 | traj2: Array[LineSegment]): Double = {
41 | seg_iter.map { case (_, seg1) =>
42 | traj2.iterator.map { seg2 =>
43 | seg1.minDist(seg2)
44 | }.min
45 | }.max
46 | }
47 |
48 | def hausdorffDistance(x: Array[LineSegment], y: Array[LineSegment]): Double = {
49 | Math.max(x.map(seg_1 => y.map(seg_2 => seg_1.matchDist(seg_2)).min).max,
50 | y.map(seg_1 => x.map(seg_2 => seg_1.matchDist(seg_2)).min).max)
51 | }
52 |
53 | def discreteFrechetDistance(x: Array[LineSegment], y: Array[LineSegment]): Double = {
54 | val n = x.length
55 | val m = y.length
56 | val ca: Array[Array[Double]] = Array.fill[Double](n, m)(-1.0)
57 | var i = 0
58 | while (i < n) {
59 | var j = 0
60 | while (j < m) {
61 | if (i == 0 && j == 0) ca(i)(j) = x(i).matchDist(y(j))
62 | else if (i == 0) ca(i)(j) = Math.max(ca(i)(j - 1), x(i).matchDist(y(j)))
63 | else if (j == 0) ca(i)(j) = Math.max(ca(i - 1)(j), x(i).matchDist(y(j)))
64 | else ca(i)(j) = Math.max(Math.min(Math.min(ca(i - 1)(j), ca(i)(j - 1)), ca(i - 1)(j - 1)), x(i).matchDist(y(j)))
65 | j += 1
66 | }
67 | i += 1
68 | }
69 | ca.last.last
70 | }
71 | }
72 |
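A tiny usage sketch for the two segment-set distances defined above, on made-up single-segment trajectories. Both are built from the pairwise `matchDist`; for any pair of inputs the discrete Frechet value is at least the Hausdorff value, since Frechet's best coupling still has to touch every segment:

```scala
import edu.utah.cs.spatial.{LineSegment, Point}

def seg(x1: Double, y1: Double, x2: Double, y2: Double): LineSegment =
  LineSegment(Point(Array(x1, y1)), Point(Array(x2, y2)))

// Two parallel one-segment "trajectories", one unit apart.
val a = Array(seg(0.0, 0.0, 1.0, 0.0))
val b = Array(seg(0.0, 1.0, 1.0, 1.0))

val h = Trajectory.hausdorffDistance(a, b)
val f = Trajectory.discreteFrechetDistance(a, b)
assert(h <= f) // Hausdorff never exceeds discrete Frechet on the same pair
```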
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/TrajSampling.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import java.io.{BufferedWriter, File, FileWriter}
4 |
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 | * Created by dongx on 1/17/17.
10 | */
11 | object TrajSampling {
12 | def main(args: Array[String]): Unit = {
13 | val sparkConf = new SparkConf().setAppName("TrajSampling")
14 | val sc = new SparkContext(sparkConf)
15 |
16 | Thread.sleep(3000)
17 |
18 | if (args.length < 3) {
19 | println("usage: TrajSampling <input_file_path> <output_file_path> <sample_count>")
20 | System.exit(1)
21 | }
22 |
23 | val input_file_path = args(0)
24 | val output_file_path = args(1)
25 | val cnt = args(2).toInt
26 |
27 | val sampled_trajs = sc.textFile(input_file_path).mapPartitions(iter => {
28 | iter.map(x => {
29 | val splitted = x.split("\t")
30 | (splitted(0).toInt,
31 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
32 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
33 | }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator
34 | }).takeSample(withReplacement = false, cnt, System.currentTimeMillis())
35 |
36 | val file = new File(output_file_path)
37 | val bw = new BufferedWriter(new FileWriter(file))
38 |
39 | for (i <- sampled_trajs.indices) {
40 | val cur_traj = sampled_trajs(i)._2
41 | cur_traj.foreach(x => bw.write(i + "\t" + x.toTSV + "\n"))
42 | }
43 |
44 | bw.close()
45 |
46 | sc.stop()
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/VPTreeST.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.VPTree
4 | import edu.utah.cs.spatial.{LineSegment, Point}
5 | import edu.utah.cs.util.MetricObject
6 |
7 | import scala.io.Source
8 | import scala.collection.mutable
9 |
10 | /**
11 | * Created by dongx on 2/1/17.
12 | */
13 | object VPTreeST {
14 | //final val k_values = Array(10, 30, 50, 70, 100)
15 | final val k = 10
16 |
17 | private case class VPTraj(id: Int, data: Array[LineSegment]) extends MetricObject {
18 | override def distance(o: MetricObject): Double = {
19 | Trajectory.hausdorffDistance(data, o.asInstanceOf[VPTraj].data)
20 | }
21 | }
22 |
23 | def main(args: Array[String]) : Unit = {
24 | if (args.length != 2) {
25 | println("usage: VPTreeST <query_traj_file_path> <traj_data_file_path>")
26 | System.exit(1)
27 | }
28 |
29 | val query_traj_filename = args(0)
30 | val traj_data_filename = args(1)
31 |
32 | val query_traj_file = Source.fromFile(query_traj_filename)
33 | val query_traj = query_traj_file.getLines().map { line =>
34 | val splitted = line.split('\t')
35 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
36 | Point(Array(splitted(3).toDouble, splitted(4).toDouble)))
37 | }.toArray
38 |
39 | val traj_data_file = Source.fromFile(traj_data_filename)
40 | val cur_traj = mutable.ListBuffer[LineSegment]()
41 | val trajs = mutable.ListBuffer[VPTraj]()
42 | val ans = mutable.ListBuffer[(Double, Int)]()
43 | var last_traj_id = -1
44 | val new_iter = traj_data_file.getLines().map(cur => {
45 | val x = cur.split("\t")
46 | (LineSegment(Point(Array(x(1).toDouble, x(2).toDouble)), Point(Array(x(3).toDouble, x(4).toDouble))), x(0).toInt)
47 | })
48 | var i = 0
49 | while (new_iter.hasNext) {
50 | val now = new_iter.next
51 | if (now._2 != last_traj_id) {
52 | if (cur_traj.nonEmpty) trajs += VPTraj(last_traj_id, cur_traj.toArray)
53 | last_traj_id = now._2
54 | i += 1
55 | //println("checking " + i + " trajectory....")
56 | cur_traj.clear()
57 | }
58 | cur_traj += now._1
59 | }
60 | if (cur_traj.nonEmpty) trajs += VPTraj(last_traj_id, cur_traj.toArray)
61 | //assert(ans.size == N)
62 | val tree = VPTree(trajs.toArray)
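The `tree.knn` call that follows returns, as used throughout this repo, a pair of (neighbor list, number of distance computations). For small inputs it can be sanity-checked against a brute-force scan; a sketch reusing the `trajs` and `query_traj` already in scope:

```scala
// Brute force: rank every loaded trajectory by Hausdorff distance to the
// query and keep the k closest -- the ids should match the VP-tree answer.
val expected = trajs.toArray
  .map(t => (Trajectory.hausdorffDistance(t.data, query_traj), t.id))
  .sortBy(_._1)
  .take(k)
expected.foreach(println)
```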
63 | tree.knn(VPTraj(-1, query_traj), k)._1.map(x => (x._1.id, x._2)).foreach(println)
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/trajectory/VPTreeSolution.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.trajectory
2 |
3 | import edu.utah.cs.index.VPTree
4 | import edu.utah.cs.partitioner.IDPartition
5 | import edu.utah.cs.spatial.{LineSegment, Point}
6 | import edu.utah.cs.util.MetricObject
7 | import org.apache.spark.rdd.PartitionPruningRDD
8 | import org.apache.spark.storage.StorageLevel
9 | import org.apache.spark.{SparkConf, SparkContext}
10 |
11 | import scala.io.Source
12 | import scala.util.Random
13 |
14 | /**
15 | * Created by dongx on 2/1/17.
16 | */
17 | object VPTreeSolution {
18 | final val max_entries_per_node = 25
19 | //final val k_values = Array(10, 30, 50, 70, 100)
20 | final val k = 10
21 | final val c = 5
22 |
23 | private case class VPTraj(id: Int, data: Array[LineSegment]) extends MetricObject {
24 | override def distance(o: MetricObject): Double = {
25 | Trajectory.hausdorffDistance(data, o.asInstanceOf[VPTraj].data)
26 | }
27 | }
28 |
29 | private class ResultOrdering extends Ordering[(Double, Int)] {
30 | override def compare(x: (Double, Int), y: (Double, Int)): Int = x._1.compare(y._1)
31 | }
32 |
33 | private def shuffle[T](data: Array[T]) = {
34 | var i = 0
35 | val n = data.length
36 | while (i < n - 1) {
37 | val tmp = i + Random.nextInt(n - i)
38 | val t = data(i)
39 | data(i) = data(tmp)
40 | data(tmp) = t // complete the Fisher-Yates swap
41 | i += 1
42 | }
43 | }
44 |
45 | def main(args: Array[String]) : Unit = {
46 | val sparkConf = new SparkConf().setAppName("VPTreeSolution")
47 | .set("spark.locality.wait", "0").set("spark.driver.maxResultSize", "4g")
48 | val sc = new SparkContext(sparkConf)
49 |
50 | if (args.length != 2) {
51 | println("usage: VPTreeSolution <query_traj_file_path> <traj_data_file_path>")
52 | System.exit(1)
53 | }
54 |
55 | val query_traj_filename = args(0)
56 | val traj_data_filename = args(1)
57 |
58 | val query_traj_file = Source.fromFile(query_traj_filename)
59 | val queries = query_traj_file.getLines().map { line =>
60 | val splitted = line.split('\t')
61 | (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
62 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
63 | }.toArray.groupBy(_._1).map(x => x._2.map(_._2))
64 |
65 | Thread.sleep(6000)
66 |
67 | val start1 = System.currentTimeMillis()
68 |
69 | val trajs = sc.textFile(traj_data_filename).mapPartitions(iter => {
70 | iter.map(x => {
71 | val splitted = x.split("\t")
72 | (splitted(0).toInt,
73 | LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
74 | Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
75 | }).toArray.groupBy(_._1).map(now => VPTraj(now._1, now._2.map(_._2))).iterator
76 | })
77 |
78 | val pivots = trajs.takeSample(withReplacement = false, trajs.partitions.length, System.currentTimeMillis()).map(_.data)
79 | val bc_pivots = sc.broadcast(pivots)
80 | val bc_pivots_vptree = sc.broadcast(VPTree(pivots.zipWithIndex.map(x => VPTraj(x._2, x._1))))
81 | val traj_with_pivot = trajs.mapPartitions(iter => {
82 | iter.map(x => {
83 | (bc_pivots_vptree.value.knn(x, 1)._1.head._1.id, x)
84 | })
85 | })
86 | val parted_by_pivot = IDPartition(traj_with_pivot, pivots.length)
87 | val indexed = parted_by_pivot.mapPartitionsWithIndex((id, iter) => {
88 | val data = iter.map(_._2.asInstanceOf[VPTraj]).toArray
89 | val pivot = bc_pivots.value(id)
90 | val cover_radius = data.map(x => Trajectory.hausdorffDistance(x.data, pivot)).max
91 | val vp_tree = VPTree(data)
92 | Array((pivot, cover_radius, data.length, vp_tree)).iterator
93 | }).persist(StorageLevel.MEMORY_AND_DISK_SER)
94 |
95 | val stats = indexed.map(x => (x._1, x._2, x._3)).collect()
96 | .zipWithIndex.map(x => (x._1._1, x._1._2, x._1._3, x._2))
97 |
98 | val end1 = System.currentTimeMillis()
99 | println("Time to build index: " + ((end1 - start1) / 1000.0))
100 |
101 |
102 | var tot_time = 0.0
103 | queries.foreach(query => {
104 | val start2 = System.currentTimeMillis()
105 | println("----------------------------------------------")
106 | val sorted_pivots = stats.map(x => (Trajectory.hausdorffDistance(x._1, query), x._2, x._3, x._4)).sortBy(_._1)
107 | var i = 0
108 | var sum = 0
109 | while (sum < k) {
110 | sum += sorted_pivots(i)._3
111 | i += 1
112 | }
113 |
114 | val prune_set = sorted_pivots.slice(0, i).map(_._4).toSet
115 | val bc_query = sc.broadcast(query)
116 | val bc_k = sc.broadcast(k)
117 | // val first_filter = new PartitionPruningRDD(indexed, prune_set.contains)
118 | //   .flatMap(i_part => {
119 | //     i_part._4.knn(VPTraj(0, bc_query.value), bc_k.value)._1.map(x => (x._2, x._1.id))
120 | //   }).takeOrdered(k)(new ResultOrdering)
121 |
122 | val first_filter = new PartitionPruningRDD(indexed, prune_set.contains)
123 | .aggregate((Array[(Double, Int)](), 0))((now, part) => {
124 | val knn_res = part._4.knn(VPTraj(0, bc_query.value), bc_k.value)
125 | ((knn_res._1.map(x => (x._2, x._1.id)) ++ now._1).sortBy(_._1).take(bc_k.value), now._2 + knn_res._2)
126 | }, (left, right) => {
127 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2)
128 | })
129 |
130 | val tick1 = System.currentTimeMillis()
131 | println("Time for first filter: " + ((tick1 - start2) / 1000.0))
132 |
133 | val pruning_bound = first_filter._1.last._1
134 | val global_prune_set =
135 | sorted_pivots.filter(x => x._1 - x._2 <= pruning_bound).map(_._4).toSet -- prune_set
136 | val bc_pruning_bound = sc.broadcast(pruning_bound)
137 |
138 | // val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains)
139 | //   .flatMap(i_part => {
140 | //     i_part._4.knn(VPTraj(0, bc_query.value), k, bc_pruning_bound.value)._1.map(x => (x._2, x._1.id))
141 | //   }).takeOrdered(k)(new ResultOrdering)
142 |
143 | val second_filter = new PartitionPruningRDD(indexed, global_prune_set.contains)
144 | .aggregate((Array[(Double, Int)](), 0))((now, part) => {
145 | val knn_res = part._4.knn(VPTraj(0, bc_query.value), bc_k.value, bc_pruning_bound.value)
146 | ((knn_res._1.map(x => (x._2, x._1.id)) ++ now._1).sortBy(_._1).take(bc_k.value), now._2 + knn_res._2)
147 | }, (left, right) => {
148 | ((left._1 ++ right._1).sortBy(_._1).take(bc_k.value), left._2 + right._2)
149 | })
150 |
151 | val final_res = (first_filter._1 ++ second_filter._1).sortBy(_._1).take(k)
152 |
153 | val end2 = System.currentTimeMillis()
154 | println("Time for second filter and final merge: " + ((end2 - tick1) / 1000.0))
155 | println("# of trajs checked distance: " + (first_filter._2 + second_filter._2 + pivots.length))
156 | println("Total Latency: " + ((end2 - start2) / 1000.0))
157 | final_res.foreach(println)
158 | tot_time += (end2 - start2) / 1000.0
159 | println("----------------------------------------------")
160 | bc_k.destroy()
161 | bc_query.destroy()
162 | bc_pruning_bound.destroy()
163 | })
164 |
165 | println("Average Latency: " + (tot_time / queries.size))
166 |
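Both `aggregate` calls in the loop above maintain a running top-k: each partition's VP-tree contributes its local k best `(distance, id)` pairs plus a distance-computation count, and the combiner merges candidate lists by concatenating, re-sorting, and truncating. The merge step in isolation, as a sketch:

```scala
// Merge two top-k candidate lists into one; sorting k + k pairs is cheap.
def mergeTopK(left: Array[(Double, Int)], right: Array[(Double, Int)],
              k: Int): Array[(Double, Int)] =
  (left ++ right).sortBy(_._1).take(k)

// mergeTopK(Array((0.3, 7)), Array((0.1, 2), (0.5, 9)), 2)
//   == Array((0.1, 2), (0.3, 7))
```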
167 | bc_pivots.destroy()
168 | bc_pivots_vptree.destroy()
169 | sc.stop()
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BitArray.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 10/4/16.
5 | */
6 | object BitArray {
7 | def create(length: Int): Array[Int] = {
8 | Array.fill[Int](math.ceil(length / 32.0).toInt){0}
9 | }
10 |
11 | def get(bytes: Array[Int], id: Int) = {
12 | (bytes(id / 32) & (1 << (id % 32))) != 0
13 | }
14 |
15 | def set(bytes: Array[Int], id: Int) = {
16 | bytes(id / 32) = bytes(id / 32) | (1 << (id % 32))
17 | }
18 |
19 | def or(a: Array[Int], b: Array[Int]) = {
20 | a.zip(b).map(x => x._1 | x._2)
21 | }
22 |
23 | def and(a: Array[Int], b: Array[Int]) = {
24 | a.zip(b).map(x => x._1 & x._2)
25 | }
26 |
27 | def flip(a: Array[Int]) = a.map(~_)
28 |
29 | def count(a: Array[Int]) = {
30 | a.map(x => Integer.bitCount(x)).sum // popcount per 32-bit word
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BitMap.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 1/19/17.
5 | */
6 | case class BitMapMeta(num_bits: Int)
7 |
8 | object BitMap {
9 | var meta: BitMapMeta = null
10 |
11 | def put(bf: Array[Int], key: Int): Unit = BitArray.set(bf, key)
12 |
13 | def contains(bf: Array[Int], key: Int): Boolean = BitArray.get(bf, key)
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/BloomFilter.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | import scala.util.Random
4 |
5 | /**
6 | * Created by dongx on 10/4/16.
7 | */
8 | case class BloomFilterMeta(num_bits: Int, num_hashs: Int) {
9 | val seeds = (1 to num_hashs).map(x => (Random.nextInt(Integer.MAX_VALUE), Random.nextInt(Integer.MAX_VALUE)))
10 | }
11 |
12 | object BloomFilter {
13 | var meta: BloomFilterMeta = null
14 |
15 | private def calcHash(seed: (Int, Int), key: Int) =
16 | ((((seed._1.toLong % meta.num_bits) * (key % meta.num_bits) + seed._2 % meta.num_bits) % meta.num_bits + meta.num_bits) % meta.num_bits).toInt // affine hash into [0, num_bits); Long math avoids Int overflow
17 |
18 | def put(bf: Array[Int], key: Int): Unit = {
19 | meta.seeds.foreach(seed => {
20 | BitArray.set(bf, calcHash(seed, key))
21 | })
22 | }
23 |
24 | def mayContains(bf: Array[Int], key: Int): Boolean = {
25 | meta.seeds.foreach(seed => {
26 | if (!BitArray.get(bf, calcHash(seed, key))) return false
27 | })
28 | true
29 | }
30 |
31 | def optimalNumBits(num_items: Long, fp_rate: Double): Int = {
32 | math.ceil(-1 * num_items * math.log(fp_rate) / math.log(2) / math.log(2)).toInt
33 | }
34 |
35 | def optimalNumHashes(num_items: Long, num_bits: Long): Int = {
36 | math.ceil(num_bits.toDouble / num_items * math.log(2)).toInt
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/scala/edu/utah/cs/util/MetricObject.scala:
--------------------------------------------------------------------------------
1 | package edu.utah.cs.util
2 |
3 | /**
4 | * Created by dongx on 2/3/17.
5 | */
6 | abstract class MetricObject {
7 | def distance(o: MetricObject): Double
8 | }
9 |
--------------------------------------------------------------------------------
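A usage sketch for the bit utilities above: size a Bloom filter for roughly a million keys at a 1% false-positive target, insert a key, and probe it. `mayContains` may report true for absent keys but never false for present ones:

```scala
import edu.utah.cs.util.{BitArray, BloomFilter, BloomFilterMeta}

object BloomFilterDemo {
  def main(args: Array[String]): Unit = {
    val numItems = 1000000L
    val numBits  = BloomFilter.optimalNumBits(numItems, 0.01)      // ~9.6M bits
    val numHashs = BloomFilter.optimalNumHashes(numItems, numBits) // ~7 hash functions

    BloomFilter.meta = BloomFilterMeta(numBits, numHashs)
    val bits = BitArray.create(numBits)

    BloomFilter.put(bits, 42)
    println(BloomFilter.mayContains(bits, 42)) // always true
    println(BloomFilter.mayContains(bits, 43)) // false with high probability
  }
}
```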