├── 10 queries (for testing your program ONLY).txt ├── 10 queries results (for testing your program ONLY).txt ├── 100 Queries.txt ├── Core_functions.py ├── Dataset for R-Tree.txt ├── RTree.py ├── Region_tree.py ├── dataset.txt ├── test_query.txt └── test_query_result.txt /10 queries (for testing your program ONLY).txt: -------------------------------------------------------------------------------- 1 | 17840 18840 13971 14971 2 | 33451 34451 29693 30693 3 | 791 1791 2515 3515 4 | 81921 82921 94973 95973 5 | 75678 76678 53545 54545 6 | 90965 91965 11078 12078 7 | 69904 70904 67308 68308 8 | 7187 8187 56997 57997 9 | 34375 35375 60942 61942 10 | 57144 58144 24954 25954 -------------------------------------------------------------------------------- /10 queries results (for testing your program ONLY).txt: -------------------------------------------------------------------------------- 1 | 15 2 | 13 3 | 9 4 | 8 5 | 8 6 | 8 7 | 12 8 | 10 9 | 9 10 | 11 -------------------------------------------------------------------------------- /100 Queries.txt: -------------------------------------------------------------------------------- 1 | 17840 18840 13971 14971 2 | 33451 34451 29693 30693 3 | 791 1791 2515 3515 4 | 81921 82921 94973 95973 5 | 75678 76678 53545 54545 6 | 90965 91965 11078 12078 7 | 69904 70904 67308 68308 8 | 7187 8187 56997 57997 9 | 34375 35375 60942 61942 10 | 57144 58144 24954 25954 11 | 92689 93689 41529 42529 12 | 37632 38632 15609 16609 13 | 35578 36578 70910 71910 14 | 96034 97034 20504 21504 15 | 59420 60420 97747 98747 16 | 43505 44505 77261 78261 17 | 11718 12718 76956 77956 18 | 6953 7953 12509 13509 19 | 79472 80472 88875 89875 20 | 7481 8481 55149 56149 21 | 42419 43419 98446 99446 22 | 66228 67228 12323 13323 23 | 65753 66753 73415 74415 24 | 69320 70320 128 1128 25 | 34357 35357 26463 27463 26 | 25083 26083 27046 28046 27 | 67992 68992 62715 63715 28 | 42656 43656 3570 4570 29 | 33625 34625 38690 39690 30 | 24074 25074 93045 94045 31 | 36436 
37436 67579 68579 32 | 70305 71305 48154 49154 33 | 44535 45535 77259 78259 34 | 60664 61664 24006 25006 35 | 66133 67133 68145 69145 36 | 79155 80155 8552 9552 37 | 66591 67591 45383 46383 38 | 20875 21875 32344 33344 39 | 18798 19798 90196 91196 40 | 32472 33472 53156 54156 41 | 16659 17659 57555 58555 42 | 80202 81202 84652 85652 43 | 20270 21270 22858 23858 44 | 88222 89222 53896 54896 45 | 61548 62548 12296 13296 46 | 46940 47940 97985 98985 47 | 79875 80875 17245 18245 48 | 46139 47139 24410 25410 49 | 94505 95505 6802 7802 50 | 48416 49416 60638 61638 51 | 74948 75948 27571 28571 52 | 69190 70190 41539 42539 53 | 72954 73954 90066 91066 54 | 73883 74883 91752 92752 55 | 80261 81261 6355 7355 56 | 44907 45907 96920 97920 57 | 63911 64911 25109 26109 58 | 81571 82571 84181 85181 59 | 47968 48968 69793 70793 60 | 38076 39076 9516 10516 61 | 82089 83089 85017 86017 62 | 7500 8500 61964 62964 63 | 2262 3262 53640 54640 64 | 86374 87374 96767 97767 65 | 60443 61443 34789 35789 66 | 57404 58404 35390 36390 67 | 62361 63361 26594 27594 68 | 76929 77929 35314 36314 69 | 16659 17659 50812 51812 70 | 27066 28066 96921 97921 71 | 57167 58167 71974 72974 72 | 93841 94841 21077 22077 73 | 97083 98083 75412 76412 74 | 5258 6258 45051 46051 75 | 45204 46204 43335 44335 76 | 54567 55567 27292 28292 77 | 28351 29351 62068 63068 78 | 89257 90257 30614 31614 79 | 15708 16708 75630 76630 80 | 27381 28381 76151 77151 81 | 10419 11419 84786 85786 82 | 11541 12541 72780 73780 83 | 11379 12379 88470 89470 84 | 8093 9093 28039 29039 85 | 39282 40282 35160 36160 86 | 24959 25959 96449 97449 87 | 7133 8133 18799 19799 88 | 17526 18526 4216 5216 89 | 94212 95212 22784 23784 90 | 49267 50267 39415 40415 91 | 66119 67119 3834 4834 92 | 66708 67708 94471 95471 93 | 65903 66903 55964 56964 94 | 25085 26085 81611 82611 95 | 31593 32593 52466 53466 96 | 57761 58761 42013 43013 97 | 37252 38252 69303 70303 98 | 14792 15792 48631 49631 99 | 57773 58773 22886 23886 100 | 76671 77671 97055 98055 
# --------------------------------------------------------------------------------
# /Core_functions.py:
# --------------------------------------------------------------------------------
"""Core functions of a simple point R-tree.

Points are dicts with ``'x'`` and ``'y'`` keys; rectangles (queries and MBRs)
are dicts with ``'x1'``, ``'x2'``, ``'y1'`` and ``'y2'`` keys.  Use this module
as a reference for building R-trees and running range-count queries.
"""
import math
import sys

# Maximum number of entries (data points or child nodes) per node.
# B = 3 or B = 4 is the suggested setting.
B = 4


def sequential_query(points, query):
    """Return how many of ``points`` lie inside ``query`` (borders included)."""
    return sum(
        1
        for point in points
        if query['x1'] <= point['x'] <= query['x2']
        and query['y1'] <= point['y'] <= query['y2']
    )


class Node(object):
    """A tree node: a leaf holds ``data_points``, an internal node ``child_nodes``."""

    def __init__(self):
        self.id = 0
        # for internal nodes
        self.child_nodes = []
        # for leaf nodes
        self.data_points = []
        self.parent = None
        # -1 everywhere marks "no entries yet"; RTree.update_mbr replaces it.
        self.MBR = {
            'x1': -1,
            'y1': -1,
            'x2': -1,
            'y2': -1,
        }

    def perimeter(self):
        """Return the half perimeter (width + height) of the MBR."""
        return (self.MBR['x2'] - self.MBR['x1']) + (self.MBR['y2'] - self.MBR['y1'])

    def _entry_count(self):
        # Number of entries that count towards the node's capacity.
        if self.is_leaf():
            return len(self.data_points)
        return len(self.child_nodes)

    def is_underflow(self):
        """Return True when the node holds fewer than ceil(B / 2) entries."""
        return self._entry_count() < math.ceil(B / 2)

    def is_overflow(self):
        """Return True when the node holds more than B entries."""
        return self._entry_count() > B

    def is_root(self):
        """Return True when the node has no parent."""
        return self.parent is None

    def is_leaf(self):
        """Return True when the node has no child nodes."""
        return not self.child_nodes


class RTree(object):
    """R-tree over 2-D points with perimeter-minimising insertion and splits."""

    def __init__(self):
        self.root = Node()

    def query(self, node, query):
        """Count the points below ``node`` that fall inside ``query``."""
        if node.is_leaf():
            return sum(1 for point in node.data_points
                       if self.is_covered(point, query))
        # Only descend into children whose MBR can contain a match.
        return sum(self.query(child, query) for child in node.child_nodes
                   if self.is_intersect(child, query))

    def is_intersect(self, node, query):
        """Return True when ``node``'s MBR and ``query`` share at least one point.

        Two closed intervals overlap exactly when each one starts no later
        than the other ends; comparing the bounds directly avoids the float
        rounding of a centre/half-extent formulation.
        """
        mbr = node.MBR
        return (mbr['x1'] <= query['x2'] and query['x1'] <= mbr['x2']
                and mbr['y1'] <= query['y2'] and query['y1'] <= mbr['y2'])

    def is_covered(self, point, query):
        """Return True when ``point`` lies inside ``query`` (borders included)."""
        return (query['x1'] <= point['x'] <= query['x2']
                and query['y1'] <= point['y'] <= query['y2'])

    def insert(self, u, p):
        """Insert point ``p`` into the subtree rooted at ``u``."""
        if u.is_leaf():
            self.add_data_point(u, p)
            if u.is_overflow():
                self.handle_overflow(u)
        else:
            v = self.choose_subtree(u, p)
            self.insert(v, p)
            # Refresh the node we are standing on (not the child, which may
            # have been split and detached) so every node on the insertion
            # path, the root included, ends up with an exact MBR.
            self.update_mbr(u)

    def choose_subtree(self, u, p):
        """Return the child of ``u`` whose MBR grows least (in perimeter) to cover ``p``.

        Ties go to the earliest child.  A leaf is returned unchanged.
        """
        if u.is_leaf():
            return u
        return min(u.child_nodes, key=lambda child: self.peri_increase(child, p))

    def peri_increase(self, node, p):
        """Return the growth in half perimeter of ``node``'s MBR if it had to cover ``p``."""
        mbr = node.MBR
        xs = (mbr['x1'], mbr['x2'], p['x'])
        ys = (mbr['y1'], mbr['y2'], p['y'])
        return (max(xs) - min(xs)) + (max(ys) - min(ys)) - node.perimeter()

    def handle_overflow(self, u):
        """Split the overflowing node ``u`` and propagate overflow upwards."""
        u1, u2 = self.split(u)
        if u.is_root():
            # The root was split: grow the tree by one level.
            new_root = Node()
            self.add_child(new_root, u1)
            self.add_child(new_root, u2)
            self.root = new_root
            self.update_mbr(new_root)
        else:
            # Replace u by its two halves in u's parent.
            w = u.parent
            w.child_nodes.remove(u)
            self.add_child(w, u1)
            self.add_child(w, u2)
            if w.is_overflow():
                self.handle_overflow(w)
            self.update_mbr(w)

    def split(self, u):
        """Split ``u`` into two new nodes with the smallest summed perimeter.

        Leaf entries are tried sorted by x and by y; child nodes are tried
        sorted by each of the four MBR bounds.  Every cut leaving at least
        ceil(0.4 * B) entries on each side is a candidate, and the first
        candidate with the minimal sum wins.  Two empty nodes are returned
        when no cut is possible.
        """
        if u.is_leaf():
            attr = 'data_points'
            orderings = [
                sorted(u.data_points, key=lambda pt: pt['x']),
                sorted(u.data_points, key=lambda pt: pt['y']),
            ]
        else:
            attr = 'child_nodes'
            orderings = [
                sorted(u.child_nodes, key=lambda child, side=side: child.MBR[side])
                for side in ('x1', 'x2', 'y1', 'y2')
            ]

        m = len(getattr(u, attr))
        min_fill = math.ceil(0.4 * B)
        best_s1, best_s2 = Node(), Node()
        best_perimeter = None
        for ordering in orderings:
            for i in range(min_fill, m - min_fill + 1):
                s1 = self._group(attr, ordering[:i])
                s2 = self._group(attr, ordering[i:])
                total = s1.perimeter() + s2.perimeter()
                if best_perimeter is None or total < best_perimeter:
                    best_perimeter = total
                    best_s1, best_s2 = s1, s2

        # Candidates are built without touching parent pointers; only the
        # winning halves adopt their children.
        for half in (best_s1, best_s2):
            for child in half.child_nodes:
                child.parent = half
        return best_s1, best_s2

    def _group(self, attr, entries):
        # Build a detached node holding `entries` under `attr`, with an exact MBR.
        node = Node()
        setattr(node, attr, entries)
        self.update_mbr(node)
        return node

    def add_child(self, node, child):
        """Attach ``child`` to ``node`` and recompute ``node``'s MBR.

        The MBR is recomputed rather than grown incrementally because a fresh
        node starts from the -1 sentinel, which an incremental min/max would
        keep as a bogus lower bound for non-negative coordinates.
        """
        node.child_nodes.append(child)
        child.parent = node
        self.update_mbr(node)

    def add_data_point(self, node, data_point):
        """Store ``data_point`` in the leaf ``node`` and recompute its MBR."""
        node.data_points.append(data_point)
        self.update_mbr(node)

    def update_mbr(self, node):
        """Recompute ``node``'s MBR from its points (leaf) or children (internal).

        A node without entries gets the -1 sentinel MBR back instead of
        raising on ``min([])``.
        """
        if node.is_leaf():
            x_list = [point['x'] for point in node.data_points]
            y_list = [point['y'] for point in node.data_points]
        else:
            x_list = [child.MBR[side] for child in node.child_nodes
                      for side in ('x1', 'x2')]
            y_list = [child.MBR[side] for child in node.child_nodes
                      for side in ('y1', 'y2')]
        if not x_list:
            node.MBR = {'x1': -1, 'y1': -1, 'x2': -1, 'y2': -1}
            return
        node.MBR = {
            'x1': min(x_list),
            'x2': max(x_list),
            'y1': min(y_list),
            'y2': max(y_list),
        }
# --------------------------------------------------------------------------------
# /RTree.py:
# --------------------------------------------------------------------------------
"""Build an R-tree from a data file and benchmark it against a sequential scan.

Usage: ``python RTree.py <data file> <queries file>``.  A timing summary and
the per-query counts are printed and also written to ``./result.txt``.
"""
import sys
import time

# from Core_functions import
from Region_tree import RegionTree, Point, Rect, sequential_query

# Kept for backward compatibility; main() works on local variables only.
data_file = ""
queries_file = ""


def time_it(func, *args):
    """Call ``func(*args)`` and return ``{'result': ..., 'time': seconds}``.

    ``time.perf_counter`` is used because single queries finish in
    microseconds, below the resolution ``time.time`` offers on some platforms.
    """
    start = time.perf_counter()
    result = func(*args)
    end = time.perf_counter()
    return {'result': result, 'time': (end - start)}


def construct_r_tree(data_points):
    """Insert every point into a new RegionTree, printing progress, and return it."""
    R_tree = RegionTree()
    total = len(data_points)
    # Redraw the progress display roughly every 0.1% of the input.
    step = total / 1000
    pending = 0
    print("\033[H\033[J")
    print("build R-Tree:\n0.0%\n", end="\r")
    for i, point in enumerate(data_points):
        if pending >= step:
            print("\033[H\033[J")
            print("build R-Tree:\n{:.1f}%\n".format(100 * i / total), end="\r")
            pending = pending % step
        R_tree.insert_point(point, cur_node=R_tree.root)
        pending += 1

    return R_tree


def _ratio(numerator, denominator):
    # Division that reports NaN instead of raising when the denominator is 0
    # (no valid query read, or timings too small to measure).
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def main(args):
    """Run the benchmark; ``args`` is ``sys.argv`` (script name + 2 file names).

    The data file holds the number of points on its first line followed by
    one ``id x y`` record per line; the queries file holds one
    ``x1 x2 y1 y2`` rectangle per line (other lines are ignored).

    Raises:
        ValueError: if the data file has fewer records than it announces.
    """
    if len(args) != 3:
        print("Error: Invalid numbers of arguments. Require 2 arguments.")
        return

    data_filename = args[1]
    queries_filename = args[2]

    # Read data file
    with open(data_filename, "r") as data_handle:
        data_lines = data_handle.read().split('\n')
    data_size = int(data_lines[0])
    records = data_lines[1:data_size + 1]
    if len(records) < data_size:
        raise ValueError(
            "data file announces {} points but contains only {}".format(
                data_size, len(records)))
    data_points = []
    for record in records:
        # split() tolerates repeated blanks and a trailing '\r'.
        point_id, x, y = record.split()
        data_points.append(Point(point_id, int(x), int(y)))

    # Create R tree
    R_tree = construct_r_tree(data_points)

    # Read queries file
    with open(queries_filename, "r") as queries_handle:
        query_lines = queries_handle.read().split('\n')

    # Load and run queries
    time_sum_sequential = 0
    time_sum_r_tree = 0
    number_of_queries = 0
    results = []
    for line in query_lines:
        fields = line.split()
        if len(fields) != 4:
            continue
        x1, x2, y1, y2 = fields
        query = Rect(int(x1), int(y1), int(x2), int(y2))
        number_of_queries += 1
        print(query)
        # Run and time each sequential & R tree query
        sequential_run = time_it(sequential_query, data_points, query)
        time_sum_sequential += sequential_run['time']
        r_tree_run = time_it(R_tree.region_query, query)
        time_sum_r_tree += r_tree_run['time']
        results.append("{} - {} \n".format(r_tree_run['result'], sequential_run['result']))

    summary = [
        '\ntotal time for sequential queries: {}'.format(time_sum_sequential),
        'average time for every sequential query: {}\n'.format(
            _ratio(time_sum_sequential, number_of_queries)),
        'total time for R-Tree queries: {}'.format(time_sum_r_tree),
        'average time for every R-Tree query: {}\n'.format(
            _ratio(time_sum_r_tree, number_of_queries)),
        'R-Tree is {} times faster then sequential query'.format(
            _ratio(time_sum_sequential, time_sum_r_tree)),
    ]
    with open("./result.txt", "w+") as output_file:
        for text in summary:
            print(text)
            output_file.write(text + '\n')
        # The file separates the summary from the per-query lines with one
        # extra blank line that is not printed to the console.
        output_file.write('\n')
        output_file.writelines(results)


if __name__ == "__main__":
    main(sys.argv)
# --------------------------------------------------------------------------------
# /Region_tree.py:
# --------------------------------------------------------------------------------
"""Object-based R-tree (``RegionTree``) over 2-D points, plus its helper types."""
import math
import sys


# B = 4
class Rect:
    """Axis-aligned rectangle with corners (x1, y1) and (x2, y2)."""

    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

    def perimeter(self):
        """Return the full perimeter of the rectangle."""
        return 2 * (abs(self.x2 - self.x1) + abs(self.y2 - self.y1))

    def is_overlap(self, rect):
        """Return True when the two rectangles share at least one point."""
        # Disjoint on the y axis, or disjoint on the x axis.  Each axis must
        # be compared against the same axis of the other rectangle.
        if self.y1 > rect.y2 or self.y2 < rect.y1 or self.x1 > rect.x2 or self.x2 < rect.x1:
            return False
        return True

    def contain_rect(self, rect):
        """Return True when ``rect`` lies strictly inside this rectangle.

        Touching borders do not count as contained.
        """
        return self.x1 < rect.x1 and self.y1 < rect.y1 and self.x2 > rect.x2 and self.y2 > rect.y2

    def has_point(self, point):
        """Return True when ``point`` lies inside the rectangle (borders included)."""
        return self.x1 <= point.x <= self.x2 and self.y1 <= point.y <= self.y2

    def __str__(self):
        return "Rect: ({}, {}), ({}, {})".format(self.x1, self.y1, self.x2, self.y2)


class Point:
    """A data point with an identifier and integer coordinates."""

    def __init__(self, id, x, y):
        self.id = id
        self.x = x
        self.y = y

    def __str__(self):
        return "Point #{}: ({}, {})".format(self.id, self.x, self.y)


def sequential_query(points, query):
    """Return how many of ``points`` lie inside the Rect ``query`` by scanning all of them."""
    return sum(
        1
        for point in points
        if query.x1 <= point.x <= query.x2 and query.y1 <= point.y <= query.y2
    )


class Node(object):
    """Tree node holding at most ``B`` entries before it overflows."""

    def __init__(self, B):
        self.B = B
        self.id = 0
        # for internal nodes
        self.child_nodes = []
        # for leaf nodes
        self.data_points = []
        self.parent_node = None
        # Placeholder until the first entry arrives; update_MBR overwrites it.
        self.MBR = Rect(-1, -1, -1, -1)

    def add_point(self, point):
        """Add one data point to this leaf and refresh the MBR."""
        self.add_points([point])

    def add_points(self, points):
        """Add several data points to this leaf and refresh the MBR."""
        self.data_points += points
        self.update_MBR()

    def perimeter_increase_with_point(self, point):
        """Return how much the MBR perimeter grows if it has to cover ``point``."""
        grown = Rect(
            min(point.x, self.MBR.x1),
            min(point.y, self.MBR.y1),
            max(point.x, self.MBR.x2),
            max(point.y, self.MBR.y2),
        )
        return grown.perimeter() - self.perimeter()

    def perimeter(self):
        """Return the full perimeter of this node's MBR."""
        return self.MBR.perimeter()

    def _entry_count(self):
        # Number of entries that count towards the node's capacity.
        return len(self.data_points) if self.is_leaf() else len(self.child_nodes)

    def is_underflow(self):
        """Return True when the node holds fewer than ceil(B / 2) entries."""
        return self._entry_count() < math.ceil(self.B / 2)

    def is_overflow(self):
        """Return True when the node holds more than B entries."""
        return self._entry_count() > self.B

    def is_root(self):
        """Return True when the node has no parent."""
        return self.parent_node is None

    def is_leaf(self):
        """Return True when the node has no child nodes."""
        return len(self.child_nodes) == 0

    def add_child_node(self, node):
        """Attach one child node and refresh the MBR."""
        self.add_child_nodes([node])

    def add_child_nodes(self, nodes):
        """Attach several child nodes (re-parenting them) and refresh the MBR."""
        for node in nodes:
            node.parent_node = self
            self.child_nodes.append(node)
        self.update_MBR()

    def update_MBR(self):
        """Recompute the MBR from the entries and propagate upwards if needed.

        A node without any entry keeps its current MBR instead of raising on
        ``min([])``.
        """
        if self.is_leaf():
            if not self.data_points:
                return
            xs = [point.x for point in self.data_points]
            ys = [point.y for point in self.data_points]
            self.MBR.x1, self.MBR.x2 = min(xs), max(xs)
            self.MBR.y1, self.MBR.y2 = min(ys), max(ys)
        else:
            boxes = [child.MBR for child in self.child_nodes]
            self.MBR.x1 = min(box.x1 for box in boxes)
            self.MBR.x2 = max(box.x2 for box in boxes)
            self.MBR.y1 = min(box.y1 for box in boxes)
            self.MBR.y2 = max(box.y2 for box in boxes)
        # The parent only needs recomputing when this MBR is no longer
        # strictly inside it.
        parent = self.parent_node
        if parent is not None and not parent.MBR.contain_rect(self.MBR):
            parent.update_MBR()

    @staticmethod
    def get_points_MBR_perimeter(points):
        """Return the perimeter of the MBR enclosing a non-empty list of points."""
        xs = [point.x for point in points]
        ys = [point.y for point in points]
        return Rect(min(xs), min(ys), max(xs), max(ys)).perimeter()

    @staticmethod
    def get_nodes_MBR_perimeter(nodes):
        """Return the perimeter of the MBR enclosing a non-empty list of nodes."""
        x1 = min(node.MBR.x1 for node in nodes)
        x2 = max(node.MBR.x2 for node in nodes)
        y1 = min(node.MBR.y1 for node in nodes)
        y2 = max(node.MBR.y2 for node in nodes)
        return Rect(x1, y1, x2, y2).perimeter()


class RegionTree:
    """R-tree answering "how many points fall inside this rectangle" queries."""

    def __init__(self, B=4):
        self.B = B
        self.root = Node(self.B)

    def insert_point(self, point, cur_node=None):
        """Insert ``point`` into the subtree at ``cur_node`` (default: the root)."""
        if cur_node is None:
            cur_node = self.root

        if cur_node.is_leaf():
            cur_node.add_point(point)
            if cur_node.is_overflow():
                self.handle_overflow(cur_node)
        else:
            chosen_child = self.choose_best_child(cur_node, point)
            self.insert_point(point, cur_node=chosen_child)

    @staticmethod
    def choose_best_child(node, point):
        """Return the child whose perimeter grows least to cover ``point``.

        Ties go to the earliest child; ``None`` is returned for a node
        without children.
        """
        return min(
            node.child_nodes,
            key=lambda child: child.perimeter_increase_with_point(point),
            default=None,
        )

    def handle_overflow(self, node):
        """Split the overflowing ``node`` and propagate overflow to its ancestors."""
        if node.is_leaf():
            node, new_node = self.split_leaf_node(node)
        else:
            node, new_node = self.split_internal_node(node)

        if self.root is node:
            # The root was split: grow the tree by one level.
            self.root = Node(self.B)
            self.root.add_child_nodes([node, new_node])
        else:
            node.parent_node.add_child_node(new_node)
            if node.parent_node.is_overflow():
                self.handle_overflow(node.parent_node)

    @staticmethod
    def _best_split(entries, sort_keys, group_perimeter):
        """Return the (left, right) partition with the smallest summed perimeter.

        ``entries`` is sorted by every key in ``sort_keys`` and cut at each
        index between 40% and 60% of its length.  The cut index is clamped to
        [1, len - 1] so neither side is ever empty.  The first candidate with
        the minimal sum wins.

        Raises:
            ValueError: if ``entries`` has fewer than two elements.
        """
        m = len(entries)
        first_cut = max(1, int(0.4 * m))
        last_cut = min(m - 1, int(m * 0.6))
        best_pair = None
        best_perimeter = None
        for sort_key in sort_keys:
            ordered = sorted(entries, key=sort_key)
            for i in range(first_cut, last_cut + 1):
                left, right = ordered[:i], ordered[i:]
                total = group_perimeter(left) + group_perimeter(right)
                if best_perimeter is None or total < best_perimeter:
                    best_perimeter = total
                    best_pair = (left, right)
        if best_pair is None:
            raise ValueError("cannot split a node with fewer than two entries")
        return best_pair

    def split_leaf_node(self, node):
        """Split a leaf in place; return ``(node, new_node)`` with the other half."""
        kept, moved = self._best_split(
            node.data_points,
            (lambda point: point.x, lambda point: point.y),
            Node.get_points_MBR_perimeter,
        )
        node.data_points = kept
        node.update_MBR()
        new_node = Node(self.B)
        new_node.add_points(moved)
        return node, new_node

    def split_internal_node(self, node):
        """Split an internal node in place; return ``(node, new_node)``.

        Children are ordered by the lower x and lower y bound of their MBR.
        """
        kept, moved = self._best_split(
            node.child_nodes,
            (lambda child: child.MBR.x1, lambda child: child.MBR.y1),
            Node.get_nodes_MBR_perimeter,
        )
        node.child_nodes = kept
        node.update_MBR()
        new_node = Node(self.B)
        # add_child_nodes re-parents the moved children to new_node.
        new_node.add_child_nodes(moved)
        return node, new_node

    def region_query(self, rect, node=None):
        """Return the number of stored points inside ``rect`` (a Rect).

        ``node`` is the subtree to search and defaults to the root.
        """
        if node is None:
            node = self.root

        if node.is_leaf():
            return sum(1 for point in node.data_points if rect.has_point(point))
        # Only descend into children whose MBR overlaps the query.
        return sum(
            self.region_query(rect, child)
            for child in node.child_nodes
            if rect.is_overlap(child.MBR)
        )


def test_the_shit():
    """Manual smoke run: build a small random tree and exercise is_overlap."""
    import random
    tree = RegionTree(3)
    for _ in range(15):
        tree.insert_point(Point(random.randint(0, 50), random.randint(0, 50), random.randint(0, 50)))
    Rect(57144, 24954, 58144, 25954).is_overlap(Rect(1, 52163, 100000, 100000))

# --------------------------------------------------------------------------------
# /test_query.txt:
# --------------------------------------------------------------------------------
# 1 | 17840 18840 13971 14971
# 2 | 33451 34451 29693 30693
# 3 | 791 1791 2515 3515
# 4 | 81921 82921 94973 95973
# 5 | 75678 76678 53545 54545
# 6 | 90965 91965 11078 12078
# 7 | 69904 70904 67308 68308
# 8 | 7187 8187 56997 57997
# 9 | 34375 35375 60942 61942
# 10 | 57144 58144 24954 25954
# --------------------------------------------------------------------------------
# /test_query_result.txt:
# --------------------------------------------------------------------------------
# 1 | 15
# 2 | 13
# 3 | 9
# 4 | 8
# 5 | 8
# 6 | 8
# 7 | 12
# 8 | 10
# 9 | 9
# 10 | 11
# --------------------------------------------------------------------------------