├── .gitignore
├── LICENSE
├── README.md
├── argos
│   ├── __init__.py
│   ├── cluster.py
│   ├── io.py
│   ├── noise.py
│   ├── plot.py
│   ├── synthetic.py
│   ├── tool
│   │   ├── __init__.py
│   │   └── stopwatch.py
│   └── util.py
├── data.png
├── demo
│   ├── demo.ipynb
│   ├── demo1.png
│   ├── demo2.png
│   └── kmedoid.py
├── gmmClus.py
├── hausdorf_benchmark.py
├── hdbscanClus.py
├── kmedoidsClus.py
├── merger.py
├── report.pdf
├── report.py
├── result.png
├── segmentation.png
└── synthetic_demo.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | 
49 | # Translations
50 | *.mo
51 | *.pot
52 | 
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 | 
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 | 
61 | # Scrapy stuff:
62 | .scrapy
63 | 
64 | # Sphinx documentation
65 | docs/_build/
66 | 
67 | # PyBuilder
68 | target/
69 | 
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 | 
73 | # pyenv
74 | .python-version
75 | 
76 | # celery beat schedule file
77 | celerybeat-schedule
78 | 
79 | # SageMath parsed files
80 | *.sage.py
81 | 
82 | # dotenv
83 | .env
84 | 
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 | 
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 | 
94 | # Rope project settings
95 | .ropeproject
96 | 
97 | # mkdocs documentation
98 | /site
99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Selçuk Gülcan
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Comparing Trajectory Clustering Methods
2 | 
3 | ## Update (Feb 2022)
4 | 
5 | If you have a problem downloading the public dataset described in the demo file, please try [this link](https://seljuk.me/upload/CVRR_dataset_trajectory_clustering.zip).
6 | 
7 | ## Update (Feb 2022)
8 | 
9 | I recently published [a blog post](https://seljuk.me/notes-on-trajectory-clustering.html) about trajectory clustering. It supplements the repo on a more theoretical level; check it out if the general approach is not clear.
10 | 
11 | ## Update (Feb 2019)
12 | 
13 | Added a [notebook](demo/demo.ipynb) demonstrating every step of the project. Please look at that first: it is shorter and easier to follow than the rest of the project, and it shows the same steps on a public dataset.
14 | 
15 | Public Dataset:
16 | 
17 | ![Public Dataset](demo/demo1.png)
18 | 
19 | Clustered Trajectories:
20 | 
21 | ![Clustered Trajectories](demo/demo2.png)
22 | 
23 | ----
24 | 
25 | ## Introduction
26 | 
27 | This was my pattern recognition course term project. The goal is to compare four clustering algorithms (k-medoids, Gaussian mixture model, DBSCAN, and HDBSCAN) on civil flight data. More detail can be found in the report.pdf file.
28 | 
29 | ![A snapshot of data](data.png)
30 | 
31 | Resulting clusters look like this:
32 | 
33 | ![Resulting clusters with one method](result.png)
34 | 
35 | Trajectory segmentation is applied to reduce the number of sample points, and the Hausdorff distance is used to measure the similarity between trajectories.
36 | 
37 | ![Trajectory Segmentation](segmentation.png)
--------------------------------------------------------------------------------
/argos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/argos/__init__.py
--------------------------------------------------------------------------------
/argos/cluster.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import lil_matrix
2 | from scipy.spatial.distance import directed_hausdorff
3 | import numpy as np
4 | import math
5 | 
6 | def calculate_distance_matrix(traj_list, threshold):
7 | 
8 |     def hausdorf(traj1, traj2):  # symmetric Hausdorff distance between two trajectories
9 |         d = max(directed_hausdorff(traj1, traj2)[0], directed_hausdorff(traj2, traj1)[0])
10 |         return d
11 | 
12 |     size = len(traj_list)
13 | 
14 |     for i in range(size):
15 |         traj_list[i] = np.array(traj_list[i])
16 | 
17 |     D = lil_matrix((size, size))
18 | 
19 |     for i in range(size):
20 |         for j in range(i + 1, size):
21 |             distance = hausdorf(traj_list[i], traj_list[j])
22 |             if distance < threshold:  # pairs at or beyond the threshold stay zero (unstored)
23 |                 D[i, j] = distance
24 |                 D[j, i] = distance
25 | 
26 |     return D
27 | 
28 | def calculate_dense_distance_matrix(traj_list):
29 | 
30 |     def hausdorf(traj1, traj2):  # symmetric Hausdorff distance between two trajectories
31 |         d = max(directed_hausdorff(traj1, traj2)[0], directed_hausdorff(traj2, traj1)[0])
32 |         return d
33 | 
34 |     size = len(traj_list)
35 | 
36 |     for i in range(size):
37 |         traj_list[i] = np.array(traj_list[i])
38 | 
39 |     D = np.zeros((size, size))  # np.zeros so the diagonal is a valid self-distance of 0
40 | 
41 |     for i in range(size):
42 |         for j in range(i + 1, size):
43 |             distance = hausdorf(traj_list[i], traj_list[j])
44 |             D[i, j] = distance
45 |             D[j, i] = distance
46 | 
47 |     return D
48 | 
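# Usage sketch (illustration only, not part of the original module): build a
# sparse Hausdorff distance matrix for a few random walks and hand it to an
# estimator that accepts precomputed distances.
#
#     import numpy as np
#     from argos.cluster import calculate_distance_matrix
#
#     trajs = [np.cumsum(np.random.randn(20, 2), axis=0) for _ in range(10)]
#     D = calculate_distance_matrix(trajs, threshold=5.0)
#
# Pairs farther apart than the threshold stay zero in the lil_matrix; a
# sparse-aware estimator such as sklearn's DBSCAN(metric="precomputed") treats
# unstored entries as "not neighbors" (synthetic_demo.py relies on this).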
49 | def kMedoids(D, k, tmax=100):
50 |     # determine dimensions of distance matrix D
51 |     m, n = D.shape
52 | 
53 |     #D = D.todense()
54 |     #D[D == 0] = math.inf
55 | 
56 |     if k > n:
57 |         raise Exception('too many medoids')
58 |     # randomly initialize an array of k medoid indices
59 |     M = np.arange(n)
60 |     np.random.shuffle(M)
61 |     M = np.sort(M[:k])
62 | 
63 |     # create a copy of the array of medoid indices
64 |     Mnew = np.copy(M)
65 | 
66 |     # initialize a dictionary to represent clusters
67 |     C = {}
68 |     for t in range(tmax):
69 |         # determine clusters, i.e. arrays of data indices
70 |         J = np.argmin(D[:, M], axis=1)
71 | 
72 |         for kappa in range(k):
73 |             C[kappa] = np.where(J == kappa)[0]
74 |         # update cluster medoids
75 |         for kappa in range(k):
76 |             J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
77 |             j = np.argmin(J)
78 |             Mnew[kappa] = C[kappa][j]
79 |         Mnew.sort()  # in place; a bare np.sort(Mnew) only returns an unused copy
80 |         # check for convergence
81 |         if np.array_equal(M, Mnew):
82 |             break
83 |         M = np.copy(Mnew)
84 |     else:
85 |         # final update of cluster memberships
86 |         J = np.argmin(D[:, M], axis=1)
87 |         for kappa in range(k):
88 |             C[kappa] = np.where(J == kappa)[0]
89 | 
90 |     # return results
91 |     return M, C
--------------------------------------------------------------------------------
/argos/io.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import lil_matrix
2 | import scipy.sparse
3 | import pickle
4 | 
5 | 
6 | def save(d, filename):
7 |     with open(filename, "wb") as f:
8 |         pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
9 | 
10 | 
11 | def load(filename):
12 |     with open(filename, "rb") as f:
13 |         return pickle.load(f)
14 | 
15 | 
16 | def save_distance_matrix(x, filename):
17 |     x = scipy.sparse.csr_matrix(x)  # save_npz cannot serialize LIL directly
18 |     scipy.sparse.save_npz(filename, x)
19 | 
20 | 
21 | def load_distance_matrix(filename):
22 |     d = scipy.sparse.load_npz(filename)
23 |     d = lil_matrix(d)  # back to LIL for cheap element access
24 |     return d
--------------------------------------------------------------------------------
/argos/noise.py:
--------------------------------------------------------------------------------
1 | import argos.util as util
2 | 
3 | _moved_threshold = 10000  # If a plane moves less than this distance (in meters), the flight is removed.
4 | _speed_threshold = 50  # If a plane moves slower than this (m/s), it is considered stopped.
5 | _length_threshold = 20  # A path should contain at least this many points.
6 | 
7 | _segmentation_angle_threshold = 5  # in degrees
8 | 
9 | def remove_noise(traj_dict):
10 | 
11 |     keys_to_remove = []
12 | 
13 |     for key in traj_dict.keys():
14 |         path = traj_dict[key]["path"]
15 | 
16 |         length = len(path)
17 | 
18 |         if length < _length_threshold:
19 |             keys_to_remove.append(key)
20 | 
21 |         else:
22 |             start = path[0]
23 |             middle = path[length // 2]
24 |             end = path[length - 1]
25 | 
26 |             d1 = util.distance(start, middle)
27 |             d2 = util.distance(middle, end)
28 | 
29 |             if d1 < _moved_threshold and d2 < _moved_threshold:
30 |                 keys_to_remove.append(key)
31 | 
32 |     for key in keys_to_remove:
33 |         traj_dict.pop(key)
34 | 
35 | 
36 | def _shorten_traj(traj):
37 | 
38 |     retval = [x for x in traj if x[2] > _speed_threshold]  # x[2] is the speed component of a point
39 |     return retval
40 | 
41 | 
42 | def shorten(traj_dict):
43 | 
44 |     for key in traj_dict.keys():
45 |         path = traj_dict[key]["path"]
46 |         path = _shorten_traj(path)
47 |         traj_dict[key]["path"] = path
48 |         traj_dict[key]["count"] = len(path)
49 | 
50 | 
51 | def _segmentation(traj):
52 |     retval = []
53 | 
54 |     azimuth = 1000  # sentinel larger than any real heading, so the first point is always kept
55 |     for point in traj:
56 |         d_azimuth = abs(azimuth - point[3])  # note: no wrap-around handling at 360 degrees
57 |         if d_azimuth > _segmentation_angle_threshold:
58 |             azimuth = point[3]
59 |             retval.append(point)
60 | 
61 |     retval.append(traj[-1])  # always keep the last point
62 | 
63 |     return retval
64 | 
65 | 
66 | def segmentation_list(traj_list):
67 | 
68 |     for i in range(len(traj_list)):
69 |         traj_list[i] = _segmentation(traj_list[i])
--------------------------------------------------------------------------------
/argos/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import matplotlib.image as mpimg
3 | 
4 | _base_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
5 | 
6 | 
7 | def plot_traj(x, color=_base_color, alpha=1.0):
8 |     plt.plot([p[0] for p in x], [p[1] for p in x], c=color, alpha=alpha)
9 | 
10 | def plot_map():
11 |     img = mpimg.imread("turkey.png")
12 |     extent = [0, 1639740, 0, 670220]
13 |     plt.axis(extent)
14 |     plt.tight_layout()
15 |     plt.imshow(img, zorder=0, extent=extent)
16 |     plt.gca().invert_yaxis()  # gca(): plt.axes() would create a new, empty axes on top of the image
17 |     plt.show()
--------------------------------------------------------------------------------
/argos/synthetic.py:
--------------------------------------------------------------------------------
1 | import random
2 | import math
3 | import argos.util as util
4 | 
5 | 
6 | def generate_traj(point_list, noise):
7 |     traj = []
8 |     for point in point_list:
9 |         azimuth = random.random() * math.pi * 2  # random direction over the full circle, so the jitter is isotropic
10 |         distance = random.random() * noise
11 |         noised_point = util.get_point(point, azimuth, distance)
12 |         traj.append(noised_point)
13 | 
14 |     return traj
15 | 
16 | 
17 | def generate_cluster(n, length, noise):
18 |     canvas_range = 100
19 |     azimuth_range = math.pi / 6
20 |     distance_range = 10
21 | 
22 |     x = random.random() * canvas_range - canvas_range * 0.5
23 |     y = random.random() * canvas_range - canvas_range * 0.5
24 |     azimuth = random.random() * math.pi * 2
25 |     point = (x, y)
26 | 
27 |     point_list = []
28 |     for i in range(length):
29 |         distance = random.random() * distance_range
30 |         point = util.get_point(point, azimuth, distance)
31 |         azimuth = azimuth + random.random() * azimuth_range - azimuth_range * 0.5
32 |         point_list.append(point)
33 | 
34 |     traj_list = []
35 |     for i in range(n):
36 |         traj_list.append(generate_traj(point_list, noise))
37 |     return traj_list
38 | 
--------------------------------------------------------------------------------
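A minimal usage sketch for argos.synthetic (illustration only, not a file in the repo; assumes matplotlib is installed): draw three generated clusters plus a lone outlier trajectory.

import matplotlib.pyplot as plt
from argos.synthetic import generate_cluster

for _ in range(3):
    for traj in generate_cluster(n=10, length=50, noise=3):
        plt.plot([p[0] for p in traj], [p[1] for p in traj], alpha=0.5)
for traj in generate_cluster(n=1, length=50, noise=3):  # a single-trajectory "cluster" acts as an outlier
    plt.plot([p[0] for p in traj], [p[1] for p in traj], c="r")
plt.title("Synthetic trajectory clusters")
plt.show()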
/argos/tool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/argos/tool/__init__.py
--------------------------------------------------------------------------------
/argos/tool/stopwatch.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | class Stopwatch:
5 | 
6 |     def __init__(self):
7 |         self.start_time = 0
8 | 
9 |     def start(self):
10 |         self.start_time = time.time()
11 | 
12 |     def stop(self, message="Stopwatch stopped"):
13 |         elapsed = time.time() - self.start_time
14 |         print("%s : %.2f s" % (message, elapsed))
--------------------------------------------------------------------------------
/argos/util.py:
--------------------------------------------------------------------------------
1 | import math
2 | 
3 | map_width = 1639.74 * 1000  # in meters
4 | map_height = 670.22 * 1000  # in meters
5 | 
6 | map_lon_left = 26
7 | map_lon_right = 45
8 | map_lon_delta = map_lon_right - map_lon_left
9 | 
10 | map_lat_bottom = 36
11 | map_lat_bottom_rad = map_lat_bottom * math.pi / 180  # in radians
12 | 
13 | 
14 | def get_point(point, azimuth, distance):
15 |     x = point[0] + distance * math.cos(azimuth)
16 |     y = point[1] + distance * math.sin(azimuth)
17 | 
18 |     return x, y
19 | 
20 | 
21 | def distance(point1, point2):
22 |     x = abs(point1[0] - point2[0])
23 |     y = abs(point1[1] - point2[1])
24 |     d = math.sqrt(x ** 2 + y ** 2)
25 |     return d
26 | 
27 | 
28 | def geo_to_xy(lat, lon):
29 | 
30 |     # Convert lat/lon to x/y in meters relative to the map's reference corner, using the Mercator projection.
31 |     # Adapted from https://stackoverflow.com/questions/2103924/mercator-longitude-and-latitude-calculations-to-x-and-y-on-a-cropped-map-of-the/10401734#10401734
32 |     x = (lon - map_lon_left) * (map_width / map_lon_delta)
33 | 
34 |     lat = lat * math.pi / 180  # to radians
35 |     world_map_width = ((map_width / map_lon_delta) * 360) / (2 * math.pi)
36 |     map_offset_y = (world_map_width / 2 * math.log((1 + math.sin(map_lat_bottom_rad)) / (1 - math.sin(map_lat_bottom_rad))))
37 |     y = map_height - ((world_map_width / 2 * math.log((1 + math.sin(lat)) / (1 - math.sin(lat)))) - map_offset_y)
38 |     x = int(x)
39 |     y = int(y)
40 |     return x, y
41 | 
42 | 
43 | def ft_to_m(ft):
44 |     return 0.3048 * ft
45 | 
46 | 
47 | def kt_to_kph(kt):
48 |     return 1.852 * kt
49 | 
50 | 
51 | def fpm_to_mps(fpm):
52 |     return 0.0051 * fpm  # ~0.3048 / 60
53 | 
54 | 
55 | def traj_dict_to_traj_list(traj_dict):
56 |     traj_list = []
57 |     for key in traj_dict.keys():
58 |         traj_list.append(traj_dict[key]["path"])
59 |     return traj_list
60 | 
61 | 
62 | def summary_traj_list(traj_list):
63 |     print("Total trajectories : %s" % len(traj_list))
64 |     min_length = math.inf
65 |     max_length = 0
66 |     total_length = 0
67 |     for traj in traj_list:
68 |         length = len(traj)
69 |         if length > max_length:
70 |             max_length = length
71 |         if length < min_length:
72 |             min_length = length
73 |         total_length += length
74 | 
75 |     avg = total_length / len(traj_list)
76 |     print("Average length : %s" % avg)
77 |     print("Minimum length : %s" % min_length)
78 |     print("Maximum length : %s" % max_length)
79 | 
80 | 
81 | def summary_traj_dict(traj_dict):
82 |     traj_list = traj_dict_to_traj_list(traj_dict)
83 |     summary_traj_list(traj_list)
--------------------------------------------------------------------------------
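A small worked example for argos.util.geo_to_xy (illustration only; the coordinates are arbitrary): project a point near Ankara (about 39.9 N, 32.9 E) onto the map's meter grid.

from argos.util import geo_to_xy

x, y = geo_to_xy(39.9, 32.9)
print(x, y)  # both values land inside the 1,639,740 m x 670,220 m map extent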
/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/data.png
--------------------------------------------------------------------------------
/demo/demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/demo/demo1.png
--------------------------------------------------------------------------------
/demo/demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/demo/demo2.png
--------------------------------------------------------------------------------
/demo/kmedoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | 
4 | def kMedoids(D, k, tmax=100):
5 |     # determine dimensions of distance matrix D
6 |     m, n = D.shape
7 | 
8 |     np.fill_diagonal(D, math.inf)  # mask self-distances
9 | 
10 |     if k > n:
11 |         raise Exception('too many medoids')
12 |     # randomly initialize an array of k medoid indices
13 |     M = np.arange(n)
14 |     np.random.shuffle(M)
15 |     M = np.sort(M[:k])
16 | 
17 |     # create a copy of the array of medoid indices
18 |     Mnew = np.copy(M)
19 | 
20 |     # initialize a dictionary to represent clusters
21 |     C = {}
22 |     for t in range(tmax):
23 |         # determine clusters, i.e. arrays of data indices
24 |         J = np.argmin(D[:, M], axis=1)
25 | 
26 |         for kappa in range(k):
27 |             C[kappa] = np.where(J == kappa)[0]
28 |         # update cluster medoids
29 |         for kappa in range(k):
30 |             J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
31 |             j = np.argmin(J)
32 |             Mnew[kappa] = C[kappa][j]
33 |         Mnew.sort()  # in place; a bare np.sort(Mnew) only returns an unused copy
34 |         # check for convergence
35 |         if np.array_equal(M, Mnew):
36 |             break
37 |         M = np.copy(Mnew)
38 |     else:
39 |         # final update of cluster memberships
40 |         J = np.argmin(D[:, M], axis=1)
41 |         for kappa in range(k):
42 |             C[kappa] = np.where(J == kappa)[0]
43 | 
44 |     np.fill_diagonal(D, 0)  # restore the diagonal before returning
45 | 
46 |     # return results
47 |     return M, C
--------------------------------------------------------------------------------
/gmmClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from sklearn import metrics
6 | from sklearn.mixture import GaussianMixture  # GMM was removed from modern sklearn; GaussianMixture replaces it
7 | 
8 | traj_list = io.load("1_traj_seg.dt")
9 | traj_list = traj_list[:1000]
10 | 
11 | X = np.fromfile("gaussian_representation.dat", dtype=float)  # flat array; reshape to (n_samples, n_features) before fitting
12 | D = io.load_distance_matrix("distance1.npz")
13 | 
14 | no_of_cluster = 12
15 | gmm = GaussianMixture(n_components=no_of_cluster, max_iter=1000)
16 | labels = gmm.fit_predict(X)
17 | 
18 | # Postprocessing
19 | 
20 | clusters = [[] for i in range(no_of_cluster)]
21 | no = len(traj_list)
22 | for i in range(no):
23 |     label = int(labels[i])
24 |     clusters[label].append(traj_list[i])
25 | 
26 | silhouette_score = metrics.silhouette_score(D, labels, sample_size=1000)  # note: D is a distance matrix; metric="precomputed" would fit it better
27 | print("Silhouette Coefficient : %.3f" % silhouette_score)
28 | 
29 | # Plotting Clustered Trajectories
30 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
31 | for i in range(no_of_cluster):
32 |     for traj in clusters[i]:
33 |         next_color = color_list[i % len(color_list)]
34 |         tplot.plot_traj(traj, next_color, alpha=1)
35 | 
36 | tplot.plot_map()
37 | 
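# Possible extension (a sketch, not in the original script): instead of
# hard-coding no_of_cluster, pick the number of mixture components by BIC.
#
#     bic = [GaussianMixture(n_components=k, max_iter=1000).fit(X).bic(X)
#            for k in range(2, 30)]
#     best_k = int(np.argmin(bic)) + 2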
--------------------------------------------------------------------------------
/hausdorf_benchmark.py:
--------------------------------------------------------------------------------
1 | import math
2 | import matplotlib.pyplot as plt
3 | import random
4 | import time
5 | import numpy as np
6 | from scipy.spatial.distance import directed_hausdorff
7 | 
8 | def distance(point1, point2):
9 |     retval = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
10 |     retval = math.sqrt(retval)
11 |     return retval
12 | 
13 | 
14 | def hausdorf_oneway(traj1, traj2):
15 |     # This is a slow reference implementation. A fast version is available at:
16 |     # https://github.com/mavillan/py-hausdorff
17 | 
18 |     max_min = 0
19 | 
20 |     for point1 in traj1:
21 | 
22 |         min_d = math.inf
23 |         for point2 in traj2:
24 | 
25 |             d = distance(point1, point2)
26 |             if d < min_d:
27 |                 min_d = d
28 | 
29 |         if min_d > max_min:
30 |             max_min = min_d
31 | 
32 |     return max_min
33 | 
34 | 
35 | def hausdorf(traj1, traj2):
36 |     d1 = hausdorf_oneway(traj1, traj2)
37 |     d2 = hausdorf_oneway(traj2, traj1)
38 | 
39 |     if d1 > d2:
40 |         return d1
41 | 
42 |     return d2
43 | 
44 | def scipy_hausdorff(u, v):
45 | 
46 |     # Trajectories are assumed to be given as numpy arrays
47 |     d = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0])
48 |     return d
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     print("Hausdorff distance benchmark")
53 | 
54 |     traj_list = []
55 |     traj_count = 1000
56 |     traj_length = 100
57 | 
58 |     start = time.time()
59 |     for i in range(traj_count):
60 |         traj_list.append([])
61 |         for j in range(traj_length):
62 |             x = random.random() * 100
63 |             y = random.random() * 100
64 |             traj_list[i].append((x, y))
65 | 
66 |     end = time.time()
67 |     elapsed = end - start
68 |     print("Creating trajectories took %s seconds" % elapsed)
69 | 
70 |     start = time.time()
71 |     for i in range(len(traj_list)):
72 |         traj_list[i] = np.array(traj_list[i])
73 | 
74 |     end = time.time()
75 |     elapsed = end - start
76 |     print("Converting trajectories to numpy arrays took %s seconds" % elapsed)
77 | 
78 |     start = time.time()
79 | 
80 |     for i in range(traj_count):
81 |         for j in range(i, traj_count):
82 |             scipy_hausdorff(traj_list[i], traj_list[j])
83 | 
84 |     end = time.time()
85 |     elapsed = end - start
86 |     print("Calculating Hausdorff distances took %s seconds" % elapsed)
--------------------------------------------------------------------------------
/hdbscanClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import hdbscan
6 | from sklearn import metrics
7 | 
8 | traj_list = io.load("1_traj_seg.dt")
9 | traj_list = traj_list[:1000]
10 | min_samples = 1
11 | min_cluster_size = 2
12 | 
13 | D = io.load_distance_matrix("distance1.npz")
14 | 
15 | dbscan = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, min_samples=min_samples, metric="precomputed", memory="hdbscan_cache")
16 | dbscan.fit(D)
17 | 
18 | # Postprocessing
19 | no_of_labels = np.max(dbscan.labels_) + 1
20 | 
21 | print("Total number of clusters : %s" % no_of_labels)
22 | 
23 | clusters = [[] for i in range(no_of_labels)]
24 | outliers = []
25 | no = len(traj_list)
26 | for i in range(no):
27 |     label = dbscan.labels_[i]
28 |     if label == -1:
29 |         outliers.append(traj_list[i])
30 |     else:
31 |         clusters[label].append(traj_list[i])
32 | 
33 | no_of_noise = len(outliers)
34 | print("Number of noise points %s" % no_of_noise)
35 | print("Noise Percentage : %.3f" % (no_of_noise / no))
36 | 
37 | silhouette_score = metrics.silhouette_score(D, dbscan.labels_)
38 | print("Silhouette Coefficient : %.3f" % silhouette_score)
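# Note (not in the original script): D is a pairwise-distance matrix, so passing
# metric="precomputed" would match how HDBSCAN consumed it above; as written,
# the rows of D are treated as feature vectors. The score also counts the noise
# label (-1) as if it were a regular cluster. A variant restricted to clustered
# points could look like:
#
#     idx = np.where(dbscan.labels_ != -1)[0]
#     core = D.toarray()[np.ix_(idx, idx)]
#     print(metrics.silhouette_score(core, dbscan.labels_[idx], metric="precomputed"))
#
# Remember that D stores 0 for pairs beyond the cut-off threshold, so absolute
# silhouette values should be read with care.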
39 | 
40 | # Plotting Clustered Trajectories
41 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
42 | for i in range(no_of_labels):
43 |     for traj in clusters[i]:
44 |         next_color = color_list[i % len(color_list)]  # i, not 0, so each cluster gets its own color
45 |         tplot.plot_traj(traj, next_color, alpha=1)
46 |         #tplot.plot_traj(traj)
47 | 
48 | for traj in outliers:
49 |     tplot.plot_traj(traj, "r")
50 | 
51 | tplot.plot_map()
52 | 
--------------------------------------------------------------------------------
/kmedoidsClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import hdbscan  # not used below; kept from an earlier experiment
6 | from sklearn import metrics
7 | import argos.cluster as cluster
8 | 
9 | traj_list = io.load("1_traj_seg.dt")
10 | traj_list = traj_list[:1000]
11 | 
12 | '''
13 | D = io.load_distance_matrix("distance1.npz")
14 | D = D[:1000,:1000]
15 | '''
16 | 
17 | #D = cluster.calculate_dense_distance_matrix(traj_list)
18 | #D.tofile("dense.dat")
19 | D = np.fromfile("dense.dat", dtype=float)
20 | print(D.shape)
21 | D = D.reshape((1000, 1000))
22 | 
23 | K = 20
24 | M, C = cluster.kMedoids(D, K)
25 | 
26 | # Postprocessing
27 | 
28 | print("Total number of clusters : %s" % K)
29 | 
30 | labels = np.zeros((len(traj_list),))
31 | 
32 | assigned_count = 0
33 | for i in range(K):
34 |     for index in C[i]:
35 |         assigned_count += 1
36 |         labels[index] = i
37 | 
38 | print("Assigned trajectories : %s" % assigned_count)
39 | 
40 | clusters = [[] for i in range(K)]
41 | no = len(traj_list)
42 | for i in range(no):
43 |     label = int(labels[i])
44 |     clusters[label].append(traj_list[i])
45 | 
46 | silhouette_score = metrics.silhouette_score(D, labels, metric="precomputed", sample_size=1000)  # D holds pairwise distances
47 | print("Silhouette Coefficient : %.3f" % silhouette_score)
48 | 
49 | sse_list = []
50 | for h in range(2, 80):
51 |     M, C = cluster.kMedoids(D, h)
52 |     sse = 0
53 |     for i in range(h):
54 |         medoid_index = M[i]
55 |         sse += np.sum(D[medoid_index, C[i]] ** 2)
56 |     sse_list.append(sse)
57 |     print(sse)
58 | 
59 | plt.plot(range(2, 80), sse_list)
60 | plt.xlabel("K")
61 | plt.ylabel("SSE")
62 | plt.title("Sum of Squared Error")
63 | plt.show()
64 | 
65 | 
66 | # Plotting Clustered Trajectories
67 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
68 | for i in range(K):
69 |     for traj in clusters[i]:
70 |         next_color = color_list[i % len(color_list)]
71 |         tplot.plot_traj(traj, next_color, alpha=1)
72 | 
73 | tplot.plot_map()
74 | 
--------------------------------------------------------------------------------
/merger.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import math
3 | import argos.plot as tplot
4 | import argos.util as util
5 | import argos.noise as reduc
6 | import argos.cluster as cluster
7 | from argos.tool.stopwatch import Stopwatch
8 | 
9 | traj_id = 0
10 | id_list = []
11 | traj_list = []
12 | for i in range(7):
13 |     filename = "%s.traj" % i
14 |     traj_dict = io.load(filename)
15 | 
16 |     reduc.shorten(traj_dict)
17 |     reduc.remove_noise(traj_dict)
18 | 
19 |     for key in traj_dict:
20 |         id_list.append((traj_id, key))
21 |         traj_id += 1
22 |         path = traj_dict[key]["path"]
23 |         traj_list.append(path)
24 | 
25 |     io.save(traj_list, "%s_traj.dt" % i)
26 | 
27 |     reduc.segmentation_list(traj_list)
28 | 
29 |     io.save(traj_list, "%s_traj_seg.dt" % i)
30 |     traj_list = []
31 | io.save(id_list, "id_list.dt")
32 | exit(0)  # everything below is earlier exploratory code, kept for reference and never executed
33 | 
34 | 
35 | s = Stopwatch()
36 | n = 100
37 | 
38 | traj_dict = io.load("1.traj")
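# Note on the unreachable block below: it walks the preprocessing pipeline step
# by step. shorten() drops points below the speed threshold, remove_noise()
# drops short or non-moving flights, and segmentation_list() keeps only points
# where the heading changes by more than the angle threshold. Finally, the
# distance matrices computed before (D1) and after (D2) segmentation are
# compared to estimate how much accuracy segmentation costs.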
39 | 
40 | util.summary_traj_dict(traj_dict)
41 | 
42 | reduc.shorten(traj_dict)
43 | 
44 | 
45 | util.summary_traj_dict(traj_dict)
46 | 
47 | reduc.remove_noise(traj_dict)
48 | 
49 | util.summary_traj_dict(traj_dict)
50 | 
51 | reduc.remove_noise(traj_dict)
52 | reduc.shorten(traj_dict)
53 | 
54 | util.summary_traj_dict(traj_dict)
55 | 
56 | traj_list = util.traj_dict_to_traj_list(traj_dict)
57 | 
58 | util.summary_traj_list(traj_list)
59 | 
60 | s.start()
61 | D1 = cluster.calculate_distance_matrix(traj_list[:n], 80000)
62 | s.stop()
63 | 
64 | reduc.segmentation_list(traj_list)
65 | 
66 | util.summary_traj_list(traj_list)
67 | 
68 | 
69 | traj_list = traj_list[:n]
70 | 
71 | s.start()
72 | D2 = cluster.calculate_distance_matrix(traj_list, 80000)
73 | s.stop()
74 | 
75 | total_diff = 0
76 | diff_count = 0
77 | for i in range(n):
78 |     for j in range(i, n):
79 |         diff = abs(D1[i, j] - D2[i, j])
80 |         total_diff += diff
81 |         if diff > 1000:
82 |             diff_count += 1
83 | 
84 | print(diff_count)
85 | 
86 | print(total_diff)
87 | exit(0)  # more scratch code below
88 | 
89 | count_flew = 0
90 | count_moved_far = 0
91 | traj_list = []
92 | suspi = None  # a flight that apparently never left the ground, kept for inspection
93 | for key in traj_dict.keys():
94 |     path = traj_dict[key]["path"]
95 |     count = 0
96 |     flew = False
97 | 
98 |     start = path[0]
99 |     end = path[len(path) - 1]
100 |     if abs(start[0] - end[0]) > 30000 or abs(start[1] - end[1]) > 30000:
101 |         count_moved_far += 1
102 | 
103 |     for point in path:
104 |         if point[2] < 1 and point[5] < 1:  # point[2]: speed, point[5]: (presumably) vertical rate
105 |             count += 1
106 |         if not flew and point[5] > 5:
107 |             flew = True
108 |             count_flew += 1
109 | 
110 |     if not flew:
111 |         suspi = path
112 | 
113 |     traj_list.append(path)
114 | 
115 | print(count_flew)
116 | print(count_moved_far)
117 | 
118 | '''
119 | for traj in traj_list:
120 |     tplot.plot_traj(traj)
121 | '''
122 | 
123 | print(suspi)
124 | tplot.plot_traj(suspi)
125 | 
126 | tplot.plot_map()
127 | 
128 | exit(0)  # final scratch block: merge overlapping .traj files
129 | 
130 | 
131 | filename1 = "3.traj"
132 | filename2 = "6.traj"
133 | 
134 | d1 = io.load(filename1)
135 | d2 = io.load(filename2)
136 | 
137 | key1 = set(d1.keys())
138 | key2 = set(d2.keys())
139 | common = key1.intersection(key2)
140 | 
141 | print(len(common))
142 | 
143 | for key in common:
144 |     d1[key]["path"] = d1[key]["path"] + d2[key]["path"]
145 |     d1[key]["count"] = d1[key]["count"] + d2[key]["count"]
146 |     d2.pop(key)
147 | 
148 | io.save(d1, filename1)
149 | io.save(d2, filename2)
--------------------------------------------------------------------------------
/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/report.pdf
--------------------------------------------------------------------------------
/report.py:
--------------------------------------------------------------------------------
1 | import argos.util as util
2 | import math
3 | import matplotlib.pyplot as plt
4 | import argos.noise as reduc
5 | 
6 | base_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
7 | def plot_traj(x, color=base_color):  # plot a set of 2D points as dots connected by a line
8 |     plt.scatter([p[0] for p in x], [p[1] for p in x], c=color, s=20)
9 |     plt.plot([p[0] for p in x], [p[1] for p in x], c=color)
10 | 
11 | 
12 | def create_point(point, azimuth):
13 |     az = (azimuth / 180) * math.pi
14 |     p = util.get_point(point, az, 10)
15 |     a = []  # point layout: [x, y, speed, azimuth]
16 |     a.append(p[0])
17 |     a.append(p[1])
18 |     a.append(0)
19 |     a.append(azimuth)
20 |     return a
21 | 
22 | 
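# The code below builds one synthetic track from three legs: a straight leg at
# a constant 30-degree heading, a straight leg at 10 degrees, and a final arc
# whose heading grows by 10 degrees per step. With the 5-degree threshold,
# _segmentation should keep the bend between the straight legs and every point
# of the arc while dropping the points along the straight legs.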
23 | traj = []
24 | start = [0, 0, 0, 30]
25 | p = start
26 | for i in range(20):
27 |     p = create_point(p, 30)
28 |     traj.append(p)
29 | 
30 | for i in range(25):
31 |     p = create_point(p, 10)
32 |     traj.append(p)
33 | 
34 | az = 10
35 | for i in range(20):
36 |     az = az + 10
37 |     p = create_point(p, az)
38 |     traj.append(p)
39 | 
40 | plot_traj(traj)
41 | 
42 | for point in traj:
43 |     point[0] += 300  # shift the copy right so both versions are visible side by side
44 | 
45 | traj = reduc._segmentation(traj)
46 | plot_traj(traj, "r")
47 | plt.gca().set_aspect(1)  # gca(): plt.axes() would create a new axes instead of using the current one
48 | plt.title("Trajectory Segmentation")
49 | plt.show()
--------------------------------------------------------------------------------
/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/result.png
--------------------------------------------------------------------------------
/segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/segmentation.png
--------------------------------------------------------------------------------
/synthetic_demo.py:
--------------------------------------------------------------------------------
1 | # Synthetic Demo
2 | 
3 | import random
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | from sklearn.cluster import DBSCAN
7 | from argos.cluster import calculate_distance_matrix
8 | from argos.tool.stopwatch import Stopwatch
9 | from argos.synthetic import generate_cluster
10 | import argos.plot as aplt
11 | 
12 | 
13 | if __name__ == "__main__":
14 |     print("Synthetic Demo")
15 |     s = Stopwatch()
16 | 
17 |     # Parameters of Demo
18 |     no_of_cluster = 20
19 |     no_of_traj = 100
20 |     traj_length = 100
21 |     eps = 10
22 |     min_samples = 5
23 |     noise = 5
24 | 
25 |     # Generating Trajectories
26 |     no_of_outlier = int(no_of_traj * 0.05)
27 |     no_of_traj_each_cluster = no_of_traj // no_of_cluster
28 | 
29 |     print("-----")
30 |     print("No of Clusters : %s" % no_of_cluster)
31 |     print("No of Outliers : %s" % no_of_outlier)
32 |     print("-----")
33 | 
34 |     normal_traj_list = []
35 |     for i in range(no_of_cluster):
36 |         traj_list = generate_cluster(no_of_traj_each_cluster, traj_length, noise)
37 |         normal_traj_list += traj_list
38 | 
39 |     outlier_traj_list = []
40 |     for i in range(no_of_outlier):
41 |         outlier_traj_list += generate_cluster(1, traj_length, noise)
42 | 
43 |     # Plotting Generated Trajectories
44 | 
45 |     plt.figure(1)
46 |     plt.subplot(121)
47 | 
48 |     for traj in normal_traj_list:
49 |         aplt.plot_traj(traj)
50 | 
51 |     for traj in outlier_traj_list:
52 |         aplt.plot_traj(traj, "r")
53 | 
54 |     # Precomputation
55 | 
56 |     traj_list = normal_traj_list + outlier_traj_list
57 |     random.shuffle(traj_list)
58 | 
59 |     # Calculating Distance Matrix
60 |     s.start()
61 |     D = calculate_distance_matrix(traj_list, eps)
62 |     s.stop("Distance matrix calculated")
63 | 
64 |     # Clustering
65 |     s.start()
66 |     dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
67 |     dbscan.fit(D)
68 |     s.stop("Clustering is done")
69 | 
70 |     # Postprocessing
71 |     no_of_labels = np.max(dbscan.labels_) + 1
72 | 
73 |     print("Total number of clusters : %s" % no_of_labels)
74 | 
75 |     clusters = [[] for i in range(no_of_labels)]
76 |     outliers = []
77 |     no = len(traj_list)
78 |     for i in range(no):
79 |         label = dbscan.labels_[i]
80 |         if label == -1:
81 |             outliers.append(traj_list[i])
82 |         else:
83 |             clusters[label].append(traj_list[i])
84 | 
85 |     no_of_noise = len(outliers)
86 |     print("Number of noise points %s" % no_of_noise)
87 |     print("Noise Percentage : %.3f" % (no_of_noise / no))
88 | 
89 |     # Plotting Clustered Trajectories
90 |     plt.subplot(122)
91 |     color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
92 |     for i in range(no_of_labels):
93 |         for traj in clusters[i]:
94 |             next_color = color_list[i % len(color_list)]
95 |             aplt.plot_traj(traj, next_color, alpha=0.3)
96 | 
97 |     for traj in outliers:
98 |         aplt.plot_traj(traj, "k")
99 | 
100 |     plt.show()
--------------------------------------------------------------------------------
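A tiny worked check of the symmetric Hausdorff distance used throughout the repo (illustration only): for two parallel two-point segments one unit apart, both directed distances equal 1, so the symmetric distance is 1.

import numpy as np
from scipy.spatial.distance import directed_hausdorff

u = np.array([[0.0, 0.0], [1.0, 0.0]])
v = np.array([[0.0, 1.0], [1.0, 1.0]])
d = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0])
print(d)  # 1.0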