├── .gitignore
├── LICENSE
├── README.md
├── argos
│   ├── __init__.py
│   ├── cluster.py
│   ├── io.py
│   ├── noise.py
│   ├── plot.py
│   ├── synthetic.py
│   ├── tool
│   │   ├── __init__.py
│   │   └── stopwatch.py
│   └── util.py
├── data.png
├── demo
│   ├── demo.ipynb
│   ├── demo1.png
│   ├── demo2.png
│   └── kmedoid.py
├── gmmClus.py
├── hausdorf_benchmark.py
├── hdbscanClus.py
├── kmedoidsClus.py
├── merger.py
├── report.pdf
├── report.py
├── result.png
├── segmentation.png
└── synthetic_demo.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | 
49 | # Translations
50 | *.mo
51 | *.pot
52 | 
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 | 
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 | 
61 | # Scrapy stuff:
62 | .scrapy
63 | 
64 | # Sphinx documentation
65 | docs/_build/
66 | 
67 | # PyBuilder
68 | target/
69 | 
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 | 
73 | # pyenv
74 | .python-version
75 | 
76 | # celery beat schedule file
77 | celerybeat-schedule
78 | 
79 | # SageMath parsed files
80 | *.sage.py
81 | 
82 | # dotenv
83 | .env
84 | 
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 | 
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 | 
94 | # Rope project settings
95 | .ropeproject
96 | 
97 | # mkdocs documentation
98 | /site
99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Selçuk Gülcan
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Comparing Trajectory Clustering Methods
2 | 
3 | ## Update (Feb 2022)
4 | 
5 | If you have a problem downloading the public dataset described in the demo file, please try [this link](https://seljuk.me/upload/CVRR_dataset_trajectory_clustering.zip).
6 | 
7 | ## Update (Feb 2022)
8 | 
9 | I recently published [a blog post](https://seljuk.me/notes-on-trajectory-clustering.html) about trajectory clustering. It supplements the repo on a more theoretical level; check it out if the general approach is not clear.
10 | 
11 | ## Update (Feb 2019)
12 | 
13 | Added a [notebook](demo/demo.ipynb) demonstrating every step of the project. Please look at that first: it is shorter and easier to follow than the rest of the project, and it shows the same steps on a public dataset.
14 | 
15 | Public Dataset:
16 | 
17 | ![Public Dataset](demo/demo1.png)
18 | 
19 | Clustered Trajectories:
20 | 
21 | ![Clustered Trajectories](demo/demo2.png)
22 | 
23 | ----
24 | 
25 | ## Introduction
26 | 
27 | This was my pattern recognition course term project. The goal is to compare four clustering algorithms (k-medoids, Gaussian mixture model, DBSCAN, and HDBSCAN) on civil flight data. More detail can be found in the report.pdf file.
28 | 
29 | ![A snapshot of data](data.png)
30 | 
31 | Resulting clusters look like this:
32 | 
33 | ![Resulting clusters with one method](result.png)
34 | 
35 | Trajectory segmentation is applied to reduce the number of sample points, and the Hausdorff distance is used to measure the similarity between trajectories.
36 | 
37 | ![Trajectory Segmentation](segmentation.png)
--------------------------------------------------------------------------------
/argos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/argos/__init__.py
--------------------------------------------------------------------------------
/argos/cluster.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import lil_matrix
2 | from scipy.spatial.distance import directed_hausdorff
3 | import numpy as np
4 | import math
5 | 
6 | def calculate_distance_matrix(traj_list, threshold):
7 | 
8 |     def hausdorf(traj1, traj2):  # symmetric Hausdorff distance between two trajectories
9 |         d = max(directed_hausdorff(traj1, traj2)[0], directed_hausdorff(traj2, traj1)[0])
10 |         return d
11 | 
12 |     size = len(traj_list)
13 | 
14 |     for i in range(size):
15 |         traj_list[i] = np.array(traj_list[i])
16 | 
17 |     D = lil_matrix((size, size))
18 | 
19 |     for i in range(size):
20 |         for j in range(i + 1, size):
21 |             distance = hausdorf(traj_list[i], traj_list[j])
22 |             if distance < threshold:  # pairs at or beyond the threshold stay zero (unstored)
23 |                 D[i, j] = distance
24 |                 D[j, i] = distance
25 | 
26 |     return D
27 | 
28 | def calculate_dense_distance_matrix(traj_list):
29 | 
30 |     def hausdorf(traj1, traj2):  # symmetric Hausdorff distance between two trajectories
31 |         d = max(directed_hausdorff(traj1, traj2)[0], directed_hausdorff(traj2, traj1)[0])
32 |         return d
33 | 
34 |     size = len(traj_list)
35 | 
36 |     for i in range(size):
37 |         traj_list[i] = np.array(traj_list[i])
38 | 
39 |     D = np.zeros((size, size))  # np.zeros so the diagonal is a valid self-distance of 0
40 | 
41 |     for i in range(size):
42 |         for j in range(i + 1, size):
43 |             distance = hausdorf(traj_list[i], traj_list[j])
44 |             D[i, j] = distance
45 |             D[j, i] = distance
46 | 
47 |     return D
48 | 
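# Usage sketch (illustration only, not part of the original module): build a
# sparse Hausdorff distance matrix for a few random walks and hand it to an
# estimator that accepts precomputed distances.
#
#     import numpy as np
#     from argos.cluster import calculate_distance_matrix
#
#     trajs = [np.cumsum(np.random.randn(20, 2), axis=0) for _ in range(10)]
#     D = calculate_distance_matrix(trajs, threshold=5.0)
#
# Pairs farther apart than the threshold stay zero in the lil_matrix; a
# sparse-aware estimator such as sklearn's DBSCAN(metric="precomputed") treats
# unstored entries as "not neighbors" (synthetic_demo.py relies on this).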
49 | def kMedoids(D, k, tmax=100):
50 |     # determine dimensions of distance matrix D
51 |     m, n = D.shape
52 | 
53 |     #D = D.todense()
54 |     #D[D == 0] = math.inf
55 | 
56 |     if k > n:
57 |         raise Exception('too many medoids')
58 |     # randomly initialize an array of k medoid indices
59 |     M = np.arange(n)
60 |     np.random.shuffle(M)
61 |     M = np.sort(M[:k])
62 | 
63 |     # create a copy of the array of medoid indices
64 |     Mnew = np.copy(M)
65 | 
66 |     # initialize a dictionary to represent clusters
67 |     C = {}
68 |     for t in range(tmax):
69 |         # determine clusters, i.e. arrays of data indices
70 |         J = np.argmin(D[:, M], axis=1)
71 | 
72 |         for kappa in range(k):
73 |             C[kappa] = np.where(J == kappa)[0]
74 |         # update cluster medoids
75 |         for kappa in range(k):
76 |             J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
77 |             j = np.argmin(J)
78 |             Mnew[kappa] = C[kappa][j]
79 |         Mnew.sort()  # in place; a bare np.sort(Mnew) only returns an unused copy
80 |         # check for convergence
81 |         if np.array_equal(M, Mnew):
82 |             break
83 |         M = np.copy(Mnew)
84 |     else:
85 |         # final update of cluster memberships
86 |         J = np.argmin(D[:, M], axis=1)
87 |         for kappa in range(k):
88 |             C[kappa] = np.where(J == kappa)[0]
89 | 
90 |     # return results
91 |     return M, C
--------------------------------------------------------------------------------
/argos/io.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import lil_matrix
2 | import scipy.sparse
3 | import pickle
4 | 
5 | 
6 | def save(d, filename):
7 |     with open(filename, "wb") as f:
8 |         pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
9 | 
10 | 
11 | def load(filename):
12 |     with open(filename, "rb") as f:
13 |         return pickle.load(f)
14 | 
15 | 
16 | def save_distance_matrix(x, filename):
17 |     x = scipy.sparse.csr_matrix(x)  # save_npz cannot serialize LIL directly
18 |     scipy.sparse.save_npz(filename, x)
19 | 
20 | 
21 | def load_distance_matrix(filename):
22 |     d = scipy.sparse.load_npz(filename)
23 |     d = lil_matrix(d)  # back to LIL for cheap element access
24 |     return d
--------------------------------------------------------------------------------
/argos/noise.py:
--------------------------------------------------------------------------------
1 | import argos.util as util
2 | 
3 | _moved_threshold = 10000  # If a plane moves less than this distance (in meters), the flight is removed.
4 | _speed_threshold = 50  # If a plane moves slower than this (m/s), it is considered stopped.
5 | _length_threshold = 20  # A path should contain at least this many points.
6 | 
7 | _segmentation_angle_threshold = 5  # in degrees
8 | 
9 | def remove_noise(traj_dict):
10 | 
11 |     keys_to_remove = []
12 | 
13 |     for key in traj_dict.keys():
14 |         path = traj_dict[key]["path"]
15 | 
16 |         length = len(path)
17 | 
18 |         if length < _length_threshold:
19 |             keys_to_remove.append(key)
20 | 
21 |         else:
22 |             start = path[0]
23 |             middle = path[length // 2]
24 |             end = path[length - 1]
25 | 
26 |             d1 = util.distance(start, middle)
27 |             d2 = util.distance(middle, end)
28 | 
29 |             if d1 < _moved_threshold and d2 < _moved_threshold:
30 |                 keys_to_remove.append(key)
31 | 
32 |     for key in keys_to_remove:
33 |         traj_dict.pop(key)
34 | 
35 | 
36 | def _shorten_traj(traj):
37 | 
38 |     retval = [x for x in traj if x[2] > _speed_threshold]  # x[2] is the speed component of a point
39 |     return retval
40 | 
41 | 
42 | def shorten(traj_dict):
43 | 
44 |     for key in traj_dict.keys():
45 |         path = traj_dict[key]["path"]
46 |         path = _shorten_traj(path)
47 |         traj_dict[key]["path"] = path
48 |         traj_dict[key]["count"] = len(path)
49 | 
50 | 
51 | def _segmentation(traj):
52 |     retval = []
53 | 
54 |     azimuth = 1000  # sentinel larger than any real heading, so the first point is always kept
55 |     for point in traj:
56 |         d_azimuth = abs(azimuth - point[3])  # note: no wrap-around handling at 360 degrees
57 |         if d_azimuth > _segmentation_angle_threshold:
58 |             azimuth = point[3]
59 |             retval.append(point)
60 | 
61 |     retval.append(traj[-1])  # always keep the last point
62 | 
63 |     return retval
64 | 
65 | 
66 | def segmentation_list(traj_list):
67 | 
68 |     for i in range(len(traj_list)):
69 |         traj_list[i] = _segmentation(traj_list[i])
--------------------------------------------------------------------------------
/argos/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import matplotlib.image as mpimg
3 | 
4 | _base_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
5 | 
6 | 
7 | def plot_traj(x, color=_base_color, alpha=1.0):
8 |     plt.plot([p[0] for p in x], [p[1] for p in x], c=color, alpha=alpha)
9 | 
10 | def plot_map():
11 |     img = mpimg.imread("turkey.png")
12 |     extent = [0, 1639740, 0, 670220]
13 |     plt.axis(extent)
14 |     plt.tight_layout()
15 |     plt.imshow(img, zorder=0, extent=extent)
16 |     plt.gca().invert_yaxis()  # gca(): plt.axes() would create a new, empty axes on top of the image
17 |     plt.show()
--------------------------------------------------------------------------------
/argos/synthetic.py:
--------------------------------------------------------------------------------
1 | import random
2 | import math
3 | import argos.util as util
4 | 
5 | 
6 | def generate_traj(point_list, noise):
7 |     traj = []
8 |     for point in point_list:
9 |         azimuth = random.random() * math.pi * 2  # random direction over the full circle, so the jitter is isotropic
10 |         distance = random.random() * noise
11 |         noised_point = util.get_point(point, azimuth, distance)
12 |         traj.append(noised_point)
13 | 
14 |     return traj
15 | 
16 | 
17 | def generate_cluster(n, length, noise):
18 |     canvas_range = 100
19 |     azimuth_range = math.pi / 6
20 |     distance_range = 10
21 | 
22 |     x = random.random() * canvas_range - canvas_range * 0.5
23 |     y = random.random() * canvas_range - canvas_range * 0.5
24 |     azimuth = random.random() * math.pi * 2
25 |     point = (x, y)
26 | 
27 |     point_list = []
28 |     for i in range(length):
29 |         distance = random.random() * distance_range
30 |         point = util.get_point(point, azimuth, distance)
31 |         azimuth = azimuth + random.random() * azimuth_range - azimuth_range * 0.5
32 |         point_list.append(point)
33 | 
34 |     traj_list = []
35 |     for i in range(n):
36 |         traj_list.append(generate_traj(point_list, noise))
37 |     return traj_list
38 | 
--------------------------------------------------------------------------------
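A minimal usage sketch for argos.synthetic (illustration only, not a file in the repo; assumes matplotlib is installed): draw three generated clusters plus a lone outlier trajectory.

import matplotlib.pyplot as plt
from argos.synthetic import generate_cluster

for _ in range(3):
    for traj in generate_cluster(n=10, length=50, noise=3):
        plt.plot([p[0] for p in traj], [p[1] for p in traj], alpha=0.5)
for traj in generate_cluster(n=1, length=50, noise=3):  # a single-trajectory "cluster" acts as an outlier
    plt.plot([p[0] for p in traj], [p[1] for p in traj], c="r")
plt.title("Synthetic trajectory clusters")
plt.show()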
/argos/tool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/argos/tool/__init__.py
--------------------------------------------------------------------------------
/argos/tool/stopwatch.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | class Stopwatch:
5 | 
6 |     def __init__(self):
7 |         self.start_time = 0
8 | 
9 |     def start(self):
10 |         self.start_time = time.time()
11 | 
12 |     def stop(self, message="Stopwatch stopped"):
13 |         elapsed = time.time() - self.start_time
14 |         print("%s : %.2f s" % (message, elapsed))
--------------------------------------------------------------------------------
/argos/util.py:
--------------------------------------------------------------------------------
1 | import math
2 | 
3 | map_width = 1639.74 * 1000  # in meters
4 | map_height = 670.22 * 1000  # in meters
5 | 
6 | map_lon_left = 26
7 | map_lon_right = 45
8 | map_lon_delta = map_lon_right - map_lon_left
9 | 
10 | map_lat_bottom = 36
11 | map_lat_bottom_rad = map_lat_bottom * math.pi / 180  # in radians
12 | 
13 | 
14 | def get_point(point, azimuth, distance):
15 |     x = point[0] + distance * math.cos(azimuth)
16 |     y = point[1] + distance * math.sin(azimuth)
17 | 
18 |     return x, y
19 | 
20 | 
21 | def distance(point1, point2):
22 |     x = abs(point1[0] - point2[0])
23 |     y = abs(point1[1] - point2[1])
24 |     d = math.sqrt(x ** 2 + y ** 2)
25 |     return d
26 | 
27 | 
28 | def geo_to_xy(lat, lon):
29 | 
30 |     # Convert lat/lon to x/y in meters relative to the map's reference corner, using the Mercator projection.
31 |     # Adapted from https://stackoverflow.com/questions/2103924/mercator-longitude-and-latitude-calculations-to-x-and-y-on-a-cropped-map-of-the/10401734#10401734
32 |     x = (lon - map_lon_left) * (map_width / map_lon_delta)
33 | 
34 |     lat = lat * math.pi / 180  # to radians
35 |     world_map_width = ((map_width / map_lon_delta) * 360) / (2 * math.pi)
36 |     map_offset_y = (world_map_width / 2 * math.log((1 + math.sin(map_lat_bottom_rad)) / (1 - math.sin(map_lat_bottom_rad))))
37 |     y = map_height - ((world_map_width / 2 * math.log((1 + math.sin(lat)) / (1 - math.sin(lat)))) - map_offset_y)
38 |     x = int(x)
39 |     y = int(y)
40 |     return x, y
41 | 
42 | 
43 | def ft_to_m(ft):
44 |     return 0.3048 * ft
45 | 
46 | 
47 | def kt_to_kph(kt):
48 |     return 1.852 * kt
49 | 
50 | 
51 | def fpm_to_mps(fpm):
52 |     return 0.0051 * fpm  # ~0.3048 / 60
53 | 
54 | 
55 | def traj_dict_to_traj_list(traj_dict):
56 |     traj_list = []
57 |     for key in traj_dict.keys():
58 |         traj_list.append(traj_dict[key]["path"])
59 |     return traj_list
60 | 
61 | 
62 | def summary_traj_list(traj_list):
63 |     print("Total trajectories : %s" % len(traj_list))
64 |     min_length = math.inf
65 |     max_length = 0
66 |     total_length = 0
67 |     for traj in traj_list:
68 |         length = len(traj)
69 |         if length > max_length:
70 |             max_length = length
71 |         if length < min_length:
72 |             min_length = length
73 |         total_length += length
74 | 
75 |     avg = total_length / len(traj_list)
76 |     print("Average length : %s" % avg)
77 |     print("Minimum length : %s" % min_length)
78 |     print("Maximum length : %s" % max_length)
79 | 
80 | 
81 | def summary_traj_dict(traj_dict):
82 |     traj_list = traj_dict_to_traj_list(traj_dict)
83 |     summary_traj_list(traj_list)
--------------------------------------------------------------------------------
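A small worked example for argos.util.geo_to_xy (illustration only; the coordinates are arbitrary): project a point near Ankara (about 39.9 N, 32.9 E) onto the map's meter grid.

from argos.util import geo_to_xy

x, y = geo_to_xy(39.9, 32.9)
print(x, y)  # both values land inside the 1,639,740 m x 670,220 m map extent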
/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/data.png
--------------------------------------------------------------------------------
/demo/demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/demo/demo1.png
--------------------------------------------------------------------------------
/demo/demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/demo/demo2.png
--------------------------------------------------------------------------------
/demo/kmedoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | 
4 | def kMedoids(D, k, tmax=100):
5 |     # determine dimensions of distance matrix D
6 |     m, n = D.shape
7 | 
8 |     np.fill_diagonal(D, math.inf)  # mask self-distances
9 | 
10 |     if k > n:
11 |         raise Exception('too many medoids')
12 |     # randomly initialize an array of k medoid indices
13 |     M = np.arange(n)
14 |     np.random.shuffle(M)
15 |     M = np.sort(M[:k])
16 | 
17 |     # create a copy of the array of medoid indices
18 |     Mnew = np.copy(M)
19 | 
20 |     # initialize a dictionary to represent clusters
21 |     C = {}
22 |     for t in range(tmax):
23 |         # determine clusters, i.e. arrays of data indices
24 |         J = np.argmin(D[:, M], axis=1)
25 | 
26 |         for kappa in range(k):
27 |             C[kappa] = np.where(J == kappa)[0]
28 |         # update cluster medoids
29 |         for kappa in range(k):
30 |             J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
31 |             j = np.argmin(J)
32 |             Mnew[kappa] = C[kappa][j]
33 |         Mnew.sort()  # in place; a bare np.sort(Mnew) only returns an unused copy
34 |         # check for convergence
35 |         if np.array_equal(M, Mnew):
36 |             break
37 |         M = np.copy(Mnew)
38 |     else:
39 |         # final update of cluster memberships
40 |         J = np.argmin(D[:, M], axis=1)
41 |         for kappa in range(k):
42 |             C[kappa] = np.where(J == kappa)[0]
43 | 
44 |     np.fill_diagonal(D, 0)  # restore the diagonal before returning
45 | 
46 |     # return results
47 |     return M, C
--------------------------------------------------------------------------------
/gmmClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from sklearn import metrics
6 | from sklearn.mixture import GaussianMixture  # GMM was removed from modern sklearn; GaussianMixture replaces it
7 | 
8 | traj_list = io.load("1_traj_seg.dt")
9 | traj_list = traj_list[:1000]
10 | 
11 | X = np.fromfile("gaussian_representation.dat", dtype=float)  # flat array; reshape to (n_samples, n_features) before fitting
12 | D = io.load_distance_matrix("distance1.npz")
13 | 
14 | no_of_cluster = 12
15 | gmm = GaussianMixture(n_components=no_of_cluster, max_iter=1000)
16 | labels = gmm.fit_predict(X)
17 | 
18 | # Postprocessing
19 | 
20 | clusters = [[] for i in range(no_of_cluster)]
21 | no = len(traj_list)
22 | for i in range(no):
23 |     label = int(labels[i])
24 |     clusters[label].append(traj_list[i])
25 | 
26 | silhouette_score = metrics.silhouette_score(D, labels, sample_size=1000)  # note: D is a distance matrix; metric="precomputed" would fit it better
27 | print("Silhouette Coefficient : %.3f" % silhouette_score)
28 | 
29 | # Plotting Clustered Trajectories
30 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
31 | for i in range(no_of_cluster):
32 |     for traj in clusters[i]:
33 |         next_color = color_list[i % len(color_list)]
34 |         tplot.plot_traj(traj, next_color, alpha=1)
35 | 
36 | tplot.plot_map()
37 | 
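# Possible extension (a sketch, not in the original script): instead of
# hard-coding no_of_cluster, pick the number of mixture components by BIC.
#
#     bic = [GaussianMixture(n_components=k, max_iter=1000).fit(X).bic(X)
#            for k in range(2, 30)]
#     best_k = int(np.argmin(bic)) + 2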
--------------------------------------------------------------------------------
/hausdorf_benchmark.py:
--------------------------------------------------------------------------------
1 | import math
2 | import matplotlib.pyplot as plt
3 | import random
4 | import time
5 | import numpy as np
6 | from scipy.spatial.distance import directed_hausdorff
7 | 
8 | def distance(point1, point2):
9 |     retval = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
10 |     retval = math.sqrt(retval)
11 |     return retval
12 | 
13 | 
14 | def hausdorf_oneway(traj1, traj2):
15 |     # This is a slow reference implementation. A fast version is available at:
16 |     # https://github.com/mavillan/py-hausdorff
17 | 
18 |     max_min = 0
19 | 
20 |     for point1 in traj1:
21 | 
22 |         min_d = math.inf
23 |         for point2 in traj2:
24 | 
25 |             d = distance(point1, point2)
26 |             if d < min_d:
27 |                 min_d = d
28 | 
29 |         if min_d > max_min:
30 |             max_min = min_d
31 | 
32 |     return max_min
33 | 
34 | 
35 | def hausdorf(traj1, traj2):
36 |     d1 = hausdorf_oneway(traj1, traj2)
37 |     d2 = hausdorf_oneway(traj2, traj1)
38 | 
39 |     if d1 > d2:
40 |         return d1
41 | 
42 |     return d2
43 | 
44 | def scipy_hausdorff(u, v):
45 | 
46 |     # Trajectories are assumed to be given as numpy arrays
47 |     d = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0])
48 |     return d
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     print("Hausdorff distance benchmark")
53 | 
54 |     traj_list = []
55 |     traj_count = 1000
56 |     traj_length = 100
57 | 
58 |     start = time.time()
59 |     for i in range(traj_count):
60 |         traj_list.append([])
61 |         for j in range(traj_length):
62 |             x = random.random() * 100
63 |             y = random.random() * 100
64 |             traj_list[i].append((x, y))
65 | 
66 |     end = time.time()
67 |     elapsed = end - start
68 |     print("Creating trajectories took %s seconds" % elapsed)
69 | 
70 |     start = time.time()
71 |     for i in range(len(traj_list)):
72 |         traj_list[i] = np.array(traj_list[i])
73 | 
74 |     end = time.time()
75 |     elapsed = end - start
76 |     print("Converting trajectories to numpy arrays took %s seconds" % elapsed)
77 | 
78 |     start = time.time()
79 | 
80 |     for i in range(traj_count):
81 |         for j in range(i, traj_count):
82 |             scipy_hausdorff(traj_list[i], traj_list[j])
83 | 
84 |     end = time.time()
85 |     elapsed = end - start
86 |     print("Calculating Hausdorff distances took %s seconds" % elapsed)
--------------------------------------------------------------------------------
/hdbscanClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import hdbscan
6 | from sklearn import metrics
7 | 
8 | traj_list = io.load("1_traj_seg.dt")
9 | traj_list = traj_list[:1000]
10 | min_samples = 1
11 | min_cluster_size = 2
12 | 
13 | D = io.load_distance_matrix("distance1.npz")
14 | 
15 | dbscan = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, min_samples=min_samples, metric="precomputed", memory="hdbscan_cache")
16 | dbscan.fit(D)
17 | 
18 | # Postprocessing
19 | no_of_labels = np.max(dbscan.labels_) + 1
20 | 
21 | print("Total number of clusters : %s" % no_of_labels)
22 | 
23 | clusters = [[] for i in range(no_of_labels)]
24 | outliers = []
25 | no = len(traj_list)
26 | for i in range(no):
27 |     label = dbscan.labels_[i]
28 |     if label == -1:
29 |         outliers.append(traj_list[i])
30 |     else:
31 |         clusters[label].append(traj_list[i])
32 | 
33 | no_of_noise = len(outliers)
34 | print("Number of noise points %s" % no_of_noise)
35 | print("Noise Percentage : %.3f" % (no_of_noise / no))
36 | 
37 | silhouette_score = metrics.silhouette_score(D, dbscan.labels_)
38 | print("Silhouette Coefficient : %.3f" % silhouette_score)
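# Note (not in the original script): D is a pairwise-distance matrix, so passing
# metric="precomputed" would match how HDBSCAN consumed it above; as written,
# the rows of D are treated as feature vectors. The score also counts the noise
# label (-1) as if it were a regular cluster. A variant restricted to clustered
# points could look like:
#
#     idx = np.where(dbscan.labels_ != -1)[0]
#     core = D.toarray()[np.ix_(idx, idx)]
#     print(metrics.silhouette_score(core, dbscan.labels_[idx], metric="precomputed"))
#
# Remember that D stores 0 for pairs beyond the cut-off threshold, so absolute
# silhouette values should be read with care.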
39 | 
40 | # Plotting Clustered Trajectories
41 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
42 | for i in range(no_of_labels):
43 |     for traj in clusters[i]:
44 |         next_color = color_list[i % len(color_list)]  # i, not 0, so each cluster gets its own color
45 |         tplot.plot_traj(traj, next_color, alpha=1)
46 |         #tplot.plot_traj(traj)
47 | 
48 | for traj in outliers:
49 |     tplot.plot_traj(traj, "r")
50 | 
51 | tplot.plot_map()
52 | 
--------------------------------------------------------------------------------
/kmedoidsClus.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import argos.plot as tplot
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import hdbscan  # not used below; kept from an earlier experiment
6 | from sklearn import metrics
7 | import argos.cluster as cluster
8 | 
9 | traj_list = io.load("1_traj_seg.dt")
10 | traj_list = traj_list[:1000]
11 | 
12 | '''
13 | D = io.load_distance_matrix("distance1.npz")
14 | D = D[:1000,:1000]
15 | '''
16 | 
17 | #D = cluster.calculate_dense_distance_matrix(traj_list)
18 | #D.tofile("dense.dat")
19 | D = np.fromfile("dense.dat", dtype=float)
20 | print(D.shape)
21 | D = D.reshape((1000, 1000))
22 | 
23 | K = 20
24 | M, C = cluster.kMedoids(D, K)
25 | 
26 | # Postprocessing
27 | 
28 | print("Total number of clusters : %s" % K)
29 | 
30 | labels = np.zeros((len(traj_list),))
31 | 
32 | assigned_count = 0
33 | for i in range(K):
34 |     for index in C[i]:
35 |         assigned_count += 1
36 |         labels[index] = i
37 | 
38 | print("Assigned trajectories : %s" % assigned_count)
39 | 
40 | clusters = [[] for i in range(K)]
41 | no = len(traj_list)
42 | for i in range(no):
43 |     label = int(labels[i])
44 |     clusters[label].append(traj_list[i])
45 | 
46 | silhouette_score = metrics.silhouette_score(D, labels, metric="precomputed", sample_size=1000)  # D holds pairwise distances
47 | print("Silhouette Coefficient : %.3f" % silhouette_score)
48 | 
49 | sse_list = []
50 | for h in range(2, 80):
51 |     M, C = cluster.kMedoids(D, h)
52 |     sse = 0
53 |     for i in range(h):
54 |         medoid_index = M[i]
55 |         sse += np.sum(D[medoid_index, C[i]] ** 2)
56 |     sse_list.append(sse)
57 |     print(sse)
58 | 
59 | plt.plot(range(2, 80), sse_list)
60 | plt.xlabel("K")
61 | plt.ylabel("SSE")
62 | plt.title("Sum of Squared Error")
63 | plt.show()
64 | 
65 | 
66 | # Plotting Clustered Trajectories
67 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
68 | for i in range(K):
69 |     for traj in clusters[i]:
70 |         next_color = color_list[i % len(color_list)]
71 |         tplot.plot_traj(traj, next_color, alpha=1)
72 | 
73 | tplot.plot_map()
74 | 
--------------------------------------------------------------------------------
/merger.py:
--------------------------------------------------------------------------------
1 | import argos.io as io
2 | import math
3 | import argos.plot as tplot
4 | import argos.util as util
5 | import argos.noise as reduc
6 | import argos.cluster as cluster
7 | from argos.tool.stopwatch import Stopwatch
8 | 
9 | traj_id = 0
10 | id_list = []
11 | traj_list = []
12 | for i in range(7):
13 |     filename = "%s.traj" % i
14 |     traj_dict = io.load(filename)
15 | 
16 |     reduc.shorten(traj_dict)
17 |     reduc.remove_noise(traj_dict)
18 | 
19 |     for key in traj_dict:
20 |         id_list.append((traj_id, key))
21 |         traj_id += 1
22 |         path = traj_dict[key]["path"]
23 |         traj_list.append(path)
24 | 
25 |     io.save(traj_list, "%s_traj.dt" % i)
26 | 
27 |     reduc.segmentation_list(traj_list)
28 | 
29 |     io.save(traj_list, "%s_traj_seg.dt" % i)
30 |     traj_list = []
31 | io.save(id_list, "id_list.dt")
32 | exit(0)  # everything below is earlier exploratory code, kept for reference and never executed
33 | 
34 | 
35 | s = Stopwatch()
36 | n = 100
37 | 
38 | traj_dict = io.load("1.traj")
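# Note on the unreachable block below: it walks the preprocessing pipeline step
# by step. shorten() drops points below the speed threshold, remove_noise()
# drops short or non-moving flights, and segmentation_list() keeps only points
# where the heading changes by more than the angle threshold. Finally, the
# distance matrices computed before (D1) and after (D2) segmentation are
# compared to estimate how much accuracy segmentation costs.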
39 | 
40 | util.summary_traj_dict(traj_dict)
41 | 
42 | reduc.shorten(traj_dict)
43 | 
44 | 
45 | util.summary_traj_dict(traj_dict)
46 | 
47 | reduc.remove_noise(traj_dict)
48 | 
49 | util.summary_traj_dict(traj_dict)
50 | 
51 | reduc.remove_noise(traj_dict)
52 | reduc.shorten(traj_dict)
53 | 
54 | util.summary_traj_dict(traj_dict)
55 | 
56 | traj_list = util.traj_dict_to_traj_list(traj_dict)
57 | 
58 | util.summary_traj_list(traj_list)
59 | 
60 | s.start()
61 | D1 = cluster.calculate_distance_matrix(traj_list[:n], 80000)
62 | s.stop()
63 | 
64 | reduc.segmentation_list(traj_list)
65 | 
66 | util.summary_traj_list(traj_list)
67 | 
68 | 
69 | traj_list = traj_list[:n]
70 | 
71 | s.start()
72 | D2 = cluster.calculate_distance_matrix(traj_list, 80000)
73 | s.stop()
74 | 
75 | total_diff = 0
76 | diff_count = 0
77 | for i in range(n):
78 |     for j in range(i, n):
79 |         diff = abs(D1[i, j] - D2[i, j])
80 |         total_diff += diff
81 |         if diff > 1000:
82 |             diff_count += 1
83 | 
84 | print(diff_count)
85 | 
86 | print(total_diff)
87 | exit(0)  # more scratch code below
88 | 
89 | count_flew = 0
90 | count_moved_far = 0
91 | traj_list = []
92 | suspi = None  # a flight that apparently never left the ground, kept for inspection
93 | for key in traj_dict.keys():
94 |     path = traj_dict[key]["path"]
95 |     count = 0
96 |     flew = False
97 | 
98 |     start = path[0]
99 |     end = path[len(path) - 1]
100 |     if abs(start[0] - end[0]) > 30000 or abs(start[1] - end[1]) > 30000:
101 |         count_moved_far += 1
102 | 
103 |     for point in path:
104 |         if point[2] < 1 and point[5] < 1:  # point[2]: speed, point[5]: (presumably) vertical rate
105 |             count += 1
106 |         if not flew and point[5] > 5:
107 |             flew = True
108 |             count_flew += 1
109 | 
110 |     if not flew:
111 |         suspi = path
112 | 
113 |     traj_list.append(path)
114 | 
115 | print(count_flew)
116 | print(count_moved_far)
117 | 
118 | '''
119 | for traj in traj_list:
120 |     tplot.plot_traj(traj)
121 | '''
122 | 
123 | print(suspi)
124 | tplot.plot_traj(suspi)
125 | 
126 | tplot.plot_map()
127 | 
128 | exit(0)  # final scratch block: merge overlapping .traj files
129 | 
130 | 
131 | filename1 = "3.traj"
132 | filename2 = "6.traj"
133 | 
134 | d1 = io.load(filename1)
135 | d2 = io.load(filename2)
136 | 
137 | key1 = set(d1.keys())
138 | key2 = set(d2.keys())
139 | common = key1.intersection(key2)
140 | 
141 | print(len(common))
142 | 
143 | for key in common:
144 |     d1[key]["path"] = d1[key]["path"] + d2[key]["path"]
145 |     d1[key]["count"] = d1[key]["count"] + d2[key]["count"]
146 |     d2.pop(key)
147 | 
148 | io.save(d1, filename1)
149 | io.save(d2, filename2)
--------------------------------------------------------------------------------
/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/report.pdf
--------------------------------------------------------------------------------
/report.py:
--------------------------------------------------------------------------------
1 | import argos.util as util
2 | import math
3 | import matplotlib.pyplot as plt
4 | import argos.noise as reduc
5 | 
6 | base_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
7 | def plot_traj(x, color=base_color):  # plot a set of 2D points as dots connected by a line
8 |     plt.scatter([p[0] for p in x], [p[1] for p in x], c=color, s=20)
9 |     plt.plot([p[0] for p in x], [p[1] for p in x], c=color)
10 | 
11 | 
12 | def create_point(point, azimuth):
13 |     az = (azimuth / 180) * math.pi
14 |     p = util.get_point(point, az, 10)
15 |     a = []  # point layout: [x, y, speed, azimuth]
16 |     a.append(p[0])
17 |     a.append(p[1])
18 |     a.append(0)
19 |     a.append(azimuth)
20 |     return a
21 | 
22 | 
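# The code below builds one synthetic track from three legs: a straight leg at
# a constant 30-degree heading, a straight leg at 10 degrees, and a final arc
# whose heading grows by 10 degrees per step. With the 5-degree threshold,
# _segmentation should keep the bend between the straight legs and every point
# of the arc while dropping the points along the straight legs.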
23 | traj = []
24 | start = [0, 0, 0, 30]
25 | p = start
26 | for i in range(20):
27 |     p = create_point(p, 30)
28 |     traj.append(p)
29 | 
30 | for i in range(25):
31 |     p = create_point(p, 10)
32 |     traj.append(p)
33 | 
34 | az = 10
35 | for i in range(20):
36 |     az = az + 10
37 |     p = create_point(p, az)
38 |     traj.append(p)
39 | 
40 | plot_traj(traj)
41 | 
42 | for point in traj:
43 |     point[0] += 300  # shift the copy right so both versions are visible side by side
44 | 
45 | traj = reduc._segmentation(traj)
46 | plot_traj(traj, "r")
47 | plt.gca().set_aspect(1)  # gca(): plt.axes() would create a new axes instead of using the current one
48 | plt.title("Trajectory Segmentation")
49 | plt.show()
--------------------------------------------------------------------------------
/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/result.png
--------------------------------------------------------------------------------
/segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seljukgulcan/comparing-trajectory-clustering-methods/0aa90cf202629f31c4b00aac350217182a49aa93/segmentation.png
--------------------------------------------------------------------------------
/synthetic_demo.py:
--------------------------------------------------------------------------------
1 | # Synthetic Demo
2 | 
3 | import random
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | from sklearn.cluster import DBSCAN
7 | from argos.cluster import calculate_distance_matrix
8 | from argos.tool.stopwatch import Stopwatch
9 | from argos.synthetic import generate_cluster
10 | import argos.plot as aplt
11 | 
12 | 
13 | if __name__ == "__main__":
14 |     print("Synthetic Demo")
15 |     s = Stopwatch()
16 | 
17 |     # Parameters of Demo
18 |     no_of_cluster = 20
19 |     no_of_traj = 100
20 |     traj_length = 100
21 |     eps = 10
22 |     min_samples = 5
23 |     noise = 5
24 | 
25 |     # Generating Trajectories
26 |     no_of_outlier = int(no_of_traj * 0.05)
27 |     no_of_traj_each_cluster = no_of_traj // no_of_cluster
28 | 
29 |     print("-----")
30 |     print("No of Clusters : %s" % no_of_cluster)
31 |     print("No of Outliers : %s" % no_of_outlier)
32 |     print("-----")
33 | 
34 |     normal_traj_list = []
35 |     for i in range(no_of_cluster):
36 |         traj_list = generate_cluster(no_of_traj_each_cluster, traj_length, noise)
37 |         normal_traj_list += traj_list
38 | 
39 |     outlier_traj_list = []
40 |     for i in range(no_of_outlier):
41 |         outlier_traj_list += generate_cluster(1, traj_length, noise)
42 | 
43 |     # Plotting Generated Trajectories
44 | 
45 |     plt.figure(1)
46 |     plt.subplot(121)
47 | 
48 |     for traj in normal_traj_list:
49 |         aplt.plot_traj(traj)
50 | 
51 |     for traj in outlier_traj_list:
52 |         aplt.plot_traj(traj, "r")
53 | 
54 |     # Precomputation
55 | 
56 |     traj_list = normal_traj_list + outlier_traj_list
57 |     random.shuffle(traj_list)
58 | 
59 |     # Calculating Distance Matrix
60 |     s.start()
61 |     D = calculate_distance_matrix(traj_list, eps)
62 |     s.stop("Distance matrix calculated")
63 | 
64 |     # Clustering
65 |     s.start()
66 |     dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
67 |     dbscan.fit(D)
68 |     s.stop("Clustering is done")
69 | 
70 |     # Postprocessing
71 |     no_of_labels = np.max(dbscan.labels_) + 1
72 | 
73 |     print("Total number of clusters : %s" % no_of_labels)
74 | 
75 |     clusters = [[] for i in range(no_of_labels)]
76 |     outliers = []
77 |     no = len(traj_list)
78 |     for i in range(no):
79 |         label = dbscan.labels_[i]
80 |         if label == -1:
81 |             outliers.append(traj_list[i])
82 |         else:
83 |             clusters[label].append(traj_list[i])
84 | 
85 |     no_of_noise = len(outliers)
86 |     print("Number of noise points %s" % no_of_noise)
87 |     print("Noise Percentage : %.3f" % (no_of_noise / no))
88 | 
89 |     # Plotting Clustered Trajectories
90 |     plt.subplot(122)
91 |     color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
92 |     for i in range(no_of_labels):
93 |         for traj in clusters[i]:
94 |             next_color = color_list[i % len(color_list)]
95 |             aplt.plot_traj(traj, next_color, alpha=0.3)
96 | 
97 |     for traj in outliers:
98 |         aplt.plot_traj(traj, "k")
99 | 
100 |     plt.show()
--------------------------------------------------------------------------------
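A tiny worked check of the symmetric Hausdorff distance used throughout the repo (illustration only): for two parallel two-point segments one unit apart, both directed distances equal 1, so the symmetric distance is 1.

import numpy as np
from scipy.spatial.distance import directed_hausdorff

u = np.array([[0.0, 0.0], [1.0, 0.0]])
v = np.array([[0.0, 1.0], [1.0, 1.0]])
d = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0])
print(d)  # 1.0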