├── .gitignore ├── README.md ├── candidates_preparation.py ├── interactive_voting.py ├── position_context.py ├── s1_preprocessing.py ├── s2_main.py ├── s3_visualize.py └── utils ├── data_loader.py ├── dis.py ├── display.py ├── preprocessing.py └── vector_haversine_distances.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IVMM 2 | Implement of IVMM map matching method. 3 | 4 | ## Getting Start 5 | 6 | ### Usage 7 | Run `s1`, `s2`, `s3` in turn. 8 | 9 | ### Input and Output 10 | **Input file (.csv) sample**:
11 | 12 | | plate | color | longitude | latitude | timestamp | velocity | status | 13 | | ----- |:-----:| ---------:| --------:| ---------:| --------:| ------:| 14 | |粤123456|蓝的|113.961098|22.553101|2014-07-03 00:00:02|17|0| 15 | |粤123456|蓝的|113.962303|22.547001|2014-07-03 00:01:48|21|0| 16 | |粤123456|蓝的|113.962997|22.547001|2014-07-03 00:02:18|0|0| 17 | 18 | **Output file (.csv) sample**: 19 | 20 | | |i_p_i|e_i|end_node|edge_progress|x|y|oneway|length|u|v|plate|longitude|latitude|timestamp|velocity|dis_f_pre| 21 | | ----- |:-----:| ---------:| --------:| ---------:| --------:| ------:| ------:| ------:| ------:| ------:| ------:| ------:| ------:| ------:| ------:| ------:| 22 | |0|877675.0|45266|0|0.77|113.9611|22.5531|True|205.163|1116415224|1116415555|粤123456|113.961098|22.553101|2014-07-03 00:00:02|17|| 23 | |1|1419978.0|72794|0|0.07|113.9623|22.5470|False|140.21|2528898679|2528898707|粤123456|113.962303|22.547001|2014-07-03 00:01:48|21|689.48| 24 | |2|1419997.0|72795|0|0.41|113.9629|22.5470|False|127.939|2528898679|2528898834|粤123456|113.962997|22.547001|2014-07-03 00:02:18|0|71.27| 25 | 26 | ## Developer 27 | Issues are welcome. 28 | 29 | ## Reference 30 | Please refer to the paper for details of the method: @inproceedings{yuan2010interactive, 31 | title={An interactive-voting based map matching algorithm}, 32 | author={Yuan, Jing and Zheng, Yu and Zhang, Chengyang and Xie, Xing and Sun, Guang-Zhong}, 33 | booktitle={2010 Eleventh international conference on mobile data management}, 34 | pages={43--52}, 35 | year={2010}, 36 | organization={IEEE} 37 | } -------------------------------------------------------------------------------- /candidates_preparation.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import osmnx as ox 3 | from utils import data_loader 4 | import utils.display as display 5 | 6 | import pandas as pd 7 | import numpy as np 8 | import geopandas as gpd 9 | from osmnx import utils_graph 10 | from osmnx import utils_geo 11 | try: 12 | from sklearn.neighbors import BallTree 13 | except ImportError: 14 | BallTree = None 15 | 16 | 17 | def extended_edges(edges): 18 | # transform edges into evenly spaced points 19 | edges["points"] = edges.apply( 20 | lambda x: utils_geo.redistribute_vertices(x.geometry, dist=0.0001), axis=1 21 | ) 22 | 23 | # develop edges data for each created points 24 | extended = ( 25 | edges["points"] 26 | .apply([pd.Series]) 27 | .stack() 28 | .reset_index(level=1) 29 | .join(edges.drop(columns=['oneway', 'length', 'geometry', 30 | 'u', 'v', 'points'])) 31 | .reset_index() 32 | ) 33 | 34 | # give each created points [index of edge 'e_i'] and [index of point in edge 'p_i_e_i'] 35 | extended.rename(columns={'index': 'e_i', 'level_1': 'p_i_e_i'}, inplace=True) 36 | extended['end_node'] = 0 37 | extended.loc[extended['e_i'] != extended['e_i'].shift(), 'end_node'] = 1 38 | extended.loc[extended['e_i'] != extended['e_i'].shift(-1), 'end_node'] = 2 39 | points_count = extended.groupby(['e_i']).size() 40 | # 'edge_progress' means distance progress along edge 41 | points_count.rename('edge_points_count', inplace=True) 42 | extended = extended.merge(points_count, left_on='e_i', right_index=True, ) 43 | extended['edge_progress'] = extended['p_i_e_i'] / (extended['edge_points_count'] - 1) 44 | return extended 45 | 46 | 47 | def make_tree(gdf_extended_edges): 48 | # check if we were able to import sklearn.neighbors.BallTree successfully 49 | if not BallTree: 50 | raise ImportError( 51 | "The scikit-learn package must be installed to use this optional feature." 52 | ) 53 | 54 | # haversine requires data in form of [lat, lng] and inputs/outputs in units of radians 55 | gpd_extend = gpd.GeoDataFrame(gdf_extended_edges, geometry='Series') 56 | nodes = pd.DataFrame({"x": gpd_extend['Series'].x, "y": gpd_extend['Series'].y}) 57 | nodes_rad = np.deg2rad(nodes[["y", "x"]].values.astype(np.float)) 58 | 59 | # build a ball tree for haversine nearest node search 60 | tree = BallTree(nodes_rad, metric="haversine") 61 | return tree 62 | 63 | 64 | def dis_bt_tp_ep(trajectory_point, ball_tree): 65 | # distance between trajectory points and extended points 66 | 67 | # prepare points to be queried 68 | X = trajectory_point['longitude'] 69 | Y = trajectory_point['latitude'] 70 | points = np.array([Y, X]).T 71 | points_rad = np.deg2rad(points) 72 | 73 | # query the tree for node in a radius to each trajectory point 74 | r = 150 / 6378000 75 | idx, dis = ball_tree.query_radius(points_rad, r=r, return_distance=True) 76 | 77 | # special case: all trajectory points have no neighbor points 78 | if 0 == sum([len(sub_idx) for sub_idx in idx]): 79 | return None 80 | 81 | # associate distances to edges 82 | # build dis between trajectory points and each neighbor points 83 | dis_b_tp_pie = ( 84 | pd.Series(dis) 85 | .apply([pd.Series]) 86 | .stack() 87 | * 6371000 88 | ) 89 | # add global interpolated index to each neighbor points 90 | dis_b_tp_pie.rename(columns={'Series': 'residual'}, inplace=True) 91 | idx = ( 92 | pd.Series(idx) 93 | .apply(pd.Series, dtype=int) 94 | .stack() 95 | ) 96 | dis_b_tp_pie.set_index([dis_b_tp_pie.index.get_level_values(0), idx], inplace=True) 97 | 98 | # rename index to trajectory points index and (global, not inside edge)interpolated points index 99 | dis_b_tp_pie.index.rename(['t_p_i', 'i_p_i'], inplace=True) 100 | 101 | # res.sort_values('residual', inplace=True) 102 | return dis_b_tp_pie 103 | 104 | 105 | def make_candidates_set(dis_b_tp_pie, extended): 106 | dis_b_tp_pie = dis_b_tp_pie.merge(extended[['e_i', 'end_node', 'edge_progress']], 107 | left_on='i_p_i', 108 | right_index=True, 109 | how='left') 110 | 111 | # distance between trajectory points and edges 112 | dis_b_tp_e_i = dis_b_tp_pie.groupby(['t_p_i', 'e_i'])['residual'].idxmin() 113 | dis_b_tp_e = dis_b_tp_pie.loc[dis_b_tp_e_i] 114 | 115 | # 5 nearest edge is candidate edge 116 | candidate_set = ( 117 | dis_b_tp_e 118 | .groupby(level=0, group_keys=False) 119 | .apply(lambda grp: grp.nsmallest(5, 'residual')) 120 | ) 121 | 122 | gpd_extend = gpd.GeoDataFrame(extended, geometry='Series') 123 | nodes = pd.DataFrame({"x": gpd_extend['Series'].x, "y": gpd_extend['Series'].y}) 124 | candidate_set = candidate_set.merge(nodes, left_on='i_p_i', right_index=True, how='left') 125 | candidate_set.reset_index(inplace=True) 126 | candidate_set = candidate_set.round({'x': 6, 'y': 6}) 127 | candidate_set.drop_duplicates(subset=['t_p_i', 'x', 'y'], inplace=True) 128 | return candidate_set 129 | 130 | 131 | if __name__ == '__main__': 132 | display.configure_pandas() 133 | tqdm.pandas() 134 | 135 | # load trajectory data 136 | data = data_loader.load_vehicles(n=1, max_length=0) 137 | cur_vehicle = data[0][0] 138 | 139 | # load road network 140 | truncated_roads = ox.load_graphml('db/truncated_graph.graphml') 141 | # ox.plot_graph(truncated_roads) 142 | # truncated_roads = data_loader.load_drive_graph() 143 | 144 | # transform graph into DataFrame 145 | edges = utils_graph.graph_to_gdfs(truncated_roads, nodes=False, fill_edge_geometry=True) 146 | edges.drop(columns=['osmid', 'highway', 'bridge', 'name', 'key'], inplace=True) 147 | 148 | # extended edges to points, for following tree 149 | extended = extended_edges(edges) 150 | 151 | tree = make_tree(extended) 152 | 153 | dis_b_tp_pie = dis_bt_tp_ep(cur_vehicle, tree) 154 | 155 | candidates_set = make_candidates_set(dis_b_tp_pie, extended) 156 | 157 | print(candidates_set) 158 | extended.to_csv('result/extended_edges.csv', index=False) 159 | candidates_set.to_csv('result/candidate_point.csv', index=False) 160 | -------------------------------------------------------------------------------- /interactive_voting.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from utils import data_loader 7 | import utils.display as display 8 | 9 | from sklearn.metrics.pairwise import haversine_distances 10 | from scipy.stats import norm 11 | from dask.distributed import Client 12 | from dask import delayed 13 | 14 | 15 | def find_sequence(c_s, weight, phi, i, k, a_i, trajectory_len): 16 | t_phi = [item.copy() for item in phi] 17 | # c_s['f'] = np.NINF 18 | # c_s['pre'] = None 19 | f = [np.NINF] * len(c_s.index) 20 | pre = [None] * len(c_s.index) 21 | # set f of first trajectory point 22 | s = c_s['t_p_i'].searchsorted(0, side='left') 23 | e = c_s['t_p_i'].searchsorted(0, side='right') 24 | # c_s.loc[0, 'f'] = weight[0] * c_s.loc[0, 'epsilon'].values 25 | f[s:e] = weight[0] * c_s.iloc[s:e, c_s.columns.get_loc('epsilon')] 26 | 27 | for s in range(a_i): 28 | if s != k: 29 | if 0 == i: 30 | f[s] = np.NINF 31 | # c_s.loc[(i, s), 'f'] = np.NINF 32 | else: 33 | t_phi[i - 1][:, s] = np.NINF 34 | 35 | j_pre_s = 0 36 | j_pre_e = c_s['t_p_i'].searchsorted(0, side='right') 37 | for j in range(1, trajectory_len): 38 | j_s = c_s['t_p_i'].searchsorted(j, side='left') 39 | j_e = c_s['t_p_i'].searchsorted(j, side='right') 40 | for s in range(j_e - j_s): 41 | # print(c_s.loc[j - 1, 'f'], t_phi[j - 1][:, s]) 42 | last_f = f[j_pre_s: j_pre_e] 43 | f[j_s + s] = max(last_f + t_phi[j - 1][:, s]) 44 | pre[j_s + s] = np.argmax(last_f + t_phi[j - 1][:, s]) 45 | # c_s.loc[(j, s), 'f'] = max(c_s.loc[j - 1, 'f'] + t_phi[j - 1][:, s]) 46 | # c_s.loc[(j, s), 'pre'] = np.argmax(c_s.loc[j - 1, 'f'] + t_phi[j - 1][:, s]) 47 | j_pre_s = j_s 48 | j_pre_e = j_e 49 | 50 | r_list = [] 51 | last_i = trajectory_len - 1 52 | lst_i_s = c_s['t_p_i'].searchsorted(last_i, side='left') 53 | c = np.argmax(f[lst_i_s:]) 54 | for j in reversed(range(1, trajectory_len)): 55 | s = c_s['t_p_i'].searchsorted(j, side='left') 56 | e = c_s['t_p_i'].searchsorted(j, side='right') 57 | r_list.append(c) 58 | c = pre[s+c] 59 | r_list.append(c) 60 | 61 | return max(f[lst_i_s:]), list(reversed(r_list)) 62 | 63 | 64 | def traverse_trajectory_point(i, M, cur_vehicle, candidate_set, trajectory_len): 65 | # compute phi for current trajectory point 66 | phi = [item.copy() for item in M] 67 | # prepare trajectory location 68 | 69 | loc_i = cur_vehicle.iloc[[i], [cur_vehicle.columns.get_loc('latitude'), 70 | cur_vehicle.columns.get_loc('longitude')]].values 71 | loc_all = cur_vehicle.loc[cur_vehicle.index != i, ['latitude', 'longitude']].values 72 | # compute trajectory point distance 73 | 74 | tp_dis = 6371009 * haversine_distances(np.radians(loc_i), np.radians(loc_all))[0] 75 | 76 | weight = np.exp(-(tp_dis * tp_dis) / (7000 * 7000)) 77 | for _, w in enumerate(weight): 78 | phi[_] = w * phi[_] 79 | 80 | s = candidate_set['t_p_i'].searchsorted(i, side='left') 81 | e = candidate_set['t_p_i'].searchsorted(i, side='right') 82 | 83 | c_s_f_v = [] 84 | c_s_p = [] 85 | for k in range(e - s): 86 | c_s = candidate_set.copy() 87 | f_value, P = find_sequence(c_s, weight, phi, i, k, e - s, trajectory_len) 88 | c_s_f_v.append(f_value) 89 | if np.isinf(f_value): 90 | # print('Bad local optimal path occurs, ignored.') 91 | continue 92 | c_s_p.append(P) 93 | return c_s_f_v, c_s_p 94 | 95 | 96 | def vote(res_set, seg_name): 97 | vote = [dict() for idx in range(len(res_set))] 98 | for idx, item in enumerate(res_set): 99 | for path in item[1]: 100 | for i_idx, c_p in enumerate(path): 101 | vote[i_idx][c_p] = vote[i_idx][c_p] + 1 if c_p in vote[i_idx] else 1 102 | global_optimal_path = [] 103 | for idx, item in enumerate(vote): 104 | best = [] 105 | best_v = 0 106 | for k, v in item.items(): 107 | if v > best_v: 108 | best = [k] 109 | best_v = v 110 | elif v == best_v: 111 | best.append(k) 112 | if not len(best): 113 | return global_optimal_path 114 | 115 | global_optimal_path.append(best[np.argmax([res_set[idx][0][i] for i in best])]) 116 | if np.isinf(best[np.argmax([res_set[idx][0][i] for i in best])]): 117 | print(seg_name, "卧槽,坏了") 118 | return global_optimal_path 119 | 120 | 121 | if __name__ == '__main__': 122 | display.configure_pandas() 123 | 124 | ssm_path = 'result/ssm' 125 | diag_M = np.load(os.path.join(ssm_path, 'temp.npy')) 126 | 127 | # load trajectory data 128 | data = data_loader.load_vehicles(n=1, max_length=3) 129 | cur_vehicle = data[0][0] 130 | 131 | # load candidate set 132 | candidate_set = pd.read_csv('result/candidate_point.csv') 133 | 134 | first_seg_len = candidate_set.loc[candidate_set['t_p_i']==candidate_set.index[0]].shape[0] 135 | M = [] 136 | for t_p_i in range(candidate_set['t_p_i'].min(), candidate_set['t_p_i'].max()): 137 | pre_s = candidate_set['t_p_i'].searchsorted(t_p_i, side='left') 138 | pre_e = candidate_set['t_p_i'].searchsorted(t_p_i, side='right') 139 | next_s = candidate_set['t_p_i'].searchsorted(t_p_i + 1, side='left') 140 | next_e = candidate_set['t_p_i'].searchsorted(t_p_i + 1, side='right') 141 | M.append(diag_M[pre_s:pre_e, next_s - first_seg_len:next_e - first_seg_len]) 142 | 143 | epsilon_u = 5 144 | epsilon_sigma = 10 145 | candidate_set['epsilon'] = ( 146 | norm(epsilon_u, epsilon_sigma).pdf(candidate_set['residual']) 147 | * epsilon_sigma 148 | * math.sqrt(2 * math.pi) 149 | ) 150 | print(candidate_set) 151 | c_s = candidate_set.groupby('t_p_i').apply(lambda x: x.reset_index(drop=True)) 152 | c_s.index.rename(['i', 'k'], inplace=True) 153 | c_s.drop(columns='t_p_i', inplace=True) 154 | 155 | F = [] 156 | P = [] 157 | res_set = [] 158 | client = Client(n_workers=4) 159 | for i in range(candidate_set['t_p_i'].min(), candidate_set['t_p_i'].max() + 1): 160 | res = delayed(traverse_trajectory_point)(i, M, cur_vehicle, candidate_set, c_s, 10) 161 | res_set.append(res) 162 | compute = delayed(vote)(res_set, F, P) 163 | global_optimal_path = compute.compute() 164 | print(c_s.loc[[(i, j) for i, j in enumerate(global_optimal_path)]]) 165 | client.close() 166 | -------------------------------------------------------------------------------- /position_context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import pandas as pd 4 | import osmnx as ox 5 | import networkx as nx 6 | import numpy as np 7 | import scipy 8 | 9 | from pathlib import Path 10 | from scipy.stats import norm 11 | 12 | from utils import data_loader 13 | import utils.vector_haversine_distances as v_harv_dis 14 | 15 | 16 | def compute_epsilon(residual, epsilon_u=5, epsilon_sigma=10): 17 | return ( 18 | norm(epsilon_u, epsilon_sigma).pdf(residual) 19 | * epsilon_sigma 20 | * math.sqrt(2 * math.pi) 21 | ) 22 | 23 | 24 | def compute_shortest_path_len(road_network, pre_point, next_point): 25 | shortest_path_length = np.PINF 26 | if (pre_point['e_i'] == next_point['e_i']) \ 27 | and not ((pre_point['oneway']) 28 | and (pre_point['i_p_i'] != next_point['i_p_i']) 29 | and (next_point['edge_progress'] - pre_point['edge_progress'] < 0)): 30 | shortest_path_length = abs(next_point['edge_progress'] - pre_point['edge_progress']) * pre_point['length'] 31 | elif (0 == pre_point['end_node']) & (0 == next_point['end_node']): 32 | if pre_point['oneway'] & next_point['oneway']: 33 | 34 | try: 35 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 36 | + nx.shortest_path_length(road_network, 37 | pre_point['v'], next_point['u'], 38 | weight='length') 39 | + next_point['length'] * next_point['edge_progress'] 40 | ) 41 | if temp_length < shortest_path_length: 42 | shortest_path_length = temp_length 43 | except: 44 | pass 45 | elif ~pre_point['oneway'] & next_point['oneway']: 46 | # pre seg forward, next seg forward 47 | try: 48 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 49 | + nx.shortest_path_length(road_network, 50 | pre_point['v'], next_point['u'], 51 | weight='length') 52 | + next_point['length'] * next_point['edge_progress'] 53 | ) 54 | if temp_length < shortest_path_length: 55 | shortest_path_length = temp_length 56 | except: 57 | pass 58 | # pre seg backward, next seg forward 59 | try: 60 | temp_length = (pre_point['length'] * pre_point['edge_progress'] 61 | + nx.shortest_path_length(road_network, 62 | pre_point['u'], next_point['u'], 63 | weight='length') 64 | + next_point['length'] * next_point['edge_progress'] 65 | ) 66 | if temp_length < shortest_path_length: 67 | shortest_path_length = temp_length 68 | except: 69 | pass 70 | elif pre_point['oneway'] & ~next_point['oneway']: 71 | # pre seg forward, next seg forward 72 | try: 73 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 74 | + nx.shortest_path_length(road_network, 75 | pre_point['v'], next_point['u'], 76 | weight='length') 77 | + next_point['length'] * next_point['edge_progress'] 78 | ) 79 | if temp_length < shortest_path_length: 80 | shortest_path_length = temp_length 81 | except: 82 | pass 83 | # pre seg forward, next seg backward 84 | try: 85 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 86 | + nx.shortest_path_length(road_network, 87 | pre_point['v'], next_point['v'], 88 | weight='length') 89 | + next_point['length'] * (1 - next_point['edge_progress']) 90 | ) 91 | if temp_length < shortest_path_length: 92 | shortest_path_length = temp_length 93 | except: 94 | pass 95 | elif ~pre_point['oneway'] & ~next_point['oneway']: 96 | # pre seg forward, next seg forward 97 | try: 98 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 99 | + nx.shortest_path_length(road_network, 100 | pre_point['v'], next_point['u'], 101 | weight='length') 102 | + next_point['length'] * next_point['edge_progress'] 103 | ) 104 | if temp_length < shortest_path_length: 105 | shortest_path_length = temp_length 106 | except: 107 | pass 108 | # pre seg backward, next seg forward 109 | try: 110 | temp_length = (pre_point['length'] * pre_point['edge_progress'] 111 | + nx.shortest_path_length(road_network, 112 | pre_point['u'], next_point['u'], 113 | weight='length') 114 | + next_point['length'] * next_point['edge_progress'] 115 | ) 116 | if temp_length < shortest_path_length: 117 | shortest_path_length = temp_length 118 | except: 119 | pass 120 | # pre seg forward, next seg backward 121 | try: 122 | temp_length = ( 123 | pre_point['length'] * (1 - pre_point['edge_progress']) 124 | + nx.shortest_path_length(road_network, pre_point['v'], next_point['v'], weight='length') 125 | + next_point['length'] * (1 - next_point['edge_progress']) 126 | ) 127 | if temp_length < shortest_path_length: 128 | shortest_path_length = temp_length 129 | except: 130 | pass 131 | # pre seg backward, next seg backward 132 | try: 133 | temp_length = ( 134 | pre_point['length'] * pre_point['edge_progress'] 135 | + nx.shortest_path_length(road_network, pre_point['u'], next_point['v'], weight='length') 136 | + next_point['length'] * (1 - next_point['edge_progress']) 137 | ) 138 | if temp_length < shortest_path_length: 139 | shortest_path_length = temp_length 140 | except: 141 | pass 142 | elif (0 != pre_point['end_node']) & (0 == next_point['end_node']): 143 | if 1 == pre_point['end_node']: 144 | pre_node = 'u' 145 | elif 2 == pre_point['end_node']: 146 | pre_node = 'v' 147 | if next_point['oneway']: 148 | try: 149 | temp_length = ( 150 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['u'], 151 | weight='length') 152 | + next_point['length'] * next_point['edge_progress'] 153 | ) 154 | if temp_length < shortest_path_length: 155 | shortest_path_length = temp_length 156 | except: 157 | pass 158 | elif ~next_point['oneway']: 159 | # next seg forward 160 | try: 161 | temp_length = ( 162 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['u'], 163 | weight='length') 164 | + next_point['length'] * next_point['edge_progress'] 165 | ) 166 | if temp_length < shortest_path_length: 167 | shortest_path_length = temp_length 168 | except: 169 | pass 170 | # next seg backward 171 | try: 172 | temp_length = ( 173 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['v'], 174 | weight='length') 175 | + next_point['length'] * (1 - next_point['edge_progress']) 176 | ) 177 | if temp_length < shortest_path_length: 178 | shortest_path_length = temp_length 179 | except: 180 | pass 181 | elif (0 == pre_point['end_node']) & (0 != next_point['end_node']): 182 | if 1 == next_point['end_node']: 183 | next_node = 'u' 184 | elif 2 == next_point['end_node']: 185 | next_node = 'v' 186 | if pre_point['oneway']: 187 | try: 188 | temp_length = ( 189 | pre_point['length'] * (1 - pre_point['edge_progress']) 190 | + nx.shortest_path_length(road_network, pre_point['v'], next_point[next_node], 191 | weight='length') 192 | ) 193 | if temp_length < shortest_path_length: 194 | shortest_path_length = temp_length 195 | except: 196 | pass 197 | elif ~pre_point['oneway']: 198 | # next seg forward 199 | try: 200 | temp_length = ( 201 | pre_point['length'] * (1 - pre_point['edge_progress']) 202 | + nx.shortest_path_length(road_network, pre_point['v'], next_point[next_node], 203 | weight='length') 204 | ) 205 | if temp_length < shortest_path_length: 206 | shortest_path_length = temp_length 207 | except: 208 | pass 209 | # next seg backward 210 | try: 211 | temp_length = ( 212 | pre_point['length'] * pre_point['edge_progress'] 213 | + nx.shortest_path_length(road_network, pre_point['u'], next_point[next_node], 214 | weight='length') 215 | ) 216 | if temp_length < shortest_path_length: 217 | shortest_path_length = temp_length 218 | except: 219 | pass 220 | elif (0 != pre_point['end_node']) & (0 != next_point['end_node']): 221 | if 1 == pre_point['end_node']: 222 | pre_node = 'u' 223 | elif 2 == pre_point['end_node']: 224 | pre_node = 'v' 225 | if 1 == next_point['end_node']: 226 | next_node = 'u' 227 | elif 2 == next_point['end_node']: 228 | next_node = 'v' 229 | try: 230 | temp_length = ( 231 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point[next_node], 232 | weight='length') 233 | ) 234 | if temp_length < shortest_path_length: 235 | shortest_path_length = temp_length 236 | except: 237 | pass 238 | return shortest_path_length 239 | 240 | 241 | def compute_static_score_matrix(candidate_set, trajectory, road_network, traj_len): 242 | M = [] 243 | for t_p_i in range(traj_len - 1): 244 | pre_s = candidate_set['t_p_i'].searchsorted(t_p_i, side='left') 245 | pre_e = candidate_set['t_p_i'].searchsorted(t_p_i, side='right') 246 | next_s = candidate_set['t_p_i'].searchsorted(t_p_i + 1, side='left') 247 | next_e = candidate_set['t_p_i'].searchsorted(t_p_i + 1, side='right') 248 | M_i = np.full((pre_e - pre_s, next_e - next_s), np.NINF) 249 | # print(t_p_i) 250 | for i in range(pre_e - pre_s): 251 | for j in range(next_e - next_s): 252 | epsilon = candidate_set.iat[next_s + j, candidate_set.columns.get_loc('epsilon')] 253 | d_from_pre = trajectory.iloc[t_p_i + 1, trajectory.columns.get_loc('dis_f_pre')] 254 | pre_seg = candidate_set.iloc[pre_s + i] 255 | next_seg = candidate_set.iloc[next_s + j] 256 | shortest_path_length = compute_shortest_path_len(road_network, pre_seg, next_seg) 257 | if shortest_path_length <= d_from_pre: 258 | M_i[i, j] = epsilon 259 | elif np.isinf(shortest_path_length): 260 | M_i[i, j] = np.NINF 261 | else: 262 | # print(epsilon, d_from_pre, shortest_path_length, epsilon * d_from_pre / shortest_path_length) 263 | M_i[i, j] = epsilon * d_from_pre / shortest_path_length 264 | # print(M_i) 265 | M.append(M_i) 266 | return M 267 | 268 | 269 | if __name__ == '__main__': 270 | # load road network 271 | truncated_roads = ox.load_graphml('db/truncated_graph.graphml') 272 | edges = ox.utils_graph.graph_to_gdfs(truncated_roads, nodes=False, 273 | fill_edge_geometry=True) 274 | 275 | # load candidate set 276 | candidate_set = pd.read_csv('result/candidate_point.csv') 277 | # load trajectory data 278 | data = data_loader.load_vehicles(n=1, max_length=3) 279 | cur_vehicle = data[0] 280 | 281 | cur_vehicle['dis_f_pre'] = v_harv_dis.haversine_np(cur_vehicle['longitude'], 282 | cur_vehicle['latitude'], 283 | cur_vehicle.shift()['longitude'], 284 | cur_vehicle.shift()['latitude']) 285 | 286 | # compute epsilon of each candidate point 287 | candidate_set['epsilon'] = compute_epsilon(candidate_set['residual']) 288 | 289 | # develop edges data(oneway, length, u, v) for each candidate points 290 | candidate_set = candidate_set.merge(edges[['oneway', 'length', 'u', 'v']], left_on='e_i', right_index=True) 291 | 292 | M = compute_static_score_matrix(candidate_set, cur_vehicle, truncated_roads) 293 | 294 | M = scipy.linalg.block_diag(*M) 295 | 296 | # static score matrix path 297 | ssm_path = 'result/ssm' 298 | Path(ssm_path).mkdir(parents=True, exist_ok=True) 299 | np.save(os.path.join(ssm_path, 'temp'), M) 300 | 301 | -------------------------------------------------------------------------------- /s1_preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import datetime 4 | import pandas as pd 5 | 6 | from pathlib import Path 7 | from utils import display 8 | from utils import vector_haversine_distances 9 | 10 | if __name__ == '__main__': 11 | tqdm.tqdm.pandas() 12 | display.configure_pandas() 13 | ################################################################################################################### 14 | # Function3: Split trajectory of each vehicle to individual files. 15 | # Create Time: 7.24 later 16 | # path = 'db/original_trajectories.csv' 17 | # trajectory = pd.read_csv(path, parse_dates=['timestamp']) 18 | # 19 | # path = 'db/trajectory_by_license/' 20 | # if not os.path.exists(path): 21 | # os.makedirs(path) 22 | # 23 | # def split_by_vehicle(license_trajectory): 24 | # license_trajectory.to_csv(path + license_trajectory.name + '.csv', index=False) 25 | # 26 | # trajectory.groupby('plate').progress_apply(split_by_vehicle) 27 | ################################################################################################################### 28 | 29 | ################################################################################################################### 30 | # Split trajectory of a vehicle to split files by stay point. 31 | # Create Time: 8.10 32 | path = 'db/original_trajectories.csv' 33 | trajectory = pd.read_csv(path, parse_dates=['timestamp']) 34 | 35 | trajectory['dis_f_pre'] = vector_haversine_distances.haversine_np(trajectory['longitude'], 36 | trajectory['latitude'], 37 | trajectory['longitude'].shift(), 38 | trajectory['latitude'].shift()) 39 | trajectory['interval'] = trajectory['timestamp'] - trajectory['timestamp'].shift() 40 | print(trajectory.loc[(trajectory['plate'] == '粤B4BX08') 41 | & (trajectory['timestamp'] > datetime.datetime(2014, 7, 16, 11, 40))]) 42 | trajectory.loc[trajectory['plate'] != trajectory['plate'].shift(), ['dis_f_pre', 'interval']] = [None, None] 43 | print(trajectory.loc[(trajectory['plate'] == '粤B4BX08') 44 | & (trajectory['timestamp'] > datetime.datetime(2014, 7, 16, 11, 40))]) 45 | 46 | # 大间隔点 47 | trajectory['big_interval'] = trajectory['interval'] > datetime.timedelta(minutes=30) 48 | # 大间隔且点距超过0.1KM的定为异常点 49 | trajectory['valid'] = ~ (trajectory['big_interval'] & (trajectory['dis_f_pre'] > 0.5)) 50 | # 给出停止点:速度等于0或者距离小于100m 51 | trajectory['stop'] = (((trajectory['velocity'] == 0) & ~trajectory['big_interval']) 52 | | (trajectory['big_interval'] & trajectory['valid'])) 53 | # 给出分组 54 | trajectory['grp'] = ((trajectory['stop'] != trajectory['stop'].shift()) 55 | | (trajectory['plate'] != trajectory['plate'].shift()) 56 | ).cumsum() 57 | 58 | # Add last timestamp to each point 59 | trajectory['last_timestamp'] = trajectory['timestamp'].shift() 60 | trajectory.loc[trajectory['plate'] != trajectory['plate'].shift(), 'last_timestamp'] = None 61 | 62 | # Make a special trajectory that switch timestamp to last one if point is end of a big interval. 63 | df_special_traj = trajectory.copy() 64 | df_special_traj['begin_time'] = df_special_traj['timestamp'] 65 | df_special_traj.loc[(df_special_traj['big_interval']) & (df_special_traj['valid']), 'begin_time'] = \ 66 | df_special_traj.loc[(df_special_traj['big_interval']) & (df_special_traj['valid']), 'last_timestamp'] 67 | 68 | rest_grp = (df_special_traj.groupby('grp')['stop'].first() 69 | & ( 70 | (df_special_traj.groupby('grp')['timestamp'].last() - 71 | df_special_traj.groupby('grp')['begin_time'].first() 72 | ) > pd.Timedelta(minutes=30) 73 | ) 74 | ) 75 | df_special_traj.loc[df_special_traj['grp'].isin(rest_grp.loc[rest_grp==True].index), 'rest'] = True 76 | df_special_traj['rest'] = df_special_traj['rest'].fillna(value=False) 77 | df_special_traj['new_seg'] = 0 78 | df_special_traj.loc[((df_special_traj['rest'] != df_special_traj['rest'].shift()) 79 | | (df_special_traj['plate'] != df_special_traj['plate'].shift())), 'new_seg'] = 1 80 | df_special_traj['grp'] = df_special_traj['new_seg'].cumsum() 81 | 82 | def save_non_rest(seg, path): 83 | if not seg.at[seg.index[0], 'rest']: 84 | seg[['plate', 'color', 'longitude', 'latitude', 'timestamp', 'velocity', 'dis_f_pre']]\ 85 | .to_csv(os.path.join(path, 86 | seg.at[seg.index[0], 'plate'] + '_' + str(seg.name) + '.csv'), 87 | index=False) 88 | 89 | seg_path = 'db/segment' 90 | Path(seg_path).mkdir(parents=True, exist_ok=True) 91 | print(trajectory.loc[(trajectory['plate'] == '粤B4BX08') 92 | & (trajectory['timestamp'] > datetime.datetime(2014, 7, 16, 11, 40))]) 93 | df_special_traj.groupby('grp').progress_apply(save_non_rest, path=seg_path,) 94 | ################################################################################################################### 95 | -------------------------------------------------------------------------------- /s2_main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import osmnx as ox 4 | import pandas as pd 5 | import candidates_preparation 6 | import position_context 7 | import interactive_voting 8 | 9 | from tqdm import tqdm 10 | from utils import data_loader 11 | 12 | tqdm.pandas() 13 | 14 | 15 | def task(m=0, n=94739, res_path='result/seg'): 16 | data = data_loader.load_vehicles(m=m, n=n, max_length=0) 17 | for vehicle, seg_name in tqdm(data): 18 | if len(vehicle.index) < 2: 19 | print(seg_name, 'length less than 2') 20 | continue 21 | dis_b_tp_pie = candidates_preparation.dis_bt_tp_ep(vehicle, tree) 22 | if dis_b_tp_pie is None: 23 | print(seg_name, 'No candidate points.') 24 | continue 25 | candidate_set = candidates_preparation.make_candidates_set(dis_b_tp_pie, extended) 26 | if len(candidate_set.index) <= 2: 27 | print(seg_name, 'Candidate points no greater than 2') 28 | continue 29 | # compute epsilon of each candidate point 30 | candidate_set['epsilon'] = position_context.compute_epsilon(candidate_set['residual']) 31 | # develop edges data(oneway, length, u, v) for each candidate points 32 | candidate_set = candidate_set.merge(edges[['oneway', 'length', 'u', 'v']], left_on='e_i', right_index=True, 33 | how='left') 34 | vehicle = vehicle.loc[vehicle.index.isin(candidate_set['t_p_i'].unique())] 35 | # reset trajectory points index to order number from 0 36 | vehicle.reset_index(drop=True, inplace=True) 37 | candidate_set['t_p_i'] = (candidate_set['t_p_i'] != candidate_set['t_p_i'].shift()).cumsum() - 1 38 | 39 | trajectory_len = len(vehicle.index) 40 | 41 | # print(candidate_set.iloc[295:310]) 42 | # %% 43 | 44 | M = position_context.compute_static_score_matrix(candidate_set, vehicle, road_network, trajectory_len) 45 | 46 | # %% 47 | 48 | c_s = candidate_set.groupby('t_p_i').apply(lambda x: x.reset_index(drop=True)) 49 | c_s.index.rename(['i', 'k'], inplace=True) 50 | c_s.drop(columns='t_p_i', inplace=True) 51 | 52 | # %% 53 | 54 | res_set = [] 55 | for i in range(trajectory_len): 56 | # res = delayed(interactive_voting.traverse_trajectory_point)(i, M, vehicle, candidate_set, c_s) 57 | res = interactive_voting.traverse_trajectory_point(i, M, vehicle, candidate_set, trajectory_len) 58 | res_set.append(res) 59 | # compute = delayed(interactive_voting.vote)(res_set) 60 | # global_optimal_path = compute.compute() 61 | global_optimal_path = interactive_voting.vote(res_set, seg_name) 62 | # P = c_s.loc[[(i, j) for i, j in enumerate(global_optimal_path)]] 63 | if not len(global_optimal_path): 64 | print(seg_name, 'no connective path') 65 | continue 66 | P = candidate_set.iloc[[candidate_set['t_p_i'].searchsorted(i) + j for i, j in enumerate(global_optimal_path)]] 67 | P.reset_index(drop=True, inplace=True) 68 | pd.concat([P[['i_p_i', 'e_i', 'end_node', 'edge_progress', 'x', 'y', 'oneway', 'length', 'u', 'v']], 69 | vehicle], axis=1).to_csv(os.path.join(res_path, seg_name)) 70 | # client.close() 71 | 72 | 73 | if __name__ == '__main__': 74 | # road_network = data_loader.load_drive_graph() 75 | # ox.io.save_graphml(road_network, filepath='db/shenzhen-drive.osm', gephi=False, encoding='utf-8') 76 | ## or 77 | # road_network = ox.graph_from_place('Shenzhen, Guangdong, China', network_type='drive') 78 | ## the road network data is created by above code segment 79 | road_network = ox.load_graphml('db/shenzhen-drive-20200813.osm') 80 | 81 | edges = ox.utils_graph.graph_to_gdfs(road_network, nodes=False, fill_edge_geometry=True) 82 | edges.drop(columns=['osmid', 'highway', 'bridge', 'name', 'key'], inplace=True) 83 | 84 | # extended = candidates_preparation.extended_edges(edges) 85 | # with open('db/shenzhen_drive_edges_extend.pkl', 'wb') as f: 86 | # pickle.dump(extended, f) 87 | ## extended edges is created by above code segment 88 | with open('db/shenzhen_drive_edges_extend.pkl', 'rb') as f: 89 | extended = pickle.load(f) 90 | 91 | # tree = candidates_preparation.make_tree(extended) 92 | # with open('db/shenzhen_drive_extended_edges_tree.pkl', 'wb') as f: 93 | # pickle.dump(tree, f) 94 | ## tree is created by above code segment 95 | with open('db/shenzhen_drive_extended_edges_tree.pkl', 'rb') as f: 96 | tree = pickle.load(f) 97 | 98 | task(m=9261, n=10000, res_path='result/seg/0-1') 99 | -------------------------------------------------------------------------------- /s3_visualize.py: -------------------------------------------------------------------------------- 1 | import osmnx as ox 2 | import pandas as pd 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from utils import data_loader, display 7 | 8 | 9 | def compute_shortest_path_len(road_network, pre_point, next_point): 10 | shortest_path_length = np.PINF 11 | node_path = [] 12 | if (pre_point['e_i'] == next_point['e_i']) \ 13 | and not ((pre_point['oneway']) 14 | and (pre_point['i_p_i'] != next_point['i_p_i']) 15 | and (next_point['edge_progress'] - pre_point['edge_progress'] < 0)): 16 | shortest_path_length = abs(next_point['edge_progress'] - pre_point['edge_progress']) * pre_point['length'] 17 | print(pre_point['u'], pre_point['v'], pre_point['end_node'], next_point['end_node']) 18 | elif (0 == pre_point['end_node']) & (0 == next_point['end_node']): 19 | if pre_point['oneway'] & next_point['oneway']: 20 | try: 21 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 22 | + nx.shortest_path_length(road_network, 23 | pre_point['v'], next_point['u'], 24 | weight='length') 25 | + next_point['length'] * next_point['edge_progress'] 26 | ) 27 | if temp_length < shortest_path_length: 28 | shortest_path_length = temp_length 29 | node_path = [pre_point['v'], next_point['u']] 30 | except: 31 | pass 32 | elif ~pre_point['oneway'] & next_point['oneway']: 33 | # pre seg forward, next seg forward 34 | try: 35 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 36 | + nx.shortest_path_length(road_network, 37 | pre_point['v'], next_point['u'], 38 | weight='length') 39 | + next_point['length'] * next_point['edge_progress'] 40 | ) 41 | if temp_length < shortest_path_length: 42 | shortest_path_length = temp_length 43 | node_path = [pre_point['v'], next_point['u']] 44 | except: 45 | pass 46 | # pre seg backward, next seg forward 47 | try: 48 | temp_length = (pre_point['length'] * pre_point['edge_progress'] 49 | + nx.shortest_path_length(road_network, 50 | pre_point['u'], next_point['u'], 51 | weight='length') 52 | + next_point['length'] * next_point['edge_progress'] 53 | ) 54 | if temp_length < shortest_path_length: 55 | shortest_path_length = temp_length 56 | node_path = [pre_point['u'], next_point['u']] 57 | except: 58 | pass 59 | elif pre_point['oneway'] & ~next_point['oneway']: 60 | # pre seg forward, next seg forward 61 | try: 62 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 63 | + nx.shortest_path_length(road_network, 64 | pre_point['v'], next_point['u'], 65 | weight='length') 66 | + next_point['length'] * next_point['edge_progress'] 67 | ) 68 | if temp_length < shortest_path_length: 69 | shortest_path_length = temp_length 70 | node_path = [pre_point['v'], next_point['u']] 71 | except: 72 | pass 73 | # pre seg forward, next seg backward 74 | try: 75 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 76 | + nx.shortest_path_length(road_network, 77 | pre_point['v'], next_point['v'], 78 | weight='length') 79 | + next_point['length'] * (1 - next_point['edge_progress']) 80 | ) 81 | if temp_length < shortest_path_length: 82 | shortest_path_length = temp_length 83 | node_path = [pre_point['v'], next_point['v']] 84 | except: 85 | pass 86 | elif ~pre_point['oneway'] & ~next_point['oneway']: 87 | # pre seg forward, next seg forward 88 | try: 89 | temp_length = (pre_point['length'] * (1 - pre_point['edge_progress']) 90 | + nx.shortest_path_length(road_network, 91 | pre_point['v'], next_point['u'], 92 | weight='length') 93 | + next_point['length'] * next_point['edge_progress'] 94 | ) 95 | if temp_length < shortest_path_length: 96 | shortest_path_length = temp_length 97 | node_path = [pre_point['v'], next_point['u']] 98 | except: 99 | pass 100 | # pre seg backward, next seg forward 101 | try: 102 | temp_length = (pre_point['length'] * pre_point['edge_progress'] 103 | + nx.shortest_path_length(road_network, 104 | pre_point['u'], next_point['u'], 105 | weight='length') 106 | + next_point['length'] * next_point['edge_progress'] 107 | ) 108 | if temp_length < shortest_path_length: 109 | shortest_path_length = temp_length 110 | node_path = [pre_point['u'], next_point['u']] 111 | except: 112 | pass 113 | # pre seg forward, next seg backward 114 | try: 115 | temp_length = ( 116 | pre_point['length'] * (1 - pre_point['edge_progress']) 117 | + nx.shortest_path_length(road_network, pre_point['v'], next_point['v'], weight='length') 118 | + next_point['length'] * (1 - next_point['edge_progress']) 119 | ) 120 | if temp_length < shortest_path_length: 121 | shortest_path_length = temp_length 122 | node_path = [pre_point['v'], next_point['v']] 123 | except: 124 | pass 125 | # pre seg backward, next seg backward 126 | try: 127 | temp_length = ( 128 | pre_point['length'] * pre_point['edge_progress'] 129 | + nx.shortest_path_length(road_network, pre_point['u'], next_point['v'], weight='length') 130 | + next_point['length'] * (1 - next_point['edge_progress']) 131 | ) 132 | if temp_length < shortest_path_length: 133 | shortest_path_length = temp_length 134 | node_path = [pre_point['u'], next_point['v']] 135 | except: 136 | pass 137 | elif (0 != pre_point['end_node']) & (0 == next_point['end_node']): 138 | if 1 == pre_point['end_node']: 139 | pre_node = 'u' 140 | elif 2 == pre_point['end_node']: 141 | pre_node = 'v' 142 | if next_point['oneway']: 143 | try: 144 | temp_length = ( 145 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['u'], 146 | weight='length') 147 | + next_point['length'] * next_point['edge_progress'] 148 | ) 149 | if temp_length < shortest_path_length: 150 | shortest_path_length = temp_length 151 | node_path = [pre_point[pre_node], next_point['u']] 152 | except: 153 | pass 154 | elif ~next_point['oneway']: 155 | # next seg forward 156 | try: 157 | temp_length = ( 158 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['u'], 159 | weight='length') 160 | + next_point['length'] * next_point['edge_progress'] 161 | ) 162 | if temp_length < shortest_path_length: 163 | shortest_path_length = temp_length 164 | node_path = [pre_point[pre_node], next_point['u']] 165 | except: 166 | pass 167 | # next seg backward 168 | try: 169 | temp_length = ( 170 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point['v'], 171 | weight='length') 172 | + next_point['length'] * (1 - next_point['edge_progress']) 173 | ) 174 | if temp_length < shortest_path_length: 175 | shortest_path_length = temp_length 176 | node_path = [pre_point[pre_node], next_point['v']] 177 | except: 178 | pass 179 | elif (0 == pre_point['end_node']) & (0 != next_point['end_node']): 180 | if 1 == next_point['end_node']: 181 | next_node = 'u' 182 | elif 2 == next_point['end_node']: 183 | next_node = 'v' 184 | if pre_point['oneway']: 185 | try: 186 | temp_length = ( 187 | pre_point['length'] * (1 - pre_point['edge_progress']) 188 | + nx.shortest_path_length(road_network, pre_point['v'], next_point[next_node], 189 | weight='length') 190 | ) 191 | if temp_length < shortest_path_length: 192 | shortest_path_length = temp_length 193 | node_path = [pre_point['v'], next_point[next_node]] 194 | except: 195 | pass 196 | elif ~pre_point['oneway']: 197 | # next seg forward 198 | try: 199 | temp_length = ( 200 | pre_point['length'] * (1 - pre_point['edge_progress']) 201 | + nx.shortest_path_length(road_network, pre_point['v'], next_point[next_node], 202 | weight='length') 203 | ) 204 | if temp_length < shortest_path_length: 205 | shortest_path_length = temp_length 206 | node_path = [pre_point['v'], next_point[next_node]] 207 | except: 208 | pass 209 | # next seg backward 210 | try: 211 | temp_length = ( 212 | pre_point['length'] * pre_point['edge_progress'] 213 | + nx.shortest_path_length(road_network, pre_point['u'], next_point[next_node], 214 | weight='length') 215 | ) 216 | if temp_length < shortest_path_length: 217 | shortest_path_length = temp_length 218 | node_path = [pre_point['u'], next_point[next_node]] 219 | except: 220 | pass 221 | elif (0 != pre_point['end_node']) & (0 != next_point['end_node']): 222 | if 1 == pre_point['end_node']: 223 | pre_node = 'u' 224 | elif 2 == pre_point['end_node']: 225 | pre_node = 'v' 226 | if 1 == next_point['end_node']: 227 | next_node = 'u' 228 | elif 2 == next_point['end_node']: 229 | next_node = 'v' 230 | try: 231 | temp_length = ( 232 | nx.shortest_path_length(road_network, pre_point[pre_node], next_point[next_node], 233 | weight='length') 234 | ) 235 | if temp_length < shortest_path_length: 236 | shortest_path_length = temp_length 237 | node_path = [pre_point[pre_node], next_point[next_node]] 238 | except: 239 | pass 240 | 241 | if len(node_path): 242 | node_path = nx.shortest_path(road_network, node_path[0], node_path[1], weight='length') 243 | return shortest_path_length, node_path 244 | 245 | 246 | if __name__ == '__main__': 247 | display.configure_pandas() 248 | # road_network = data_loader.load_drive_graph() 249 | # ox.io.save_graphml(road_network, filepath='db/shenzhen-drive.osm', gephi=False, encoding='utf-8') 250 | ## the road network data is created by above code segment 251 | road_network = ox.load_graphml('db/shenzhen-drive-20200813.osm') 252 | 253 | path = 'result/seg/0-1/粤B14337_4832.csv' 254 | road_path = pd.read_csv(path) 255 | shenzhen_road_network = ox.truncate.truncate_graph_bbox(road_network, 256 | road_path['y'].max() + 0.03, 257 | road_path['y'].min() - 0.03, 258 | road_path['x'].max() + 0.03, 259 | road_path['x'].min() - 0.03) 260 | 261 | route = [] 262 | for i in range(len(road_path.index))[1:]: 263 | if 75 == i: 264 | print('cool') 265 | _, sub_path = compute_shortest_path_len(shenzhen_road_network, road_path.iloc[i-1], road_path.iloc[i]) 266 | print(sub_path) 267 | if len(route) and len(sub_path) and (route[-1] == sub_path[0]): 268 | route.extend(sub_path[1:]) 269 | else: 270 | route.extend(sub_path) 271 | print(route) 272 | 273 | ox.plot_graph_route(shenzhen_road_network, route, route_linewidth=2, dpi=1200) 274 | -------------------------------------------------------------------------------- /utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import osmnx as ox 3 | import glob 4 | import ntpath 5 | 6 | 7 | def path_leaf(path): 8 | head, tail = ntpath.split(path) 9 | return tail or ntpath.basename(head) 10 | 11 | 12 | def load_vehicles(m=0, n=94739, max_length=0): 13 | vehicles = glob.glob('db/segment/*.csv') 14 | if n > len(vehicles): 15 | print('data_loader.load_vehicles: there is not existing %d vehicles, only %d' % (n, len(vehicles))) 16 | used = vehicles[m:n] 17 | res = [] 18 | for vehicle in used: 19 | res.append(( 20 | pd.read_csv(vehicle, usecols=['plate','longitude','latitude','timestamp','velocity','dis_f_pre'], 21 | parse_dates=['timestamp']), path_leaf(vehicle) 22 | )) 23 | if max_length: 24 | for i in range(n): 25 | res[i] = (res[i][0].iloc[:max_length], res[i][1]) 26 | return res 27 | 28 | 29 | def load_drive_graph(path='db/shenzhen20170701-all.osm',): 30 | """ 31 | load original open street map data of a city and extract drivable road network 32 | :param path: original osm xml format data 33 | :return: drivable graph 34 | """ 35 | shenzhen_all = ox.graph_from_xml(path) 36 | sz_nodes, sz_edges = ox.graph_to_gdfs(shenzhen_all, fill_edge_geometry=True) 37 | 38 | not_valid_highway = 'cycleway|footway|path|pedestrian|steps|track|corridor|elevator' \ 39 | '|escalator|proposed|construction|bridleway|abandoned|platform' \ 40 | '|raceway|service'.split('|') 41 | not_valid_service = 'parking|parking_aisle|driveway|private|emergency_access'.split('|') 42 | print('original_edges', sz_edges.shape) 43 | 44 | sz_gdfs = sz_edges.loc[((sz_edges['highway'].notna()) 45 | & (sz_edges['area'] != 'yes') 46 | & (sz_edges['access'] != 'private') 47 | & (~sz_edges['service'].isin(not_valid_service)))] 48 | 49 | for tag in not_valid_highway: 50 | sz_gdfs = sz_gdfs.loc[sz_gdfs['highway'] != tag].copy(deep=True) 51 | print('drivable_edges:', sz_gdfs.shape) 52 | 53 | shenzhen_drive = ox.graph_from_gdfs(sz_nodes, sz_gdfs) 54 | 55 | return shenzhen_drive 56 | -------------------------------------------------------------------------------- /utils/dis.py: -------------------------------------------------------------------------------- 1 | """Functions to calculate distances and find nearest node/edge(s) to point(s).""" 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import pandas as pd 6 | from shapely.geometry import Point 7 | from tqdm import tqdm 8 | 9 | from osmnx import utils 10 | from osmnx import utils_geo 11 | from osmnx import utils_graph 12 | 13 | # scipy and sklearn are optional dependencies for faster nearest node search 14 | try: 15 | from scipy.spatial import cKDTree 16 | except ImportError: 17 | cKDTree = None 18 | try: 19 | from sklearn.neighbors import BallTree 20 | except ImportError: 21 | BallTree = None 22 | 23 | 24 | def get_nearest_edge(G, point, return_geom=False, return_dist=False): 25 | """ 26 | Return the nearest edge to a point, by minimum euclidean distance. 27 | 28 | Parameters 29 | ---------- 30 | G : networkx.MultiDiGraph 31 | input graph 32 | point : tuple 33 | the (lat, lng) or (y, x) point for which we will find the nearest edge 34 | in the graph 35 | return_geom : bool 36 | Optionally return the geometry of the nearest edge 37 | return_dist : bool 38 | Optionally return the distance in graph's coordinates' units between 39 | the point and the nearest edge 40 | 41 | Returns 42 | ------- 43 | tuple 44 | Graph edge unique identifier as a tuple of (u, v, key). 45 | Or a tuple of (u, v, key, geom) if return_geom is True. 46 | Or a tuple of (u, v, key, dist) if return_dist is True. 47 | Or a tuple of (u, v, key, geom, dist) if return_geom and return_dist are True. 48 | """ 49 | # get u, v, key, geom from all the graph edges 50 | gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True) 51 | 52 | edges = gdf_edges[["u", "v", "key", "geometry"]].values 53 | 54 | # convert lat/lng point to x/y for shapely distance operation 55 | xy_point = Point(reversed(point)) 56 | 57 | # calculate euclidean distance from each edge's geometry to this point 58 | edge_distances = [(edge, xy_point.distance(edge[3])) for edge in edges] 59 | 60 | # the nearest edge minimizes the distance to the point 61 | (u, v, key, geom), dist = min(edge_distances, key=lambda x: x[1]) 62 | utils.log(f"Found nearest edge ({u, v, key}) to point {point}") 63 | 64 | # return results requested by caller 65 | if return_dist and return_geom: 66 | return u, v, key, geom, dist 67 | elif return_dist: 68 | return u, v, key, dist 69 | elif return_geom: 70 | return u, v, key, geom 71 | else: 72 | return u, v, key 73 | 74 | 75 | def get_nearest_edges(G, X, Y, method=None, dist=0.0001): 76 | """ 77 | Return the graph edges nearest to a list of points. 78 | 79 | Pass in points as separate vectors of X and Y coordinates. The 'kdtree' 80 | method is by far the fastest with large data sets, but only finds 81 | approximate nearest edges if working in unprojected coordinates like 82 | lat-lng (it precisely finds the nearest edge if working in projected 83 | coordinates). The 'balltree' method is second fastest with large data 84 | sets, but it is precise if working in unprojected coordinates like 85 | lat-lng. As a rule of thumb, if you have a small graph just use 86 | method=None. If you have a large graph with lat-lng coordinates, use 87 | method='balltree'. If you have a large graph with projected coordinates, 88 | use method='kdtree'. Note that if you are working in units of lat-lng, 89 | the X vector corresponds to longitude and the Y vector corresponds 90 | to latitude. The method creates equally distanced points along the edges 91 | of the network. Then, these points are used in a kdTree or BallTree search 92 | to identify which is nearest.Note that this method will not give the exact 93 | perpendicular point along the edge, but the smaller the *dist* parameter, 94 | the closer the solution will be. 95 | 96 | Parameters 97 | ---------- 98 | G : networkx.MultiDiGraph 99 | input graph 100 | X : list-like 101 | The vector of longitudes or x's for which we will find the nearest 102 | edge in the graph. For projected graphs, use the projected coordinates, 103 | usually in meters. 104 | Y : list-like 105 | The vector of latitudes or y's for which we will find the nearest 106 | edge in the graph. For projected graphs, use the projected coordinates, 107 | usually in meters. 108 | method : string {None, 'kdtree', 'balltree'} 109 | Which method to use for finding nearest edge to each point. 110 | If None, we manually find each edge one at a time using 111 | get_nearest_edge. If 'kdtree' we use 112 | scipy.spatial.cKDTree for very fast euclidean search. Recommended for 113 | projected graphs. If 'balltree', we use sklearn.neighbors.BallTree for 114 | fast haversine search. Recommended for unprojected graphs. 115 | 116 | dist : float 117 | spacing length along edges. Units are the same as the geom; Degrees for 118 | unprojected geometries and meters for projected geometries. The smaller 119 | the value, the more points are created. 120 | 121 | Returns 122 | ------- 123 | ne : np.array 124 | array of nearest edges represented by u and v (the IDs of the nodes 125 | they link) and key 126 | """ 127 | if method is None: 128 | # calculate nearest edge one at a time for each (y, x) point 129 | ne = [get_nearest_edge(G, (y, x)) for x, y in tqdm(zip(X, Y))] 130 | 131 | elif method == "kdtree": 132 | 133 | # check if we were able to import scipy.spatial.cKDTree successfully 134 | if not cKDTree: 135 | raise ImportError("The scipy package must be installed to use this optional feature.") 136 | 137 | # transform graph into DataFrame 138 | edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True) 139 | 140 | # transform edges into evenly spaced points 141 | edges["points"] = edges.apply( 142 | lambda x: utils_geo.redistribute_vertices(x.geometry, dist), axis=1 143 | ) 144 | 145 | # develop edges data for each created points 146 | extended = ( 147 | edges["points"] 148 | .apply([pd.Series]) 149 | .stack() 150 | .reset_index(level=1, drop=True) 151 | .join(edges) 152 | .reset_index() 153 | ) 154 | 155 | # Prepare btree arrays 156 | nbdata = np.array( 157 | list( 158 | zip( 159 | extended["Series"].apply(lambda x: x.x), extended["Series"].apply(lambda x: x.y) 160 | ) 161 | ) 162 | ) 163 | 164 | # build a k-d tree for euclidean nearest node search 165 | btree = cKDTree(data=nbdata, compact_nodes=True, balanced_tree=True) 166 | 167 | # query the tree for nearest node to each point 168 | points = np.array([X, Y]).T 169 | dist, idx = btree.query(points, k=1) # Returns ids of closest point 170 | eidx = extended.loc[idx, "index"] 171 | ne = edges.loc[eidx, ["u", "v", "key"]] 172 | 173 | elif method == "balltree": 174 | # check if we were able to import sklearn.neighbors.BallTree successfully 175 | if not BallTree: 176 | raise ImportError( 177 | "The scikit-learn package must be installed to use this optional feature." 178 | ) 179 | 180 | # transform graph into DataFrame 181 | edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True) 182 | 183 | # transform edges into evenly spaced points 184 | edges["points"] = edges.apply( 185 | lambda x: utils_geo.redistribute_vertices(x.geometry, dist), axis=1 186 | ) 187 | 188 | # develop edges data for each created points 189 | extended = ( 190 | edges["points"] 191 | .apply([pd.Series]) 192 | .stack() 193 | .reset_index(level=1, drop=True) 194 | .join(edges) 195 | .reset_index() 196 | ) 197 | 198 | # haversine requires data in form of [lat, lng] and inputs/outputs in units of radians 199 | nodes = pd.DataFrame( 200 | { 201 | "x": extended["Series"].apply(lambda x: x.x), 202 | "y": extended["Series"].apply(lambda x: x.y), 203 | } 204 | ) 205 | nodes_rad = np.deg2rad(nodes[["y", "x"]].values.astype(np.float)) 206 | points = np.array([Y, X]).T 207 | points_rad = np.deg2rad(points) 208 | 209 | # build a ball tree for haversine nearest node search 210 | tree = BallTree(nodes_rad, metric="haversine") 211 | 212 | # query the tree for nearest node to each point 213 | idx = tree.query(points_rad, k=5, return_distance=False) 214 | print(idx) 215 | eidx = extended.loc[idx[:, 0], "index"] 216 | ne = edges.loc[eidx, ["u", "v", "key"]] 217 | 218 | else: 219 | raise ValueError("You must pass a valid method name, or None.") 220 | 221 | utils.log(f"Found nearest edges to {len(X)} points") 222 | 223 | return np.array(ne) 224 | -------------------------------------------------------------------------------- /utils/display.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import logging 3 | 4 | 5 | def configure_logging(): 6 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') 7 | return 8 | 9 | 10 | def configure_pandas(): 11 | desired_width = 1280 12 | pd.set_option('display.width', desired_width) 13 | pd.set_option('display.max_columns', 20) 14 | return 15 | -------------------------------------------------------------------------------- /utils/preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | import pandas as pd 4 | import utils.vector_haversine_distances as vector_haversine_distances 5 | 6 | from pathlib import Path 7 | 8 | 9 | def datetime_format_transfer(df, columns_list, format_str='%Y-%m-%dT%H:%M:%S.%fZ'): 10 | for column in columns_list: 11 | df[column] = df[column].apply(lambda x: datetime.datetime.strptime(x, format_str)) 12 | return df 13 | 14 | 15 | def split_trajectory_by_rest_status(trajectory, destination_path="db/seg", 16 | plate="plate", speed='velocity', 17 | timestamp="timestamp", longitude="longitude", latitude="latitude", 18 | condition=None, save=True): 19 | """ 20 | split_trajectory_by_rest_status 21 | :param save: 22 | :param speed: 23 | :param trajectory: 24 | :param destination_path: 25 | :param plate: 26 | :param timestamp: 27 | :param longitude: 28 | :param latitude: 29 | :param condition: for shenzhen 2018 data, it claim 'use' is '营运' 30 | :return: 31 | """ 32 | # if there is(are) condition(s) to filter trajectory, do it 33 | if not condition: 34 | mark = [True] * len(trajectory.index) 35 | for k, v in condition: 36 | mark = mark & trajectory[k] == v 37 | trajectory = trajectory.loc[mark] 38 | # Calculate distance from previous point 39 | 40 | trajectory['dis_f_pre'] = vector_haversine_distances.haversine_np(trajectory[longitude], 41 | trajectory[latitude], 42 | trajectory[longitude].shift(), 43 | trajectory[latitude].shift()) 44 | 45 | # Calculate time interval length from previous point 46 | trajectory['interval'] = trajectory[timestamp] - trajectory[timestamp].shift() 47 | # Set dis_f_pre and interval of first point of each plate as None 48 | trajectory.loc[trajectory[plate] != trajectory[plate].shift(), ['dis_f_pre', 'interval']] = [None, None] 49 | # Give annotation of big interval point, which defined by condition the interval bigger than 30 minutes 50 | trajectory['big_interval'] = trajectory['interval'] > datetime.timedelta(minutes=30) 51 | # Give 'valid' annotation, which indicate points not caused by GPS data missing 52 | # data missing means: there is big interval, and the begin and end of the interval is far 53 | trajectory['valid'] = ~ (trajectory['big_interval'] & (trajectory['dis_f_pre'] > 0.5)) 54 | # Give stop annotation, which indicate the vehicle is stay 55 | trajectory['stop'] = (((trajectory['dis_f_pre'] < 0.1) & ~trajectory['big_interval']) 56 | | (trajectory['big_interval'] & trajectory['valid'])) 57 | 58 | # group trajectory by continuous same stop status point 59 | trajectory['grp'] = ((trajectory['stop'] != trajectory['stop'].shift()) 60 | | (trajectory[plate] != trajectory[plate].shift()) 61 | ).cumsum() 62 | # Add last timestamp to each point, for later usage 63 | trajectory['last_timestamp'] = trajectory[timestamp].shift() 64 | trajectory.loc[trajectory[plate] != trajectory[plate].shift(), 'last_timestamp'] = None 65 | # Make a special trajectory that switch timestamp to last one if 66 | # two condition is satisfied (1) point is end of a big interval (2) point is stop. 67 | # Doing this to avoid missing the stay interval before the point which is in end of a big interval 68 | df_special_traj = trajectory.copy() 69 | df_special_traj['begin_time'] = df_special_traj[timestamp] 70 | df_special_traj.loc[(df_special_traj['big_interval']) & (df_special_traj['valid']), 'begin_time'] = \ 71 | df_special_traj.loc[(df_special_traj['big_interval']) & (df_special_traj['valid']), 'last_timestamp'] 72 | # Select rest group, which satisfied (1) group status is stop (2) group duration longer than 30 minutes 73 | rest_grp = (df_special_traj.groupby('grp')['stop'].first() 74 | & ( 75 | (df_special_traj.groupby('grp')[timestamp].last() - 76 | df_special_traj.groupby('grp')['begin_time'].first() 77 | ) > pd.Timedelta(minutes=30) 78 | ) 79 | ) 80 | df_special_traj.loc[df_special_traj['grp'].isin(rest_grp.loc[rest_grp==True].index), 'rest'] = True 81 | df_special_traj['rest'] = df_special_traj['rest'].fillna(value=False) 82 | # Grouping by continuous same rest status point 83 | df_special_traj['new_seg'] = 0 84 | df_special_traj.loc[((df_special_traj['rest'] != df_special_traj['rest'].shift()) 85 | | (df_special_traj[plate] != df_special_traj[plate].shift())), 'new_seg'] = 1 86 | df_special_traj['grp'] = df_special_traj['new_seg'].cumsum() 87 | if save: 88 | def save_non_rest(seg, path): 89 | if not seg.at[seg.index[0], 'rest']: 90 | seg[[plate, longitude, latitude, timestamp, speed, 'dis_f_pre']]\ 91 | .to_csv(os.path.join(path, 92 | seg.at[seg.index[0], plate] + '_' + str(seg.name) + '.csv'), 93 | index=False) 94 | Path(destination_path).mkdir(parents=True, exist_ok=True) 95 | df_special_traj.groupby('grp').apply(save_non_rest, path=destination_path,) 96 | 97 | -------------------------------------------------------------------------------- /utils/vector_haversine_distances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def haversine_np(lon1, lat1, lon2, lat2, miles=False): 6 | """ 7 | Calculate the great-circle distance bewteen two points on the Earth surface. 8 | 9 | :input: 4 GPS coordinates, containing the latitude and longitude of each point 10 | in decimal degrees. 11 | 12 | Example: haversine(45.7597, 4.8422, 48.8567, 2.3508) 13 | 14 | :output: Returns the distance bewteen the two points. 15 | The default unit is kilometers. Miles can be returned 16 | if the ``miles`` parameter is set to True. 17 | 18 | """ 19 | AVG_EARTH_RADIUS = 6371.0088 # in km 20 | 21 | lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2]) 22 | 23 | dlon = lon2 - lon1 24 | dlat = lat2 - lat1 25 | d = np.sin(dlat * 0.5) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon * 0.5) ** 2 26 | h = 2 * AVG_EARTH_RADIUS * np.arcsin(np.sqrt(d)) 27 | if miles: 28 | return h * 0.621371 # in miles 29 | else: 30 | return h * 1000 # in meters 31 | 32 | 33 | if __name__ == '__main__': 34 | print(haversine_np([np.nan, 113.961098], [np.nan, 22.553101], [113.962997, 113.962303], [22.547001, 22.547001])) 35 | print(haversine_np(114.007401,22.535500, 114.0090009,22.53423323)) --------------------------------------------------------------------------------