├── LICENSE.txt ├── README.md ├── association-analysis ├── association_data.xlsx ├── indicators.py └── singlevariable.csv ├── data-collection ├── README.md ├── collect_preprocess_URN_data.py ├── test.rar ├── train.rar └── world_city_20200715.txt ├── inter-city-network-homogeneity ├── cross_f1score.json ├── cross_f1score_shifted.json └── cross_samples_f1score.json ├── intra-city-network-homogeneity ├── README.md ├── figure1.defg_result │ ├── .ipynb_checkpoints │ │ └── figure1.b_c-checkpoint.ipynb │ ├── precision_recall_america.json │ ├── precision_recall_asia.json │ ├── precision_recall_europe.json │ └── precision_recall_total.json ├── large_city_network_result │ └── README.md ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── distmult.cpython-37.pyc │ │ ├── distmult.cpython-38.pyc │ │ ├── gat.cpython-37.pyc │ │ ├── graph_sage.cpython-37.pyc │ │ ├── node2vec.cpython-37.pyc │ │ ├── relational_gcn.cpython-37.pyc │ │ ├── relational_gcn.cpython-38.pyc │ │ ├── spectral_gcn.cpython-37.pyc │ │ └── struc2vec.cpython-37.pyc │ ├── distmult.py │ ├── gat.py │ ├── graph_sage.py │ ├── node2vec.py │ ├── relational_gcn.py │ ├── spectral_gcn.py │ └── struc2vec.py ├── shifted_result │ └── README.md ├── tester │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── gnn_tester.cpython-37.pyc │ │ ├── gnn_tester.cpython-38.pyc │ │ ├── vec_tester.cpython-37.pyc │ │ └── vec_tester.cpython-38.pyc │ ├── cross_tester.py │ ├── gnn_tester.py │ └── vec_tester.py ├── trainer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── gnn_trainer.cpython-37.pyc │ │ └── vec_trainer.cpython-37.pyc │ ├── gat_trainer.py │ ├── gnn_trainer.py │ ├── graph_sage_trainer.py │ ├── node2vec_trainer.py │ ├── relational_gcn_trainer.py │ ├── spectral_gcn_trainer.py │ ├── struc2vec_trainer.py │ └── vec_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── data_loader.cpython-37.pyc │ └── data_loader.cpython-38.pyc │ ├── analyze.py │ └── data_loader.py ├── main-figure ├── 001.png ├── 002.png ├── 003.png ├── 004.png ├── 004_part.png └── 005.png ├── road-classification ├── README.md ├── kmean_pca_analysis.py ├── measures.py ├── results │ ├── f1_score_test_result.json │ ├── test_set_index.txt │ └── training_set_index.txt └── tools │ ├── Bridges.py │ └── __pycache__ │ └── Bridges.cpython-36.pyc └── takeaway-1 └── F1-30-30.txt /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 jiang719 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Quantifying the Spatial Homogeneity of Urban Road Networks via Graph Neural Networks 2 | (Publication DOI: 10.1038/s42256-022-00462-y) 3 | 4 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5866593.svg)](https://doi.org/10.5281/zenodo.5866593) 5 | 6 | A graph neural network approach that calculates the intra-city and inter-city spatial homogeneity of urban road networks (URNs) 7 | 8 | ## Introduction 9 | 10 | * The spatial homogeneity of URNs measures the similarity of intersection connection patterns between the subnetwork and the entire network. 11 | It captures the multi-hop node neighborhood relationships, and holds potential for applications in urban science, network science, and urban computing. 12 | * This GitHub repository presents a user-friendly method for quantifying the network homogeneity of URNs on a global scale. 13 | * Additionally, URN classification, URN network irregularity (NI) computation, analysis of socioeconomic factors, and inter-city homogeneity analysis are also incorporated. 14 | 15 | ## Publication 16 | 17 | **Quantifying the Spatial Homogeneity of Urban Road Networks via Graph Neural Networks** 18 | Jiawei Xue, Nan Jiang, Senwei Liang, Qiyuan Pang, Takahiro Yabe, Satish V Ukkusuri\*, Jianzhu Ma\*, March 2022, Nature Machine Intelligence. 19 | 20 | ## Journal/Media Coverage 21 | **Nature Machine Intelligence**: https://www.nature.com/articles/s42256-022-00476-6 22 | 23 | **Nature Computational Science**: https://www.nature.com/articles/s43588-022-00244-x 24 | 25 | **Tech Xplore**: https://techxplore.com/news/2022-05-graph-neural-networks-spatial-homogeneity.html 26 | 27 | **Peking University News**: https://news.pku.edu.cn/jxky/b7c965cbb640434ca109da42c94d7e39.htm 28 | 29 | **Beijing University of Posts and Telecommunications**: https://lib.bupt.edu.cn/a/zuixingonggao/2022/0905/4240.html 30 | 31 | ## Requirements 32 | * Python 3.6 33 | * NetworkX 2.1 34 | * OSMnx 0.11.4 35 | * PyTorch 1.0 36 | 37 | ## Directory Structure 38 | 39 | * **data-collection**: Collect and preprocess road network data for 30 cities across the United States, Europe, and Asia. 40 | * **intra-city-network-homogeneity**: Conduct link prediction on URNs by utilizing six distinct encoders, including relational GCN, and a decoder known as DistMult, followed by the computation of F1 scores. 41 | * **road-classification**: Execute URN classification and discover its correlations with F1 scores. 42 | * **association-analysis**: Perform a correlation analysis between F1 scores and socioeconomic factors as well as network topology metrics. 43 | * **inter-city-network-homogeneity**: Obtain inter-city homogeneity by training graph neural network (GNN) models on city A and subsequently testing them on city B. 44 | 45 | ## Methods 46 | a. Description of spatial homogeneity. 47 | b. A road network near 40.71798°N, 74.00053°W in New York City. © OpenStreetMap contributors. 48 | c. Message-passing mechanism between adjacent layers in the GNN. 49 | d. Connecting strength S of a pair of nodes. 50 | e. 
We define the road network spatial homogeneity as the F1 score of the best GNN model with a well-tuned connecting strength threshold δ. 51 | 52 |

53 | *(Figure: overview of the GNN-based homogeneity method, panels a–e described above; image omitted.)* 54 |
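As a minimal sketch of step e (not the training pipeline itself), the snippet below turns a list of link-prediction outcomes into the F1 score used as the homogeneity measure. It assumes each outcome is a `{'target': 0/1, 'predict': 0/1}` record, the format consumed by `confusion_mat` in `association-analysis/indicators.py`, with the connecting-strength threshold δ already applied to produce the binary `predict` labels.

```python
def f1_score(results):
    """F1 of a link-prediction run; `results` is a list of {'target', 'predict'} dicts."""
    tp = sum(r['target'] == 1 and r['predict'] == 1 for r in results)  # true positives
    fp = sum(r['target'] == 0 and r['predict'] == 1 for r in results)  # false positives
    fn = sum(r['target'] == 1 and r['predict'] == 0 for r in results)  # false negatives
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0
```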

55 | 56 | ## Takeaway: the similarity between road networks in two cities 57 | * We compute the spatial homogeneity by training the GNN model on road networks in city A and testing it on road networks in city B. 58 | * We ultimately obtain 30*30=900 F1 scores for the following 30 cities. 59 | * Each entry in the following 30*30 matrix represents the directional similarity of road networks in two cities. 60 | * Please refer to the section [**Transfer learning reveals intercity similarity**](https://www.researchgate.net/publication/348169398_Quantifying_the_Spatial_Homogeneity_of_Urban_Road_Networks_via_Graph_Neural_Networks) in our paper. 61 | 62 |

63 | *(Figure: 30 × 30 matrix of inter-city F1 scores; image omitted.)* 64 |
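The 900 directional scores pointed to in the list below can be loaded as a matrix. This is a sketch under the assumption that `takeaway-1/F1-30-30.txt` stores one whitespace-separated row of 30 F1 values per training city; adjust the parsing if the actual file layout differs.

```python
import numpy as np

# Assumed layout: row i = GNN trained on city i, column j = tested on city j.
f1 = np.loadtxt('takeaway-1/F1-30-30.txt')
assert f1.shape == (30, 30)
# Off-diagonal mean = average cross-city transferability.
print('mean cross-city F1:', f1[~np.eye(30, dtype=bool)].mean())
```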

65 | 66 | * For those interested in applying our homogeneity score in their research across various domains, such as, 67 | * **Transfer learning (computer science)**, refs [1],[2], 68 | * **Global road network analysis (urban science)**, refs [3],[4], 69 | * **Global congestion analysis, accident analysis (transportation engineering)**, refs [5],[6], 70 | * **Urban infrastructure evaluation (economics, sociology)**, refs [7],[8], please refer to [**takeaway-1/F1-30-30.txt**](https://github.com/jiang719/road-network-predictability/blob/master/takeaway-1/F1-30-30.txt) under this GitHub page to access these 30*30=900 values. 71 | 72 | with 73 | 74 | | Index | Authors | Title | Publication | 75 | | :-----| :-----| :-----| :-----| 76 | | 1 | Wei, Y., Zheng, Y., & Yang, Q.| Transfer knowledge between cities. | SIGKDD, 2016 | 77 | | 2 | He, T., Bao, J., Li, R., Ruan, S., Li, Y., Song, L., ... & Zheng, Y.| What is the human mobility in a new city: Transfer mobility knowledge across cities. | The Web Conference, 2020 | 78 | | 3 | Barrington-Leigh, C., & Millard-Ball, A.| Global trends toward urban street-network sprawl.| PNAS, 2020 | 79 | | 4 | Burghardt, K., Uhl, J. H., Lerman, K., & Leyk, S.| Road network evolution in the urban and rural United States since 1900. | Computers, Environment and Urban Systems, 2022 | 80 | | 5 | Çolak, S., Lima, A., & González, M. C.| Understanding congested travel in urban areas. | Nature Communications, 2016 | 81 | | 6 | Thompson, J., Stevenson, M., Wijnands, J. S., Nice, K. A., Aschwanden, G. D., Silver, J., ... & Morrison, C. N.| A global analysis of urban design types and road transport injury: an image processing study. | The Lancet Planetary Health, 2020| 82 | | 7 | Bettencourt, L. M., Lobo, J., Helbing, D., Kühnert, C., & West, G. B.|Growth, innovation, scaling, and the pace of life in cities.|PNAS, 2007 | 83 | | 8 | Arcaute, E., Hatna, E., Ferguson, P., Youn, H., Johansson, A., & Batty, M.|Constructing cities, deconstructing scaling laws.|Journal of the Royal Society Interface, 2015 | 84 | 85 | 86 | ## Reference 87 | | Model | Authors | Publication | Venue | 88 | | :-----| :-----| :-----| :-----| 89 | | Node2vec | Grover, A. and Leskovec, J. | node2vec: Scalable feature learning for networks. | SIGKDD, 2016 | 90 | | Struc2vec | Ribeiro, L.F., Saverese, P.H. and Figueiredo, D.R. | struc2vec: Learning node representations from structural identity. | SIGKDD, 2017 | 91 | | Spectral GCN | Kipf, T. N. and Welling, M. | Semi-supervised classification with graph convolutional networks. | ICLR, 2017 | 92 | | GraphSAGE | Hamilton, W. L., Ying, R. and Leskovec, J. | Inductive representation learning on large graphs. | NIPS, 2017 | 93 | | Graph Attention Network | Velickovic, P., Cucurull, G., Casanova, A., Romero, A., Lio, P. and Bengio, Y.| Graph attention networks. | ICLR, 2018 | 94 | | Relational GCN | Schlichtkrull, M., Kipf, T.N., Bloem, P., Van Den Berg, R., Titov, I. and Welling, M. | Modeling relational data with graph convolutional networks. | The Semantic Web, ESWC 2018 | 95 | | DistMult | Yang, B., Yih, W., He, X., Gao, J. and Deng, L. | Embedding entities and relations for learning and inference in knowledge bases. | ICLR, 2015 | 96 | | Review | Zhou, J., Cui, G., Hu, S., Zhang, Z., Yang, C., Liu, Z., Wang, L., Li, C. and Sun, M.| Graph neural networks: A review of methods and applications. 
| AI Open, 2020 | 97 | 98 | ## License 99 | MIT license 100 | 101 | -------------------------------------------------------------------------------- /association-analysis/association_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/association-analysis/association_data.xlsx -------------------------------------------------------------------------------- /association-analysis/indicators.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import seaborn as sns 7 | from sklearn import model_selection 8 | import statsmodels.api as sm 9 | import scipy 10 | from statsmodels.stats.outliers_influence import variance_inflation_factor 11 | import copy 12 | from mpl_toolkits.axes_grid1 import make_axes_locatable 13 | from matplotlib.backends.backend_pdf import PdfPages 14 | import matplotlib as mpl 15 | 16 | 17 | def confusion_mat(lst): 18 | TP = 0 19 | TN = 0 20 | FP = 0 21 | FN = 0 22 | for k in range(len(lst)): 23 | target = lst[k]['target'] 24 | predict = lst[k]['predict'] 25 | if target == 1 and predict == 1: 26 | TP += 1 27 | elif target ==1 and predict == 0: 28 | FN += 1 29 | elif target == 0 and predict == 1: 30 | FP += 1 31 | elif target == 0 and predict == 0: 32 | TN += 1 33 | assert TP+TN+FP+FN == len(lst) 34 | return TP, TN, FP, FN 35 | 36 | def indexs(lst, ele): 37 | for i in range(len(lst)): 38 | if lst[i] == ele: 39 | break 40 | return i 41 | ''' 42 | filepath = './results' 43 | filenames = os.listdir(filepath) 44 | 45 | data = {} 46 | for filename in filenames: 47 | city = filename[15:-12] 48 | file = filepath + '/' + filename 49 | with open(file, 'r') as f: 50 | results = json.load(f) 51 | f.close() 52 | data[city] = results 53 | 54 | 55 | #f = open('./results/Relational-GCN-result.json', 'r') 56 | #data = json.load(f) 57 | 58 | cities = data.keys() 59 | 60 | F1 = {} 61 | 62 | for city in cities: 63 | F1[city] = {} 64 | samples = data[city].keys() 65 | for sample in samples: 66 | lst = data[city][sample] 67 | TP, TN, FP, FN = confusion_mat(lst) 68 | if (TP + FP) < 1e-17: 69 | precision = 0.0 70 | else: 71 | precision = TP*1.0/(TP + FP) 72 | if (TP + FN) < 1e-17: 73 | recall = 0.0 74 | else: 75 | recall = TP*1.0/(TP + FN) 76 | #print(precision, recall) 77 | if (precision+recall) < 1e-17: 78 | F1[city][sample] = 0.0 79 | else: 80 | F1[city][sample] = 2.0*precision*recall/(precision + recall) 81 | #print(F1) 82 | f1 = {} 83 | for city in cities: 84 | f1[city] = {'sample_1': 0, 'sample_2': 0} 85 | samples = F1[city].keys() 86 | num = len(samples)/2.0 87 | sample_1 = 0.0 88 | sample_2 = 0.0 89 | for sample in samples: 90 | #print(sample) 91 | if sample[-1] == '1': 92 | sample_1 += F1[city][sample]/num 93 | elif sample[-1] == '2': 94 | sample_2 += F1[city][sample]/num 95 | f1[city]['sample_1'] = sample_1 96 | f1[city]['sample_2'] = sample_2 97 | ''' 98 | #print(f1) 99 | census = pd.read_excel('data.xlsx') 100 | census_heat = copy.deepcopy(census) 101 | 102 | years = [1950, 1960, 1970, 1980, 1990, 2000, 2010] 103 | cities_cs = list(census.loc[:, 'City']) 104 | cities = cities_cs 105 | #print(cities) 106 | ''' 107 | for i in range(1,8): 108 | print('Lambdas_'+ '%1d' % i) 109 | for city in cities: 110 | ind = cities_cs.index(city) 111 | print(census.loc[ind, 'Population-2020']/census.loc[ind, 
'Population-'+ '%4d' % years[i-1]]) 112 | 113 | print('ratio0') 114 | for city in cities: 115 | ind = cities.index(city) 116 | print(census.loc[ind, 'road_number']/census.loc[ind, 'intersection_number']) 117 | print('ratio1') 118 | for city in cities: 119 | ind = cities.index(city) 120 | print(census.loc[ind, 'road_length']/census.loc[ind, 'intersection_number']) 121 | ''' 122 | 123 | 124 | 125 | mpl.rc('font', **{'family' : 'sans-serif', 'sans-serif' : ['Myriad Pro']}) 126 | mpl.rcParams['pdf.fonttype'] = 42 127 | 128 | census = census.drop('Population-1950', axis = 1) 129 | census = census.drop('Population-1960', axis = 1) 130 | census = census.drop('Population-1970', axis = 1) 131 | census = census.drop('Population-1980', axis = 1) 132 | census = census.drop('Population-1990', axis = 1) 133 | census = census.drop('Population-2000', axis = 1) 134 | census = census.drop('Population-2010', axis = 1) 135 | census = census.drop('Budget', axis = 1) 136 | census_model = copy.deepcopy(census) 137 | census_model = census_model.drop('Lambdas_2', axis = 1) 138 | census_model = census_model.drop('Lambdas_1', axis = 1) 139 | census_model = census_model.drop('Lambdas_4', axis = 1) 140 | census_model = census_model.drop('Lambdas_5', axis = 1) 141 | census_model = census_model.drop('Lambdas_6', axis = 1) 142 | census_model = census_model.drop('Lambdas_7', axis = 1) 143 | census_model = census_model.drop('road_length', axis = 1) 144 | census_model = census_model.drop('intersection_number', axis = 1) 145 | census_model = census_model.drop('road_number', axis = 1) 146 | census_model = census_model.drop('land_area_net', axis = 1) 147 | census_model = census_model.drop('average_road_length', axis = 1) 148 | census_model = census_model.drop('airport_annual_passengers', axis = 1) 149 | census_model = census_model.drop('Population_2020', axis = 1) 150 | census_model = census_model.drop('Area', axis = 1) 151 | census_model = census_model.drop('road_length_density', axis = 1) 152 | census_model = census_model.drop('road_number_density', axis = 1) 153 | census_model = census_model.drop('intersection_density', axis = 1) 154 | census_model = census_model.drop('ratio0', axis = 1) 155 | census_model = census_model.drop('ratio_road', axis = 1) 156 | #census_model = census_model.drop('GDP', axis = 1) 157 | census_model = census_model.drop('Betweenness', axis = 1) 158 | #census_model = census_model.drop('average_betweennese', axis = 1) 159 | train = census_model 160 | train = train.drop([26]) 161 | 162 | #model = sm.formula.ols('F1 ~ Population + Area + GDP + Lambdas_1 + Lambdas_2 + Lambdas_3 + Lambdas_4 + Lambdas_5 + Lambdas_6 + Lambdas_7', data = train).fit() 163 | model = sm.formula.ols('F1 ~ Lambdas_3 + GDP + ratio1 + average_betweennese', data = train).fit() 164 | 165 | 166 | ybar = train.F1.mean() 167 | #print('F1 mean: ', ybar) 168 | 169 | p = model.df_model 170 | n = train.shape[0] 171 | RSS = np.sum((model.fittedvalues - ybar)**2) 172 | ESS = np.sum(model.resid**2) 173 | F = (RSS/p)/(ESS/(n-p-1)) 174 | #print('F value(by hand): ', F) 175 | #print('F value(by model):', model.fvalue) 176 | 177 | F_Theory = scipy.stats.f.ppf(q=0.95, dfn = p, dfd = n-p-1) 178 | print('F value of theory: ', F_Theory) 179 | print(model.summary()) 180 | 181 | plt.rcParams['font.sans-serif'] = ['Microsoft Yahei'] 182 | plt.rcParams['axes.unicode_minus'] = False 183 | sns.distplot(a = census.F1, bins = 10, fit = scipy.stats.norm, norm_hist = True, 184 | hist_kws={'color':'steelblue', 'edgecolor':'black'}, 185 | kde_kws={'color':'black', 
'linestyle':'--', 'label':'pdf'}, 186 | fit_kws={'color':'red', 'linestyle':':', 'label':'Gaussian distribution'}) 187 | plt.legend() 188 | plt.savefig('hist.jpg') 189 | #plt.show() 190 | plt.close() 191 | 192 | X = sm.add_constant(train.loc[:, ['GDP', 'Lambdas_3', 'ratio1', 'average_betweennese']]) 193 | 194 | vif = pd.DataFrame() 195 | vif['feature'] = X.columns 196 | vif['VIF Factor'] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])] 197 | print('variance_inflation_factor: ') 198 | print(vif, '\n') 199 | 200 | print('Pearson correlation: ') 201 | print(census_model.drop('F1', axis = 1).corrwith(census_model.F1)) 202 | 203 | 204 | ND = ["Low node_den","Medium node_den","High node_den"] 205 | LD = ["Large link_den","Medium link_den","Small link_den"] 206 | #census = census_heat 207 | nds = list(census.loc[:,'intersection_density']) 208 | nds.sort() 209 | nd1 = nds[10] 210 | nd2 = nds[20] 211 | #print(gdps) 212 | lstcity = list(census.loc[:, 'City']) 213 | 214 | CITIES = {'LG_LL':[], 'LG_ML':[], 'LG_SL':[], 'MG_LL':[], 'MG_ML':[], 'MG_SL':[], 'HG_LL':[], 'HG_ML':[], 'HG_SL':[]} 215 | lds = list(census.loc[:,'road_length_density']) 216 | lds.sort() 217 | ld1 = lds[10] 218 | ld2 = lds[20] 219 | gL = 0 220 | gM = 0 221 | gH = 0 222 | lS = 0 223 | lM = 0 224 | lL = 0 225 | for city in range(30): 226 | gdp = census.loc[city, 'intersection_density'] 227 | lbd = census.loc[city, 'road_length_density'] 228 | if gdp < nd1: 229 | fg = 'L' 230 | gL += 1 231 | elif gdp < nd2: 232 | fg = 'M' 233 | gM += 1 234 | else: 235 | fg = 'H' 236 | gH += 1 237 | if lbd < ld1: 238 | fl = 'S' 239 | lS += 1 240 | elif lbd < ld2: 241 | fl = 'M' 242 | lM += 1 243 | else: 244 | fl = 'L' 245 | lL += 1 246 | CITIES[fg + 'G_' + fl + 'L'].append(city) 247 | print(CITIES) 248 | f1_values = [] 249 | f1_nums = [] 250 | for label in CITIES.keys(): 251 | f1 = 0.0 252 | for city in CITIES[label]: 253 | f1 += census.loc[city, 'F1'] 254 | if len(CITIES[label]) > 0: 255 | f1_values.append(round(f1/len(CITIES[label]),2)) 256 | else: 257 | f1_values.append(round(f1,2)) 258 | f1_nums.append(len(CITIES[label])) 259 | f1_value = np.array([f1_values[0:3], f1_values[3:6], f1_values[6:9]]) 260 | f1_value = np.transpose(f1_value) 261 | f1_num = np.array([f1_nums[0:3], f1_nums[3:6], f1_nums[6:9]]) 262 | f1_num = np.transpose(f1_num) 263 | 264 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5,5),dpi=1200) 265 | im = ax.imshow(f1_value,cmap='viridis') 266 | #fig.colorbar(im, ax=ax) 267 | divider = make_axes_locatable(ax) 268 | cax = divider.append_axes("right", size="5%", pad=0.05) 269 | cax.tick_params(labelsize=10) 270 | 271 | fig.colorbar(im, cax=cax) 272 | 273 | ax.set_xticks(np.arange(len(ND))) 274 | ax.set_yticks(np.arange(len(LD))) 275 | ax.set_xticklabels(ND) 276 | ax.set_yticklabels(LD) 277 | 278 | plt.setp(ax.get_xticklabels(), rotation=0, rotation_mode="anchor",fontsize =10) 279 | plt.setp(ax.get_yticklabels(), rotation=90,ha= 'center',rotation_mode="anchor",fontsize =10) 280 | 281 | for i in range(len(ND)): 282 | for j in range(len(LD)): 283 | if f1_value[i,j] > 0: 284 | text = ax.text(j, i, f1_value[i, j], 285 | ha="center", va="center", color="w",fontsize =20) 286 | else: 287 | text = ax.text(j, i, 'None', 288 | ha="center", va="center", color="w",fontsize =20) 289 | #ax.set_title("Average F1 scores") 290 | fig.tight_layout() 291 | #plt.title(lbd_num, horizontalalignment = 'left') 292 | plt.savefig('density' + '.svg',bbox_inches = 'tight') 293 | plt.savefig('density' + '_heatmap.jpg',bbox_inches = 'tight') 
#change the name here 294 | #pdf = PdfPages(lbd_num + '.pdf') 295 | plt.savefig('density' + '.pdf',bbox_inches = 'tight') #change the name here 296 | #pdf.savefig() 297 | #plt.show() 298 | plt.close() 299 | #pdf.close() 300 | 301 | -------------------------------------------------------------------------------- /association-analysis/singlevariable.csv: -------------------------------------------------------------------------------- 1 | statistics,R2,F,P,Pearson,DW 2 | GDP,0.162,5.406,0.028,0.402,1.97 3 | Population-2020,0.048,1.421,0.243,-0.22,2.147 4 | Lambda_1,0.115,3.525,0.071,-0.124,2.018 5 | airport_annual_passengers,0.002,0.057,0.813,-0.045,2.141 6 | land_area_net,0.071,27.99,0,0.707,1.873 7 | intersection_number,0.14,4.556,0.042,0.374,2.137 8 | road_number,0.133,4.284,0.048,0.364,2.116 9 | road_length,0.301,12.03,0.002,0.548,2.037 10 | intersection_density,0.02,0.572,0.456,0.142,2.156 11 | road_number_density,0.033,0.9614,0.335,0.182,2.155 12 | road_length_density,0.123,3.935,0.057,0.351,2.234 13 | road_number/intersection_number,0.293,11.58,0.002,0.541,1.945 14 | Betweenness,0.077,2.346,0.137,0.278,2.309 15 | road_length/road_number,0.002,0.042,0.837,-0.039,2.108 16 | average_betweennese,0.461,23.94,0,-0.679,2.477 17 | -------------------------------------------------------------------------------- /data-collection/README.md: -------------------------------------------------------------------------------- 1 | ## This file contains the codes and the obtained road network data. 2 | -------------------------------------------------------------------------------- /data-collection/collect_preprocess_URN_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | import math 8 | import random 9 | import json 10 | import copy 11 | import timeit 12 | import random 13 | import numpy as np 14 | import osmnx as ox 15 | import networkx as nx 16 | import requests 17 | import geopandas as gpd 18 | import matplotlib.cm as cm 19 | from math import sin, cos, sqrt, atan2,radians 20 | import matplotlib.colors as colors 21 | ox.config(use_cache=True, log_console=True) 22 | 23 | 24 | # # part 1: functions 25 | 26 | # In[2]: 27 | 28 | 29 | import os 30 | # create a folder if necessary 31 | def mkdir(path): 32 | folder = os.path.exists(path) 33 | if not folder: 34 | os.makedirs(path) 35 | print('A new folder created.') 36 | else: 37 | print('Has already been created.') 38 | def edges2dict(edgeFinal): 39 | dicts = [] 40 | n = len(edgeFinal) 41 | for i in range(0,n): 42 | dict1 = {'start':int(edgeFinal[i][0]), 'end':int(edgeFinal[i][1]),'inSample1':edgeFinal[i][3],'inSample2':edgeFinal[i][4]} 43 | dicts.append(dict1) 44 | return dicts 45 | def nodes2dict(nodeFinal): 46 | dicts = [] 47 | n = len(nodeFinal) 48 | for i in range(0,n): 49 | dict1 = {'osmid': int(nodeFinal[i][0]), 'lon': nodeFinal[i][1],'lat':nodeFinal[i][2]} 50 | dicts.append(dict1) 51 | return dicts 52 | 53 | 54 | # # part 2: node merge 55 | 56 | # In[3]: 57 | 58 | 59 | # function 1 for node merge 60 | def distancePair(loc1,loc2): #loc1:[lon1,lat1]; loc2:[lon2,lat2] 61 | R = 6373.0 62 | lon1, lat1 = radians(loc1[0]),radians(loc1[1]) 63 | lon2, lat2 = radians(loc2[0]),radians(loc2[1]) 64 | dlon = lon2 - lon1 65 | dlat = lat2 - lat1 66 | a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2))**2 67 | c = 2 * atan2(sqrt(a), sqrt(1-a)) 68 | distance = R * c 69 | return distance 70 | # function 2 for node merge 71 | def 
neighborCell(nx,ny,x,y): # nx,ny: 100; x,y:0,1,2,...,100 72 | neighbor = list() 73 | xList = [x-1,x,x+1] 74 | yList = [y-1,y,y+1] 75 | for i in range(3): 76 | for j in range(3): 77 | if xList[i]>=0 and xList[i]<=nx and yList[j]>=0 and yList[j]<= ny: 78 | neighbor.append((xList[i],yList[j])) 79 | return neighbor 80 | 81 | 82 | # In[4]: 83 | 84 | 85 | # the input is G1 (drive) and G6 (all private), and we need to generate a G = G1.union(G6) 86 | def OSMnx_graph(G1): 87 | #======================================================= step 0 88 | #step_0.1 generate G: lo, la, nodeOsmid, edgeInfor 89 | G1nodes = list(G1.nodes(data = True)) 90 | G1_node = {str(G1nodes[i][1]['osmid']): (G1nodes[i][1]['lon'],G1nodes[i][1]['lat']) for i in range(len(G1nodes))} 91 | G_node =copy.deepcopy(G1_node) 92 | 93 | #step_0.2 edges 94 | G1edges = list(G1.edges(data = True)) 95 | G1_edge = {(int(G1edges[i][0]),int(G1edges[i][1])):1 for i in range(len(G1edges))} #1: drivable 96 | G_edge =copy.deepcopy(G1_edge) 97 | 98 | # step_0: get 1)node_osmid; 2) node logitude; 3) node latitude; 4) edge (from,to) 99 | #step_0.3 input 100 | lo = [G_node[i][0] for i in G_node.keys()] 101 | la = [G_node[i][1] for i in G_node.keys()] 102 | nodeOsmid = [int(i) for i in G_node.keys()] 103 | edgeInfor = [(i[0],i[1],G_edge[i]) for i in G_edge.keys()] 104 | # =================================================================== 105 | # step_1: decide nX, nY 106 | loMin, loMax, laMin, laMax = np.min(lo), np.max(lo), np.min(la), np.max(la) 107 | R = 6373.0 108 | d = 0.03 # the merging threshold = 30 meters. #change: July 15,2020 109 | dymax = 2*R*cos(laMax*math.pi/180.0)*math.pi/360.0 110 | nX = math.floor((loMax-loMin)*dymax/d) 111 | unitX = (loMax - loMin)/nX 112 | nY = math.floor((laMax-laMin)*math.pi*R/(180.0*d)) 113 | unitY = (laMax - laMin)/nY 114 | # =================================================================== 115 | # step_2 go through all the nodes 116 | mapping = {} 117 | for i in range(nX+1): 118 | for j in range(nY+1): 119 | mapping[(i,j)]=list() 120 | for i in range(len(lo)): 121 | long, lati = lo[i], la[i] 122 | x = math.floor((long-loMin)*nX/(loMax-loMin)) 123 | y = math.floor((lati-laMin)*nY/(laMax-laMin)) 124 | mapping[(x,y)].append(i) 125 | # =================================================================== 126 | # step_3 near relationship 127 | nearResult = list() 128 | for i in range(nX+1): 129 | for j in range(nY+1): 130 | count = (nY+1)*i+j 131 | neighbor = neighborCell(nX,nY,i,j) 132 | neighborNodes = list() 133 | for k in range(len(neighbor)): 134 | neighborNodes = neighborNodes + mapping[neighbor[k]] 135 | for n1 in range(len(mapping[(i,j)])): 136 | for n2 in range(len(neighborNodes)): 137 | node1 = mapping[(i,j)][n1] 138 | node2 = neighborNodes[n2] 139 | loc1 = [lo[node1], la[node1]] 140 | loc2 = [lo[node2], la[node2]] 141 | if (distancePair(loc1,loc2)) < d: 142 | if node1< node2 and (node1,node2) not in nearResult: 143 | nearResult.append((node1,node2)) 144 | if node2< node1 and (node2,node1) not in nearResult: 145 | nearResult.append((node2,node1)) 146 | # =================================================================== 147 | # step_4 merge operation 148 | nodeReach = {} 149 | for i in range(len(lo)): 150 | nodeReach[i]=[i] 151 | for k in range(len(nearResult)): 152 | i = nearResult[k][0] 153 | j = nearResult[k][1] 154 | iList = nodeReach[i] 155 | jList = nodeReach[j] 156 | ijList = list(set(iList).union(set(jList))) 157 | for p in range(len(ijList)): 158 | nodeReach[ijList[p]]=ijList 159 | # 
===================================================================== 160 | # step_5 get new information 161 | loNew = list() 162 | laNew = list() 163 | minOSMid = list() 164 | for i in range(len(lo)): 165 | xList = [lo[nodeReach[i][k]] for k in range(len(nodeReach[i]))] 166 | yList = [la[nodeReach[i][k]] for k in range(len(nodeReach[i]))] 167 | idList = [nodeOsmid[nodeReach[i][k]] for k in range(len(nodeReach[i]))] 168 | xAver = np.mean(xList) 169 | yAver = np.mean(yList) 170 | loNew.append(xAver) 171 | laNew.append(yAver) 172 | minOSMid.append(np.min(idList)) 173 | # ===================================================================== 174 | # step_6 get final node 175 | nodeFinal = list() 176 | minOSMidClear = list(set(minOSMid)) 177 | for i in range(len(minOSMidClear)): 178 | indexGet = minOSMid.index(minOSMidClear[i]) 179 | nodeFinal.append((minOSMid[indexGet],loNew[indexGet],laNew[indexGet])) 180 | # ===================================================================== 181 | # step_7 refresh the edge result 182 | edgeListRaw = [(str(minOSMid[nodeOsmid.index(edgeInfor[i][0])]),str(minOSMid[nodeOsmid.index(edgeInfor[i][1])]),edgeInfor[i][2]) for i in range(len(edgeInfor))] 183 | edgeNearFinal = list(set(edgeListRaw)) 184 | edgeFinal = list() 185 | for i in range(len(edgeNearFinal)): 186 | if int(edgeNearFinal[i][0])> int(edgeNearFinal[i][1]): 187 | #we set the start point index of an edge is smaller than end point 188 | edgeFinal.append((edgeNearFinal[i][1],edgeNearFinal[i][0],edgeNearFinal[i][2])) 189 | else: 190 | edgeFinal.append((edgeNearFinal[i][0],edgeNearFinal[i][1],edgeNearFinal[i][2])) 191 | edgeFinal = list(set(edgeFinal)) 192 | # ===================================================================== 193 | # step_8 clear edgeFinal 194 | validIndex1 = list() 195 | nodeFinalList = [nodeFinal[i][0] for i in range(len(nodeFinal))] 196 | for i in range(len(edgeFinal)): 197 | if (int(edgeFinal[i][0]) in nodeFinalList) and (int(edgeFinal[i][1]) in nodeFinalList) : 198 | validIndex1.append(i) 199 | edgeFinalFinal = [edgeFinal[validIndex1[i]] for i in range(len(validIndex1))] 200 | # ===================================================================== 201 | # step_9 clear nodeFinal 202 | nodeIdRaw = [nodeFinal[i][0] for i in range(len(nodeFinal))] 203 | nodeReceive = list() 204 | for i in range(len(edgeFinalFinal)): 205 | nodeReceive.append(int(edgeFinalFinal[i][0])) 206 | nodeReceive.append(int(edgeFinalFinal[i][1])) 207 | nodeReceive = list(set(nodeReceive)) 208 | validIndex2 = list() 209 | for i in range(len(nodeIdRaw)): 210 | if (nodeIdRaw[i] in nodeReceive) : 211 | validIndex2.append(i) 212 | nodeFinalFinal = [nodeFinal[validIndex2[i]] for i in range(len(validIndex2))] 213 | return [nodeFinalFinal,edgeFinalFinal] 214 | 215 | 216 | # In[5]: 217 | 218 | 219 | def sample(nodeInfor,edgeInfor): 220 | # step0: get the information 221 | nodeId = [nodeInfor[i][0] for i in range(len(nodeInfor))] 222 | longitude = [nodeInfor[i][1] for i in range(len(nodeInfor))] 223 | latitude = [nodeInfor[i][2] for i in range(len(nodeInfor))] 224 | 225 | # step1: generate the graph 226 | n = len(nodeId) 227 | A1 = np.array([[0] * n] * n) 228 | Graph1 = nx.Graph(A1) 229 | 230 | # step2: label 231 | column = [str(nodeId[i]) for i in range(n)] 232 | mapping = {0:str(nodeId[0])} 233 | for i in range(0,len(column)-1): 234 | mapping.setdefault(i+1,column[i+1]) 235 | Graph1 = nx.relabel_nodes(Graph1,mapping) 236 | 237 | # step3: geolocation 238 | #POS = list() 239 | #for i in range(0,n): 240 | # 
POS.append((float(longitude[i]),float(latitude[i]))) 241 | #for i in range(0,n): 242 | # Graph1.nodes[column[i]]['pos'] = POS[i] 243 | 244 | # step4: add edge 245 | edgeSet1 = list() 246 | for i in range(len(edgeInfor)): 247 | edgeRow = edgeInfor[i] 248 | edgeSet1.append((str(edgeRow[0]),str(edgeRow[1]))) 249 | edgeSet = list(set(edgeSet1)) 250 | Graph1.add_edges_from(edgeSet) 251 | 252 | # step5: get the mininal spanning tree 253 | deleteNumber = int(len(Graph1.edges) * 0.20) 254 | 255 | T = nx.minimum_spanning_tree(Graph1) 256 | potentialDelete = list(set(Graph1.edges) - set(T.edges)) 257 | #print ("potentialDelete",len(potentialDelete)) 258 | realDelete1 = random.sample(potentialDelete, deleteNumber) 259 | realDelete2 = random.sample(potentialDelete, deleteNumber) 260 | print ("len(realDelete1)",len(realDelete1),"len(realDelete2)",len(realDelete2)) 261 | 262 | # step6: prepare the output file 263 | edgeInforNew = list() 264 | for i in range(len(edgeInfor)): 265 | edgeRow = edgeInfor[i] 266 | item = list() 267 | if (str(edgeRow[0]),str(edgeRow[1])) in realDelete1 or (str(edgeRow[1]),str(edgeRow[0]))in realDelete1: 268 | item = [edgeRow[0],edgeRow[1],edgeRow[2],0] 269 | else: 270 | item = [edgeRow[0],edgeRow[1],edgeRow[2],1] 271 | if (str(edgeRow[0]),str(edgeRow[1])) in realDelete2 or (str(edgeRow[1]),str(edgeRow[0]))in realDelete2: 272 | item.append(0) 273 | else: 274 | item.append(1) 275 | edgeInforNew.append(item) 276 | 277 | #step7: transform the form 278 | returnEdgeInforNew = list() 279 | for i in range(len(edgeInforNew)): 280 | returnEdgeInforNew.append((edgeInforNew[i][0],edgeInforNew[i][1],edgeInforNew[i][2],edgeInforNew[i][3],edgeInforNew[i][4])) 281 | #print (returnEdgeInforNew) 282 | return returnEdgeInforNew 283 | 284 | 285 | # # part 3: main function 286 | 287 | # In[6]: 288 | 289 | 290 | cities=[] 291 | f = open( "./world_city_20200715.txt", "r" ) 292 | for line in f.readlines(): 293 | linestr = line.strip() 294 | linestrlist = linestr.split("\t") 295 | cities.append(linestrlist) 296 | 297 | 298 | # In[7]: 299 | 300 | 301 | def getTrainIndex(n): #the input region is a n by n region. 
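    # Added note: this produces a checkerboard-style split of the n-by-n grid of
    # city tiles. A tile joins the training set whenever its row or column index
    # is even; the remaining odd-odd tiles form the test set. For n = 20 this
    # gives 300 training and 100 testing tiles, matching the last two columns of
    # world_city_20200715.txt.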
302 | trainIndex = list() 303 | for i in range(n*n): 304 | row = math.floor(i/n) 305 | col = np.mod(i,n) 306 | if row%2 == 0 or col%2 == 0: 307 | trainIndex.append(i) 308 | return trainIndex 309 | 310 | 311 | # In[8]: 312 | 313 | 314 | squareLength = [int(cities[i][5]) for i in range(len(cities))] 315 | trainSize = [int(cities[i][6]) for i in range(len(cities))] 316 | testSize = [int(cities[i][7]) for i in range(len(cities))] 317 | allIndex = list() 318 | for i in range(0,len(cities)): 319 | # get train 320 | fullList = list(range(squareLength[i] * squareLength[i])) 321 | train = getTrainIndex(squareLength[i]) 322 | random.shuffle(train) 323 | # get test 324 | test = list(set(fullList) - set(train)) 325 | random.shuffle(test) 326 | allIndex.append([train,test]) 327 | 328 | 329 | # In[9]: 330 | 331 | 332 | pwd1 = './train/' 333 | pwd2 = './test/' 334 | mkdir(pwd1) 335 | mkdir(pwd2) 336 | # range of distance 337 | distance = 500 338 | 339 | 340 | # In[10]: 341 | 342 | 343 | def clearSameNodeEdge(edgeInfo): 344 | newEdgeInfo = list() 345 | for i in range(len(edgeInfo)): 346 | start = edgeInfo[i][0] 347 | end = edgeInfo[i][1] 348 | if start != end: 349 | newEdgeInfo.append(edgeInfo[i]) 350 | return newEdgeInfo 351 | 352 | 353 | # # part 4: training data 354 | 355 | # In[11]: 356 | 357 | 358 | start0 = timeit.default_timer() 359 | countReal = 0 360 | count = 0 361 | for i in range(0,len(cities)): 362 | # find the latitude and longitude of the city 363 | lat = float(cities[i][3]) 364 | lon = float(cities[i][4]) 365 | location = (lat,lon) 366 | LAT = squareLength[i]*0.01 #longitude, latitude range 367 | LON = LAT 368 | npd = squareLength[i] 369 | dlat = 0.01 370 | dlon = 0.01 371 | ################ collect training data ############################### 372 | filename = pwd1 + cities[i][1] #!!!!change pwd1,2,3,4 373 | for j1 in range(0,trainSize[i]): #!!!!change trainSize,validateSize,test1Size,test2Size 374 | start1 = timeit.default_timer() 375 | j = allIndex[i][0][j1] #!!!!change 0,1,2,3 376 | row = math.floor(j/npd) 377 | col = np.mod(j,npd) 378 | lat1 = lat - 0.500*LAT + row*dlat*1.000 379 | lon1 = lon - 0.500*LON + col*dlon*1.000 380 | location1 = [lat1,lon1] 381 | print ("location1", location1) 382 | count += 1 383 | #distance = random.randint(min,max) 384 | try : 385 | G1 = ox.graph_from_point(location1, distance=distance, distance_type='bbox', network_type='drive') 386 | except: 387 | print ("the graph is null") 388 | else: 389 | G1 = ox.project_graph(G1) 390 | if (len(G1)>10): 391 | # merge the node 392 | graphResult = OSMnx_graph(G1) 393 | nodeFinal = graphResult[0] 394 | rawEdgeFinal = graphResult[1] 395 | print ("len(rawEdgeFinal)",len(rawEdgeFinal)) 396 | rawEdgeFinal = clearSameNodeEdge(rawEdgeFinal) 397 | print ("len(rawEdgeFina)",len(rawEdgeFinal)) 398 | #test whether it is ok to sample, edge num > 1.26 node num 399 | realEdgeFinal = [(rawEdgeFinal[p][0],rawEdgeFinal[p][1]) for p in range(len(rawEdgeFinal))] 400 | realEdgeFinal = list(set(realEdgeFinal)) 401 | if len(realEdgeFinal) > 1.26*len(nodeFinal): 402 | edgeFinal = sample(nodeFinal,rawEdgeFinal) 403 | subfile = filename + str(j) +'nodes'+'.json' 404 | nodefile = open(subfile,'w') 405 | nodes = nodes2dict(nodeFinal) 406 | json.dump(nodes,nodefile) 407 | nodefile.close() 408 | 409 | # save edges as a json file 410 | subfile = filename + str(j) +'edges'+'.json' 411 | edgefile = open(subfile,'w') 412 | edges = edges2dict(edgeFinal) 413 | json.dump(edges,edgefile) 414 | edgefile.close() 415 | countReal += 1 416 | print 
("count",count," countReal",countReal) 417 | stop1 = timeit.default_timer() 418 | print('running time per iteration:', stop1 - start1) 419 | stop2 = timeit.default_timer() 420 | print('running time until now:', stop2 - start0) 421 | print ("========================================================") 422 | stop0 = timeit.default_timer() 423 | print('total running time:', stop0 - start0) 424 | 425 | 426 | # # part 5 testing data 427 | 428 | # In[12]: 429 | 430 | 431 | start0 = timeit.default_timer() 432 | countReal = 0 433 | count = 0 434 | for i in range(0,len(cities)): 435 | # find the latitude and longitude of the city 436 | lat = float(cities[i][3]) 437 | lon = float(cities[i][4]) 438 | location = (lat,lon) 439 | LAT = squareLength[i]*0.01 #longitude, latitude range 440 | LON = LAT 441 | npd = squareLength[i] 442 | dlat = 0.01 443 | dlon = 0.01 444 | ################ collect training data ############################### 445 | filename = pwd2 + cities[i][1] #!!!!change pwd1,2,3,4 446 | for j1 in range(0,testSize[i]): #!!!!change trainSize,validateSize,test1Size,test2Size 447 | start1 = timeit.default_timer() 448 | j = allIndex[i][1][j1] #!!!!change 0,1,2,3 449 | row = math.floor(j/npd) 450 | col = np.mod(j,npd) 451 | lat1 = lat - 0.500*LAT + row*dlat*1.000 452 | lon1 = lon - 0.500*LON + col*dlon*1.000 453 | location1 = [lat1,lon1] 454 | print ("location1", location1) 455 | count += 1 456 | try : 457 | G1 = ox.graph_from_point(location1, distance=distance, distance_type='bbox', network_type='drive') 458 | except: 459 | print ("the graph is null") 460 | else: 461 | G1 = ox.project_graph(G1) 462 | if (len(G1)>10): 463 | # merge the node 464 | mergeResult = OSMnx_graph(G1) 465 | nodeFinal = mergeResult[0] 466 | rawEdgeFinal = mergeResult[1] 467 | print ("len(rawEdgeFinal)",len(rawEdgeFinal)) 468 | rawEdgeFinal = clearSameNodeEdge(rawEdgeFinal) 469 | print ("len(rawEdgeFina)",len(rawEdgeFinal)) 470 | #test whether it is ok to sample, edge num > 1.26 node num 471 | realEdgeFinal = [(rawEdgeFinal[p][0],rawEdgeFinal[p][1]) for p in range(len(rawEdgeFinal))] 472 | realEdgeFianl = list(set(realEdgeFinal)) 473 | if len(realEdgeFianl) > 1.26*len(nodeFinal): 474 | edgeFinal = sample(nodeFinal,rawEdgeFinal) 475 | subfile = filename + str(j) +'nodes'+'.json' 476 | nodefile = open(subfile,'w') 477 | nodes = nodes2dict(nodeFinal) 478 | json.dump(nodes,nodefile) 479 | nodefile.close() 480 | 481 | # save edges as a json file 482 | subfile = filename + str(j) +'edges'+'.json' 483 | edgefile = open(subfile,'w') 484 | edges = edges2dict(edgeFinal) 485 | json.dump(edges,edgefile) 486 | edgefile.close() 487 | countReal += 1 488 | print ("count",count," countReal",countReal) 489 | stop1 = timeit.default_timer() 490 | print('running time per iteration:', stop1 - start1) 491 | stop2 = timeit.default_timer() 492 | print('running time until now:', stop2 - start0) 493 | print ("========================================================") 494 | stop0 = timeit.default_timer() 495 | print('running time per iteration:', stop0 - start0) 496 | 497 | 498 | # In[ ]: 499 | 500 | 501 | 502 | 503 | -------------------------------------------------------------------------------- /data-collection/test.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/data-collection/test.rar -------------------------------------------------------------------------------- /data-collection/train.rar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/data-collection/train.rar -------------------------------------------------------------------------------- /data-collection/world_city_20200715.txt: -------------------------------------------------------------------------------- 1 | 1 New york USA 40.73 -73.94 20 300 100 2 | 2 Los angeles USA 34.04 -118.24 20 300 100 3 | 3 Chicago USA 41.87 -87.68 20 300 100 4 | 4 Houston USA 29.75 -95.36 20 300 100 5 | 5 Philadelphia USA 39.95 -75.16 20 300 100 6 | 6 Phoenix USA 33.45 -112.07 20 300 100 7 | 7 San diego USA 32.73 -117.11 20 300 100 8 | 8 San antonio USA 29.42 -98.49 20 300 100 9 | 9 Dallas USA 32.78 -96.8 20 300 100 10 | 10 Detroit USA 42.38 -83.09 20 300 100 11 | 11 Tokyo Japan 35.69 139.76 20 300 100 12 | 12 Osaka Japan 34.67 135.5 20 300 100 13 | 13 Beijing China 39.91 116.4 20 300 100 14 | 14 Shanghai China 31.23 121.48 20 300 100 15 | 15 Guangzhou China 23.12 113.28 20 300 100 16 | 16 Shenzhen China 22.54 113.98 20 300 100 17 | 17 Hongkong Hongkong 22.31 114.18 20 300 100 18 | 18 Singapore Singapore 1.34 103.86 20 300 100 19 | 19 Seoul South Korea 37.54 126.98 20 300 100 20 | 20 Delhi India 28.64 77.21 20 300 100 21 | 21 London England 51.51 -0.13 20 300 100 22 | 22 Paris France 48.86 2.34 20 300 100 23 | 23 Moscow Russia 55.75 37.62 20 300 100 24 | 24 Amsterdam Netherland 52.38 4.9 20 300 100 25 | 25 Milan Italy 45.47 9.18 20 300 100 26 | 26 Madrid Spain 40.42 -3.7 20 300 100 27 | 27 Brussels Belgian 50.85 4.36 20 300 100 28 | 28 Berlin Germany 52.52 13.4 20 300 100 29 | 29 Munich Germany 48.14 11.58 20 300 100 30 | 30 Stockholm Sweden 59.33 18.06 20 300 100 31 | -------------------------------------------------------------------------------- /inter-city-network-homogeneity/cross_f1score_shifted.json: -------------------------------------------------------------------------------- 1 | { 2 | "Amsterdam": { 3 | "Amsterdam": 0.2837, 4 | "Beijing": 0.3137, 5 | "Berlin": 0.3201, 6 | "Brussels": 0.3144, 7 | "Chicago": 0.4321, 8 | "Dallas": 0.3979, 9 | "Delhi": 0.2402, 10 | "Detroit": 0.3763, 11 | "Guangzhou": 0.3117, 12 | "Hongkong": 0.2992, 13 | "Houston": 0.3941, 14 | "London": 0.2864, 15 | "Los angeles": 0.35, 16 | "Madrid": 0.2849, 17 | "Milan": 0.3124, 18 | "Moscow": 0.226, 19 | "Munich": 0.3424, 20 | "New york": 0.3789, 21 | "Osaka": 0.2333, 22 | "Paris": 0.3048, 23 | "Philadelphia": 0.3534, 24 | "Phoenix": 0.3562, 25 | "San antonio": 0.4099, 26 | "San diego": 0.3867, 27 | "Seoul": 0.2469, 28 | "Shanghai": 0.3175, 29 | "Shenzhen": 0.2747, 30 | "Singapore": 0.3364, 31 | "Stockholm": 0.2857, 32 | "Tokyo": 0.2322 33 | }, 34 | "Beijing": { 35 | "Amsterdam": 0.1733, 36 | "Beijing": 0.3686, 37 | "Berlin": 0.3319, 38 | "Brussels": 0.2116, 39 | "Chicago": 0.4655, 40 | "Dallas": 0.3502, 41 | "Delhi": 0.1473, 42 | "Detroit": 0.3466, 43 | "Guangzhou": 0.327, 44 | "Hongkong": 0.2698, 45 | "Houston": 0.363, 46 | "London": 0.198, 47 | "Los angeles": 0.348, 48 | "Madrid": 0.1976, 49 | "Milan": 0.2275, 50 | "Moscow": 0.2783, 51 | "Munich": 0.2763, 52 | "New york": 0.2894, 53 | "Osaka": 0.1138, 54 | "Paris": 0.2024, 55 | "Philadelphia": 0.2358, 56 | "Phoenix": 0.3891, 57 | "San antonio": 0.3874, 58 | "San diego": 0.3378, 59 | "Seoul": 0.1245, 60 | "Shanghai": 0.3347, 61 | "Shenzhen": 0.3028, 62 | "Singapore": 0.3472, 63 | "Stockholm": 0.181, 64 | "Tokyo": 0.1175 65 | }, 66 | "Berlin": { 67 | "Amsterdam": 
0.2611, 68 | "Beijing": 0.3863, 69 | "Berlin": 0.3686, 70 | "Brussels": 0.3466, 71 | "Chicago": 0.5508, 72 | "Dallas": 0.4749, 73 | "Delhi": 0.2266, 74 | "Detroit": 0.4563, 75 | "Guangzhou": 0.3467, 76 | "Hongkong": 0.2874, 77 | "Houston": 0.4371, 78 | "London": 0.2708, 79 | "Los angeles": 0.4541, 80 | "Madrid": 0.2807, 81 | "Milan": 0.3176, 82 | "Moscow": 0.4223, 83 | "Munich": 0.3813, 84 | "New york": 0.4162, 85 | "Osaka": 0.1653, 86 | "Paris": 0.2977, 87 | "Philadelphia": 0.3342, 88 | "Phoenix": 0.4446, 89 | "San antonio": 0.4586, 90 | "San diego": 0.3962, 91 | "Seoul": 0.1945, 92 | "Shanghai": 0.4128, 93 | "Shenzhen": 0.332, 94 | "Singapore": 0.3789, 95 | "Stockholm": 0.2741, 96 | "Tokyo": 0.1636 97 | }, 98 | "Brussels": { 99 | "Amsterdam": 0.3126, 100 | "Beijing": 0.3707, 101 | "Berlin": 0.4181, 102 | "Brussels": 0.3865, 103 | "Chicago": 0.5383, 104 | "Dallas": 0.4575, 105 | "Delhi": 0.2712, 106 | "Detroit": 0.4908, 107 | "Guangzhou": 0.3169, 108 | "Hongkong": 0.2764, 109 | "Houston": 0.4734, 110 | "London": 0.3033, 111 | "Los angeles": 0.4585, 112 | "Madrid": 0.3387, 113 | "Milan": 0.3682, 114 | "Moscow": 0.3789, 115 | "Munich": 0.4257, 116 | "New york": 0.4757, 117 | "Osaka": 0.2609, 118 | "Paris": 0.3496, 119 | "Philadelphia": 0.4244, 120 | "Phoenix": 0.4345, 121 | "San antonio": 0.4683, 122 | "San diego": 0.4396, 123 | "Seoul": 0.2901, 124 | "Shanghai": 0.382, 125 | "Shenzhen": 0.3628, 126 | "Singapore": 0.3775, 127 | "Stockholm": 0.312, 128 | "Tokyo": 0.2578 129 | }, 130 | "Chicago": { 131 | "Amsterdam": 0.2758, 132 | "Beijing": 0.3975, 133 | "Berlin": 0.4156, 134 | "Brussels": 0.3374, 135 | "Chicago": 0.6792, 136 | "Dallas": 0.4828, 137 | "Delhi": 0.243, 138 | "Detroit": 0.5066, 139 | "Guangzhou": 0.3618, 140 | "Hongkong": 0.3038, 141 | "Houston": 0.5184, 142 | "London": 0.2998, 143 | "Los angeles": 0.5322, 144 | "Madrid": 0.3158, 145 | "Milan": 0.3541, 146 | "Moscow": 0.36, 147 | "Munich": 0.4209, 148 | "New york": 0.4604, 149 | "Osaka": 0.2992, 150 | "Paris": 0.3041, 151 | "Philadelphia": 0.4094, 152 | "Phoenix": 0.4858, 153 | "San antonio": 0.5221, 154 | "San diego": 0.4777, 155 | "Seoul": 0.2746, 156 | "Shanghai": 0.3981, 157 | "Shenzhen": 0.3191, 158 | "Singapore": 0.3687, 159 | "Stockholm": 0.2726, 160 | "Tokyo": 0.2953 161 | }, 162 | "Dallas": { 163 | "Amsterdam": 0.3391, 164 | "Beijing": 0.4089, 165 | "Berlin": 0.436, 166 | "Brussels": 0.3883, 167 | "Chicago": 0.5947, 168 | "Dallas": 0.5053, 169 | "Delhi": 0.2792, 170 | "Detroit": 0.5304, 171 | "Guangzhou": 0.3549, 172 | "Hongkong": 0.3122, 173 | "Houston": 0.4846, 174 | "London": 0.3232, 175 | "Los angeles": 0.496, 176 | "Madrid": 0.3522, 177 | "Milan": 0.3781, 178 | "Moscow": 0.4004, 179 | "Munich": 0.4413, 180 | "New york": 0.5019, 181 | "Osaka": 0.2506, 182 | "Paris": 0.3452, 183 | "Philadelphia": 0.4365, 184 | "Phoenix": 0.4687, 185 | "San antonio": 0.4976, 186 | "San diego": 0.4731, 187 | "Seoul": 0.2829, 188 | "Shanghai": 0.4106, 189 | "Shenzhen": 0.3592, 190 | "Singapore": 0.397, 191 | "Stockholm": 0.336, 192 | "Tokyo": 0.255 193 | }, 194 | "Delhi": { 195 | "Amsterdam": 0.3443, 196 | "Beijing": 0.3615, 197 | "Berlin": 0.3856, 198 | "Brussels": 0.3818, 199 | "Chicago": 0.5197, 200 | "Dallas": 0.463, 201 | "Delhi": 0.329, 202 | "Detroit": 0.4713, 203 | "Guangzhou": 0.3247, 204 | "Hongkong": 0.2823, 205 | "Houston": 0.4802, 206 | "London": 0.3368, 207 | "Los angeles": 0.4378, 208 | "Madrid": 0.3736, 209 | "Milan": 0.3667, 210 | "Moscow": 0.3764, 211 | "Munich": 0.4294, 212 | "New york": 0.4938, 213 | "Osaka": 
0.3492, 214 | "Paris": 0.3731, 215 | "Philadelphia": 0.4653, 216 | "Phoenix": 0.4139, 217 | "San antonio": 0.4663, 218 | "San diego": 0.4145, 219 | "Seoul": 0.3661, 220 | "Shanghai": 0.3542, 221 | "Shenzhen": 0.3306, 222 | "Singapore": 0.3593, 223 | "Stockholm": 0.3171, 224 | "Tokyo": 0.36 225 | }, 226 | "Detroit": { 227 | "Amsterdam": 0.3036, 228 | "Beijing": 0.4083, 229 | "Berlin": 0.4512, 230 | "Brussels": 0.3963, 231 | "Chicago": 0.6494, 232 | "Dallas": 0.5191, 233 | "Delhi": 0.2839, 234 | "Detroit": 0.5704, 235 | "Guangzhou": 0.3641, 236 | "Hongkong": 0.303, 237 | "Houston": 0.5174, 238 | "London": 0.3147, 239 | "Los angeles": 0.5196, 240 | "Madrid": 0.3372, 241 | "Milan": 0.3594, 242 | "Moscow": 0.4371, 243 | "Munich": 0.4468, 244 | "New york": 0.5248, 245 | "Osaka": 0.26, 246 | "Paris": 0.3552, 247 | "Philadelphia": 0.4248, 248 | "Phoenix": 0.4746, 249 | "San antonio": 0.4858, 250 | "San diego": 0.4905, 251 | "Seoul": 0.29, 252 | "Shanghai": 0.4622, 253 | "Shenzhen": 0.3632, 254 | "Singapore": 0.3555, 255 | "Stockholm": 0.351, 256 | "Tokyo": 0.2682 257 | }, 258 | "Guangzhou": { 259 | "Amsterdam": 0.1379, 260 | "Beijing": 0.3049, 261 | "Berlin": 0.2746, 262 | "Brussels": 0.2011, 263 | "Chicago": 0.3567, 264 | "Dallas": 0.2695, 265 | "Delhi": 0.1389, 266 | "Detroit": 0.3034, 267 | "Guangzhou": 0.3293, 268 | "Hongkong": 0.2588, 269 | "Houston": 0.2914, 270 | "London": 0.1402, 271 | "Los angeles": 0.2849, 272 | "Madrid": 0.1434, 273 | "Milan": 0.1342, 274 | "Moscow": 0.2847, 275 | "Munich": 0.2544, 276 | "New york": 0.2377, 277 | "Osaka": 0.0659, 278 | "Paris": 0.1776, 279 | "Philadelphia": 0.1969, 280 | "Phoenix": 0.307, 281 | "San antonio": 0.281, 282 | "San diego": 0.2681, 283 | "Seoul": 0.1037, 284 | "Shanghai": 0.3598, 285 | "Shenzhen": 0.2335, 286 | "Singapore": 0.3191, 287 | "Stockholm": 0.1735, 288 | "Tokyo": 0.0699 289 | }, 290 | "Hongkong": { 291 | "Amsterdam": 0.1093, 292 | "Beijing": 0.1997, 293 | "Berlin": 0.1496, 294 | "Brussels": 0.1284, 295 | "Chicago": 0.2251, 296 | "Dallas": 0.1832, 297 | "Delhi": 0.1141, 298 | "Detroit": 0.1883, 299 | "Guangzhou": 0.2635, 300 | "Hongkong": 0.2333, 301 | "Houston": 0.2022, 302 | "London": 0.1217, 303 | "Los angeles": 0.1837, 304 | "Madrid": 0.1381, 305 | "Milan": 0.1306, 306 | "Moscow": 0.1173, 307 | "Munich": 0.1183, 308 | "New york": 0.1758, 309 | "Osaka": 0.1398, 310 | "Paris": 0.1247, 311 | "Philadelphia": 0.1479, 312 | "Phoenix": 0.197, 313 | "San antonio": 0.1853, 314 | "San diego": 0.1985, 315 | "Seoul": 0.1088, 316 | "Shanghai": 0.2041, 317 | "Shenzhen": 0.2128, 318 | "Singapore": 0.2897, 319 | "Stockholm": 0.115, 320 | "Tokyo": 0.1545 321 | }, 322 | "Houston": { 323 | "Amsterdam": 0.3533, 324 | "Beijing": 0.3964, 325 | "Berlin": 0.4472, 326 | "Brussels": 0.4089, 327 | "Chicago": 0.614, 328 | "Dallas": 0.5092, 329 | "Delhi": 0.3105, 330 | "Detroit": 0.5354, 331 | "Guangzhou": 0.3578, 332 | "Hongkong": 0.3175, 333 | "Houston": 0.5418, 334 | "London": 0.3659, 335 | "Los angeles": 0.5141, 336 | "Madrid": 0.378, 337 | "Milan": 0.3873, 338 | "Moscow": 0.3856, 339 | "Munich": 0.4641, 340 | "New york": 0.5452, 341 | "Osaka": 0.2999, 342 | "Paris": 0.3661, 343 | "Philadelphia": 0.4778, 344 | "Phoenix": 0.4435, 345 | "San antonio": 0.5306, 346 | "San diego": 0.5057, 347 | "Seoul": 0.3274, 348 | "Shanghai": 0.4184, 349 | "Shenzhen": 0.3598, 350 | "Singapore": 0.3832, 351 | "Stockholm": 0.3575, 352 | "Tokyo": 0.3132 353 | }, 354 | "London": { 355 | "Amsterdam": 0.3511, 356 | "Beijing": 0.3687, 357 | "Berlin": 0.4177, 358 | "Brussels": 
0.4242, 359 | "Chicago": 0.5468, 360 | "Dallas": 0.5102, 361 | "Delhi": 0.3351, 362 | "Detroit": 0.5059, 363 | "Guangzhou": 0.3318, 364 | "Hongkong": 0.3075, 365 | "Houston": 0.4886, 366 | "London": 0.3791, 367 | "Los angeles": 0.4712, 368 | "Madrid": 0.386, 369 | "Milan": 0.4062, 370 | "Moscow": 0.3516, 371 | "Munich": 0.452, 372 | "New york": 0.522, 373 | "Osaka": 0.3499, 374 | "Paris": 0.3964, 375 | "Philadelphia": 0.5147, 376 | "Phoenix": 0.4493, 377 | "San antonio": 0.5069, 378 | "San diego": 0.4446, 379 | "Seoul": 0.3678, 380 | "Shanghai": 0.3566, 381 | "Shenzhen": 0.3316, 382 | "Singapore": 0.3833, 383 | "Stockholm": 0.3617, 384 | "Tokyo": 0.3513 385 | }, 386 | "Los angeles": { 387 | "Amsterdam": 0.3198, 388 | "Beijing": 0.4064, 389 | "Berlin": 0.4359, 390 | "Brussels": 0.404, 391 | "Chicago": 0.6586, 392 | "Dallas": 0.5313, 393 | "Delhi": 0.2714, 394 | "Detroit": 0.5455, 395 | "Guangzhou": 0.364, 396 | "Hongkong": 0.308, 397 | "Houston": 0.5418, 398 | "London": 0.3034, 399 | "Los angeles": 0.5326, 400 | "Madrid": 0.3518, 401 | "Milan": 0.3605, 402 | "Moscow": 0.3887, 403 | "Munich": 0.4457, 404 | "New york": 0.5259, 405 | "Osaka": 0.2606, 406 | "Paris": 0.3512, 407 | "Philadelphia": 0.4481, 408 | "Phoenix": 0.4882, 409 | "San antonio": 0.5397, 410 | "San diego": 0.5104, 411 | "Seoul": 0.2811, 412 | "Shanghai": 0.4102, 413 | "Shenzhen": 0.3458, 414 | "Singapore": 0.3902, 415 | "Stockholm": 0.3102, 416 | "Tokyo": 0.2683 417 | }, 418 | "Madrid": { 419 | "Amsterdam": 0.3557, 420 | "Beijing": 0.3805, 421 | "Berlin": 0.449, 422 | "Brussels": 0.4205, 423 | "Chicago": 0.5711, 424 | "Dallas": 0.5191, 425 | "Delhi": 0.3322, 426 | "Detroit": 0.5219, 427 | "Guangzhou": 0.3182, 428 | "Hongkong": 0.2802, 429 | "Houston": 0.5078, 430 | "London": 0.371, 431 | "Los angeles": 0.4876, 432 | "Madrid": 0.3924, 433 | "Milan": 0.4007, 434 | "Moscow": 0.4245, 435 | "Munich": 0.4721, 436 | "New york": 0.5342, 437 | "Osaka": 0.3188, 438 | "Paris": 0.3839, 439 | "Philadelphia": 0.4944, 440 | "Phoenix": 0.4586, 441 | "San antonio": 0.5279, 442 | "San diego": 0.4564, 443 | "Seoul": 0.355, 444 | "Shanghai": 0.3812, 445 | "Shenzhen": 0.3582, 446 | "Singapore": 0.3649, 447 | "Stockholm": 0.3407, 448 | "Tokyo": 0.3306 449 | }, 450 | "Milan": { 451 | "Amsterdam": 0.3165, 452 | "Beijing": 0.3756, 453 | "Berlin": 0.4076, 454 | "Brussels": 0.3643, 455 | "Chicago": 0.5446, 456 | "Dallas": 0.4588, 457 | "Delhi": 0.2624, 458 | "Detroit": 0.4833, 459 | "Guangzhou": 0.3277, 460 | "Hongkong": 0.2968, 461 | "Houston": 0.4704, 462 | "London": 0.2959, 463 | "Los angeles": 0.4506, 464 | "Madrid": 0.3268, 465 | "Milan": 0.3448, 466 | "Moscow": 0.347, 467 | "Munich": 0.4168, 468 | "New york": 0.4587, 469 | "Osaka": 0.22, 470 | "Paris": 0.3504, 471 | "Philadelphia": 0.4137, 472 | "Phoenix": 0.4002, 473 | "San antonio": 0.4792, 474 | "San diego": 0.4422, 475 | "Seoul": 0.278, 476 | "Shanghai": 0.4186, 477 | "Shenzhen": 0.3439, 478 | "Singapore": 0.3832, 479 | "Stockholm": 0.3165, 480 | "Tokyo": 0.2283 481 | }, 482 | "Moscow": { 483 | "Amsterdam": 0.1359, 484 | "Beijing": 0.2815, 485 | "Berlin": 0.2574, 486 | "Brussels": 0.1875, 487 | "Chicago": 0.3186, 488 | "Dallas": 0.2888, 489 | "Delhi": 0.1633, 490 | "Detroit": 0.2614, 491 | "Guangzhou": 0.3109, 492 | "Hongkong": 0.2779, 493 | "Houston": 0.2582, 494 | "London": 0.1589, 495 | "Los angeles": 0.2655, 496 | "Madrid": 0.1713, 497 | "Milan": 0.1886, 498 | "Moscow": 0.2342, 499 | "Munich": 0.2335, 500 | "New york": 0.2492, 501 | "Osaka": 0.142, 502 | "Paris": 0.1701, 503 | 
"Philadelphia": 0.195, 504 | "Phoenix": 0.313, 505 | "San antonio": 0.2844, 506 | "San diego": 0.2812, 507 | "Seoul": 0.1408, 508 | "Shanghai": 0.3239, 509 | "Shenzhen": 0.2263, 510 | "Singapore": 0.3388, 511 | "Stockholm": 0.1452, 512 | "Tokyo": 0.1388 513 | }, 514 | "Munich": { 515 | "Amsterdam": 0.2973, 516 | "Beijing": 0.3691, 517 | "Berlin": 0.4137, 518 | "Brussels": 0.3568, 519 | "Chicago": 0.5472, 520 | "Dallas": 0.4535, 521 | "Delhi": 0.2518, 522 | "Detroit": 0.4811, 523 | "Guangzhou": 0.3415, 524 | "Hongkong": 0.2887, 525 | "Houston": 0.4573, 526 | "London": 0.3036, 527 | "Los angeles": 0.456, 528 | "Madrid": 0.318, 529 | "Milan": 0.3472, 530 | "Moscow": 0.4074, 531 | "Munich": 0.4289, 532 | "New york": 0.4442, 533 | "Osaka": 0.192, 534 | "Paris": 0.327, 535 | "Philadelphia": 0.3967, 536 | "Phoenix": 0.4315, 537 | "San antonio": 0.4726, 538 | "San diego": 0.4235, 539 | "Seoul": 0.2428, 540 | "Shanghai": 0.3756, 541 | "Shenzhen": 0.3502, 542 | "Singapore": 0.3834, 543 | "Stockholm": 0.312, 544 | "Tokyo": 0.21 545 | }, 546 | "New york": { 547 | "Amsterdam": 0.3705, 548 | "Beijing": 0.4199, 549 | "Berlin": 0.5005, 550 | "Brussels": 0.4544, 551 | "Chicago": 0.6445, 552 | "Dallas": 0.5453, 553 | "Delhi": 0.3397, 554 | "Detroit": 0.5698, 555 | "Guangzhou": 0.3637, 556 | "Hongkong": 0.3158, 557 | "Houston": 0.5578, 558 | "London": 0.3852, 559 | "Los angeles": 0.5296, 560 | "Madrid": 0.4202, 561 | "Milan": 0.4242, 562 | "Moscow": 0.4151, 563 | "Munich": 0.4877, 564 | "New york": 0.5853, 565 | "Osaka": 0.3441, 566 | "Paris": 0.4172, 567 | "Philadelphia": 0.5243, 568 | "Phoenix": 0.4743, 569 | "San antonio": 0.5641, 570 | "San diego": 0.5246, 571 | "Seoul": 0.3703, 572 | "Shanghai": 0.4128, 573 | "Shenzhen": 0.3722, 574 | "Singapore": 0.3915, 575 | "Stockholm": 0.3755, 576 | "Tokyo": 0.3608 577 | }, 578 | "Osaka": { 579 | "Amsterdam": 0.3591, 580 | "Beijing": 0.3649, 581 | "Berlin": 0.4386, 582 | "Brussels": 0.4099, 583 | "Chicago": 0.5459, 584 | "Dallas": 0.4784, 585 | "Delhi": 0.3417, 586 | "Detroit": 0.4857, 587 | "Guangzhou": 0.3165, 588 | "Hongkong": 0.2992, 589 | "Houston": 0.5035, 590 | "London": 0.3716, 591 | "Los angeles": 0.4629, 592 | "Madrid": 0.4245, 593 | "Milan": 0.3908, 594 | "Moscow": 0.381, 595 | "Munich": 0.4458, 596 | "New york": 0.5126, 597 | "Osaka": 0.4267, 598 | "Paris": 0.3979, 599 | "Philadelphia": 0.5072, 600 | "Phoenix": 0.4287, 601 | "San antonio": 0.5308, 602 | "San diego": 0.4477, 603 | "Seoul": 0.4126, 604 | "Shanghai": 0.3814, 605 | "Shenzhen": 0.341, 606 | "Singapore": 0.3714, 607 | "Stockholm": 0.3375, 608 | "Tokyo": 0.4454 609 | }, 610 | "Paris": { 611 | "Amsterdam": 0.3637, 612 | "Beijing": 0.3906, 613 | "Berlin": 0.4591, 614 | "Brussels": 0.4231, 615 | "Chicago": 0.5552, 616 | "Dallas": 0.5093, 617 | "Delhi": 0.3177, 618 | "Detroit": 0.5298, 619 | "Guangzhou": 0.3375, 620 | "Hongkong": 0.2829, 621 | "Houston": 0.5085, 622 | "London": 0.3551, 623 | "Los angeles": 0.4888, 624 | "Madrid": 0.3853, 625 | "Milan": 0.3986, 626 | "Moscow": 0.3834, 627 | "Munich": 0.4603, 628 | "New york": 0.5331, 629 | "Osaka": 0.3002, 630 | "Paris": 0.4102, 631 | "Philadelphia": 0.49, 632 | "Phoenix": 0.4359, 633 | "San antonio": 0.5214, 634 | "San diego": 0.4539, 635 | "Seoul": 0.3381, 636 | "Shanghai": 0.386, 637 | "Shenzhen": 0.3697, 638 | "Singapore": 0.3831, 639 | "Stockholm": 0.3279, 640 | "Tokyo": 0.3126 641 | }, 642 | "Philadelphia": { 643 | "Amsterdam": 0.3876, 644 | "Beijing": 0.408, 645 | "Berlin": 0.4657, 646 | "Brussels": 0.4514, 647 | "Chicago": 0.6231, 648 | 
"Dallas": 0.5477, 649 | "Delhi": 0.3486, 650 | "Detroit": 0.5675, 651 | "Guangzhou": 0.3468, 652 | "Hongkong": 0.2914, 653 | "Houston": 0.5427, 654 | "London": 0.3979, 655 | "Los angeles": 0.5205, 656 | "Madrid": 0.4267, 657 | "Milan": 0.4322, 658 | "Moscow": 0.418, 659 | "Munich": 0.4854, 660 | "New york": 0.5766, 661 | "Osaka": 0.3884, 662 | "Paris": 0.4213, 663 | "Philadelphia": 0.5497, 664 | "Phoenix": 0.4765, 665 | "San antonio": 0.5661, 666 | "San diego": 0.4907, 667 | "Seoul": 0.3917, 668 | "Shanghai": 0.4076, 669 | "Shenzhen": 0.3719, 670 | "Singapore": 0.3859, 671 | "Stockholm": 0.3763, 672 | "Tokyo": 0.4077 673 | }, 674 | "Phoenix": { 675 | "Amsterdam": 0.2576, 676 | "Beijing": 0.3797, 677 | "Berlin": 0.3543, 678 | "Brussels": 0.3046, 679 | "Chicago": 0.5357, 680 | "Dallas": 0.4292, 681 | "Delhi": 0.2221, 682 | "Detroit": 0.4266, 683 | "Guangzhou": 0.3475, 684 | "Hongkong": 0.2682, 685 | "Houston": 0.4044, 686 | "London": 0.2519, 687 | "Los angeles": 0.4294, 688 | "Madrid": 0.2553, 689 | "Milan": 0.2876, 690 | "Moscow": 0.3389, 691 | "Munich": 0.3708, 692 | "New york": 0.3915, 693 | "Osaka": 0.1943, 694 | "Paris": 0.2658, 695 | "Philadelphia": 0.3066, 696 | "Phoenix": 0.4347, 697 | "San antonio": 0.4332, 698 | "San diego": 0.4087, 699 | "Seoul": 0.2237, 700 | "Shanghai": 0.345, 701 | "Shenzhen": 0.3095, 702 | "Singapore": 0.3677, 703 | "Stockholm": 0.2539, 704 | "Tokyo": 0.2066 705 | }, 706 | "San antonio": { 707 | "Amsterdam": 0.3491, 708 | "Beijing": 0.3925, 709 | "Berlin": 0.4318, 710 | "Brussels": 0.4225, 711 | "Chicago": 0.6018, 712 | "Dallas": 0.5145, 713 | "Delhi": 0.2971, 714 | "Detroit": 0.5182, 715 | "Guangzhou": 0.3707, 716 | "Hongkong": 0.3278, 717 | "Houston": 0.5329, 718 | "London": 0.3344, 719 | "Los angeles": 0.505, 720 | "Madrid": 0.3692, 721 | "Milan": 0.3958, 722 | "Moscow": 0.4061, 723 | "Munich": 0.4592, 724 | "New york": 0.5127, 725 | "Osaka": 0.3219, 726 | "Paris": 0.3667, 727 | "Philadelphia": 0.4601, 728 | "Phoenix": 0.4743, 729 | "San antonio": 0.546, 730 | "San diego": 0.5038, 731 | "Seoul": 0.3176, 732 | "Shanghai": 0.4153, 733 | "Shenzhen": 0.3441, 734 | "Singapore": 0.3746, 735 | "Stockholm": 0.3338, 736 | "Tokyo": 0.3233 737 | }, 738 | "San diego": { 739 | "Amsterdam": 0.2567, 740 | "Beijing": 0.3403, 741 | "Berlin": 0.3603, 742 | "Brussels": 0.3376, 743 | "Chicago": 0.5299, 744 | "Dallas": 0.4391, 745 | "Delhi": 0.2277, 746 | "Detroit": 0.4383, 747 | "Guangzhou": 0.3071, 748 | "Hongkong": 0.3056, 749 | "Houston": 0.4159, 750 | "London": 0.2668, 751 | "Los angeles": 0.4188, 752 | "Madrid": 0.2962, 753 | "Milan": 0.2954, 754 | "Moscow": 0.3325, 755 | "Munich": 0.353, 756 | "New york": 0.3964, 757 | "Osaka": 0.2308, 758 | "Paris": 0.302, 759 | "Philadelphia": 0.3505, 760 | "Phoenix": 0.4105, 761 | "San antonio": 0.4595, 762 | "San diego": 0.3956, 763 | "Seoul": 0.247, 764 | "Shanghai": 0.3376, 765 | "Shenzhen": 0.287, 766 | "Singapore": 0.3529, 767 | "Stockholm": 0.216, 768 | "Tokyo": 0.2237 769 | }, 770 | "Seoul": { 771 | "Amsterdam": 0.3552, 772 | "Beijing": 0.3564, 773 | "Berlin": 0.415, 774 | "Brussels": 0.4115, 775 | "Chicago": 0.525, 776 | "Dallas": 0.4799, 777 | "Delhi": 0.3356, 778 | "Detroit": 0.5001, 779 | "Guangzhou": 0.3251, 780 | "Hongkong": 0.2877, 781 | "Houston": 0.4824, 782 | "London": 0.3717, 783 | "Los angeles": 0.4571, 784 | "Madrid": 0.4036, 785 | "Milan": 0.3855, 786 | "Moscow": 0.3886, 787 | "Munich": 0.4481, 788 | "New york": 0.5162, 789 | "Osaka": 0.3826, 790 | "Paris": 0.3981, 791 | "Philadelphia": 0.51, 792 | "Phoenix": 
0.4109, 793 | "San antonio": 0.4902, 794 | "San diego": 0.4515, 795 | "Seoul": 0.3955, 796 | "Shanghai": 0.3833, 797 | "Shenzhen": 0.3439, 798 | "Singapore": 0.3644, 799 | "Stockholm": 0.3549, 800 | "Tokyo": 0.4033 801 | }, 802 | "Shanghai": { 803 | "Amsterdam": 0.15, 804 | "Beijing": 0.263, 805 | "Berlin": 0.2355, 806 | "Brussels": 0.1911, 807 | "Chicago": 0.3058, 808 | "Dallas": 0.2437, 809 | "Delhi": 0.1371, 810 | "Detroit": 0.268, 811 | "Guangzhou": 0.3149, 812 | "Hongkong": 0.2621, 813 | "Houston": 0.2545, 814 | "London": 0.1158, 815 | "Los angeles": 0.2756, 816 | "Madrid": 0.149, 817 | "Milan": 0.1809, 818 | "Moscow": 0.2793, 819 | "Munich": 0.2144, 820 | "New york": 0.2313, 821 | "Osaka": 0.0867, 822 | "Paris": 0.159, 823 | "Philadelphia": 0.1769, 824 | "Phoenix": 0.2486, 825 | "San antonio": 0.2443, 826 | "San diego": 0.2379, 827 | "Seoul": 0.1104, 828 | "Shanghai": 0.3253, 829 | "Shenzhen": 0.2366, 830 | "Singapore": 0.3375, 831 | "Stockholm": 0.175, 832 | "Tokyo": 0.0849 833 | }, 834 | "Shenzhen": { 835 | "Amsterdam": 0.2329, 836 | "Beijing": 0.3305, 837 | "Berlin": 0.3506, 838 | "Brussels": 0.2725, 839 | "Chicago": 0.4344, 840 | "Dallas": 0.3519, 841 | "Delhi": 0.1932, 842 | "Detroit": 0.3824, 843 | "Guangzhou": 0.3258, 844 | "Hongkong": 0.2774, 845 | "Houston": 0.3274, 846 | "London": 0.2282, 847 | "Los angeles": 0.3761, 848 | "Madrid": 0.2552, 849 | "Milan": 0.2545, 850 | "Moscow": 0.2867, 851 | "Munich": 0.3079, 852 | "New york": 0.3323, 853 | "Osaka": 0.2561, 854 | "Paris": 0.2541, 855 | "Philadelphia": 0.2818, 856 | "Phoenix": 0.3571, 857 | "San antonio": 0.3662, 858 | "San diego": 0.3433, 859 | "Seoul": 0.2407, 860 | "Shanghai": 0.3797, 861 | "Shenzhen": 0.3103, 862 | "Singapore": 0.3444, 863 | "Stockholm": 0.2147, 864 | "Tokyo": 0.2503 865 | }, 866 | "Singapore": { 867 | "Amsterdam": 0.137, 868 | "Beijing": 0.25, 869 | "Berlin": 0.1814, 870 | "Brussels": 0.1416, 871 | "Chicago": 0.2552, 872 | "Dallas": 0.2004, 873 | "Delhi": 0.1106, 874 | "Detroit": 0.2552, 875 | "Guangzhou": 0.3097, 876 | "Hongkong": 0.2525, 877 | "Houston": 0.2082, 878 | "London": 0.1337, 879 | "Los angeles": 0.2248, 880 | "Madrid": 0.1644, 881 | "Milan": 0.1387, 882 | "Moscow": 0.2368, 883 | "Munich": 0.1721, 884 | "New york": 0.1934, 885 | "Osaka": 0.1725, 886 | "Paris": 0.1612, 887 | "Philadelphia": 0.1726, 888 | "Phoenix": 0.2073, 889 | "San antonio": 0.2218, 890 | "San diego": 0.2535, 891 | "Seoul": 0.1559, 892 | "Shanghai": 0.261, 893 | "Shenzhen": 0.1705, 894 | "Singapore": 0.3131, 895 | "Stockholm": 0.1531, 896 | "Tokyo": 0.1577 897 | }, 898 | "Stockholm": { 899 | "Amsterdam": 0.2298, 900 | "Beijing": 0.3031, 901 | "Berlin": 0.3055, 902 | "Brussels": 0.2807, 903 | "Chicago": 0.4112, 904 | "Dallas": 0.3661, 905 | "Delhi": 0.1953, 906 | "Detroit": 0.3916, 907 | "Guangzhou": 0.3163, 908 | "Hongkong": 0.2719, 909 | "Houston": 0.3708, 910 | "London": 0.2563, 911 | "Los angeles": 0.382, 912 | "Madrid": 0.2511, 913 | "Milan": 0.255, 914 | "Moscow": 0.2887, 915 | "Munich": 0.3284, 916 | "New york": 0.3479, 917 | "Osaka": 0.197, 918 | "Paris": 0.2626, 919 | "Philadelphia": 0.3061, 920 | "Phoenix": 0.3516, 921 | "San antonio": 0.3768, 922 | "San diego": 0.3394, 923 | "Seoul": 0.2164, 924 | "Shanghai": 0.2825, 925 | "Shenzhen": 0.2617, 926 | "Singapore": 0.3269, 927 | "Stockholm": 0.2192, 928 | "Tokyo": 0.2045 929 | }, 930 | "Tokyo": { 931 | "Amsterdam": 0.351, 932 | "Beijing": 0.3586, 933 | "Berlin": 0.4054, 934 | "Brussels": 0.4, 935 | "Chicago": 0.5001, 936 | "Dallas": 0.4676, 937 | "Delhi": 0.346, 938 
| "Detroit": 0.4758, 939 | "Guangzhou": 0.3327, 940 | "Hongkong": 0.2785, 941 | "Houston": 0.4762, 942 | "London": 0.3669, 943 | "Los angeles": 0.437, 944 | "Madrid": 0.4173, 945 | "Milan": 0.3957, 946 | "Moscow": 0.3826, 947 | "Munich": 0.4279, 948 | "New york": 0.4937, 949 | "Osaka": 0.4185, 950 | "Paris": 0.3878, 951 | "Philadelphia": 0.4968, 952 | "Phoenix": 0.4043, 953 | "San antonio": 0.486, 954 | "San diego": 0.4296, 955 | "Seoul": 0.4174, 956 | "Shanghai": 0.3754, 957 | "Shenzhen": 0.3387, 958 | "Singapore": 0.3739, 959 | "Stockholm": 0.3367, 960 | "Tokyo": 0.4354 961 | } 962 | } -------------------------------------------------------------------------------- /intra-city-network-homogeneity/README.md: -------------------------------------------------------------------------------- 1 | # Road network prediction 2 | 3 | This section pertains to the execution of road network prediction. We forecast the missing road segments within an incomplete road network by employing six distinct models, which include Node2vec, Struc2vec, Sprectral GCN, GraphSAGE, Graph Attention Network, and Relational GCN. 4 | 5 | ## Directory Structure 6 | * models: implementation of the six road network prediction models 7 | * trainer: code to train road network prediction models 8 | * tester: code to test road network prediction models 9 | * utils: code to for data loader and analyzing the best threshold 10 | * figure1.defg_result: code to visualize the evaluation and comparison of different models 11 | * shifted_result: Relational GCN model's result on the same cities where the urban networks are slightly displaced. 12 | * large_city_network_result: Relational GCN model's result on the six cities with larger road networks (30*30km). 13 | 14 | ## Training 15 | * To train Node2vec + DistMult 16 | ``` 17 | cd trainer 18 | python node2vec_trainer.py 19 | ``` 20 | * To train Struc2vec + DistMult 21 | ``` 22 | python struc2vec_trainer.py 23 | ``` 24 | * To train Spectral GCN + DistMult 25 | ``` 26 | python spectral_gcn_trainer.py 27 | ``` 28 | * To train GraphSAGE + DistMult 29 | ``` 30 | python graph_sage_trainer.py 31 | ``` 32 | * To train Graph Attention Network + DistMult 33 | ``` 34 | python gat_trainer.py 35 | ``` 36 | * To train Relational GCN + DistMult 37 | ``` 38 | python relational_gcn_trainer.py 39 | ``` 40 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/large_city_network_result/README.md: -------------------------------------------------------------------------------- 1 | # result link 2 | https://drive.google.com/drive/folders/1YZYJHrO1t7zyCWgiecnvk__cCEB0a7iI?usp=sharing 3 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__init__.py -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/__init__.cpython-37.pyc 
-------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/distmult.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/distmult.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/distmult.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/distmult.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/gat.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/gat.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/graph_sage.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/graph_sage.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/node2vec.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/node2vec.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/relational_gcn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/relational_gcn.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/relational_gcn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/relational_gcn.cpython-38.pyc -------------------------------------------------------------------------------- 
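One orienting note before the model sources that follow: every encoder in `models/` shares the DistMult decoder defined in `models/distmult.py`. Reading directly from that implementation (a summary of the code, not notation quoted from the paper), a candidate edge between nodes with embeddings $h_s$ and $h_e$ is scored as

$$ s(h_s, h_e) = \sigma\!\left( \tanh(W h_s + b)^{\top} \, \mathrm{diag}(r) \, \tanh(W h_e + b) \right), $$

where $W$ and $b$ come from the shared linear layer, $r$ is the learned relation vector, and $\sigma$ is the sigmoid; the forward pass returns $\log[\,1-s,\; s\,]$ per node pair, which pairs with the NLLLoss used in `trainer/gnn_trainer.py`.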
/intra-city-network-homogeneity/models/__pycache__/spectral_gcn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/spectral_gcn.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/__pycache__/struc2vec.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/models/__pycache__/struc2vec.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/distmult.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DistMult(nn.Module): 6 | def __init__(self, embed_dim): 7 | super(DistMult, self).__init__() 8 | self.embed_dim = embed_dim 9 | self.linear = nn.Linear(embed_dim, embed_dim) 10 | self.weight = nn.Parameter(torch.rand(embed_dim), requires_grad=True) 11 | 12 | def forward(self, start, end): 13 | # [B, 50], [B, 50] 14 | start = torch.tanh(self.linear(start)) 15 | end = torch.tanh(self.linear(end)) 16 | if start.dim() == 2: 17 | score = (start * self.weight).unsqueeze(1) # [B, 1, 50] 18 | score = torch.bmm(score, end.unsqueeze(2)) # [B, 1, 50] x [B, 50, 1] => [B, 1, 1] 19 | score = torch.sigmoid(score.squeeze(2)) 20 | elif start.dim() == 3: 21 | score = torch.bmm(start * self.weight, end.permute(0, 2, 1)) # [B, L, H] x [B, H, L] => [B, L, L] 22 | score = torch.sigmoid(score.unsqueeze(-1)).view(-1, 1) # [B, L, L, 1] => [B x L x L, 1] 23 | return torch.log(torch.cat([1 - score, score], dim=1) + 1e-32) # [B, 2] 24 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/gat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from models.distmult import DistMult 5 | 6 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 7 | 8 | 9 | class GATLayer(nn.Module): 10 | def __init__(self, input_dim, hidden_dim, label_num, dropout=0.1): 11 | super(GATLayer, self).__init__() 12 | self.hidden_dim = hidden_dim 13 | self.label_num = label_num 14 | self.dropout = dropout 15 | 16 | self.W = nn.Parameter(torch.zeros(size=(input_dim + 2, hidden_dim))) 17 | nn.init.xavier_uniform_(self.W.data) 18 | self.norm = nn.LayerNorm(hidden_dim) 19 | self.a = nn.Parameter(torch.zeros(size=(label_num + 1, 2 * hidden_dim, 1))) 20 | nn.init.xavier_uniform_(self.a.data) 21 | #self.r = nn.Parameter(torch.rand(label_num + 1)) 22 | 23 | def forward(self, inputs): 24 | x, feature, adjs = inputs['x'], inputs['feature'], inputs['adj'] 25 | 26 | x = torch.cat([x, feature], dim=2) 27 | x = torch.matmul(x, self.W) # [B, L, H] 28 | x = self.norm(x) 29 | x = F.dropout(torch.tanh(x), p=self.dropout, training=self.training) 30 | 31 | bsz, num = x.size(0), x.size(1) 32 | h = torch.cat([x.repeat(1, 1, num).view(bsz, num*num, -1), 33 | x.repeat(1, num, 1)], dim=2).view(bsz*num*num, 1, -1) # [BxLxL, 1, 2H] 34 | #h = torch.matmul(h, self.a).squeeze(-1) # [BxLxL, 1] 35 | h = torch.cat([torch.matmul(h, 
self.a[i]).squeeze(-1) 36 | for i in range(self.label_num + 1)], dim=1) # [BxLxL, label_num+1] 37 | h = h.view(bsz, num, num, -1).permute(0, 3, 1, 2) # [B, label_num+1, L, L] 38 | e = torch.tanh(h) 39 | 40 | attention = -9e15 * torch.ones(bsz, num, num).to(device) 41 | attention = torch.where( 42 | torch.eye(num).repeat(bsz, 1, 1).to(device) > 0, e[:, -1, :, :], attention 43 | ) 44 | for i in range(self.label_num): 45 | attention = torch.where(adjs[:, i, :, :] > 0, e[:, i, :, :], attention) 46 | attention = F.softmax(attention, dim=2) 47 | output = torch.bmm(attention, x) 48 | 49 | return output 50 | 51 | 52 | class MultiGATLayer(nn.Module): 53 | def __init__(self, hidden_dim, heads_num, label_num, dropout=0.1): 54 | super(MultiGATLayer, self).__init__() 55 | self.inner_dim = int(hidden_dim / 2) 56 | self.attentions = [GATLayer(hidden_dim, self.inner_dim, label_num, dropout) 57 | for _ in range(heads_num)] 58 | for i, attention in enumerate(self.attentions): 59 | self.add_module('attention_{}'.format(i), attention) 60 | self.dropout = dropout 61 | self.fc = nn.Linear(self.inner_dim * heads_num, hidden_dim) 62 | self.norm = nn.LayerNorm(hidden_dim) 63 | 64 | def forward(self, inputs): 65 | x = torch.cat([att(inputs) for att in self.attentions], dim=2) 66 | x = torch.tanh(self.norm(self.fc(x))) 67 | x = F.dropout(x, p=self.dropout, training=self.training) 68 | 69 | return { 70 | 'x': x + inputs['x'], 71 | 'feature': inputs['feature'], 72 | 'adj': inputs['adj'] 73 | } 74 | 75 | 76 | class GAT(nn.Module): 77 | def __init__(self, max_number, hidden_dim, heads_num, label_num, gat_layer=3, dropout=0.1): 78 | super(GAT, self).__init__() 79 | self.hidden_dim = hidden_dim 80 | self.heads_num = heads_num 81 | self.label_num = label_num 82 | self.gat_layer = gat_layer 83 | self.embedding = nn.Embedding(max_number, hidden_dim-2, padding_idx=0) 84 | self.gats = nn.ModuleList([MultiGATLayer(hidden_dim, heads_num, label_num, dropout) 85 | for _ in range(self.gat_layer)]) 86 | self.distmult = DistMult(hidden_dim) 87 | 88 | def forward(self, inputs): 89 | x, feature, adj = inputs['x'], inputs['feature'], inputs['adj'] 90 | embed = self.embedding(x) 91 | embed = torch.cat([embed, feature], dim=2) 92 | 93 | gat_result = {'x': embed, 'feature': feature, 'adj': adj} 94 | for gat in self.gats: 95 | gat_result = gat(gat_result) 96 | embed = gat_result['x'] 97 | 98 | return self.distmult(embed, embed) # [bsz x L x L, 2] 99 | 100 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/graph_sage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.distmult import DistMult 6 | 7 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 8 | 9 | 10 | class GSAGELayer(nn.Module): 11 | def __init__(self, hidden_dim, label_num, dropout=0.1): 12 | super(GSAGELayer, self).__init__() 13 | self.hidden_dim = hidden_dim 14 | self.label_num = label_num 15 | self.dropout = dropout 16 | 17 | self.fc = nn.Linear(hidden_dim + 2, hidden_dim) 18 | self.norm = nn.LayerNorm(hidden_dim) 19 | 20 | self.W = nn.Parameter(torch.rand(label_num, hidden_dim, int(hidden_dim/2)).to(device)) 21 | nn.init.xavier_normal_(self.W) 22 | self.aggregator = nn.LSTM(input_size=int(hidden_dim/2), hidden_size=hidden_dim, batch_first=True) 23 | 24 | def forward(self, inputs): 25 | x, feature, adjs = inputs['x'], inputs['feature'], inputs['adj'] 26 | x 
= torch.cat([x, feature], dim=2) 27 | x = self.fc(x) 28 | x = self.norm(x) 29 | x = F.dropout(torch.tanh(x), self.dropout, training=self.training) 30 | 31 | bsz = x.size(0) 32 | max_neighbor = torch.zeros(bsz, x.size(1)) 33 | for i in range(int(self.label_num)): 34 | for j in range(bsz): 35 | for k in range(x.size(1)): 36 | max_neighbor[j, k] += torch.sum(adjs[j, i, k, :]) 37 | max_neighbor = int(torch.max(max_neighbor)) 38 | 39 | supports = torch.zeros(bsz, x.size(1), self.label_num, max_neighbor, int(self.hidden_dim/2)).to(device) 40 | for i in range(int(self.label_num)): 41 | for j in range(bsz): 42 | for k in range(x.size(1)): 43 | supports[j, k, i, :int(torch.sum(adjs[j, i, k])), :] = \ 44 | torch.matmul(x[j, adjs[j, i, k] == 1, :], self.W[i]) 45 | # [B, L, r, max_neighbor, H] 46 | supports = supports.view(-1, max_neighbor, int(self.hidden_dim/2)) 47 | supports, _ = self.aggregator(supports) 48 | supports = supports[:, -1, :].view(bsz, x.size(1), self.label_num, self.hidden_dim) 49 | 50 | output = x + torch.mean(supports, dim=2) 51 | return { 52 | 'x': output + x, 53 | 'feature': feature, 54 | 'adj': inputs['adj'], 55 | } 56 | 57 | 58 | class GSAGE(nn.Module): 59 | def __init__(self, max_number, hidden_dim, label_num, gsage_layer=2, dropout=0.1): 60 | super(GSAGE, self).__init__() 61 | self.hidden_dim = hidden_dim 62 | self.label_num = label_num 63 | self.gsage_layer = gsage_layer 64 | self.embedding = nn.Embedding(max_number, hidden_dim - 2, padding_idx=0) 65 | self.gsages = nn.ModuleList([GSAGELayer(hidden_dim, label_num, dropout=dropout) for _ in range(gsage_layer)]) 66 | self.distmult = DistMult(hidden_dim) 67 | 68 | def forward(self, inputs): 69 | x, feature, adj = inputs['x'], inputs['feature'], inputs['adj'] 70 | embed = self.embedding(x) 71 | embed = torch.cat([embed, feature], dim=2) 72 | 73 | gsage_result = {'x': embed, 'feature': feature, 'adj': adj} 74 | for gsage in self.gsages: 75 | gsage_result = gsage(gsage_result) 76 | embed = gsage_result['x'] # [bsz, L, H] 77 | 78 | return self.distmult(embed, embed) # [bsz x L x L, 2] 79 | 80 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/node2vec.py: -------------------------------------------------------------------------------- 1 | from GraphEmbedding.ge.models import node2vec 2 | 3 | 4 | class Node2Vec(): 5 | def __init__(self, walk_length=15, num_walks=200, p=0.25, q=4, workers=1): 6 | self.walk_length = walk_length 7 | self.num_walks = num_walks 8 | self.p = p 9 | self.q = q 10 | self.workers = workers 11 | self.model = None 12 | 13 | def build_model(self, G): 14 | self.model = node2vec.Node2Vec( 15 | G, walk_length=self.walk_length, num_walks=self.num_walks, 16 | p=self.p, q=self.q, workers=self.workers 17 | ) 18 | 19 | def train(self, embed_size=50, window_size=5, iter=7): 20 | assert self.model is not None 21 | self.model.train(embed_size, window_size, iter) 22 | return self.model.get_embeddings() 23 | 24 | 25 | if __name__ == "__main__": 26 | from utils.data_loader import DataLoader 27 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/') 28 | model = Node2Vec() 29 | model.build_model(test.build_source_graph(0)) 30 | embeds = model.train() 31 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/relational_gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import 
torch.nn.functional as F 4 | 5 | from models.distmult import DistMult 6 | 7 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 8 | 9 | 10 | class RGCNLayer(nn.Module): 11 | def __init__(self, hidden_dim, relation_num, v_num, dropout=0.1): 12 | super(RGCNLayer, self).__init__() 13 | self.hidden_dim = hidden_dim 14 | self.relation_num = relation_num 15 | self.fc = nn.Linear(hidden_dim + 2, hidden_dim) 16 | self.norm = nn.LayerNorm(hidden_dim) 17 | self.W = nn.Parameter(torch.rand(hidden_dim, v_num, hidden_dim).to(device)) 18 | self.a = nn.Parameter(torch.rand(relation_num + 1, v_num).to(device)) 19 | self.dropout = dropout 20 | 21 | nn.init.xavier_normal_(self.W) 22 | nn.init.xavier_normal_(self.a) 23 | 24 | def forward(self, inputs): 25 | x, feature, _adjs = inputs['x'], inputs['feature'], inputs['adj'] 26 | 27 | x = torch.cat([x, feature], dim=2) 28 | #print(x.size()) 29 | x = self.fc(x) 30 | x = self.norm(x) 31 | x = F.dropout(torch.tanh(x), self.dropout, training=self.training) # pass training flag so dropout is disabled in eval mode 32 | 33 | bsz = x.size(0) 34 | supports = [] 35 | adjs = [torch.eye(x.size(1)).repeat(bsz, 1, 1).to(device)] 36 | for i in range(int(self.relation_num)): 37 | adjs.append(_adjs[:, i, :, :]) 38 | 39 | for adj in adjs: 40 | supports.append(torch.bmm(adj, x).unsqueeze(1)) 41 | # [bsz, L, L] x [bsz, L, H] = [bsz, L, H] -> [bsz, 1, L, H] 42 | supports = torch.cat(supports, dim=1) # [bsz, 1+relation_num, L, H] 43 | 44 | output = torch.matmul(self.a, self.W).permute(1, 0, 2) # [1+r, v] x [H, v, H] = [H, 1+r, H] -> [1+r, H, H] 45 | output = torch.matmul(supports, output) # [bsz, 1+r, L, H] x [1+r, H, H] = [bsz, 1+r, L, H] 46 | output = torch.sum(output, dim=1) 47 | #output /= (self.relation_num + 1) # [bsz, L, H] 48 | output = F.dropout(output, self.dropout, training=self.training) 49 | 50 | return { 51 | 'x': output + x, 52 | 'feature': feature, 53 | 'adj': inputs['adj'], 54 | } 55 | 56 | 57 | class RGCN(nn.Module): 58 | def __init__(self, max_number, hidden_dim, relation_num, v_num, gcn_layer=2, dropout=0.1): 59 | super(RGCN, self).__init__() 60 | self.hidden_dim = hidden_dim 61 | self.relation_num = relation_num 62 | self.v_num = v_num 63 | self.gcn_layer = gcn_layer 64 | self.embedding = nn.Embedding(max_number, hidden_dim-2, padding_idx=0) 65 | self.gcns = nn.ModuleList([RGCNLayer(hidden_dim, relation_num, v_num, dropout) 66 | for _ in range(gcn_layer)]) 67 | self.distmult = DistMult(hidden_dim) 68 | 69 | def forward(self, inputs): 70 | x, feature, adj = inputs['x'], inputs['feature'], inputs['adj'] 71 | embed = self.embedding(x) 72 | embed = torch.cat([embed, feature], dim=2) 73 | 74 | gcn_result = {'x': embed, 'feature': feature, 'adj': adj} 75 | for gcn in self.gcns: 76 | gcn_result = gcn(gcn_result) 77 | embed = gcn_result['x'] # [bsz, L, H] 78 | 79 | return self.distmult(embed, embed) # [bsz x L x L, 2] 80 | 81 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/spectral_gcn.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from models.distmult import DistMult 7 | 8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 9 | 10 | 11 | def normalize_adj(adj): 12 | adj_ = copy.deepcopy(adj) 13 | #bsz, max_number = adj_.size(0), adj_.size(1) 14 | #adj_ += torch.eye(max_number).repeat(bsz, 1, 1).to(device) 15 | rowsum = adj_.sum(-1) 16 | degree_mat_inv_sqrt = torch.diag_embed(torch.pow(rowsum, -0.5)) 17 | return
torch.bmm(torch.bmm(degree_mat_inv_sqrt, adj_), degree_mat_inv_sqrt) 18 | 19 | 20 | class SGCNLayer(nn.Module): 21 | def __init__(self, hidden_dim, label_num, dropout=0.1): 22 | super(SGCNLayer, self).__init__() 23 | self.hidden_dim = hidden_dim 24 | self.label_num = label_num 25 | self.dropout = dropout 26 | 27 | self.fc = nn.Linear(hidden_dim + 2, hidden_dim) 28 | self.norm = nn.LayerNorm(hidden_dim) 29 | self.W = nn.Parameter(torch.rand(label_num + 1, hidden_dim, hidden_dim).to(device)) 30 | #self.a = nn.Parameter(torch.rand(label_num + 1, v_num).to(device)) 31 | nn.init.xavier_uniform_(self.W.data) 32 | #nn.init.xavier_uniform_(self.a.data) 33 | 34 | def forward(self, inputs): 35 | x, feature, adjs = inputs['x'], inputs['feature'], inputs['adj'] 36 | 37 | x = torch.cat([x, feature], dim=2) 38 | x = self.fc(x) # [B, L, H] 39 | x = self.norm(x) 40 | x = F.dropout(torch.tanh(x), p=self.dropout, training=self.training) 41 | 42 | bsz, max_number = adjs.size(0), adjs.size(2) 43 | ''' 44 | supports = [torch.bmm(torch.eye(max_number).repeat(bsz, 1, 1), x).unsqueeze(1)] 45 | for i in range(self.label_num): 46 | supports.append(torch.bmm(adjs[:, i, :, :], x).unsqueeze(1)) 47 | supports = torch.cat(supports, dim=1) 48 | output = torch.matmul(self.a, self.W).permute(1, 0, 2) # [1+r, v] x [H, v, H] = [H, 1+r, H] -> [1+r, H, H] 49 | output = torch.matmul(supports, output) # [bsz, 1+r, L, H] x [1+r, H, H] = [bsz, 1+r, L, H] 50 | output = torch.sum(output, dim=1) 51 | ''' 52 | 53 | output = torch.bmm(torch.eye(max_number).repeat(bsz, 1, 1).to(device), x) # keep the identity matrix on the same device as x 54 | output = torch.matmul(output, self.W[-1]) 55 | for i in range(self.label_num): 56 | output += torch.matmul(torch.bmm(adjs[:, i, :, :], x), self.W[i]) 57 | 58 | output /= (self.label_num + 1) 59 | output = F.dropout(output, p=self.dropout, training=self.training) 60 | 61 | return { 62 | 'x': output + inputs['x'], 63 | 'feature': inputs['feature'], 64 | 'adj': inputs['adj'] 65 | } 66 | 67 | 68 | class SGCN(nn.Module): 69 | def __init__(self, max_number, hidden_dim, label_num, gcn_layer=3, dropout=0.1): 70 | super(SGCN, self).__init__() 71 | self.hidden_dim = hidden_dim 72 | self.label_num = label_num 73 | self.gcn_layer = gcn_layer 74 | self.embedding = nn.Embedding(max_number, hidden_dim-2, padding_idx=0) 75 | self.gcns = nn.ModuleList([SGCNLayer(hidden_dim, label_num, dropout=dropout) for _ in range(gcn_layer)]) 76 | self.distmult = DistMult(hidden_dim) 77 | 78 | def forward(self, inputs): 79 | x, feature, adj = inputs['x'], inputs['feature'], inputs['adj'] 80 | embed = self.embedding(x) 81 | embed = torch.cat([embed, feature], dim=2) 82 | 83 | gcn_result = {'x': embed, 'feature': feature, 'adj': adj} 84 | for gcn in self.gcns: 85 | gcn_result = gcn(gcn_result) 86 | embed = gcn_result['x'] # [bsz, L, H] 87 | 88 | return self.distmult(embed, embed) # [bsz x L x L, 2] 89 | 90 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/models/struc2vec.py: -------------------------------------------------------------------------------- 1 | from GraphEmbedding.ge.models import struc2vec 2 | 3 | 4 | class Struc2Vec(): 5 | def __init__(self, walk_length=30, num_walks=200, workers=1): 6 | self.walk_length = walk_length 7 | self.num_walks = num_walks 8 | self.workers = workers 9 | self.model = None 10 | 11 | def build_model(self, G): 12 | self.model = struc2vec.Struc2Vec( 13 | G, walk_length=self.walk_length, num_walks=self.num_walks, workers=self.workers 14 | ) 15 | 16 | def train(self, embed_size=50,
window_size=5, iter=10): 17 | assert self.model is not None 18 | self.model.train(embed_size, window_size, iter) 19 | return self.model.get_embeddings() 20 | 21 | 22 | if __name__ == "__main__": 23 | from utils.data_loader import DataLoader 24 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/') 25 | model = Struc2Vec() 26 | model.build_model(test.build_source_graph(0)) 27 | embeds = model.train() 28 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/shifted_result/README.md: -------------------------------------------------------------------------------- 1 | # result link 2 | https://drive.google.com/drive/folders/1LC69liSOWRzq1aWB9cL6aDmljesJmhFm?usp=sharing 3 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__init__.py -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/gnn_tester.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/gnn_tester.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/gnn_tester.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/gnn_tester.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/vec_tester.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/vec_tester.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/__pycache__/vec_tester.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/tester/__pycache__/vec_tester.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/cross_tester.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pickle 3 | 4 | from tester.gnn_tester import GNNTester, get_edge_labels 5 | from utils.data_loader import DataLoader 6 | 7 | 8 | def cross_f1(train, test): 9 | cities = set(train.cities) & set(test.cities) 10 | cities = sorted(list(cities)) 11 | 12 | result = {} 13 | for c1 in cities: 14 | print('model:', c1) 15 | result[c1] = {} 16 | load = pickle.load(open(data_dir + 'data_2020715/relational-gcn/models/' + c1 + '_distmult.pkl', 'rb')) 17 | model = load['rgcn'] 18 | for c2 in cities: 19 | test.initialize() 20 | test.load_dir_datas(c2) 21 | tester = GNNTester(test_data=test, city=c2) 22 | max_number = tester.get_max_number() 23 | 24 | f1, _ = tester.improved_test(model, max_number, get_edge_labels(), int(load['max_number'])) # improved_test returns (overall F1, per-sample F1 dict) 25 | result[c1][c2] = f1 26 | print(c1, c2, f1) 27 | json.dump(result, open(data_dir + 'cross_f1score.json', 'w'), indent=2) 28 | 29 | 30 | def cross_sample_f1(train, test): 31 | cities = set(train.cities) & set(test.cities) 32 | cities = sorted(list(cities)) 33 | 34 | result = {} 35 | for c1 in cities: 36 | result[c1] = {} 37 | load = pickle.load(open(data_dir + 'data_2020715/relational-gcn/models/' + c1 + '_distmult.pkl', 'rb')) 38 | model = load['rgcn'] 39 | for c2 in cities: 40 | if c2 == c1: 41 | continue 42 | result[c1][c2] = {} 43 | 44 | train.initialize() 45 | train.load_dir_datas(c2) 46 | tester = GNNTester(test_data=train, city=c2) 47 | max_number = tester.get_max_number() 48 | f1, res = tester.improved_test(model, max_number, get_edge_labels(), int(load['max_number'])-1) 49 | result[c1][c2].update(res) 50 | 51 | test.initialize() 52 | test.load_dir_datas(c2) 53 | tester = GNNTester(test_data=test, city=c2) 54 | max_number = tester.get_max_number() 55 | f1, res = tester.improved_test(model, max_number, get_edge_labels(), int(load['max_number'])-1) 56 | result[c1][c2].update(res) 57 | json.dump(result, open(data_dir + 'cross_samples_f1score.json', 'w'), indent=2) 58 | 59 | 60 | if __name__ == "__main__": 61 | data_dir = 'D:/data/road-network-predictability/' 62 | train = DataLoader(data_dir + 'data_2020715/train/') 63 | test = DataLoader(data_dir + 'data_2020715/test/') 64 | cross_sample_f1(train, test) 65 | 66 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/gnn_tester.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | import pickle 4 | from functools import cmp_to_key 5 | import numpy as np 6 | import torch 7 | 8 | from tester.vec_tester import is_valid 9 | 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | 12 | 13 | def angle(n1, n2): 14 | x1, y1 = n1['lon'], n1['lat'] 15 | x2, y2 = n2['lon'], n2['lat'] 16 | if x1 == x2 and y1 <= y2: 17 | return 0 18 | if x1 == x2 and y1 > y2: 19 | return 4 20 | k = (y2 - y1) / (x2 - x1) 21 | a = math.degrees(math.atan(k)) 22 | if a >= 67.5: 23 | return 0 if y1 < y2 else 4 24 | elif a >= 22.5: 25 | return 1 if y1 < y2 else 5 26 | elif a >= -22.5: 27
| if a >= 0: 28 | return 2 if y1 < y2 else 6 29 | else: 30 | return 6 if y1 < y2 else 2 31 | elif a >= -67.5: 32 | return 7 if y1 < y2 else 3 33 | else: 34 | return 0 if y1 < y2 else 4 35 | 36 | 37 | def edge_label(n1, n2): 38 | ang = angle(n1, n2) 39 | return str(ang) 40 | 41 | 42 | def get_edge_labels(): 43 | labels = {} 44 | for ang in range(8): 45 | k = str(ang) 46 | labels[k] = len(labels) 47 | return labels 48 | 49 | 50 | def compare(n1, n2): 51 | if n1['lon'] == n2['lon']: 52 | return n1['lat'] - n2['lat'] 53 | return n1['lon'] - n2['lon'] 54 | 55 | 56 | class GNNTester(): 57 | def __init__(self, test_data, city): 58 | self.test_loader = test_data 59 | self.city = city 60 | self.id2node = {} 61 | self.initialize() 62 | 63 | def initialize(self): 64 | for k, v in self.test_loader.data[self.city].items(): 65 | for node in v['nodes']: 66 | ids = node['osmid'] 67 | if ids not in self.id2node: 68 | self.id2node[ids] = node 69 | 70 | def get_max_number(self): 71 | max_number = 0 72 | for index in self.test_loader.data[self.city]: 73 | max_number = max(max_number, len(self.test_loader[index]['nodes'])) 74 | return max_number + 1 75 | 76 | def prepare_batch_data(self, data, max_number, edge_labels, cut_number=None): 77 | X = torch.zeros(len(data), max_number).long().to(device) 78 | F = torch.zeros(len(data), max_number, 2).to(device) 79 | A = torch.zeros(len(data), len(edge_labels), max_number, max_number).to(device) 80 | N, S, T = [], [], [] 81 | for i, d in enumerate(data): 82 | nodes = copy.deepcopy(d['nodes']) 83 | min_lon = np.min([n['lon'] for n in nodes]) 84 | max_lon = np.max([n['lon'] for n in nodes]) 85 | min_lat = np.min([n['lat'] for n in nodes]) 86 | max_lat = np.max([n['lat'] for n in nodes]) 87 | for n in nodes: 88 | n['lon'] = (n['lon'] - min_lon) / (max_lon - min_lon) 89 | n['lat'] = (n['lat'] - min_lat) / (max_lat - min_lat) 90 | nodes.sort(key=cmp_to_key(compare)) 91 | source_edges = copy.deepcopy(d['source_edges']) 92 | target_edges = copy.deepcopy(d['target_edges']) 93 | N.append(nodes) 94 | S.append(source_edges) 95 | T.append(target_edges) 96 | 97 | id2index = {n['osmid']: i for i, n in enumerate(nodes)} 98 | 99 | x = [i + 1 for i in range(len(nodes))] 100 | x += [0] * (max_number - len(x)) 101 | if cut_number is not None: 102 | x = [min(x[i], cut_number) for i in range(len(x))] 103 | f = [[n['lon'], n['lat']] for n in nodes] 104 | f += [[0, 0] for i in range(max_number - len(f))] 105 | x = torch.LongTensor(x).to(device) 106 | f = torch.Tensor(f).to(device) 107 | adj = torch.zeros(len(edge_labels), max_number, max_number).to(device) 108 | 109 | for edge in source_edges: 110 | start, end = id2index[edge['start']], id2index[edge['end']] 111 | l1 = edge_labels[edge_label(nodes[start], nodes[end])] 112 | l2 = edge_labels[edge_label(nodes[end], nodes[start])] 113 | adj[l1, start, end] = 1. 114 | adj[l2, end, start] = 1. 
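# Note (comment added for clarity): each source edge fills two cells of the
# relational adjacency tensor -- one in the angle bucket for start -> end and
# one in the bucket for end -> start -- so the eight relation channels encode
# an approximate compass direction for every edge.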
115 | 116 | X[i], F[i], A[i] = x, f, adj 117 | return X, F, A, N, S, T 118 | 119 | def improved_test(self, model, max_number, edge_labels, cut_max_number): 120 | model.eval() 121 | right, wrong, total = 0, 0, 0 122 | 123 | if self.city in ['Hongkong', 'Guangzhou', 'Singapore']: 124 | th = 0.2 125 | elif self.city in ['Beijing', 'Shanghai', 'Shenzhen']: 126 | th = 0.45 127 | else: 128 | th = 0.6 129 | 130 | data = [self.test_loader[i] for i in range(len(self.test_loader))] 131 | ids = [self.test_loader.ids[i] for i in range(len(self.test_loader))] 132 | batch_size = 32 133 | result = {} 134 | for _ in range(0, len(data), batch_size): 135 | X, F, A, N, S, T = \ 136 | self.prepare_batch_data(data[_: _ + batch_size], max_number, edge_labels, cut_number=cut_max_number) 137 | output = model({ 138 | 'x': X, 139 | 'feature': F, 140 | 'adj': A 141 | }).view(X.size(0), X.size(1), X.size(1), 2).to('cpu') 142 | for i in range(len(output)): 143 | predict = output[i][..., 1] 144 | existed_edges = S[i] 145 | cand_edges = [] 146 | number = len(N[i]) 147 | 148 | idx = ids[_ + i] 149 | r, w, t = 0, 0, 0 150 | for j in range(number): 151 | for k in range(j + 1, number): 152 | start, end = N[i][j]['osmid'], N[i][k]['osmid'] 153 | if {'start': start, 'end': end} in T[i] or {'start': end, 'end': start} in T[i]: 154 | target = 1 155 | else: 156 | target = 0 157 | cand_edges.append({ 158 | 'start': start, 159 | 'end': end, 160 | 'score': float(predict[j][k]), 161 | 'target': target, 162 | }) 163 | cand_edges.sort(key=lambda e: e['score'], reverse=True) 164 | for edge in cand_edges: 165 | if edge['score'] < np.log(th): 166 | break 167 | if is_valid(edge, existed_edges, self.id2node): 168 | existed_edges.append(edge) 169 | if edge['target'] == 1: 170 | r += 1 171 | else: 172 | w += 1 173 | t = len(T[i]) 174 | precision = r / (r + w + 1e-9) 175 | recall = r / (t + 1e-9) 176 | f1 = 2 * precision * recall / (precision + recall + 1e-9) 177 | result[idx] = round(f1, 4) 178 | 179 | right += r 180 | wrong += w 181 | total += t 182 | precision = right / (right + wrong + 1e-9) 183 | recall = right / (total + 1e-9) 184 | f1 = 2 * precision * recall / (precision + recall + 1e-9) 185 | return round(f1, 4), result 186 | 187 | def test(self, model, max_number, edge_labels, result_dir): 188 | model.eval() 189 | right, wrong, total = 0, 0, 0 190 | 191 | test_result = {} 192 | data = [self.test_loader[i] for i in range(len(self.test_loader))] 193 | batch_size = 32 194 | for _ in range(0, len(data), batch_size): 195 | X, F, A, N, S, T = self.prepare_batch_data(data[_: _ + batch_size], max_number, edge_labels) 196 | output = model({ 197 | 'x': X, 198 | 'feature': F, 199 | 'adj': A 200 | }).view(X.size(0), X.size(1), X.size(1), 2).to('cpu') 201 | for i in range(len(output)): 202 | ids = self.test_loader.ids[_ + i] 203 | predict = output[i][..., 1] 204 | existed_edges = S[i] 205 | cand_edges = [] 206 | number = len(N[i]) 207 | 208 | for j in range(number): 209 | for k in range(j + 1, number): 210 | start, end = N[i][j]['osmid'], N[i][k]['osmid'] 211 | if {'start': start, 'end': end} in T[i] or {'start': end, 'end': start} in T[i]: 212 | target = 1 213 | else: 214 | target = 0 215 | cand_edges.append({ 216 | 'start': start, 217 | 'end': end, 218 | 'score': float(predict[j][k]), 219 | 'target': target, 220 | }) 221 | cand_edges.sort(key=lambda e: e['score'], reverse=True) 222 | test_result[ids] = cand_edges 223 | for edge in cand_edges: 224 | if edge['score'] < np.log(0.5): 225 | break 226 | #if is_valid(edge, existed_edges, 
self.id2node): 227 | existed_edges.append(edge) 228 | if edge['target'] == 1: 229 | right += 1 230 | else: 231 | wrong += 1 232 | total += len(T[i]) 233 | precision = right / (right + wrong + 1e-9) 234 | recall = right / (total + 1e-9) 235 | f1 = 2 * precision * recall / (precision + recall + 1e-9) 236 | #pickle.dump(test_result, open(result_dir + self.city + '_result.pkl', 'wb')) 237 | return right, wrong, total, precision, recall, f1 238 | 239 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/tester/vec_tester.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import math 3 | import torch 4 | import copy 5 | import numpy as np 6 | from utils.data_loader import DataLoader 7 | 8 | 9 | def is_intersect(node1, node2, node3, node4): 10 | lon1, lat1 = node1['lon'], node1['lat'] 11 | lon2, lat2 = node2['lon'], node2['lat'] 12 | lon3, lat3 = node3['lon'], node3['lat'] 13 | lon4, lat4 = node4['lon'], node4['lat'] 14 | distance1_3 = abs(lon1 - lon3) * 100000 + abs(lat1 - lat3) * 100000 15 | distance1_4 = abs(lon1 - lon4) * 100000 + abs(lat1 - lat4) * 100000 16 | distance2_3 = abs(lon2 - lon3) * 100000 + abs(lat2 - lat3) * 100000 17 | distance2_4 = abs(lon2 - lon4) * 100000 + abs(lat2 - lat4) * 100000 18 | min_distance = np.min([distance1_3, distance1_4, distance2_3, distance2_4]) 19 | if min_distance == 0: 20 | return False 21 | else: 22 | if np.max([lon1, lon2]) < np.min([lon3, lon4]) or np.max([lon3, lon4]) < np.min([lon1, lon2]): 23 | return False 24 | else: 25 | sort_points = np.sort([lon1, lon2, lon3, lon4]) 26 | left_point, right_point = sort_points[1], sort_points[2] 27 | if lon1 == lon2: 28 | value_point1 = [lat1, lat2] 29 | else: 30 | value_point1 = [(lat2-lat1)/(lon2-lon1)*(left_point-lon1)+lat1, (lat2-lat1)/(lon2-lon1)*(right_point-lon1)+lat1] 31 | if lon3 == lon4: 32 | value_point2 = [lat3, lat4] 33 | else: 34 | value_point2 = [(lat4 - lat3) / (lon4 - lon3) * (left_point - lon3) + lat3, 35 | (lat4 - lat3) / (lon4 - lon3) * (right_point - lon3) + lat3] 36 | if np.max(value_point1) < np.min(value_point2) or np.max(value_point2) < np.min(value_point1): 37 | return False 38 | else: 39 | return True 40 | 41 | 42 | def is_acute(node1, node2, node3, node4): 43 | lon1, lat1 = node1['lon'], node1['lat'] 44 | lon2, lat2 = node2['lon'], node2['lat'] 45 | lon3, lat3 = node3['lon'], node3['lat'] 46 | lon4, lat4 = node4['lon'], node4['lat'] 47 | distance1_3 = abs(lon1-lon3)*100000 + abs(lat1-lat3)*100000 48 | distance1_4 = abs(lon1-lon4)*100000 + abs(lat1-lat4)*100000 49 | distance2_3 = abs(lon2-lon3)*100000 + abs(lat2-lat3)*100000 50 | distance2_4 = abs(lon2-lon4)*100000 + abs(lat2-lat4)*100000 51 | min_distance = np.min([distance1_3, distance1_4, distance2_3, distance2_4]) 52 | if min_distance > 0: 53 | return False 54 | else: 55 | if distance1_3 == min_distance: 56 | x1,y1 = lon2-lon1, lat2-lat1 57 | x2,y2 = lon4-lon3, lat4-lat3 58 | if distance1_4 == min_distance: 59 | x1,y1 = lon2-lon1, lat2-lat1 60 | x2,y2 = lon3-lon4, lat3-lat4 61 | if distance2_3 == min_distance: 62 | x1,y1 = lon1-lon2, lat1-lat2 63 | x2,y2 = lon4-lon3, lat4-lat3 64 | if distance2_4 == min_distance: 65 | x1,y1 = lon1-lon2, lat1-lat2 66 | x2,y2 = lon3-lon4, lat3-lat4 67 | 68 | vector_1 = [x1, y1] 69 | vector_2 = [x2, y2] 70 | unit_vector_1 = vector_1 / np.linalg.norm(vector_1) 71 | unit_vector_2 = vector_2 / np.linalg.norm(vector_2) 72 | dot_product = np.dot(unit_vector_1, unit_vector_2) 73 | angle = 
np.arccos(dot_product) / math.pi * 180 74 | if angle < 40: 75 | return True 76 | else: 77 | return False 78 | 79 | 80 | def is_valid(new_edge, existed_edges, id2node): 81 | for edge in existed_edges: 82 | if is_intersect( 83 | id2node[new_edge['start']], id2node[new_edge['end']], 84 | id2node[edge['start']], id2node[edge['end']] 85 | ) or is_acute( 86 | id2node[new_edge['start']], id2node[new_edge['end']], 87 | id2node[edge['start']], id2node[edge['end']] 88 | ): 89 | return False 90 | return True 91 | 92 | 93 | class VecTester(): 94 | def __init__(self, embed_dim, test_data, city, data_dir): 95 | self.embed_dim = embed_dim 96 | self.test_loader = test_data 97 | self.city = city 98 | self.embedding = {} 99 | self.id2node = {} 100 | self.initialize(data_dir) 101 | 102 | def initialize(self, data_dir): 103 | for k, v in self.test_loader.data[self.city].items(): 104 | for node in v['nodes']: 105 | ids = node['osmid'] 106 | if ids not in self.id2node: 107 | self.id2node[ids] = node 108 | #self.embedding = pickle.load(open(data_dir + 'test/' + self.city + '_embedding.pkl', 'rb')) 109 | 110 | def test(self, model, result_dir): 111 | model.eval() 112 | model = model.to('cpu') 113 | right, wrong, total = 0, 0, 0 114 | 115 | test_result = {} 116 | for ids in self.embedding: 117 | existed_edges = copy.deepcopy(self.test_loader[ids]['source_edges']) 118 | cand_edges = [] 119 | batch_size = 101 120 | for i in range(0, len(self.embedding[ids]), batch_size): 121 | start = torch.Tensor([sample['start_embedding'].tolist() 122 | for sample in self.embedding[ids][i: i + batch_size]]) 123 | end = torch.Tensor([sample['end_embedding'].tolist() 124 | for sample in self.embedding[ids][i: i + batch_size]]) 125 | 126 | output = model(start, end).squeeze(0) 127 | for j in range(output.size(0)): 128 | sample = self.embedding[ids][i + j] 129 | edge = {'start': int(sample['start_id']), 'end': int(sample['end_id']), 130 | 'score': float(output[j][1]), 'target': sample['target']} 131 | cand_edges.append(edge) 132 | cand_edges.sort(key=lambda e: e['score'], reverse=True) 133 | test_result[ids] = cand_edges 134 | for edge in cand_edges: 135 | if edge['score'] < np.log(0.5): 136 | break 137 | #if is_valid(edge, existed_edges, self.id2node): 138 | existed_edges.append(edge) 139 | if {'start': edge['start'], 'end': edge['end']} in self.test_loader[ids]['target_edges'] or \ 140 | {'start': edge['end'], 'end': edge['start']} in self.test_loader[ids]['target_edges']: 141 | right += 1 142 | else: 143 | wrong += 1 144 | 145 | total += len(self.test_loader[ids]['target_edges']) 146 | precision = right / (right + wrong + 1e-9) 147 | recall = right / (total + 1e-9) 148 | f1 = 2 * precision * recall / (precision + recall + 1e-9) 149 | pickle.dump(test_result, open(result_dir + self.city + '_result.pkl', 'wb')) 150 | return right, wrong, total, precision, recall, f1 151 | 152 | 153 | if __name__ == "__main__": 154 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/') 155 | test.load_dir_datas('Akron') 156 | tester = VecTester(embed_dim=50, test_data=test, city='Akron', data_dir='E:/python-workspace/CityRoadPrediction/data_20200610/') # data_dir is a required argument of VecTester.__init__ 157 | print(test[0]['source_edges']) 158 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/trainer/__init__.py
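A quick worked example of the geometric filters in tester/vec_tester.py above (an illustrative sketch; the node ids and coordinates are made up): a candidate edge that crosses an existing edge must be rejected, which is exactly what `is_valid` enforces when predicted edges are added greedily by score.

```python
from tester.vec_tester import is_intersect, is_valid

# Four corners of a unit square (hypothetical nodes keyed by osmid).
id2node = {
    1: {'lon': 0.0, 'lat': 0.0},
    2: {'lon': 1.0, 'lat': 1.0},
    3: {'lon': 0.0, 'lat': 1.0},
    4: {'lon': 1.0, 'lat': 0.0},
}
existed = [{'start': 1, 'end': 2}]   # one diagonal already in the network
candidate = {'start': 3, 'end': 4}   # the other diagonal, which crosses it

print(is_intersect(id2node[1], id2node[2], id2node[3], id2node[4]))  # True
print(is_valid(candidate, existed, id2node))                         # False
```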
-------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/trainer/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/__pycache__/gnn_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/trainer/__pycache__/gnn_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/__pycache__/vec_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/trainer/__pycache__/vec_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/gat_trainer.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import torch 4 | 5 | from models.gat import GAT 6 | from tester.gnn_tester import GNNTester 7 | from trainer.gnn_trainer import GNNTrainer 8 | from utils.data_loader import DataLoader 9 | 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | torch.set_num_threads(6) 12 | 13 | 14 | class GATTrainer(GNNTrainer): 15 | def __init__(self, train_data, city, tester): 16 | super().__init__(train_data, city, tester) 17 | self.model = GAT( 18 | max_number=self.max_number, 19 | hidden_dim=50, 20 | heads_num=8, 21 | label_num=len(self.edge_labels), 22 | gat_layer=2, 23 | dropout=0.1, 24 | ).to(device) 25 | 26 | def save_model(self, best_model): 27 | obj = { 28 | 'max_number': self.max_number, 29 | 'hidden_dim': self.model.hidden_dim, 30 | 'heads_num': self.model.heads_num, 31 | 'label_num': self.model.label_num, 32 | 'gat_layer': self.model.gat_layer, 33 | 'city': self.city, 34 | 'gat': best_model, 35 | } 36 | pickle.dump(obj, open(data_dir + 'data_2020715/gat/models/' + 37 | self.city + '_distmult.pkl', 'wb')) 38 | 39 | 40 | if __name__ == "__main__": 41 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 42 | train = DataLoader(data_dir + 'data_2020715/train/') 43 | test = DataLoader(data_dir + 'data_2020715/test/') 44 | 45 | cities = set(train.cities) & set(test.cities) 46 | cities = sorted(list(cities)) 47 | for city in cities: 48 | print(city) 49 | train.initialize() 50 | train.load_dir_datas(city) 51 | test.initialize() 52 | test.load_dir_datas(city) 53 | tester = GNNTester(test_data=test, city=city) 54 | trainer = GATTrainer(train, city, tester) 55 | trainer.train_model(result_dir='E:/python-workspace/CityRoadPrediction/data_2020715/gat/result/') 56 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/gnn_trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | from functools import cmp_to_key 4 | import numpy as np 5 | 
import torch 6 | import torch.nn as nn 7 | 8 | from tester.gnn_tester import get_edge_labels, compare, edge_label 9 | 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | 12 | 13 | class GNNTrainer(): 14 | def __init__(self, train_data, city, tester): 15 | self.train_loader = train_data 16 | self.tester = tester 17 | self.city = city 18 | self.max_number = self.get_max_number() 19 | self.edge_labels = get_edge_labels() 20 | self.model = None 21 | 22 | def get_max_number(self): 23 | max_number = 0 24 | for index in self.train_loader.data[self.city]: 25 | max_number = max(max_number, len(self.train_loader[index]['nodes'])) 26 | for index in self.tester.test_loader.data[self.city]: 27 | max_number = max(max_number, len(self.tester.test_loader[index]['nodes'])) 28 | return max_number + 1 29 | 30 | def prepare_batch_data(self, data): 31 | X = torch.zeros(len(data), self.max_number).long().to(device) 32 | F = torch.zeros(len(data), self.max_number, 2).to(device) 33 | A = torch.zeros(len(data), len(self.edge_labels), self.max_number, self.max_number).to(device) 34 | T = torch.zeros(len(data), self.max_number, self.max_number).long().to(device) 35 | for i, d in enumerate(data): 36 | nodes = copy.deepcopy(d['nodes']) 37 | min_lon = np.min([n['lon'] for n in nodes]) 38 | max_lon = np.max([n['lon'] for n in nodes]) 39 | min_lat = np.min([n['lat'] for n in nodes]) 40 | max_lat = np.max([n['lat'] for n in nodes]) 41 | for n in nodes: 42 | n['lon'] = (n['lon'] - min_lon) / (max_lon - min_lon) 43 | n['lat'] = (n['lat'] - min_lat) / (max_lat - min_lat) 44 | nodes.sort(key=cmp_to_key(compare)) 45 | source_edges = copy.deepcopy(d['source_edges']) 46 | target_edges = copy.deepcopy(d['target_edges']) 47 | id2index = {n['osmid']: i for i, n in enumerate(nodes)} 48 | 49 | x = [i+1 for i in range(len(nodes))] 50 | x += [0] * (self.max_number - len(x)) 51 | f = [[n['lon'], n['lat']] for n in nodes] 52 | f += [[0, 0] for i in range(self.max_number - len(f))] 53 | x = torch.LongTensor(x).to(device) 54 | f = torch.Tensor(f).to(device) 55 | adj = torch.zeros(len(self.edge_labels), self.max_number, self.max_number).to(device) 56 | target = torch.zeros(self.max_number, self.max_number).long().to(device) 57 | 58 | for edge in source_edges: 59 | start, end = id2index[edge['start']], id2index[edge['end']] 60 | l1 = self.edge_labels[edge_label(nodes[start], nodes[end])] 61 | l2 = self.edge_labels[edge_label(nodes[end], nodes[start])] 62 | adj[l1, start, end] = 1. 63 | adj[l2, end, start] = 1. 64 | target[start, end] = -1 65 | target[end, start] = -1 66 | 67 | for edge in target_edges: 68 | start, end = id2index[edge['start']], id2index[edge['end']] 69 | target[start, end] = 1 70 | target[end, start] = 1 71 | number = len(nodes) 72 | target[:, number:] = -1 73 | target[number:, :] = -1 74 | 75 | X[i], F[i], A[i], T[i] = x, f, adj, target 76 | return X, F, A, T 77 | 78 | def train_model(self, batch_size=4, epochs=10, result_dir=None): 79 | print('train data:', len(self.train_loader)) 80 | optimizer = torch.optim.SGD(self.model.parameters(), lr=0.05, momentum=0.9) 81 | loss_fct = nn.NLLLoss() 82 | data = [self.train_loader[i] for i in range(len(self.train_loader))] 83 | best_model = None 84 | best_f1 = 0. 
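# Note (comment added for clarity): the epoch loop below (i) drops the learning
# rate from 0.05 to 0.02 at epoch 5, (ii) rebalances each batch by concatenating
# extra copies of the positive (T == 1) entries, since the dense L x L target
# matrix is dominated by zero and ignored (-1) cells, and (iii) keeps a deep
# copy of the model with the best test-set F1.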
85 | for epoch in range(epochs): 86 | self.model.train() 87 | if epoch == 5: 88 | for param in optimizer.param_groups: 89 | param['lr'] = 0.02 90 | random.shuffle(data) 91 | epoch_loss = 0 92 | right, wrong, total = 0, 0, 0 93 | for i in range(0, len(data), batch_size): 94 | X, F, A, T = self.prepare_batch_data(data[i: i + batch_size]) 95 | output = self.model({ 96 | 'x': X, 97 | 'feature': F, 98 | 'adj': A 99 | }) 100 | output = output.view(-1, 2) 101 | T = T.view(-1) 102 | 103 | index = (T == 0).nonzero().squeeze(-1) 104 | T0 = T.index_select(0, index) 105 | output0 = output.index_select(0, index) 106 | index = (T == 1).nonzero().squeeze(-1) 107 | T1 = T.index_select(0, index) 108 | output1 = output.index_select(0, index) 109 | T_ = torch.cat([T1, T0] + [T1 for _ in range(1, int(len(T0) / len(T1) / 4))], dim=0) 110 | output_ = torch.cat([output1, output0] + 111 | [output1 for _ in range(1, int(len(T0) / len(T1) / 4))], dim=0) 112 | 113 | optimizer.zero_grad() 114 | loss = loss_fct(output_, T_) 115 | loss.backward() 116 | optimizer.step() 117 | epoch_loss += loss.item() 118 | 119 | index = (T == 1).nonzero().squeeze(-1) 120 | right += (output.index_select(0, index)[:, 1] > 121 | float(torch.log(torch.Tensor([0.5]).to(device)))).nonzero().size(0) 122 | index = (T == 0).nonzero().squeeze(-1) 123 | wrong += (output.index_select(0, index)[:, 1] > 124 | float(torch.log(torch.Tensor([0.5]).to(device)))).nonzero().size(0) 125 | total += (T == 1).nonzero().size(0) 126 | 127 | precision = right / (right + wrong + 1e-9) 128 | recall = right / (total + 1e-9) 129 | f1 = 2 * recall * precision / (recall + precision + 1e-9) 130 | print('epoch: {}, loss: {}, right: {}, wrong: {}, precision: {}, recall: {}, f1: {}'.format( 131 | epoch + 1, round(epoch_loss, 4), right, wrong, round(precision, 4), round(recall, 4), round(f1, 4) 132 | )) 133 | right, wrong, total, precision, recall, f1 = \ 134 | self.tester.test(self.model, self.max_number, self.edge_labels, result_dir) 135 | print('test, right: {}, wrong: {}, total:{}, precision: {}, recall: {}, f1: {}'.format( 136 | right, wrong, total, round(precision, 4), round(recall, 4), round(f1, 4) 137 | )) 138 | if f1 > best_f1: 139 | best_f1 = f1 140 | best_model = copy.deepcopy(self.model) 141 | self.save_model(best_model) 142 | right, wrong, total, precision, recall, f1 = \ 143 | self.tester.test(best_model, self.max_number, self.edge_labels, result_dir) 144 | print('final f1:', f1) 145 | 146 | def save_model(self, model): 147 | raise NotImplementedError 148 | 149 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/graph_sage_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle 3 | 4 | from models.graph_sage import GSAGE 5 | from tester.gnn_tester import GNNTester 6 | from trainer.gnn_trainer import GNNTrainer 7 | from utils.data_loader import DataLoader 8 | 9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | class GSAGETrainer(GNNTrainer): 13 | def __init__(self, train_data, city, tester): 14 | super().__init__(train_data, city, tester) 15 | self.model = GSAGE( 16 | max_number=self.max_number, 17 | hidden_dim=50, 18 | label_num=len(self.edge_labels), 19 | gsage_layer=2, 20 | dropout=0.1, 21 | ).to(device) 22 | 23 | def save_model(self, best_model): 24 | obj = { 25 | 'max_number': self.max_number, 26 | 'hidden_dim': self.model.hidden_dim, 27 | 'label_num': self.model.label_num, 28 | 
'gsage_layer': self.model.gsage_layer, 29 | 'city': self.city, 30 | 'gsage': best_model, 31 | } 32 | pickle.dump(obj, open(data_dir + 'data_2020715/graph-sage/models/' + 33 | self.city + '_distmult.pkl', 'wb')) 34 | 35 | 36 | if __name__ == "__main__": 37 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 38 | train = DataLoader(data_dir + 'data_2020715/train/') 39 | test = DataLoader(data_dir + 'data_2020715/test/') 40 | 41 | cities = set(train.cities) & set(test.cities) 42 | cities = sorted(list(cities)) 43 | for city in cities: 44 | print(city) 45 | train.initialize() 46 | train.load_dir_datas(city) 47 | test.initialize() 48 | test.load_dir_datas(city) 49 | tester = GNNTester(test_data=test, city=city) 50 | trainer = GSAGETrainer(train, city, tester) 51 | trainer.train_model(result_dir='E:/python-workspace/CityRoadPrediction/data_2020715/graph-sage/result/') 52 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/node2vec_trainer.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from abc import ABC 3 | 4 | from models.node2vec import Node2Vec 5 | from tester.vec_tester import VecTester 6 | from trainer.vec_trainer import VecTrainer 7 | from utils.data_loader import DataLoader 8 | 9 | 10 | class Node2VecTrainer(VecTrainer): 11 | def __init__(self, embed_dim, train_data, city, tester): 12 | super().__init__(embed_dim, train_data, city, tester) 13 | self.vec_model = Node2Vec(num_walks=400) 14 | 15 | def save_model(self, best_model): 16 | obj = { 17 | 'embed_dim': self.embed_dim, 18 | 'city': self.city, 19 | 'distmult': best_model, 20 | } 21 | pickle.dump(obj, open(data_dir + 'data_2020715/node2vec/models/' + 22 | self.city + '_distmult.pkl', 'wb')) 23 | 24 | 25 | if __name__ == "__main__": 26 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 27 | train = DataLoader(data_dir + 'data_2020715/train/') 28 | test = DataLoader(data_dir + 'data_2020715/test/') 29 | 30 | cities = set(train.cities) & set(test.cities) 31 | cities = sorted(list(cities)) 32 | for city in cities: 33 | print(city) 34 | train.initialize() 35 | train.load_dir_datas(city) 36 | test.initialize() 37 | test.load_dir_datas(city) 38 | tester = VecTester(embed_dim=50, test_data=test, city=city, data_dir=data_dir + 'data_2020715/node2vec/') 39 | trainer = Node2VecTrainer(embed_dim=50, train_data=train, city=city, tester=tester) 40 | trainer.prepare_train_embedding(data_dir + 'data_2020715/node2vec/') 41 | #trainer.train_distmult(data_dir=data_dir + 'data_2020715/node2vec/', 42 | # result_dir=data_dir + 'data_20200715/node2vec/result/') 43 | 44 | 45 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/relational_gcn_trainer.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | 4 | from models.relational_gcn import RGCN 5 | from tester.gnn_tester import GNNTester 6 | from trainer.gnn_trainer import GNNTrainer 7 | from utils.data_loader import DataLoader 8 | 9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | class RGCNTrainer(GNNTrainer): 13 | def __init__(self, train_data, city, tester): 14 | super().__init__(train_data, city, tester) 15 | self.model = RGCN( 16 | max_number=self.max_number, 17 | hidden_dim=50, 18 | relation_num=len(self.edge_labels), 19 | v_num=4, 20 | gcn_layer=3, 21 | dropout=0. 
22 | ).to(device) 23 | 24 | def save_model(self, best_model): 25 | obj = { 26 | 'max_number': self.max_number, 27 | 'hidden_dim': self.model.hidden_dim, 28 | 'relation_num': self.model.relation_num, 29 | 'v_num': self.model.v_num, 30 | 'gcn_layer': self.model.gcn_layer, 31 | 'city': self.city, 32 | 'rgcn': best_model, 33 | } 34 | pickle.dump(obj, open(data_dir + 'data_2020715/relational-gcn/models/' + 35 | self.city + '_distmult.pkl', 'wb')) 36 | 37 | 38 | if __name__ == "__main__": 39 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 40 | train = DataLoader(data_dir + 'data_2020715/train/') 41 | test = DataLoader(data_dir + 'data_2020715/test/') 42 | 43 | cities = set(train.cities) & set(test.cities) 44 | cities = sorted(list(cities)) 45 | for city in cities: 46 | if city not in ['Osaka', 'Tokyo']: 47 | continue 48 | print(city) 49 | train.initialize() 50 | train.load_dir_datas(city) 51 | test.initialize() 52 | test.load_dir_datas(city) 53 | tester = GNNTester(test_data=test, city=city) 54 | trainer = RGCNTrainer(train, city, tester) 55 | trainer.train_model(result_dir=data_dir + 'data_2020715/relational-gcn/result/') 56 | 57 | 58 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/spectral_gcn_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle 3 | 4 | from models.spectral_gcn import SGCN 5 | from tester.gnn_tester import GNNTester 6 | from trainer.gnn_trainer import GNNTrainer 7 | from utils.data_loader import DataLoader 8 | 9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | class SGCNTrainer(GNNTrainer): 13 | def __init__(self, train_data, city, tester): 14 | super().__init__(train_data, city, tester) 15 | self.model = SGCN( 16 | max_number=self.max_number, 17 | hidden_dim=50, 18 | label_num=len(self.edge_labels), 19 | gcn_layer=3, 20 | dropout=0., 21 | ).to(device) 22 | 23 | def save_model(self, best_model): 24 | obj = { 25 | 'max_number': self.max_number, 26 | 'hidden_dim': self.model.hidden_dim, 27 | 'label_num': self.model.label_num, 28 | 'gcn_layer': self.model.gcn_layer, 29 | 'city': self.city, 30 | 'sgcn': best_model, 31 | } 32 | pickle.dump(obj, open(data_dir + 'data_2020715/spectral-gcn/models/' + 33 | self.city + '_distmult.pkl', 'wb')) 34 | 35 | 36 | if __name__ == "__main__": 37 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 38 | train = DataLoader(data_dir + 'data_2020715/train/') 39 | test = DataLoader(data_dir + 'data_2020715/test/') 40 | 41 | cities = set(train.cities) & set(test.cities) 42 | cities = sorted(list(cities)) 43 | for city in cities: 44 | print(city) 45 | train.initialize() 46 | train.load_dir_datas(city) 47 | test.initialize() 48 | test.load_dir_datas(city) 49 | tester = GNNTester(test_data=test, city=city) 50 | trainer = SGCNTrainer(train, city, tester) 51 | trainer.train_model(result_dir='E:/python-workspace/CityRoadPrediction/data_2020715/spectral-gcn/result/') 52 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/struc2vec_trainer.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from models.struc2vec import Struc2Vec 4 | from tester.vec_tester import VecTester 5 | from trainer.vec_trainer import VecTrainer 6 | from utils.data_loader import DataLoader 7 | 8 | 9 | class Struc2VecTrainer(VecTrainer): 10 | def __init__(self, 
embed_dim, train_data, city, tester): 11 | super().__init__(embed_dim, train_data, city, tester) 12 | self.vec_model = Struc2Vec(num_walks=200) 13 | 14 | def save_model(self, model): 15 | obj = { 16 | 'embed_dim': self.embed_dim, 17 | 'city': self.city, 18 | 'distmult': model, 19 | } 20 | pickle.dump(obj, open(data_dir + 'data_20200610/struc2vec/models/' + 21 | self.city + '_distmult.pkl', 'wb')) 22 | 23 | 24 | if __name__ == "__main__": 25 | data_dir = 'E:/python-workspace/CityRoadPrediction/' 26 | train = DataLoader(data_dir + 'data_20200610/train/') 27 | test = DataLoader(data_dir + 'data_20200610/test/') 28 | 29 | cities = set(train.cities) & set(test.cities) 30 | cities = sorted(list(cities)) 31 | for city in cities: 32 | if city < 'Stockton': 33 | continue 34 | print(city) 35 | train.initialize() 36 | train.load_dir_datas(city) 37 | test.initialize() 38 | test.load_dir_datas(city) 39 | tester = VecTester(embed_dim=50, test_data=test, city=city, data_dir=data_dir + 'data_20200610/struc2vec/') 40 | trainer = Struc2VecTrainer(embed_dim=50, train_data=train, city=city, tester=tester) 41 | #trainer.prepare_train_embedding(data_dir + 'data_20200610/struc2vec/') 42 | trainer.train_distmult(data_dir=data_dir + 'data_20200610/struc2vec/', 43 | result_dir=data_dir + 'data_20200610/struc2vec/result/') 44 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/trainer/vec_trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import pickle 3 | import random 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from models.distmult import DistMult 10 | from utils.data_loader import DataLoader 11 | 12 | 13 | class VecTrainer(): 14 | def __init__(self, embed_dim, train_data, city, tester): 15 | self.embed_dim = embed_dim 16 | self.train_loader = train_data 17 | self.tester = tester 18 | self.city = city 19 | self.distmult = DistMult(embed_dim) 20 | self.embedding = [] 21 | self.vec_model = None 22 | 23 | def prepare_train_embedding(self, data_dir): 24 | data = copy.deepcopy(self.train_loader.data[self.city]) 25 | data.update(self.tester.test_loader.data[self.city]) 26 | keys = sorted(list(data.keys())) 27 | embeds = {} 28 | for i in range(0, len(keys), 40): 29 | print(self.city, i, len(keys)) 30 | nodes, edges = [], [] 31 | for index in keys[i: i + 40]: 32 | nodes += data[index]['nodes'] 33 | edges += data[index]['source_edges'] 34 | G = DataLoader.build_graph(nodes, edges) 35 | self.vec_model.build_model(G) 36 | embeds.update(self.vec_model.train(embed_size=self.embed_dim)) 37 | for index in self.train_loader.data[self.city]: 38 | positive, negative = [], [] 39 | sample = self.train_loader.data[self.city][index] 40 | for i, n1 in enumerate(sample['nodes']): 41 | for j, n2 in enumerate(sample['nodes'][i + 1:]): 42 | if {'start': n1['osmid'], 'end': n2['osmid']} in sample['target_edges'] or \ 43 | {'start': n2['osmid'], 'end': n1['osmid']} in sample['target_edges']: 44 | positive.append([n1['osmid'], n2['osmid'], 1]) 45 | elif {'start': n1['osmid'], 'end': n2['osmid']} not in sample['source_edges'] and \ 46 | {'start': n2['osmid'], 'end': n1['osmid']} not in sample['source_edges']: 47 | negative.append([n1['osmid'], n2['osmid'], 0]) 48 | samples = positive + negative 49 | for (start, end, target) in samples: 50 | self.embedding.append({ 51 | 'start_id': str(start), 52 | 'end_id': str(end), 53 | 'start_embedding': embeds[str(start)] if str(start) in embeds else 
np.zeros(self.embed_dim), 54 | 'end_embedding': embeds[str(end)] if str(end) in embeds else np.zeros(self.embed_dim), 55 | 'target': target, 56 | }) 57 | pickle.dump(self.embedding, 58 | open(data_dir + 'train/' + self.city + '_embedding.pkl', 'wb')) 59 | 60 | test_embedding = {} 61 | for index in self.tester.test_loader.data[self.city]: 62 | positive, negative = [], [] 63 | sample = self.tester.test_loader.data[self.city][index] 64 | for i, n1 in enumerate(sample['nodes']): 65 | for j, n2 in enumerate(sample['nodes'][i + 1:]): 66 | if {'start': n1['osmid'], 'end': n2['osmid']} in sample['target_edges'] or \ 67 | {'start': n2['osmid'], 'end': n1['osmid']} in sample['target_edges']: 68 | positive.append([n1['osmid'], n2['osmid'], 1]) 69 | elif {'start': n1['osmid'], 'end': n2['osmid']} not in sample['source_edges'] and \ 70 | {'start': n2['osmid'], 'end': n1['osmid']} not in sample['source_edges']: 71 | negative.append([n1['osmid'], n2['osmid'], 0]) 72 | samples = positive + negative 73 | test_embedding[index] = [] 74 | for (start, end, target) in samples: 75 | test_embedding[index].append({ 76 | 'start_id': str(start), 77 | 'end_id': str(end), 78 | 'start_embedding': embeds[str(start)] if str(start) in embeds else np.zeros(self.embed_dim), 79 | 'end_embedding': embeds[str(end)] if str(end) in embeds else np.zeros(self.embed_dim), 80 | 'target': target, 81 | }) 82 | print(self.city, len(self.embedding), len(test_embedding)) 83 | pickle.dump(test_embedding, 84 | open(data_dir + 'test/' + self.city + '_embedding.pkl', 'wb')) 85 | 86 | def train_distmult(self, batch_size=128, epochs=7, data_dir=None, result_dir=None): 87 | samples = pickle.load(open(data_dir + 'train/' + self.city + '_embedding.pkl', 'rb')) 88 | test_data = [] 89 | for k, v in self.tester.embedding.items(): 90 | test_data += [str(s['start_id']) + '_' + str(s['end_id']) for s in v] + \ 91 | [str(s['end_id']) + '_' + str(s['start_id']) for s in v] 92 | test_data = set(test_data) 93 | 94 | positive = [s for s in samples if s['target'] == 1 and 95 | str(s['start_id']) + '_' + str(s['end_id']) not in test_data] 96 | negative = [s for s in samples if s['target'] == 0 and 97 | str(s['start_id']) + '_' + str(s['end_id']) not in test_data] 98 | #positive = [s for s in samples if s['target'] == 1] 99 | #negative = [s for s in samples if s['target'] == 0] 100 | self.embedding = positive + negative 101 | for _ in range(1, int(len(negative) / len(positive) / 4)): 102 | self.embedding += positive 103 | print('train data:', len(self.embedding)) 104 | 105 | optimizer = torch.optim.SGD(self.distmult.parameters(), lr=0.01, momentum=0.9) 106 | loss_fct = nn.NLLLoss() 107 | best_model = None 108 | best_f1 = -1. 
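# Aside -- an illustrative sketch (not this repository's models/distmult.py) of
# the DistMult decoder that the loop below trains. DistMult scores a candidate
# edge (u, v) with a bilinear form whose relation matrix is diagonal,
# score = sum_d r[d] * u[d] * v[d]; the class name and the 2-way log-softmax head
# are assumptions chosen so the interface matches the NLLLoss and log(0.5)
# threshold used below.
import torch
import torch.nn as nn

class DistMultSketch(nn.Module):
    def __init__(self, embed_dim):
        super().__init__()
        self.relation = nn.Parameter(torch.randn(embed_dim))  # diagonal relation matrix
        self.head = nn.Linear(1, 2)                           # map score to 2 logits

    def forward(self, starts, ends):
        # starts, ends: (batch, embed_dim) node embeddings
        score = (starts * self.relation * ends).sum(dim=-1, keepdim=True)
        return torch.log_softmax(self.head(score), dim=-1)    # log-probabilities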
109 | for epoch in range(epochs): 110 | self.distmult.train() 111 | random.shuffle(self.embedding) 112 | epoch_loss = 0 113 | right, wrong, total = 0, 0, 0 114 | for i in range(0, len(self.embedding), batch_size): 115 | starts = [torch.Tensor(e['start_embedding'].tolist()).unsqueeze(0) 116 | for e in self.embedding[i: i + batch_size]] 117 | ends = [torch.Tensor(e['end_embedding'].tolist()).unsqueeze(0) 118 | for e in self.embedding[i: i + batch_size]] 119 | targets = torch.LongTensor([e['target'] for e in self.embedding[i: i + batch_size]]) 120 | starts = torch.cat(starts, dim=0) 121 | ends = torch.cat(ends, dim=0) 122 | 123 | output = self.distmult(starts, ends) 124 | 125 | optimizer.zero_grad() 126 | loss = loss_fct(output, targets) 127 | loss.backward() 128 | optimizer.step() 129 | epoch_loss += loss.item() 130 | 131 | output = output.to('cpu') 132 | index = (targets == 1).nonzero().squeeze(-1) 133 | right += (output.index_select(0, index)[:, 1] > 134 | float(torch.log(torch.Tensor([0.5])))).nonzero().size(0) 135 | index = (targets == 0).nonzero().squeeze(-1) 136 | wrong += (output.index_select(0, index)[:, 1] > 137 | float(torch.log(torch.Tensor([0.5])))).nonzero().size(0) 138 | total += (targets == 1).nonzero().size(0) 139 | 140 | precision = right / (right + wrong + 1e-9) 141 | recall = right / (total + 1e-9) 142 | f1 = 2 * recall * precision / (recall + precision + 1e-9) 143 | print('epoch: {}, loss: {}, right: {}, wrong: {}, precision: {}, recall: {}, f1: {}'.format( 144 | epoch + 1, round(epoch_loss, 4), right, wrong, round(precision, 4), round(recall, 4), round(f1, 4) 145 | )) 146 | 147 | right, wrong, total, precision, recall, f1 = self.tester.test(self.distmult, result_dir) 148 | print('test, right: {}, wrong: {}, total:{}, precision: {}, recall: {}, f1: {}'.format( 149 | right, wrong, total, round(precision, 4), round(recall, 4), round(f1, 4) 150 | )) 151 | if f1 > best_f1 and epoch >= 3: 152 | best_f1 = f1 153 | best_model = copy.deepcopy(self.distmult) 154 | self.save_model(best_model) 155 | right, wrong, total, precision, recall, f1 = self.tester.test(best_model, result_dir) 156 | print('final f1:', f1) 157 | 158 | def save_model(self, model): 159 | raise NotImplementedError 160 | 161 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/__pycache__/data_loader.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/utils/__pycache__/data_loader.cpython-37.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/__pycache__/data_loader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/intra-city-network-homogeneity/utils/__pycache__/data_loader.cpython-38.pyc -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/analyze.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import math 5 | import pickle 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn import metrics 9 | 10 | from tester.vec_tester import is_valid 11 | from utils.data_loader import DataLoader 12 | 13 | 14 | def load_city_result(city, model, data_dir): 15 | return pickle.load(open(data_dir + model + '/result/' + city + '_result.pkl', 'rb')) 16 | 17 | 18 | def load_model_result(model, data_dir): 19 | files = os.listdir(data_dir + model + '/result/') 20 | result = {} 21 | for file in files: 22 | city = file.split('_')[0].strip() 23 | #if city not in ['Guangzhou']: 24 | # continue 25 | result[city] = load_city_result(city, model, data_dir) 26 | return result 27 | 28 | 29 | def roc(models): 30 | for i, model in enumerate(models): 31 | print(model) 32 | result = load_model_result(model.lower(), data_dir) 33 | y = [] 34 | for city in result: 35 | for index, v in result[city].items(): 36 | for sample in v: 37 | y.append({ 38 | 'score': sample['score'], 39 | 'target': int(sample['target']) 40 | }) 41 | del result 42 | y = sorted(y, key=lambda e: e['score'], reverse=True) 43 | y_score, y_label = [_['score'] for _ in y], [_['target'] for _ in y] 44 | print(len(y_score)) 45 | fpr, tpr, thresholds = metrics.roc_curve(y_label, y_score, pos_label=1) 46 | roc = metrics.roc_auc_score(y_label, y_score) 47 | 48 | plt.plot(fpr, tpr, label=model + ': ' + str(round(roc, 3))) 49 | plt.legend() 50 | plt.xlabel('False positive rate') 51 | plt.ylabel('True positive rate') 52 | plt.title('ROC curve on Top10 cities') 53 | plt.show() 54 | 55 | 56 | def precision_recall(models): 57 | def metrics(Y): 58 | positive = sum([y['target'] for y in Y]) 59 | 60 | thresholds = np.linspace(1, 1e-9, 1000) 61 | precision, recall = [], [] 62 | index = 0 63 | right, wrong = 0, 0 64 | #best_f1, best_threshold = 0., 0. 
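# Aside -- the sweep below traces the precision-recall curve in a single pass:
# Y is sorted by score in descending order, so as the threshold th decreases the
# right/wrong counters only ever grow (an O(n) incremental computation). A
# standalone cross-check on toy data (sketch, not part of analyze.py):
from sklearn.metrics import precision_recall_curve
y_true = [1, 0, 1, 1, 0]
y_score = [0.9, 0.8, 0.7, 0.4, 0.2]   # already sorted in descending order
prec, rec, ths = precision_recall_curve(y_true, y_score)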
65 | for _, th in enumerate(thresholds): 66 | for i in range(index, len(Y)): 67 | if Y[i]['score'] < math.log(th): 68 | index = i 69 | break 70 | if Y[i]['target'] == 1: 71 | right += 1 72 | else: 73 | wrong += 1 74 | p = 1.0 * right / (right + wrong + 1e-9) 75 | r = 1.0 * right / positive 76 | precision.append(p) 77 | recall.append(r) 78 | #f1 = 2 * p * r / (p + r + 1e-9) 79 | #if f1 > best_f1: 80 | # best_f1 = f1 81 | # best_threshold = th 82 | 83 | pr_sort = {r: p for p, r in zip(precision, recall)} 84 | pr_sort.pop(0) 85 | pr_sort = [[p, r] for r, p in pr_sort.items()] 86 | pr_sort.sort(key=lambda e: e[1]) 87 | precision, recall = [r[0] for r in pr_sort], [r[1] for r in pr_sort] 88 | return precision, recall 89 | 90 | for i, model in enumerate(models): 91 | print(model) 92 | result = load_model_result(model.lower(), data_dir) 93 | y = [] 94 | for city in result: 95 | for index, v in result[city].items(): 96 | for sample in v: 97 | y.append({ 98 | 'score': sample['score'], 99 | 'target': int(sample['target']) 100 | }) 101 | del result 102 | y = sorted(y, key=lambda e: e['score'], reverse=True) 103 | precision, recall = metrics(y) 104 | print(len(y)) 105 | plt.plot(recall, precision, label=model) 106 | plt.legend() 107 | plt.xlabel('Recall') 108 | plt.ylabel('Precision') 109 | plt.title('Precision-Recall curve on Top10 cities') 110 | plt.show() 111 | 112 | 113 | def best_threshold(model): 114 | def metrics(Y): 115 | positive = sum([y['target'] for y in Y]) 116 | 117 | thresholds = [0.5, 0.45, 0.4, 0.35, 0.3, 0.25] 118 | index = 0 119 | right, wrong = 0, 0 120 | existed_edges = {ids: test[ids]['source_edges'] for ids in test.ids} 121 | id2node = {node['osmid']: node for ids in test.ids for node in test[ids]['nodes']} 122 | best_f1, best_th = 0, 0 123 | for _, th in enumerate(thresholds): 124 | for i in range(index, len(Y)): 125 | if Y[i]['score'] < math.log(th): 126 | index = i 127 | break 128 | if is_valid({'start': Y[i]['start'], 'end': Y[i]['end']}, existed_edges[Y[i]['id']], id2node): 129 | existed_edges[Y[i]['id']].append({'start': Y[i]['start'], 'end': Y[i]['end']}) 130 | if Y[i]['target'] == 1: 131 | right += 1 132 | else: 133 | wrong += 1 134 | p = 1.0 * right / (right + wrong + 1e-9) 135 | r = 1.0 * right / positive 136 | f1 = 2 * p * r / (p + r + 1e-9) 137 | if best_f1 < f1: 138 | best_f1 = f1 139 | best_th = th 140 | print(p, r, best_f1, best_th) 141 | return best_f1, best_th 142 | 143 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/') 144 | test.load_all_datas() 145 | result = load_model_result(model.lower(), data_dir) 146 | y = [] 147 | for city in result: 148 | for index, v in result[city].items(): 149 | for sample in v: 150 | y.append({ 151 | 'id': index, 152 | 'start': sample['start'], 153 | 'end': sample['end'], 154 | 'score': sample['score'], 155 | 'target': int(sample['target']) 156 | }) 157 | del result 158 | y = sorted(y, key=lambda e: e['score'], reverse=True) 159 | f1, th = metrics(y) 160 | print(f1, th) 161 | 162 | 163 | def predict(model): 164 | def metrics(Y, ids): 165 | positive = sum([y['target'] for y in Y]) 166 | 167 | if city in ['Hongkong', 'Guangzhou', 'Singapore']: 168 | thresholds = 0.2 169 | elif city in ['Beijing', 'Shanghai', 'Shenzhen']: 170 | thresholds = 0.45 171 | else: 172 | thresholds = 0.6 173 | 174 | right, wrong = 0, 0 175 | existed_edges = test[ids]['source_edges'] 176 | id2node = {node['osmid']: node for node in test[ids]['nodes']} 177 | new_Y = [] 178 | for i in range(len(Y)): 179 | y = copy.deepcopy(Y[i]) 
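# Aside (explanatory note, not part of analyze.py): the model scores are
# log-probabilities (the decoders end in a log-softmax and are trained with
# NLLLoss), so the per-city cutoffs above are applied in log space --
# Y[i]['score'] > math.log(th) is equivalent to p > th. A candidate edge must
# also pass the is_valid() check against the edges accepted so far, so
# higher-scoring predictions can rule out later conflicting ones.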
180 | if Y[i]['score'] > math.log(thresholds): 181 | if is_valid({'start': Y[i]['start'], 'end': Y[i]['end']}, existed_edges, id2node): 182 | existed_edges.append({'start': Y[i]['start'], 'end': Y[i]['end']}) 183 | y['predict'] = 1 184 | if Y[i]['target'] == 1: 185 | right += 1 186 | else: 187 | wrong += 1 188 | else: 189 | y['predict'] = 0 190 | else: 191 | y['predict'] = 0 192 | y.pop('id') 193 | new_Y.append(y) 194 | p = 1.0 * right / (right + wrong + 1e-9) 195 | r = 1.0 * right / positive 196 | f1 = 2 * p * r / (p + r + 1e-9) 197 | print(index, p, r, f1) 198 | return right, wrong, positive, new_Y 199 | 200 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_2020715/test/') 201 | test.load_all_datas() 202 | result = load_model_result(model.lower(), data_dir) 203 | right, wrong, total = 0, 0, 0 204 | for city in result: 205 | new_result = {} 206 | r_, w_, t_ = 0, 0, 0 207 | for index, v in result[city].items(): 208 | y = [] 209 | for sample in v: 210 | y.append({ 211 | 'id': index, 212 | 'start': sample['start'], 213 | 'end': sample['end'], 214 | 'score': sample['score'], 215 | 'target': int(sample['target']) 216 | }) 217 | y = sorted(y, key=lambda e: e['score'], reverse=True) 218 | r, w, t, y = metrics(y, index) 219 | r_ += r 220 | w_ += w 221 | t_ += t 222 | new_result[index] = y 223 | p = 1.0 * r_ / (r_ + w_ + 1e-9) 224 | r = 1.0 * r_ / t_ 225 | f1 = 2 * p * r / (p + r + 1e-9) 226 | print(city, r_, w_, t_, p, r, f1) 227 | right += r_ 228 | wrong += w_ 229 | total += t_ 230 | json.dump(new_result, open(data_dir + 'relational-gcn/final/Relational-GCN-' + city + '-result.json', 'w'), indent=2) 231 | p = 1.0 * right / (right + wrong + 1e-9) 232 | r = 1.0 * right / total 233 | f1 = 2 * p * r / (p + r + 1e-9) 234 | print(p, r, f1) 235 | 236 | 237 | if __name__ == "__main__": 238 | data_dir = 'E:/python-workspace/CityRoadPrediction/data_2020715/' 239 | models = ['Relational-GCN'] 240 | 241 | #roc(models) 242 | #precision_recall(models) 243 | #best_threshold('Relational-GCN') 244 | #predict('Relational-GCN') 245 | -------------------------------------------------------------------------------- /intra-city-network-homogeneity/utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import numpy as np 5 | import networkx as nx 6 | 7 | 8 | class DataLoader(): 9 | def __init__(self, data_dir): 10 | self.data_dir = data_dir 11 | self.data = {} 12 | self.ids = [] 13 | self.cities = set() 14 | self.samples = {} 15 | self.load_cities() 16 | 17 | def load_cities(self): 18 | files = os.listdir(self.data_dir) 19 | for file in files: 20 | name = file[:-5] 21 | number = re.findall('[0-9]+', name)[0] 22 | city, attr = name.split(number) 23 | if city not in self.cities: 24 | self.cities.add(city) 25 | if city not in self.samples: 26 | self.samples[city] = set() 27 | self.samples[city].add(number) 28 | self.cities = list(self.cities) 29 | for city in self.samples: 30 | self.samples[city] = list(self.samples[city]) 31 | 32 | def load_all_datas(self): 33 | for city in self.cities: 34 | self.load_dir_datas(city) 35 | 36 | def initialize(self): 37 | self.data = {} 38 | self.ids = [] 39 | 40 | def load_one_sample(self, sample): 41 | name = sample[:-5] 42 | number = re.findall('[0-9]+', name)[0] 43 | city, attr = name.split(number) 44 | id1 = city + '_' + number + '_1' 45 | id2 = city + '_' + number + '_2' 46 | if city not in self.data: 47 | self.data[city] = {} 48 | if id1 not in self.data[city]: 49 | 
self.data[city][id1] = {'id': id1, 'nodes': [], 'source_edges': [], 'target_edges': []} 50 | self.data[city][id2] = {'id': id2, 'nodes': [], 'source_edges': [], 'target_edges': []} 51 | self.ids += [id1, id2] 52 | data = json.load(open(self.data_dir + sample, 'r')) 53 | if attr == 'nodes': 54 | self.data[city][id1]['nodes'] = data 55 | self.data[city][id2]['nodes'] = data 56 | else: 57 | for edge in data: 58 | if edge['inSample1'] == 1: 59 | self.data[city][id1]['source_edges'].append({ 60 | 'start': edge['start'], 61 | 'end': edge['end'], 62 | }) 63 | if edge['inSample1'] == 0: 64 | self.data[city][id1]['target_edges'].append({ 65 | 'start': edge['start'], 66 | 'end': edge['end'], 67 | }) 68 | if edge['inSample2'] == 1: 69 | self.data[city][id2]['source_edges'].append({ 70 | 'start': edge['start'], 71 | 'end': edge['end'], 72 | }) 73 | if edge['inSample2'] == 0: 74 | self.data[city][id2]['target_edges'].append({ 75 | 'start': edge['start'], 76 | 'end': edge['end'], 77 | }) 78 | 79 | def load_dir_datas(self, cityname): 80 | files = os.listdir(self.data_dir) 81 | for file in files: 82 | name = file[:-5] 83 | number = re.findall('[0-9]+', name)[0] 84 | city, attr = name.split(number) 85 | if city != cityname: 86 | continue 87 | id1 = city + '_' + number + '_1' 88 | id2 = city + '_' + number + '_2' 89 | if city not in self.data: 90 | self.data[city] = {} 91 | if id1 not in self.data[city]: 92 | self.data[city][id1] = {'id': id1, 'nodes': [], 'source_edges': [], 'target_edges': []} 93 | self.data[city][id2] = {'id': id2, 'nodes': [], 'source_edges': [], 'target_edges': []} 94 | self.ids += [id1, id2] 95 | 96 | data = json.load(open(self.data_dir + file, 'r')) 97 | if attr == 'nodes': 98 | self.data[city][id1]['nodes'] = data 99 | self.data[city][id2]['nodes'] = data 100 | else: 101 | for edge in data: 102 | if edge['inSample1'] == 1: 103 | self.data[city][id1]['source_edges'].append({ 104 | 'start': edge['start'], 105 | 'end': edge['end'], 106 | }) 107 | if edge['inSample1'] == 0: 108 | self.data[city][id1]['target_edges'].append({ 109 | 'start': edge['start'], 110 | 'end': edge['end'], 111 | }) 112 | if edge['inSample2'] == 1: 113 | self.data[city][id2]['source_edges'].append({ 114 | 'start': edge['start'], 115 | 'end': edge['end'], 116 | }) 117 | if edge['inSample2'] == 0: 118 | self.data[city][id2]['target_edges'].append({ 119 | 'start': edge['start'], 120 | 'end': edge['end'], 121 | }) 122 | 123 | def __len__(self): 124 | return len(self.ids) 125 | 126 | def __getitem__(self, index): 127 | if type(index) == int: 128 | index = self.ids[index] 129 | city, number, idx = index.split('_') 130 | return self.data[city][index] 131 | 132 | @staticmethod 133 | def build_graph(nodes, edges): 134 | ids = [str(n['osmid']) for n in nodes] 135 | edges = [(str(e['start']), str(e['end'])) for e in edges] + [(str(e['end']), str(e['start'])) for e in edges] 136 | graph = nx.DiGraph(np.array([[0] * len(nodes)] * len(nodes))) 137 | 138 | mapping = {i: ids[i] for i in range(len(ids))} 139 | graph = nx.relabel_nodes(graph, mapping) 140 | graph.add_edges_from(edges) 141 | return graph 142 | 143 | def build_source_graph(self, index): 144 | data = self[index] 145 | return DataLoader.build_graph(data['nodes'], data['source_edges']) 146 | 147 | def build_full_graph(self, index): 148 | data = self[index] 149 | return DataLoader.build_graph(data['nodes'], data['source_edges'] + data['target_edges']) 150 | 151 | 152 | if __name__ == "__main__": 153 | train = 
DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/train/') 154 | test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/') 155 | print('city num:', len(train.data)) 156 | for k in train.data: 157 | if k not in test.data: 158 | print(k, len(train.data[k])) 159 | else: 160 | print(k, len(train.data[k]), len(test.data[k])) 161 | 162 | -------------------------------------------------------------------------------- /main-figure/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/001.png -------------------------------------------------------------------------------- /main-figure/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/002.png -------------------------------------------------------------------------------- /main-figure/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/003.png -------------------------------------------------------------------------------- /main-figure/004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/004.png -------------------------------------------------------------------------------- /main-figure/004_part.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/004_part.png -------------------------------------------------------------------------------- /main-figure/005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/main-figure/005.png -------------------------------------------------------------------------------- /road-classification/README.md: -------------------------------------------------------------------------------- 1 | # Road network classification 2 | 3 | This section implements road network classification and predictability analysis. We aggregate 11 measures to extract topological features from a road network; these measures include the node degree distribution, road circuity, and dendricity. Based on these measures, we study the relationship between 4 | 5 | * Network predictability vs. network types 6 | * Network predictability vs. the principal component of the aggregated measures 7 | 8 | ## Feature extraction 9 | * Extract the features of the road networks from the training set: 10 | ``` 11 | python measures.py --mode train 12 | ``` 13 | * Extract the features of the road networks from the test set: 14 | ``` 15 | python measures.py --mode test 16 | ``` 17 | 18 | ## Visualization 19 | 20 | * To get the F1 score vs. cities, you can run the command 21 | ``` 22 | python kmean_pca_analysis.py --mode f1_vs_city 23 | ``` 24 | * We classify the road networks into 4 types using k-means clustering and visualize them via PCA. 
The command is as follows: 25 | ``` 26 | python kmean_pca_analysis.py --mode pca_visualize 27 | ``` 28 | * To get the center point of each road type, you can run the command 29 | ``` 30 | python kmean_pca_analysis.py --mode center 31 | ``` 32 | * To get the road type distribution in a city, you can run the command 33 | ``` 34 | python kmean_pca_analysis.py --mode city_ratio 35 | ``` 36 | * To get the F1 score vs. road network types, you can run the command 37 | ``` 38 | python kmean_pca_analysis.py --mode f1_vs_type 39 | ``` 40 | * To get the F1 score vs. PC1, you can run the command 41 | ``` 42 | python kmean_pca_analysis.py --mode f1_vs_PCA1 43 | ``` 44 | -------------------------------------------------------------------------------- /road-classification/kmean_pca_analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from sklearn.decomposition import PCA 4 | import matplotlib.pyplot as plt 5 | import networkx as nx 6 | import math 7 | import glob 8 | from sklearn.cluster import KMeans 9 | import os 10 | from scipy.spatial.distance import cdist 11 | from sklearn.metrics import f1_score 12 | from sklearn import linear_model 13 | import scipy 14 | 15 | font1 = {'family' : 'Arial', 16 | 'weight' : 'normal', 17 | 'size' : 23, 18 | } 19 | font2 = {'family' : 'Arial', 20 | 'weight' : 'normal', 21 | 'size' : 30, 22 | } 23 | 24 | def nodes_to_list(nodes): 25 | new_nodes = [] 26 | for n in nodes: 27 | new_nodes.append([n['osmid'],n['lon'],n['lat']]) 28 | return new_nodes 29 | 30 | def edges_to_dict(edges, sample=1): 31 | old_edges = {} 32 | for e in edges: 33 | if sample == 1: 34 | if e['start'] not in old_edges: 35 | old_edges[e['start']] = [] 36 | old_edges[e['start']].append(e['end']) 37 | if sample == 2: 38 | if e['start'] not in old_edges: 39 | old_edges[e['start']] = [] 40 | old_edges[e['start']].append(e['end']) 41 | return old_edges 42 | 43 | def load_graph(file_name, sample=1): 44 | nodes = json.load(open('../codeJiaweiXue_2020715_dataCollection/train/'+file_name+'nodes.json', 'r')) 45 | edges = json.load(open('../codeJiaweiXue_2020715_dataCollection/train/'+file_name+'edges.json', 'r')) 46 | old_edges = edges_to_dict(edges, sample=sample) 47 | return nodes, old_edges 48 | 49 | def visualization(nodeInfor, predictEdges, oldEdges, newEdges, city_name, cluster, rank, title): 50 | # step0: get the information 51 | nodeId = [nodeInfor[i][0] for i in range(len(nodeInfor))] 52 | longitude = [nodeInfor[i][1] for i in range(len(nodeInfor))] 53 | latitude = [nodeInfor[i][2] for i in range(len(nodeInfor))] 54 | 55 | # step1: generate the graph 56 | n = len(nodeId) 57 | A1 = np.array([[0] * n] * n) 58 | Graph1 = nx.Graph(A1) 59 | 60 | # step 2: label 61 | column = [str(nodeId[i]) for i in range(n)] 62 | mapping = {0: str(nodeId[0])} 63 | for i in range(0, len(column) - 1): 64 | mapping.setdefault(i + 1, column[i + 1]) 65 | Graph1 = nx.relabel_nodes(Graph1, mapping) 66 | 67 | # step3: geolocation 68 | POS = list() 69 | for i in range(0, n): 70 | POS.append((float(longitude[i]), float(latitude[i]))) 71 | for i in range(0, n): 72 | Graph1.nodes[column[i]]['pos'] = POS[i] 73 | 74 | num = 0 75 | # step 4: add edge 76 | for start in oldEdges: 77 | for end in oldEdges[start]: 78 | num = num + 1 79 | Graph1.add_edge(str(start), str(end), color='black', weight=1) 80 | # print('old num', num) 81 | for start in newEdges: 82 | for end in newEdges[start]: 83 | if (not (start in predictEdges and end in predictEdges[start])) and \ 84 | 
(not (end in predictEdges and start in predictEdges[end])): 85 | Graph1.add_edge(str(start), str(end), color='blue', weight=2) 86 | for start in predictEdges: 87 | for end in predictEdges[start]: 88 | if (start in newEdges and end in newEdges[start]) or \ 89 | (end in newEdges and start in newEdges[end]): 90 | Graph1.add_edge(str(start), str(end), color='green', weight=5) 91 | else: 92 | Graph1.add_edge(str(start), str(end), color='red', weight=2) 93 | 94 | edges = Graph1.edges() 95 | colors = [Graph1[u][v]['color'] for u, v in edges] 96 | weights = [Graph1[u][v]['weight'] for u, v in edges] 97 | # print(nx.cycle_basis(Graph1)) 98 | plt.figure(1, figsize=(6, 6)) 99 | if title: 100 | if rank>=0: 101 | plt.title('city: {} cluster: {} rank: {}'.format(city_name, cluster, rank)) 102 | else: 103 | plt.title('city: {} cluster: {}'.format(city_name, cluster)) 104 | 105 | nx.draw(Graph1, nx.get_node_attributes(Graph1, 'pos'), edge_color=colors, width=weights, node_size=10)#, with_labels = True) 106 | plt.show() 107 | if not os.path.exists('figures/'+str(cluster)): 108 | os.mkdir('figures/'+str(cluster)+'/') 109 | # if title: 110 | # plt.savefig('figures/{}/cluster_{}_'.format(cluster, cluster)+city_name+'.png') 111 | # else: 112 | # plt.savefig('figures/{}/rank_{}_cluster_{}_'.format(cluster, rank, cluster) + city_name + '.png') 113 | # plt.clf() 114 | 115 | def visualize(city_name, cluster, rank = -1, title = True): 116 | sample = 1 117 | nodes, old_edges = load_graph(city_name, sample) 118 | visualization(nodes_to_list(nodes), dict(), old_edges, dict(), city_name, cluster, rank = rank, title = title) 119 | 120 | def pca_visualize(k): 121 | with open('results/training_set_index.txt') as json_file: 122 | city_index = json.load(json_file) 123 | data = np.zeros((len(city_index), 11)) 124 | num_2_cityname = {} 125 | for city_num, city in enumerate(city_index): 126 | num_2_cityname[city_num] = city 127 | for idx_num, attribute in enumerate(city_index[city]): 128 | data[city_num, idx_num] = city_index[city][attribute] 129 | print('training data shape: ', data.shape) 130 | 131 | data_mean = np.mean(data, axis=0, keepdims=True) 132 | data_std = np.std(data, axis=0, keepdims=True) 133 | data = (data - data_mean)/data_std 134 | 135 | k = k 136 | 137 | kmeanModel = KMeans(n_clusters=k, random_state = 1) 138 | kmeanModel.fit(data) 139 | 140 | # print(kmeanModel.labels_== 1) 141 | pca = PCA(n_components=6) 142 | newData = pca.fit_transform(data) 143 | print((np.transpose(pca.components_)).shape) 144 | print('PCA component', (np.transpose(pca.components_))) 145 | print('explained variance', pca.explained_variance_) 146 | print('explained variance ratio', pca.explained_variance_ratio_) 147 | 148 | # the index of cluster is ordered by the value of PCA1 149 | change_order = True 150 | if change_order: 151 | SDNi = np.zeros(k) 152 | change_order_mapping = {} 153 | for i in range(k): 154 | SDNi[i] = np.mean(newData[kmeanModel.labels_== i][:,0]) 155 | argsorted_SDNi = np.argsort(SDNi) 156 | for i in range(k): 157 | change_order_mapping[i] = np.where(argsorted_SDNi==i)[0][0] 158 | for i in range(len(kmeanModel.labels_)): 159 | kmeanModel.labels_[i] = change_order_mapping[kmeanModel.labels_[i]] 160 | 161 | cluster_centers_ = np.zeros((k, data.shape[1])) 162 | for i in range(k): 163 | cluster_centers_[i,:] = np.mean(data[kmeanModel.labels_== i], axis = 0, keepdims=True) 164 | 165 | pair_distance = cdist(data, cluster_centers_, 'euclidean') 166 | 167 | fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))#, dpi = 
1200) 168 | plt.plot(newData[kmeanModel.labels_== 0][:,0], newData[kmeanModel.labels_== 0][:,1], 'o', markersize=3, label="Type 1") 169 | plt.plot(newData[kmeanModel.labels_== 1][:,0], newData[kmeanModel.labels_== 1][:,1], 'o', markersize=3, label="Type 2") 170 | plt.plot(newData[kmeanModel.labels_== 2][:,0], newData[kmeanModel.labels_== 2][:,1], 'o', markersize=3, label="Type 3") 171 | plt.plot(newData[kmeanModel.labels_== 3][:,0], newData[kmeanModel.labels_== 3][:,1], 'o', markersize=3, label="Type 4") 172 | 173 | plt.yticks(fontsize=22) 174 | plt.xticks(fontsize=22) 175 | plt.xlabel('Dimension 1', font1) 176 | plt.ylabel('Dimension 2', font1) 177 | plt.legend(loc="best", fontsize=21.3, markerscale=3., labelspacing = 0.2, borderpad = 0.25) 178 | plt.tight_layout() 179 | plt.savefig('figures/pca.png', bbox_inches='tight') 180 | plt.show() 181 | 182 | def city_ratio(k): 183 | with open('results/training_set_index.txt') as json_file: 184 | city_index = json.load(json_file) 185 | data = np.zeros((len(city_index), 11)) 186 | num_2_cityname = {} 187 | for city_num, city in enumerate(city_index): 188 | num_2_cityname[city_num] = city 189 | for idx_num, attribute in enumerate(city_index[city]): 190 | data[city_num, idx_num] = city_index[city][attribute] 191 | print('training data shape: ', data.shape) 192 | 193 | data_mean = np.mean(data, axis=0, keepdims=True) 194 | data_std = np.std(data, axis=0, keepdims=True) 195 | data = (data - data_mean)/data_std 196 | 197 | with open('results/test_set_index.txt') as json_file: 198 | city_index = json.load(json_file) 199 | test_data = np.zeros((len(city_index), 11)) 200 | test_num_2_cityname = {} 201 | for city_num, city in enumerate(city_index): 202 | test_num_2_cityname[city_num] = city 203 | for idx_num, attribute in enumerate(city_index[city]): 204 | test_data[city_num, idx_num] = city_index[city][attribute] 205 | print('testing data shape: ', test_data.shape) 206 | 207 | test_data = (test_data - data_mean) / data_std 208 | 209 | k = k 210 | 211 | kmeanModel = KMeans(n_clusters=k, random_state=1) 212 | kmeanModel.fit(data) 213 | 214 | pca = PCA(n_components=6) 215 | newData = pca.fit_transform(data) 216 | test_newData = np.matmul(test_data, np.transpose(pca.components_)) 217 | 218 | ### the index of cluster is ordered by the increasing order of PCA1 219 | change_order = True 220 | if change_order: 221 | SDNi = np.zeros(k) 222 | change_order_mapping = {} 223 | for i in range(k): 224 | SDNi[i] = np.mean(newData[kmeanModel.labels_ == i][:, 0]) 225 | argsorted_SDNi = np.argsort(SDNi) 226 | for i in range(k): 227 | change_order_mapping[i] = np.where(argsorted_SDNi == i)[0][0] 228 | for i in range(len(kmeanModel.labels_)): 229 | kmeanModel.labels_[i] = change_order_mapping[kmeanModel.labels_[i]] 230 | 231 | ### get the label for the testing data 232 | cluster_centers_ = np.zeros((k, data.shape[1])) 233 | for i in range(k): 234 | cluster_centers_[i, :] = np.mean(data[kmeanModel.labels_ == i], axis=0, keepdims=True) 235 | 236 | pair_distance = cdist(test_data, cluster_centers_, 'euclidean') 237 | test_data_assign_label = np.argmin(pair_distance, axis=1) 238 | 239 | ## read test data f1 value 240 | test_result = json.load(open('results/f1_score_test_result.json', 'r')) 241 | test_result_ = {} 242 | for city in test_result: 243 | for city_name in test_result[city]: 244 | new_city_name = city_name.split('_')[0] + city_name.split('_')[1] + '_' + city_name.split('_')[2] 245 | test_result_[new_city_name] = test_result[city][city_name] 246 | 247 | results_city = {} 
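# Aside -- an illustrative sketch (toy arrays, not part of kmean_pca_analysis.py)
# of the nearest-centroid assignment used above: each test network receives the
# label of the closest k-means cluster center in the standardized feature space.
import numpy as np
from scipy.spatial.distance import cdist

centers = np.array([[0.0, 0.0], [5.0, 5.0]])    # two cluster centers
samples = np.array([[0.5, -0.2], [4.8, 5.1]])   # two test networks
labels = np.argmin(cdist(samples, centers, 'euclidean'), axis=1)   # -> [0, 1]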
248 | 249 | cityName = ['Chicago', 'New york', 'Los angeles', 'Tokyo', 'Berlin', 'Phoenix', 'Paris', 'London', 'Hongkong', 'Singapore'] 250 | for city in cityName: 251 | results_city[city] = [] 252 | 253 | for idx in range(test_data_assign_label.shape[0]): 254 | for city in cityName: 255 | if city in test_num_2_cityname[idx]: 256 | results_city[city].append(test_result_[test_num_2_cityname[idx] + '_1']) 257 | results_city[city].append(test_result_[test_num_2_cityname[idx] + '_2']) 258 | 259 | # we sort the order of visualiztion by its median f1 value 260 | def sortFunc(e): 261 | return np.median(results_city[e]) 262 | cityName.sort(key=sortFunc, reverse=True) 263 | 264 | count = {} 265 | 266 | for city in cityName: 267 | if city not in count: 268 | count[city] = [] 269 | for idx, label in enumerate(test_data_assign_label): 270 | for city in cityName: 271 | if city in test_num_2_cityname[idx]: 272 | count[city].append(label) 273 | 274 | ratio = {} 275 | for city in count: 276 | if city not in ratio: 277 | ratio[city] = np.zeros(k) 278 | for i in range(k): 279 | ratio[city][i] = np.sum(np.array(count[city])==i) 280 | ratio[city] = ratio[city]/np.sum(ratio[city]) 281 | 282 | category_names = ['T ' + str(i + 1) for i in range(6)] 283 | 284 | results = {} 285 | for city in count: 286 | results[city] = ratio[city] 287 | # step 2: figure, label 288 | labels = cityName#list(results.keys()) 289 | data = np.array(list(results.values())) 290 | 291 | ## this is used to control the length of each bar 292 | data_visualize = np.array(list(results.values()))+0.05 293 | data_visualize = data_visualize/np.sum(data_visualize, axis=1, keepdims=True) 294 | data_cum = data_visualize.cumsum(axis=1) 295 | 296 | # category_colors = plt.get_cmap('RdYlGn')(np.linspace(0.1, 1.0, data.shape[1])) 297 | category_colors = ['pink', 'lightblue', 'lightgreen', 'lightyellow', 'lightsalmon'] 298 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 7))#, dpi=1200) 299 | 300 | ax.invert_yaxis() 301 | ax.xaxis.set_visible(False) 302 | ax.set_xlim(0, np.sum(data, axis=1).max()) 303 | 304 | for i, (colname, color) in enumerate(zip(category_names, category_colors)): 305 | if i>=k: 306 | break 307 | widths = data_visualize[:, i] 308 | starts = data_cum[:, i] - widths 309 | ax.barh(labels, width = widths, left=starts, height=0.85, label=colname, color=color, edgecolor="black") 310 | xcenters = starts + widths / 2 311 | text_color = 'black' 312 | for y, (x, c) in enumerate(zip(xcenters, data[:, i])): 313 | ax.text(x, y, '{}%'.format(int(round(c*100))), ha='center', va='center', color=text_color, fontsize=12) 314 | ax.legend(ncol=4, bbox_to_anchor=(0, 1), loc='lower left', fontsize=18, labelspacing = 0.1, borderpad = 0.20) 315 | plt.yticks(fontsize=18, rotation=45) 316 | plt.tight_layout() 317 | plt.savefig('figures/city_ratio.png', bbox_inches='tight') 318 | plt.show() 319 | 320 | def f1_vs_network_type(k): 321 | with open('results/training_set_index.txt') as json_file: 322 | city_index = json.load(json_file) 323 | data = np.zeros((len(city_index), 11)) 324 | num_2_cityname = {} 325 | for city_num, city in enumerate(city_index): 326 | num_2_cityname[city_num] = city 327 | for idx_num, attribute in enumerate(city_index[city]): 328 | data[city_num, idx_num] = city_index[city][attribute] 329 | print('training data shape: ', data.shape) 330 | 331 | data_mean = np.mean(data, axis=0, keepdims=True) 332 | data_std = np.std(data, axis=0, keepdims=True) 333 | data = (data - data_mean)/data_std 334 | 335 | with 
open('results/test_set_index.txt') as json_file: 336 | city_index = json.load(json_file) 337 | test_data = np.zeros((len(city_index), 11)) 338 | test_num_2_cityname = {} 339 | for city_num, city in enumerate(city_index): 340 | test_num_2_cityname[city_num] = city 341 | for idx_num, attribute in enumerate(city_index[city]): 342 | test_data[city_num, idx_num] = city_index[city][attribute] 343 | print('test data shape: ', test_data.shape) 344 | 345 | test_data = (test_data - data_mean)/data_std 346 | 347 | k = k 348 | 349 | kmeanModel = KMeans(n_clusters=k, random_state = 1) 350 | kmeanModel.fit(data) 351 | 352 | pca = PCA(n_components=6) 353 | newData = pca.fit_transform(data) 354 | test_newData = np.matmul(test_data,np.transpose(pca.components_)) 355 | 356 | change_order = True 357 | if change_order: 358 | SDNi = np.zeros(k) 359 | change_order_mapping = {} 360 | for i in range(k): 361 | SDNi[i] = np.mean(newData[kmeanModel.labels_== i][:,0]) 362 | argsorted_SDNi = np.argsort(SDNi) 363 | for i in range(k): 364 | change_order_mapping[i] = np.where(argsorted_SDNi==i)[0][0] 365 | for i in range(len(kmeanModel.labels_)): 366 | kmeanModel.labels_[i] = change_order_mapping[kmeanModel.labels_[i]] 367 | 368 | cluster_centers_ = np.zeros((k, data.shape[1])) 369 | for i in range(k): 370 | cluster_centers_[i,:] = np.mean(data[kmeanModel.labels_== i], axis = 0, keepdims=True) 371 | 372 | pair_distance = cdist(test_data, cluster_centers_, 'euclidean') 373 | test_data_assign_label = np.argmin(pair_distance, axis = 1) 374 | 375 | ## read test data f1 value 376 | test_result = json.load(open('results/f1_score_test_result.json', 'r')) 377 | test_result_ = {} 378 | for city in test_result: 379 | for city_name in test_result[city]: 380 | new_city_name = city_name.split('_')[0]+city_name.split('_')[1]+'_'+city_name.split('_')[2] 381 | test_result_[new_city_name] = test_result[city][city_name] 382 | 383 | results_road_type = {} 384 | for i in range(k): 385 | results_road_type[i] = [] 386 | for idx in range(test_data_assign_label.shape[0]): 387 | if test_num_2_cityname[idx]+'_1' in test_result_: 388 | results_road_type[test_data_assign_label[idx]].append(test_result_[test_num_2_cityname[idx] + '_1']) 389 | results_road_type[test_data_assign_label[idx]].append(test_result_[test_num_2_cityname[idx] + '_2']) 390 | 391 | all_data = [] 392 | for i in range(k): 393 | all_data.append(results_road_type[i]) 394 | cityName = ['Type 1', 'Type 2', 'Type 3', 'Type 4'] 395 | # step 2: figure 396 | fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))#, dpi=1200) 397 | bplot1 = ax1.boxplot(all_data, 398 | vert=True, # vertical box alignment 399 | patch_artist=True, # fill with color 400 | labels=cityName) # will be used to label x-ticks 401 | # ax1.set_title("F1 scores for different road network types", font1, fontsize=20) 402 | ax1.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0]) 403 | plt.yticks(fontsize=22) 404 | plt.xticks(fontsize=22, rotation=0) 405 | # step 3: color 406 | colors = ['pink', 'lightblue', 'lightgreen', 'lightyellow', 'lightsalmon'] 407 | for patch, color in zip(bplot1['boxes'], colors): 408 | patch.set_facecolor(color) 409 | # step 4: grid 410 | ax1.yaxis.grid(True) 411 | plt.ylabel('F1 score', font1, fontsize=22) 412 | plt.tight_layout() 413 | plt.savefig('figures/citytype_vs_f1.png', bbox_inches='tight') 414 | plt.show() 415 | 416 | def f1_vs_city(): 417 | with open('results/test_set_index.txt') as json_file: 418 | city_index = json.load(json_file) 419 | test_data = np.zeros((len(city_index), 11)) 420 | 
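# Aside (sketch, not part of kmean_pca_analysis.py): in the functions above, the
# test features are projected onto the training PCA axes manually via
# test_data @ pca.components_.T. Because the training features were z-scored
# before fitting, pca.mean_ is numerically zero and the manual product agrees
# with pca.transform:
import numpy as np
from sklearn.decomposition import PCA

X = np.random.randn(200, 11)
X = (X - X.mean(axis=0)) / X.std(axis=0)   # standardized, column means ~ 0
pca = PCA(n_components=6).fit(X)
Z_manual = X @ pca.components_.T           # the projection used above
Z_sklearn = pca.transform(X)               # equal up to floating-point error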
test_num_2_cityname = {} 421 | for city_num, city in enumerate(city_index): 422 | test_num_2_cityname[city_num] = city 423 | for idx_num, attribute in enumerate(city_index[city]): 424 | test_data[city_num, idx_num] = city_index[city][attribute] 425 | print('test data shape: ', test_data.shape) 426 | 427 | ## read test data f1 value 428 | test_result = json.load(open('results/f1_score_test_result.json', 'r')) 429 | test_result_ = {} 430 | for city in test_result: 431 | for city_name in test_result[city]: 432 | new_city_name = city_name.split('_')[0]+city_name.split('_')[1]+'_'+city_name.split('_')[2] 433 | test_result_[new_city_name] = test_result[city][city_name] 434 | print(len(test_result_)) 435 | results_city = {} 436 | cityName = ['Chicago', 'New york', 'Los angeles', 'Tokyo', 'Berlin', 'Phoenix', 'Paris', 'London', 'Hongkong', 'Singapore'] 437 | for city in cityName: 438 | results_city[city] = [] 439 | 440 | for idx in range(len(city_index)): 441 | for city in cityName: 442 | if city in test_num_2_cityname[idx]: 443 | print(idx, test_num_2_cityname[idx], test_result_[test_num_2_cityname[idx]+'_1'], test_result_[test_num_2_cityname[idx]+'_2']) 444 | results_city[city].append(test_result_[test_num_2_cityname[idx] + '_1']) 445 | results_city[city].append(test_result_[test_num_2_cityname[idx] + '_2']) 446 | 447 | for city in cityName: 448 | print(city, len(results_city[city])/2) 449 | 450 | #### 451 | all_data = [] 452 | for city in cityName: 453 | all_data.append(results_city[city]) 454 | 455 | # # step 2: figure 456 | fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))#, dpi=1200) 457 | bplot1 = ax1.boxplot(all_data, 458 | vert=True, # vertical box alignment 459 | patch_artist=True, # fill with color 460 | labels=cityName) # will be used to label x-ticks 461 | # ax1.set_title("F1 scores for different cities", font1, fontsize=20) 462 | ax1.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0]) 463 | plt.yticks(fontsize=22) 464 | plt.xticks(fontsize=16, rotation=52) 465 | # step 3: color 466 | colors = ['pink', 'lightblue', 'lightgreen', 'lightyellow', 'lightsalmon', 'pink', 'lightblue', 'lightgreen', 467 | 'lightyellow', 'lightsalmon', ] 468 | for patch, color in zip(bplot1['boxes'], colors): 469 | patch.set_facecolor(color) 470 | # step 4: grid 471 | plt.ylabel('F1 score', font1, fontsize=22) 472 | ax1.yaxis.grid(True) 473 | plt.tight_layout() 474 | plt.savefig('figures/city_vs_f1.png', bbox_inches='tight') 475 | plt.show() 476 | 477 | def f1_vs_PCA1(): 478 | with open('results/training_set_index.txt') as json_file: 479 | city_index = json.load(json_file) 480 | data = np.zeros((len(city_index), 11)) 481 | num_2_cityname = {} 482 | for city_num, city in enumerate(city_index): 483 | num_2_cityname[city_num] = city 484 | for idx_num, attribute in enumerate(city_index[city]): 485 | data[city_num, idx_num] = city_index[city][attribute] 486 | print('data shape: ', data.shape) 487 | 488 | data_mean = np.mean(data, axis=0, keepdims=True) 489 | data_std = np.std(data, axis=0, keepdims=True) 490 | data = (data - data_mean) / data_std 491 | 492 | with open('results/test_set_index.txt') as json_file: 493 | city_index = json.load(json_file) 494 | test_data = np.zeros((len(city_index), 11)) 495 | test_num_2_cityname = {} 496 | for city_num, city in enumerate(city_index): 497 | test_num_2_cityname[city_num] = city 498 | for idx_num, attribute in enumerate(city_index[city]): 499 | test_data[city_num, idx_num] = city_index[city][attribute] 500 | print('test data shape: ', test_data.shape) 501 | 502 | test_data 
502 |     test_data = (test_data - data_mean) / data_std
503 | 
504 |     pca = PCA(n_components=6)
505 |     newData = pca.fit_transform(data)
506 | 
507 |     test_newData = np.matmul(test_data, np.transpose(pca.components_))
508 | 
509 |     test_result = json.load(open('results/f1_score_test_result.json', 'r'))
510 |     test_result_ = {}
511 |     for city in test_result:
512 |         for city_name in test_result[city]:
513 |             new_city_name = city_name.split('_')[0] + city_name.split('_')[1] + '_' + city_name.split('_')[2]
514 |             test_result_[new_city_name] = test_result[city][city_name]
515 | 
516 |     f1_score = np.zeros(test_newData.shape[0])
517 |     for i in range(test_newData.shape[0]):
518 |         f1_score[i] = (test_result_[test_num_2_cityname[i] + '_1'] + test_result_[test_num_2_cityname[i] + '_2']) / 2
519 | 
520 |     fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))  # , dpi=1200
521 |     plt.scatter(test_newData[:, 0], f1_score, s=3, label='Road network')
522 | 
523 |     # Create linear regression object
524 |     regr = linear_model.LinearRegression()
525 |     # Train the model using the training sets
526 |     regr.fit(test_newData[:, 0].reshape(-1, 1), np.reshape(f1_score, (-1, 1)))
527 | 
528 |     test_X = np.arange(test_newData[:, 0].min(), test_newData[:, 0].max(), 0.05).reshape(-1, 1)
529 |     # Make predictions using the testing set
530 |     test_y_pred = regr.predict(test_X)
531 | 
532 |     plt.plot(test_X, test_y_pred, linewidth=3, label='Linear regression', color='r')
533 | 
534 |     pca1_f1 = np.column_stack((test_newData[:, 0]/5, f1_score))
535 | 
536 |     pts = np.array([[-4., 0.9], [4.0, 0.5], [4.0, 0.1]])
537 |     color = plt.rcParams['axes.prop_cycle'].by_key()['color']
538 |     color.remove(color[3])
539 |     color.remove(color[0])
540 |     pts[:, 0] = pts[:, 0]/5
541 |     for i in range(pts.shape[0]):
542 |         distance = cdist(pca1_f1, pts[i:i+1])
543 |         idx = np.argmin(distance)
544 |         plt.scatter(test_newData[idx, 0], f1_score[idx], s=144, marker='X', label=test_num_2_cityname[idx], color=color[i])
545 |     print('pearson correlation:', scipy.stats.pearsonr(test_newData[:, 0], f1_score))
546 | 
547 |     plt.yticks(fontsize=22)
548 |     plt.xticks(fontsize=22)
549 |     plt.xlabel('PCA1', font1)
550 |     plt.ylabel('F1 score', font1)
551 |     plt.legend(loc="best", fontsize=19, markerscale=0.98, ncol=1, labelspacing=0.1, borderpad=0.20)
552 |     plt.tight_layout()
553 |     plt.savefig('figures/pca1_vs_f1.png', bbox_inches='tight')
554 |     plt.show()
555 | 
556 | def pca_visualize_center_radar(k):
557 |     with open('results/training_set_index.txt') as json_file:
558 |         city_index = json.load(json_file)
559 |     data = np.zeros((len(city_index), 11))
560 |     num_2_cityname = {}
561 |     for city_num, city in enumerate(city_index):
562 |         num_2_cityname[city_num] = city
563 |         for idx_num, attribute in enumerate(city_index[city]):
564 |             data[city_num, idx_num] = city_index[city][attribute]
565 |     print('data shape: ', data.shape)
566 | 
567 |     data_mean = np.mean(data, axis=0, keepdims=True)
568 |     data_std = np.std(data, axis=0, keepdims=True)
569 |     data = (data - data_mean)/data_std
570 | 
571 |     # k: the number of network clusters requested by the caller
572 | 
573 |     kmeanModel = KMeans(n_clusters=k, random_state=1)
574 |     kmeanModel.fit(data)
575 | 
576 |     # print(kmeanModel.labels_ == 1)
577 |     pca = PCA(n_components=6)
578 |     newData = pca.fit_transform(data)
579 | 
580 |     change_order = True
581 | 
582 |     if change_order:
583 |         SDNi = np.zeros(k)
584 |         change_order_mapping = {}
585 |         for i in range(k):
586 |             SDNi[i] = np.mean(newData[kmeanModel.labels_ == i][:, 0])
587 |         argsorted_SDNi = np.argsort(SDNi)
588 |         for i in range(k):
589 |             change_order_mapping[i] = np.where(argsorted_SDNi == i)[0][0]
590 |         for i in range(len(kmeanModel.labels_)):
591 |             kmeanModel.labels_[i] = change_order_mapping[kmeanModel.labels_[i]]
592 | 
593 |     cluster_centers_ = np.zeros((k, data.shape[1]))
594 | 
595 |     for i in range(k):
596 |         # print(kmeanModel.labels_ == i)
597 |         cluster_centers_[i, :] = np.mean(data[kmeanModel.labels_ == i], axis=0, keepdims=True)
598 |     # cluster_centers_ = cluster_centers_*data_std + data_mean
599 |     name_list = ['avg degree', 'frc degree 1', 'frc degree 2', 'frc degree 3', 'frc degree 4', 'log circuity (r<0.5)', 'log circuity (r>0.5)', 'frc bridge edges', 'frc dead-end edges', 'frc bridge length', 'frc dead-end length']
600 |     num_list = cluster_centers_[0]
601 | 
602 |     plt.style.use('ggplot')
603 |     fig = plt.figure(figsize=(9.8, 7))  # , dpi=1200
604 |     # polar coordinate
605 |     ax = fig.add_subplot(111, polar=True)
606 |     # data
607 |     feature = name_list
608 |     values = cluster_centers_[0]
609 | 
610 |     N = len(values)
611 | 
612 |     angles = np.linspace(0, 2 * np.pi, N, endpoint=False)
613 |     angles = np.concatenate((angles, [angles[0]]))
614 | 
615 |     color = ['b', 'orange', 'g', 'r']
616 |     for i in range(cluster_centers_.shape[0]):
617 |         values = cluster_centers_[i]
618 |         values = np.concatenate((values, [values[0]]))
619 |         ax.plot(angles, values, 'o-', linewidth=2, label='Type ' + str(i+1), color=color[i])
620 |         # fill color
621 |         ax.fill(angles, values, alpha=0.1, color=color[i])
622 | 
623 | 
624 |     ax.set_thetagrids(angles * 180 / np.pi, feature, fontsize=16, color='k')
625 |     ax.grid(True)
626 |     # show figure
627 |     ax.yaxis.set_ticklabels([])
628 |     plt.legend(loc='best', bbox_to_anchor=(0.7, 0.65, 0.5, 0.5), fontsize=18)
629 |     plt.tight_layout()
630 |     plt.savefig('figures/center.png', bbox_inches='tight')
631 |     plt.show()
632 | 
633 | if __name__ == '__main__':
634 |     import argparse
635 |     parser = argparse.ArgumentParser(description='get the measures for networks')
636 |     parser.add_argument('--mode', default='', type=str)
637 |     args = parser.parse_args()
638 | 
639 |     if not os.path.isdir('figures'):
640 |         os.mkdir('figures')
641 | 
642 |     if args.mode == 'pca_visualize':
643 |         pca_visualize(k=4)
644 | 
645 |     if args.mode == 'city_ratio':
646 |         city_ratio(k=4)
647 | 
648 |     if args.mode == 'f1_vs_type':
649 |         f1_vs_network_type(k=4)
650 | 
651 |     if args.mode == 'f1_vs_city':
652 |         f1_vs_city()
653 | 
654 |     if args.mode == 'f1_vs_PCA1':
655 |         f1_vs_PCA1()
656 | 
657 |     if args.mode == 'center':
658 |         pca_visualize_center_radar(k=4)
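A minimal usage sketch (not a file in the repository): the __main__ block above wires each --mode flag to one plotting function, so the full figure set can be regenerated in one loop once measures.py has populated results/. The subprocess runner below is an assumption about how one might drive it, not project tooling.

# Sketch: regenerate every figure produced by kmean_pca_analysis.py.
# Assumes it is run from road-classification/ after measures.py has written
# results/training_set_index.txt and results/test_set_index.txt, and after
# the GNN pipeline has written results/f1_score_test_result.json.
import subprocess

for mode in ['pca_visualize', 'city_ratio', 'f1_vs_type',
             'f1_vs_city', 'f1_vs_PCA1', 'center']:
    subprocess.run(['python', 'kmean_pca_analysis.py', '--mode', mode], check=True)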
--------------------------------------------------------------------------------
/road-classification/measures.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import networkx as nx
4 | import json
5 | import math
6 | import glob
7 | import os
8 | 
9 | def nodes_to_list(nodes):
10 |     new_nodes = []
11 |     for n in nodes:
12 |         new_nodes.append([n['osmid'], n['lon'], n['lat']])
13 |     return new_nodes
14 | 
15 | def nodes_to_dict(nodes):
16 |     new_nodes = {}
17 |     for n in nodes:
18 |         new_nodes[n['osmid']] = (n['lon'], n['lat'])
19 |     return new_nodes
20 | 
21 | def edges_to_dict(edges, sample=1):
22 |     old_edges = {}
23 |     for e in edges:
24 |         if sample == 1:
25 |             if e['start'] not in old_edges:
26 |                 old_edges[e['start']] = []
27 |             old_edges[e['start']].append(e['end'])
28 |         if sample == 2:
29 |             if e['start'] not in old_edges:
30 |                 old_edges[e['start']] = []
31 |             old_edges[e['start']].append(e['end'])
32 |     return old_edges
33 | 
34 | def load_graph(file_name, train=True, sample=1):
35 |     if train:
36 |         nodes = json.load(open('../codeJiaweiXue_2020715_dataCollection/train/' + file_name + 'nodes.json', 'r'))
37 |         edges = json.load(open('../codeJiaweiXue_2020715_dataCollection/train/' + file_name + 'edges.json', 'r'))
38 |     else:
39 |         nodes = json.load(open('../codeJiaweiXue_2020715_dataCollection/test/' + file_name + 'nodes.json', 'r'))
40 |         edges = json.load(open('../codeJiaweiXue_2020715_dataCollection/test/' + file_name + 'edges.json', 'r'))
41 |     old_edges = edges_to_dict(edges, sample=sample)
42 |     return nodes, old_edges
43 | 
44 | def visualization(nodeInfor, predictEdges, oldEdges, newEdges):
45 |     # step0: get the information
46 |     nodeId = [nodeInfor[i][0] for i in range(len(nodeInfor))]
47 |     longitude = [nodeInfor[i][1] for i in range(len(nodeInfor))]
48 |     latitude = [nodeInfor[i][2] for i in range(len(nodeInfor))]
49 | 
50 |     # step1: generate the graph
51 |     n = len(nodeId)
52 |     A1 = np.array([[0] * n] * n)
53 |     Graph1 = nx.Graph(A1)
54 | 
55 |     # step 2: label
56 |     column = [str(nodeId[i]) for i in range(n)]
57 |     mapping = {0: str(nodeId[0])}
58 |     for i in range(0, len(column) - 1):
59 |         mapping.setdefault(i + 1, column[i + 1])
60 |     Graph1 = nx.relabel_nodes(Graph1, mapping)
61 | 
62 |     # step3: geolocation
63 |     POS = list()
64 |     for i in range(0, n):
65 |         POS.append((float(longitude[i]), float(latitude[i])))
66 |     for i in range(0, n):
67 |         Graph1.nodes[column[i]]['pos'] = POS[i]
68 | 
69 |     num = 0
70 |     # step 4: add edge
71 |     for start in oldEdges:
72 |         for end in oldEdges[start]:
73 |             num = num + 1
74 |             Graph1.add_edge(str(start), str(end), color='black', weight=1)
75 |     # print('old num', num)
76 |     for start in newEdges:
77 |         for end in newEdges[start]:
78 |             if (not (start in predictEdges and end in predictEdges[start])) and \
79 |                (not (end in predictEdges and start in predictEdges[end])):
80 |                 Graph1.add_edge(str(start), str(end), color='blue', weight=2)
81 |     for start in predictEdges:
82 |         for end in predictEdges[start]:
83 |             if (start in newEdges and end in newEdges[start]) or \
84 |                (end in newEdges and start in newEdges[end]):
85 |                 Graph1.add_edge(str(start), str(end), color='green', weight=5)
86 |             else:
87 |                 Graph1.add_edge(str(start), str(end), color='red', weight=2)
88 | 
89 |     edges = Graph1.edges()
90 |     colors = [Graph1[u][v]['color'] for u, v in edges]
91 |     weights = [Graph1[u][v]['weight'] for u, v in edges]
92 |     # print(nx.cycle_basis(Graph1))
93 |     print("node number", len(Graph1.nodes))
94 |     print("edge number", len(Graph1.edges))
95 | 
96 |     plt.figure(1, figsize=(6, 6))
97 |     nx.draw(Graph1, nx.get_node_attributes(Graph1, 'pos'), edge_color=colors, width=weights, node_size=10)  # , with_labels=True
98 |     plt.show()
99 | 
100 | def degree_distribution(old_edges):
101 |     edges_degree_dict = {}
102 |     for dict_name in old_edges:
103 |         if dict_name not in edges_degree_dict:
104 |             edges_degree_dict[dict_name] = 0
105 |         for v in old_edges[dict_name]:
106 |             if v not in edges_degree_dict:
107 |                 edges_degree_dict[v] = 0
108 |     for dict_name in old_edges:
109 |         for v in old_edges[dict_name]:
110 |             edges_degree_dict[dict_name] += 1
111 |             edges_degree_dict[v] += 1
112 | 
113 |     ## assign the large degree node with a fixed value 4
114 |     for v in edges_degree_dict:
115 |         if edges_degree_dict[v] > 4:
116 |             edges_degree_dict[v] = 4
117 | 
118 |     summary = np.zeros(len(edges_degree_dict))
119 | 
120 |     count = 0
121 |     for idx, v in enumerate(edges_degree_dict):
122 |         count += edges_degree_dict[v]
123 |         summary[idx] = edges_degree_dict[v]
124 | 
125 |     pos_avg_count = count/len(edges_degree_dict)
126 |     frac_degree1 = np.sum(summary == 1) / len(edges_degree_dict)
127 |     frac_degree2 = np.sum(summary == 2) / len(edges_degree_dict)
128 |     frac_degree3 = np.sum(summary == 3) / len(edges_degree_dict)
129 |     frac_degree4 = np.sum(summary == 4) / len(edges_degree_dict)
130 |     return pos_avg_count, frac_degree1, frac_degree2, frac_degree3, frac_degree4
131 | 
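# ---- Sketch, not part of the original module ---------------------------------
# A tiny sanity check for degree_distribution(): a star with centre 'c' and
# five leaves. The centre's raw degree (5) is clamped to 4 by the function,
# and every leaf has degree 1. The node names here are made up.
def _toy_degree_distribution_example():
    toy_edges = {'c': ['a', 'b', 'd', 'e', 'f']}
    # Expected return: avg = (4 + 5*1)/6 = 1.5, frac_degree1 = 5/6,
    # frac_degree2 = frac_degree3 = 0, frac_degree4 = 1/6.
    return degree_distribution(toy_edges)
# -------------------------------------------------------------------------------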
132 | def shortest_path_distance(nodes, old_edges):
133 |     nodes_dict = nodes_to_dict(nodes)
134 |     nodes_list = [v for v in nodes_dict]
135 |     nodes_id_to_idx = {}
136 |     for i, v in enumerate(nodes_list):
137 |         nodes_id_to_idx[v] = i
138 | 
139 |     distances = {}
140 |     for node_name in old_edges:
141 |         if node_name not in distances:
142 |             distances[node_name] = {}
143 |         for node_name_neighbour in old_edges[node_name]:
144 |             if node_name_neighbour not in distances:
145 |                 distances[node_name_neighbour] = {}
146 |             point1 = nodes_dict[node_name]
147 |             point2 = nodes_dict[node_name_neighbour]
148 |             distance = math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
149 |             distances[node_name][node_name_neighbour] = distance
150 |             distances[node_name_neighbour][node_name] = distance
151 | 
152 |     shortest_path_distance_matrix = np.zeros((len(nodes_list), len(nodes_list)))
153 |     for i in range(len(nodes_list)):
154 |         shortest_distance = DjikstraAlg(nodes_list, nodes_list[i], distances)
155 |         for node_name in shortest_distance:
156 |             shortest_path_distance_matrix[i, nodes_id_to_idx[node_name]] = shortest_distance[node_name]
157 | 
158 |     straight_line_distance_matrix = np.zeros((len(nodes_list), len(nodes_list)))
159 |     for i in range(len(nodes_list)):
160 |         for j in range(len(nodes_list)):
161 |             point1 = nodes_dict[nodes_list[i]]
162 |             point2 = nodes_dict[nodes_list[j]]
163 |             straight_line_distance_matrix[i, j] = math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
164 | 
165 |     return shortest_path_distance_matrix, straight_line_distance_matrix
166 | 
167 | def DjikstraAlg(nodes, current_node, distances):
168 |     unvisited = {node: float('inf') for node in nodes}
169 |     visited = {}
170 | 
171 |     current = current_node
172 |     currentDistance = 0
173 |     unvisited[current] = currentDistance
174 | 
175 |     while True:
176 |         for neighbour, distance in distances.get(current, {}).items():
177 |             if neighbour not in unvisited: continue
178 |             newDistance = currentDistance + distance
179 |             if unvisited[neighbour] == float('inf') or unvisited[neighbour] > newDistance:
180 |                 unvisited[neighbour] = newDistance
181 |         visited[current] = currentDistance
182 |         del unvisited[current]
183 |         if not unvisited: break
184 |         current, currentDistance = min(unvisited.items(), key=lambda x: x[1])
185 |         if currentDistance == float('inf'): break  # remaining nodes are unreachable
186 | 
187 |     return visited
188 | 
189 | def LogCircuity(nodes, old_edges, r1, r2):
190 |     shortest_path_distance_matrix, straight_line_distance_matrix = shortest_path_distance(nodes, old_edges)
191 |     between_r1_r2 = np.logical_and(straight_line_distance_matrix < r2, straight_line_distance_matrix > r1)
192 |     sum_straight_line = np.sum(straight_line_distance_matrix * between_r1_r2)
193 |     sum_network_path = np.sum(shortest_path_distance_matrix * between_r1_r2)
194 |     # print(sum_network_path, sum_straight_line)
195 |     # print(np.log10(sum_network_path) - np.log10(sum_straight_line))
196 |     if sum_network_path == 0 or sum_straight_line == 0:
197 |         return 0
198 |     else:
199 |         return np.log10(sum_network_path) - np.log10(sum_straight_line)
200 | 
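# ---- Sketch, not part of the original module ---------------------------------
# A worked example for LogCircuity(): two nodes 0.01 degrees apart that are
# connected only through a detour node, so the network path is longer than the
# straight line and the log-ratio comes out positive. Node IDs and coordinates
# are made up for illustration.
def _toy_circuity_example():
    toy_nodes = [
        {'osmid': 1, 'lon': 0.000, 'lat': 0.000},
        {'osmid': 2, 'lon': 0.010, 'lat': 0.000},
        {'osmid': 3, 'lon': 0.005, 'lat': 0.005},  # detour point
    ]
    toy_edges = {1: [3], 3: [2]}  # path 1 - 3 - 2, no direct 1 - 2 edge
    # All three node pairs fall in the (0.005, 0.02) band; the 1 - 2 pair
    # travels ~0.0141 over the network versus 0.010 straight-line, giving
    # log10(sum_network / sum_straight) of roughly 0.07.
    return LogCircuity(toy_nodes, toy_edges, 0.005, 0.02)
# -------------------------------------------------------------------------------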
201 | def Bridge(nodes, old_edges):
202 |     from tools.Bridges import Graph
203 |     graph = Graph(len(nodes))
204 |     nodes_dict = nodes_to_dict(nodes)
205 |     nodes_list = [v for v in nodes_dict]
206 |     nodes_id_to_idx = {}
207 |     for i, v in enumerate(nodes_list):
208 |         nodes_id_to_idx[v] = i
209 | 
210 |     ######### find the total edges
211 |     edges_count = 0
212 |     for node_name in old_edges:
213 |         for node_name_neighbour in old_edges[node_name]:
214 |             graph.addEdge(nodes_id_to_idx[node_name], nodes_id_to_idx[node_name_neighbour])
215 |             edges_count += 1
216 | 
217 |     ######## find all bridges and dead-ends
218 |     graph.bridge()
219 |     bridge_bool = np.zeros(len(graph.bridges))
220 |     ### classify each cut edge as a bridge or a dead-end (an endpoint with degree 1)
221 |     edges_degree_dict = {}
222 |     for dict_name in old_edges:
223 |         if dict_name not in edges_degree_dict:
224 |             edges_degree_dict[dict_name] = 0
225 |         for v in old_edges[dict_name]:
226 |             if v not in edges_degree_dict:
227 |                 edges_degree_dict[v] = 0
228 |     for dict_name in old_edges:
229 |         for v in old_edges[dict_name]:
230 |             edges_degree_dict[dict_name] += 1
231 |             edges_degree_dict[v] += 1
232 | 
233 |     for idx, edge in enumerate(graph.bridges):
234 |         # print(nodes_list[edge[0]], edges_degree_dict[nodes_list[edge[0]]])
235 |         if edges_degree_dict[nodes_list[edge[0]]] == 1 or edges_degree_dict[nodes_list[edge[1]]] == 1:
236 |             bridge_bool[idx] = 0
237 |         else:
238 |             bridge_bool[idx] = 1
239 | 
240 |     frac_edge_bridges = sum(bridge_bool)/float(edges_count)
241 |     frac_edge_deadends = sum(1 - bridge_bool)/float(edges_count)
242 | 
243 |     ####### find all edges length
244 |     length_sum = 0
245 |     for node_name in old_edges:
246 |         for node_name_neighbour in old_edges[node_name]:
247 |             point1 = nodes_dict[node_name]
248 |             point2 = nodes_dict[node_name_neighbour]
249 |             distance = math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
250 |             length_sum = length_sum + distance
251 | 
252 |     length_sum_bridge = 0
253 |     length_sum_deadend = 0
254 | 
255 |     for idx, edge in enumerate(graph.bridges):
256 |         point1 = nodes_dict[nodes_list[edge[0]]]
257 |         point2 = nodes_dict[nodes_list[edge[1]]]
258 |         distance = math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
259 |         if bridge_bool[idx] == 1:
260 |             length_sum_bridge = length_sum_bridge + distance
261 |         else:
262 |             length_sum_deadend = length_sum_deadend + distance
263 | 
264 |     frac_length_bridges = length_sum_bridge / length_sum
265 |     frac_length_deadend = length_sum_deadend / length_sum
266 |     # for idx, edge in enumerate(graph.bridges):
267 |     #     print(nodes_list[edge[0]], nodes_list[edge[1]], bridge_bool[idx])
268 |     # print(graph.bridges)
269 |     # for v in graph.bridges:
270 |     #     print(nodes_list[v[0]], nodes_list[v[1]])
271 |     # return len(graph.bridges)/float(edges_count)
272 |     return frac_edge_bridges, frac_edge_deadends, frac_length_bridges, frac_length_deadend
273 | 
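# ---- Sketch, not part of the original module ---------------------------------
# A minimal check for Bridge(): a triangle 1-2-3 with a pendant node 4 hanging
# off node 1. The only cut edge is 1-4, and since node 4 has degree 1 it is
# counted as a dead-end rather than a bridge. Coordinates are made up.
def _toy_bridge_example():
    toy_nodes = [
        {'osmid': 1, 'lon': 0.00, 'lat': 0.00},
        {'osmid': 2, 'lon': 0.01, 'lat': 0.00},
        {'osmid': 3, 'lon': 0.00, 'lat': 0.01},
        {'osmid': 4, 'lon': -0.01, 'lat': 0.00},
    ]
    toy_edges = {1: [2, 4], 2: [3], 3: [1]}
    # Expected return: (0.0, 0.25, 0.0, ~0.23): no true bridges, one
    # dead-end edge out of four carrying ~23% of the total edge length.
    return Bridge(toy_nodes, toy_edges)
# -------------------------------------------------------------------------------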
274 | def index_city(city_name, train, visualize=False):
275 |     sample = 1
276 |     nodes, old_edges = load_graph(city_name, train, sample)
277 |     '''
278 |     check the uniqueness expression
279 |     '''
280 |     index_dict = {}
281 | 
282 |     #### get degree distribution ####
283 |     pos_avg_count, frac_degree1, frac_degree2, frac_degree3, frac_degree4 = degree_distribution(old_edges)
284 |     index_dict['pos_degree'] = pos_avg_count
285 |     index_dict['frac_degree1'] = frac_degree1
286 |     index_dict['frac_degree2'] = frac_degree2
287 |     index_dict['frac_degree3'] = frac_degree3
288 |     index_dict['frac_degree4'] = frac_degree4
289 | 
290 |     #### circuity ####
291 |     index_dict['log_circuity_0_0p005'] = LogCircuity(nodes, old_edges, 0.0, 0.005)
292 |     index_dict['log_circuity_0p005_0p02'] = LogCircuity(nodes, old_edges, 0.005, 0.02)
293 | 
294 |     #### dendricity ####
295 |     frac_edge_bridges, frac_edge_deadends, frac_length_bridges, frac_length_deadend = Bridge(nodes, old_edges)
296 |     index_dict['frac_edge_bridges'] = frac_edge_bridges
297 |     index_dict['frac_edge_deadends'] = frac_edge_deadends
298 |     index_dict['frac_length_bridges'] = frac_length_bridges
299 |     index_dict['frac_length_deadend'] = frac_length_deadend
300 | 
301 |     if visualize:
302 |         visualization(nodes_to_list(nodes), dict(), old_edges, dict())
303 |     return index_dict
304 | 
305 | if __name__ == '__main__':
306 |     import argparse
307 | 
308 |     parser = argparse.ArgumentParser(description='get the measures for networks')
309 |     parser.add_argument('--mode', default='train', type=str)
310 |     args = parser.parse_args()
311 | 
312 |     if args.mode == 'train':
313 |         train = True
314 |     else:
315 |         train = False
316 | 
317 |     if train:
318 |         file_list = glob.glob('../codeJiaweiXue_2020715_dataCollection/train/*edges.json')
319 |     else:
320 |         file_list = glob.glob('../codeJiaweiXue_2020715_dataCollection/test/*edges.json')
321 |     '''
322 |     the file-name parsing below assumes Windows path separators ('\\')
323 |     '''
324 |     name_list = []
325 |     max_distance = 0
326 |     for file_name in file_list:
327 |         name_list.append((file_name.split('\\')[-1]).split('edges.json')[0])
328 | 
329 |     all_city_index = {}
330 |     for idx, city in enumerate(name_list):
331 |         print('{}/{} city: {}'.format(idx, len(name_list), city))
332 |         all_city_index[city] = index_city(city, train=train)
333 | 
334 |     if not os.path.isdir('results'):
335 |         os.mkdir('results')
336 | 
337 |     if train:
338 |         with open('results/training_set_index.txt', 'w') as outfile:
339 |             json.dump(all_city_index, outfile)
340 |     else:
341 |         with open('results/test_set_index.txt', 'w') as outfile:
342 |             json.dump(all_city_index, outfile)
343 | 
344 | 
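A usage sketch for measures.py (not part of the repository): the __main__ block above builds the 11-dimensional index vectors that kmean_pca_analysis.py consumes. The relative ../codeJiaweiXue_2020715_dataCollection/ layout is taken from the code; the city file prefix shown is a hypothetical placeholder.

# Sketch: run the full indexing pass, or index a single city prefix directly.
#   python measures.py --mode train   # writes results/training_set_index.txt
#   python measures.py --mode test    # writes results/test_set_index.txt
from measures import index_city

indices = index_city('Chicago_1_', train=True)  # file prefix is an assumption
print(sorted(indices))  # the 11 keys, from 'frac_degree1' to 'pos_degree'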
--------------------------------------------------------------------------------
/road-classification/tools/Bridges.py:
--------------------------------------------------------------------------------
1 | # Python program to find bridges in a given undirected graph
2 | # Complexity : O(V+E)
3 | # adapted from https://www.geeksforgeeks.org/bridge-in-a-graph/
4 | 
5 | from collections import defaultdict
6 | 
7 | 
8 | # This class represents an undirected graph using adjacency list representation
9 | class Graph:
10 | 
11 |     def __init__(self, vertices):
12 |         self.V = vertices  # No. of vertices
13 |         self.graph = defaultdict(list)  # default dictionary to store graph
14 |         self.Time = 0
15 |         self.bridges = []
16 |     # function to add an edge to graph
17 |     def addEdge(self, u, v):
18 |         self.graph[u].append(v)
19 |         self.graph[v].append(u)
20 | 
21 |     '''A recursive function that finds and records bridges
22 |     using DFS traversal
23 |     u --> The vertex to be visited next
24 |     visited[] --> keeps track of visited vertices
25 |     disc[] --> Stores discovery times of visited vertices
26 |     parent[] --> Stores parent vertices in DFS tree'''
27 | 
28 |     def bridgeUtil(self, u, visited, parent, low, disc):
29 | 
30 |         # Mark the current node as visited
31 |         visited[u] = True
32 | 
33 |         # Initialize discovery time and low value
34 |         disc[u] = self.Time
35 |         low[u] = self.Time
36 |         self.Time += 1
37 | 
38 |         # Recur for all the vertices adjacent to this vertex
39 |         for v in self.graph[u]:
40 |             # If v is not visited yet, then make it a child of u
41 |             # in DFS tree and recur for it
42 |             if visited[v] == False:
43 |                 parent[v] = u
44 |                 self.bridgeUtil(v, visited, parent, low, disc)
45 | 
46 |                 # Check if the subtree rooted with v has a connection to
47 |                 # one of the ancestors of u
48 |                 low[u] = min(low[u], low[v])
49 | 
50 |                 ''' If the lowest vertex reachable from the subtree
51 |                 under v lies below u in the DFS tree, then u-v is
52 |                 a bridge'''
53 |                 if low[v] > disc[u]:
54 |                     # print("%d %d" % (u, v))
55 |                     self.bridges.append((u, v))
56 | 
57 | 
58 |             elif v != parent[u]:  # Update low value of u for parent function calls.
59 |                 low[u] = min(low[u], disc[v])
60 | 
61 |     # DFS-based function to find all bridges; it calls the recursive
62 |     # helper bridgeUtil() once per connected component
63 | 
64 |     def bridge(self):
65 | 
66 |         # Mark all the vertices as not visited and initialize the
67 |         # discovery-time, low-link and parent arrays
68 |         visited = [False] * (self.V)
69 |         disc = [float("Inf")] * (self.V)
70 |         low = [float("Inf")] * (self.V)
71 |         parent = [-1] * (self.V)
72 | 
73 |         # Call the recursive helper function to find bridges
74 |         # in DFS tree rooted with vertex 'i'
75 |         for i in range(self.V):
76 |             if visited[i] == False:
77 |                 self.bridgeUtil(i, visited, parent, low, disc)
78 | 
--------------------------------------------------------------------------------
/road-classification/tools/__pycache__/Bridges.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiang719/road-network-predictability/35a83b7053fb7e220918e32232206fa7c20cb2a6/road-classification/tools/__pycache__/Bridges.cpython-36.pyc
--------------------------------------------------------------------------------
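To close, a self-contained sketch of the bridge finder in road-classification/tools/Bridges.py (not a file in the repository; vertex labels are the 0-based indices that Graph expects):

# Sketch: the cycle 0-1-2 contains no bridges, while the tail 1-3-4
# contributes two cut edges.
from tools.Bridges import Graph

g = Graph(5)        # five vertices, labelled 0..4
g.addEdge(0, 1)
g.addEdge(1, 2)
g.addEdge(2, 0)     # 0, 1, 2 form a cycle
g.addEdge(1, 3)     # cut edge
g.addEdge(3, 4)     # cut edge ending in a degree-1 node
g.bridge()
print(g.bridges)    # [(3, 4), (1, 3)] in DFS discovery order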