├── Code
│   ├── CF_extraction.py
│   ├── Main.py
│   ├── data_analysis.py
│   ├── data_cleaning.py
│   ├── data_transformation.py
│   ├── model_calibration.py
│   └── plot_result.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/Code/CF_extraction.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from sklearn.linear_model import LinearRegression
3 | from sklearn.metrics import r2_score
4 | from data_transformation import *
5 | from data_cleaning import *
6 | import numpy as np
7 | import pandas as pd
8 | def Waymo_extract_df(input_path):
9 |     state_features = {
10 |         'state/past/x':
11 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
12 |         'state/past/y':
13 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
14 |         'state/past/speed':
15 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
16 |         'state/past/length':
17 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
18 |         'state/current/x':
19 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
20 |         'state/current/y':
21 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
22 |         'state/current/speed':
23 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
24 |         'state/current/length':
25 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
26 |         'state/future/x':
27 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
28 |         'state/future/y':
29 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
30 |         'state/future/speed':
31 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
32 |         'state/future/length':
33 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
34 |         'state/id':
35 |             tf.io.FixedLenFeature([128], tf.float32, default_value=None),
36 |         'state/type':
37 |             tf.io.FixedLenFeature([128], tf.float32, default_value=None),
38 |         'state/is_sdc':
39 |             tf.io.FixedLenFeature([128], tf.int64, default_value=None),
40 |     }
41 | 
42 |     def _parse_function(example_proto):
43 |         return tf.io.parse_single_example(example_proto, state_features)
44 | 
45 |     raw_dataset = tf.data.TFRecordDataset(input_path)
46 |     parsed_dataset = raw_dataset.map(_parse_function)
47 | 
48 |     data_rows = []
49 | 
50 |     traj_id = 0
51 |     for parsed_record in parsed_dataset:
52 |         def extract_data(field_name):
53 |             return parsed_record[field_name].numpy()
54 | 
55 |         data_to_extract = ['state/current/x', 'state/current/y', 'state/current/speed', 'state/current/length',
56 |                            'state/past/x', 'state/past/y', 'state/past/speed', 'state/past/length',
57 |                            'state/future/x', 'state/future/y', 'state/future/speed', 'state/future/length',
58 |                            'state/id', 'state/is_sdc', 'state/type']
59 | 
60 |         extracted_data = {key: extract_data(key) for key in data_to_extract}
61 | 
62 |         def add_trajectory_data(time_range, data, time):
63 |             for j in time_range:
64 |                 row = {'Trajectory_ID': traj_id}
65 |                 for i in range(128):
66 |                     row.update({
67 |                         f'id_{i}': data['state/id'][i],
68 |                         f'is_av_{i}': data['state/is_sdc'][i],
69 |                         f'type_{i}': data['state/type'][i],
70 |                         f'x_{i}': data['state/' + time + '/x'][i][j],
71 |                         f'y_{i}': data['state/' + time + '/y'][i][j],
72 |                         f'length_{i}': data['state/' + time + '/length'][i][j],
73 |                         f'speed_{i}': data['state/' + time + '/speed'][i][j],
74 |                     })
75 |                 data_rows.append(row)
76 | 
77 |         add_trajectory_data(range(10), extracted_data, 'past')
78 |         add_trajectory_data(range(1), extracted_data, 'current')
79 |         add_trajectory_data(range(80), extracted_data, 'future')
80 | 
81 |         traj_id += 1
82 | 
83 |     dataframe = pd.DataFrame(data_rows)
84 |     return dataframe
85 | 
86 | 
87 | def Waymo_extract_cf_traj(data, straight_threshold=0.9, direction_threshold=0.985,
88 |                           relative_diff_threshold=0.2):
89 |     """
90 |     Processes trajectories to keep only straight, car-following ones based on the specified thresholds.
91 | 
92 |     Parameters:
93 |     - data: DataFrame containing trajectory data.
94 |     - straight_threshold: R² threshold to determine if the trajectory is straight.
95 |     - direction_threshold: Cosine similarity threshold to determine if vehicles are moving in the same direction.
96 |     - relative_diff_threshold: Threshold on the relative difference between the space-gap change and the
97 |       integrated speed difference, used to filter out inconsistent trajectories.
98 |     """
99 |     # 1. Remove non-straight trajectories
100 |     is_traj_straight = {}
101 |     av_directions = {}
102 |     for traj_id, group in data.groupby('Trajectory_ID'):
103 |         # Step 1: Identify if all 'is_av_{i}' columns are 1 for each trajectory
104 |         av_idx = None
105 |         for i in range(128):
106 |             if all(group[f'is_av_{i}'] == 1):
107 |                 av_idx = i
108 |                 break
109 |         if av_idx is None:
110 |             continue
111 | 
112 |         # Extract 'x' and 'y' coordinates and fit a linear regression model
113 |         X = group[[f'x_{av_idx}']].values.reshape(-1, 1)
114 |         y = group[f'y_{av_idx}'].values.reshape(-1, 1)
115 |         model = LinearRegression()
116 |         model.fit(X, y)
117 |         y_pred = model.predict(X)
118 |         r2 = r2_score(y, y_pred)
119 | 
120 |         # Check if the trajectory is straight based on the R² value
121 |         if r2 >= straight_threshold:
122 |             slope = model.coef_[0][0]
123 |             direction_vector = np.array([1, slope])
124 |             direction_norm = np.linalg.norm(direction_vector)
125 |             direction_vector = direction_vector / direction_norm
126 |             av_directions[traj_id] = (av_idx, direction_vector)
127 |             is_traj_straight[traj_id] = True
128 |         else:
129 |             is_traj_straight[traj_id] = False
130 | 
131 |     # Remove trajectories that are not straight
132 |     traj_ids_to_delete = [traj_id for traj_id, result in is_traj_straight.items() if not result]
133 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
134 | 
135 |     # 2. Remove trajectories that are not following another vehicle
136 |     nearest_vehicle_indices = {}
137 |     traj_ids_to_delete = []
138 |     cosine_threshold = direction_threshold
139 | 
140 |     for traj_id, group in data.groupby('Trajectory_ID'):
141 |         av_idx = av_directions[traj_id][0]
142 |         ref_x_direction, ref_y_direction = av_directions[traj_id][1]
143 |         nearest_vehicles_per_row = []
144 |         found_nearest_in_all_rows = True
145 |         first_row_idx = group.index[0]
146 |         for row_idx, row in group.iterrows():
147 |             min_distance = float('inf')
148 |             nearest_vehicle_idx = None
149 |             for i in range(128):
150 |                 if i == av_idx:
151 |                     continue
152 | 
153 |                 # Skip opposite-direction traffic (needs a previous sample, so not on the first row)
154 |                 if row_idx != first_row_idx:
155 |                     previous_row = group.loc[row_idx - 1]
156 |                     # Dot product of the frame-to-frame displacements of vehicle i and the AV;
157 |                     # a negative value means they move in opposite directions
158 |                     dot_product = (row[f'x_{i}'] - previous_row[f'x_{i}']) * \
159 |                                   (row[f'x_{av_idx}'] - previous_row[f'x_{av_idx}']) + \
160 |                                   (row[f'y_{i}'] - previous_row[f'y_{i}']) * \
161 |                                   (row[f'y_{av_idx}'] - previous_row[f'y_{av_idx}'])
162 |                     if dot_product < 0:
163 |                         continue
164 | 
165 |                 # Remove vehicles not in a straight line
166 |                 x_direction = row[f'x_{i}'] - row[f'x_{av_idx}']
167 |                 y_direction = row[f'y_{i}'] - row[f'y_{av_idx}']
168 |                 vector_length = (x_direction ** 2 + y_direction ** 2) ** 0.5
169 |                 vector_cosine = ((x_direction * ref_x_direction + y_direction * ref_y_direction)
170 |                                  / vector_length) if vector_length != 0 else 0
171 |                 # Check if moving in the same direction based on cosine similarity
172 |                 if vector_cosine >= cosine_threshold:
173 |                     distance = vector_length
174 |                     if distance < min_distance:
175 |                         min_distance = distance
176 |                         nearest_vehicle_idx = i
177 |             if nearest_vehicle_idx is None:
178 |                 found_nearest_in_all_rows = False
179 |                 break
180 |             else:
181 |                 nearest_vehicles_per_row.append(nearest_vehicle_idx)
182 | 
183 |         # Add trajectories to the deletion list if they don't consistently follow the same vehicle
184 |         if not found_nearest_in_all_rows:
185 |             traj_ids_to_delete.append(traj_id)
186 |         elif len(set(nearest_vehicles_per_row)) > 1:
187 |             traj_ids_to_delete.append(traj_id)
188 |         else:
189 |             nearest_vehicle_indices[traj_id] = nearest_vehicles_per_row[0]
190 | 
191 |     # Delete trajectories that do not follow a single vehicle
192 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
193 | 
194 |     # 3. Organize a new DataFrame with filtered data
195 |     new_rows = []
196 |     for traj_id, group in data.groupby('Trajectory_ID'):
197 |         if traj_id not in av_directions or traj_id not in nearest_vehicle_indices:
198 |             continue
199 | 
200 |         av_idx = av_directions[traj_id][0]
201 |         nearest_idx = nearest_vehicle_indices[traj_id]
202 | 
203 |         if nearest_idx is None:
204 |             continue
205 |         for _, row in group.iterrows():
206 |             leader_x = row[f'x_{av_idx}']
207 |             leader_y = row[f'y_{av_idx}']
208 |             leader_length = row[f'length_{av_idx}']
209 |             leader_speed = row[f'speed_{av_idx}']
210 |             follower_x = row[f'x_{nearest_idx}']
211 |             follower_y = row[f'y_{nearest_idx}']
212 |             follower_length = row[f'length_{nearest_idx}']
213 |             follower_speed = row[f'speed_{nearest_idx}']
214 |             new_row = {'Trajectory_ID': traj_id, 'leader_speed': leader_speed, 'follower_speed': follower_speed,
215 |                        'leader_x': leader_x, 'leader_y': leader_y, 'leader_length': leader_length,
216 |                        'follower_x': follower_x, 'follower_y': follower_y, 'follower_length': follower_length}
217 |             new_rows.append(new_row)
218 | 
219 |     df = pd.DataFrame(new_rows)
220 | 
221 |     # 4. Remove trajectories where the space gap and speed difference do not match
222 |     num_before = len(df)
223 | 
224 |     df['Space_Gap'] = (np.sqrt(
225 |         (df['leader_x'] - df['follower_x']) ** 2 + (df['leader_y'] - df['follower_y']) ** 2)) - df[
226 |         'leader_length'] / 2 - df['follower_length'] / 2
227 | 
228 |     df['Speed_Diff'] = df['leader_speed'] - df['follower_speed']
229 | 
230 |     grouped = df.groupby('Trajectory_ID').agg(
231 |         Speed_Diff_Mean=('Speed_Diff', 'mean'),
232 |         Space_Gap_Change=('Space_Gap', lambda x: x.iloc[-1] - x.iloc[0])
233 |     )
234 |     # The gap change over the 9.1 s clip should roughly equal the mean speed difference times 9.1 s
235 |     grouped['Relative_Diff'] = abs(grouped['Space_Gap_Change'] - (grouped['Speed_Diff_Mean'] * 9.1)) / (
236 |             grouped['Speed_Diff_Mean'] * 9.1)
237 | 
238 |     traj_to_remove_relative = grouped[grouped['Relative_Diff'] > relative_diff_threshold].index
239 | 
240 |     df = df[~df['Trajectory_ID'].isin(traj_to_remove_relative)]
241 | 
242 |     df.drop(['Space_Gap', 'Speed_Diff'], axis=1, inplace=True)
243 | 
244 |     num_after = len(df)
245 |     print(f'Total trajectories: {num_before // 91}, deleted trajectories: {(num_before - num_after) // 91}.')
246 | 
247 |     return df
248 | 
249 | 
250 | def Argo2_extract_df(input_paths, traj_id):
251 |     df_list = []
252 |     for input_path in input_paths:
253 |         data = pd.read_parquet(input_path)
254 | 
255 |         df = pd.DataFrame(
256 |             columns=['Trajectory_ID', 'Time_Index', 'ID', 'x', 'y', 'speed'])
257 |         df['Time_Index'] = data.iloc[:, 4]
258 |         df['Trajectory_ID'] = traj_id
259 |         df['ID'] = data.iloc[:, 1].replace({'AV': 0})
260 |         df['x'] = data.iloc[:, 5]
261 |         df['y'] = data.iloc[:, 6]
262 |         df['speed'] = np.linalg.norm(data.iloc[:, [8, 9]].values, axis=1)
263 | 
264 |         def reshape_group_optimized(group):
265 |             group = group.reset_index(drop=True)
266 |             reshaped_data_info = group.loc[0, ['Trajectory_ID', 'Time_Index']].to_dict()
267 |             columns_data = []
268 |             for _, row in group.iterrows():
269 |                 suffix = f"_{row['ID']}"
270 |                 temp_df = pd.DataFrame({
271 |                     f'ID{suffix}': [row['ID']],
272 |                     f'x{suffix}': [row['x']],
273 |                     f'y{suffix}': [row['y']],
274 |                     f'speed{suffix}': [row['speed']]
275 |                 })
276 |                 columns_data.append(temp_df)
277 |             reshaped_data = pd.concat([pd.DataFrame(reshaped_data_info, index=[0])] + columns_data, axis=1)
278 |             return reshaped_data
279 | 
280 |         df = df.groupby(['Trajectory_ID', 'Time_Index']).apply(reshape_group_optimized).reset_index(
281 |             drop=True)
282 |         df = df.groupby(['Trajectory_ID', 'Time_Index']).first().unstack(
283 |             fill_value=0).stack(future_stack=True).reset_index()
284 | 
285 |         columns = ['Trajectory_ID', 'Time_Index']
286 |         for i in range(int((len(df.columns) - 2) / 4)):
287 |             columns += [f'ID_{i}', f'x_{i}', f'y_{i}', f'speed_{i}']
288 |         df.columns = columns
289 | 
290 |         df_list.append(df)
291 | 
292 |         traj_id += 1
293 | 
294 |     df = pd.concat(df_list)
295 |     return df
296 | 
297 | 
298 | def Argo2_extract_cf_traj(data, output_path, straight_threshold=0.9, direction_threshold=0.985,
299 |                           relative_diff_threshold=0.2):
300 |     """
301 |     Filters and processes car-following trajectories from the Argoverse 2 dataset.
302 | 
303 |     Parameters:
304 |     - data: DataFrame containing trajectory data.
305 |     - output_path: Path where the processed data should be saved.
306 |     - straight_threshold: Threshold for R² to consider a trajectory as straight.
307 |     - direction_threshold: Cosine similarity threshold to determine direction alignment.
308 |     - relative_diff_threshold: Threshold on the relative difference between the space-gap change and the
309 |       integrated speed difference.
310 |     """
311 | 
312 |     # 1. Remove non-straight trajectories
313 |     is_traj_straight = {}
314 |     av_directions = {}
315 |     for traj_id, group in data.groupby('Trajectory_ID'):
316 |         # Step 1: Identify which vehicle is the AV and check if all its trajectory points are labeled as AV
317 |         av_idx = None
318 |         for i in range(int((len(group.columns) - 2) / 4)):
319 |             if all(group[f'ID_{i}'] == 0):
320 |                 av_idx = i
321 |                 break
322 |         if av_idx is None:
323 |             continue
324 | 
325 |         # Extract x and y coordinates and fit a linear regression model
326 |         X = group[[f'x_{av_idx}']].values.reshape(-1, 1)
327 |         y = group[[f'y_{av_idx}']].values.reshape(-1, 1)
328 |         model = LinearRegression()
329 |         model.fit(X, y)
330 |         y_pred = model.predict(X)
331 |         r2 = r2_score(y, y_pred)
332 | 
333 |         # Check if the trajectory is straight using the R² value
334 |         if r2 >= straight_threshold:
335 |             slope = model.coef_[0][0]
336 |             direction_vector = np.array([1, slope])
337 |             direction_norm = np.linalg.norm(direction_vector)
338 |             direction_vector /= direction_norm
339 |             av_directions[traj_id] = (av_idx, direction_vector)
340 |             is_traj_straight[traj_id] = True
341 |         else:
342 |             is_traj_straight[traj_id] = False
343 | 
344 |     # Remove trajectories that are not considered straight
345 |     traj_ids_to_delete = [traj_id for traj_id, result in is_traj_straight.items() if not result]
346 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
347 | 
348 |     # 2. Remove trajectories that are not following any vehicle
349 |     nearest_vehicle_indices = {}
350 |     traj_ids_to_delete = []
351 |     for traj_id, group in data.groupby('Trajectory_ID'):
352 |         av_idx = av_directions[traj_id][0]
353 |         ref_x_direction, ref_y_direction = av_directions[traj_id][1]
354 |         nearest_vehicles_per_row = []
355 |         found_nearest_in_all_rows = True
356 |         first_row_idx = group.index[0]
357 |         for row_idx, row in group.iterrows():
358 |             min_distance = float('inf')
359 |             nearest_vehicle_idx = None
360 |             for i in range(int((len(group.columns) - 2) / 4)):
361 |                 if i == av_idx:
362 |                     continue
363 | 
364 |                 # Exclude oncoming traffic (needs a previous sample, so not on the first row)
365 |                 if row_idx != first_row_idx:
366 |                     previous_row = group.loc[row_idx - 1]
367 |                     # Dot product of the frame-to-frame displacements of vehicle i and the AV;
368 |                     # a negative value means they move in opposite directions
369 |                     dot_product = (row[f'x_{i}'] - previous_row[f'x_{i}']) * \
370 |                                   (row[f'x_{av_idx}'] - previous_row[f'x_{av_idx}']) + \
371 |                                   (row[f'y_{i}'] - previous_row[f'y_{i}']) * \
372 |                                   (row[f'y_{av_idx}'] - previous_row[f'y_{av_idx}'])
373 |                     if dot_product < 0:
374 |                         continue
375 | 
376 |                 # Check if vehicles are moving in a straight line and in the same direction
377 |                 x_direction = row[f'x_{i}'] - row[f'x_{av_idx}']
378 |                 y_direction = row[f'y_{i}'] - row[f'y_{av_idx}']
379 |                 vector_length = np.sqrt(x_direction ** 2 + y_direction ** 2)
380 |                 vector_cosine = ((x_direction * ref_x_direction + y_direction * ref_y_direction) /
381 |                                  vector_length) if vector_length != 0 else 0
382 |                 if vector_cosine >= direction_threshold:
383 |                     distance = vector_length
384 |                     if distance < min_distance:
385 |                         min_distance = distance
386 |                         nearest_vehicle_idx = i
387 |             if nearest_vehicle_idx is None:
388 |                 found_nearest_in_all_rows = False
389 |                 break
390 |             else:
391 |                 nearest_vehicles_per_row.append(nearest_vehicle_idx)
392 | 
393 |         # Remove trajectories if they do not consistently follow the same vehicle
394 |         if not found_nearest_in_all_rows or len(set(nearest_vehicles_per_row)) > 1:
395 |             traj_ids_to_delete.append(traj_id)
396 |         else:
397 |             nearest_vehicle_indices[traj_id] = nearest_vehicles_per_row[0]
398 | 
399 |     # Remove data for trajectories without a consistent following vehicle
400 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
401 | 
402 |     # 3. Prepare a new DataFrame with the filtered data
403 |     new_rows = []
404 |     for traj_id, group in data.groupby('Trajectory_ID'):
405 |         if traj_id not in av_directions or traj_id not in nearest_vehicle_indices:
406 |             continue  # Skip if the trajectory ID is not in the filtered set
407 | 
408 |         av_idx = av_directions[traj_id][0]
409 |         nearest_idx = nearest_vehicle_indices[traj_id]
410 |         for _, row in group.iterrows():
411 |             leader_x = row[f'x_{av_idx}']
412 |             leader_y = row[f'y_{av_idx}']
413 |             leader_speed = row[f'speed_{av_idx}']
414 |             follower_x = row[f'x_{nearest_idx}']
415 |             follower_y = row[f'y_{nearest_idx}']
416 |             follower_speed = row[f'speed_{nearest_idx}']
417 |             new_row = {'Trajectory_ID': traj_id, 'leader_speed': leader_speed, 'follower_speed': follower_speed,
418 |                        'leader_x': leader_x, 'leader_y': leader_y, 'follower_x': follower_x, 'follower_y': follower_y}
419 |             new_rows.append(new_row)
420 | 
421 |     # Create DataFrame from the filtered data
422 |     df = pd.DataFrame(new_rows)
423 |     df = df.reset_index(drop=True)
424 |     row_count = data.shape[0]
425 |     if row_count % 110 != 0:
426 |         print(f"Error: The number of rows ({row_count}) is not a multiple of 110.")
427 |     df['Trajectory_ID'] = df.index // 110
428 | 
429 |     # 4. Remove trajectories where the space gap and speed difference do not match
430 |     num_before = len(df)
431 |     # default_vehicle_length presumably comes from the data_transformation wildcard import
432 |     df['Space_Gap'] = (np.sqrt(
433 |         (df['leader_x'] - df['follower_x']) ** 2 + (df['leader_y'] - df['follower_y']) ** 2)) - default_vehicle_length
434 | 
435 |     df['Speed_Diff'] = df['leader_speed'] - df['follower_speed']
436 | 
437 |     grouped = df.groupby('Trajectory_ID').agg(
438 |         Speed_Diff_Mean=('Speed_Diff', 'mean'),
439 |         Space_Gap_Change=('Space_Gap', lambda x: x.iloc[-1] - x.iloc[0])
440 |     )
441 | 
442 |     grouped['Relative_Diff'] = abs(grouped['Space_Gap_Change'] - (grouped['Speed_Diff_Mean'] * 11)) / (
443 |             grouped['Speed_Diff_Mean'] * 11)
444 | 
445 |     traj_to_remove_relative = grouped[grouped['Relative_Diff'] > relative_diff_threshold].index
446 | 
447 |     df = df[~df['Trajectory_ID'].isin(traj_to_remove_relative)]
448 | 
449 |     df.drop(['Space_Gap', 'Speed_Diff'], axis=1, inplace=True)
450 | 
451 |     num_after = len(df)
452 |     print(
453 |         f'Total number of trajectories: {num_before // 110}, number of trajectories deleted: {(num_before - num_after) // 110}.')
454 | 
455 |     df.to_csv(output_path, index=False)
456 | 
457 | 
458 | def combine():
459 |     for i in range(1):
460 |         merge_data_list = []
461 |         for j in range(25):
462 |             cf_path = f'./Dataset/Argoverse/data/val/CF_trajectories_{i * 25 + j + 1}.csv'
463 |             merge_data_list.append(cf_path)
464 |         merge_data_path = f'./Dataset/Argoverse/output/step0_CF_trajectory_{i}.csv'
465 |         merge_data(merge_data_list, merge_data_path)
466 | 
--------------------------------------------------------------------------------
/Code/Main.py:
--------------------------------------------------------------------------------
1 | from CF_extraction import *
2 | from data_transformation import *
3 | from data_cleaning import *
4 | from model_calibration import *
5 | from data_analysis import *
6 | from pathlib import Path
7 | 
8 | 
9 | def Vanderbilt_two_vehicle_ACC():
10 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
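    # A rough sketch of the uniform car-following format, inferred from the columns
    # consumed in data_analysis.py (the authoritative schema is built in
    # data_transformation.py): one row per timestep with at least
    #   Trajectory_ID, Time_Index,
    #   Speed_LV, Acc_LV              (leading vehicle),
    #   Speed_FAV, Acc_FAV            (following automated vehicle),
    #   Spatial_Gap, Spatial_Headway, Speed_Diff (= Speed_LV - Speed_FAV).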
11 |     original_data_path = './Dataset/Vanderbilt/data/Two-vehicle ACC driving, Tennessee 2019/Processed_CAN_Data_a.csv'
12 |     uniform_format_path = './Dataset/Vanderbilt/output/step1_two_vehicle_ACC.csv'
13 |     Vanderbilt_convert_format(original_data_path, uniform_format_path)
14 |     step1_stat_result_path = './Dataset/Vanderbilt/output/step1_analysis_two_vehicle_ACC'
15 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
16 | 
17 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
18 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
19 |                                 1e10, -1e10, 1e10, -1e10,
20 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
21 |     step2_data_path = f'./Dataset/Vanderbilt/output/step2_two_vehicle_ACC.csv'
22 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
23 |     step2_stat_result_path = './Dataset/Vanderbilt/output/step2_analysis_two_vehicle_ACC'
24 |     analyze_statistics(step2_data_path, step2_stat_result_path)
25 | 
26 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
27 |     clean_data = fill_and_clean(step2_data_path, 10, None,
28 |                                 120, 1e-5, 1e10, 0.1,
29 |                                 1e10, 0.1, 5, -5, 5, -5)
30 |     step3_data_path = f'./Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv'
31 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
32 |     step3_stat_result_path = './Dataset/Vanderbilt/output/step3_analysis_two_vehicle_ACC'
33 |     analyze_statistics(step3_data_path, step3_stat_result_path)
34 | 
35 |     # Analysis
36 |     performance_result_path = './Dataset/Vanderbilt/output/performance_metrics_two_vehicle_ACC.csv'
37 |     analyze_AV_performance(step3_data_path, performance_result_path)
38 | 
39 |     scatter_plot_path = './Dataset/Vanderbilt/output/scatter_two_vehicle_ACC'
40 |     draw_scatter(step3_data_path, scatter_plot_path)
41 | 
42 |     calibration_result_path = './Dataset/Vanderbilt/output/calibration_two_vehicle_ACC.csv'
43 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
44 |     linear_regression.main(calibration_result_path, "linear")
45 | 
46 | 
47 | def MicroSimACC():
48 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
49 |     merge_data_list = []
50 |     for p1 in [35, 60]:
51 |         for p2 in [0, 15, 25, 35, 45, 50, 55]:
52 |             if p2 > p1 - 10: continue
53 |             for p3 in ['L', 'M', 'S']:
54 |                 for p4 in range(1, 5):
55 |                     original_data_path = \
56 |                         f'./Dataset/MicroSimACC/data/2-Vehicle ACC Car Following Experiments (CCF, Same Desired Speed)/{p1}_{p2}_{p3}_{p4}.csv'
57 |                     uniform_format_path = f'./Dataset/MicroSimACC/output/step1_same_speed_{p1}_{p2}_{p3}_{p4}.csv'
58 |                     MicroSimACC_convert_format(original_data_path, uniform_format_path)
59 |                     merge_data_list.append(uniform_format_path)
60 | 
61 |     merge_data_path = f'./Dataset/MicroSimACC/output/step1_merge.csv'
62 |     merge_data(merge_data_list, merge_data_path)
63 |     step1_stat_result_path = './Dataset/MicroSimACC/output/step1_analysis'
64 |     analyze_statistics(merge_data_path, step1_stat_result_path)
65 | 
66 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
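    # Hedged reading of the fill_and_clean arguments (the function is defined in
    # data_cleaning.py): 10 looks like a maximum run of missing frames to fill, the
    # list holds per-column fill limits, and the remaining ten numbers are
    # upper/lower bound pairs. At this step the bounds are +/-1e10, i.e. effectively
    # disabled, so step 2 only fills gaps; the step 3 calls pass the real physical
    # bounds (e.g. accelerations limited to +/-5 m/s^2).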
67 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
68 |                                 1e10, -1e10, 1e10, -1e10,
69 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
70 |     step2_data_path = f'./Dataset/MicroSimACC/output/step2.csv'
71 |     revise_traj_id(clean_data, step2_data_path, 0.2, 70, 0, 0)
72 |     step2_stat_result_path = './Dataset/MicroSimACC/output/step2_analysis'
73 |     analyze_statistics(step2_data_path, step2_stat_result_path)
74 | 
75 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
76 |     clean_data = fill_and_clean(step2_data_path, 10, None,
77 |                                 120, 1e-5, 1e10, 0.1,
78 |                                 1e10, 0.1, 5, -5, 5, -5)
79 |     step3_data_path = f'./Dataset/MicroSimACC/output/step3.csv'
80 |     revise_traj_id(clean_data, step3_data_path, 0.2, 70, 0, 0)
81 |     step3_stat_result_path = './Dataset/MicroSimACC/output/step3_analysis'
82 |     analyze_statistics(step3_data_path, step3_stat_result_path)
83 | 
84 |     # Analysis
85 |     performance_result_path = './Dataset/MicroSimACC/output/performance_metrics.csv'
86 |     analyze_AV_performance(step3_data_path, performance_result_path)
87 | 
88 |     scatter_plot_path = './Dataset/MicroSimACC/output/scatter'
89 |     draw_scatter(step3_data_path, scatter_plot_path)
90 | 
91 |     calibration_result_path = './Dataset/MicroSimACC/output/calibration.csv'
92 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
93 |     linear_regression.main(calibration_result_path, "linear")
94 | 
95 | 
96 | def CATS_ACC():
97 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
98 |     merge_data_list = []
99 |     for i in range(1, 6):
100 |         original_data_path = f'./Dataset/CATS/data/ACC/test1118/test{i}.xlsx'
101 |         uniform_format_path = f'./Dataset/CATS/output/step1_ACC_test1118_{i}.csv'
102 |         CATSACC_convert_format(original_data_path, uniform_format_path)
103 |         merge_data_list.append(uniform_format_path)
104 |     for i in range(1, 9):
105 |         original_data_path = f'./Dataset/CATS/data/ACC/test1124/test{i}.xlsx'
106 |         uniform_format_path = f'./Dataset/CATS/output/step1_ACC_test1124_{i}.csv'
107 |         CATSACC_convert_format(original_data_path, uniform_format_path)
108 |         merge_data_list.append(uniform_format_path)
109 | 
110 |     merge_data_path = f'./Dataset/CATS/output/step1_ACC_merge.csv'
111 |     merge_data(merge_data_list, merge_data_path)
112 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_ACC'
113 |     analyze_statistics(merge_data_path, step1_stat_result_path)
114 | 
115 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
116 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
117 |                                 1e10, -1e10, 1e10, -1e10,
118 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
119 |     step2_data_path = f'./Dataset/CATS/output/step2_ACC.csv'
120 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
121 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_ACC'
122 |     analyze_statistics(step2_data_path, step2_stat_result_path)
123 | 
124 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
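    # Hedged reading of revise_traj_id(clean_data, path, 0.1, 70, 0, 0), defined in
    # data_cleaning.py / data_transformation.py: 0.1 appears to be the sampling
    # interval in seconds and 70 a minimum trajectory length in rows; the meaning
    # of the trailing zeros is not visible from this file.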
125 |     clean_data = fill_and_clean(step2_data_path, 10, None,
126 |                                 120, 1e-5, 1e10, 0.1,
127 |                                 1e10, 0.1, 5, -5, 5, -5)
128 |     step3_data_path = f'./Dataset/CATS/output/step3_ACC.csv'
129 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
130 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_ACC'
131 |     analyze_statistics(step3_data_path, step3_stat_result_path)
132 | 
133 |     # Analysis
134 |     performance_result_path = './Dataset/CATS/output/performance_metrics_ACC.csv'
135 |     analyze_AV_performance(step3_data_path, performance_result_path)
136 |     scatter_plot_path = './Dataset/CATS/output/scatter_ACC'
137 |     draw_scatter(step3_data_path, scatter_plot_path)
138 | 
139 |     calibration_result_path = './Dataset/CATS/output/calibration_ACC.csv'
140 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
141 |     linear_regression.main(calibration_result_path, 'linear')
142 | 
143 | 
144 | def CATS_platoon():
145 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
146 |     original_data_path = f'./Dataset/CATS/data/Platoon'
147 |     uniform_format_path = f'./Dataset/CATS/output/step1_platoon.csv'
148 |     CATSPlatoon_convert_format(original_data_path, uniform_format_path)
149 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_platoon'
150 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
151 | 
152 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
153 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
154 |                                 1e10, -1e10, 1e10, -1e10,
155 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
156 |     step2_data_path = f'./Dataset/CATS/output/step2_platoon.csv'
157 |     revise_traj_id(clean_data, step2_data_path, 1, 90, 0, 0)
158 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_platoon'
159 |     analyze_statistics(step2_data_path, step2_stat_result_path)
160 | 
161 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
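    # The platoon data appears to be sampled at 1 Hz, which is why revise_traj_id
    # gets (1, 90) here instead of the (0.1, 70) used for the other datasets, and
    # why CFModelRegress below is constructed with a 1 s timestep.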
162 |     clean_data = fill_and_clean(step2_data_path, 10, None,
163 |                                 120, 1e-5, 1e10, 0.1,
164 |                                 1e10, 0.1, 5, -5, 5, -5)
165 |     step3_data_path = f'./Dataset/CATS/output/step3_platoon.csv'
166 |     revise_traj_id(clean_data, step3_data_path, 1, 90, 0, 0)
167 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_platoon'
168 |     analyze_statistics(step3_data_path, step3_stat_result_path)
169 | 
170 |     # Analysis
171 |     performance_result_path = './Dataset/CATS/output/performance_metrics_platoon.csv'
172 |     analyze_AV_performance(step3_data_path, performance_result_path)
173 |     scatter_plot_path = './Dataset/CATS/output/scatter_platoon'
174 |     draw_scatter(step3_data_path, scatter_plot_path)
175 | 
176 |     calibration_result_path = './Dataset/CATS/output/calibration_platoon.csv'
177 |     linear_regression = CFModelRegress(step3_data_path, 1)
178 |     linear_regression.main(calibration_result_path, 'linear')
179 | 
180 | 
181 | def CATS_UWM():
182 |     merge_data_list = []
183 |     for i in range(1, 6):
184 |         original_data_path = f'./Dataset/CATS/data/UWM/Test{i}.csv'
185 |         uniform_format_path = f'./Dataset/CATS/output/step1_UWM_test{i}.csv'
186 |         CATSUW_convert_format(original_data_path, uniform_format_path)
187 |         merge_data_list.append(uniform_format_path)
188 | 
189 |     merge_data_path = f'./Dataset/CATS/output/step1_UWM_merge.csv'
190 |     merge_data(merge_data_list, merge_data_path)
191 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_UWM'
192 |     analyze_statistics(merge_data_path, step1_stat_result_path)
193 | 
194 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
195 |                                 1e10, -1e10, 1e10, -1e10,
196 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
197 |     step2_data_path = f'./Dataset/CATS/output/step2_UWM.csv'
198 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
199 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_UWM'
200 |     analyze_statistics(step2_data_path, step2_stat_result_path)
201 | 
202 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
203 |     clean_data = fill_and_clean(step2_data_path, 10, None,
204 |                                 120, 1e-5, 1e10, 0.1,
205 |                                 1e10, 0.1, 5, -5, 5, -5)
206 |     step3_data_path = f'./Dataset/CATS/output/step3_UWM.csv'
207 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
208 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_UWM'
209 |     analyze_statistics(step3_data_path, step3_stat_result_path)
210 | 
211 |     # Analysis
212 |     performance_result_path = './Dataset/CATS/output/performance_metrics_UWM.csv'
213 |     analyze_AV_performance(step3_data_path, performance_result_path)
214 |     scatter_plot_path = './Dataset/CATS/output/scatter_UWM'
215 |     draw_scatter(step3_data_path, scatter_plot_path)
216 | 
217 |     calibration_result_path = './Dataset/CATS/output/calibration_UWM.csv'
218 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
219 |     linear_regression.main(calibration_result_path, 'linear')
220 | 
221 | 
222 | def OpenACC_Casale():
223 |     id_map = {
224 |         "Hyundai": 0,
225 |         "Rexton": 1
226 |     }
227 | 
228 |     merge_data_list = []
229 |     for i in range(3, 12):
230 |         original_data_path = f'./Dataset/OpenACC/data/Casale/part{i}.csv'
231 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_Casale_{i}.csv'
232 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
233 |         merge_data_list.append(uniform_format_path)
234 | 
235 |     merge_data_path = f'./Dataset/OpenACC/output/step1_Casale_merge.csv'
236 |     merge_data(merge_data_list, merge_data_path)
237 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_Casale'
238 |     analyze_statistics(merge_data_path, step1_stat_result_path)
239 | 
240 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
241 |                                 1e10, -1e10, 1e10, -1e10,
242 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
243 |     step2_data_path = './Dataset/OpenACC/output/step2_Casale.csv'
244 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
245 |     step2_stat_result_path = './Dataset/OpenACC/output/step2_analysis_Casale'
246 |     analyze_statistics(step2_data_path, step2_stat_result_path)
247 | 
248 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
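    # The "linear" mode passed to CFModelRegress.main below presumably fits a
    # linear car-following law by least squares. A minimal sketch of that idea
    # (variable names hypothetical; the real implementation is in
    # model_calibration.py):
    #
    #   data = pd.read_csv(step3_data_path)
    #   A = np.column_stack([np.ones(len(data)), data['Spatial_Gap'],
    #                        data['Speed_FAV'], data['Speed_Diff']])
    #   coef, *_ = np.linalg.lstsq(A, data['Acc_FAV'].values, rcond=None)
    #   # Acc_FAV ~ c0 + c1 * gap + c2 * own speed + c3 * speed difference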
249 |     clean_data = fill_and_clean(step2_data_path, 10, None,
250 |                                 120, 1e-5, 1e10, 0.1,
251 |                                 1e10, 0.1, 5, -5, 5, -5)
252 |     step3_data_path = f'./Dataset/OpenACC/output/step3_Casale.csv'
253 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
254 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_Casale'
255 |     analyze_statistics(step3_data_path, step3_stat_result_path)
256 | 
257 |     # Analysis
258 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_Casale.csv'
259 |     analyze_AV_performance(step3_data_path, performance_result_path)
260 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_Casale'
261 |     draw_scatter(step3_data_path, scatter_plot_path)
262 | 
263 |     calibration_result_path = './Dataset/OpenACC/output/calibration_Casale.csv'
264 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
265 |     linear_regression.main(calibration_result_path, 'linear')
266 | 
267 | 
268 | def OpenACC_Vicolungo():
269 |     file_names = ['JRC-VC_260219_part2', 'JRC-VC_260219_part3', 'JRC-VC_260219_part4_highway',
270 |                   'VC-JRC_260219_part1', 'VC-JRC_260219_part2', 'VC-JRC_270219_part1', 'VC-JRC_270219_part2',
271 |                   'VC-JRC_280219_part2', 'VC-JRC_280219_part3']
272 |     id_map = {
273 |         "Ford(S-Max)": 0,
274 |         "KIA(Niro)": 1,
275 |         "Mini(Cooper)": 2,
276 |         "Mitsubishi(OutlanderPHEV)": 3,
277 |         "Mitsubishi(SpaceStar)": 4,
278 |         "Peugeot(3008GTLine)": 5,
279 |         "VW(GolfE)": 6
280 |     }
281 | 
282 |     merge_data_list = []
283 |     for i in range(1, 8):
284 |         original_data_path = f'./Dataset/OpenACC/data/Vicolungo/' + file_names[i] + '.csv'
285 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_Vicolungo_{i}.csv'
286 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
287 |         merge_data_list.append(uniform_format_path)
288 | 
289 |     merge_data_path = f'./Dataset/OpenACC/output/step1_Vicolungo_merge.csv'
290 |     merge_data(merge_data_list, merge_data_path)
291 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_Vicolungo'
292 |     analyze_statistics(merge_data_path, step1_stat_result_path, False)
293 | 
294 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
295 |                                 1e10, -1e10, 1e10, -1e10,
296 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
297 |     step2_data_path = f'./Dataset/OpenACC/output/step2_Vicolungo.csv'
298 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
299 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_Vicolungo'
300 |     analyze_statistics(step2_data_path, step2_stat_result_path)
301 | 
302 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
303 |     clean_data = fill_and_clean(step2_data_path, 10, None,
304 |                                 120, 1e-5, 1e10, 0.1,
305 |                                 1e10, 0.1, 5, -5, 5, -5)
306 |     step3_data_path = f'./Dataset/OpenACC/output/step3_Vicolungo.csv'
307 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
308 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_Vicolungo'
309 |     analyze_statistics(step3_data_path, step3_stat_result_path)
310 | 
311 |     # Analysis
312 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_Vicolungo.csv'
313 |     analyze_AV_performance(step3_data_path, performance_result_path)
314 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_Vicolungo'
315 |     draw_scatter(step3_data_path, scatter_plot_path)
316 | 
317 |     calibration_result_path = './Dataset/OpenACC/output/calibration_Vicolungo.csv'
318 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
319 |     linear_regression.main(calibration_result_path, 'linear')
320 | 
321 | 
322 | def OpenACC_ASta():
323 |     id_map = {
324 |         "Audi(A6)": 0,
325 |         "Audi(A8)": 1,
326 |         "BMW(X5)": 2,
327 |         "Mercedes(AClass)": 3,
328 |         "Tesla(Model3)": 4
329 |     }
330 | 
331 |     merge_data_list = []
332 |     for i in range(1, 11):
333 |         if i != 3 and i != 10:  # ignore human driving data
334 |             original_data_path = f'./Dataset/OpenACC/data/ASta/ASta_platoon{i}.csv'
335 |             uniform_format_path = f'./Dataset/OpenACC/output/step1_ASta_{i}.csv'
336 |             OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
337 |             merge_data_list.append(uniform_format_path)
338 | 
339 |     merge_data_path = f'./Dataset/OpenACC/output/step1_ASta_merge.csv'
340 |     merge_data(merge_data_list, merge_data_path)
341 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_ASta'
342 |     analyze_statistics(merge_data_path, step1_stat_result_path)
343 | 
344 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
345 |                                 1e10, -1e10, 1e10, -1e10,
346 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
347 |     step2_data_path = './Dataset/OpenACC/output/step2_ASta.csv'
348 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
349 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_ASta'
350 |     analyze_statistics(step2_data_path, step2_stat_result_path)
351 | 
352 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
353 |     clean_data = fill_and_clean(step2_data_path, 10, None,
354 |                                 120, 1e-5, 1e10, 0.1,
355 |                                 1e10, 0.1, 5, -5, 5, -5)
356 |     step3_data_path = f'./Dataset/OpenACC/output/step3_ASta.csv'
357 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
358 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_ASta'
359 |     analyze_statistics(step3_data_path, step3_stat_result_path)
360 | 
361 |     # Analysis
362 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_ASta.csv'
363 |     analyze_AV_performance(step3_data_path, performance_result_path)
364 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_ASta'
365 |     draw_scatter(step3_data_path, scatter_plot_path)
366 | 
367 |     calibration_result_path = './Dataset/OpenACC/output/calibration_ASta.csv'
368 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
369 |     linear_regression.main(calibration_result_path, 'linear')
370 | 
371 | 
372 | def OpenACC_ZalaZone():
373 |     id_map = {
374 |         "AUDI_A4": 0,
375 |         "AUDI_E_TRON": 1,
376 |         "BMW_I3": 2,
377 |         "JAGUAR_I_PACE": 3,
378 |         "MAZDA_3": 4,
379 |         "MERCEDES_GLE450": 5,
380 |         "SMART_TARGET": 6,
381 |         "SKODA_TARGET": 7,
382 |         "TESLA_MODEL3": 8,
383 |         "TESLA_MODELS": 9,
384 |         "TESLA_MODELX": 10,
385 |         "TOYOTA_RAV4": 11
386 |     }
387 | 
388 |     merge_data_list = []
389 |     for i in range(1, 27):
390 |         original_data_path = f'./Dataset/OpenACC/data/ZalaZone/dynamic_part{i}.csv'
391 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_ZalaZone_dynamic_{i}.csv'
392 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
393 |         merge_data_list.append(uniform_format_path)
394 | 
395 |     for i in range(1, 48):
396 |         if i != 30 and i != 40 and i != 43:  # ignore human driving data
397 |             original_data_path = f'./Dataset/OpenACC/data/ZalaZone/handling_part{i}.csv'
398 |             uniform_format_path = f'./Dataset/OpenACC/output/step1_ZalaZone_handling_{i}.csv'
399 |             OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
400 |             merge_data_list.append(uniform_format_path)
401 | 
402 |     merge_data_path = f'./Dataset/OpenACC/output/step1_ZalaZone_merge.csv'
403 |     merge_data(merge_data_list, merge_data_path)
404 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_ZalaZone'
405 |     analyze_statistics(merge_data_path, step1_stat_result_path)
406 | 
407 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
408 |                                 1e10, -1e10, 1e10, -1e10,
409 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
410 |     step2_data_path = './Dataset/OpenACC/output/step2_ZalaZone.csv'
411 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
412 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_ZalaZone'
413 |     analyze_statistics(step2_data_path, step2_stat_result_path)
414 | 
415 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
416 |     clean_data = fill_and_clean(step2_data_path, 10, None,
417 |                                 120, 1e-5, 1e10, 0.1,
418 |                                 1e10, 0.1, 5, -5, 5, -5)
419 |     step3_data_path = f'./Dataset/OpenACC/output/step3_ZalaZone.csv'
420 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
421 |     step3_stat_result_path = F'./Dataset/OpenACC/output/step3_analysis_ZalaZone'
422 |     analyze_statistics(step3_data_path, step3_stat_result_path)
423 | 
424 |     # Analysis
425 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_ZalaZone.csv'
426 |     analyze_AV_performance(step3_data_path, performance_result_path)
427 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_ZalaZone'
428 |     draw_scatter(step3_data_path, scatter_plot_path)
429 | 
430 |     calibration_result_path = './Dataset/OpenACC/output/calibration_ZalaZone.csv'
431 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
432 |     linear_regression.main(calibration_result_path, 'linear')
433 | 
434 | 
435 | def Ohio_single_vehicle():
436 |     original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Single-Vehicle_Data_for_Central_Ohio.csv'
437 |     uniform_format_path = './Dataset/Ohio/output/step1_single_vehicle.csv'
438 |     Ohio_single_convert_format(original_data_path, uniform_format_path)
439 |     step1_stat_result_path = './Dataset/Ohio/output/step1_analysis_single_vehicle'
440 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
441 | 
442 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
443 |                                 1e10, -1e10, 1e10, -1e10,
444 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
445 |     step2_data_path = f'./Dataset/Ohio/output/step2_single_vehicle.csv'
446 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
447 |     step2_stat_result_path = f'./Dataset/Ohio/output/step2_analysis_single_vehicle'
448 |     analyze_statistics(step2_data_path, step2_stat_result_path)
449 | 
450 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
451 |     clean_data = fill_and_clean(step2_data_path, 10, None,
452 |                                 120, 1e-5, 1e10, 0.1,
453 |                                 1e10, 0.1, 5, -5, 5, -5)
454 |     step3_data_path = f'./Dataset/Ohio/output/step3_single_vehicle.csv'
455 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
456 |     step3_stat_result_path = f'./Dataset/Ohio/output/step3_analysis_single_vehicle'
457 |     analyze_statistics(step3_data_path, step3_stat_result_path)
458 | 
459 |     # Analysis
460 |     performance_result_path = './Dataset/Ohio/output/performance_metrics_single_vehicle.csv'
461 |     analyze_AV_performance(step3_data_path, performance_result_path)
462 |     scatter_plot_path = './Dataset/Ohio/output/scatter_single_vehicle'
463 |     draw_scatter(step3_data_path, scatter_plot_path)
464 | 
465 |     calibration_result_path = './Dataset/Ohio/output/calibration_single_vehicle.csv'
466 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
467 |     linear_regression.main(calibration_result_path, 'linear')
468 | 
469 | 
470 | def Ohio_two_vehicle():
471 |     original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Two-Vehicle_Data_for_Central_Ohio.csv'
472 |     uniform_format_path = './Dataset/Ohio/output/step1_two_vehicle'
473 |     Ohio_two_convert_format(original_data_path, uniform_format_path)
474 |     merge_data_list = []
475 |     for i in range(1, 3):
476 |         merge_data_list.append(uniform_format_path + f'_{i}.csv')
477 | 
478 |     merge_data_path = './Dataset/Ohio/output/step1_two_vehicle_merge.csv'
479 |     merge_data(merge_data_list, merge_data_path)
480 |     step1_stat_result_path = './Dataset/Ohio/output/step1_analysis_two_vehicle'
481 |     analyze_statistics(merge_data_path, step1_stat_result_path)
482 | 
483 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
484 |                                 1e10, -1e10, 1e10, -1e10,
485 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
486 |     step2_data_path = './Dataset/Ohio/output/step2_two_vehicle.csv'
487 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
488 |     step2_stat_result_path = './Dataset/Ohio/output/step2_analysis_two_vehicle'
489 |     analyze_statistics(step2_data_path, step2_stat_result_path)
490 | 
491 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
492 |     clean_data = fill_and_clean(step2_data_path, 10, None,
493 |                                 120, 1e-5, 1e10, 0.1,
494 |                                 1e10, 0.1, 5, -5, 5, -5)
495 |     step3_data_path = f'./Dataset/Ohio/output/step3_two_vehicle.csv'
496 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
497 |     step3_stat_result_path = './Dataset/Ohio/output/step3_analysis_two_vehicle'
498 |     analyze_statistics(step3_data_path, step3_stat_result_path)
499 | 
500 |     # Analysis
501 |     performance_result_path = './Dataset/Ohio/output/performance_metrics_two_vehicle.csv'
502 |     analyze_AV_performance(step3_data_path, performance_result_path)
503 |     scatter_plot_path = './Dataset/Ohio/output/scatter_two_vehicle'
504 |     draw_scatter(step3_data_path, scatter_plot_path)
505 | 
506 |     calibration_result_path = './Dataset/Ohio/output/calibration_two_vehicle.csv'
507 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
508 |     linear_regression.main(calibration_result_path, 'linear')
509 | 
510 | 
511 | def Waymo_perception():
512 |     original_data_path = './Dataset/Waymo/data/Perception/car_following_trajectory.csv'
513 |     uniform_format_path = './Dataset/Waymo/output/step1_perception.csv'
514 |     Waymo_perception_convert_format(original_data_path, uniform_format_path)
515 |     step1_stat_result_path = './Dataset/Waymo/output/step1_analysis_perception'
516 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
517 | 
518 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
519 |                                 1e10, -1e10, 1e10, -1e10,
520 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
521 |     step2_data_path = './Dataset/Waymo/output/step2_perception.csv'
522 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
523 |     step2_stat_result_path = './Dataset/Waymo/output/step2_analysis_perception'
524 |     analyze_statistics(step2_data_path, step2_stat_result_path)
525 | 
526 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
527 |     clean_data = fill_and_clean(step2_data_path, 10, None,
528 |                                 120, 1e-5, 1e10, 0.1,
529 |                                 1e10, 0.1, 5, -5, 5, -5)
530 |     step3_data_path = f'./Dataset/Waymo/output/step3_perception.csv'
531 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
532 |     step3_stat_result_path = './Dataset/Waymo/output/step3_analysis_perception'
533 |     analyze_statistics(step3_data_path, step3_stat_result_path)
534 | 
535 |     # Analysis
536 |     performance_result_path = './Dataset/Waymo/output/performance_metrics_perception.csv'
537 |     analyze_AV_performance(step3_data_path, performance_result_path)
538 |     scatter_plot_path = './Dataset/Waymo/output/scatter_perception'
539 |     draw_scatter(step3_data_path, scatter_plot_path)
540 | 
541 |     calibration_result_path = './Dataset/Waymo/output/calibration_perception.csv'
542 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
543 |     linear_regression.main(calibration_result_path, 'linear')
544 | 
545 | 
546 | def Waymo_motion():
547 |     # Step 1: Convert the Waymo Motion dataset to a uniform car-following data format and analyze statistics.
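    # Each Waymo Motion scenario provides 10 past + 1 current + 80 future samples
    # per agent at 0.1 s resolution (see Waymo_extract_df), i.e. 91 rows (~9.1 s)
    # per trajectory; this is why traj_id is derived with `index // 91` below and
    # why Waymo_extract_cf_traj compares the gap change against Speed_Diff_Mean * 9.1.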
548 |     cf_trajectory_list = []
549 |     for i in range(1000):
550 |         original_data_path = (
551 |                 f'./Dataset/Waymo/data/Motion/uncompressed_tf_example_training_training_tfexample.tfrecord-'
552 |                 + '{:05}'.format(i) + '-of-01000')
553 |         print("recording " + '{:05}'.format(i) + " ......")
554 |         original_data = Waymo_extract_df(original_data_path)
555 |         cf_trajectory_list.append(Waymo_extract_cf_traj(original_data))
556 |         if (i + 1) % 100 == 0:
557 |             cf_path = f'./Dataset/Waymo/output/step0_CF_trajectory_motion_{i - 99}-{i}.csv'
558 |             combined_df = pd.concat(cf_trajectory_list, axis=0, ignore_index=True)
559 |             combined_df = combined_df.reset_index()
560 |             combined_df['traj_id'] = combined_df.index // 91
561 |             combined_df.to_csv(cf_path, index=False)
562 |             cf_trajectory_list = []
563 | 
564 |     merge_data_list = []
565 |     for i in range(0, 1000, 100):
566 |         cf_path = f'./Dataset/Waymo/output/step0_CF_trajectory_motion_{i}-{i + 99}.csv'
567 |         uniform_format_path = f'./Dataset/Waymo/output/step1_motion_{i // 100}.csv'
568 |         Waymo_motion_convert_format(cf_path, uniform_format_path)
569 |         merge_data_list.append(uniform_format_path)
570 | 
571 |     merge_data_path = f'./Dataset/Waymo/output/step1_motion_merge.csv'
572 |     merge_data(merge_data_list, merge_data_path)
573 |     step1_stat_result_path = './Dataset/Waymo/output/step1_analysis_motion'
574 |     analyze_statistics(merge_data_path, step1_stat_result_path)
575 | 
576 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
577 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
578 |                                 1e10, -1e10, 1e10, -1e10,
579 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
580 |     step2_data_path = f'./Dataset/Waymo/output/step2_motion.csv'
581 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0, False)
582 |     step2_stat_result_path = f'./Dataset/Waymo/output/step2_analysis_motion'
583 |     analyze_statistics(step2_data_path, step2_stat_result_path)
584 | 
585 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
586 |     clean_data = fill_and_clean(step2_data_path, 10, None,
587 |                                 120, 1e-5, 1e10, 0.1,
588 |                                 1e10, 0.1, 5, -5, 5, -5)
589 |     step3_data_path = './Dataset/Waymo/output/step3_motion.csv'
590 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
591 |     step3_stat_result_path = f'./Dataset/Waymo/output/step3_analysis_motion'
592 |     analyze_statistics(step3_data_path, step3_stat_result_path)
593 | 
594 |     # Analysis
595 |     performance_result_path = './Dataset/Waymo/output/performance_metrics_motion.csv'
596 |     analyze_AV_performance(step3_data_path, performance_result_path)
597 |     scatter_plot_path = './Dataset/Waymo/output/scatter_motion'
598 |     draw_scatter(step3_data_path, scatter_plot_path)
599 | 
600 |     calibration_result_path = './Dataset/Waymo/output/calibration_motion.csv'
601 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
602 |     linear_regression.main(calibration_result_path, 'linear')
603 | 
604 | 
605 | def Argoverse2():
606 |     # Step 1: Convert the Argoverse 2 dataset to a uniform car-following data format and analyze statistics.
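    # Argoverse 2 scenarios span 11 s (110 samples at 0.1 s); Argo2_extract_cf_traj
    # relies on this when it re-derives Trajectory_ID via `index // 110` and when it
    # checks the gap change against Speed_Diff_Mean * 11.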
607 |     directory_path = Path('./Dataset/Argoverse/data/val')
608 |     cf_trajectory_list = []
609 |     count = 1
610 |     for original_data_path in directory_path.rglob('*'):
611 |         original_data_path = str(original_data_path) + '/scenario_' + str(original_data_path)[-36:] + '.parquet'
612 |         cf_trajectory_list.append(original_data_path)
613 |         if len(cf_trajectory_list) == 25000:
614 |             print(f"recording {count * len(cf_trajectory_list)} files......")
615 |             original_data = Argo2_extract_df(cf_trajectory_list, 0)
616 |             cf_path = f'./Dataset/Argoverse/output/CF_trajectories_{count}.csv'
617 |             Argo2_extract_cf_traj(original_data, cf_path)
618 |             cf_trajectory_list = []
619 |             count += 1
620 | 
621 |     merge_data_list = []
622 |     for i in range(1, 10):
623 |         cf_path = f'./Dataset/Argoverse/output/step0_CF_trajectory_{i}.csv'
624 |         uniform_format_path = f'./Dataset/Argoverse/output/step1_{i}.csv'
625 |         Argoverse_convert_format(cf_path, uniform_format_path)
626 |         merge_data_list.append(uniform_format_path)
627 | 
628 |     merge_data_path = f'./Dataset/Argoverse/output/step1_merge.csv'
629 |     merge_data(merge_data_list, merge_data_path)
630 |     step1_stat_result_path = './Dataset/Argoverse/output/step1_analysis'
631 |     analyze_statistics(merge_data_path, step1_stat_result_path)
632 | 
633 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
634 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None],
635 |                                 1e10, -1e10, 1e10, -1e10,
636 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
637 |     step2_data_path = f'./Dataset/Argoverse/output/step2.csv'
638 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
639 |     step2_stat_result_path = './Dataset/Argoverse/output/step2_analysis'
640 |     analyze_statistics(step2_data_path, step2_stat_result_path)
641 | 
642 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
643 |     clean_data = fill_and_clean(step2_data_path, 10, None,
644 |                                 120, 1e-5, 1e10, 0.1,
645 |                                 1e10, 0.1, 5, -5, 5, -5)
646 |     step3_data_path = f'./Dataset/Argoverse/output/step3.csv'
647 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
648 |     step3_stat_result_path = './Dataset/Argoverse/output/step3_analysis'
649 |     analyze_statistics(step3_data_path, step3_stat_result_path)
650 | 
651 |     # Analysis
652 |     performance_result_path = './Dataset/Argoverse/output/performance_metrics.csv'
653 |     analyze_AV_performance(step3_data_path, performance_result_path)
654 |     scatter_plot_path = './Dataset/Argoverse/output/scatter'
655 |     draw_scatter(step3_data_path, scatter_plot_path)
656 | 
657 |     calibration_result_path = './Dataset/Argoverse/output/calibration.csv'
658 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
659 |     linear_regression.main(calibration_result_path, 'linear')
660 | 
661 | 
662 | if __name__ == "__main__":
663 |     # Main entry point for the data extraction and analysis.
664 |     Vanderbilt_two_vehicle_ACC()
665 | 
666 |     MicroSimACC()
667 | 
668 |     CATS_ACC()
669 |     CATS_platoon()
670 |     CATS_UWM()
671 | 
672 |     OpenACC_Casale()
673 |     OpenACC_Vicolungo()
674 |     OpenACC_ASta()
675 |     OpenACC_ZalaZone()
676 | 
677 |     Ohio_single_vehicle()
678 |     Ohio_two_vehicle()
679 | 
680 |     Argoverse2()
681 | 
682 |     Waymo_perception()
683 |     Waymo_motion()
684 | 
685 |     # Draw performance metrics distribution for various datasets.
686 |     paths = [
687 |         './Dataset/Vanderbilt/output/performance_metrics_two_vehicle_ACC.csv',
688 |         './Dataset/MicroSimACC/output/performance_metrics.csv',
689 |         './Dataset/CATS/output/performance_metrics_ACC.csv',
690 |         './Dataset/CATS/output/performance_metrics_platoon.csv',
691 |         './Dataset/CATS/output/performance_metrics_UWM.csv',
692 |         './Dataset/OpenACC/output/performance_metrics_Casale.csv',
693 |         './Dataset/OpenACC/output/performance_metrics_Vicolungo.csv',
694 |         './Dataset/OpenACC/output/performance_metrics_ASta.csv',
695 |         './Dataset/OpenACC/output/performance_metrics_ZalaZone.csv',
696 |         './Dataset/Ohio/output/performance_metrics_single_vehicle.csv',
697 |         './Dataset/Ohio/output/performance_metrics_two_vehicle.csv',
698 |         './Dataset/Waymo/output/performance_metrics_perception.csv',
699 |         './Dataset/Waymo/output/performance_metrics_motion.csv',
700 |         './Dataset/Argoverse/output/performance_metrics.csv'
701 |     ]
702 |     output_path = './Dataset/performance_metrics_'
703 |     dataset_labels = ["Vanderbilt ACC", "MicroSimACC", "CATS ACC", "CATS Platoon", "CATS UWM",
704 |                       "OpenACC Casale", "OpenACC Vicolungo", "OpenACC ASta", "OpenACC ZalaZone",
705 |                       "Ohio Single", "Ohio Two", "Waymo Perception", "Waymo Motion", "Argoverse2"]
706 |     draw_2D_perfromance_metrics(paths[0:5], output_path + '1_', dataset_labels[0:5])
707 |     draw_2D_perfromance_metrics(paths[5:9], output_path + '2_', dataset_labels[5:9])
708 |     draw_2D_perfromance_metrics(paths[9:], output_path + '3_', dataset_labels[9:])
709 | 
710 |     # Compile statistics summary and draw distribution.
711 |     output_path = './Analysis/statistics_summary_'
712 |     merge_statistics_results(output_path)
713 |     output_path = './Analysis/step3_distribution.png'
714 |     draw_statistics_distribution(output_path)
715 | 
716 |     # Analyze label statistics and correlation
717 |     paths = [
718 |         './Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv',
719 |         './Dataset/MicroSimACC/output/step3.csv',
720 |         './Dataset/CATS/output/step3_ACC.csv',
721 |         './Dataset/CATS/output/step3_platoon.csv',
722 |         './Dataset/CATS/output/step3_UWM.csv',
723 |         './Dataset/OpenACC/output/step3_Casale.csv',
724 |         './Dataset/OpenACC/output/step3_Vicolungo.csv',
725 |         './Dataset/OpenACC/output/step3_ASta.csv',
726 |         './Dataset/OpenACC/output/step3_ZalaZone.csv',
727 |         './Dataset/Ohio/output/step3_single_vehicle.csv',
728 |         './Dataset/Ohio/output/step3_two_vehicle.csv',
729 |         './Dataset/Waymo/output/step3_perception.csv',
730 |         './Dataset/Waymo/output/step3_motion.csv',
731 |         './Dataset/Argoverse/output/step3.csv'
732 |     ]
733 |     output_path = f'./Analysis/labels_statistics_'
734 |     dataset_labels = ["Vanderbilt ACC", "MicroSimACC", "CATS ACC", "CATS Platoon", "CATS UWM",
735 |                       "OpenACC Casale", "OpenACC Vicolungo", "OpenACC ASta", "OpenACC ZalaZone",
736 |                       "Ohio Single", "Ohio Two", "Waymo Perception", "Waymo Motion", "Argoverse2"]
737 |     draw_2D_labels_statistics(paths[0:5], output_path + '1_', dataset_labels[0:5])
738 |     draw_2D_labels_statistics(paths[5:9], output_path + '2_', dataset_labels[5:9])
739 |     draw_2D_labels_statistics(paths[9:], output_path + '3_', dataset_labels[9:])
740 | 
741 |     # Analyze label statistics and calculate correlation.
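    # correlation() is implemented in data_analysis.py; a plausible minimal
    # equivalent for a single dataset, assuming it relates the FAV acceleration to
    # the usual car-following inputs:
    #
    #   df = pd.read_csv('./Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv')
    #   print(df[['Acc_FAV', 'Speed_FAV', 'Spatial_Gap', 'Speed_Diff']].corr())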
742 |     output_path = './Analysis/correlation.csv'
743 |     correlation(output_path)
--------------------------------------------------------------------------------
/Code/data_analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | import pandas as pd
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def analyze_statistics(input_path, output_path, has_kde=True):
9 |     """Analyze statistical data and generate histograms for selected columns."""
10 |     df = pd.read_csv(input_path)
11 | 
12 |     # Define the columns for statistical analysis
13 |     columns_to_describe = ['Speed_LV', 'Acc_LV', 'Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Spatial_Headway']
14 |     statistics = df[columns_to_describe].describe()
15 |     statistics.to_csv(output_path + '_statistics.csv')
16 | 
17 |     # Define columns to generate histograms
18 |     columns_to_check = ['Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Speed_Diff']
19 |     for column in columns_to_check:
20 |         plt.figure(figsize=(18, 14))
21 |         plt.tick_params(axis='x', labelsize=40)  # Set font size for x-axis labels
22 |         plt.tick_params(axis='y', labelsize=40)  # Set font size for y-axis labels
23 |         if column == 'Acc_FAV':
24 |             plt.title('Distribution of $a$', fontsize=50)
25 |             sns.histplot(df[column], kde=has_kde, color=(42 / 255, 157 / 255, 140 / 255), line_kws={'linewidth': 5})
26 |             plt.xlabel('$a$ ($m/s^2$)', fontsize=50)
27 |         elif column == 'Spatial_Gap':
28 |             plt.title('Distribution of $s$', fontsize=50)
29 |             sns.histplot(df[column], kde=has_kde, color=(233 / 255, 196 / 255, 107 / 255), line_kws={'linewidth': 5})
30 |             plt.xlabel('$s$ ($m$)', fontsize=50)
31 |         elif column == 'Speed_Diff':
32 |             plt.title(r'Distribution of $\Delta v$', fontsize=50)
33 |             sns.histplot(df[column], kde=has_kde, color=(230 / 255, 111 / 255, 81 / 255), line_kws={'linewidth': 5})
34 |             plt.xlabel(r'$\Delta v$ ($m/s$)', fontsize=50)
35 |         else:
36 |             plt.title('Distribution of $v$', fontsize=50)
37 |             sns.histplot(df[column], kde=has_kde, color=(75 / 255, 101 / 255, 175 / 255), line_kws={'linewidth': 5})
38 |             plt.xlabel('$v$ ($m/s$)', fontsize=50)
39 |         plt.ylabel('Frequency', fontsize=50)
40 |         plt.savefig(output_path + "_" + column + '.png')
41 |         plt.close()
42 | 
43 | 
44 | def merge_statistics_results(output_path):
45 |     name = ['Vanderbilt_two_vehicle_ACC',
46 |             'MicroSimACC',
47 |             'CATS_ACC',
48 |             'CATS_platoon',
49 |             'CATS_UWM',
50 |             'OpenACC_Casale',
51 |             'OpenACC_Vicolungo',
52 |             'OpenACC_ASta',
53 |             'OpenACC_ZalaZone',
54 |             'Ohio_single_vehicle',
55 |             'Ohio_two_vehicle',
56 |             'Waymo_perception',
57 |             'Waymo_motion',
58 |             'Argoverse2']
59 |     input_path = [[
60 |         './Dataset/Vanderbilt/output/',
61 |         './Dataset/MicroSimACC/output/',
62 |         './Dataset/CATS/output/',
63 |         './Dataset/CATS/output/',
64 |         './Dataset/CATS/output/',
65 |         './Dataset/OpenACC/output/',
66 |         './Dataset/OpenACC/output/',
67 |         './Dataset/OpenACC/output/',
68 |         './Dataset/OpenACC/output/',
69 |         './Dataset/Ohio/output/',
70 |         './Dataset/Ohio/output/',
71 |         './Dataset/Waymo/output/',
72 |         './Dataset/Waymo/output/',
73 |         './Dataset/Argoverse/output/'
74 |     ], [
75 |         '_analysis_two_vehicle_ACC_statistics.csv',
76 |         '_analysis_statistics.csv',
77 |         '_analysis_ACC_statistics.csv',
78 |         '_analysis_platoon_statistics.csv',
79 |         '_analysis_UWM_statistics.csv',
80 |         '_analysis_Casale_statistics.csv',
81 |         '_analysis_Vicolungo_statistics.csv',
82 |         '_analysis_ASta_statistics.csv',
83 |         '_analysis_ZalaZone_statistics.csv',
84 |         '_analysis_single_vehicle_statistics.csv',
'_analysis_single_vehicle_statistics.csv',
85 |         '_analysis_two_vehicle_statistics.csv',
86 |         '_analysis_perception_statistics.csv',
87 |         '_analysis_motion_statistics.csv',
88 |         '_analysis_statistics.csv'
89 |     ]]
90 | 
91 |     for step in ['step1', 'step2', 'step3']:
92 | 
93 |         temp_df = pd.DataFrame()
94 |         for i in range(len(input_path[0])):
95 |             file_path = input_path[0][i] + step + input_path[1][i]
96 |             df = pd.read_csv(file_path, index_col=0)
97 | 
98 |             print(f"{name[i]}, {step}, {df.loc['count', 'Speed_LV']}")
99 | 
100 |             rows_to_keep = ['mean', 'std', 'min', 'max']
101 |             df = df.loc[df.index.isin(rows_to_keep)]
102 | 
103 |             df = df.reset_index().melt(id_vars=['index'], var_name='variable', value_name='value')
104 | 
105 |             df.columns = ['Statistics', 'Variables', name[i]]
106 | 
107 |             if temp_df.empty:
108 |                 temp_df = df
109 |             else:
110 |                 temp_df = pd.concat([temp_df, df])
111 | 
112 |         temp_df = temp_df.groupby(['Variables', 'Statistics'], sort=False).sum().reset_index()
113 |         temp_df.to_csv(output_path + step + '.csv', index=False)
114 | 
115 | 
116 | def analyze_AV_performance(input_path, output_path):
117 |     df = pd.read_csv(input_path)
118 | 
119 |     # Define a function to calculate Time to Collision (TTC), which measures safety.
120 |     def calculate_TTC(space_gap, speed_diff):
121 |         if speed_diff >= 0:
122 |             return np.nan  # If relative speed is non-negative, TTC is not defined (no collision expected).
123 |         return -space_gap / speed_diff
124 | 
125 |     # Define the coefficient matrix for the VT-micro model, a microscopic fuel consumption and emission model.
126 |     K_matrix = np.array([
127 |         [-7.537, 0.4438, 0.1716, -0.0420],
128 |         [0.0973, 0.0518, 0.0029, -0.0071],
129 |         [-0.003, -7.42e-04, 1.09e-04, 1.16e-04],
130 |         [5.3e-05, 6e-06, -1e-05, -6e-06]
131 |     ])
132 | 
133 |     # Calculate the VT model for fuel consumption and environmental impact.
134 |     def calculate_VT_model(v, a, K):
135 |         sum_j1_j2 = 0
136 |         for j1 in range(4):
137 |             for j2 in range(4):
138 |                 sum_j1_j2 += K[j1][j2] * (v ** j1) * (a ** j2)
139 |         F = np.exp(sum_j1_j2)
140 |         return F
141 | 
142 |     # Define Vehicle Specific Power (VSP), used below to estimate fuel consumption.
143 |     def calculate_VSP(v, a):
144 |         return v * (1.1 * a + 0.132) + 3.02 * 10 ** (-4) * v ** 3
145 | 
146 |     def calculate_VSP_model(v, a):
147 |         VSP = calculate_VSP(v, a)
148 |         if VSP < -10:
149 |             return 2.48e-03
150 |         elif -10 <= VSP < 10:
151 |             return 1.98e-03 * VSP ** 2 + 3.97e-02 * VSP + 2.01e-01
152 |         else:
153 |             return 7.93e-02 * VSP + 2.48e-03
154 | 
155 |     # Define the ARRB model, another fuel consumption model.
156 |     def calculate_ARRB_model(v, a):
157 |         return (0.666 + 0.019 * v + 0.001 * v ** 2 + 0.0005 * v ** 3 + 0.122 * a + 0.793 * max(a, 0) ** 2)
158 | 
159 |     # Process each group of data by calculating TTC.
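    # (TTC is defined only while the follower closes in on the leader, i.e.,
    # Speed_Diff = Speed_LV - Speed_FAV < 0. For example, a 20 m gap closing
    # at 2 m/s (Speed_Diff = -2) gives TTC = -20 / -2 = 10 s.)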
160 | df['TTC'] = df.apply(lambda row: calculate_TTC(row['Spatial_Gap'], row['Speed_Diff']), axis=1) 161 | 162 | df['Time_Headway'] = df['Spatial_Headway'] / df['Speed_FAV'] 163 | 164 | df['Acc_speed_squared_deviation'] = (df['Acc_FAV'] - df['Acc_FAV'].mean()) ** 2 / df['Speed_FAV'].mean() 165 | df['Speed_squared_deviation'] = (df['Speed_FAV'] - df['Speed_FAV'].mean()) ** 2 166 | df['Acc_squared_deviation'] = (df['Acc_FAV'] - df['Acc_FAV'].mean()) ** 2 167 | 168 | df['VT_micro_model'] = df.apply( 169 | lambda row: calculate_VT_model(row['Speed_FAV'], row['Acc_FAV'], K_matrix), axis=1) 170 | 171 | df['VSP_model'] = df.apply(lambda row: calculate_VSP_model(row['Speed_FAV'], row['Acc_FAV']), 172 | axis=1) / 800 173 | 174 | df['ARRB_model'] = df.apply(lambda row: calculate_ARRB_model(row['Speed_FAV'], row['Acc_FAV']), 175 | axis=1) / 1000 176 | 177 | df['Fuel_consumption'] = df[['VT_micro_model', 'VSP_model', 'ARRB_model']].mean(axis=1) 178 | 179 | df = df[['TTC', 'Time_Headway', 'Acc_squared_deviation', 'Acc_speed_squared_deviation', 180 | 'Speed_squared_deviation', 'Fuel_consumption', 'VT_micro_model', 'VSP_model', 'ARRB_model']] 181 | df.to_csv(output_path, index=False) 182 | 183 | 184 | def draw_scatter(input_path, output_path): 185 | df = pd.read_csv(input_path) 186 | 187 | df['Smoothed_Acc_FAV'] = df['Acc_FAV'].rolling(window=3).mean() 188 | df.loc[2:, 'Acc_FAV'] = df.loc[2:, 'Smoothed_Acc_FAV'] 189 | 190 | colors = [(42 / 255, 157 / 255, 140 / 255), 191 | (233 / 255, 196 / 255, 107 / 255), 192 | (230 / 255, 111 / 255, 81 / 255)] 193 | 194 | plt.figure(figsize=(8, 8)) # Set the size of the image here 195 | plt.scatter(df['Spatial_Gap'], df['Acc_FAV'], color=colors[0], s=2) 196 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $d$', fontsize=40) 197 | # plt.xlabel('$d$ ($\mathrm{m}$)', fontsize=30) 198 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 199 | # plt.tick_params(axis='x', labelsize=30) 200 | # plt.tick_params(axis='y', labelsize=30) 201 | plt.xticks([]) 202 | plt.yticks([]) 203 | plt.tight_layout() 204 | plt.savefig(output_path + f'_Spatial_Gap.png') # _{name} 205 | plt.close() 206 | 207 | plt.figure(figsize=(8, 8)) # Set the size of the image here 208 | plt.scatter(df['Speed_FAV'], df['Acc_FAV'], color=colors[1], s=2) 209 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $v^{\mathrm{f}}$', fontsize=40) 210 | # plt.xlabel('$v^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}$)', fontsize=30) 211 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 212 | # plt.tick_params(axis='x', labelsize=30) 213 | # plt.tick_params(axis='y', labelsize=30) 214 | plt.xticks([]) 215 | plt.yticks([]) 216 | plt.tight_layout() 217 | plt.savefig(output_path + f'_Speed_FAV.png') # _{name} 218 | plt.close() 219 | 220 | plt.figure(figsize=(8, 8)) # Set the size of the image here 221 | plt.scatter(df['Speed_Diff'], df['Acc_FAV'], color=colors[2], s=2) 222 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $\Delta v$', fontsize=40) 223 | # plt.xlabel('$\Delta v$ ($\mathrm{m}/\mathrm{s}$)', fontsize=30) 224 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 225 | # plt.tick_params(axis='x', labelsize=30) 226 | # plt.tick_params(axis='y', labelsize=30) 227 | plt.xticks([]) 228 | plt.yticks([]) 229 | plt.tight_layout() 230 | plt.savefig(output_path + f'_Speed_Diff.png') # _{name} 231 | plt.close() 232 | 233 | 234 | 235 | def draw_2D_perfromance_metrics(input_paths, output_path, dataset_labels): 236 | columns = ['TTC', 'Time_Headway', 
'Acc_squared_deviation', 'Fuel_consumption',]# 237 | 238 | colors = [ 239 | (250 / 251, 134 / 255, 0), 240 | '#54B345', 241 | '#05B9E2', 242 | (231 / 255, 56 / 255, 71 / 255), 243 | (131 / 255, 64 / 255, 38 / 255) 244 | ] 245 | 246 | dfs = [pd.read_csv(path) for path in input_paths] 247 | 248 | for col in columns: 249 | plt.figure(figsize=(10, 8)) # Set the size of the image here 250 | 251 | bins = None 252 | if col == 'TTC': 253 | plt.title(f'Distribution of $TTC$', fontsize=40) 254 | bins = np.linspace(0, 250, 100) 255 | x_label = '$TTC$ ($\mathrm{s}$)' 256 | 257 | if col == 'Time_Headway': 258 | plt.title(r'Distribution of $\tau$', fontsize=40) 259 | bins = np.linspace(0, 8, 500) 260 | x_label = r'$\tau$ ($\mathrm{s}$)' 261 | 262 | if col == 'Acc_squared_deviation': 263 | plt.title(r'Distribution of $\alpha$', fontsize=40) 264 | bins = np.linspace(0, 0.4, 500) 265 | x_label = r'$\alpha$ ($\mathrm{m}^2/\mathrm{s}^4$)' 266 | 267 | if col == 'Fuel_consumption': 268 | plt.title(f'Distribution of $F$', fontsize=40) 269 | bins = np.linspace(0, 0.01, 500) 270 | x_label = '$F$ ($\mathrm{L}/\mathrm{s}$)' 271 | 272 | for i, df in enumerate(dfs): 273 | data = df[col].dropna() 274 | hist, edges = np.histogram(data, bins=bins, density=True) # 275 | x = (edges[:-1] + edges[1:]) / 2 # Compute the centers of histogram bins 276 | 277 | repeated_x = np.repeat(x, (hist * 1000).astype(int)) # Replicate data points based on their weights 278 | 279 | # Plot KDE using replicated data points 280 | sns.kdeplot(repeated_x, fill=True, label=dataset_labels[i], color=colors[len(colors) - i - 1], 281 | alpha=0.4 - 0.05 * i, linewidth=2) 282 | 283 | plt.xlabel(x_label, fontsize=35) 284 | plt.ylabel('Density', fontsize=35) 285 | plt.tick_params(axis='both', which='major', labelsize=30) 286 | plt.xlim(0, None) 287 | if col == 'Fuel_consumption': 288 | plt.ylim(None, 3000) 289 | plt.tight_layout() 290 | 291 | plt.savefig(output_path + col + '.png') 292 | plt.close() 293 | 294 | 295 | def draw_2D_labels_statistics(input_paths, output_path, dataset_labels, 296 | columns=['Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Speed_Diff']): 297 | colors = [ 298 | (250 / 251, 134 / 255, 0), 299 | '#54B345', 300 | '#05B9E2', 301 | (231 / 255, 56 / 255, 71 / 255), 302 | (131 / 255, 64 / 255, 38 / 255) 303 | ] 304 | dfs = [pd.read_csv(path) for path in input_paths] 305 | 306 | for col in columns: 307 | plt.figure(figsize=(10, 8)) # Set the size of the image here 308 | 309 | if col == 'Speed_FAV': 310 | plt.title('Distribution of $v^{\mathrm{f}}$', fontsize=40) 311 | x_label = '$v^{\mathrm{f}}$ $(\mathrm{m}/\mathrm{s})$' 312 | 313 | if col == 'Acc_FAV': 314 | plt.title('Distribution of $a^{\mathrm{f}}$', fontsize=40) 315 | x_label = '$a^\mathrm{f}$ $(\mathrm{m}/\mathrm{s}^2)$' 316 | 317 | if col == 'Spatial_Gap': 318 | plt.title(f'Distribution of $g$', fontsize=40) 319 | x_label = '$g$ $(\mathrm{m})$' 320 | 321 | if col == 'Speed_Diff': 322 | plt.title(f'Distribution of $\Delta v$', fontsize=40) 323 | x_label = '$\Delta v$ $(\mathrm{m}/\mathrm{s})$' 324 | 325 | for i, df in enumerate(dfs): 326 | data = df[col].dropna() 327 | hist, edges = np.histogram(data, density=True) 328 | x = (edges[:-1] + edges[1:]) / 2 # Compute the centers of histogram bins 329 | 330 | repeated_x = np.repeat(x, (hist * 1000).astype(int)) # Replicate data points based on their weights 331 | 332 | # Plot KDE using replicated data points 333 | sns.kdeplot(repeated_x, fill=True, label=dataset_labels[i], color=colors[len(colors) - i - 1], 334 | alpha=0.5 - 0.07 * i, 
linewidth=2) 335 | 336 | plt.xlabel(x_label, fontsize=35) 337 | plt.ylabel('Density', fontsize=35) 338 | plt.tick_params(axis='both', which='major', labelsize=30) 339 | # plt.legend(loc='upper right', fontsize=30) 340 | 341 | if col == 'Speed_FAV' or col == 'Spatial_Gap': 342 | plt.xlim(0, None) 343 | 344 | plt.tight_layout() 345 | plt.savefig(output_path + col + '.png') 346 | plt.close() 347 | 348 | 349 | def draw_statistics_distribution(output_path): 350 | def crop_center(img): 351 | img_width, img_height = img.size 352 | return img.crop((100, 353 | 160, 354 | img_width - 160, 355 | img_height - 90)) 356 | 357 | img_paths = [ 358 | './Dataset/Vanderbilt/output/step3_analysis_two_vehicle_ACC_', 359 | './Dataset/MicroSimACC/output/step3_analysis_', 360 | './Dataset/CATS/output/step3_analysis_ACC_', 361 | './Dataset/CATS/output/step3_analysis_platoon_', 362 | './Dataset/CATS/output/step3_analysis_UWM_', 363 | './Dataset/OpenACC/output/step3_analysis_Casale_', 364 | './Dataset/OpenACC/output/step3_analysis_Vicolungo_', 365 | './Dataset/OpenACC/output/step3_analysis_ASta_', 366 | './Dataset/OpenACC/output/step3_analysis_ZalaZone_', 367 | './Dataset/Ohio/output/step3_analysis_single_vehicle_', 368 | './Dataset/Ohio/output/step3_analysis_two_vehicle_', 369 | './Dataset/Waymo/output/step3_analysis_perception_', 370 | './Dataset/Waymo/output/step3_analysis_motion_', 371 | './Dataset/Argoverse/output/step3_analysis_' 372 | ] 373 | 374 | row_images = [] 375 | 376 | for i in range(len(img_paths)): 377 | column_images = [] 378 | for var in ['Acc_FAV', 'Spatial_Gap', 'Speed_FAV', 'Speed_Diff']: 379 | column_images.append(Image.open(img_paths[i] + var + '.png')) 380 | 381 | column_images = [crop_center(img) for img in column_images] 382 | 383 | total_height = sum(img.height for img in column_images) 384 | max_width = max(img.width for img in column_images) 385 | 386 | combined_image_vertical = Image.new('RGB', (max_width, total_height)) 387 | 388 | y_offset = 0 389 | for img in column_images: 390 | combined_image_vertical.paste(img, (0, y_offset)) 391 | y_offset += img.height 392 | 393 | row_images.append(combined_image_vertical) 394 | 395 | total_width = sum(img.width for img in row_images) 396 | max_height = max(img.height for img in row_images) 397 | 398 | combined_image_horizontal = Image.new('RGB', (total_width, max_height)) 399 | 400 | # Paste the images into the new image 401 | x_offset = 0 402 | for img in row_images: 403 | combined_image_horizontal.paste(img, (x_offset, 0)) 404 | x_offset += img.width 405 | 406 | combined_image_horizontal.save(output_path) 407 | 408 | 409 | def correlation(output_path): 410 | input_paths = [ 411 | './Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv', 412 | './Dataset/MicroSimACC/output/step3.csv', 413 | './Dataset/CATS/output/step3_ACC.csv', 414 | './Dataset/CATS/output/step3_platoon.csv', 415 | './Dataset/CATS/output/step3_UWM.csv', 416 | './Dataset/OpenACC/output/step3_Casale.csv', 417 | './Dataset/OpenACC/output/step3_Vicolungo.csv', 418 | './Dataset/OpenACC/output/step3_ASta.csv', 419 | './Dataset/OpenACC/output/step3_ZalaZone.csv', 420 | './Dataset/Ohio/output/step3_single_vehicle.csv', 421 | './Dataset/Ohio/output/step3_two_vehicle.csv', 422 | './Dataset/Waymo/output/step3_perception.csv', 423 | './Dataset/Waymo/output/step3_motion.csv', 424 | './Dataset/Argoverse/output/step3.csv' 425 | ] 426 | 427 | all_data = [] 428 | 429 | for i, path in enumerate(input_paths): 430 | df = pd.read_csv(path) 431 | pearson_corr = df[['Acc_FAV', 'Spatial_Gap', 
'Speed_FAV', 'Speed_Diff']].corr() 432 | spearman_corr = df[['Acc_FAV', 'Spatial_Gap', 'Speed_FAV', 'Speed_Diff']].corr(method='spearman') 433 | corr_data = { 434 | 'ID': i + 1, 435 | 'Pearson_Spatial_Gap': pearson_corr.at['Acc_FAV', 'Spatial_Gap'], 436 | 'Spearman_Spatial_Gap': spearman_corr.at['Acc_FAV', 'Spatial_Gap'], 437 | 'Pearson_Speed_FAV': pearson_corr.at['Acc_FAV', 'Speed_FAV'], 438 | 'Spearman_Speed_FAV': spearman_corr.at['Acc_FAV', 'Speed_FAV'], 439 | 'Pearson_Speed_Diff': pearson_corr.at['Acc_FAV', 'Speed_Diff'], 440 | 'Spearman_Speed_Diff': spearman_corr.at['Acc_FAV', 'Speed_Diff'] 441 | } 442 | all_data.append(corr_data) 443 | 444 | combined_data = pd.DataFrame(all_data) 445 | combined_data.to_csv(output_path, index=False) 446 | 447 | def check_row_number(): 448 | paths = [ 449 | './Dataset/Vanderbilt/output/step2_two_vehicle_ACC.csv', 450 | './Dataset/CATS/output/step2_ACC.csv', 451 | './Dataset/CATS/output/step2_platoon.csv', 452 | './Dataset/CATS/output/step2_UWM.csv', 453 | './Dataset/OpenACC/output/step2_Casale.csv', 454 | './Dataset/OpenACC/output/step2_Vicolungo.csv', 455 | './Dataset/OpenACC/output/step2_ASta.csv', 456 | './Dataset/OpenACC/output/step2_ZalaZone.csv', 457 | './Dataset/Ohio/output/step2_single_vehicle.csv', 458 | './Dataset/Ohio/output/step2_two_vehicle.csv', 459 | './Dataset/Waymo/output/step2_perception.csv', 460 | './Dataset/Waymo/output/step2_motion.csv', 461 | './Dataset/Argoverse/output/step2.csv' 462 | ] 463 | 464 | total_rows = 0 465 | 466 | for path in paths: 467 | try: 468 | df = pd.read_csv(path) 469 | total_rows += len(df) 470 | except Exception as e: 471 | print(f"Error reading {path}: {e}") 472 | 473 | print(f"Total rows across all CSV files: {total_rows}") -------------------------------------------------------------------------------- /Code/data_cleaning.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib as mpl 3 | import numpy as np 4 | 5 | def fill_and_clean(input_path, linear_fill_in, outlier, 6 | space_gap_upper, space_gap_lower, speed_FAV_upper, speed_FAV_lower, 7 | speed_LV_upper, speed_LV_lower, acc_FAV_upper, acc_FAV_lower, 8 | acc_LV_upper, acc_LV_lower): 9 | df = pd.read_csv(input_path) 10 | 11 | # Remove non-following mode data based on defined boundaries for each variable. 12 | df.loc[~(df['Spatial_Gap'].between(space_gap_lower, space_gap_upper)), 'Spatial_Gap'] = np.nan 13 | df.loc[~(df['Speed_FAV'].between(speed_FAV_lower, speed_FAV_upper)), 'Speed_FAV'] = np.nan 14 | df.loc[~(df['Acc_FAV'].between(acc_FAV_lower, acc_FAV_upper)), 'Acc_FAV'] = np.nan 15 | df.loc[~(df['Speed_LV'].between(speed_LV_lower, speed_LV_upper)), 'Speed_LV'] = np.nan 16 | df.loc[~(df['Acc_LV'].between(acc_LV_lower, acc_LV_upper)), 'Acc_LV'] = np.nan 17 | 18 | # Replace infinities with NaN 19 | df.replace([np.inf, -np.inf], np.nan, inplace=True) 20 | 21 | rows_to_delete = set() 22 | columns = ['Speed_FAV', 'Acc_FAV', 'Speed_LV', 'Acc_LV', 'Spatial_Gap', 'Speed_Diff'] 23 | for i in range(len(columns)): 24 | column = columns[i] 25 | # Remove values beyond ±3 standard deviations iteratively until no changes are needed. 
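        # (The clipping width is outlier[i] standard deviations for the i-th
        # column, so it need not literally be 3. As an illustration, with
        # outlier[i] = 3, a column mean of 20 m/s, and a std of 5 m/s, any
        # speed outside 20 ± 15 m/s is set to NaN. The mean and std are then
        # recomputed, because removing extremes shifts both statistics, and
        # the loop repeats until a pass flags no new outliers.)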
26 |         if outlier is not None and outlier[i] is not None:
27 |             while True:
28 |                 mean = df[column].mean(skipna=True)
29 |                 std = df[column].std(skipna=True)
30 |                 outliers_condition = (df[column] < mean - outlier[i] * std) | (df[column] > mean + outlier[i] * std)
31 |                 if not df.loc[outliers_condition, column].empty:
32 |                     df.loc[outliers_condition, column] = np.nan
33 |                 else:
34 |                     break
35 | 
36 |         # Delete groups with excessive missing data.
37 |         if linear_fill_in > 0:
38 |             is_na = df[column].isna()
39 |             na_groups = is_na.ne(is_na.shift()).cumsum()
40 |             na_groups_sizes = na_groups[is_na].value_counts()
41 |             groups_to_delete = na_groups_sizes[na_groups_sizes > linear_fill_in].index
42 |             rows_to_delete.update(na_groups[na_groups.isin(groups_to_delete)].index)
43 | 
44 |     # Remove rows identified in the previous step
45 |     df.drop(index=rows_to_delete, inplace=True)
46 |     # Perform linear interpolation within each trajectory group (the result must be assigned back).
47 |     df = df.groupby('Trajectory_ID').apply(lambda group: group.interpolate(method='linear')).reset_index(drop=True)
48 |     # Drop rows that still contain NaNs after interpolation
49 |     df.dropna(inplace=True)
50 | 
51 |     return df
52 | 
53 | def revise_traj_id(df, output_path, time_step, fill_row_num, fill_start, fill_end, update_time=True):
54 |     # Filter out trajectories shorter than a specified length.
55 |     df = df.groupby('Trajectory_ID').filter(lambda x: len(x) >= fill_row_num)
56 | 
57 |     # Update time indices to ensure continuity if specified.
58 |     if update_time:
59 |         current_traj_ID = 0
60 |         current_time_ID = 0
61 |         df['Trajectory_ID'] = current_traj_ID
62 |         df['new_Time_Index'] = current_time_ID
63 | 
64 |         previous_time_ID = df.iloc[0]['Time_Index'] - time_step
65 |         for index, row in df.iterrows():
66 |             if index > 0 and abs(row['Time_Index'] - previous_time_ID) > time_step + 1e-5:
67 |                 current_traj_ID += 1
68 |                 current_time_ID = 0
69 |             else:
70 |                 current_time_ID += time_step
71 | 
72 |             df.at[index, 'Trajectory_ID'] = current_traj_ID
73 |             df.at[index, 'new_Time_Index'] = current_time_ID
74 |             previous_time_ID = row['Time_Index']
75 | 
76 |         df['Time_Index'] = df['new_Time_Index']
77 |         df.drop(columns=['new_Time_Index'], inplace=True)
78 | 
79 |     # Again filter trajectories that are too short.
80 |     df = df.groupby('Trajectory_ID').filter(lambda x: len(x) >= fill_row_num)
81 | 
82 |     # Remove unstable trajectory sections.
83 |     indices_to_remove = []
84 |     for Trajectory_ID, group in df.groupby('Trajectory_ID'):
85 |         indices_max = group.nlargest(fill_end, 'Time_Index').index
86 |         indices_min = group.nsmallest(fill_start, 'Time_Index').index
87 |         indices_to_remove.extend(indices_max)
88 |         indices_to_remove.extend(indices_min)
89 |     df.drop(indices_to_remove, inplace=True)
90 |     df = df.reset_index(drop=True)
91 |     df['Time_Index'] -= fill_start * time_step
92 | 
93 |     # Adjust positions relative to the start of each trajectory.
94 |     def adjust_positions(group):
95 |         first_Pos_FAV = group['Pos_FAV'].iloc[0]
96 |         group['Pos_FAV'] -= first_Pos_FAV
97 |         group['Pos_LV'] -= first_Pos_FAV
98 |         return group
99 | 
100 |     df = df.groupby('Trajectory_ID').apply(adjust_positions)
101 | 
102 |     # Update trajectory IDs to ensure continuity.
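    # (For example, if the surviving trajectories carry IDs [2, 5, 9], the
    # mapping below relabels them to [0, 1, 2] so that downstream code can
    # rely on consecutive numbering.)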
103 | unique_Trajectory_IDs = df['Trajectory_ID'].unique() 104 | Trajectory_ID_mapping = {old_id: new_id for new_id, old_id in enumerate(unique_Trajectory_IDs)} 105 | df['Trajectory_ID'] = df['Trajectory_ID'].map(Trajectory_ID_mapping) 106 | 107 | df.to_csv(output_path, index=False) 108 | 109 | def merge_data(merge_data_list, output_path): 110 | df_list = [] 111 | max_value = 0 112 | for path in merge_data_list: 113 | df = pd.read_csv(path) 114 | if not df.empty: 115 | df['Trajectory_ID'] += max_value 116 | max_value = df['Trajectory_ID'].max() + 1 117 | df_list.append(df) 118 | merged = pd.concat(df_list) 119 | merged.to_csv(output_path, index=False) 120 | -------------------------------------------------------------------------------- /Code/data_transformation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from geopy.distance import geodesic 3 | import numpy as np 4 | 5 | default_vehicle_length = 4.5 6 | 7 | 8 | def Vanderbilt_convert_format(input_path, output_path): 9 | data = pd.read_csv(input_path, header=None) 10 | 11 | data.insert(0, 'Trajectory_ID', 0) 12 | for idx in range(3): 13 | data.insert(2 + idx, f'col_1{idx}', 0) 14 | for idx in range(3): 15 | data.insert(6 + idx, f'col_2{idx}', 0) 16 | for idx in range(2): 17 | data.insert(12 + idx, f'col_3{idx}', 0) 18 | 19 | data.columns = ['Trajectory_ID', 'Time_Index', 20 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 21 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 22 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 23 | 24 | temp = data['Speed_LV'].copy() 25 | data['Speed_LV'] = data['Speed_FAV'] 26 | data['Speed_FAV'] = temp 27 | temp = data['Acc_FAV'].copy() 28 | data['Acc_FAV'] = data['Spatial_Gap'] 29 | data['Spatial_Gap'] = temp 30 | 31 | data['ID_LV'] = -1 32 | data['Type_LV'] = 0 33 | data['ID_FAV'] = 0 34 | 35 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 36 | data['Spatial_Headway'] = data['Spatial_Gap'] + default_vehicle_length 37 | 38 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 39 | 40 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 41 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 42 | data.loc[0, 'Pos_FAV'] = 0 43 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 44 | data = data.iloc[:-1] 45 | 46 | data.to_csv(output_path, index=False) 47 | 48 | 49 | def MicroSimACC_convert_format(input_path, output_path): 50 | data = pd.read_csv(input_path) 51 | 52 | data = data.dropna() 53 | new_column_order = ['timestamps', 'Smoothed_speed1', 'Smoothed_acceleration1', 'Smoothed_speed2', 54 | 'Smoothed_acceleration2', 'Difference'] 55 | 56 | data['Smoothed_speed1']=data['Smoothed_speed1'] * 0.44704 # mph to m/s 57 | data['Smoothed_speed2'] = data['Smoothed_speed2'] * 0.44704 # mph to m/s 58 | 59 | data = data[new_column_order] 60 | data = data.reset_index(drop=True) 61 | 62 | data.insert(0, 'Trajectory_ID', 0) 63 | for idx in range(3): 64 | data.insert(2 + idx, f'col_1{idx}', 0) 65 | for idx in range(2): 66 | data.insert(7 + idx, f'col_2{idx}', 0) 67 | data.insert(11, f'col_3', 0) 68 | data.insert(13, f'col_4', 0) 69 | 70 | data.columns = ['Trajectory_ID', 'Time_Index', 71 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 72 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 73 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 74 | 75 | data['Time_Index'] = np.arange(0, len(data) * 0.2, 0.2)[:len(data)] 76 | 77 | data['ID_LV'] = -1 78 | data['Type_LV'] = 0 79 | 
data['ID_FAV'] = 0
80 | 
81 |     data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV']
82 |     data['Spatial_Gap'] = data['Spatial_Headway'] - default_vehicle_length
83 | 
84 |     data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.2).shift(-1)  # 0.2 s sampling step, matching Time_Index
85 |     data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.2).shift(-1)
86 | 
87 |     average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2
88 |     data['Pos_FAV'] = (0.2 * average_speed).cumsum()
89 |     data.loc[0, 'Pos_FAV'] = 0
90 |     data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway']
91 |     data = data.iloc[:-1]
92 | 
93 |     data.to_csv(output_path, index=False)
94 | 
95 | 
96 | def CATSACC_convert_format(input_path, output_path):
97 |     column_names = ['id', 'time', 'lon', 'lat', 'speed']
98 |     all_sheets = pd.read_excel(input_path, sheet_name=None, header=None, names=column_names, engine='openpyxl')
99 | 
100 |     def process_time_string(time_str):
101 |         return float(time_str[5:])
102 | 
103 |     earliest_times = []
104 |     for sheet_name, sheet in all_sheets.items():
105 |         earliest_time_str = sheet['time'].iloc[0]
106 |         earliest_time = process_time_string(earliest_time_str)
107 |         earliest_times.append(earliest_time)
108 |     start_time = min(earliest_times)
109 | 
110 |     df_list = []
111 |     for i in range(2):
112 |         sheet1, sheet2 = list(all_sheets.values())[i:i + 2]
113 | 
114 |         sheet1 = sheet1.drop(columns=['id'])
115 |         sheet2 = sheet2.drop(columns=['id'])
116 | 
117 |         df = pd.merge(sheet1, sheet2, on='time')
118 | 
119 |         def calculate_distance(row):
120 |             location1 = (row['lat_x'], row['lon_x'])
121 |             location2 = (row['lat_y'], row['lon_y'])
122 |             return geodesic(location1, location2).kilometers * 1000
123 | 
124 |         df['Spatial_Headway'] = df.apply(calculate_distance, axis=1)
125 |         df.drop(columns=['lat_x', 'lat_y', 'lon_x', 'lon_y'], inplace=True)
126 |         df['time'] = df['time'].apply(lambda t: (process_time_string(t) - start_time))
127 | 
128 |         if i == 0:
129 |             df.insert(1, 'Id_l', -1)
130 |             df.insert(3, 'Id_f', 0)
131 |             df.insert(0, 'Trajectory_ID', 0)
132 |         else:
133 |             df.insert(1, 'Id_l', 0)
134 |             df.insert(3, 'Id_f', 1)
135 |             df.insert(0, 'Trajectory_ID', 1)
136 | 
137 |         df_list.append(df)
138 |     data = pd.concat(df_list)
139 | 
140 |     data.insert(3, f'type_l', 0)
141 |     data.insert(4, f'pos_l', 0)
142 |     data.insert(6, f'acc_l', 0)
143 |     data.insert(8, f'pos_f', 0)
144 |     data.insert(10, f'acc', 0)
145 |     data.insert(11, f'space_gap', 0)
146 |     data.insert(13, f's_diff', 0)
147 | 
148 |     data.columns = ['Trajectory_ID', 'Time_Index',
149 |                     'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV',
150 |                     'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV',
151 |                     'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff']
152 | 
153 |     data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0)
154 |     data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV']
155 |     data['Spatial_Gap'] = data['Spatial_Headway'] - 4.92
156 | 
157 |     data = data.reset_index(drop=True)
158 | 
159 |     data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1)
160 |     data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1)
161 | 
162 |     average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2
163 |     data['Pos_FAV'] = (0.1 * average_speed).cumsum()
164 |     data.loc[0, 'Pos_FAV'] = 0
165 |     data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway']
166 |     data = data.iloc[:-1]
167 | 
168 |     data['Time_Index'] -= data['Time_Index'].min()
169 | 
170 |     data.to_csv(output_path, index=False)
171 | 
172 | 
173 | def
CATSPlatoon_convert_format(input_path, output_path): 174 | def merge_sheets(file_path): 175 | column_names = ['id', 'time', 'lat', 'lon', 'speed'] 176 | xls = pd.ExcelFile(file_path) 177 | dfs = [] 178 | for sheet_name in xls.sheet_names: 179 | df = xls.parse(sheet_name) 180 | df.columns = column_names 181 | df.drop(columns=['id'], inplace=True) 182 | dfs.append(df) 183 | return pd.concat(dfs, ignore_index=True) 184 | 185 | df_leading = merge_sheets(input_path + "/Leading.xlsx") 186 | df_mid = merge_sheets(input_path + "/Black-Mid.xlsx") 187 | df_last = merge_sheets(input_path + "/Red-Last.xlsx") 188 | 189 | def process_time_string(time_str): 190 | return float(time_str[5:]) 191 | 192 | earliest_times = [] 193 | for sheet in [df_leading, df_mid, df_last]: 194 | earliest_time_str = sheet['time'].iloc[0] 195 | earliest_time = process_time_string(earliest_time_str) 196 | earliest_times.append(earliest_time) 197 | start_time = min(earliest_times) 198 | 199 | df_traj1 = pd.merge(df_leading, df_mid, on='time') 200 | df_traj2 = pd.merge(df_mid, df_last, on='time') 201 | 202 | count = 0 203 | for df in [df_traj1, df_traj2]: 204 | def calculate_distance(row): 205 | location1 = (row['lat_x'], row['lon_x']) 206 | location2 = (row['lat_y'], row['lon_y']) 207 | return geodesic(location1, location2).kilometers * 1000 208 | 209 | df['Spatial_Headway'] = df.apply(calculate_distance, axis=1) 210 | df.drop(columns=['lat_x', 'lat_y', 'lon_x', 'lon_y'], inplace=True) 211 | df['time'] = df['time'].apply(lambda t: (process_time_string(t) - start_time)) 212 | 213 | if count == 0: 214 | df.insert(1, 'Id_l', -1) 215 | df.insert(3, 'Id_f', 0) 216 | df.insert(0, 'Trajectory_ID', 0) 217 | else: 218 | df.insert(1, 'Id_l', 0) 219 | df.insert(3, 'Id_f', 1) 220 | df.insert(0, 'Trajectory_ID', 1) 221 | count += 1 222 | 223 | data = pd.concat([df_traj1, df_traj2]) 224 | 225 | data.insert(3, f'type_l', 0) 226 | data.insert(4, f'pos_l', 0) 227 | data.insert(6, f'acc_l', 0) 228 | data.insert(8, f'pos_f', 0) 229 | data.insert(10, f'acc', 0) 230 | data.insert(11, f'space_gap', 0) 231 | data.insert(13, f's_diff', 0) 232 | 233 | data.columns = ['Trajectory_ID', 'Time_Index', 234 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 235 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 236 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 237 | 238 | data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0) 239 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 240 | data['Spatial_Gap'] = data['Spatial_Headway'] - 4.92 241 | 242 | 243 | data = data.reset_index(drop=True) 244 | 245 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 1).shift(-1) 246 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 1).shift(-1) 247 | 248 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 249 | data['Pos_FAV'] = (1 * average_speed).cumsum() 250 | data.loc[0, 'Pos_FAV'] = 0 251 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 252 | data = data.iloc[:-1] 253 | 254 | data['Time_Index'] -= data['Time_Index'].min() 255 | 256 | data.to_csv(output_path, index=False) 257 | 258 | 259 | def CATSUW_convert_format(input_path, output_path): 260 | data = pd.read_csv(input_path) 261 | 262 | new_column_order = ['time', 'leader_p', 'leader_v', 'follower_p', 'follower_v'] 263 | data = data[new_column_order] 264 | 265 | data.insert(0, 'Trajectory_ID', 0) 266 | data.insert(2, 'ID_LV', -1) 267 | data.insert(3, 'Type_LV', -1) 268 | for idx in range(2): 269 | data.insert(6 + idx, 
f'col_2{idx}', 0) 270 | data.insert(10, f'acc', 0) 271 | data.insert(11, 'spacing', 0) 272 | data.insert(12, 'space_headway', 0) 273 | data.insert(13, 'speed_diff', 0) 274 | 275 | data.columns = ['Trajectory_ID', 'Time_Index', 276 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 277 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 278 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 279 | 280 | data['ID_LV'] = -1 281 | data['Type_LV'] = 0 282 | data['ID_FAV'] = 0 283 | 284 | data.to_csv(output_path, index=False) 285 | 286 | data['Spatial_Gap'] = data['Pos_LV'] - data['Pos_FAV'] 287 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 288 | data['Spatial_Headway'] = data['Spatial_Gap'] + 4.92 289 | 290 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 291 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 292 | 293 | data.to_csv(output_path, index=False) 294 | 295 | 296 | def OpenACC_convert_format(input_path, output_path, id_map): 297 | df = pd.read_csv(input_path, header=None, skiprows=1, nrows=1) 298 | id_row = df.values.tolist()[0][1:] 299 | id_row = [x for x in id_row if not pd.isna(x)] 300 | vehicle_ids = list(map(lambda x: id_map.get(x, x), id_row)) 301 | vehicle_num = len(vehicle_ids) 302 | 303 | data = pd.read_csv(input_path, skiprows=5) 304 | 305 | df_list = [] 306 | for i in range(vehicle_num): 307 | if all(column in data.columns for column in [f'Speed{i}', f'Speed{i + 1}', f'IVS{i}']): 308 | if f'Driver{i + 1}' in data.columns and f'Driver{i}' in data.columns: 309 | df = data[['Time', f'Driver{i}', f'Speed{i}', f'Speed{i + 1}', f'IVS{i}', f'Driver{i + 1}']].copy() 310 | df.columns = ['Time', 'Driver_l', 'Speed_l', 'Speed_f', 'IVS', 'Driver_f'] 311 | df = df[df['Driver_f'] != 'Human'] 312 | df.drop(columns=['Driver_f'], inplace=True) 313 | df['Driver_l'] = df['Driver_l'].replace({'Human': 0, 'ACC': 1}) 314 | df.insert(1, 'Id_l', vehicle_ids[i - 1]) 315 | df.insert(4, 'Id_f', vehicle_ids[i]) 316 | df.insert(0, 'Trajectory_ID', i - 1) 317 | df_list.append(df) 318 | else: 319 | df = data[['Time', f'Speed{i}', f'Speed{i + 1}', f'IVS{i}']].copy() 320 | df.columns = ['Time', 'Speed_l', 'Speed_f', 'IVS'] 321 | df.insert(1, 'Id_l', vehicle_ids[i - 1]) 322 | df.insert(2, 'Driver_l', 1) 323 | df.insert(4, 'Id_f', vehicle_ids[i]) 324 | df.insert(0, 'Trajectory_ID', i - 1) 325 | df_list.append(df) 326 | 327 | data = pd.concat(df_list) 328 | 329 | data.insert(4, f'pos_l', 0) 330 | data.insert(6, f'acc_l', 0) 331 | data.insert(8, f'pos_f', 0) 332 | data.insert(10, f'acc', 0) 333 | data.insert(12, f'space_headway', 0) 334 | data.insert(13, f's_diff', 0) 335 | 336 | data.columns = ['Trajectory_ID', 'Time_Index', 337 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 338 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 339 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 340 | 341 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 342 | data['Spatial_Headway'] = data['Spatial_Gap'] + default_vehicle_length 343 | 344 | 345 | data = data.reset_index(drop=True) 346 | 347 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 348 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 349 | 350 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 351 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 352 | data.loc[0, 'Pos_FAV'] = 0 353 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 354 | data = data.iloc[:-1] 355 | 356 
| data['Time_Index'] -= data['Time_Index'].min() 357 | 358 | data.to_csv(output_path, index=False) 359 | 360 | 361 | def Ohio_single_convert_format(input_path, output_path): 362 | related_columns = ['ID', 'Time', 'pos_x_av_f', 'speed_av', 'acc_av', 'pos_x_sv_f', 'speed_sv', 'acc_sv', 363 | 'closest_distance_longitudinal (gap)', 'distance_av (headway)', 'lane_id_av', 'lane_id_sv'] 364 | 365 | data = pd.read_csv(input_path, usecols=related_columns) 366 | 367 | data = data[data['lane_id_av'] == data['lane_id_sv']] 368 | data.drop(columns=['lane_id_sv', 'lane_id_av'], inplace=True) 369 | 370 | data = data[data['pos_x_av_f'] > data['pos_x_sv_f']] 371 | 372 | data['reset'] = data['ID'].diff() < 0 373 | data['traj_id'] = data['reset'].cumsum() + 1 374 | data.drop(columns=['reset'], inplace=True) 375 | 376 | idx = data.groupby(['traj_id', 'Time'])['closest_distance_longitudinal (gap)'].idxmin() 377 | follower_ids = data.loc[idx, ['traj_id', 'Time', 'ID']].set_index(['traj_id', 'Time']) 378 | data['tmp_index'] = data.index 379 | data = data.merge(follower_ids, on=['traj_id', 'Time'], how='left', suffixes=('', '_follower')) 380 | data.rename(columns={'ID_follower': 'follower_id'}, inplace=True) 381 | data.drop(columns=['tmp_index'], inplace=True) 382 | 383 | data = data[data['ID'] == data['follower_id']] 384 | data.drop(columns=['follower_id'], inplace=True) 385 | 386 | new_traj_id = 0 387 | traj_id_mapping = {} 388 | 389 | data.sort_values(by=['traj_id', 'ID'], inplace=True) 390 | for index, row in data.iterrows(): 391 | key = (row['traj_id'], row['ID']) 392 | if key not in traj_id_mapping: 393 | new_traj_id += 1 394 | traj_id_mapping[key] = new_traj_id 395 | data.at[index, 'traj_id'] = traj_id_mapping[key] 396 | 397 | cols = ['traj_id'] + [col for col in data.columns if col != 'traj_id'] 398 | data = data[cols] 399 | 400 | data['ID'] = -1 401 | data['traj_id'] -= 1 402 | 403 | data.insert(3, 'Type_LV', -1) 404 | data.insert(8, 'ID_FAV', 0) 405 | column_data = data['distance_av (headway)'] 406 | data.drop(columns=['distance_av (headway)'], inplace=True) 407 | data.insert(12, 'distance_av (headway)', column_data) 408 | data.insert(13, f's_diff', 0) 409 | 410 | data.columns = ['Trajectory_ID', 'Time_Index', 411 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 412 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 413 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 414 | 415 | data['Time_Index'], data['ID_LV'] = data['ID_LV'], data['Time_Index'] 416 | 417 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 418 | 419 | data.to_csv(output_path, index=False) 420 | 421 | 422 | def Ohio_two_convert_format(input_path, output_path): 423 | related_columns = ['ID', 'Time', 'pos_x_av_f', 'speed_av', 'acc_av', 'pos_x_sv1_f', 'speed_sv1', 'acc_sv1', 424 | 'pos_x_sv2_f', 'speed_sv2', 'acc_sv2', 'lane_id_av', 'lane_id_sv1', 'lane_id_sv2', 'dim_x_av', 425 | 'dim_x_sv1', 'dim_x_sv2'] 426 | 427 | def find_one_vehicle(id, id2): 428 | data = pd.read_csv(input_path, usecols=related_columns) 429 | 430 | data['ID'] += 1 431 | 432 | data['reset'] = data['ID'].diff() < 0 433 | data['traj_id'] = data['reset'].cumsum() + 1 434 | data.drop(columns=['reset'], inplace=True) 435 | 436 | result = data.groupby(['traj_id', 'Time']).first().reset_index() 437 | 438 | sv_to_av_columns = { 439 | f'pos_x_sv{id2}_f': 'pos_x_av_f', 440 | f'speed_sv{id2}': 'speed_av', 441 | f'acc_sv{id2}': 'acc_av', 442 | f'lane_id_sv{id2}': 'lane_id_av', 443 | f'dim_x_sv{id2}': 'dim_x_av' 444 | } 445 | df_sv_as_av = result[ 446 | ['traj_id', 
'Time', 'pos_x_sv1_f', 'speed_sv1', 'acc_sv1', 'pos_x_sv2_f', 'speed_sv2', 'acc_sv2', 447 | 'lane_id_sv1', 'lane_id_sv2', 'dim_x_sv1', 'dim_x_sv2']].copy() 448 | df_sv_as_av.rename(columns=sv_to_av_columns, inplace=True) 449 | df_sv_as_av['ID'] = id2 - 1 450 | 451 | data = pd.concat([data, df_sv_as_av], ignore_index=True) 452 | data = data.sort_values(by=['traj_id', 'ID', 'Time']).reset_index(drop=True) 453 | data = data.drop(columns=list(sv_to_av_columns.keys())) 454 | 455 | data = data[data['lane_id_av'] == data[f'lane_id_sv{id}']] 456 | data.drop(columns=[f'lane_id_sv{id}', 'lane_id_av'], inplace=True) 457 | 458 | data = data[data['pos_x_av_f'] > data[f'pos_x_sv{id}_f']] 459 | 460 | data['space_headway'] = data['pos_x_av_f'] - data[f'pos_x_sv{id}_f'] 461 | 462 | data['reset'] = data['ID'].diff() < 0 463 | data['traj_id'] = data['reset'].cumsum() + 1 464 | data.drop(columns=['reset'], inplace=True) 465 | 466 | idx = data.groupby(['traj_id', 'Time'])['space_headway'].idxmin() 467 | follower_ids = data.loc[idx, ['traj_id', 'Time', 'ID']].set_index(['traj_id', 'Time']) 468 | data['tmp_index'] = data.index 469 | data = data.merge(follower_ids, on=['traj_id', 'Time'], how='left', suffixes=('', '_follower')) 470 | data.rename(columns={'ID_follower': 'follower_id'}, inplace=True) 471 | data.drop(columns=['tmp_index'], inplace=True) 472 | 473 | data = data[data['ID'] == data['follower_id']] 474 | data.drop(columns=['follower_id'], inplace=True) 475 | 476 | new_traj_id = 0 477 | traj_id_mapping = {} 478 | 479 | data.sort_values(by=['traj_id', 'ID'], inplace=True) 480 | for index, row in data.iterrows(): 481 | key = (row['traj_id'], row['ID']) 482 | if key not in traj_id_mapping: 483 | new_traj_id += 1 484 | traj_id_mapping[key] = new_traj_id 485 | data.at[index, 'traj_id'] = traj_id_mapping[key] 486 | 487 | cols = ['traj_id'] + [col for col in data.columns if col != 'traj_id'] 488 | data = data[cols] 489 | 490 | data['traj_id'] -= 1 491 | 492 | data['ID'] = data['ID'].apply(lambda x: x if x in [0, 1] else -1) 493 | 494 | data.insert(3, 'Type_LV', 0) 495 | data.insert(8, 'ID_FAV', id - 1) 496 | data.insert(13, 'space_gap', 0) 497 | data.insert(15, 's_diff', 0) 498 | 499 | new_order = ['traj_id', 'Time', 'ID', 'Type_LV', 'pos_x_av_f', 500 | 'speed_av', 'acc_av', 'ID_FAV', f'pos_x_sv{id}_f', 501 | f'speed_sv{id}', f'acc_sv{id}', 'space_gap', 'space_headway', 502 | 's_diff', 'dim_x_av', f'dim_x_sv{id}'] 503 | data = data[new_order] 504 | 505 | data.columns = ['Trajectory_ID', 'Time_Index', 506 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 507 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 508 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 'Len_LV', 'Len_FAV'] 509 | 510 | data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0) 511 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 512 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['Len_LV'] / 2 - data['Len_FAV'] / 2 513 | 514 | 515 | data = data.drop(columns=['Len_LV', 'Len_FAV']) 516 | 517 | data.to_csv(output_path + f'_{id}.csv', index=False) 518 | 519 | find_one_vehicle(1, 2) 520 | find_one_vehicle(2, 1) 521 | 522 | 523 | def Waymo_perception_convert_format(input_path, output_path): 524 | related_columns = ['segment_id', 'local_veh_id', 'length', 'local_time', 'follower_id', 'leader_id', 525 | 'processed_position', 'processed_speed', 'processed_accer'] 526 | 527 | data = pd.read_csv(input_path, usecols=related_columns) 528 | data = data[data['follower_id'] == 0] 529 | 530 | def merge_rows(group): 531 | 
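        """Collapse one segment's per-vehicle rows into one row per timestamp:
        the leader's and follower's length, position, speed, and acceleration
        are merged into single columns with '_l' and '_f' suffixes."""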
merged_df_list = [] 532 | 533 | for local_time in group['local_time'].unique(): 534 | temp_group = group[group['local_time'] == local_time] 535 | temp_dict = {'segment_id': temp_group['segment_id'].iloc[0], 'local_time': local_time} 536 | 537 | for _, row in temp_group.iterrows(): 538 | suffix = '_f' if row['local_veh_id'] == row['follower_id'] else '_l' 539 | for col in ['length', 'processed_position', 'processed_speed', 'processed_accer']: 540 | temp_dict[f'{col}{suffix}'] = row[col] 541 | temp_dict['follower_id'] = row['follower_id'] 542 | temp_dict['leader_id'] = row['leader_id'] 543 | 544 | merged_df_list.append(pd.DataFrame([temp_dict])) 545 | 546 | merged_df = pd.concat(merged_df_list, ignore_index=True) 547 | 548 | return merged_df 549 | 550 | data = data.groupby('segment_id').apply(merge_rows).reset_index(drop=True) 551 | 552 | new_order = [ 553 | 'segment_id', 'local_time', 554 | 'leader_id', 'processed_position_l', 'processed_speed_l', 'processed_accer_l', 555 | 'follower_id', 'processed_position_f', 'processed_speed_f', 'processed_accer_f', 556 | 'length_l', 'length_f' 557 | ] 558 | 559 | data = data.reindex(columns=new_order) 560 | 561 | data.insert(3, 'type', 0) 562 | data.insert(11, 'space_gap', 0) 563 | data.insert(12, 'space_headway', 0) 564 | data.insert(13, 's_diff', 0) 565 | 566 | data.columns = ['Trajectory_ID', 'Time_Index', 567 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 568 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 569 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 570 | 'length_l', 'length_f'] 571 | 572 | data['ID_LV'] = -1 573 | data['Type_LV'] = 0 574 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 575 | data['Spatial_Headway'] = data['Pos_LV'] - data['Pos_FAV'] 576 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['length_l'] / 2 - data['length_f'] / 2 577 | 578 | data = data.drop(columns=['length_l', 'length_f']) 579 | 580 | data.to_csv(output_path, index=False) 581 | 582 | 583 | def Waymo_motion_convert_format(input_path, output_path): 584 | data = pd.read_csv(input_path) 585 | 586 | for idx in range(4): 587 | data.insert(1 + idx, f'col_{1 + idx}', 0) 588 | for idx in range(3): 589 | data.insert(6 + idx, f'col_{5 + idx}', 0) 590 | data.insert(10, f'Acc_FAV', 0) 591 | data.insert(11, f'Spatial_Gap', 0) 592 | data.insert(12, f'Spatial_Headway', 0) 593 | data.insert(13, 'Speed_Diff', 0) 594 | 595 | data.columns = ['Trajectory_ID', 'Time_Index', 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 596 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 597 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 598 | 'leader_x', 'leader_y', 'leader_length', 'follower_x', 'follower_y', 'follower_length'] 599 | 600 | data['Time_Index'] = data.groupby('Trajectory_ID').cumcount() / 10 601 | data['ID_LV'] = -1 602 | data['ID_FAV'] = 0 603 | 604 | data['Spatial_Headway'] = (np.sqrt( 605 | (data['leader_x'] - data['follower_x']) ** 2 + (data['leader_y'] - data['follower_y']) ** 2)) 606 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['leader_length'] / 2 - data['follower_length'] / 2 607 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 608 | 609 | data = data.drop(['leader_x', 'leader_y', 'follower_x', 'follower_y', 'leader_length', 'follower_length'], axis=1) 610 | 611 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 612 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 613 | 614 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 615 | 
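    # Positions are reconstructed by trapezoidal integration of speed: each
    # 0.1 s step advances the follower by the mean of consecutive speed
    # samples times the time step, and the leader is then placed one spatial
    # headway ahead of the follower.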
data['Pos_FAV'] = (0.1 * average_speed).cumsum() 616 | data.loc[0, 'Pos_FAV'] = 0 617 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 618 | data = data.iloc[:-1] 619 | 620 | data.to_csv(output_path, index=False) 621 | 622 | 623 | def Argoverse_convert_format(input_path, output_path): 624 | data = pd.read_csv(input_path) 625 | 626 | for idx in range(4): 627 | data.insert(1 + idx, f'col_{1 + idx}', 0) 628 | for idx in range(3): 629 | data.insert(6 + idx, f'col_{5 + idx}', 0) 630 | data.insert(10, f'Acc_FAV', 0) 631 | data.insert(11, f'Spatial_Gap', 0) 632 | data.insert(12, f'Spatial_Headway', 0) 633 | data.insert(13, 'Speed_Diff', 0) 634 | 635 | data.columns = ['Trajectory_ID', 'Time_Index', 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 636 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 637 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 638 | 'leader_x', 'leader_y', 'follower_x', 'follower_y'] 639 | 640 | data['Time_Index'] = data.groupby('Trajectory_ID').cumcount() / 10 641 | data['ID_LV'] = -1 642 | data['Type_LV'] = 0 643 | data['ID_FAV'] = 0 644 | 645 | data['Spatial_Headway'] = (np.sqrt( 646 | (data['leader_x'] - data['follower_x']) ** 2 + (data['leader_y'] - data['follower_y']) ** 2)) 647 | data['Spatial_Gap'] = data['Spatial_Headway'] - default_vehicle_length 648 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 649 | 650 | data = data.drop(['leader_x', 'leader_y', 'follower_x', 'follower_y'], axis=1) 651 | 652 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 653 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 654 | 655 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 656 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 657 | data.loc[0, 'Pos_FAV'] = 0 658 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 659 | data = data.iloc[:-1] 660 | 661 | data.to_csv(output_path, index=False) 662 | -------------------------------------------------------------------------------- /Code/model_calibration.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | from sklearn.metrics import mean_squared_error, r2_score 3 | import geatpy as ea 4 | import pandas as pd 5 | import numpy as np 6 | 7 | 8 | def IDM(arg, delta_d, v, delta_v): 9 | """ Calculate the Intelligent Driver Model (IDM) acceleration. """ 10 | v0, T, a, b, s0 = arg 11 | s_star = s0 + max(0, v * T + (v * delta_v) / (2 * ((a * b) ** 0.5))) 12 | small_value = 1e-5 # To avoid division by zero 13 | return a * (1 - (v / v0) ** 4 - (s_star / (delta_d + small_value)) ** 2) 14 | 15 | 16 | def FVD(arg, delta_d, v, delta_v): 17 | """ Calculate the Full Velocity Difference Model (FVD) acceleration. """ 18 | alpha, lamda, v_0, b, beta = arg 19 | V_star = v_0 * (np.tanh(delta_d / b - beta) - np.tanh(-beta)) 20 | ahat = alpha * (V_star - v) + lamda * delta_v 21 | return ahat 22 | 23 | 24 | class MyProblem(ea.Problem): 25 | """ A class to define optimization problems for evolutionary algorithms. 
""" 26 | 27 | def __init__(self, df, lb, ub): 28 | M = 1 # Number of objectives 29 | maxOrMin = [1] # 1 for minimization 30 | Dim = 5 # Number of decision variables 31 | varTypes = [0] * Dim # 0 for continuous variables 32 | lbin = [1] * Dim # 1 to include the lower bound 33 | ubin = [1] * Dim # 1 to include the upper bound 34 | ea.Problem.__init__(self, "", M, maxOrMin, Dim, varTypes, lb, ub, lbin, ubin) 35 | self.df = df 36 | 37 | def aimFunc(self, pop): 38 | """ Objective function for optimization. """ 39 | x = pop.Phen 40 | results = [] 41 | for parameters in x: 42 | arg = tuple(round(param, 3) for param in parameters[:5]) 43 | self.df['a_hat'] = self.df.apply(lambda row: IDM(arg, row['Spatial_Gap'], 44 | row['Speed_FAV'], 45 | -row['Speed_Diff']), axis=1) 46 | results.append(mean_squared_error(self.df['Acc_FAV'], self.df['a_hat'])) 47 | pop.ObjV = np.vstack(results) # Assign objective values to the population 48 | 49 | 50 | class CFModelRegress: 51 | """ A class for regression analysis of car-following models. """ 52 | 53 | def __init__(self, path, timestep, delay=0): 54 | df = pd.read_csv(path) 55 | self.dfs = {name: group for name, group in df.groupby('ID_FAV')} 56 | self.timestep = timestep 57 | self.delay = delay 58 | 59 | def addressOneTra(self, oneTraj): 60 | """ Prepare independent and dependent variables from trajectory data. """ 61 | xData = [] 62 | yData = [] 63 | delay_steps = int(self.delay / self.timestep) 64 | for t in range(delay_steps, oneTraj.shape[0]): 65 | xData.append([oneTraj['Spatial_Gap'].iloc[t - delay_steps], 66 | oneTraj['Speed_FAV'].iloc[t - delay_steps], 67 | oneTraj['Speed_Diff'].iloc[t - delay_steps]]) 68 | yData.append([oneTraj['Acc_FAV'].iloc[t]]) 69 | return xData, yData 70 | 71 | def reorganizeDataIndividualVeh(self): 72 | """ Reorganize data by vehicle for further analysis. """ 73 | reorganizedData = {} 74 | for veh, oneVehData in self.dfs.items(): 75 | sampleData = {"x": [], "y": []} 76 | for Trajectory_ID, group in oneVehData.groupby('Trajectory_ID'): 77 | x_oneTra, y_oneTra = self.addressOneTra(group) 78 | sampleData["x"] += x_oneTra 79 | sampleData["y"] += y_oneTra 80 | reorganizedData[veh] = sampleData 81 | return reorganizedData 82 | 83 | def linearRegression(self, veh, xData, yData): 84 | """ Perform linear regression analysis. """ 85 | xData_np = np.array(xData).reshape(len(xData), -1) 86 | yData_np = np.array(yData) 87 | model = LinearRegression() 88 | model.fit(xData_np, yData_np) 89 | y_pred = model.predict(xData_np) 90 | mse = mean_squared_error(yData_np, y_pred) 91 | coefficients = model.coef_.flatten() 92 | intercept = model.intercept_ 93 | r_squared = model.score(xData_np, yData_np) 94 | n_temp = xData_np.shape[0] 95 | k_temp = xData_np.shape[1] 96 | adjusted_r_squared = 1 - (1 - r_squared) * (n_temp - 1) / (n_temp - k_temp - 1) 97 | data = {'Vehicle': [veh], 'R2': [r_squared], 'RMSE': [np.sqrt(mse)]} 98 | for i, coef in enumerate(coefficients): 99 | data[f'Coef_{i}'] = coef 100 | data['Intercept'] = intercept 101 | return pd.DataFrame(data) 102 | 103 | def IDM_regression(self, veh, xData, yData): 104 | """ Perform regression using the IDM car-following model. 
""" 105 | xData_df = pd.DataFrame(xData) 106 | yData_df = pd.DataFrame(yData) 107 | df = pd.concat([xData_df, yData_df], axis=1) 108 | df.columns = ['Spatial_Gap', 'Speed_FAV', 'Speed_Diff', 'Acc_FAV'] 109 | problem = MyProblem(df, [0.1, 0.1, 20, 0.1, 0.1], [10, 10, 40, 10, 10]) # Parameters for FVD model 110 | Encoding = 'RI' 111 | NIND = 25 # Population size 112 | Field = ea.crtfld(Encoding, problem.varTypes, problem.ranges, problem.borders) 113 | population = ea.Population(Encoding, Field, NIND) 114 | myAlgorithm = ea.soea_SEGA_templet(problem, population) 115 | myAlgorithm.MAXGEN = 100 # Max generations 116 | myAlgorithm.verbose = True 117 | myAlgorithm.drawing = 1 118 | BestIndi, population = myAlgorithm.run() 119 | arg = tuple(round(param, 3) for param in BestIndi.Phen[0, :5]) 120 | df['a_hat'] = df.apply(lambda row: IDM(arg, row['Spatial_Gap'], 121 | row['Speed_FAV'], 122 | -row['Speed_Diff']), axis=1) 123 | r_squared = r2_score(df['Acc_FAV'], df['a_hat']) 124 | mse = mean_squared_error(df['Acc_FAV'], df['a_hat']) 125 | results = {'Vehicle': [veh], 'R2': [r_squared], 'RMSE': [np.sqrt(mse)]} 126 | for i in range(5): 127 | results[f'Coef_{i}'] = BestIndi.Phen[0, i] 128 | return pd.DataFrame(results) 129 | 130 | def main(self, output_path, model): 131 | """ Main function to run the regression analysis. """ 132 | allData = self.reorganizeDataIndividualVeh() 133 | df_list = [] 134 | for veh, data in allData.items(): 135 | if model == "linear": 136 | df_list.append(self.linearRegression(veh, data['x'], data['y'])) 137 | elif model == "IDM": 138 | df_list.append(self.IDM_regression(veh, data['x'], data['y'])) 139 | merged_df = pd.concat(df_list) 140 | merged_df.to_csv(output_path, index=False) 141 | return merged_df 142 | -------------------------------------------------------------------------------- /Code/plot_result.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from PIL import Image 4 | import matplotlib.pyplot as plt 5 | 6 | def combined(): 7 | # Load the images 8 | image1 = Image.open("./1.png") 9 | image2 = Image.open("./2.png") 10 | image3 = Image.open("./3.png") 11 | 12 | # Determine the size of the combined image 13 | total_width = image1.width + image2.width + image3.width 14 | max_height = max(image1.height, image2.height, image3.height) 15 | 16 | # Create a new empty image with the size to fit all three images 17 | combined_image = Image.new('RGB', (total_width, max_height)) 18 | 19 | # Paste the images next to each other 20 | combined_image.paste(image1, (0, 0)) 21 | combined_image.paste(image2, (image1.width, 0)) 22 | combined_image.paste(image3, (image1.width + image2.width, 0)) 23 | 24 | # Save the combined image 25 | combined_image_path = "combined_image.png" 26 | combined_image.save(combined_image_path) 27 | 28 | 29 | # Modified code with increased font sizes for the bar chart 30 | 31 | def pilar(): 32 | # Set the random seed for reproducibility 33 | np.random.seed(1) 34 | 35 | # Generate some data 36 | categories = ['Vanderbilt', 'CATS', 'OpenACC', 'Ohio', 'Waymo', 'Argoverse'] 37 | algorithms = ['Acceleration', 'Speed', 'Space'] 38 | data = np.random.randint(0, 150, size=(len(algorithms), len(categories))) 39 | data = np.array([[10, 11, 8, 10, 9, 8], [20, 22, 25, 22, 16, 17], [15, 17, 20, 18, 15, 16]]) 40 | # [[10, 20, 15], [11, 22, 17], [8, 25, 20], 41 | # [10, 22, 18], [9, 16, 15], [8, 17, 16]] 42 | 43 | # Create a bar chart 44 | fig, ax = plt.subplots(figsize=(16, 8)) 45 | 46 | 
# Set the positions and width for the bars 47 | positions = np.arange(len(categories)) 48 | width = 0.25 # Increase width for clarity 49 | 50 | # Plot bars for each algorithm 51 | for i in range(len(algorithms)): 52 | ax.bar(positions - width + (i * width), data[i], width=width, label=algorithms[i]) 53 | 54 | # Customize font sizes 55 | plt.rcParams.update({'font.size': 14}) # Update default rc settings for font size 56 | 57 | # Add some text for labels, title, and custom x-axis tick labels, etc. 58 | # ax.set_ylabel('Scores', fontsize=16) 59 | # ax.set_title('Scores by category and algorithm', fontsize=18) 60 | ax.set_xticks(positions) 61 | ax.set_xticklabels(categories, fontsize=18) 62 | # ax.set_ylim(0, 160) 63 | ax.legend(fontsize=18) 64 | 65 | # Display the bar chart 66 | plt.savefig('./scores.png') 67 | 68 | 69 | def scatter(input_path): 70 | df = pd.read_csv(input_path) 71 | 72 | for i in range(200, 400, 100): 73 | colors = [(42 / 255, 157 / 255, 140 / 255), 74 | (233 / 255, 196 / 255, 107 / 255), 75 | (230 / 255, 111 / 255, 81 / 255)] 76 | 77 | plt.figure(figsize=(15, 5)) 78 | 79 | # for name, group in df.groupby('ID_FAV'): 80 | plt.scatter(df['Space_Gap'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[0], s=100) 81 | # plt.title('Acceleration and Space Gap', fontsize=20) 82 | plt.xlabel('$g$', fontsize=40) 83 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 84 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 85 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 86 | plt.tight_layout() 87 | plt.savefig(f'{i}_Space_Gap.png') # _{name} 88 | plt.close() 89 | 90 | plt.figure(figsize=(15, 5)) 91 | 92 | plt.scatter(df['Speed_FAV'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[1], s=100) 93 | plt.xlabel('$v^{\mathrm{f}}$', fontsize=40) 94 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 95 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 96 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 97 | plt.tight_layout() 98 | plt.savefig(f'{i}_Speed_FAV.png') # _{name} 99 | plt.close() 100 | 101 | plt.figure(figsize=(15, 5)) 102 | 103 | plt.scatter(df['Speed_Diff'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[2], s=100) 104 | # plt.title('Acceleration and Speed Difference', fontsize=20) 105 | plt.xlabel('$\Delta v$', fontsize=40) 106 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 107 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 108 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 109 | plt.tight_layout() 110 | plt.savefig(f'{i}_Speed_Diff.png') # _{name} 111 | plt.close() 112 | 113 | 114 | def read_date(): 115 | original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Single-Vehicle_Data_for_Central_Ohio.csv' 116 | df = pd.read_csv(original_data_path) 117 | unique_dates = df['date'].drop_duplicates() 118 | print(unique_dates) 119 | 120 | print('-----') 121 | 122 | original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Two-Vehicle_Data_for_Central_Ohio.csv' 123 | df = pd.read_csv(original_data_path) 124 | unique_dates = df['date'].drop_duplicates() 125 | print(unique_dates) 126 | 127 | # scatter('./Dataset/OpenACC/output/step1_ASta_merge.csv') 128 | # pilar() 129 | # read_date() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ultra-AV: A unified longitudinal trajectory dataset for automated vehicle 2 | 3 | ## Introduction 4 | 5 | This 
## Original Datasets

We have examined 14 open-source datasets, each providing distinct insights into AV behavior across various driving conditions and scenarios. These open-source datasets come from seven providers:

- **Vanderbilt ACC Dataset** [1]. Collected in Nashville, Tennessee by the Vanderbilt University research group. Available at - [https://acc-dataset.github.io/datasets/](https://acc-dataset.github.io/datasets/).
  - [Two-vehicle ACC driving, Tennessee 2019](https://github.com/CATS-Lab/Filed-Experiment-Data-AV_Platooning_Data)
- **MicroSimACC Dataset** [2]. Collected in four cities in Florida (Delray Beach, Loxahatchee, Boca Raton, and Parkland) by the Florida Atlantic University research group. Available at - [https://github.com/microSIM-ACC/ICE](https://github.com/microSIM-ACC/ICE).
  - [ICE](https://github.com/microSIM-ACC/ICE)
- **CATS Open Datasets** [3]. Three datasets were gathered in Tampa, Florida, and Madison, Wisconsin by the CATS Lab. Available at - [https://github.com/CATS-Lab](https://github.com/CATS-Lab).
  - [Filed-Experiment-Data-AV_Platooning_Data](https://github.com/CATS-Lab/Filed-Experiment-Data-AV_Platooning_Data)
  - [Filed-Experiment-Data-ACC_Data](https://github.com/CATS-Lab/Filed-Experiment-Data-ACC_Data)
  - [CATS-UWMadison-AV-Data](https://github.com/MarkMaaaaa/CATS-UWMadison-AV-Data)
- **OpenACC Database** [4]. Four datasets were collected across Italy, Sweden, and Hungary by the European Commission's Joint Research Centre. Available at - [https://data.europa.eu/data/datasets/9702c950-c80f-4d2f-982f-44d06ea0009f?locale=en](https://data.europa.eu/data/datasets/9702c950-c80f-4d2f-982f-44d06ea0009f?locale=en).
  - [Casale](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/Casale/)
  - [Vicolungo](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/Vicolungo/)
  - [AstaZero](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/AstaZero/)
  - [ZalaZone](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/ZalaZone/)
- **Central Ohio ACC Datasets** [5]. Two datasets were collected in Ohio by UCLA's Mobility Lab and the Transportation Research Center. Available at -
  - [Advanced Driver Assistance System (ADAS)-Equipped Single-Vehicle Data for Central Ohio](https://catalog.data.gov/dataset/advanced-driver-assistance-system-adas-equipped-single-vehicle-data-for-central-ohio)
  - [Advanced Driver Assistance System (ADAS)-Equipped Two-Vehicle Data for Central Ohio](https://catalog.data.gov/dataset/advanced-driver-assistance-system-adas-equipped-two-vehicle-data-for-central-ohio)
- **Waymo Open Dataset** [6, 7]. Two datasets were collected by Waymo in six cities: San Francisco, Mountain View, and Los Angeles in California; Phoenix in Arizona; Detroit in Michigan; and Seattle in Washington. Available at -
  - [Waymo Motion Dataset](https://waymo.com/open/data/motion/)
  - [Vehicle trajectory data processed from the Waymo Open Dataset](https://data.mendeley.com/datasets/wfn2c3437n/2)
- **Argoverse 2 Motion Forecasting Dataset** [8]. Collected from Austin in Texas, Detroit in Michigan, Miami in Florida, Pittsburgh in Pennsylvania, Palo Alto in California, and Washington, D.C. by Argo AI with researchers from Carnegie Mellon University and the Georgia Institute of Technology. Available at -
  - [Argoverse 2 Motion Forecasting Dataset](https://www.argoverse.org/av2.html)

For more details on the datasets, please refer to the references and our paper.

## Installation

All the data are provided in CSV format. If you want to run our source code, please make sure you meet the prerequisites below:

1. **Python 3** - Ensure you have a Python 3 environment set up.
2. **Required packages** - Install all necessary packages listed in the `requirements.txt` file, e.g., with `pip install -r requirements.txt`.
3. **Original data or the processed dataset** - Download the original data or our processed data from the links provided, and verify that the data paths in the code match where you stored the files before processing or analyzing the data.

We also recommend software packages such as R for analyzing the trajectory data; these tools are well-suited to the dataset's format.

## Usage

### Code

The code related to our data processing and validation is stored in the folder `/Code`. This folder contains the following files:

- **Main.py** - The main script; it calls the data processing and analysis functions for each dataset.
- **CF_extraction.py** - Code used in Step 1 to extract AV longitudinal trajectories.
- **data_transformation.py** - Code used in Step 1 to convert all datasets to a unified format.
- **data_cleaning.py** - Code used in Steps 2 and 3 for data cleaning.
- **data_analysis.py** - Code used to analyze data statistics, plot traffic performance of datasets, and plot scatter plots.
- **plot_result.py** - Code used to plot figures such as bar charts and scatter plots from the processed data.
- **model_calibration.py** - An example tool that uses the processed data to calibrate a linear car-following model (see the sketch at the end of this subsection).

To use this repo, run the Python script `Main.py`. As you proceed through each Python script, always verify the paths of both the input and output files; this ensures that everything runs smoothly.
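As a flavor of what the calibration step involves, the sketch below fits a linear car-following model of the form $a^{\mathrm{f}} = \beta_0 + \beta_1 g + \beta_2 v^{\mathrm{f}} + \beta_3 \Delta v$ by least squares on the processed columns. It is a minimal, hypothetical example, not the exact routine in `model_calibration.py`, and the CSV path is a placeholder; the column names follow the Data section below.

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Placeholder path -- any processed car-following CSV from the figshare link.
df = pd.read_csv('./ultra_av_car_following.csv').dropna()

# Fit Acc_FAV as a linear function of gap, FAV speed, and speed difference.
X = df[['Space_Gap', 'Speed_FAV', 'Speed_Diff']]
y = df['Acc_FAV']

model = LinearRegression().fit(X, y)
print('coefficients (g, v^f, dv):', model.coef_)
print('intercept:', model.intercept_)
print('in-sample R^2:', model.score(X, y))
```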
### Data

Data attributes are shown below:

| Label | Description | Notations and formulation | Unit |
| ------------- | -------------------------------------------- | ------------------------------------------------------------ | ---- |
| Trajectory_ID | ID of the longitudinal trajectory. | $i\in \mathcal{I}$. | N/A |
| Time_Index | Common time stamp in one trajectory. | $t\in \mathcal{T}_i, i\in \mathcal{I}$. | s |
| ID_LV | LV (lead vehicle) ID. | $c^{\mathrm{l}}_i, i\in \mathcal{I}$. AV lead vehicles carry the corresponding FAV ID; all HVs are labeled -1. | N/A |
| Type_LV | Whether the LV is an AV or a human-driven vehicle. | AVs are labeled 1 and human-driven vehicles 0. | N/A |
| Pos_LV | LV position in the Frenet coordinates. | $p^{\mathrm{l}}_{it}=p^{\mathrm{f}}_{it}+h_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_LV | LV speed. | $v^{\mathrm{l}}_{it}=\frac{p^{\mathrm{l}}_{i(t+1)}-p^{\mathrm{l}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
| Acc_LV | LV acceleration. | $a^{\mathrm{l}}_{it}=\frac{v^{\mathrm{l}}_{i(t+1)}-v^{\mathrm{l}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s² |
| ID_FAV | FAV (following automated vehicle) ID. | $c^{\mathrm{f}}_i, i\in \mathcal{I}$. Each FAV is labeled with a distinct ID. | N/A |
| Pos_FAV | FAV position in the Frenet coordinates. | $p^{\mathrm{f}}_{it}=p^{\mathrm{f}}_{i(t-1)}+\Delta t \cdot v^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_FAV | FAV speed. | $v^{\mathrm{f}}_{it}=\frac{p^{\mathrm{f}}_{i(t+1)}-p^{\mathrm{f}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
| Acc_FAV | FAV acceleration. | $a^{\mathrm{f}}_{it}=\frac{v^{\mathrm{f}}_{i(t+1)}-v^{\mathrm{f}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s² |
| Space_Gap | Bumper-to-bumper distance between the two vehicles. | $g_{it}=p^{\mathrm{l}}_{it}-p^{\mathrm{f}}_{it} - l^{\mathrm{f}}/2 - l^{\mathrm{l}}/2, i\in \mathcal{I}, t\in \mathcal{T}_i$, where $l^{\mathrm{f}}$ and $l^{\mathrm{l}}$ are the lengths of the FAV and the LV. | m |
| Space_Headway | Distance between the centers of the two vehicles. | $h_{it}=p^{\mathrm{l}}_{it}-p^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_Diff | Speed difference of the two vehicles. | $\Delta v_{it}=v^{\mathrm{l}}_{it}-v^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
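Because the derived columns follow directly from the position series, they can be re-derived as a sanity check. The sketch below recomputes the spacing and speed-difference columns for one trajectory using the forward differences defined in the table above; the CSV path is again a placeholder, and a uniform time step is assumed.

```python
import pandas as pd

df = pd.read_csv('./ultra_av_car_following.csv')  # placeholder path
traj = df[df['Trajectory_ID'] == df['Trajectory_ID'].iloc[0]].sort_values('Time_Index')

dt = traj['Time_Index'].diff().median()  # sampling interval, assumed uniform

# Forward differences, matching v_t = (p_{t+1} - p_t) / dt in the table;
# these should be close to the published Speed_FAV and Acc_FAV columns.
speed_fav = traj['Pos_FAV'].diff().shift(-1) / dt
acc_fav = traj['Speed_FAV'].diff().shift(-1) / dt

# Spacing quantities: h_t = p^l_t - p^f_t and dv_t = v^l_t - v^f_t.
headway = traj['Pos_LV'] - traj['Pos_FAV']
speed_diff = traj['Speed_LV'] - traj['Speed_FAV']

# Deviations should be near zero, up to rounding in the published CSVs.
print((headway - traj['Space_Headway']).abs().max())
print((speed_diff - traj['Speed_Diff']).abs().max())
```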
The FAV IDs are provided below:

**Vanderbilt Two-vehicle ACC Dataset:**

- 0 - A commercially available 2019 SUV with a full-speed-range adaptive cruise control system.

**MicroSimACC Dataset:**

- 0 - Toyota Corolla LE 2020

**CATS ACC Dataset:**

- 0 - Lincoln MKZ 2016 (Black)
- 1 - Lincoln MKZ 2017 (Red)

**CATS Platoon Dataset:**

- 0 - Lincoln MKZ 2016 (Black)
- 1 - Lincoln MKZ 2017 (Red)

**CATS UWM Dataset:**

- 0 - Lincoln MKZ 2017 (Red)

**OpenACC Casale Dataset:**

- 0 - Rexton
- 1 - Hyundai

**OpenACC Vicolungo Dataset:**

- 0 - Ford(S-Max)
- 1 - KIA(Niro)
- 2 - Mini(Cooper)
- 3 - Mitsubishi(OutlanderPHEV)
- 4 - Mitsubishi(SpaceStar)
- 5 - Peugeot(3008GTLine)
- 6 - VW(GolfE)

**OpenACC Asta Dataset:**

- 0 - Audi(A6)
- 1 - Audi(A8)
- 2 - BMW(X5)
- 3 - Mercedes(AClass)
- 4 - Tesla(Model3)

**OpenACC ZalaZone Dataset:**

- 0 - AUDI_A4
- 1 - AUDI_E_TRON
- 2 - BMW_I3
- 3 - JAGUAR_I_PACE
- 4 - MAZDA_3
- 5 - MERCEDES_GLE450
- 6 - SMART_TARGET
- 7 - SKODA_TARGET
- 8 - TESLA_MODEL3
- 9 - TESLA_MODELS
- 10 - TESLA_MODELX
- 11 - TOYOTA_RAV4

**Ohio Single-vehicle Dataset:**

- 0 - Retrofitted Tesla sedan

**Ohio Two-vehicle Dataset:**

- 0 - Retrofitted Tesla sedan
- 1 - Retrofitted Ford Fusion sedan

**Waymo Perception Dataset:**

- 0 - Waymo ADS-equipped vehicle

**Waymo Motion Dataset:**

- 0 - Waymo ADS-equipped vehicle

**Argoverse 2 Motion Forecasting Dataset:**

- 0 - Argo AI self-driving Ford

For more details on the labels and the vehicle types, please refer to our paper.
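These IDs, together with `ID_LV` and `Type_LV` from the data dictionary, make it easy to slice a dataset by vehicle or by pair type. Below is a minimal sketch (the file path is a placeholder) for the OpenACC Asta data, where Tesla(Model3) has FAV ID 4:

```python
import pandas as pd

asta = pd.read_csv('./asta_car_following.csv')  # placeholder path

# Trajectories whose FAV is the Tesla(Model3), i.e. ID_FAV == 4 in this dataset.
tesla = asta[asta['ID_FAV'] == 4]
print(tesla['Trajectory_ID'].nunique(), 'Tesla(Model3) trajectories')

# AV-following-AV pairs: the lead vehicle is itself an AV (Type_LV == 1).
av_led = asta[asta['Type_LV'] == 1]

# Pairs led by a human-driven vehicle carry ID_LV == -1.
hv_led = asta[asta['ID_LV'] == -1]
```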
"Processing, assessing, and enhancing the Waymo autonomous vehicle open dataset for driving behavior research." *Transportation Research Part C: Emerging Technologies* 134 (2022): 103490. 191 | 192 | [7] Ettinger, Scott, Shuyang Cheng, Benjamin Caine, Chenxi Liu, Hang Zhao, Sabeek Pradhan, Yuning Chai et al. "Large scale interactive motion forecasting for autonomous driving: The waymo open motion dataset." In *Proceedings of the IEEE/CVF International Conference on Computer Vision*, pp. 9710-9719. 2021. 193 | 194 | [8] Wilson, Benjamin, William Qi, Tanmay Agarwal, John Lambert, Jagjeet Singh, Siddhesh Khandelwal, Bowen Pan et al. "Argoverse 2: Next generation datasets for self-driving perception and forecasting." *arXiv preprint arXiv:2301.00493* (2023). 195 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CATS-Lab/Filed-Experiment-Data-ULTra-AV/90b984065740a2487519668076dc0ad951d54b7d/requirements.txt --------------------------------------------------------------------------------