├── Code
│   ├── CF_extraction.py
│   ├── Main.py
│   ├── data_analysis.py
│   ├── data_cleaning.py
│   ├── data_transformation.py
│   ├── model_calibration.py
│   └── plot_result.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/Code/CF_extraction.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from sklearn.linear_model import LinearRegression
3 | from sklearn.metrics import r2_score
4 | from data_transformation import *
5 | from data_cleaning import *
6 | import numpy as np
7 | import pandas as pd
8 | def Waymo_extract_df(input_path):
9 |     state_features = {
10 |         'state/past/x':
11 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
12 |         'state/past/y':
13 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
14 |         'state/past/speed':
15 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
16 |         'state/past/length':
17 |             tf.io.FixedLenFeature([128, 10], tf.float32, default_value=None),
18 |         'state/current/x':
19 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
20 |         'state/current/y':
21 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
22 |         'state/current/speed':
23 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
24 |         'state/current/length':
25 |             tf.io.FixedLenFeature([128, 1], tf.float32, default_value=None),
26 |         'state/future/x':
27 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
28 |         'state/future/y':
29 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
30 |         'state/future/speed':
31 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
32 |         'state/future/length':
33 |             tf.io.FixedLenFeature([128, 80], tf.float32, default_value=None),
34 |         'state/id':
35 |             tf.io.FixedLenFeature([128], tf.float32, default_value=None),
36 |         'state/type':
37 |             tf.io.FixedLenFeature([128], tf.float32, default_value=None),
38 |         'state/is_sdc':
39 |             tf.io.FixedLenFeature([128], tf.int64, default_value=None),
40 |     }
41 | 
42 |     def _parse_function(example_proto):
43 |         return tf.io.parse_single_example(example_proto, state_features)
44 | 
45 |     raw_dataset = tf.data.TFRecordDataset(input_path)
46 |     parsed_dataset = raw_dataset.map(_parse_function)
47 | 
48 |     data_rows = []
49 | 
50 |     traj_id = 0
51 |     for parsed_record in parsed_dataset:
52 |         def extract_data(field_name):
53 |             return parsed_record[field_name].numpy()
54 | 
55 |         data_to_extract = ['state/current/x', 'state/current/y', 'state/current/speed', 'state/current/length',
56 |                            'state/past/x', 'state/past/y', 'state/past/speed', 'state/past/length',
57 |                            'state/future/x', 'state/future/y', 'state/future/speed', 'state/future/length',
58 |                            'state/id', 'state/is_sdc', 'state/type']
59 | 
60 |         extracted_data = {key: extract_data(key) for key in data_to_extract}
61 | 
62 |         def add_trajectory_data(time_range, data, time):
63 |             for j in time_range:
64 |                 row = {'Trajectory_ID': traj_id}
65 |                 for i in range(128):
66 |                     row.update({
67 |                         f'id_{i}': data['state/id'][i],
68 |                         f'is_av_{i}': data['state/is_sdc'][i],
69 |                         f'type_{i}': data['state/type'][i],
70 |                         f'x_{i}': data['state/' + time + '/x'][i][j],
71 |                         f'y_{i}': data['state/' + time + '/y'][i][j],
72 |                         f'length_{i}': data['state/' + time + '/length'][i][j],
73 |                         f'speed_{i}': data['state/' + time + '/speed'][i][j],
74 |                     })
75 |                 data_rows.append(row)
76 | 
77 |         add_trajectory_data(range(10), extracted_data, 'past')
78 |         add_trajectory_data(range(1), extracted_data, 'current')
79 |         add_trajectory_data(range(80), extracted_data, 'future')
80 | 
81 |         traj_id += 1
82 | 
83 |     dataframe = pd.DataFrame(data_rows)
84 |     return dataframe
85 | 
86 | 
87 | def Waymo_extract_cf_traj(data, straight_threshold=0.9, direction_threshold=0.985,
88 |                           relative_diff_threshold=0.2):
89 |     """
90 |     Processes trajectories to keep only straight, car-following ones based on the specified thresholds.
91 | 
92 |     Parameters:
93 |     - data: DataFrame containing trajectory data.
94 |     - straight_threshold: R² threshold to determine if the trajectory is straight.
95 |     - direction_threshold: Cosine similarity threshold to determine if vehicles are moving in the same direction.
96 |     - relative_diff_threshold: Threshold on the relative difference between the space-gap change and the
97 |       integrated speed difference, used to filter out inconsistent trajectories.
98 |     """
99 |     # 1. Remove non-straight trajectories
100 |     is_traj_straight = {}
101 |     av_directions = {}
102 |     for traj_id, group in data.groupby('Trajectory_ID'):
103 |         # Step 1: Identify if all 'is_av_{i}' columns are 1 for each trajectory
104 |         av_idx = None
105 |         for i in range(128):
106 |             if all(group[f'is_av_{i}'] == 1):
107 |                 av_idx = i
108 |                 break
109 |         if av_idx is None:
110 |             continue
111 | 
112 |         # Extract 'x' and 'y' coordinates and fit a linear regression model
113 |         X = group[[f'x_{av_idx}']].values.reshape(-1, 1)
114 |         y = group[f'y_{av_idx}'].values.reshape(-1, 1)
115 |         model = LinearRegression()
116 |         model.fit(X, y)
117 |         y_pred = model.predict(X)
118 |         r2 = r2_score(y, y_pred)
119 | 
120 |         # Check if the trajectory is straight based on the R² value
121 |         if r2 >= straight_threshold:
122 |             slope = model.coef_[0][0]
123 |             direction_vector = np.array([1, slope])
124 |             direction_norm = np.linalg.norm(direction_vector)
125 |             direction_vector = direction_vector / direction_norm
126 |             av_directions[traj_id] = (av_idx, direction_vector)
127 |             is_traj_straight[traj_id] = True
128 |         else:
129 |             is_traj_straight[traj_id] = False
130 | 
131 |     # Remove trajectories that are not straight
132 |     traj_ids_to_delete = [traj_id for traj_id, result in is_traj_straight.items() if not result]
133 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
134 | 
135 |     # 2. Remove trajectories that are not following another vehicle
136 |     nearest_vehicle_indices = {}
137 |     traj_ids_to_delete = []
138 |     cosine_threshold = direction_threshold
139 | 
140 |     for traj_id, group in data.groupby('Trajectory_ID'):
141 |         av_idx = av_directions[traj_id][0]
142 |         ref_x_direction, ref_y_direction = av_directions[traj_id][1]
143 |         nearest_vehicles_per_row = []
144 |         found_nearest_in_all_rows = True
145 |         first_row_idx = group.index[0]
146 |         for row_idx, row in group.iterrows():
147 |             min_distance = float('inf')
148 |             nearest_vehicle_idx = None
149 |             for i in range(128):
150 |                 if i == av_idx:
151 |                     continue
152 | 
153 |                 # Skip opposite-direction traffic (needs a previous sample, so not on the first row)
154 |                 if row_idx != first_row_idx:
155 |                     previous_row = group.loc[row_idx - 1]
156 |                     # Dot product of the frame-to-frame displacements of vehicle i and the AV;
157 |                     # a negative value means they move in opposite directions
158 |                     dot_product = (row[f'x_{i}'] - previous_row[f'x_{i}']) * \
159 |                                   (row[f'x_{av_idx}'] - previous_row[f'x_{av_idx}']) + \
160 |                                   (row[f'y_{i}'] - previous_row[f'y_{i}']) * \
161 |                                   (row[f'y_{av_idx}'] - previous_row[f'y_{av_idx}'])
162 |                     if dot_product < 0:
163 |                         continue
164 | 
165 |                 # Remove vehicles not in a straight line
166 |                 x_direction = row[f'x_{i}'] - row[f'x_{av_idx}']
167 |                 y_direction = row[f'y_{i}'] - row[f'y_{av_idx}']
168 |                 vector_length = (x_direction ** 2 + y_direction ** 2) ** 0.5
169 |                 vector_cosine = ((x_direction * ref_x_direction + y_direction * ref_y_direction)
170 |                                  / vector_length) if vector_length != 0 else 0
171 |                 # Check if moving in the same direction based on cosine similarity
172 |                 if vector_cosine >= cosine_threshold:
173 |                     distance = vector_length
174 |                     if distance < min_distance:
175 |                         min_distance = distance
176 |                         nearest_vehicle_idx = i
177 |             if nearest_vehicle_idx is None:
178 |                 found_nearest_in_all_rows = False
179 |                 break
180 |             else:
181 |                 nearest_vehicles_per_row.append(nearest_vehicle_idx)
182 | 
183 |         # Add trajectories to the deletion list if they don't consistently follow the same vehicle
184 |         if not found_nearest_in_all_rows:
185 |             traj_ids_to_delete.append(traj_id)
186 |         elif len(set(nearest_vehicles_per_row)) > 1:
187 |             traj_ids_to_delete.append(traj_id)
188 |         else:
189 |             nearest_vehicle_indices[traj_id] = nearest_vehicles_per_row[0]
190 | 
191 |     # Delete trajectories that do not follow a single vehicle
192 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
193 | 
194 |     # 3. Organize a new DataFrame with filtered data
195 |     new_rows = []
196 |     for traj_id, group in data.groupby('Trajectory_ID'):
197 |         if traj_id not in av_directions or traj_id not in nearest_vehicle_indices:
198 |             continue
199 | 
200 |         av_idx = av_directions[traj_id][0]
201 |         nearest_idx = nearest_vehicle_indices[traj_id]
202 | 
203 |         if nearest_idx is None:
204 |             continue
205 |         for _, row in group.iterrows():
206 |             leader_x = row[f'x_{av_idx}']
207 |             leader_y = row[f'y_{av_idx}']
208 |             leader_length = row[f'length_{av_idx}']
209 |             leader_speed = row[f'speed_{av_idx}']
210 |             follower_x = row[f'x_{nearest_idx}']
211 |             follower_y = row[f'y_{nearest_idx}']
212 |             follower_length = row[f'length_{nearest_idx}']
213 |             follower_speed = row[f'speed_{nearest_idx}']
214 |             new_row = {'Trajectory_ID': traj_id, 'leader_speed': leader_speed, 'follower_speed': follower_speed,
215 |                        'leader_x': leader_x, 'leader_y': leader_y, 'leader_length': leader_length,
216 |                        'follower_x': follower_x, 'follower_y': follower_y, 'follower_length': follower_length}
217 |             new_rows.append(new_row)
218 | 
219 |     df = pd.DataFrame(new_rows)
220 | 
221 |     # 4. Remove trajectories where the space gap and speed difference do not match
222 |     num_before = len(df)
223 | 
224 |     df['Space_Gap'] = (np.sqrt(
225 |         (df['leader_x'] - df['follower_x']) ** 2 + (df['leader_y'] - df['follower_y']) ** 2)) - df[
226 |         'leader_length'] / 2 - df['follower_length'] / 2
227 | 
228 |     df['Speed_Diff'] = df['leader_speed'] - df['follower_speed']
229 | 
230 |     grouped = df.groupby('Trajectory_ID').agg(
231 |         Speed_Diff_Mean=('Speed_Diff', 'mean'),
232 |         Space_Gap_Change=('Space_Gap', lambda x: x.iloc[-1] - x.iloc[0])
233 |     )
234 |     # The gap change over the 9.1 s clip should roughly equal the mean speed difference times 9.1 s
235 |     grouped['Relative_Diff'] = abs(grouped['Space_Gap_Change'] - (grouped['Speed_Diff_Mean'] * 9.1)) / (
236 |             grouped['Speed_Diff_Mean'] * 9.1)
237 | 
238 |     traj_to_remove_relative = grouped[grouped['Relative_Diff'] > relative_diff_threshold].index
239 | 
240 |     df = df[~df['Trajectory_ID'].isin(traj_to_remove_relative)]
241 | 
242 |     df.drop(['Space_Gap', 'Speed_Diff'], axis=1, inplace=True)
243 | 
244 |     num_after = len(df)
245 |     print(f'Total trajectories: {num_before // 91}, deleted trajectories: {(num_before - num_after) // 91}.')
246 | 
247 |     return df
248 | 
249 | 
250 | def Argo2_extract_df(input_paths, traj_id):
251 |     df_list = []
252 |     for input_path in input_paths:
253 |         data = pd.read_parquet(input_path)
254 | 
255 |         df = pd.DataFrame(
256 |             columns=['Trajectory_ID', 'Time_Index', 'ID', 'x', 'y', 'speed'])
257 |         df['Time_Index'] = data.iloc[:, 4]
258 |         df['Trajectory_ID'] = traj_id
259 |         df['ID'] = data.iloc[:, 1].replace({'AV': 0})
260 |         df['x'] = data.iloc[:, 5]
261 |         df['y'] = data.iloc[:, 6]
262 |         df['speed'] = np.linalg.norm(data.iloc[:, [8, 9]].values, axis=1)
263 | 
264 |         def reshape_group_optimized(group):
265 |             group = group.reset_index(drop=True)
266 |             reshaped_data_info = group.loc[0, ['Trajectory_ID', 'Time_Index']].to_dict()
267 |             columns_data = []
268 |             for _, row in group.iterrows():
269 |                 suffix = f"_{row['ID']}"
270 |                 temp_df = pd.DataFrame({
271 |                     f'ID{suffix}': [row['ID']],
272 |                     f'x{suffix}': [row['x']],
273 |                     f'y{suffix}': [row['y']],
274 |                     f'speed{suffix}': [row['speed']]
275 |                 })
276 |                 columns_data.append(temp_df)
277 |             reshaped_data = pd.concat([pd.DataFrame(reshaped_data_info, index=[0])] + columns_data, axis=1)
278 |             return reshaped_data
279 | 
280 |         df = df.groupby(['Trajectory_ID', 'Time_Index']).apply(reshape_group_optimized).reset_index(
281 |             drop=True)
282 |         df = df.groupby(['Trajectory_ID', 'Time_Index']).first().unstack(
283 |             fill_value=0).stack(future_stack=True).reset_index()
284 | 
285 |         columns = ['Trajectory_ID', 'Time_Index']
286 |         for i in range(int((len(df.columns) - 2) / 4)):
287 |             columns += [f'ID_{i}', f'x_{i}', f'y_{i}', f'speed_{i}']
288 |         df.columns = columns
289 | 
290 |         df_list.append(df)
291 | 
292 |         traj_id += 1
293 | 
294 |     df = pd.concat(df_list)
295 |     return df
296 | 
297 | 
298 | def Argo2_extract_cf_traj(data, output_path, straight_threshold=0.9, direction_threshold=0.985,
299 |                           relative_diff_threshold=0.2):
300 |     """
301 |     Filters and processes car-following trajectories from the Argoverse 2 dataset.
302 | 
303 |     Parameters:
304 |     - data: DataFrame containing trajectory data.
305 |     - output_path: Path where the processed data should be saved.
306 |     - straight_threshold: Threshold for R² to consider a trajectory as straight.
307 |     - direction_threshold: Cosine similarity threshold to determine direction alignment.
308 |     - relative_diff_threshold: Threshold on the relative difference between the space-gap change and the
309 |       integrated speed difference.
310 |     """
311 | 
312 |     # 1. Remove non-straight trajectories
313 |     is_traj_straight = {}
314 |     av_directions = {}
315 |     for traj_id, group in data.groupby('Trajectory_ID'):
316 |         # Step 1: Identify which vehicle is the AV and check if all its trajectory points are labeled as AV
317 |         av_idx = None
318 |         for i in range(int((len(group.columns) - 2) / 4)):
319 |             if all(group[f'ID_{i}'] == 0):
320 |                 av_idx = i
321 |                 break
322 |         if av_idx is None:
323 |             continue
324 | 
325 |         # Extract x and y coordinates and fit a linear regression model
326 |         X = group[[f'x_{av_idx}']].values.reshape(-1, 1)
327 |         y = group[[f'y_{av_idx}']].values.reshape(-1, 1)
328 |         model = LinearRegression()
329 |         model.fit(X, y)
330 |         y_pred = model.predict(X)
331 |         r2 = r2_score(y, y_pred)
332 | 
333 |         # Check if the trajectory is straight using the R² value
334 |         if r2 >= straight_threshold:
335 |             slope = model.coef_[0][0]
336 |             direction_vector = np.array([1, slope])
337 |             direction_norm = np.linalg.norm(direction_vector)
338 |             direction_vector /= direction_norm
339 |             av_directions[traj_id] = (av_idx, direction_vector)
340 |             is_traj_straight[traj_id] = True
341 |         else:
342 |             is_traj_straight[traj_id] = False
343 | 
344 |     # Remove trajectories that are not considered straight
345 |     traj_ids_to_delete = [traj_id for traj_id, result in is_traj_straight.items() if not result]
346 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
347 | 
348 |     # 2. Remove trajectories that are not following any vehicle
349 |     nearest_vehicle_indices = {}
350 |     traj_ids_to_delete = []
351 |     for traj_id, group in data.groupby('Trajectory_ID'):
352 |         av_idx = av_directions[traj_id][0]
353 |         ref_x_direction, ref_y_direction = av_directions[traj_id][1]
354 |         nearest_vehicles_per_row = []
355 |         found_nearest_in_all_rows = True
356 |         first_row_idx = group.index[0]
357 |         for row_idx, row in group.iterrows():
358 |             min_distance = float('inf')
359 |             nearest_vehicle_idx = None
360 |             for i in range(int((len(group.columns) - 2) / 4)):
361 |                 if i == av_idx:
362 |                     continue
363 | 
364 |                 # Exclude oncoming traffic (needs a previous sample, so not on the first row)
365 |                 if row_idx != first_row_idx:
366 |                     previous_row = group.loc[row_idx - 1]
367 |                     # Dot product of the frame-to-frame displacements of vehicle i and the AV;
368 |                     # a negative value means they move in opposite directions
369 |                     dot_product = (row[f'x_{i}'] - previous_row[f'x_{i}']) * \
370 |                                   (row[f'x_{av_idx}'] - previous_row[f'x_{av_idx}']) + \
371 |                                   (row[f'y_{i}'] - previous_row[f'y_{i}']) * \
372 |                                   (row[f'y_{av_idx}'] - previous_row[f'y_{av_idx}'])
373 |                     if dot_product < 0:
374 |                         continue
375 | 
376 |                 # Check if vehicles are moving in a straight line and in the same direction
377 |                 x_direction = row[f'x_{i}'] - row[f'x_{av_idx}']
378 |                 y_direction = row[f'y_{i}'] - row[f'y_{av_idx}']
379 |                 vector_length = np.sqrt(x_direction ** 2 + y_direction ** 2)
380 |                 vector_cosine = ((x_direction * ref_x_direction + y_direction * ref_y_direction) /
381 |                                  vector_length) if vector_length != 0 else 0
382 |                 if vector_cosine >= direction_threshold:
383 |                     distance = vector_length
384 |                     if distance < min_distance:
385 |                         min_distance = distance
386 |                         nearest_vehicle_idx = i
387 |             if nearest_vehicle_idx is None:
388 |                 found_nearest_in_all_rows = False
389 |                 break
390 |             else:
391 |                 nearest_vehicles_per_row.append(nearest_vehicle_idx)
392 | 
393 |         # Remove trajectories if they do not consistently follow the same vehicle
394 |         if not found_nearest_in_all_rows or len(set(nearest_vehicles_per_row)) > 1:
395 |             traj_ids_to_delete.append(traj_id)
396 |         else:
397 |             nearest_vehicle_indices[traj_id] = nearest_vehicles_per_row[0]
398 | 
399 |     # Remove data for trajectories without a consistent following vehicle
400 |     data = data[~data['Trajectory_ID'].isin(traj_ids_to_delete)]
401 | 
402 |     # 3. Prepare a new DataFrame with the filtered data
403 |     new_rows = []
404 |     for traj_id, group in data.groupby('Trajectory_ID'):
405 |         if traj_id not in av_directions or traj_id not in nearest_vehicle_indices:
406 |             continue  # Skip if the trajectory ID is not in the filtered set
407 | 
408 |         av_idx = av_directions[traj_id][0]
409 |         nearest_idx = nearest_vehicle_indices[traj_id]
410 |         for _, row in group.iterrows():
411 |             leader_x = row[f'x_{av_idx}']
412 |             leader_y = row[f'y_{av_idx}']
413 |             leader_speed = row[f'speed_{av_idx}']
414 |             follower_x = row[f'x_{nearest_idx}']
415 |             follower_y = row[f'y_{nearest_idx}']
416 |             follower_speed = row[f'speed_{nearest_idx}']
417 |             new_row = {'Trajectory_ID': traj_id, 'leader_speed': leader_speed, 'follower_speed': follower_speed,
418 |                        'leader_x': leader_x, 'leader_y': leader_y, 'follower_x': follower_x, 'follower_y': follower_y}
419 |             new_rows.append(new_row)
420 | 
421 |     # Create DataFrame from the filtered data
422 |     df = pd.DataFrame(new_rows)
423 |     df = df.reset_index(drop=True)
424 |     row_count = data.shape[0]
425 |     if row_count % 110 != 0:
426 |         print(f"Error: The number of rows ({row_count}) is not a multiple of 110.")
427 |     df['Trajectory_ID'] = df.index // 110
428 | 
429 |     # 4. Remove trajectories where the space gap and speed difference do not match
430 |     num_before = len(df)
431 |     # default_vehicle_length presumably comes from the data_transformation wildcard import
432 |     df['Space_Gap'] = (np.sqrt(
433 |         (df['leader_x'] - df['follower_x']) ** 2 + (df['leader_y'] - df['follower_y']) ** 2)) - default_vehicle_length
434 | 
435 |     df['Speed_Diff'] = df['leader_speed'] - df['follower_speed']
436 | 
437 |     grouped = df.groupby('Trajectory_ID').agg(
438 |         Speed_Diff_Mean=('Speed_Diff', 'mean'),
439 |         Space_Gap_Change=('Space_Gap', lambda x: x.iloc[-1] - x.iloc[0])
440 |     )
441 | 
442 |     grouped['Relative_Diff'] = abs(grouped['Space_Gap_Change'] - (grouped['Speed_Diff_Mean'] * 11)) / (
443 |             grouped['Speed_Diff_Mean'] * 11)
444 | 
445 |     traj_to_remove_relative = grouped[grouped['Relative_Diff'] > relative_diff_threshold].index
446 | 
447 |     df = df[~df['Trajectory_ID'].isin(traj_to_remove_relative)]
448 | 
449 |     df.drop(['Space_Gap', 'Speed_Diff'], axis=1, inplace=True)
450 | 
451 |     num_after = len(df)
452 |     print(
453 |         f'Total number of trajectories: {num_before // 110}, number of trajectories deleted: {(num_before - num_after) // 110}.')
454 | 
455 |     df.to_csv(output_path, index=False)
456 | 
457 | 
458 | def combine():
459 |     for i in range(1):
460 |         merge_data_list = []
461 |         for j in range(25):
462 |             cf_path = f'./Dataset/Argoverse/data/val/CF_trajectories_{i * 25 + j + 1}.csv'
463 |             merge_data_list.append(cf_path)
464 |         merge_data_path = f'./Dataset/Argoverse/output/step0_CF_trajectory_{i}.csv'
465 |         merge_data(merge_data_list, merge_data_path)
466 | 
--------------------------------------------------------------------------------
/Code/Main.py:
--------------------------------------------------------------------------------
1 | from CF_extraction import *
2 | from data_transformation import *
3 | from data_cleaning import *
4 | from model_calibration import *
5 | from data_analysis import *
6 | from pathlib import Path
7 | 
8 | 
9 | def Vanderbilt_two_vehicle_ACC():
10 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
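    # A rough sketch of the uniform car-following format, inferred from the columns
    # consumed in data_analysis.py (the authoritative schema is built in
    # data_transformation.py): one row per timestep with at least
    #   Trajectory_ID, Time_Index,
    #   Speed_LV, Acc_LV              (leading vehicle),
    #   Speed_FAV, Acc_FAV            (following automated vehicle),
    #   Spatial_Gap, Spatial_Headway, Speed_Diff (= Speed_LV - Speed_FAV).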
11 |     original_data_path = './Dataset/Vanderbilt/data/Two-vehicle ACC driving, Tennessee 2019/Processed_CAN_Data_a.csv'
12 |     uniform_format_path = './Dataset/Vanderbilt/output/step1_two_vehicle_ACC.csv'
13 |     Vanderbilt_convert_format(original_data_path, uniform_format_path)
14 |     step1_stat_result_path = './Dataset/Vanderbilt/output/step1_analysis_two_vehicle_ACC'
15 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
16 | 
17 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
18 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
19 |                                 1e10, -1e10, 1e10, -1e10,
20 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
21 |     step2_data_path = f'./Dataset/Vanderbilt/output/step2_two_vehicle_ACC.csv'
22 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
23 |     step2_stat_result_path = './Dataset/Vanderbilt/output/step2_analysis_two_vehicle_ACC'
24 |     analyze_statistics(step2_data_path, step2_stat_result_path)
25 | 
26 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
27 |     clean_data = fill_and_clean(step2_data_path, 10, None,
28 |                                 120, 1e-5, 1e10, 0.1,
29 |                                 1e10, 0.1, 5, -5, 5, -5)
30 |     step3_data_path = f'./Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv'
31 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
32 |     step3_stat_result_path = './Dataset/Vanderbilt/output/step3_analysis_two_vehicle_ACC'
33 |     analyze_statistics(step3_data_path, step3_stat_result_path)
34 | 
35 |     # Analysis
36 |     performance_result_path = './Dataset/Vanderbilt/output/performance_metrics_two_vehicle_ACC.csv'
37 |     analyze_AV_performance(step3_data_path, performance_result_path)
38 | 
39 |     scatter_plot_path = './Dataset/Vanderbilt/output/scatter_two_vehicle_ACC'
40 |     draw_scatter(step3_data_path, scatter_plot_path)
41 | 
42 |     calibration_result_path = './Dataset/Vanderbilt/output/calibration_two_vehicle_ACC.csv'
43 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
44 |     linear_regression.main(calibration_result_path, "linear")
45 | 
46 | 
47 | def MicroSimACC():
48 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
49 |     merge_data_list = []
50 |     for p1 in [35, 60]:
51 |         for p2 in [0, 15, 25, 35, 45, 50, 55]:
52 |             if p2 > p1 - 10: continue
53 |             for p3 in ['L', 'M', 'S']:
54 |                 for p4 in range(1, 5):
55 |                     original_data_path = \
56 |                         f'./Dataset/MicroSimACC/data/2-Vehicle ACC Car Following Experiments (CCF, Same Desired Speed)/{p1}_{p2}_{p3}_{p4}.csv'
57 |                     uniform_format_path = f'./Dataset/MicroSimACC/output/step1_same_speed_{p1}_{p2}_{p3}_{p4}.csv'
58 |                     MicroSimACC_convert_format(original_data_path, uniform_format_path)
59 |                     merge_data_list.append(uniform_format_path)
60 | 
61 |     merge_data_path = f'./Dataset/MicroSimACC/output/step1_merge.csv'
62 |     merge_data(merge_data_list, merge_data_path)
63 |     step1_stat_result_path = './Dataset/MicroSimACC/output/step1_analysis'
64 |     analyze_statistics(merge_data_path, step1_stat_result_path)
65 | 
66 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
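    # Hedged reading of the fill_and_clean arguments (the function is defined in
    # data_cleaning.py): 10 looks like a maximum run of missing frames to fill, the
    # list holds per-column fill limits, and the remaining ten numbers are
    # upper/lower bound pairs. At this step the bounds are +/-1e10, i.e. effectively
    # disabled, so step 2 only fills gaps; the step 3 calls pass the real physical
    # bounds (e.g. accelerations limited to +/-5 m/s^2).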
67 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
68 |                                 1e10, -1e10, 1e10, -1e10,
69 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
70 |     step2_data_path = f'./Dataset/MicroSimACC/output/step2.csv'
71 |     revise_traj_id(clean_data, step2_data_path, 0.2, 70, 0, 0)
72 |     step2_stat_result_path = './Dataset/MicroSimACC/output/step2_analysis'
73 |     analyze_statistics(step2_data_path, step2_stat_result_path)
74 | 
75 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
76 |     clean_data = fill_and_clean(step2_data_path, 10, None,
77 |                                 120, 1e-5, 1e10, 0.1,
78 |                                 1e10, 0.1, 5, -5, 5, -5)
79 |     step3_data_path = f'./Dataset/MicroSimACC/output/step3.csv'
80 |     revise_traj_id(clean_data, step3_data_path, 0.2, 70, 0, 0)
81 |     step3_stat_result_path = './Dataset/MicroSimACC/output/step3_analysis'
82 |     analyze_statistics(step3_data_path, step3_stat_result_path)
83 | 
84 |     # Analysis
85 |     performance_result_path = './Dataset/MicroSimACC/output/performance_metrics.csv'
86 |     analyze_AV_performance(step3_data_path, performance_result_path)
87 | 
88 |     scatter_plot_path = './Dataset/MicroSimACC/output/scatter'
89 |     draw_scatter(step3_data_path, scatter_plot_path)
90 | 
91 |     calibration_result_path = './Dataset/MicroSimACC/output/calibration.csv'
92 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
93 |     linear_regression.main(calibration_result_path, "linear")
94 | 
95 | 
96 | def CATS_ACC():
97 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
98 |     merge_data_list = []
99 |     for i in range(1, 6):
100 |         original_data_path = f'./Dataset/CATS/data/ACC/test1118/test{i}.xlsx'
101 |         uniform_format_path = f'./Dataset/CATS/output/step1_ACC_test1118_{i}.csv'
102 |         CATSACC_convert_format(original_data_path, uniform_format_path)
103 |         merge_data_list.append(uniform_format_path)
104 |     for i in range(1, 9):
105 |         original_data_path = f'./Dataset/CATS/data/ACC/test1124/test{i}.xlsx'
106 |         uniform_format_path = f'./Dataset/CATS/output/step1_ACC_test1124_{i}.csv'
107 |         CATSACC_convert_format(original_data_path, uniform_format_path)
108 |         merge_data_list.append(uniform_format_path)
109 | 
110 |     merge_data_path = f'./Dataset/CATS/output/step1_ACC_merge.csv'
111 |     merge_data(merge_data_list, merge_data_path)
112 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_ACC'
113 |     analyze_statistics(merge_data_path, step1_stat_result_path)
114 | 
115 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
116 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
117 |                                 1e10, -1e10, 1e10, -1e10,
118 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
119 |     step2_data_path = f'./Dataset/CATS/output/step2_ACC.csv'
120 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
121 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_ACC'
122 |     analyze_statistics(step2_data_path, step2_stat_result_path)
123 | 
124 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
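    # Hedged reading of revise_traj_id(clean_data, path, 0.1, 70, 0, 0), defined in
    # data_cleaning.py / data_transformation.py: 0.1 appears to be the sampling
    # interval in seconds and 70 a minimum trajectory length in rows; the meaning
    # of the trailing zeros is not visible from this file.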
125 |     clean_data = fill_and_clean(step2_data_path, 10, None,
126 |                                 120, 1e-5, 1e10, 0.1,
127 |                                 1e10, 0.1, 5, -5, 5, -5)
128 |     step3_data_path = f'./Dataset/CATS/output/step3_ACC.csv'
129 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
130 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_ACC'
131 |     analyze_statistics(step3_data_path, step3_stat_result_path)
132 | 
133 |     # Analysis
134 |     performance_result_path = './Dataset/CATS/output/performance_metrics_ACC.csv'
135 |     analyze_AV_performance(step3_data_path, performance_result_path)
136 |     scatter_plot_path = './Dataset/CATS/output/scatter_ACC'
137 |     draw_scatter(step3_data_path, scatter_plot_path)
138 | 
139 |     calibration_result_path = './Dataset/CATS/output/calibration_ACC.csv'
140 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
141 |     linear_regression.main(calibration_result_path, 'linear')
142 | 
143 | 
144 | def CATS_platoon():
145 |     # Step 1: Convert dataset to a uniform car-following data format and analyze statistics.
146 |     original_data_path = f'./Dataset/CATS/data/Platoon'
147 |     uniform_format_path = f'./Dataset/CATS/output/step1_platoon.csv'
148 |     CATSPlatoon_convert_format(original_data_path, uniform_format_path)
149 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_platoon'
150 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
151 | 
152 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
153 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
154 |                                 1e10, -1e10, 1e10, -1e10,
155 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
156 |     step2_data_path = f'./Dataset/CATS/output/step2_platoon.csv'
157 |     revise_traj_id(clean_data, step2_data_path, 1, 90, 0, 0)
158 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_platoon'
159 |     analyze_statistics(step2_data_path, step2_stat_result_path)
160 | 
161 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
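    # The platoon data appears to be sampled at 1 Hz, which is why revise_traj_id
    # gets (1, 90) here instead of the (0.1, 70) used for the other datasets, and
    # why CFModelRegress below is constructed with a 1 s timestep.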
162 |     clean_data = fill_and_clean(step2_data_path, 10, None,
163 |                                 120, 1e-5, 1e10, 0.1,
164 |                                 1e10, 0.1, 5, -5, 5, -5)
165 |     step3_data_path = f'./Dataset/CATS/output/step3_platoon.csv'
166 |     revise_traj_id(clean_data, step3_data_path, 1, 90, 0, 0)
167 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_platoon'
168 |     analyze_statistics(step3_data_path, step3_stat_result_path)
169 | 
170 |     # Analysis
171 |     performance_result_path = './Dataset/CATS/output/performance_metrics_platoon.csv'
172 |     analyze_AV_performance(step3_data_path, performance_result_path)
173 |     scatter_plot_path = './Dataset/CATS/output/scatter_platoon'
174 |     draw_scatter(step3_data_path, scatter_plot_path)
175 | 
176 |     calibration_result_path = './Dataset/CATS/output/calibration_platoon.csv'
177 |     linear_regression = CFModelRegress(step3_data_path, 1)
178 |     linear_regression.main(calibration_result_path, 'linear')
179 | 
180 | 
181 | def CATS_UWM():
182 |     merge_data_list = []
183 |     for i in range(1, 6):
184 |         original_data_path = f'./Dataset/CATS/data/UWM/Test{i}.csv'
185 |         uniform_format_path = f'./Dataset/CATS/output/step1_UWM_test{i}.csv'
186 |         CATSUW_convert_format(original_data_path, uniform_format_path)
187 |         merge_data_list.append(uniform_format_path)
188 | 
189 |     merge_data_path = f'./Dataset/CATS/output/step1_UWM_merge.csv'
190 |     merge_data(merge_data_list, merge_data_path)
191 |     step1_stat_result_path = './Dataset/CATS/output/step1_analysis_UWM'
192 |     analyze_statistics(merge_data_path, step1_stat_result_path)
193 | 
194 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
195 |                                 1e10, -1e10, 1e10, -1e10,
196 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
197 |     step2_data_path = f'./Dataset/CATS/output/step2_UWM.csv'
198 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
199 |     step2_stat_result_path = './Dataset/CATS/output/step2_analysis_UWM'
200 |     analyze_statistics(step2_data_path, step2_stat_result_path)
201 | 
202 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
203 |     clean_data = fill_and_clean(step2_data_path, 10, None,
204 |                                 120, 1e-5, 1e10, 0.1,
205 |                                 1e10, 0.1, 5, -5, 5, -5)
206 |     step3_data_path = f'./Dataset/CATS/output/step3_UWM.csv'
207 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
208 |     step3_stat_result_path = './Dataset/CATS/output/step3_analysis_UWM'
209 |     analyze_statistics(step3_data_path, step3_stat_result_path)
210 | 
211 |     # Analysis
212 |     performance_result_path = './Dataset/CATS/output/performance_metrics_UWM.csv'
213 |     analyze_AV_performance(step3_data_path, performance_result_path)
214 |     scatter_plot_path = './Dataset/CATS/output/scatter_UWM'
215 |     draw_scatter(step3_data_path, scatter_plot_path)
216 | 
217 |     calibration_result_path = './Dataset/CATS/output/calibration_UWM.csv'
218 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
219 |     linear_regression.main(calibration_result_path, 'linear')
220 | 
221 | 
222 | def OpenACC_Casale():
223 |     id_map = {
224 |         "Hyundai": 0,
225 |         "Rexton": 1
226 |     }
227 | 
228 |     merge_data_list = []
229 |     for i in range(3, 12):
230 |         original_data_path = f'./Dataset/OpenACC/data/Casale/part{i}.csv'
231 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_Casale_{i}.csv'
232 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
233 |         merge_data_list.append(uniform_format_path)
234 | 
235 |     merge_data_path = f'./Dataset/OpenACC/output/step1_Casale_merge.csv'
236 |     merge_data(merge_data_list, merge_data_path)
237 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_Casale'
238 |     analyze_statistics(merge_data_path, step1_stat_result_path)
239 | 
240 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
241 |                                 1e10, -1e10, 1e10, -1e10,
242 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
243 |     step2_data_path = './Dataset/OpenACC/output/step2_Casale.csv'
244 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
245 |     step2_stat_result_path = './Dataset/OpenACC/output/step2_analysis_Casale'
246 |     analyze_statistics(step2_data_path, step2_stat_result_path)
247 | 
248 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
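    # The "linear" mode passed to CFModelRegress.main below presumably fits a
    # linear car-following law by least squares. A minimal sketch of that idea
    # (variable names hypothetical; the real implementation is in
    # model_calibration.py):
    #
    #   data = pd.read_csv(step3_data_path)
    #   A = np.column_stack([np.ones(len(data)), data['Spatial_Gap'],
    #                        data['Speed_FAV'], data['Speed_Diff']])
    #   coef, *_ = np.linalg.lstsq(A, data['Acc_FAV'].values, rcond=None)
    #   # Acc_FAV ~ c0 + c1 * gap + c2 * own speed + c3 * speed difference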
249 |     clean_data = fill_and_clean(step2_data_path, 10, None,
250 |                                 120, 1e-5, 1e10, 0.1,
251 |                                 1e10, 0.1, 5, -5, 5, -5)
252 |     step3_data_path = f'./Dataset/OpenACC/output/step3_Casale.csv'
253 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
254 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_Casale'
255 |     analyze_statistics(step3_data_path, step3_stat_result_path)
256 | 
257 |     # Analysis
258 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_Casale.csv'
259 |     analyze_AV_performance(step3_data_path, performance_result_path)
260 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_Casale'
261 |     draw_scatter(step3_data_path, scatter_plot_path)
262 | 
263 |     calibration_result_path = './Dataset/OpenACC/output/calibration_Casale.csv'
264 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
265 |     linear_regression.main(calibration_result_path, 'linear')
266 | 
267 | 
268 | def OpenACC_Vicolungo():
269 |     file_names = ['JRC-VC_260219_part2', 'JRC-VC_260219_part3', 'JRC-VC_260219_part4_highway',
270 |                   'VC-JRC_260219_part1', 'VC-JRC_260219_part2', 'VC-JRC_270219_part1', 'VC-JRC_270219_part2',
271 |                   'VC-JRC_280219_part2', 'VC-JRC_280219_part3']
272 |     id_map = {
273 |         "Ford(S-Max)": 0,
274 |         "KIA(Niro)": 1,
275 |         "Mini(Cooper)": 2,
276 |         "Mitsubishi(OutlanderPHEV)": 3,
277 |         "Mitsubishi(SpaceStar)": 4,
278 |         "Peugeot(3008GTLine)": 5,
279 |         "VW(GolfE)": 6
280 |     }
281 | 
282 |     merge_data_list = []
283 |     for i in range(1, 8):
284 |         original_data_path = f'./Dataset/OpenACC/data/Vicolungo/' + file_names[i] + '.csv'
285 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_Vicolungo_{i}.csv'
286 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
287 |         merge_data_list.append(uniform_format_path)
288 | 
289 |     merge_data_path = f'./Dataset/OpenACC/output/step1_Vicolungo_merge.csv'
290 |     merge_data(merge_data_list, merge_data_path)
291 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_Vicolungo'
292 |     analyze_statistics(merge_data_path, step1_stat_result_path, False)
293 | 
294 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
295 |                                 1e10, -1e10, 1e10, -1e10,
296 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
297 |     step2_data_path = f'./Dataset/OpenACC/output/step2_Vicolungo.csv'
298 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
299 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_Vicolungo'
300 |     analyze_statistics(step2_data_path, step2_stat_result_path)
301 | 
302 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
303 |     clean_data = fill_and_clean(step2_data_path, 10, None,
304 |                                 120, 1e-5, 1e10, 0.1,
305 |                                 1e10, 0.1, 5, -5, 5, -5)
306 |     step3_data_path = f'./Dataset/OpenACC/output/step3_Vicolungo.csv'
307 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
308 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_Vicolungo'
309 |     analyze_statistics(step3_data_path, step3_stat_result_path)
310 | 
311 |     # Analysis
312 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_Vicolungo.csv'
313 |     analyze_AV_performance(step3_data_path, performance_result_path)
314 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_Vicolungo'
315 |     draw_scatter(step3_data_path, scatter_plot_path)
316 | 
317 |     calibration_result_path = './Dataset/OpenACC/output/calibration_Vicolungo.csv'
318 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
319 |     linear_regression.main(calibration_result_path, 'linear')
320 | 
321 | 
322 | def OpenACC_ASta():
323 |     id_map = {
324 |         "Audi(A6)": 0,
325 |         "Audi(A8)": 1,
326 |         "BMW(X5)": 2,
327 |         "Mercedes(AClass)": 3,
328 |         "Tesla(Model3)": 4
329 |     }
330 | 
331 |     merge_data_list = []
332 |     for i in range(1, 11):
333 |         if i != 3 and i != 10:  # ignore human driving data
334 |             original_data_path = f'./Dataset/OpenACC/data/ASta/ASta_platoon{i}.csv'
335 |             uniform_format_path = f'./Dataset/OpenACC/output/step1_ASta_{i}.csv'
336 |             OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
337 |             merge_data_list.append(uniform_format_path)
338 | 
339 |     merge_data_path = f'./Dataset/OpenACC/output/step1_ASta_merge.csv'
340 |     merge_data(merge_data_list, merge_data_path)
341 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_ASta'
342 |     analyze_statistics(merge_data_path, step1_stat_result_path)
343 | 
344 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
345 |                                 1e10, -1e10, 1e10, -1e10,
346 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
347 |     step2_data_path = './Dataset/OpenACC/output/step2_ASta.csv'
348 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
349 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_ASta'
350 |     analyze_statistics(step2_data_path, step2_stat_result_path)
351 | 
352 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
353 |     clean_data = fill_and_clean(step2_data_path, 10, None,
354 |                                 120, 1e-5, 1e10, 0.1,
355 |                                 1e10, 0.1, 5, -5, 5, -5)
356 |     step3_data_path = f'./Dataset/OpenACC/output/step3_ASta.csv'
357 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
358 |     step3_stat_result_path = './Dataset/OpenACC/output/step3_analysis_ASta'
359 |     analyze_statistics(step3_data_path, step3_stat_result_path)
360 | 
361 |     # Analysis
362 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_ASta.csv'
363 |     analyze_AV_performance(step3_data_path, performance_result_path)
364 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_ASta'
365 |     draw_scatter(step3_data_path, scatter_plot_path)
366 | 
367 |     calibration_result_path = './Dataset/OpenACC/output/calibration_ASta.csv'
368 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
369 |     linear_regression.main(calibration_result_path, 'linear')
370 | 
371 | 
372 | def OpenACC_ZalaZone():
373 |     id_map = {
374 |         "AUDI_A4": 0,
375 |         "AUDI_E_TRON": 1,
376 |         "BMW_I3": 2,
377 |         "JAGUAR_I_PACE": 3,
378 |         "MAZDA_3": 4,
379 |         "MERCEDES_GLE450": 5,
380 |         "SMART_TARGET": 6,
381 |         "SKODA_TARGET": 7,
382 |         "TESLA_MODEL3": 8,
383 |         "TESLA_MODELS": 9,
384 |         "TESLA_MODELX": 10,
385 |         "TOYOTA_RAV4": 11
386 |     }
387 | 
388 |     merge_data_list = []
389 |     for i in range(1, 27):
390 |         original_data_path = f'./Dataset/OpenACC/data/ZalaZone/dynamic_part{i}.csv'
391 |         uniform_format_path = f'./Dataset/OpenACC/output/step1_ZalaZone_dynamic_{i}.csv'
392 |         OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
393 |         merge_data_list.append(uniform_format_path)
394 | 
395 |     for i in range(1, 48):
396 |         if i != 30 and i != 40 and i != 43:  # ignore human driving data
397 |             original_data_path = f'./Dataset/OpenACC/data/ZalaZone/handling_part{i}.csv'
398 |             uniform_format_path = f'./Dataset/OpenACC/output/step1_ZalaZone_handling_{i}.csv'
399 |             OpenACC_convert_format(original_data_path, uniform_format_path, id_map)
400 |             merge_data_list.append(uniform_format_path)
401 | 
402 |     merge_data_path = f'./Dataset/OpenACC/output/step1_ZalaZone_merge.csv'
403 |     merge_data(merge_data_list, merge_data_path)
404 |     step1_stat_result_path = './Dataset/OpenACC/output/step1_analysis_ZalaZone'
405 |     analyze_statistics(merge_data_path, step1_stat_result_path)
406 | 
407 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
408 |                                 1e10, -1e10, 1e10, -1e10,
409 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
410 |     step2_data_path = './Dataset/OpenACC/output/step2_ZalaZone.csv'
411 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
412 |     step2_stat_result_path = F'./Dataset/OpenACC/output/step2_analysis_ZalaZone'
413 |     analyze_statistics(step2_data_path, step2_stat_result_path)
414 | 
415 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
416 |     clean_data = fill_and_clean(step2_data_path, 10, None,
417 |                                 120, 1e-5, 1e10, 0.1,
418 |                                 1e10, 0.1, 5, -5, 5, -5)
419 |     step3_data_path = f'./Dataset/OpenACC/output/step3_ZalaZone.csv'
420 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
421 |     step3_stat_result_path = F'./Dataset/OpenACC/output/step3_analysis_ZalaZone'
422 |     analyze_statistics(step3_data_path, step3_stat_result_path)
423 | 
424 |     # Analysis
425 |     performance_result_path = './Dataset/OpenACC/output/performance_metrics_ZalaZone.csv'
426 |     analyze_AV_performance(step3_data_path, performance_result_path)
427 |     scatter_plot_path = './Dataset/OpenACC/output/scatter_ZalaZone'
428 |     draw_scatter(step3_data_path, scatter_plot_path)
429 | 
430 |     calibration_result_path = './Dataset/OpenACC/output/calibration_ZalaZone.csv'
431 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
432 |     linear_regression.main(calibration_result_path, 'linear')
433 | 
434 | 
435 | def Ohio_single_vehicle():
436 |     original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Single-Vehicle_Data_for_Central_Ohio.csv'
437 |     uniform_format_path = './Dataset/Ohio/output/step1_single_vehicle.csv'
438 |     Ohio_single_convert_format(original_data_path, uniform_format_path)
439 |     step1_stat_result_path = './Dataset/Ohio/output/step1_analysis_single_vehicle'
440 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
441 | 
442 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
443 |                                 1e10, -1e10, 1e10, -1e10,
444 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
445 |     step2_data_path = f'./Dataset/Ohio/output/step2_single_vehicle.csv'
446 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
447 |     step2_stat_result_path = f'./Dataset/Ohio/output/step2_analysis_single_vehicle'
448 |     analyze_statistics(step2_data_path, step2_stat_result_path)
449 | 
450 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
451 |     clean_data = fill_and_clean(step2_data_path, 10, None,
452 |                                 120, 1e-5, 1e10, 0.1,
453 |                                 1e10, 0.1, 5, -5, 5, -5)
454 |     step3_data_path = f'./Dataset/Ohio/output/step3_single_vehicle.csv'
455 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
456 |     step3_stat_result_path = f'./Dataset/Ohio/output/step3_analysis_single_vehicle'
457 |     analyze_statistics(step3_data_path, step3_stat_result_path)
458 | 
459 |     # Analysis
460 |     performance_result_path = './Dataset/Ohio/output/performance_metrics_single_vehicle.csv'
461 |     analyze_AV_performance(step3_data_path, performance_result_path)
462 |     scatter_plot_path = './Dataset/Ohio/output/scatter_single_vehicle'
463 |     draw_scatter(step3_data_path, scatter_plot_path)
464 | 
465 |     calibration_result_path = './Dataset/Ohio/output/calibration_single_vehicle.csv'
466 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
467 |     linear_regression.main(calibration_result_path, 'linear')
468 | 
469 | 
470 | def Ohio_two_vehicle():
471 |     original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Two-Vehicle_Data_for_Central_Ohio.csv'
472 |     uniform_format_path = './Dataset/Ohio/output/step1_two_vehicle'
473 |     Ohio_two_convert_format(original_data_path, uniform_format_path)
474 |     merge_data_list = []
475 |     for i in range(1, 3):
476 |         merge_data_list.append(uniform_format_path + f'_{i}.csv')
477 | 
478 |     merge_data_path = './Dataset/Ohio/output/step1_two_vehicle_merge.csv'
479 |     merge_data(merge_data_list, merge_data_path)
480 |     step1_stat_result_path = './Dataset/Ohio/output/step1_analysis_two_vehicle'
481 |     analyze_statistics(merge_data_path, step1_stat_result_path)
482 | 
483 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
484 |                                 1e10, -1e10, 1e10, -1e10,
485 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
486 |     step2_data_path = './Dataset/Ohio/output/step2_two_vehicle.csv'
487 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
488 |     step2_stat_result_path = './Dataset/Ohio/output/step2_analysis_two_vehicle'
489 |     analyze_statistics(step2_data_path, step2_stat_result_path)
490 | 
491 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
492 |     clean_data = fill_and_clean(step2_data_path, 10, None,
493 |                                 120, 1e-5, 1e10, 0.1,
494 |                                 1e10, 0.1, 5, -5, 5, -5)
495 |     step3_data_path = f'./Dataset/Ohio/output/step3_two_vehicle.csv'
496 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
497 |     step3_stat_result_path = './Dataset/Ohio/output/step3_analysis_two_vehicle'
498 |     analyze_statistics(step3_data_path, step3_stat_result_path)
499 | 
500 |     # Analysis
501 |     performance_result_path = './Dataset/Ohio/output/performance_metrics_two_vehicle.csv'
502 |     analyze_AV_performance(step3_data_path, performance_result_path)
503 |     scatter_plot_path = './Dataset/Ohio/output/scatter_two_vehicle'
504 |     draw_scatter(step3_data_path, scatter_plot_path)
505 | 
506 |     calibration_result_path = './Dataset/Ohio/output/calibration_two_vehicle.csv'
507 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
508 |     linear_regression.main(calibration_result_path, 'linear')
509 | 
510 | 
511 | def Waymo_perception():
512 |     original_data_path = './Dataset/Waymo/data/Perception/car_following_trajectory.csv'
513 |     uniform_format_path = './Dataset/Waymo/output/step1_perception.csv'
514 |     Waymo_perception_convert_format(original_data_path, uniform_format_path)
515 |     step1_stat_result_path = './Dataset/Waymo/output/step1_analysis_perception'
516 |     analyze_statistics(uniform_format_path, step1_stat_result_path)
517 | 
518 |     clean_data = fill_and_clean(uniform_format_path, 10, [None, 10, None, 10, 10, None, None],
519 |                                 1e10, -1e10, 1e10, -1e10,
520 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
521 |     step2_data_path = './Dataset/Waymo/output/step2_perception.csv'
522 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
523 |     step2_stat_result_path = './Dataset/Waymo/output/step2_analysis_perception'
524 |     analyze_statistics(step2_data_path, step2_stat_result_path)
525 | 
526 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
527 |     clean_data = fill_and_clean(step2_data_path, 10, None,
528 |                                 120, 1e-5, 1e10, 0.1,
529 |                                 1e10, 0.1, 5, -5, 5, -5)
530 |     step3_data_path = f'./Dataset/Waymo/output/step3_perception.csv'
531 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
532 |     step3_stat_result_path = './Dataset/Waymo/output/step3_analysis_perception'
533 |     analyze_statistics(step3_data_path, step3_stat_result_path)
534 | 
535 |     # Analysis
536 |     performance_result_path = './Dataset/Waymo/output/performance_metrics_perception.csv'
537 |     analyze_AV_performance(step3_data_path, performance_result_path)
538 |     scatter_plot_path = './Dataset/Waymo/output/scatter_perception'
539 |     draw_scatter(step3_data_path, scatter_plot_path)
540 | 
541 |     calibration_result_path = './Dataset/Waymo/output/calibration_perception.csv'
542 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
543 |     linear_regression.main(calibration_result_path, 'linear')
544 | 
545 | 
546 | def Waymo_motion():
547 |     # Step 1: Convert the Waymo Motion dataset to a uniform car-following data format and analyze statistics.
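    # Each Waymo Motion scenario provides 10 past + 1 current + 80 future samples
    # per agent at 0.1 s resolution (see Waymo_extract_df), i.e. 91 rows (~9.1 s)
    # per trajectory; this is why traj_id is derived with `index // 91` below and
    # why Waymo_extract_cf_traj compares the gap change against Speed_Diff_Mean * 9.1.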
548 |     cf_trajectory_list = []
549 |     for i in range(1000):
550 |         original_data_path = (
551 |                 f'./Dataset/Waymo/data/Motion/uncompressed_tf_example_training_training_tfexample.tfrecord-'
552 |                 + '{:05}'.format(i) + '-of-01000')
553 |         print("recording " + '{:05}'.format(i) + " ......")
554 |         original_data = Waymo_extract_df(original_data_path)
555 |         cf_trajectory_list.append(Waymo_extract_cf_traj(original_data))
556 |         if (i + 1) % 100 == 0:
557 |             cf_path = f'./Dataset/Waymo/output/step0_CF_trajectory_motion_{i - 99}-{i}.csv'
558 |             combined_df = pd.concat(cf_trajectory_list, axis=0, ignore_index=True)
559 |             combined_df = combined_df.reset_index()
560 |             combined_df['traj_id'] = combined_df.index // 91
561 |             combined_df.to_csv(cf_path, index=False)
562 |             cf_trajectory_list = []
563 | 
564 |     merge_data_list = []
565 |     for i in range(0, 1000, 100):
566 |         cf_path = f'./Dataset/Waymo/output/step0_CF_trajectory_motion_{i}-{i + 99}.csv'
567 |         uniform_format_path = f'./Dataset/Waymo/output/step1_motion_{i // 100}.csv'
568 |         Waymo_motion_convert_format(cf_path, uniform_format_path)
569 |         merge_data_list.append(uniform_format_path)
570 | 
571 |     merge_data_path = f'./Dataset/Waymo/output/step1_motion_merge.csv'
572 |     merge_data(merge_data_list, merge_data_path)
573 |     step1_stat_result_path = './Dataset/Waymo/output/step1_analysis_motion'
574 |     analyze_statistics(merge_data_path, step1_stat_result_path)
575 | 
576 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
577 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None, None],
578 |                                 1e10, -1e10, 1e10, -1e10,
579 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
580 |     step2_data_path = f'./Dataset/Waymo/output/step2_motion.csv'
581 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0, False)
582 |     step2_stat_result_path = f'./Dataset/Waymo/output/step2_analysis_motion'
583 |     analyze_statistics(step2_data_path, step2_stat_result_path)
584 | 
585 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
586 |     clean_data = fill_and_clean(step2_data_path, 10, None,
587 |                                 120, 1e-5, 1e10, 0.1,
588 |                                 1e10, 0.1, 5, -5, 5, -5)
589 |     step3_data_path = './Dataset/Waymo/output/step3_motion.csv'
590 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
591 |     step3_stat_result_path = f'./Dataset/Waymo/output/step3_analysis_motion'
592 |     analyze_statistics(step3_data_path, step3_stat_result_path)
593 | 
594 |     # Analysis
595 |     performance_result_path = './Dataset/Waymo/output/performance_metrics_motion.csv'
596 |     analyze_AV_performance(step3_data_path, performance_result_path)
597 |     scatter_plot_path = './Dataset/Waymo/output/scatter_motion'
598 |     draw_scatter(step3_data_path, scatter_plot_path)
599 | 
600 |     calibration_result_path = './Dataset/Waymo/output/calibration_motion.csv'
601 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
602 |     linear_regression.main(calibration_result_path, 'linear')
603 | 
604 | 
605 | def Argoverse2():
606 |     # Step 1: Convert the Argoverse 2 dataset to a uniform car-following data format and analyze statistics.
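    # Argoverse 2 scenarios span 11 s (110 samples at 0.1 s); Argo2_extract_cf_traj
    # relies on this when it re-derives Trajectory_ID via `index // 110` and when it
    # checks the gap change against Speed_Diff_Mean * 11.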
607 |     directory_path = Path('./Dataset/Argoverse/data/val')
608 |     cf_trajectory_list = []
609 |     count = 1
610 |     for original_data_path in directory_path.rglob('*'):
611 |         original_data_path = str(original_data_path) + '/scenario_' + str(original_data_path)[-36:] + '.parquet'
612 |         cf_trajectory_list.append(original_data_path)
613 |         if len(cf_trajectory_list) == 25000:
614 |             print(f"recording {count * len(cf_trajectory_list)} files......")
615 |             original_data = Argo2_extract_df(cf_trajectory_list, 0)
616 |             cf_path = f'./Dataset/Argoverse/output/CF_trajectories_{count}.csv'
617 |             Argo2_extract_cf_traj(original_data, cf_path)
618 |             cf_trajectory_list = []
619 |             count += 1
620 | 
621 |     merge_data_list = []
622 |     for i in range(1, 10):
623 |         cf_path = f'./Dataset/Argoverse/output/step0_CF_trajectory_{i}.csv'
624 |         uniform_format_path = f'./Dataset/Argoverse/output/step1_{i}.csv'
625 |         Argoverse_convert_format(cf_path, uniform_format_path)
626 |         merge_data_list.append(uniform_format_path)
627 | 
628 |     merge_data_path = f'./Dataset/Argoverse/output/step1_merge.csv'
629 |     merge_data(merge_data_list, merge_data_path)
630 |     step1_stat_result_path = './Dataset/Argoverse/output/step1_analysis'
631 |     analyze_statistics(merge_data_path, step1_stat_result_path)
632 | 
633 |     # Step 2: Clean data and revise trajectory IDs for further analysis.
634 |     clean_data = fill_and_clean(merge_data_path, 10, [None, 10, None, 10, 10, None],
635 |                                 1e10, -1e10, 1e10, -1e10,
636 |                                 1e10, -1e10, 1e10, -1e10, 1e10, -1e10)
637 |     step2_data_path = f'./Dataset/Argoverse/output/step2.csv'
638 |     revise_traj_id(clean_data, step2_data_path, 0.1, 70, 0, 0)
639 |     step2_stat_result_path = './Dataset/Argoverse/output/step2_analysis'
640 |     analyze_statistics(step2_data_path, step2_stat_result_path)
641 | 
642 |     # Step 3: Further clean and refine data, and prepare for performance analysis.
643 |     clean_data = fill_and_clean(step2_data_path, 10, None,
644 |                                 120, 1e-5, 1e10, 0.1,
645 |                                 1e10, 0.1, 5, -5, 5, -5)
646 |     step3_data_path = f'./Dataset/Argoverse/output/step3.csv'
647 |     revise_traj_id(clean_data, step3_data_path, 0.1, 70, 0, 0)
648 |     step3_stat_result_path = './Dataset/Argoverse/output/step3_analysis'
649 |     analyze_statistics(step3_data_path, step3_stat_result_path)
650 | 
651 |     # Analysis
652 |     performance_result_path = './Dataset/Argoverse/output/performance_metrics.csv'
653 |     analyze_AV_performance(step3_data_path, performance_result_path)
654 |     scatter_plot_path = './Dataset/Argoverse/output/scatter'
655 |     draw_scatter(step3_data_path, scatter_plot_path)
656 | 
657 |     calibration_result_path = './Dataset/Argoverse/output/calibration.csv'
658 |     linear_regression = CFModelRegress(step3_data_path, 0.1)
659 |     linear_regression.main(calibration_result_path, 'linear')
660 | 
661 | 
662 | if __name__ == "__main__":
663 |     # Main entry point for the data extraction and analysis.
664 |     Vanderbilt_two_vehicle_ACC()
665 | 
666 |     MicroSimACC()
667 | 
668 |     CATS_ACC()
669 |     CATS_platoon()
670 |     CATS_UWM()
671 | 
672 |     OpenACC_Casale()
673 |     OpenACC_Vicolungo()
674 |     OpenACC_ASta()
675 |     OpenACC_ZalaZone()
676 | 
677 |     Ohio_single_vehicle()
678 |     Ohio_two_vehicle()
679 | 
680 |     Argoverse2()
681 | 
682 |     Waymo_perception()
683 |     Waymo_motion()
684 | 
685 |     # Draw performance metrics distribution for various datasets.
686 |     paths = [
687 |         './Dataset/Vanderbilt/output/performance_metrics_two_vehicle_ACC.csv',
688 |         './Dataset/MicroSimACC/output/performance_metrics.csv',
689 |         './Dataset/CATS/output/performance_metrics_ACC.csv',
690 |         './Dataset/CATS/output/performance_metrics_platoon.csv',
691 |         './Dataset/CATS/output/performance_metrics_UWM.csv',
692 |         './Dataset/OpenACC/output/performance_metrics_Casale.csv',
693 |         './Dataset/OpenACC/output/performance_metrics_Vicolungo.csv',
694 |         './Dataset/OpenACC/output/performance_metrics_ASta.csv',
695 |         './Dataset/OpenACC/output/performance_metrics_ZalaZone.csv',
696 |         './Dataset/Ohio/output/performance_metrics_single_vehicle.csv',
697 |         './Dataset/Ohio/output/performance_metrics_two_vehicle.csv',
698 |         './Dataset/Waymo/output/performance_metrics_perception.csv',
699 |         './Dataset/Waymo/output/performance_metrics_motion.csv',
700 |         './Dataset/Argoverse/output/performance_metrics.csv'
701 |     ]
702 |     output_path = './Dataset/performance_metrics_'
703 |     dataset_labels = ["Vanderbilt ACC", "MicroSimACC", "CATS ACC", "CATS Platoon", "CATS UWM",
704 |                       "OpenACC Casale", "OpenACC Vicolungo", "OpenACC ASta", "OpenACC ZalaZone",
705 |                       "Ohio Single", "Ohio Two", "Waymo Perception", "Waymo Motion", "Argoverse2"]
706 |     draw_2D_perfromance_metrics(paths[0:5], output_path + '1_', dataset_labels[0:5])
707 |     draw_2D_perfromance_metrics(paths[5:9], output_path + '2_', dataset_labels[5:9])
708 |     draw_2D_perfromance_metrics(paths[9:], output_path + '3_', dataset_labels[9:])
709 | 
710 |     # Compile statistics summary and draw distribution.
711 |     output_path = './Analysis/statistics_summary_'
712 |     merge_statistics_results(output_path)
713 |     output_path = './Analysis/step3_distribution.png'
714 |     draw_statistics_distribution(output_path)
715 | 
716 |     # Analyze label statistics and correlation
717 |     paths = [
718 |         './Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv',
719 |         './Dataset/MicroSimACC/output/step3.csv',
720 |         './Dataset/CATS/output/step3_ACC.csv',
721 |         './Dataset/CATS/output/step3_platoon.csv',
722 |         './Dataset/CATS/output/step3_UWM.csv',
723 |         './Dataset/OpenACC/output/step3_Casale.csv',
724 |         './Dataset/OpenACC/output/step3_Vicolungo.csv',
725 |         './Dataset/OpenACC/output/step3_ASta.csv',
726 |         './Dataset/OpenACC/output/step3_ZalaZone.csv',
727 |         './Dataset/Ohio/output/step3_single_vehicle.csv',
728 |         './Dataset/Ohio/output/step3_two_vehicle.csv',
729 |         './Dataset/Waymo/output/step3_perception.csv',
730 |         './Dataset/Waymo/output/step3_motion.csv',
731 |         './Dataset/Argoverse/output/step3.csv'
732 |     ]
733 |     output_path = f'./Analysis/labels_statistics_'
734 |     dataset_labels = ["Vanderbilt ACC", "MicroSimACC", "CATS ACC", "CATS Platoon", "CATS UWM",
735 |                       "OpenACC Casale", "OpenACC Vicolungo", "OpenACC ASta", "OpenACC ZalaZone",
736 |                       "Ohio Single", "Ohio Two", "Waymo Perception", "Waymo Motion", "Argoverse2"]
737 |     draw_2D_labels_statistics(paths[0:5], output_path + '1_', dataset_labels[0:5])
738 |     draw_2D_labels_statistics(paths[5:9], output_path + '2_', dataset_labels[5:9])
739 |     draw_2D_labels_statistics(paths[9:], output_path + '3_', dataset_labels[9:])
740 | 
741 |     # Analyze label statistics and calculate correlation.
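    # correlation() is implemented in data_analysis.py; a plausible minimal
    # equivalent for a single dataset, assuming it relates the FAV acceleration to
    # the usual car-following inputs:
    #
    #   df = pd.read_csv('./Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv')
    #   print(df[['Acc_FAV', 'Speed_FAV', 'Spatial_Gap', 'Speed_Diff']].corr())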
742 |     output_path = './Analysis/correlation.csv'
743 |     correlation(output_path)
--------------------------------------------------------------------------------
/Code/data_analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | import pandas as pd
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def analyze_statistics(input_path, output_path, has_kde=True):
9 |     """Analyze statistical data and generate histograms for selected columns."""
10 |     df = pd.read_csv(input_path)
11 | 
12 |     # Define the columns for statistical analysis
13 |     columns_to_describe = ['Speed_LV', 'Acc_LV', 'Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Spatial_Headway']
14 |     statistics = df[columns_to_describe].describe()
15 |     statistics.to_csv(output_path + '_statistics.csv')
16 | 
17 |     # Define columns to generate histograms
18 |     columns_to_check = ['Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Speed_Diff']
19 |     for column in columns_to_check:
20 |         plt.figure(figsize=(18, 14))
21 |         plt.tick_params(axis='x', labelsize=40)  # Set font size for x-axis labels
22 |         plt.tick_params(axis='y', labelsize=40)  # Set font size for y-axis labels
23 |         if column == 'Acc_FAV':
24 |             plt.title('Distribution of $a$', fontsize=50)
25 |             sns.histplot(df[column], kde=has_kde, color=(42 / 255, 157 / 255, 140 / 255), line_kws={'linewidth': 5})
26 |             plt.xlabel('$a$ ($m/s^2$)', fontsize=50)
27 |         elif column == 'Spatial_Gap':
28 |             plt.title('Distribution of $s$', fontsize=50)
29 |             sns.histplot(df[column], kde=has_kde, color=(233 / 255, 196 / 255, 107 / 255), line_kws={'linewidth': 5})
30 |             plt.xlabel('$s$ ($m$)', fontsize=50)
31 |         elif column == 'Speed_Diff':
32 |             plt.title(r'Distribution of $\Delta v$', fontsize=50)
33 |             sns.histplot(df[column], kde=has_kde, color=(230 / 255, 111 / 255, 81 / 255), line_kws={'linewidth': 5})
34 |             plt.xlabel(r'$\Delta v$ ($m/s$)', fontsize=50)
35 |         else:
36 |             plt.title('Distribution of $v$', fontsize=50)
37 |             sns.histplot(df[column], kde=has_kde, color=(75 / 255, 101 / 255, 175 / 255), line_kws={'linewidth': 5})
38 |             plt.xlabel('$v$ ($m/s$)', fontsize=50)
39 |         plt.ylabel('Frequency', fontsize=50)
40 |         plt.savefig(output_path + "_" + column + '.png')
41 |         plt.close()
42 | 
43 | 
44 | def merge_statistics_results(output_path):
45 |     name = ['Vanderbilt_two_vehicle_ACC',
46 |             'MicroSimACC',
47 |             'CATS_ACC',
48 |             'CATS_platoon',
49 |             'CATS_UWM',
50 |             'OpenACC_Casale',
51 |             'OpenACC_Vicolungo',
52 |             'OpenACC_ASta',
53 |             'OpenACC_ZalaZone',
54 |             'Ohio_single_vehicle',
55 |             'Ohio_two_vehicle',
56 |             'Waymo_perception',
57 |             'Waymo_motion',
58 |             'Argoverse2']
59 |     input_path = [[
60 |         './Dataset/Vanderbilt/output/',
61 |         './Dataset/MicroSimACC/output/',
62 |         './Dataset/CATS/output/',
63 |         './Dataset/CATS/output/',
64 |         './Dataset/CATS/output/',
65 |         './Dataset/OpenACC/output/',
66 |         './Dataset/OpenACC/output/',
67 |         './Dataset/OpenACC/output/',
68 |         './Dataset/OpenACC/output/',
69 |         './Dataset/Ohio/output/',
70 |         './Dataset/Ohio/output/',
71 |         './Dataset/Waymo/output/',
72 |         './Dataset/Waymo/output/',
73 |         './Dataset/Argoverse/output/'
74 |     ], [
75 |         '_analysis_two_vehicle_ACC_statistics.csv',
76 |         '_analysis_statistics.csv',
77 |         '_analysis_ACC_statistics.csv',
78 |         '_analysis_platoon_statistics.csv',
79 |         '_analysis_UWM_statistics.csv',
80 |         '_analysis_Casale_statistics.csv',
81 |         '_analysis_Vicolungo_statistics.csv',
82 |         '_analysis_ASta_statistics.csv',
83 |         '_analysis_ZalaZone_statistics.csv',
84 |         '_analysis_single_vehicle_statistics.csv',
'_analysis_single_vehicle_statistics.csv',
85 |         '_analysis_two_vehicle_statistics.csv',
86 |         '_analysis_perception_statistics.csv',
87 |         '_analysis_motion_statistics.csv',
88 |         '_analysis_statistics.csv'
89 |     ]]
90 | 
91 |     for step in ['step1', 'step2', 'step3']:
92 | 
93 |         temp_df = pd.DataFrame()
94 |         for i in range(len(input_path[0])):
95 |             file_path = input_path[0][i] + step + input_path[1][i]
96 |             df = pd.read_csv(file_path, index_col=0)
97 | 
98 |             print(f"{name[i]}, {step}, {df.loc['count', 'Speed_LV']}")
99 | 
100 |             rows_to_keep = ['mean', 'std', 'min', 'max']
101 |             df = df.loc[df.index.isin(rows_to_keep)]
102 | 
103 |             df = df.reset_index().melt(id_vars=['index'], var_name='variable', value_name='value')
104 | 
105 |             df.columns = ['Statistics', 'Variables', name[i]]
106 | 
107 |             if temp_df.empty:
108 |                 temp_df = df
109 |             else:
110 |                 temp_df = pd.concat([temp_df, df])
111 | 
112 |         temp_df = temp_df.groupby(['Variables', 'Statistics'], sort=False).sum().reset_index()
113 |         temp_df.to_csv(output_path + step + '.csv', index=False)
114 | 
115 | 
116 | def analyze_AV_performance(input_path, output_path):
117 |     df = pd.read_csv(input_path)
118 | 
119 |     # Define a function to calculate Time to Collision (TTC), which measures safety.
120 |     def calculate_TTC(space_gap, speed_diff):
121 |         if speed_diff >= 0:
122 |             return np.nan  # If relative speed is non-negative, TTC is not defined (no collision expected).
123 |         return -space_gap / speed_diff
124 | 
125 |     # Define the coefficient matrix for the VT-micro model, a microscopic fuel consumption and emission model.
126 |     K_matrix = np.array([
127 |         [-7.537, 0.4438, 0.1716, -0.0420],
128 |         [0.0973, 0.0518, 0.0029, -0.0071],
129 |         [-0.003, -7.42e-04, 1.09e-04, 1.16e-04],
130 |         [5.3e-05, 6e-06, -1e-05, -6e-06]
131 |     ])
132 | 
133 |     # Calculate the VT model for fuel consumption and environmental impact.
134 |     def calculate_VT_model(v, a, K):
135 |         sum_j1_j2 = 0
136 |         for j1 in range(4):
137 |             for j2 in range(4):
138 |                 sum_j1_j2 += K[j1][j2] * (v ** j1) * (a ** j2)
139 |         F = np.exp(sum_j1_j2)
140 |         return F
141 | 
142 |     # Define Vehicle Specific Power (VSP), used below to estimate fuel consumption.
143 |     def calculate_VSP(v, a):
144 |         return v * (1.1 * a + 0.132) + 3.02 * 10 ** (-4) * v ** 3
145 | 
146 |     def calculate_VSP_model(v, a):
147 |         VSP = calculate_VSP(v, a)
148 |         if VSP < -10:
149 |             return 2.48e-03
150 |         elif -10 <= VSP < 10:
151 |             return 1.98e-03 * VSP ** 2 + 3.97e-02 * VSP + 2.01e-01
152 |         else:
153 |             return 7.93e-02 * VSP + 2.48e-03
154 | 
155 |     # Define the ARRB model, another fuel consumption model.
156 |     def calculate_ARRB_model(v, a):
157 |         return (0.666 + 0.019 * v + 0.001 * v ** 2 + 0.0005 * v ** 3 + 0.122 * a + 0.793 * max(a, 0) ** 2)
158 | 
159 |     # Process each group of data by calculating TTC.
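    # (TTC is defined only while the follower closes in on the leader, i.e.,
    # Speed_Diff = Speed_LV - Speed_FAV < 0. For example, a 20 m gap closing
    # at 2 m/s (Speed_Diff = -2) gives TTC = -20 / -2 = 10 s.)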
160 | df['TTC'] = df.apply(lambda row: calculate_TTC(row['Spatial_Gap'], row['Speed_Diff']), axis=1) 161 | 162 | df['Time_Headway'] = df['Spatial_Headway'] / df['Speed_FAV'] 163 | 164 | df['Acc_speed_squared_deviation'] = (df['Acc_FAV'] - df['Acc_FAV'].mean()) ** 2 / df['Speed_FAV'].mean() 165 | df['Speed_squared_deviation'] = (df['Speed_FAV'] - df['Speed_FAV'].mean()) ** 2 166 | df['Acc_squared_deviation'] = (df['Acc_FAV'] - df['Acc_FAV'].mean()) ** 2 167 | 168 | df['VT_micro_model'] = df.apply( 169 | lambda row: calculate_VT_model(row['Speed_FAV'], row['Acc_FAV'], K_matrix), axis=1) 170 | 171 | df['VSP_model'] = df.apply(lambda row: calculate_VSP_model(row['Speed_FAV'], row['Acc_FAV']), 172 | axis=1) / 800 173 | 174 | df['ARRB_model'] = df.apply(lambda row: calculate_ARRB_model(row['Speed_FAV'], row['Acc_FAV']), 175 | axis=1) / 1000 176 | 177 | df['Fuel_consumption'] = df[['VT_micro_model', 'VSP_model', 'ARRB_model']].mean(axis=1) 178 | 179 | df = df[['TTC', 'Time_Headway', 'Acc_squared_deviation', 'Acc_speed_squared_deviation', 180 | 'Speed_squared_deviation', 'Fuel_consumption', 'VT_micro_model', 'VSP_model', 'ARRB_model']] 181 | df.to_csv(output_path, index=False) 182 | 183 | 184 | def draw_scatter(input_path, output_path): 185 | df = pd.read_csv(input_path) 186 | 187 | df['Smoothed_Acc_FAV'] = df['Acc_FAV'].rolling(window=3).mean() 188 | df.loc[2:, 'Acc_FAV'] = df.loc[2:, 'Smoothed_Acc_FAV'] 189 | 190 | colors = [(42 / 255, 157 / 255, 140 / 255), 191 | (233 / 255, 196 / 255, 107 / 255), 192 | (230 / 255, 111 / 255, 81 / 255)] 193 | 194 | plt.figure(figsize=(8, 8)) # Set the size of the image here 195 | plt.scatter(df['Spatial_Gap'], df['Acc_FAV'], color=colors[0], s=2) 196 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $d$', fontsize=40) 197 | # plt.xlabel('$d$ ($\mathrm{m}$)', fontsize=30) 198 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 199 | # plt.tick_params(axis='x', labelsize=30) 200 | # plt.tick_params(axis='y', labelsize=30) 201 | plt.xticks([]) 202 | plt.yticks([]) 203 | plt.tight_layout() 204 | plt.savefig(output_path + f'_Spatial_Gap.png') # _{name} 205 | plt.close() 206 | 207 | plt.figure(figsize=(8, 8)) # Set the size of the image here 208 | plt.scatter(df['Speed_FAV'], df['Acc_FAV'], color=colors[1], s=2) 209 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $v^{\mathrm{f}}$', fontsize=40) 210 | # plt.xlabel('$v^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}$)', fontsize=30) 211 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 212 | # plt.tick_params(axis='x', labelsize=30) 213 | # plt.tick_params(axis='y', labelsize=30) 214 | plt.xticks([]) 215 | plt.yticks([]) 216 | plt.tight_layout() 217 | plt.savefig(output_path + f'_Speed_FAV.png') # _{name} 218 | plt.close() 219 | 220 | plt.figure(figsize=(8, 8)) # Set the size of the image here 221 | plt.scatter(df['Speed_Diff'], df['Acc_FAV'], color=colors[2], s=2) 222 | # plt.title('Relationship of $a^{\mathrm{f}}$ and $\Delta v$', fontsize=40) 223 | # plt.xlabel('$\Delta v$ ($\mathrm{m}/\mathrm{s}$)', fontsize=30) 224 | # plt.ylabel('$a^{\mathrm{f}}$ ($\mathrm{m}/\mathrm{s}^2$)', fontsize=30) 225 | # plt.tick_params(axis='x', labelsize=30) 226 | # plt.tick_params(axis='y', labelsize=30) 227 | plt.xticks([]) 228 | plt.yticks([]) 229 | plt.tight_layout() 230 | plt.savefig(output_path + f'_Speed_Diff.png') # _{name} 231 | plt.close() 232 | 233 | 234 | 235 | def draw_2D_perfromance_metrics(input_paths, output_path, dataset_labels): 236 | columns = ['TTC', 'Time_Headway', 
'Acc_squared_deviation', 'Fuel_consumption',]# 237 | 238 | colors = [ 239 | (250 / 251, 134 / 255, 0), 240 | '#54B345', 241 | '#05B9E2', 242 | (231 / 255, 56 / 255, 71 / 255), 243 | (131 / 255, 64 / 255, 38 / 255) 244 | ] 245 | 246 | dfs = [pd.read_csv(path) for path in input_paths] 247 | 248 | for col in columns: 249 | plt.figure(figsize=(10, 8)) # Set the size of the image here 250 | 251 | bins = None 252 | if col == 'TTC': 253 | plt.title(f'Distribution of $TTC$', fontsize=40) 254 | bins = np.linspace(0, 250, 100) 255 | x_label = '$TTC$ ($\mathrm{s}$)' 256 | 257 | if col == 'Time_Headway': 258 | plt.title(r'Distribution of $\tau$', fontsize=40) 259 | bins = np.linspace(0, 8, 500) 260 | x_label = r'$\tau$ ($\mathrm{s}$)' 261 | 262 | if col == 'Acc_squared_deviation': 263 | plt.title(r'Distribution of $\alpha$', fontsize=40) 264 | bins = np.linspace(0, 0.4, 500) 265 | x_label = r'$\alpha$ ($\mathrm{m}^2/\mathrm{s}^4$)' 266 | 267 | if col == 'Fuel_consumption': 268 | plt.title(f'Distribution of $F$', fontsize=40) 269 | bins = np.linspace(0, 0.01, 500) 270 | x_label = '$F$ ($\mathrm{L}/\mathrm{s}$)' 271 | 272 | for i, df in enumerate(dfs): 273 | data = df[col].dropna() 274 | hist, edges = np.histogram(data, bins=bins, density=True) # 275 | x = (edges[:-1] + edges[1:]) / 2 # Compute the centers of histogram bins 276 | 277 | repeated_x = np.repeat(x, (hist * 1000).astype(int)) # Replicate data points based on their weights 278 | 279 | # Plot KDE using replicated data points 280 | sns.kdeplot(repeated_x, fill=True, label=dataset_labels[i], color=colors[len(colors) - i - 1], 281 | alpha=0.4 - 0.05 * i, linewidth=2) 282 | 283 | plt.xlabel(x_label, fontsize=35) 284 | plt.ylabel('Density', fontsize=35) 285 | plt.tick_params(axis='both', which='major', labelsize=30) 286 | plt.xlim(0, None) 287 | if col == 'Fuel_consumption': 288 | plt.ylim(None, 3000) 289 | plt.tight_layout() 290 | 291 | plt.savefig(output_path + col + '.png') 292 | plt.close() 293 | 294 | 295 | def draw_2D_labels_statistics(input_paths, output_path, dataset_labels, 296 | columns=['Speed_FAV', 'Acc_FAV', 'Spatial_Gap', 'Speed_Diff']): 297 | colors = [ 298 | (250 / 251, 134 / 255, 0), 299 | '#54B345', 300 | '#05B9E2', 301 | (231 / 255, 56 / 255, 71 / 255), 302 | (131 / 255, 64 / 255, 38 / 255) 303 | ] 304 | dfs = [pd.read_csv(path) for path in input_paths] 305 | 306 | for col in columns: 307 | plt.figure(figsize=(10, 8)) # Set the size of the image here 308 | 309 | if col == 'Speed_FAV': 310 | plt.title('Distribution of $v^{\mathrm{f}}$', fontsize=40) 311 | x_label = '$v^{\mathrm{f}}$ $(\mathrm{m}/\mathrm{s})$' 312 | 313 | if col == 'Acc_FAV': 314 | plt.title('Distribution of $a^{\mathrm{f}}$', fontsize=40) 315 | x_label = '$a^\mathrm{f}$ $(\mathrm{m}/\mathrm{s}^2)$' 316 | 317 | if col == 'Spatial_Gap': 318 | plt.title(f'Distribution of $g$', fontsize=40) 319 | x_label = '$g$ $(\mathrm{m})$' 320 | 321 | if col == 'Speed_Diff': 322 | plt.title(f'Distribution of $\Delta v$', fontsize=40) 323 | x_label = '$\Delta v$ $(\mathrm{m}/\mathrm{s})$' 324 | 325 | for i, df in enumerate(dfs): 326 | data = df[col].dropna() 327 | hist, edges = np.histogram(data, density=True) 328 | x = (edges[:-1] + edges[1:]) / 2 # Compute the centers of histogram bins 329 | 330 | repeated_x = np.repeat(x, (hist * 1000).astype(int)) # Replicate data points based on their weights 331 | 332 | # Plot KDE using replicated data points 333 | sns.kdeplot(repeated_x, fill=True, label=dataset_labels[i], color=colors[len(colors) - i - 1], 334 | alpha=0.5 - 0.07 * i, 
linewidth=2) 335 | 336 | plt.xlabel(x_label, fontsize=35) 337 | plt.ylabel('Density', fontsize=35) 338 | plt.tick_params(axis='both', which='major', labelsize=30) 339 | # plt.legend(loc='upper right', fontsize=30) 340 | 341 | if col == 'Speed_FAV' or col == 'Spatial_Gap': 342 | plt.xlim(0, None) 343 | 344 | plt.tight_layout() 345 | plt.savefig(output_path + col + '.png') 346 | plt.close() 347 | 348 | 349 | def draw_statistics_distribution(output_path): 350 | def crop_center(img): 351 | img_width, img_height = img.size 352 | return img.crop((100, 353 | 160, 354 | img_width - 160, 355 | img_height - 90)) 356 | 357 | img_paths = [ 358 | './Dataset/Vanderbilt/output/step3_analysis_two_vehicle_ACC_', 359 | './Dataset/MicroSimACC/output/step3_analysis_', 360 | './Dataset/CATS/output/step3_analysis_ACC_', 361 | './Dataset/CATS/output/step3_analysis_platoon_', 362 | './Dataset/CATS/output/step3_analysis_UWM_', 363 | './Dataset/OpenACC/output/step3_analysis_Casale_', 364 | './Dataset/OpenACC/output/step3_analysis_Vicolungo_', 365 | './Dataset/OpenACC/output/step3_analysis_ASta_', 366 | './Dataset/OpenACC/output/step3_analysis_ZalaZone_', 367 | './Dataset/Ohio/output/step3_analysis_single_vehicle_', 368 | './Dataset/Ohio/output/step3_analysis_two_vehicle_', 369 | './Dataset/Waymo/output/step3_analysis_perception_', 370 | './Dataset/Waymo/output/step3_analysis_motion_', 371 | './Dataset/Argoverse/output/step3_analysis_' 372 | ] 373 | 374 | row_images = [] 375 | 376 | for i in range(len(img_paths)): 377 | column_images = [] 378 | for var in ['Acc_FAV', 'Spatial_Gap', 'Speed_FAV', 'Speed_Diff']: 379 | column_images.append(Image.open(img_paths[i] + var + '.png')) 380 | 381 | column_images = [crop_center(img) for img in column_images] 382 | 383 | total_height = sum(img.height for img in column_images) 384 | max_width = max(img.width for img in column_images) 385 | 386 | combined_image_vertical = Image.new('RGB', (max_width, total_height)) 387 | 388 | y_offset = 0 389 | for img in column_images: 390 | combined_image_vertical.paste(img, (0, y_offset)) 391 | y_offset += img.height 392 | 393 | row_images.append(combined_image_vertical) 394 | 395 | total_width = sum(img.width for img in row_images) 396 | max_height = max(img.height for img in row_images) 397 | 398 | combined_image_horizontal = Image.new('RGB', (total_width, max_height)) 399 | 400 | # Paste the images into the new image 401 | x_offset = 0 402 | for img in row_images: 403 | combined_image_horizontal.paste(img, (x_offset, 0)) 404 | x_offset += img.width 405 | 406 | combined_image_horizontal.save(output_path) 407 | 408 | 409 | def correlation(output_path): 410 | input_paths = [ 411 | './Dataset/Vanderbilt/output/step3_two_vehicle_ACC.csv', 412 | './Dataset/MicroSimACC/output/step3.csv', 413 | './Dataset/CATS/output/step3_ACC.csv', 414 | './Dataset/CATS/output/step3_platoon.csv', 415 | './Dataset/CATS/output/step3_UWM.csv', 416 | './Dataset/OpenACC/output/step3_Casale.csv', 417 | './Dataset/OpenACC/output/step3_Vicolungo.csv', 418 | './Dataset/OpenACC/output/step3_ASta.csv', 419 | './Dataset/OpenACC/output/step3_ZalaZone.csv', 420 | './Dataset/Ohio/output/step3_single_vehicle.csv', 421 | './Dataset/Ohio/output/step3_two_vehicle.csv', 422 | './Dataset/Waymo/output/step3_perception.csv', 423 | './Dataset/Waymo/output/step3_motion.csv', 424 | './Dataset/Argoverse/output/step3.csv' 425 | ] 426 | 427 | all_data = [] 428 | 429 | for i, path in enumerate(input_paths): 430 | df = pd.read_csv(path) 431 | pearson_corr = df[['Acc_FAV', 'Spatial_Gap', 
'Speed_FAV', 'Speed_Diff']].corr() 432 | spearman_corr = df[['Acc_FAV', 'Spatial_Gap', 'Speed_FAV', 'Speed_Diff']].corr(method='spearman') 433 | corr_data = { 434 | 'ID': i + 1, 435 | 'Pearson_Spatial_Gap': pearson_corr.at['Acc_FAV', 'Spatial_Gap'], 436 | 'Spearman_Spatial_Gap': spearman_corr.at['Acc_FAV', 'Spatial_Gap'], 437 | 'Pearson_Speed_FAV': pearson_corr.at['Acc_FAV', 'Speed_FAV'], 438 | 'Spearman_Speed_FAV': spearman_corr.at['Acc_FAV', 'Speed_FAV'], 439 | 'Pearson_Speed_Diff': pearson_corr.at['Acc_FAV', 'Speed_Diff'], 440 | 'Spearman_Speed_Diff': spearman_corr.at['Acc_FAV', 'Speed_Diff'] 441 | } 442 | all_data.append(corr_data) 443 | 444 | combined_data = pd.DataFrame(all_data) 445 | combined_data.to_csv(output_path, index=False) 446 | 447 | def check_row_number(): 448 | paths = [ 449 | './Dataset/Vanderbilt/output/step2_two_vehicle_ACC.csv', 450 | './Dataset/CATS/output/step2_ACC.csv', 451 | './Dataset/CATS/output/step2_platoon.csv', 452 | './Dataset/CATS/output/step2_UWM.csv', 453 | './Dataset/OpenACC/output/step2_Casale.csv', 454 | './Dataset/OpenACC/output/step2_Vicolungo.csv', 455 | './Dataset/OpenACC/output/step2_ASta.csv', 456 | './Dataset/OpenACC/output/step2_ZalaZone.csv', 457 | './Dataset/Ohio/output/step2_single_vehicle.csv', 458 | './Dataset/Ohio/output/step2_two_vehicle.csv', 459 | './Dataset/Waymo/output/step2_perception.csv', 460 | './Dataset/Waymo/output/step2_motion.csv', 461 | './Dataset/Argoverse/output/step2.csv' 462 | ] 463 | 464 | total_rows = 0 465 | 466 | for path in paths: 467 | try: 468 | df = pd.read_csv(path) 469 | total_rows += len(df) 470 | except Exception as e: 471 | print(f"Error reading {path}: {e}") 472 | 473 | print(f"Total rows across all CSV files: {total_rows}") -------------------------------------------------------------------------------- /Code/data_cleaning.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib as mpl 3 | import numpy as np 4 | 5 | def fill_and_clean(input_path, linear_fill_in, outlier, 6 | space_gap_upper, space_gap_lower, speed_FAV_upper, speed_FAV_lower, 7 | speed_LV_upper, speed_LV_lower, acc_FAV_upper, acc_FAV_lower, 8 | acc_LV_upper, acc_LV_lower): 9 | df = pd.read_csv(input_path) 10 | 11 | # Remove non-following mode data based on defined boundaries for each variable. 12 | df.loc[~(df['Spatial_Gap'].between(space_gap_lower, space_gap_upper)), 'Spatial_Gap'] = np.nan 13 | df.loc[~(df['Speed_FAV'].between(speed_FAV_lower, speed_FAV_upper)), 'Speed_FAV'] = np.nan 14 | df.loc[~(df['Acc_FAV'].between(acc_FAV_lower, acc_FAV_upper)), 'Acc_FAV'] = np.nan 15 | df.loc[~(df['Speed_LV'].between(speed_LV_lower, speed_LV_upper)), 'Speed_LV'] = np.nan 16 | df.loc[~(df['Acc_LV'].between(acc_LV_lower, acc_LV_upper)), 'Acc_LV'] = np.nan 17 | 18 | # Replace infinities with NaN 19 | df.replace([np.inf, -np.inf], np.nan, inplace=True) 20 | 21 | rows_to_delete = set() 22 | columns = ['Speed_FAV', 'Acc_FAV', 'Speed_LV', 'Acc_LV', 'Spatial_Gap', 'Speed_Diff'] 23 | for i in range(len(columns)): 24 | column = columns[i] 25 | # Remove values beyond ±3 standard deviations iteratively until no changes are needed. 
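        # (The clipping width is outlier[i] standard deviations for the i-th
        # column, so it need not literally be 3. As an illustration, with
        # outlier[i] = 3, a column mean of 20 m/s, and a std of 5 m/s, any
        # speed outside 20 ± 15 m/s is set to NaN. The mean and std are then
        # recomputed, because removing extremes shifts both statistics, and
        # the loop repeats until a pass flags no new outliers.)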
26 |         if outlier is not None and outlier[i] is not None:
27 |             while True:
28 |                 mean = df[column].mean(skipna=True)
29 |                 std = df[column].std(skipna=True)
30 |                 outliers_condition = (df[column] < mean - outlier[i] * std) | (df[column] > mean + outlier[i] * std)
31 |                 if not df.loc[outliers_condition, column].empty:
32 |                     df.loc[outliers_condition, column] = np.nan
33 |                 else:
34 |                     break
35 | 
36 |         # Delete groups with excessive missing data.
37 |         if linear_fill_in > 0:
38 |             is_na = df[column].isna()
39 |             na_groups = is_na.ne(is_na.shift()).cumsum()
40 |             na_groups_sizes = na_groups[is_na].value_counts()
41 |             groups_to_delete = na_groups_sizes[na_groups_sizes > linear_fill_in].index
42 |             rows_to_delete.update(na_groups[na_groups.isin(groups_to_delete)].index)
43 | 
44 |     # Remove rows identified in the previous step
45 |     df.drop(index=rows_to_delete, inplace=True)
46 |     # Perform linear interpolation within each trajectory group (the result must be assigned back).
47 |     df = df.groupby('Trajectory_ID').apply(lambda group: group.interpolate(method='linear')).reset_index(drop=True)
48 |     # Drop rows that still contain NaNs after interpolation
49 |     df.dropna(inplace=True)
50 | 
51 |     return df
52 | 
53 | def revise_traj_id(df, output_path, time_step, fill_row_num, fill_start, fill_end, update_time=True):
54 |     # Filter out trajectories shorter than a specified length.
55 |     df = df.groupby('Trajectory_ID').filter(lambda x: len(x) >= fill_row_num)
56 | 
57 |     # Update time indices to ensure continuity if specified.
58 |     if update_time:
59 |         current_traj_ID = 0
60 |         current_time_ID = 0
61 |         df['Trajectory_ID'] = current_traj_ID
62 |         df['new_Time_Index'] = current_time_ID
63 | 
64 |         previous_time_ID = df.iloc[0]['Time_Index'] - time_step
65 |         for index, row in df.iterrows():
66 |             if index > 0 and abs(row['Time_Index'] - previous_time_ID) > time_step + 1e-5:
67 |                 current_traj_ID += 1
68 |                 current_time_ID = 0
69 |             else:
70 |                 current_time_ID += time_step
71 | 
72 |             df.at[index, 'Trajectory_ID'] = current_traj_ID
73 |             df.at[index, 'new_Time_Index'] = current_time_ID
74 |             previous_time_ID = row['Time_Index']
75 | 
76 |         df['Time_Index'] = df['new_Time_Index']
77 |         df.drop(columns=['new_Time_Index'], inplace=True)
78 | 
79 |     # Again filter trajectories that are too short.
80 |     df = df.groupby('Trajectory_ID').filter(lambda x: len(x) >= fill_row_num)
81 | 
82 |     # Remove unstable trajectory sections.
83 |     indices_to_remove = []
84 |     for Trajectory_ID, group in df.groupby('Trajectory_ID'):
85 |         indices_max = group.nlargest(fill_end, 'Time_Index').index
86 |         indices_min = group.nsmallest(fill_start, 'Time_Index').index
87 |         indices_to_remove.extend(indices_max)
88 |         indices_to_remove.extend(indices_min)
89 |     df.drop(indices_to_remove, inplace=True)
90 |     df = df.reset_index(drop=True)
91 |     df['Time_Index'] -= fill_start * time_step
92 | 
93 |     # Adjust positions relative to the start of each trajectory.
94 |     def adjust_positions(group):
95 |         first_Pos_FAV = group['Pos_FAV'].iloc[0]
96 |         group['Pos_FAV'] -= first_Pos_FAV
97 |         group['Pos_LV'] -= first_Pos_FAV
98 |         return group
99 | 
100 |     df = df.groupby('Trajectory_ID').apply(adjust_positions)
101 | 
102 |     # Update trajectory IDs to ensure continuity.
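    # (For example, if the surviving trajectories carry IDs [2, 5, 9], the
    # mapping below relabels them to [0, 1, 2] so that downstream code can
    # rely on consecutive numbering.)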
103 | unique_Trajectory_IDs = df['Trajectory_ID'].unique() 104 | Trajectory_ID_mapping = {old_id: new_id for new_id, old_id in enumerate(unique_Trajectory_IDs)} 105 | df['Trajectory_ID'] = df['Trajectory_ID'].map(Trajectory_ID_mapping) 106 | 107 | df.to_csv(output_path, index=False) 108 | 109 | def merge_data(merge_data_list, output_path): 110 | df_list = [] 111 | max_value = 0 112 | for path in merge_data_list: 113 | df = pd.read_csv(path) 114 | if not df.empty: 115 | df['Trajectory_ID'] += max_value 116 | max_value = df['Trajectory_ID'].max() + 1 117 | df_list.append(df) 118 | merged = pd.concat(df_list) 119 | merged.to_csv(output_path, index=False) 120 | -------------------------------------------------------------------------------- /Code/data_transformation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from geopy.distance import geodesic 3 | import numpy as np 4 | 5 | default_vehicle_length = 4.5 6 | 7 | 8 | def Vanderbilt_convert_format(input_path, output_path): 9 | data = pd.read_csv(input_path, header=None) 10 | 11 | data.insert(0, 'Trajectory_ID', 0) 12 | for idx in range(3): 13 | data.insert(2 + idx, f'col_1{idx}', 0) 14 | for idx in range(3): 15 | data.insert(6 + idx, f'col_2{idx}', 0) 16 | for idx in range(2): 17 | data.insert(12 + idx, f'col_3{idx}', 0) 18 | 19 | data.columns = ['Trajectory_ID', 'Time_Index', 20 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 21 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 22 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 23 | 24 | temp = data['Speed_LV'].copy() 25 | data['Speed_LV'] = data['Speed_FAV'] 26 | data['Speed_FAV'] = temp 27 | temp = data['Acc_FAV'].copy() 28 | data['Acc_FAV'] = data['Spatial_Gap'] 29 | data['Spatial_Gap'] = temp 30 | 31 | data['ID_LV'] = -1 32 | data['Type_LV'] = 0 33 | data['ID_FAV'] = 0 34 | 35 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 36 | data['Spatial_Headway'] = data['Spatial_Gap'] + default_vehicle_length 37 | 38 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 39 | 40 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 41 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 42 | data.loc[0, 'Pos_FAV'] = 0 43 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 44 | data = data.iloc[:-1] 45 | 46 | data.to_csv(output_path, index=False) 47 | 48 | 49 | def MicroSimACC_convert_format(input_path, output_path): 50 | data = pd.read_csv(input_path) 51 | 52 | data = data.dropna() 53 | new_column_order = ['timestamps', 'Smoothed_speed1', 'Smoothed_acceleration1', 'Smoothed_speed2', 54 | 'Smoothed_acceleration2', 'Difference'] 55 | 56 | data['Smoothed_speed1']=data['Smoothed_speed1'] * 0.44704 # mph to m/s 57 | data['Smoothed_speed2'] = data['Smoothed_speed2'] * 0.44704 # mph to m/s 58 | 59 | data = data[new_column_order] 60 | data = data.reset_index(drop=True) 61 | 62 | data.insert(0, 'Trajectory_ID', 0) 63 | for idx in range(3): 64 | data.insert(2 + idx, f'col_1{idx}', 0) 65 | for idx in range(2): 66 | data.insert(7 + idx, f'col_2{idx}', 0) 67 | data.insert(11, f'col_3', 0) 68 | data.insert(13, f'col_4', 0) 69 | 70 | data.columns = ['Trajectory_ID', 'Time_Index', 71 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 72 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 73 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 74 | 75 | data['Time_Index'] = np.arange(0, len(data) * 0.2, 0.2)[:len(data)] 76 | 77 | data['ID_LV'] = -1 78 | data['Type_LV'] = 0 79 | 
data['ID_FAV'] = 0
80 | 
81 |     data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV']
82 |     data['Spatial_Gap'] = data['Spatial_Headway'] - default_vehicle_length
83 | 
84 |     data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.2).shift(-1)  # 0.2 s sampling step, matching Time_Index
85 |     data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.2).shift(-1)
86 | 
87 |     average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2
88 |     data['Pos_FAV'] = (0.2 * average_speed).cumsum()
89 |     data.loc[0, 'Pos_FAV'] = 0
90 |     data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway']
91 |     data = data.iloc[:-1]
92 | 
93 |     data.to_csv(output_path, index=False)
94 | 
95 | 
96 | def CATSACC_convert_format(input_path, output_path):
97 |     column_names = ['id', 'time', 'lon', 'lat', 'speed']
98 |     all_sheets = pd.read_excel(input_path, sheet_name=None, header=None, names=column_names, engine='openpyxl')
99 | 
100 |     def process_time_string(time_str):
101 |         return float(time_str[5:])
102 | 
103 |     earliest_times = []
104 |     for sheet_name, sheet in all_sheets.items():
105 |         earliest_time_str = sheet['time'].iloc[0]
106 |         earliest_time = process_time_string(earliest_time_str)
107 |         earliest_times.append(earliest_time)
108 |     start_time = min(earliest_times)
109 | 
110 |     df_list = []
111 |     for i in range(2):
112 |         sheet1, sheet2 = list(all_sheets.values())[i:i + 2]
113 | 
114 |         sheet1 = sheet1.drop(columns=['id'])
115 |         sheet2 = sheet2.drop(columns=['id'])
116 | 
117 |         df = pd.merge(sheet1, sheet2, on='time')
118 | 
119 |         def calculate_distance(row):
120 |             location1 = (row['lat_x'], row['lon_x'])
121 |             location2 = (row['lat_y'], row['lon_y'])
122 |             return geodesic(location1, location2).kilometers * 1000
123 | 
124 |         df['Spatial_Headway'] = df.apply(calculate_distance, axis=1)
125 |         df.drop(columns=['lat_x', 'lat_y', 'lon_x', 'lon_y'], inplace=True)
126 |         df['time'] = df['time'].apply(lambda t: (process_time_string(t) - start_time))
127 | 
128 |         if i == 0:
129 |             df.insert(1, 'Id_l', -1)
130 |             df.insert(3, 'Id_f', 0)
131 |             df.insert(0, 'Trajectory_ID', 0)
132 |         else:
133 |             df.insert(1, 'Id_l', 0)
134 |             df.insert(3, 'Id_f', 1)
135 |             df.insert(0, 'Trajectory_ID', 1)
136 | 
137 |         df_list.append(df)
138 |     data = pd.concat(df_list)
139 | 
140 |     data.insert(3, f'type_l', 0)
141 |     data.insert(4, f'pos_l', 0)
142 |     data.insert(6, f'acc_l', 0)
143 |     data.insert(8, f'pos_f', 0)
144 |     data.insert(10, f'acc', 0)
145 |     data.insert(11, f'space_gap', 0)
146 |     data.insert(13, f's_diff', 0)
147 | 
148 |     data.columns = ['Trajectory_ID', 'Time_Index',
149 |                     'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV',
150 |                     'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV',
151 |                     'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff']
152 | 
153 |     data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0)
154 |     data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV']
155 |     data['Spatial_Gap'] = data['Spatial_Headway'] - 4.92
156 | 
157 |     data = data.reset_index(drop=True)
158 | 
159 |     data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1)
160 |     data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1)
161 | 
162 |     average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2
163 |     data['Pos_FAV'] = (0.1 * average_speed).cumsum()
164 |     data.loc[0, 'Pos_FAV'] = 0
165 |     data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway']
166 |     data = data.iloc[:-1]
167 | 
168 |     data['Time_Index'] -= data['Time_Index'].min()
169 | 
170 |     data.to_csv(output_path, index=False)
171 | 
172 | 
173 | def
CATSPlatoon_convert_format(input_path, output_path): 174 | def merge_sheets(file_path): 175 | column_names = ['id', 'time', 'lat', 'lon', 'speed'] 176 | xls = pd.ExcelFile(file_path) 177 | dfs = [] 178 | for sheet_name in xls.sheet_names: 179 | df = xls.parse(sheet_name) 180 | df.columns = column_names 181 | df.drop(columns=['id'], inplace=True) 182 | dfs.append(df) 183 | return pd.concat(dfs, ignore_index=True) 184 | 185 | df_leading = merge_sheets(input_path + "/Leading.xlsx") 186 | df_mid = merge_sheets(input_path + "/Black-Mid.xlsx") 187 | df_last = merge_sheets(input_path + "/Red-Last.xlsx") 188 | 189 | def process_time_string(time_str): 190 | return float(time_str[5:]) 191 | 192 | earliest_times = [] 193 | for sheet in [df_leading, df_mid, df_last]: 194 | earliest_time_str = sheet['time'].iloc[0] 195 | earliest_time = process_time_string(earliest_time_str) 196 | earliest_times.append(earliest_time) 197 | start_time = min(earliest_times) 198 | 199 | df_traj1 = pd.merge(df_leading, df_mid, on='time') 200 | df_traj2 = pd.merge(df_mid, df_last, on='time') 201 | 202 | count = 0 203 | for df in [df_traj1, df_traj2]: 204 | def calculate_distance(row): 205 | location1 = (row['lat_x'], row['lon_x']) 206 | location2 = (row['lat_y'], row['lon_y']) 207 | return geodesic(location1, location2).kilometers * 1000 208 | 209 | df['Spatial_Headway'] = df.apply(calculate_distance, axis=1) 210 | df.drop(columns=['lat_x', 'lat_y', 'lon_x', 'lon_y'], inplace=True) 211 | df['time'] = df['time'].apply(lambda t: (process_time_string(t) - start_time)) 212 | 213 | if count == 0: 214 | df.insert(1, 'Id_l', -1) 215 | df.insert(3, 'Id_f', 0) 216 | df.insert(0, 'Trajectory_ID', 0) 217 | else: 218 | df.insert(1, 'Id_l', 0) 219 | df.insert(3, 'Id_f', 1) 220 | df.insert(0, 'Trajectory_ID', 1) 221 | count += 1 222 | 223 | data = pd.concat([df_traj1, df_traj2]) 224 | 225 | data.insert(3, f'type_l', 0) 226 | data.insert(4, f'pos_l', 0) 227 | data.insert(6, f'acc_l', 0) 228 | data.insert(8, f'pos_f', 0) 229 | data.insert(10, f'acc', 0) 230 | data.insert(11, f'space_gap', 0) 231 | data.insert(13, f's_diff', 0) 232 | 233 | data.columns = ['Trajectory_ID', 'Time_Index', 234 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 235 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 236 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 237 | 238 | data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0) 239 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 240 | data['Spatial_Gap'] = data['Spatial_Headway'] - 4.92 241 | 242 | 243 | data = data.reset_index(drop=True) 244 | 245 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 1).shift(-1) 246 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 1).shift(-1) 247 | 248 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 249 | data['Pos_FAV'] = (1 * average_speed).cumsum() 250 | data.loc[0, 'Pos_FAV'] = 0 251 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 252 | data = data.iloc[:-1] 253 | 254 | data['Time_Index'] -= data['Time_Index'].min() 255 | 256 | data.to_csv(output_path, index=False) 257 | 258 | 259 | def CATSUW_convert_format(input_path, output_path): 260 | data = pd.read_csv(input_path) 261 | 262 | new_column_order = ['time', 'leader_p', 'leader_v', 'follower_p', 'follower_v'] 263 | data = data[new_column_order] 264 | 265 | data.insert(0, 'Trajectory_ID', 0) 266 | data.insert(2, 'ID_LV', -1) 267 | data.insert(3, 'Type_LV', -1) 268 | for idx in range(2): 269 | data.insert(6 + idx, 
f'col_2{idx}', 0) 270 | data.insert(10, f'acc', 0) 271 | data.insert(11, 'spacing', 0) 272 | data.insert(12, 'space_headway', 0) 273 | data.insert(13, 'speed_diff', 0) 274 | 275 | data.columns = ['Trajectory_ID', 'Time_Index', 276 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 277 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 278 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 279 | 280 | data['ID_LV'] = -1 281 | data['Type_LV'] = 0 282 | data['ID_FAV'] = 0 283 | 284 | data.to_csv(output_path, index=False) 285 | 286 | data['Spatial_Gap'] = data['Pos_LV'] - data['Pos_FAV'] 287 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 288 | data['Spatial_Headway'] = data['Spatial_Gap'] + 4.92 289 | 290 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 291 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 292 | 293 | data.to_csv(output_path, index=False) 294 | 295 | 296 | def OpenACC_convert_format(input_path, output_path, id_map): 297 | df = pd.read_csv(input_path, header=None, skiprows=1, nrows=1) 298 | id_row = df.values.tolist()[0][1:] 299 | id_row = [x for x in id_row if not pd.isna(x)] 300 | vehicle_ids = list(map(lambda x: id_map.get(x, x), id_row)) 301 | vehicle_num = len(vehicle_ids) 302 | 303 | data = pd.read_csv(input_path, skiprows=5) 304 | 305 | df_list = [] 306 | for i in range(vehicle_num): 307 | if all(column in data.columns for column in [f'Speed{i}', f'Speed{i + 1}', f'IVS{i}']): 308 | if f'Driver{i + 1}' in data.columns and f'Driver{i}' in data.columns: 309 | df = data[['Time', f'Driver{i}', f'Speed{i}', f'Speed{i + 1}', f'IVS{i}', f'Driver{i + 1}']].copy() 310 | df.columns = ['Time', 'Driver_l', 'Speed_l', 'Speed_f', 'IVS', 'Driver_f'] 311 | df = df[df['Driver_f'] != 'Human'] 312 | df.drop(columns=['Driver_f'], inplace=True) 313 | df['Driver_l'] = df['Driver_l'].replace({'Human': 0, 'ACC': 1}) 314 | df.insert(1, 'Id_l', vehicle_ids[i - 1]) 315 | df.insert(4, 'Id_f', vehicle_ids[i]) 316 | df.insert(0, 'Trajectory_ID', i - 1) 317 | df_list.append(df) 318 | else: 319 | df = data[['Time', f'Speed{i}', f'Speed{i + 1}', f'IVS{i}']].copy() 320 | df.columns = ['Time', 'Speed_l', 'Speed_f', 'IVS'] 321 | df.insert(1, 'Id_l', vehicle_ids[i - 1]) 322 | df.insert(2, 'Driver_l', 1) 323 | df.insert(4, 'Id_f', vehicle_ids[i]) 324 | df.insert(0, 'Trajectory_ID', i - 1) 325 | df_list.append(df) 326 | 327 | data = pd.concat(df_list) 328 | 329 | data.insert(4, f'pos_l', 0) 330 | data.insert(6, f'acc_l', 0) 331 | data.insert(8, f'pos_f', 0) 332 | data.insert(10, f'acc', 0) 333 | data.insert(12, f'space_headway', 0) 334 | data.insert(13, f's_diff', 0) 335 | 336 | data.columns = ['Trajectory_ID', 'Time_Index', 337 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 338 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 339 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 340 | 341 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 342 | data['Spatial_Headway'] = data['Spatial_Gap'] + default_vehicle_length 343 | 344 | 345 | data = data.reset_index(drop=True) 346 | 347 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 348 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 349 | 350 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 351 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 352 | data.loc[0, 'Pos_FAV'] = 0 353 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 354 | data = data.iloc[:-1] 355 | 356 
| data['Time_Index'] -= data['Time_Index'].min() 357 | 358 | data.to_csv(output_path, index=False) 359 | 360 | 361 | def Ohio_single_convert_format(input_path, output_path): 362 | related_columns = ['ID', 'Time', 'pos_x_av_f', 'speed_av', 'acc_av', 'pos_x_sv_f', 'speed_sv', 'acc_sv', 363 | 'closest_distance_longitudinal (gap)', 'distance_av (headway)', 'lane_id_av', 'lane_id_sv'] 364 | 365 | data = pd.read_csv(input_path, usecols=related_columns) 366 | 367 | data = data[data['lane_id_av'] == data['lane_id_sv']] 368 | data.drop(columns=['lane_id_sv', 'lane_id_av'], inplace=True) 369 | 370 | data = data[data['pos_x_av_f'] > data['pos_x_sv_f']] 371 | 372 | data['reset'] = data['ID'].diff() < 0 373 | data['traj_id'] = data['reset'].cumsum() + 1 374 | data.drop(columns=['reset'], inplace=True) 375 | 376 | idx = data.groupby(['traj_id', 'Time'])['closest_distance_longitudinal (gap)'].idxmin() 377 | follower_ids = data.loc[idx, ['traj_id', 'Time', 'ID']].set_index(['traj_id', 'Time']) 378 | data['tmp_index'] = data.index 379 | data = data.merge(follower_ids, on=['traj_id', 'Time'], how='left', suffixes=('', '_follower')) 380 | data.rename(columns={'ID_follower': 'follower_id'}, inplace=True) 381 | data.drop(columns=['tmp_index'], inplace=True) 382 | 383 | data = data[data['ID'] == data['follower_id']] 384 | data.drop(columns=['follower_id'], inplace=True) 385 | 386 | new_traj_id = 0 387 | traj_id_mapping = {} 388 | 389 | data.sort_values(by=['traj_id', 'ID'], inplace=True) 390 | for index, row in data.iterrows(): 391 | key = (row['traj_id'], row['ID']) 392 | if key not in traj_id_mapping: 393 | new_traj_id += 1 394 | traj_id_mapping[key] = new_traj_id 395 | data.at[index, 'traj_id'] = traj_id_mapping[key] 396 | 397 | cols = ['traj_id'] + [col for col in data.columns if col != 'traj_id'] 398 | data = data[cols] 399 | 400 | data['ID'] = -1 401 | data['traj_id'] -= 1 402 | 403 | data.insert(3, 'Type_LV', -1) 404 | data.insert(8, 'ID_FAV', 0) 405 | column_data = data['distance_av (headway)'] 406 | data.drop(columns=['distance_av (headway)'], inplace=True) 407 | data.insert(12, 'distance_av (headway)', column_data) 408 | data.insert(13, f's_diff', 0) 409 | 410 | data.columns = ['Trajectory_ID', 'Time_Index', 411 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 412 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 413 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff'] 414 | 415 | data['Time_Index'], data['ID_LV'] = data['ID_LV'], data['Time_Index'] 416 | 417 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 418 | 419 | data.to_csv(output_path, index=False) 420 | 421 | 422 | def Ohio_two_convert_format(input_path, output_path): 423 | related_columns = ['ID', 'Time', 'pos_x_av_f', 'speed_av', 'acc_av', 'pos_x_sv1_f', 'speed_sv1', 'acc_sv1', 424 | 'pos_x_sv2_f', 'speed_sv2', 'acc_sv2', 'lane_id_av', 'lane_id_sv1', 'lane_id_sv2', 'dim_x_av', 425 | 'dim_x_sv1', 'dim_x_sv2'] 426 | 427 | def find_one_vehicle(id, id2): 428 | data = pd.read_csv(input_path, usecols=related_columns) 429 | 430 | data['ID'] += 1 431 | 432 | data['reset'] = data['ID'].diff() < 0 433 | data['traj_id'] = data['reset'].cumsum() + 1 434 | data.drop(columns=['reset'], inplace=True) 435 | 436 | result = data.groupby(['traj_id', 'Time']).first().reset_index() 437 | 438 | sv_to_av_columns = { 439 | f'pos_x_sv{id2}_f': 'pos_x_av_f', 440 | f'speed_sv{id2}': 'speed_av', 441 | f'acc_sv{id2}': 'acc_av', 442 | f'lane_id_sv{id2}': 'lane_id_av', 443 | f'dim_x_sv{id2}': 'dim_x_av' 444 | } 445 | df_sv_as_av = result[ 446 | ['traj_id', 
'Time', 'pos_x_sv1_f', 'speed_sv1', 'acc_sv1', 'pos_x_sv2_f', 'speed_sv2', 'acc_sv2', 447 | 'lane_id_sv1', 'lane_id_sv2', 'dim_x_sv1', 'dim_x_sv2']].copy() 448 | df_sv_as_av.rename(columns=sv_to_av_columns, inplace=True) 449 | df_sv_as_av['ID'] = id2 - 1 450 | 451 | data = pd.concat([data, df_sv_as_av], ignore_index=True) 452 | data = data.sort_values(by=['traj_id', 'ID', 'Time']).reset_index(drop=True) 453 | data = data.drop(columns=list(sv_to_av_columns.keys())) 454 | 455 | data = data[data['lane_id_av'] == data[f'lane_id_sv{id}']] 456 | data.drop(columns=[f'lane_id_sv{id}', 'lane_id_av'], inplace=True) 457 | 458 | data = data[data['pos_x_av_f'] > data[f'pos_x_sv{id}_f']] 459 | 460 | data['space_headway'] = data['pos_x_av_f'] - data[f'pos_x_sv{id}_f'] 461 | 462 | data['reset'] = data['ID'].diff() < 0 463 | data['traj_id'] = data['reset'].cumsum() + 1 464 | data.drop(columns=['reset'], inplace=True) 465 | 466 | idx = data.groupby(['traj_id', 'Time'])['space_headway'].idxmin() 467 | follower_ids = data.loc[idx, ['traj_id', 'Time', 'ID']].set_index(['traj_id', 'Time']) 468 | data['tmp_index'] = data.index 469 | data = data.merge(follower_ids, on=['traj_id', 'Time'], how='left', suffixes=('', '_follower')) 470 | data.rename(columns={'ID_follower': 'follower_id'}, inplace=True) 471 | data.drop(columns=['tmp_index'], inplace=True) 472 | 473 | data = data[data['ID'] == data['follower_id']] 474 | data.drop(columns=['follower_id'], inplace=True) 475 | 476 | new_traj_id = 0 477 | traj_id_mapping = {} 478 | 479 | data.sort_values(by=['traj_id', 'ID'], inplace=True) 480 | for index, row in data.iterrows(): 481 | key = (row['traj_id'], row['ID']) 482 | if key not in traj_id_mapping: 483 | new_traj_id += 1 484 | traj_id_mapping[key] = new_traj_id 485 | data.at[index, 'traj_id'] = traj_id_mapping[key] 486 | 487 | cols = ['traj_id'] + [col for col in data.columns if col != 'traj_id'] 488 | data = data[cols] 489 | 490 | data['traj_id'] -= 1 491 | 492 | data['ID'] = data['ID'].apply(lambda x: x if x in [0, 1] else -1) 493 | 494 | data.insert(3, 'Type_LV', 0) 495 | data.insert(8, 'ID_FAV', id - 1) 496 | data.insert(13, 'space_gap', 0) 497 | data.insert(15, 's_diff', 0) 498 | 499 | new_order = ['traj_id', 'Time', 'ID', 'Type_LV', 'pos_x_av_f', 500 | 'speed_av', 'acc_av', 'ID_FAV', f'pos_x_sv{id}_f', 501 | f'speed_sv{id}', f'acc_sv{id}', 'space_gap', 'space_headway', 502 | 's_diff', 'dim_x_av', f'dim_x_sv{id}'] 503 | data = data[new_order] 504 | 505 | data.columns = ['Trajectory_ID', 'Time_Index', 506 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 507 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 508 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 'Len_LV', 'Len_FAV'] 509 | 510 | data['Type_LV'] = data['ID_LV'].apply(lambda x: 1 if x in [0, 1] else 0) 511 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 512 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['Len_LV'] / 2 - data['Len_FAV'] / 2 513 | 514 | 515 | data = data.drop(columns=['Len_LV', 'Len_FAV']) 516 | 517 | data.to_csv(output_path + f'_{id}.csv', index=False) 518 | 519 | find_one_vehicle(1, 2) 520 | find_one_vehicle(2, 1) 521 | 522 | 523 | def Waymo_perception_convert_format(input_path, output_path): 524 | related_columns = ['segment_id', 'local_veh_id', 'length', 'local_time', 'follower_id', 'leader_id', 525 | 'processed_position', 'processed_speed', 'processed_accer'] 526 | 527 | data = pd.read_csv(input_path, usecols=related_columns) 528 | data = data[data['follower_id'] == 0] 529 | 530 | def merge_rows(group): 531 | 
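        """Collapse one segment's per-vehicle rows into one row per timestamp:
        the leader's and follower's length, position, speed, and acceleration
        are merged into single columns with '_l' and '_f' suffixes."""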
merged_df_list = [] 532 | 533 | for local_time in group['local_time'].unique(): 534 | temp_group = group[group['local_time'] == local_time] 535 | temp_dict = {'segment_id': temp_group['segment_id'].iloc[0], 'local_time': local_time} 536 | 537 | for _, row in temp_group.iterrows(): 538 | suffix = '_f' if row['local_veh_id'] == row['follower_id'] else '_l' 539 | for col in ['length', 'processed_position', 'processed_speed', 'processed_accer']: 540 | temp_dict[f'{col}{suffix}'] = row[col] 541 | temp_dict['follower_id'] = row['follower_id'] 542 | temp_dict['leader_id'] = row['leader_id'] 543 | 544 | merged_df_list.append(pd.DataFrame([temp_dict])) 545 | 546 | merged_df = pd.concat(merged_df_list, ignore_index=True) 547 | 548 | return merged_df 549 | 550 | data = data.groupby('segment_id').apply(merge_rows).reset_index(drop=True) 551 | 552 | new_order = [ 553 | 'segment_id', 'local_time', 554 | 'leader_id', 'processed_position_l', 'processed_speed_l', 'processed_accer_l', 555 | 'follower_id', 'processed_position_f', 'processed_speed_f', 'processed_accer_f', 556 | 'length_l', 'length_f' 557 | ] 558 | 559 | data = data.reindex(columns=new_order) 560 | 561 | data.insert(3, 'type', 0) 562 | data.insert(11, 'space_gap', 0) 563 | data.insert(12, 'space_headway', 0) 564 | data.insert(13, 's_diff', 0) 565 | 566 | data.columns = ['Trajectory_ID', 'Time_Index', 567 | 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 568 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 569 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 570 | 'length_l', 'length_f'] 571 | 572 | data['ID_LV'] = -1 573 | data['Type_LV'] = 0 574 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 575 | data['Spatial_Headway'] = data['Pos_LV'] - data['Pos_FAV'] 576 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['length_l'] / 2 - data['length_f'] / 2 577 | 578 | data = data.drop(columns=['length_l', 'length_f']) 579 | 580 | data.to_csv(output_path, index=False) 581 | 582 | 583 | def Waymo_motion_convert_format(input_path, output_path): 584 | data = pd.read_csv(input_path) 585 | 586 | for idx in range(4): 587 | data.insert(1 + idx, f'col_{1 + idx}', 0) 588 | for idx in range(3): 589 | data.insert(6 + idx, f'col_{5 + idx}', 0) 590 | data.insert(10, f'Acc_FAV', 0) 591 | data.insert(11, f'Spatial_Gap', 0) 592 | data.insert(12, f'Spatial_Headway', 0) 593 | data.insert(13, 'Speed_Diff', 0) 594 | 595 | data.columns = ['Trajectory_ID', 'Time_Index', 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 596 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 597 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 598 | 'leader_x', 'leader_y', 'leader_length', 'follower_x', 'follower_y', 'follower_length'] 599 | 600 | data['Time_Index'] = data.groupby('Trajectory_ID').cumcount() / 10 601 | data['ID_LV'] = -1 602 | data['ID_FAV'] = 0 603 | 604 | data['Spatial_Headway'] = (np.sqrt( 605 | (data['leader_x'] - data['follower_x']) ** 2 + (data['leader_y'] - data['follower_y']) ** 2)) 606 | data['Spatial_Gap'] = data['Spatial_Headway'] - data['leader_length'] / 2 - data['follower_length'] / 2 607 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 608 | 609 | data = data.drop(['leader_x', 'leader_y', 'follower_x', 'follower_y', 'leader_length', 'follower_length'], axis=1) 610 | 611 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 612 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 613 | 614 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 615 | 
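    # Positions are reconstructed by trapezoidal integration of speed: each
    # 0.1 s step advances the follower by the mean of consecutive speed
    # samples times the time step, and the leader is then placed one spatial
    # headway ahead of the follower.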
data['Pos_FAV'] = (0.1 * average_speed).cumsum() 616 | data.loc[0, 'Pos_FAV'] = 0 617 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 618 | data = data.iloc[:-1] 619 | 620 | data.to_csv(output_path, index=False) 621 | 622 | 623 | def Argoverse_convert_format(input_path, output_path): 624 | data = pd.read_csv(input_path) 625 | 626 | for idx in range(4): 627 | data.insert(1 + idx, f'col_{1 + idx}', 0) 628 | for idx in range(3): 629 | data.insert(6 + idx, f'col_{5 + idx}', 0) 630 | data.insert(10, f'Acc_FAV', 0) 631 | data.insert(11, f'Spatial_Gap', 0) 632 | data.insert(12, f'Spatial_Headway', 0) 633 | data.insert(13, 'Speed_Diff', 0) 634 | 635 | data.columns = ['Trajectory_ID', 'Time_Index', 'ID_LV', 'Type_LV', 'Pos_LV', 'Speed_LV', 'Acc_LV', 636 | 'ID_FAV', 'Pos_FAV', 'Speed_FAV', 'Acc_FAV', 637 | 'Spatial_Gap', 'Spatial_Headway', 'Speed_Diff', 638 | 'leader_x', 'leader_y', 'follower_x', 'follower_y'] 639 | 640 | data['Time_Index'] = data.groupby('Trajectory_ID').cumcount() / 10 641 | data['ID_LV'] = -1 642 | data['Type_LV'] = 0 643 | data['ID_FAV'] = 0 644 | 645 | data['Spatial_Headway'] = (np.sqrt( 646 | (data['leader_x'] - data['follower_x']) ** 2 + (data['leader_y'] - data['follower_y']) ** 2)) 647 | data['Spatial_Gap'] = data['Spatial_Headway'] - default_vehicle_length 648 | data['Speed_Diff'] = data['Speed_LV'] - data['Speed_FAV'] 649 | 650 | data = data.drop(['leader_x', 'leader_y', 'follower_x', 'follower_y'], axis=1) 651 | 652 | data['Acc_LV'] = ((data['Speed_LV'] - data['Speed_LV'].shift(1)) / 0.1).shift(-1) 653 | data['Acc_FAV'] = ((data['Speed_FAV'] - data['Speed_FAV'].shift(1)) / 0.1).shift(-1) 654 | 655 | average_speed = (data['Speed_FAV'] + data['Speed_FAV'].shift(1)) / 2 656 | data['Pos_FAV'] = (0.1 * average_speed).cumsum() 657 | data.loc[0, 'Pos_FAV'] = 0 658 | data['Pos_LV'] = data['Pos_FAV'] + data['Spatial_Headway'] 659 | data = data.iloc[:-1] 660 | 661 | data.to_csv(output_path, index=False) 662 | -------------------------------------------------------------------------------- /Code/model_calibration.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | from sklearn.metrics import mean_squared_error, r2_score 3 | import geatpy as ea 4 | import pandas as pd 5 | import numpy as np 6 | 7 | 8 | def IDM(arg, delta_d, v, delta_v): 9 | """ Calculate the Intelligent Driver Model (IDM) acceleration. """ 10 | v0, T, a, b, s0 = arg 11 | s_star = s0 + max(0, v * T + (v * delta_v) / (2 * ((a * b) ** 0.5))) 12 | small_value = 1e-5 # To avoid division by zero 13 | return a * (1 - (v / v0) ** 4 - (s_star / (delta_d + small_value)) ** 2) 14 | 15 | 16 | def FVD(arg, delta_d, v, delta_v): 17 | """ Calculate the Full Velocity Difference Model (FVD) acceleration. """ 18 | alpha, lamda, v_0, b, beta = arg 19 | V_star = v_0 * (np.tanh(delta_d / b - beta) - np.tanh(-beta)) 20 | ahat = alpha * (V_star - v) + lamda * delta_v 21 | return ahat 22 | 23 | 24 | class MyProblem(ea.Problem): 25 | """ A class to define optimization problems for evolutionary algorithms. 
""" 26 | 27 | def __init__(self, df, lb, ub): 28 | M = 1 # Number of objectives 29 | maxOrMin = [1] # 1 for minimization 30 | Dim = 5 # Number of decision variables 31 | varTypes = [0] * Dim # 0 for continuous variables 32 | lbin = [1] * Dim # 1 to include the lower bound 33 | ubin = [1] * Dim # 1 to include the upper bound 34 | ea.Problem.__init__(self, "", M, maxOrMin, Dim, varTypes, lb, ub, lbin, ubin) 35 | self.df = df 36 | 37 | def aimFunc(self, pop): 38 | """ Objective function for optimization. """ 39 | x = pop.Phen 40 | results = [] 41 | for parameters in x: 42 | arg = tuple(round(param, 3) for param in parameters[:5]) 43 | self.df['a_hat'] = self.df.apply(lambda row: IDM(arg, row['Spatial_Gap'], 44 | row['Speed_FAV'], 45 | -row['Speed_Diff']), axis=1) 46 | results.append(mean_squared_error(self.df['Acc_FAV'], self.df['a_hat'])) 47 | pop.ObjV = np.vstack(results) # Assign objective values to the population 48 | 49 | 50 | class CFModelRegress: 51 | """ A class for regression analysis of car-following models. """ 52 | 53 | def __init__(self, path, timestep, delay=0): 54 | df = pd.read_csv(path) 55 | self.dfs = {name: group for name, group in df.groupby('ID_FAV')} 56 | self.timestep = timestep 57 | self.delay = delay 58 | 59 | def addressOneTra(self, oneTraj): 60 | """ Prepare independent and dependent variables from trajectory data. """ 61 | xData = [] 62 | yData = [] 63 | delay_steps = int(self.delay / self.timestep) 64 | for t in range(delay_steps, oneTraj.shape[0]): 65 | xData.append([oneTraj['Spatial_Gap'].iloc[t - delay_steps], 66 | oneTraj['Speed_FAV'].iloc[t - delay_steps], 67 | oneTraj['Speed_Diff'].iloc[t - delay_steps]]) 68 | yData.append([oneTraj['Acc_FAV'].iloc[t]]) 69 | return xData, yData 70 | 71 | def reorganizeDataIndividualVeh(self): 72 | """ Reorganize data by vehicle for further analysis. """ 73 | reorganizedData = {} 74 | for veh, oneVehData in self.dfs.items(): 75 | sampleData = {"x": [], "y": []} 76 | for Trajectory_ID, group in oneVehData.groupby('Trajectory_ID'): 77 | x_oneTra, y_oneTra = self.addressOneTra(group) 78 | sampleData["x"] += x_oneTra 79 | sampleData["y"] += y_oneTra 80 | reorganizedData[veh] = sampleData 81 | return reorganizedData 82 | 83 | def linearRegression(self, veh, xData, yData): 84 | """ Perform linear regression analysis. """ 85 | xData_np = np.array(xData).reshape(len(xData), -1) 86 | yData_np = np.array(yData) 87 | model = LinearRegression() 88 | model.fit(xData_np, yData_np) 89 | y_pred = model.predict(xData_np) 90 | mse = mean_squared_error(yData_np, y_pred) 91 | coefficients = model.coef_.flatten() 92 | intercept = model.intercept_ 93 | r_squared = model.score(xData_np, yData_np) 94 | n_temp = xData_np.shape[0] 95 | k_temp = xData_np.shape[1] 96 | adjusted_r_squared = 1 - (1 - r_squared) * (n_temp - 1) / (n_temp - k_temp - 1) 97 | data = {'Vehicle': [veh], 'R2': [r_squared], 'RMSE': [np.sqrt(mse)]} 98 | for i, coef in enumerate(coefficients): 99 | data[f'Coef_{i}'] = coef 100 | data['Intercept'] = intercept 101 | return pd.DataFrame(data) 102 | 103 | def IDM_regression(self, veh, xData, yData): 104 | """ Perform regression using the IDM car-following model. 
""" 105 | xData_df = pd.DataFrame(xData) 106 | yData_df = pd.DataFrame(yData) 107 | df = pd.concat([xData_df, yData_df], axis=1) 108 | df.columns = ['Spatial_Gap', 'Speed_FAV', 'Speed_Diff', 'Acc_FAV'] 109 | problem = MyProblem(df, [0.1, 0.1, 20, 0.1, 0.1], [10, 10, 40, 10, 10]) # Parameters for FVD model 110 | Encoding = 'RI' 111 | NIND = 25 # Population size 112 | Field = ea.crtfld(Encoding, problem.varTypes, problem.ranges, problem.borders) 113 | population = ea.Population(Encoding, Field, NIND) 114 | myAlgorithm = ea.soea_SEGA_templet(problem, population) 115 | myAlgorithm.MAXGEN = 100 # Max generations 116 | myAlgorithm.verbose = True 117 | myAlgorithm.drawing = 1 118 | BestIndi, population = myAlgorithm.run() 119 | arg = tuple(round(param, 3) for param in BestIndi.Phen[0, :5]) 120 | df['a_hat'] = df.apply(lambda row: IDM(arg, row['Spatial_Gap'], 121 | row['Speed_FAV'], 122 | -row['Speed_Diff']), axis=1) 123 | r_squared = r2_score(df['Acc_FAV'], df['a_hat']) 124 | mse = mean_squared_error(df['Acc_FAV'], df['a_hat']) 125 | results = {'Vehicle': [veh], 'R2': [r_squared], 'RMSE': [np.sqrt(mse)]} 126 | for i in range(5): 127 | results[f'Coef_{i}'] = BestIndi.Phen[0, i] 128 | return pd.DataFrame(results) 129 | 130 | def main(self, output_path, model): 131 | """ Main function to run the regression analysis. """ 132 | allData = self.reorganizeDataIndividualVeh() 133 | df_list = [] 134 | for veh, data in allData.items(): 135 | if model == "linear": 136 | df_list.append(self.linearRegression(veh, data['x'], data['y'])) 137 | elif model == "IDM": 138 | df_list.append(self.IDM_regression(veh, data['x'], data['y'])) 139 | merged_df = pd.concat(df_list) 140 | merged_df.to_csv(output_path, index=False) 141 | return merged_df 142 | -------------------------------------------------------------------------------- /Code/plot_result.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from PIL import Image 4 | import matplotlib.pyplot as plt 5 | 6 | def combined(): 7 | # Load the images 8 | image1 = Image.open("./1.png") 9 | image2 = Image.open("./2.png") 10 | image3 = Image.open("./3.png") 11 | 12 | # Determine the size of the combined image 13 | total_width = image1.width + image2.width + image3.width 14 | max_height = max(image1.height, image2.height, image3.height) 15 | 16 | # Create a new empty image with the size to fit all three images 17 | combined_image = Image.new('RGB', (total_width, max_height)) 18 | 19 | # Paste the images next to each other 20 | combined_image.paste(image1, (0, 0)) 21 | combined_image.paste(image2, (image1.width, 0)) 22 | combined_image.paste(image3, (image1.width + image2.width, 0)) 23 | 24 | # Save the combined image 25 | combined_image_path = "combined_image.png" 26 | combined_image.save(combined_image_path) 27 | 28 | 29 | # Modified code with increased font sizes for the bar chart 30 | 31 | def pilar(): 32 | # Set the random seed for reproducibility 33 | np.random.seed(1) 34 | 35 | # Generate some data 36 | categories = ['Vanderbilt', 'CATS', 'OpenACC', 'Ohio', 'Waymo', 'Argoverse'] 37 | algorithms = ['Acceleration', 'Speed', 'Space'] 38 | data = np.random.randint(0, 150, size=(len(algorithms), len(categories))) 39 | data = np.array([[10, 11, 8, 10, 9, 8], [20, 22, 25, 22, 16, 17], [15, 17, 20, 18, 15, 16]]) 40 | # [[10, 20, 15], [11, 22, 17], [8, 25, 20], 41 | # [10, 22, 18], [9, 16, 15], [8, 17, 16]] 42 | 43 | # Create a bar chart 44 | fig, ax = plt.subplots(figsize=(16, 8)) 45 | 46 | 
# Set the positions and width for the bars 47 | positions = np.arange(len(categories)) 48 | width = 0.25 # Increase width for clarity 49 | 50 | # Plot bars for each algorithm 51 | for i in range(len(algorithms)): 52 | ax.bar(positions - width + (i * width), data[i], width=width, label=algorithms[i]) 53 | 54 | # Customize font sizes 55 | plt.rcParams.update({'font.size': 14}) # Update default rc settings for font size 56 | 57 | # Add some text for labels, title, and custom x-axis tick labels, etc. 58 | # ax.set_ylabel('Scores', fontsize=16) 59 | # ax.set_title('Scores by category and algorithm', fontsize=18) 60 | ax.set_xticks(positions) 61 | ax.set_xticklabels(categories, fontsize=18) 62 | # ax.set_ylim(0, 160) 63 | ax.legend(fontsize=18) 64 | 65 | # Display the bar chart 66 | plt.savefig('./scores.png') 67 | 68 | 69 | def scatter(input_path): 70 | df = pd.read_csv(input_path) 71 | 72 | for i in range(200, 400, 100): 73 | colors = [(42 / 255, 157 / 255, 140 / 255), 74 | (233 / 255, 196 / 255, 107 / 255), 75 | (230 / 255, 111 / 255, 81 / 255)] 76 | 77 | plt.figure(figsize=(15, 5)) 78 | 79 | # for name, group in df.groupby('ID_FAV'): 80 | plt.scatter(df['Space_Gap'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[0], s=100) 81 | # plt.title('Acceleration and Space Gap', fontsize=20) 82 | plt.xlabel('$g$', fontsize=40) 83 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 84 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 85 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 86 | plt.tight_layout() 87 | plt.savefig(f'{i}_Space_Gap.png') # _{name} 88 | plt.close() 89 | 90 | plt.figure(figsize=(15, 5)) 91 | 92 | plt.scatter(df['Speed_FAV'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[1], s=100) 93 | plt.xlabel('$v^{\mathrm{f}}$', fontsize=40) 94 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 95 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 96 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 97 | plt.tight_layout() 98 | plt.savefig(f'{i}_Speed_FAV.png') # _{name} 99 | plt.close() 100 | 101 | plt.figure(figsize=(15, 5)) 102 | 103 | plt.scatter(df['Speed_Diff'][i:i + 100], df['Acc_FAV'][i:i + 100], color=colors[2], s=100) 104 | # plt.title('Acceleration and Speed Difference', fontsize=20) 105 | plt.xlabel('$\Delta v$', fontsize=40) 106 | plt.ylabel('$a^{\mathrm{f}}$', fontsize=40) 107 | plt.tick_params(axis='x', labelbottom=False) # 不显示x轴刻度标签 108 | plt.tick_params(axis='y', labelleft=False) # 不显示y轴刻度标签 109 | plt.tight_layout() 110 | plt.savefig(f'{i}_Speed_Diff.png') # _{name} 111 | plt.close() 112 | 113 | 114 | def read_date(): 115 | original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Single-Vehicle_Data_for_Central_Ohio.csv' 116 | df = pd.read_csv(original_data_path) 117 | unique_dates = df['date'].drop_duplicates() 118 | print(unique_dates) 119 | 120 | print('-----') 121 | 122 | original_data_path = './Dataset/Ohio/data/Advanced_Driver_Assistance_System__ADAS_-Equipped_Two-Vehicle_Data_for_Central_Ohio.csv' 123 | df = pd.read_csv(original_data_path) 124 | unique_dates = df['date'].drop_duplicates() 125 | print(unique_dates) 126 | 127 | # scatter('./Dataset/OpenACC/output/step1_ASta_merge.csv') 128 | # pilar() 129 | # read_date() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ultra-AV: A unified longitudinal trajectory dataset for automated vehicle 2 | 3 | ## Introduction 4 | 5 | This 
## Original Datasets

We have examined 14 open-source datasets, each providing distinct insights into AV behavior across various driving conditions and scenarios. These open-source datasets come from seven providers:

- **Vanderbilt ACC Dataset** [1]. Collected in Nashville, Tennessee by the Vanderbilt University research group. Available at - [https://acc-dataset.github.io/datasets/](https://acc-dataset.github.io/datasets/).
  - [Two-vehicle ACC driving, Tennessee 2019](https://github.com/CATS-Lab/Filed-Experiment-Data-AV_Platooning_Data)
- **MicroSimACC Dataset** [2]. Collected in four cities in Florida (Delray Beach, Loxahatchee, Boca Raton, and Parkland) by the Florida Atlantic University research group. Available at - [https://github.com/microSIM-ACC/ICE](https://github.com/microSIM-ACC/ICE).
  - [ICE](https://github.com/microSIM-ACC/ICE)
- **CATS Open Datasets** [3]. Three datasets were gathered in Tampa, Florida, and Madison, Wisconsin by the CATS Lab. Available at - [https://github.com/CATS-Lab](https://github.com/CATS-Lab).
  - [Filed-Experiment-Data-AV_Platooning_Data](https://github.com/CATS-Lab/Filed-Experiment-Data-AV_Platooning_Data)
  - [Filed-Experiment-Data-ACC_Data](https://github.com/CATS-Lab/Filed-Experiment-Data-ACC_Data)
  - [CATS-UWMadison-AV-Data](https://github.com/MarkMaaaaa/CATS-UWMadison-AV-Data)
- **OpenACC Database** [4]. Four datasets were collected across Italy, Sweden, and Hungary by the European Commission's Joint Research Centre. Available at - [https://data.europa.eu/data/datasets/9702c950-c80f-4d2f-982f-44d06ea0009f?locale=en](https://data.europa.eu/data/datasets/9702c950-c80f-4d2f-982f-44d06ea0009f?locale=en).
  - [Casale](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/Casale/)
  - [Vicolungo](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/Vicolungo/)
  - [AstaZero](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/AstaZero/)
  - [ZalaZone](https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/TransportExpData/JRCDBT0001/LATEST/ZalaZone/)
- **Central Ohio ACC Datasets** [5]. Two datasets were collected in Ohio by UCLA's Mobility Lab and the Transportation Research Center. Available at -
  - [Advanced Driver Assistance System (ADAS)-Equipped Single-Vehicle Data for Central Ohio](https://catalog.data.gov/dataset/advanced-driver-assistance-system-adas-equipped-single-vehicle-data-for-central-ohio)
  - [Advanced Driver Assistance System (ADAS)-Equipped Two-Vehicle Data for Central Ohio](https://catalog.data.gov/dataset/advanced-driver-assistance-system-adas-equipped-two-vehicle-data-for-central-ohio)
- **Waymo Open Dataset** [6, 7]. Two datasets were collected by Waymo in six cities: San Francisco, Mountain View, and Los Angeles in California; Phoenix in Arizona; Detroit in Michigan; and Seattle in Washington. Available at -
  - [Waymo Motion Dataset](https://waymo.com/open/data/motion/)
  - [Vehicle trajectory data processed from the Waymo Open Dataset](https://data.mendeley.com/datasets/wfn2c3437n/2)
- **Argoverse 2 Motion Forecasting Dataset** [8]. Collected from Austin in Texas, Detroit in Michigan, Miami in Florida, Pittsburgh in Pennsylvania, Palo Alto in California, and Washington, D.C. by Argo AI with researchers from Carnegie Mellon University and the Georgia Institute of Technology. Available at -
  - [Argoverse 2 Motion Forecasting Dataset](https://www.argoverse.org/av2.html)

For more details on the datasets, please refer to the references and our paper.

## Installation

All the data are provided in CSV format. If you want to run our source code, please make sure you meet the prerequisites below:

1. **Python 3** - Ensure you have a Python 3 environment set up.
2. **Required packages** - Install all necessary packages listed in the `requirements.txt` file, e.g., with `pip install -r requirements.txt`.
3. **Original data or the processed dataset** - Download the original data or our processed data from the links provided, and verify that the data paths in the code match where you stored the files before processing or analyzing the data.

We also recommend software packages such as R for analyzing the trajectory data; these tools are well-suited to the dataset's format.

## Usage

### Code

The code related to our data processing and validation is stored in the folder `/Code`. This folder contains the following files:

- **Main.py** - The main script; it calls the data processing and analysis functions for each dataset.
- **CF_extraction.py** - Code used in Step 1 to extract AV longitudinal trajectories.
- **data_transformation.py** - Code used in Step 1 to convert all datasets to a unified format.
- **data_cleaning.py** - Code used in Steps 2 and 3 for data cleaning.
- **data_analysis.py** - Code used to analyze data statistics, plot traffic performance of datasets, and plot scatter plots.
- **plot_result.py** - Code used to plot figures such as bar charts and scatter plots from the processed data.
- **model_calibration.py** - An example tool that uses the processed data to calibrate a linear car-following model (see the sketch at the end of this subsection).

To use this repo, run the Python script `Main.py`. As you proceed through each Python script, always verify the paths of both the input and output files; this ensures that everything runs smoothly.
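As a flavor of what the calibration step involves, the sketch below fits a linear car-following model of the form $a^{\mathrm{f}} = \beta_0 + \beta_1 g + \beta_2 v^{\mathrm{f}} + \beta_3 \Delta v$ by least squares on the processed columns. It is a minimal, hypothetical example, not the exact routine in `model_calibration.py`, and the CSV path is a placeholder; the column names follow the Data section below.

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Placeholder path -- any processed car-following CSV from the figshare link.
df = pd.read_csv('./ultra_av_car_following.csv').dropna()

# Fit Acc_FAV as a linear function of gap, FAV speed, and speed difference.
X = df[['Space_Gap', 'Speed_FAV', 'Speed_Diff']]
y = df['Acc_FAV']

model = LinearRegression().fit(X, y)
print('coefficients (g, v^f, dv):', model.coef_)
print('intercept:', model.intercept_)
print('in-sample R^2:', model.score(X, y))
```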
### Data

Data attributes are shown below:

| Label | Description | Notations and formulation | Unit |
| ------------- | -------------------------------------------- | ------------------------------------------------------------ | ---- |
| Trajectory_ID | ID of the longitudinal trajectory. | $i\in \mathcal{I}$. | N/A |
| Time_Index | Common time stamp in one trajectory. | $t\in \mathcal{T}_i, i\in \mathcal{I}$. | s |
| ID_LV | LV (lead vehicle) ID. | $c^{\mathrm{l}}_i, i\in \mathcal{I}$. AV lead vehicles carry the corresponding FAV ID; all HVs are labeled -1. | N/A |
| Type_LV | Whether the LV is an AV or a human-driven vehicle. | AVs are labeled 1 and human-driven vehicles 0. | N/A |
| Pos_LV | LV position in the Frenet coordinates. | $p^{\mathrm{l}}_{it}=p^{\mathrm{f}}_{it}+h_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_LV | LV speed. | $v^{\mathrm{l}}_{it}=\frac{p^{\mathrm{l}}_{i(t+1)}-p^{\mathrm{l}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
| Acc_LV | LV acceleration. | $a^{\mathrm{l}}_{it}=\frac{v^{\mathrm{l}}_{i(t+1)}-v^{\mathrm{l}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s² |
| ID_FAV | FAV (following automated vehicle) ID. | $c^{\mathrm{f}}_i, i\in \mathcal{I}$. Each FAV is labeled with a distinct ID. | N/A |
| Pos_FAV | FAV position in the Frenet coordinates. | $p^{\mathrm{f}}_{it}=p^{\mathrm{f}}_{i(t-1)}+\Delta t \cdot v^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_FAV | FAV speed. | $v^{\mathrm{f}}_{it}=\frac{p^{\mathrm{f}}_{i(t+1)}-p^{\mathrm{f}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
| Acc_FAV | FAV acceleration. | $a^{\mathrm{f}}_{it}=\frac{v^{\mathrm{f}}_{i(t+1)}-v^{\mathrm{f}}_{it}}{\Delta t}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s² |
| Space_Gap | Bumper-to-bumper distance between the two vehicles. | $g_{it}=p^{\mathrm{l}}_{it}-p^{\mathrm{f}}_{it} - l^{\mathrm{f}}/2 - l^{\mathrm{l}}/2, i\in \mathcal{I}, t\in \mathcal{T}_i$, where $l^{\mathrm{f}}$ and $l^{\mathrm{l}}$ are the lengths of the FAV and the LV. | m |
| Space_Headway | Distance between the centers of the two vehicles. | $h_{it}=p^{\mathrm{l}}_{it}-p^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m |
| Speed_Diff | Speed difference of the two vehicles. | $\Delta v_{it}=v^{\mathrm{l}}_{it}-v^{\mathrm{f}}_{it}, i\in \mathcal{I}, t\in \mathcal{T}_i$. | m/s |
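Because the derived columns follow directly from the position series, they can be re-derived as a sanity check. The sketch below recomputes the spacing and speed-difference columns for one trajectory using the forward differences defined in the table above; the CSV path is again a placeholder, and a uniform time step is assumed.

```python
import pandas as pd

df = pd.read_csv('./ultra_av_car_following.csv')  # placeholder path
traj = df[df['Trajectory_ID'] == df['Trajectory_ID'].iloc[0]].sort_values('Time_Index')

dt = traj['Time_Index'].diff().median()  # sampling interval, assumed uniform

# Forward differences, matching v_t = (p_{t+1} - p_t) / dt in the table;
# these should be close to the published Speed_FAV and Acc_FAV columns.
speed_fav = traj['Pos_FAV'].diff().shift(-1) / dt
acc_fav = traj['Speed_FAV'].diff().shift(-1) / dt

# Spacing quantities: h_t = p^l_t - p^f_t and dv_t = v^l_t - v^f_t.
headway = traj['Pos_LV'] - traj['Pos_FAV']
speed_diff = traj['Speed_LV'] - traj['Speed_FAV']

# Deviations should be near zero, up to rounding in the published CSVs.
print((headway - traj['Space_Headway']).abs().max())
print((speed_diff - traj['Speed_Diff']).abs().max())
```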
The FAV IDs are provided below:

**Vanderbilt Two-vehicle ACC Dataset:**

- 0 - A commercially available 2019 SUV with a full-speed-range adaptive cruise control system.

**MicroSimACC Dataset:**

- 0 - Toyota Corolla LE 2020

**CATS ACC Dataset:**

- 0 - Lincoln MKZ 2016 (Black)
- 1 - Lincoln MKZ 2017 (Red)

**CATS Platoon Dataset:**

- 0 - Lincoln MKZ 2016 (Black)
- 1 - Lincoln MKZ 2017 (Red)

**CATS UWM Dataset:**

- 0 - Lincoln MKZ 2017 (Red)

**OpenACC Casale Dataset:**

- 0 - Rexton
- 1 - Hyundai

**OpenACC Vicolungo Dataset:**

- 0 - Ford(S-Max)
- 1 - KIA(Niro)
- 2 - Mini(Cooper)
- 3 - Mitsubishi(OutlanderPHEV)
- 4 - Mitsubishi(SpaceStar)
- 5 - Peugeot(3008GTLine)
- 6 - VW(GolfE)

**OpenACC Asta Dataset:**

- 0 - Audi(A6)
- 1 - Audi(A8)
- 2 - BMW(X5)
- 3 - Mercedes(AClass)
- 4 - Tesla(Model3)

**OpenACC ZalaZone Dataset:**

- 0 - AUDI_A4
- 1 - AUDI_E_TRON
- 2 - BMW_I3
- 3 - JAGUAR_I_PACE
- 4 - MAZDA_3
- 5 - MERCEDES_GLE450
- 6 - SMART_TARGET
- 7 - SKODA_TARGET
- 8 - TESLA_MODEL3
- 9 - TESLA_MODELS
- 10 - TESLA_MODELX
- 11 - TOYOTA_RAV4

**Ohio Single-vehicle Dataset:**

- 0 - Retrofitted Tesla sedan

**Ohio Two-vehicle Dataset:**

- 0 - Retrofitted Tesla sedan
- 1 - Retrofitted Ford Fusion sedan

**Waymo Perception Dataset:**

- 0 - Waymo ADS-equipped vehicle

**Waymo Motion Dataset:**

- 0 - Waymo ADS-equipped vehicle

**Argoverse 2 Motion Forecasting Dataset:**

- 0 - Argo AI self-driving Ford

For more details on the labels and the vehicle types, please refer to our paper.
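These IDs, together with `ID_LV` and `Type_LV` from the data dictionary, make it easy to slice a dataset by vehicle or by pair type. Below is a minimal sketch (the file path is a placeholder) for the OpenACC Asta data, where Tesla(Model3) has FAV ID 4:

```python
import pandas as pd

asta = pd.read_csv('./asta_car_following.csv')  # placeholder path

# Trajectories whose FAV is the Tesla(Model3), i.e. ID_FAV == 4 in this dataset.
tesla = asta[asta['ID_FAV'] == 4]
print(tesla['Trajectory_ID'].nunique(), 'Tesla(Model3) trajectories')

# AV-following-AV pairs: the lead vehicle is itself an AV (Type_LV == 1).
av_led = asta[asta['Type_LV'] == 1]

# Pairs led by a human-driven vehicle carry ID_LV == -1.
hv_led = asta[asta['ID_LV'] == -1]
```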
"Processing, assessing, and enhancing the Waymo autonomous vehicle open dataset for driving behavior research." *Transportation Research Part C: Emerging Technologies* 134 (2022): 103490. 191 | 192 | [7] Ettinger, Scott, Shuyang Cheng, Benjamin Caine, Chenxi Liu, Hang Zhao, Sabeek Pradhan, Yuning Chai et al. "Large scale interactive motion forecasting for autonomous driving: The waymo open motion dataset." In *Proceedings of the IEEE/CVF International Conference on Computer Vision*, pp. 9710-9719. 2021. 193 | 194 | [8] Wilson, Benjamin, William Qi, Tanmay Agarwal, John Lambert, Jagjeet Singh, Siddhesh Khandelwal, Bowen Pan et al. "Argoverse 2: Next generation datasets for self-driving perception and forecasting." *arXiv preprint arXiv:2301.00493* (2023). 195 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CATS-Lab/Filed-Experiment-Data-ULTra-AV/90b984065740a2487519668076dc0ad951d54b7d/requirements.txt --------------------------------------------------------------------------------