├── Vehicles'_change_trajectory_process.py
├── Vehicles'_start&braking_trajectory_process.py
├── Weight-based Adaptive Data Stream Clustering Aglorithm.py
└── lankershim_smoothing.py
/Vehicles'_change_trajectory_process.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

datain = pd.read_csv('./lankershim_data_rectify_20220322.csv')
data = datain[['Vehicle_ID', 'Frame_ID', 'v_Vel', 'v_Acc', 'Space_Headway', 'Time_Headway', 'Lane_ID']]
data = data.sort_values(by=["Vehicle_ID", "Frame_ID"], ascending=True)
# data = data.drop_duplicates(subset=['Global_Time'], keep='first', inplace=False)
Vehicle_id = data['Vehicle_ID'].unique()

data_visiable = data.copy()
data_visiable['Lane change'] = data_visiable['Lane_ID'].shift(1)
# pd.set_option('display.max_columns', None)
data_visiable['Lane change'] = data_visiable['Lane change'] - data_visiable['Lane_ID']  # difference between consecutive frames
data_s = data_visiable.fillna(0)
print(data_s)

# iterate over the vehicles and record a window around each lane-change event
change_df = pd.DataFrame(columns=['ID', 'start', 'end'])
c = datain.columns
select_data = pd.DataFrame(columns=c)
select_data['time'] = []
print(select_data)
for id in Vehicle_id:
    print(id)
    data_vehicle = data_s[data_s['Vehicle_ID'] == id]
    time_change = data_vehicle[data_vehicle['Lane change'] != 0]
    array = time_change['Frame_ID'].unique()
    array_time = data_vehicle['Frame_ID'].unique()
    for i in array:
        # take 50 frames before and after the lane change, clipped to the vehicle's trajectory
        a = i - 50
        b = i + 50
        if not np.any(array_time == a):
            a = np.min(array_time)
        if not np.any(array_time == b):
            b = np.max(array_time)
        change_df = pd.concat([change_df, pd.DataFrame([{'ID': id, 'start': a, 'end': b}])], ignore_index=True)

change_id = change_df['ID'].unique()
for id in change_id:
    change_vehicle_data = datain[datain['Vehicle_ID'] == id]
    num = change_df[change_df['ID'] == id]
    print(id)
    for p in range(0, len(num)):
        s = num.iloc[p]['start']
        e = num.iloc[p]['end']
        data_select = change_vehicle_data[(change_vehicle_data['Frame_ID'] >= s) & (change_vehicle_data['Frame_ID'] <= e)].copy()
        data_select['time'] = s
        select_data = pd.concat([select_data, data_select])

select_data.to_csv('./vehicle_change_trajectory_data20220322.csv')
--------------------------------------------------------------------------------
/Vehicles'_start&braking_trajectory_process.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

datain = pd.read_csv('./lankershim_data_rectify_20220322.csv')
data = datain[['Vehicle_ID', 'Frame_ID', 'v_Vel', 'v_Acc', 'Space_Headway', 'Time_Headway']]
data = data.sort_values(by=["Vehicle_ID", "Frame_ID"], ascending=True)
# data = data.drop_duplicates(subset=['Global_Time'], keep='first', inplace=False)
Vehicle_id = data['Vehicle_ID'].unique()

data_visiable = data.copy()
data_visiable['brake'] = data_visiable['v_Vel'].shift(1)
data_visiable['start'] = data_visiable['v_Vel'].shift(-1)
# pd.set_option('display.max_columns', None)
data_visiable['start'] = data_visiable['start'] - data_visiable['v_Vel']
data_visiable['brake'] = data_visiable['brake'] - data_visiable['v_Vel']  # difference between consecutive frames
data_s = data_visiable.fillna(0)
print(data_s)

# iterate over the vehicles and record a window around each start-up and braking event
change_df_start = pd.DataFrame(columns=['ID', 'start', 'end'])
change_df_brake = pd.DataFrame(columns=['ID', 'start', 'end'])
c = datain.columns  # ['Vehicle_ID', 'Frame_ID', 'v_Vel', 'v_Acc', 'Space_Headway', 'Time_Headway']
select_data_start = pd.DataFrame(columns=c)
select_data_brake = pd.DataFrame(columns=c)
select_data_start['time'] = []
select_data_brake['time'] = []

# print(select_data)

for id in Vehicle_id:
    print(id)
    data_vehicle = data_s[data_s['Vehicle_ID'] == id]
    time_change_start = data_vehicle[(data_vehicle['v_Vel'] == 0) & (data_vehicle['start'] > 0)]
    time_change_brake = data_vehicle[(data_vehicle['v_Vel'] == 0) & (data_vehicle['brake'] > 0)]

    array_start = time_change_start['Frame_ID'].unique()
    array_brake = time_change_brake['Frame_ID'].unique()

    array_time = data_vehicle['Frame_ID'].unique()

    for i in array_start:
        # take the 37 frames after the start-up event, clipped to the vehicle's trajectory
        a = i + 37
        b = i
        if not np.any(array_time == a):
            a = np.max(array_time)

        change_df_start = pd.concat([change_df_start, pd.DataFrame([{'ID': id, 'start': b, 'end': a}])], ignore_index=True)
    for i in array_brake:
        # take the 37 frames before the braking event, clipped to the vehicle's trajectory
        a = i - 37
        b = i
        if not np.any(array_time == a):
            a = np.min(array_time)

        change_df_brake = pd.concat([change_df_brake, pd.DataFrame([{'ID': id, 'start': a, 'end': b}])], ignore_index=True)


start_id = change_df_start['ID'].unique()
for id in start_id:
    start_vehicle_data = datain[datain['Vehicle_ID'] == id]
    num = change_df_start[change_df_start['ID'] == id]
    print(id)
    for p in range(0, len(num)):
        s = num.iloc[p]['start']
        e = num.iloc[p]['end']
        data_select = start_vehicle_data[(start_vehicle_data['Frame_ID'] >= s) & (start_vehicle_data['Frame_ID'] <= e)].copy()
        data_select['time'] = s
        select_data_start = pd.concat([select_data_start, data_select])

brake_id = change_df_brake['ID'].unique()
for id in brake_id:
    brake_vehicle_data = datain[datain['Vehicle_ID'] == id]
    num = change_df_brake[change_df_brake['ID'] == id]
    print(id)
    for p in range(0, len(num)):
        s = num.iloc[p]['start']
        e = num.iloc[p]['end']
        data_select = brake_vehicle_data[(brake_vehicle_data['Frame_ID'] >= s) & (brake_vehicle_data['Frame_ID'] <= e)].copy()
        data_select['time'] = s
        select_data_brake = pd.concat([select_data_brake, data_select])


select_data_start.to_csv('./select_data_start.csv')
select_data_brake.to_csv('./select_data_brake.csv')
--------------------------------------------------------------------------------
/Weight-based Adaptive Data Stream Clustering Aglorithm.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import scipy.stats
from scipy.signal import argrelextrema
from scipy.spatial.distance import jensenshannon
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, pairwise_distances_argmin_min
from sklearn.neighbors import KernelDensity


def updata_centroid_k(X):
    # estimate a candidate range for the cluster number k from kernel-density peaks,
    # then pick the k with the best silhouette score
    V = X[:, 0][:, np.newaxis]
    A = X[:, 1][:, np.newaxis]
    # S = X[:, 2][:, np.newaxis]
    S = X[:, 2]
    S = S[np.where(S < 50)]
    S = S[:, np.newaxis]
    k_predict_data = [V, A, S]
    k = 0
    k_array = []
    for data in k_predict_data:
        model = KernelDensity(bandwidth=3, kernel='gaussian')
        model.fit(data)
        x_range = np.linspace(data.min(), data.max(), 500)
        x_log_prob = model.score_samples(x_range[:, np.newaxis])
        x_prob = np.exp(x_log_prob)
        greater = argrelextrema(x_prob, np.greater)
        l = len(greater[0])
        k_array = np.append(k_array, l)
    k_min = np.min(k_array)
    k_max = np.max(k_array)
    a = int(k_min) + 1
    b = int(k_max + k_min) + 2
    F = 0
    for n in range(2, b, 1):
        # print(n)
        kmeans = KMeans(n_clusters=n, random_state=0).fit(X)
        a = len(np.unique(kmeans.labels_))
        if a == 1:
            k = 2
        else:
            sc = silhouette_score(X, kmeans.labels_)
            if sc > F:
                F = sc
                k = n
    print("k is:", k)
    return k


def set_weight(mu, n):
    # exponential time-decay weights: newer samples get weights closer to 1
    time_array = np.arange(1, n + 1, 1)
    weight = []
    for t in time_array:
        t = float(t)
        # print(mu)
        w = 1 - 2 ** (-mu * t)
        weight = np.append(weight, w)
    return weight


def find_clusters(C_array, X):
    # assign each point in X to the nearest centroid in C_array
    centroid_label = pairwise_distances_argmin_min(X, C_array)
    centroid_label = centroid_label[0]
    return centroid_label


def factor_generator(x_prior, x_post):
    # drift factor: the largest Jensen-Shannon distance over the three features
    js_score = []
    for i in range(0, 3):
        x_prior_ = x_prior[:, i]
        x_prior_ = x_prior_[:, np.newaxis]
        x_post_ = x_post[:, i]
        x_post_ = x_post_[:, np.newaxis]
        js = caculate_jensenshannon(x_prior_, x_post_)
        js_score = np.append(js_score, js)
    js_score = np.max(js_score)
    return js_score


def caculate_kl(x_prior, x_post):
    # KL divergence between the kernel-density estimates of the two windows
    model_prior = KernelDensity(bandwidth=1, kernel='gaussian')
    model_prior.fit(x_prior)
    model_post = KernelDensity(bandwidth=1, kernel='gaussian')
    model_post.fit(x_post)
    min_ = min(x_prior.min(), x_post.min())
    max_ = max(x_prior.max(), x_post.max())
    x_range = np.linspace(start=min_, stop=max_, num=500)

    y_log_prior = model_prior.score_samples(x_range[:, np.newaxis])
    y_prior = np.exp(y_log_prior)
    y_log_post = model_post.score_samples(x_range[:, np.newaxis])
    y_post = np.exp(y_log_post)
    kl = scipy.stats.entropy(y_prior, y_post)
    return kl


def caculate_jensenshannon(x_prior, x_post):
    # Jensen-Shannon distance between the kernel-density estimates of the two windows
    model_prior = KernelDensity(bandwidth=1, kernel='gaussian')
    model_prior.fit(x_prior)
    model_post = KernelDensity(bandwidth=1, kernel='gaussian')
    model_post.fit(x_post)
    min_ = min(x_prior.min(), x_post.min())
    max_ = max(x_prior.max(), x_post.max())
    x_range = np.linspace(start=min_, stop=max_, num=500)

    y_log_prior = model_prior.score_samples(x_range[:, np.newaxis])
    y_prior = np.exp(y_log_prior)
    y_log_post = model_post.score_samples(x_range[:, np.newaxis])
    y_post = np.exp(y_log_post)
    js = jensenshannon(y_prior, y_post)
    return js
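

# NOTE: cal_Cmass and factor_generator_kl are called later in this file but were not
# defined in the original source. The two functions below are minimal sketches
# reconstructed from how they are used; they are assumptions, not the authors' code.

def cal_Cmass(cluster_rows):
    # assumed behaviour: return the column-wise mean of the rows belonging to one cluster
    # (column 0 is the cluster label, columns 1:4 are the features used by Initialize_the_centroid)
    return np.mean(cluster_rows, axis=0)


def factor_generator_kl(x_prior, x_post):
    # assumed behaviour: same structure as factor_generator, but scoring drift with the
    # otherwise-unused caculate_kl helper instead of the Jensen-Shannon distance
    kl_score = []
    for i in range(0, 3):
        x_prior_ = x_prior[:, i][:, np.newaxis]
        x_post_ = x_post[:, i][:, np.newaxis]
        kl_score = np.append(kl_score, caculate_kl(x_prior_, x_post_))
    return np.max(kl_score)
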

def Initialize_the_centroid(Bigdata):
    # cluster the first window of the stream to obtain the initial centroids and labels
    X_1 = Bigdata[0:n]
    kernel = updata_centroid_k(X_1)
    kmeans = KMeans(n_clusters=kernel, random_state=0).fit(X_1)
    C = np.insert(X_1, 0, kmeans.labels_, axis=1)
    label = kmeans.labels_
    C_array = []
    id = np.unique(C[:, 0])

    for i in id:
        c_average = C[np.where(C[:, 0] == i)]
        c_average = cal_Cmass(c_average)
        print(type(c_average))
        C_array = np.append(C_array, c_average[1:4])
        # print("array:", C_array)
    C_array = C_array.reshape(kernel, 3)
    return C_array, label


Bigdata = pd.read_csv('./brake_data_events.csv')
Bigdata = Bigdata.sort_values(by=["time"], ascending=True)
Bigdata = Bigdata[['v_Vel', 'v_Acc', 'Time_Headway']].to_numpy()  # the functions above index positionally, so work on an array
Bigdata_size = len(Bigdata)
n = 500
ID = 0
Threshold = 0.08
count = 0
mu = 1
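
# NOTE: update_centroid_position is called in the streaming loop below but was not defined
# in the original file, and the loop reads C_array and label_add before assigning them.
# The sketch and the initialisation here are assumptions reconstructed from the surrounding
# code, not the authors' implementation.

def update_centroid_position(X, k):
    # assumed behaviour: re-cluster the current window with k-means, weighting newer samples
    # more heavily via the otherwise-unused set_weight helper, and return the new centroids
    weight = set_weight(mu, len(X))
    kmeans = KMeans(n_clusters=k, random_state=0).fit(X, sample_weight=weight)
    return kmeans.cluster_centers_


# initial centroids and labels from the first window of the stream (assumed initialisation)
C_array, label_add = Initialize_the_centroid(Bigdata)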

for i in range(0, Bigdata_size - n):  # iterate over the whole data stream

    count += 1
    data_prior = Bigdata[ID:ID + n]
    data_current = Bigdata[i:i + n]  # reference window and current sliding window

    data_point = Bigdata[i + n, :]  # the newly arriving data point
    data_point = data_point.reshape(1, 3)
    # print(data_prior)
    # print(type(data_point))
    js_score = 0
    if count % 200 == 0:
        js_score = factor_generator_kl(data_prior, data_current)

    if js_score > Threshold:
        # drift detected: reset the reference window and re-estimate the centroids
        mu = 1
        count = 1
        ID = i
        print(ID)
        print("run")
        k = updata_centroid_k(data_current)
        C_array = update_centroid_position(data_current, k)
        centroid_label = find_clusters(C_array, data_point)
        label_add = np.append(label_add, centroid_label)

    else:

        if count > n:
            # the reference window has aged out: refresh it and re-estimate the centroids
            mu = 1
            count = 1
            ID = i
            print(ID)
            # print(data_current)
            k = updata_centroid_k(data_current)
            C_array = update_centroid_position(data_current, k)
            centroid_label = find_clusters(C_array, data_point)
            label_add = np.append(label_add, centroid_label)

        else:
            # no drift: assign the new point to the nearest existing centroid
            centroid_label = find_clusters(C_array, data_point)
            label_add = np.append(label_add, centroid_label)
--------------------------------------------------------------------------------
/lankershim_smoothing.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
This code
1. smooths out the noise in the local x and local y values in the Lankershim dataset
2. recomputes the velocities and accelerations
3. saves the smoothed datasets to three separate CSV files
"""

from scipy import signal
import pandas as pd
import numpy as np
import numexpr


def get_file_name(index, file_name):
    if index == 0:
        return file_name[0].split('.', 1)[0]
    elif index == 1:
        return file_name[1].split('.', 1)[0]
    else:
        return file_name[2].split('.', 1)[0]


def get_smoothed_x_y_vel_accel(dataset, window):
    """
    This function returns four numpy arrays representing the smoothed
    1) local x, 2) local y, 3) velocity, 4) acceleration for a given numpy dataset.
    It relies on two helper functions, get_smoothed_x_y and get_smoothed_vel_accel.
    :param dataset: numpy array representing the dataset whose local X, Y, velocity and acceleration are smoothed.
                    The numpy array should contain info for a single vehicle ID,
                    otherwise the resulting smoothed values are incorrect
    :param window: the smoothing window, which must be an odd integer value;
                   if it is set to 11, points are smoothed over a 1-second interval (10 points);
                   if it is set to 21, points are smoothed over a 2-second interval (20 points)
    """
    smoothed_x_values, smoothed_y_values = get_smoothed_x_y(dataset, window)

    initial_vel = dataset[0, 11]
    initial_accel = dataset[0, 12]

    time_values = dataset[:, time_column]
    smoothed_vel, smoothed_accel = get_smoothed_vel_accel(smoothed_x_values, smoothed_y_values,
                                                          time_values, initial_vel, initial_accel)
    return smoothed_x_values, smoothed_y_values, smoothed_vel, smoothed_accel


def get_smoothed_x_y(dataset, window):
    """
    This function computes the smoothed local x and local y using savgol_filter for a given numpy dataset
    and returns two numpy arrays containing the smoothed x and y values.
    :param dataset: numpy array representing the dataset whose local X and Y are smoothed.
                    The numpy array should contain info for a single vehicle ID,
                    otherwise the resulting smoothed values are incorrect
    :param window: the smoothing window, which must be an odd integer value;
                   if it is set to 11, points are smoothed over a 1-second interval (10 points);
                   if it is set to 21, points are smoothed over a 2-second interval (20 points)
    """
    smoothed_x_values = signal.savgol_filter(dataset[:, local_x], window, 3)
    smoothed_y_values = signal.savgol_filter(dataset[:, local_y], window, 3)

    return smoothed_x_values, smoothed_y_values


def get_smoothed_vel_accel(smoothed_x_values, smoothed_y_values, time_values, initial_vel, initial_accel):
    """
    This function recomputes the velocity and acceleration for given arrays of smoothed x values, y values and time values.
    To speed up the calculation we use matrix expressions. For example, to compute velocity,
    the x and y values are stacked to form matrix A. Matrix B is then formed from matrix A by skipping
    the first row. This means that the x, y in the first row of matrix B are the next values of the x, y in the
    first row of matrix A. With two matrices containing the current x, y and the next x, y values we use fast matrix
    expressions to compute the smoothed velocities.

    The function returns two numpy arrays representing the smoothed velocity and acceleration.

    :param smoothed_x_values: a numpy array of smoothed x values
    :param smoothed_y_values: a numpy array of smoothed y values
    :param time_values: a numpy array of time values for the given x and y
    :param initial_vel: a single number containing the initial velocity
    :param initial_accel: a single number containing the initial acceleration
    """
    # create matrix A containing the current x and y values and matrix B containing the next x and y values
    x_y_matrix_A = np.column_stack((smoothed_x_values, smoothed_y_values))
    x_y_matrix_B = x_y_matrix_A[1:, :]
    # remove the last row as it has no next values
    x_y_matrix_A = x_y_matrix_A[0:-1, :]

    # compute the distance travelled between the current and next x, y values
    dist_temp = numexpr.evaluate('sum((x_y_matrix_B - x_y_matrix_A)**2, 1)')
    dist = numexpr.evaluate('sqrt(dist_temp)')

    # create matrix A containing the current time values and matrix B containing the next time values
    t_matrix_A = time_values
    t_matrix_B = t_matrix_A[1:]
    # remove the last row
    t_matrix_A = t_matrix_A[0:-1]

    # compute the smoothed velocity by dividing the distance by the time delta
    vel = numexpr.evaluate('dist * 1000 / (t_matrix_B - t_matrix_A)')
    smoothed_velocities = np.insert(vel, 0, initial_vel, axis=0)

    # create matrix A containing the current velocities and matrix B containing the next velocities
    vel_matrix_A = smoothed_velocities
    vel_matrix_B = vel_matrix_A[1:]
    # remove the last row
    vel_matrix_A = vel_matrix_A[0:-1]

    # compute the smoothed acceleration by dividing the velocity delta by the time delta
    acc = numexpr.evaluate('(vel_matrix_B - vel_matrix_A) * 1000 / (t_matrix_B - t_matrix_A)')
    smoothed_acceleration = np.insert(acc, 0, initial_accel, axis=0)

    return np.array(smoothed_velocities), np.array(smoothed_acceleration)


def smooth_dataset(window, train, file_names):
    """
    This function loops over the training datasets and the unique vehicle ids in each dataset;
    for each vehicle id it requests the smoothed x, y, vel, accel values from the helper methods
    and replaces the old values with the smoothed values. Finally the new smoothed dataset is
    written to a file.
    :param window: the smoothing window, which must be an odd integer value
    :param train: a list of 3 data frames containing the original Lankershim data
    :param file_names: a list of the 3 input file names, used to build the output file names
    """
    # find the unique vehicle ids in each of the datasets
    vehicle_ids = [train[0]['Vehicle_ID'].unique(), train[1]['Vehicle_ID'].unique(), train[2]['Vehicle_ID'].unique()]

    # convert to numpy arrays to facilitate the matrix operations used to compute velocity and acceleration
    numpy_trains = [train[0].to_numpy(), train[1].to_numpy(), train[2].to_numpy()]

    for i in range(3):  # in each dataset
        numpy_train = numpy_trains[i]
        print(f"##### smoothing x, y, vel, accel values in train data {str(i)}")

        # for each unique vehicle id, smooth x and y and recompute vel and accel
        for vehicle in vehicle_ids[i]:
            # create a filter for the given vehicle id and use it to build a numpy array containing info only for that vehicle
            filter = numpy_train[:, 0] == vehicle
            numpy_vehicle_dataset = numpy_train[filter, :]

            smoothed_x_values, smoothed_y_values, smoothed_vel, smoothed_accel = \
                get_smoothed_x_y_vel_accel(numpy_vehicle_dataset, window)

            # replace the values of x, y, vel, accel with the new smoothed values
            numpy_train[filter, local_x] = smoothed_x_values
            numpy_train[filter, local_y] = smoothed_y_values
            numpy_train[filter, v_vel] = smoothed_vel
            numpy_train[filter, v_acc] = smoothed_accel

        # write to file
        file_name = get_file_name(i, file_names)
        file_path = path_to_smoothed_dataset + file_name + '_smoothed_' + str(window) + '.csv'
        np.savetxt(file_path, numpy_trains[i], delimiter=",")


def main():
    # the smoothing window must be an odd value
    smoothing_window = 21
    print(f"Smoothing window is set to {str(smoothing_window)}")

    # change the file names as needed
    global file_names
    file_names = ['0750_0805_us101.csv', '0805_0820_us101.csv', '0820_0835_us101.csv']

    # define the index of the columns containing vehicle id, time, local x, local y, velocity and acceleration
    # these indexes correspond to the original dataset if not modified
    # the indexes allow the dataset to be treated as a matrix so smoothing can be done with matrix functions
    global vehicle_id, time_column, local_x, local_y, v_vel, v_acc
    vehicle_id, time_column, local_x, local_y, v_vel, v_acc = 0, 3, 4, 5, 11, 12

    # specify the path to the input Lankershim dataset and the path to the output smoothed dataset
    global path_to_dataset, path_to_smoothed_dataset
    path_to_dataset = 'C:/.../Lankershim/dataset/'
    path_to_smoothed_dataset = 'C:/.../smoothed/'

    # load the Lankershim data from the CSV files
    train1 = pd.read_csv(path_to_dataset + file_names[0], engine='c')
    train2 = pd.read_csv(path_to_dataset + file_names[1], engine='c')
    train3 = pd.read_csv(path_to_dataset + file_names[2], engine='c')

    train = [train1, train2, train3]

    smooth_dataset(smoothing_window, train, file_names)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------