├── README.md
├── data_prepare.py
├── evaluate&predict.py
└── train.py

/README.md:
--------------------------------------------------------------------------------
# Prediction-Based-GNSS-Spoofing-Attack-Detection-for-Autonomous-Vehicle
Experimental realization of Prediction-Based GNSS Spoofing Attack Detection for Autonomous Vehicles in Python.
For more information, see https://blog.csdn.net/weixin_44546393/article/details/114264497
--------------------------------------------------------------------------------
/data_prepare.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import pandas as pd
import pyproj
from scipy.interpolate import make_interp_spline
import math
import matplotlib.pyplot as plt

# a = 6378137
# b = 6356752.3142
# esq = 6.69437999014 * 0.001
# e1sq = 6.73949674228 * 0.001
# def ecef2geodetic(ecef, radians=False):
#     """
#     Convert ECEF coordinates to geodetic using Ferrari's method
#     """
#     # Save shape and export column
#     ecef = np.atleast_1d(ecef)
#     input_shape = ecef.shape
#     ecef = np.atleast_2d(ecef)
#     x, y, z = ecef[:, 0], ecef[:, 1], ecef[:, 2]
#
#     ratio = 1.0 if radians else (180.0 / np.pi)
#
#     # Convert from ECEF to geodetic using Ferrari's method
#     # https://en.wikipedia.org/wiki/Geographic_coordinate_conversion#Ferrari.27s_solution
#     r = np.sqrt(x * x + y * y)
#     Esq = a * a - b * b
#     F = 54 * b * b * z * z
#     G = r * r + (1 - esq) * z * z - esq * Esq
#     C = (esq * esq * F * r * r) / (pow(G, 3))
#     S = np.cbrt(1 + C + np.sqrt(C * C + 2 * C))
#     P = F / (3 * pow((S + 1 / S + 1), 2) * G * G)
#     Q = np.sqrt(1 + 2 * esq * esq * P)
#     r_0 = -(P * esq * r) / (1 + Q) + np.sqrt(0.5 * a * a * (1 + 1.0 / Q) -
#                                              P * (1 - esq) * z * z / (Q * (1 + Q)) - 0.5 * P * r * r)
#     U = np.sqrt(pow((r - esq * r_0), 2) + z * z)
#     V = np.sqrt(pow((r - esq * r_0), 2) + (1 - esq) * z * z)
#     Z_0 = b * b * z / (a * V)
#     h = U * (1 - b * b / (a * V))
#     lat = ratio * np.arctan((z + e1sq * Z_0) / r)
#     lon = ratio * np.arctan2(y, x)
#
#     # stack the new columns and return to the original shape
#     geodetic = np.column_stack((lat, lon, h))
#     return geodetic.reshape(input_shape)

# Insert a string into another string at the given position
def str_insert(str_origin, pos, str_add):
    str_list = list(str_origin)    # convert the string to a list
    str_list.insert(pos, str_add)  # insert the new string at the given position
    str_out = ''.join(str_list)    # join back into a single string
    return str_out

# De-duplicate the timestamps in an array
def unique(old_list):
    newList = []
    # check whether any adjacent timestamps are equal
    if np.any(old_list[1:] == old_list[:-1]):
        for x in old_list:
            if x in newList:
                # if the timestamp already exists, add a tiny offset to make it unique
                x = x + 0.005
            newList.append(x)
        return np.array(newList)
    else:
        return old_list

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    column_names = ['lats', 'lons', 'CAN_speeds', 'steering_angles', 'acceleration_forward']
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('%s(t-%d)' % (j, i)) for j in column_names]
    # forecast sequence (t, t+1, ... t+n)
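    # With the n_in=1, n_out=1 call made in __main__ below, the reframed frame contains one
    # lagged copy and one current copy of every variable, i.e. the columns come out as:
    #   lats(t-1), lons(t-1), CAN_speeds(t-1), steering_angles(t-1), acceleration_forward(t-1),
    #   lats(t),   lons(t),   CAN_speeds(t),   steering_angles(t),   acceleration_forward(t)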
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('%s(t)' % (j)) for j in column_names]
        else:
            names += [('%s(t+%d)' % (j, i)) for j in column_names]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

EARTH_RADIUS = 6378.137  # mean Earth radius in km

def rad(d):
    return d * math.pi / 180.0

def getDistance(lat1, lng1, lat2, lng2):
    # element-wise great-circle distance between two coordinate arrays (haversine formula)
    res = []
    for i in range(len(lat1)):
        radLat1 = rad(lat1[i])
        radLat2 = rad(lat2[i])
        a = radLat1 - radLat2
        b = rad(lng1[i]) - rad(lng2[i])
        s = 2 * math.asin(math.sqrt(math.pow(math.sin(a / 2), 2) +
                                    math.cos(radLat1) * math.cos(radLat2) * math.pow(math.sin(b / 2), 2)))
        s = s * EARTH_RADIUS * 1000  # convert from km to m
        res.append(s)
    return res

if __name__ == '__main__':

    # transformer for converting the GNSS coordinates (ECEF -> latitude/longitude)
    position_transformer = pyproj.Transformer.from_crs(
        {"proj": 'geocent', "ellps": 'WGS84', "datum": 'WGS84'},
        {"proj": 'latlong', "ellps": 'WGS84', "datum": 'WGS84'},
    )
    dataset_directory = 'D:\\comma2k19'
    chunk_set = []
    for chunk in os.listdir(dataset_directory):
        # skip csv files generated by earlier runs
        if ".csv" in chunk:
            continue
        # zero-pad single-digit chunk numbers so that they sort correctly later
        if len(chunk) == 7:
            used_name = chunk
            chunk = str_insert(chunk, 6, '0')
            os.rename(os.path.join(dataset_directory, used_name), os.path.join(dataset_directory, chunk))
        chunk_set.append(os.path.join(dataset_directory, chunk))
    # sort so that lower-numbered chunks come first
    chunk_set.sort()
    # pick one chunk for training (about 200 minutes of driving)
    chunk_index = 0
    route_set = []
    for route_id in os.listdir(chunk_set[chunk_index]):
        # skip csv files generated by earlier runs
        if ".csv" in route_id:
            continue
        route_set.append(os.path.join(chunk_set[chunk_index], route_id))
    segment_set = []
    # pick one route for training
    route_index = 9
    for segment in os.listdir(route_set[route_index]):
        # zero-pad single-digit segment numbers so that they sort correctly later
        if len(segment) == 1:
            used_name = segment
            segment = '0' + segment
            os.rename(os.path.join(route_set[route_index], used_name), os.path.join(route_set[route_index], segment))
        segment_set.append(os.path.join(route_set[route_index], segment))
    # sort so that lower-numbered segments come first
    segment_set.sort()
    times = []
    lons = []
    lats = []
    orientations = []
    CAN_speeds = []
    steering_angles = []
    acceleration_forward = []
    for main_dir in segment_set:
        # load the GNSS timestamps and poses and convert the positions to latitude/longitude
        temp_GNSS_time = np.load(main_dir + '\\global_pose\\frame_times')
        times = np.append(times, temp_GNSS_time)
        # print the length of each segment
        print(len(temp_GNSS_time))
        positions = np.load(main_dir + '\\global_pose\\frame_positions')
        positions = position_transformer.transform(positions[:, 0], positions[:, 1], positions[:, 2], radians=False)
        lats = np.append(lats, positions[1])
        lons = np.append(lons, positions[0])
        # Convert from ECEF to geodetic using Ferrari's method
        # positions = ecef2geodetic(positions)
        # lats = np.append(lats, positions[:, 0])
        # lons = np.append(lons, positions[:, 1])
        # orientation is not used for now
        # orientation = np.load(main_dir + '\\global_pose\\frame_orientations')
        # orientations = np.append(orientations, np.load(main_dir + '\\global_pose\\frame_orientations'))
        temp_CAN_times = np.load(main_dir + '\\processed_log\\CAN\\speed\\t')
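        # NOTE: scipy's make_interp_spline expects strictly increasing sample times, so
        # duplicate timestamps in the raw CAN/IMU logs would break the spline construction
        # below; the unique() helper above nudges such duplicates apart beforehand.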
        # make sure the timestamps contain no duplicate values
        temp_CAN_speed_times = unique(temp_CAN_times)
        # interpolate the CAN data onto the GNSS reference timestamps
        temp_CAN_speeds = make_interp_spline(temp_CAN_speed_times,
                                             np.load(main_dir + '\\processed_log\\CAN\\speed\\value'))(temp_GNSS_time).flatten()
        CAN_speeds = np.append(CAN_speeds, temp_CAN_speeds)
        # the steering-angle timestamps do not always match the speed timestamps
        temp_CAN_angles_times = np.load(main_dir + '\\processed_log\\CAN\\steering_angle\\t')
        temp_steering_angles = np.load(main_dir + '\\processed_log\\CAN\\steering_angle\\value')
        temp_CAN_angles_times = unique(temp_CAN_angles_times)
        temp_steering_angles = make_interp_spline(temp_CAN_angles_times, temp_steering_angles)(temp_GNSS_time)
        steering_angles = np.append(steering_angles, temp_steering_angles)
        # interpolate the IMU data onto the GNSS reference timestamps
        temp_IMU_times = np.load(main_dir + '\\processed_log\\IMU\\accelerometer\\t')
        temp_acceleration_forward = make_interp_spline(temp_IMU_times,
                                                       np.load(main_dir + '\\processed_log\\IMU\\accelerometer\\value')[:, 0])(temp_GNSS_time)
        acceleration_forward = np.append(acceleration_forward, temp_acceleration_forward)

    DataSet = list(zip(times, lats, lons, CAN_speeds, steering_angles, acceleration_forward))
    column_names = ['times', 'lats', 'lons', 'CAN_speeds', 'steering_angles', 'acceleration_forward']
    df = pd.DataFrame(data=DataSet, columns=column_names)
    times = df['times'].values
    df = df.set_index(['times'], drop=True)
    values = df.values.astype('float64')
    # reframe as a supervised-learning problem
    reframed = series_to_supervised(values, 1, 1)
    # compute the distance travelled between consecutive GNSS fixes
    lons_t = reframed['lons(t)'].values
    lats_t = reframed['lats(t)'].values
    distance = np.array(getDistance(lats[:-1], lons[:-1], lats_t, lons_t))
    # keep only the lagged input features (CAN_speeds(t-1), steering_angles(t-1), acceleration_forward(t-1));
    # drop the position columns and all time-t columns, which are not used as model inputs
    reframed.drop(reframed.columns[[0, 1, 5, 6, 7, 8, 9]], axis=1, inplace=True)
    # append the timestamps and the computed distance to the data set
    reframed['distance'] = distance
    reframed['times'] = times[:-1]
    # for i in distance:
    #     if i > 100:
    #         print(i)
    plt.plot(times[:-1], distance)
    plt.xlabel('Boot time (s)', fontsize=18)
    plt.ylabel('Distance travelled during a single timestep (m)', fontsize=12)
    plt.show()
    # save the combined data set to a .csv file
    reframed.to_csv(route_set[route_index] + ".csv", index=False, sep=',')

--------------------------------------------------------------------------------
/evaluate&predict.py:
--------------------------------------------------------------------------------
import pandas as pd
import math
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

def average(seq, total=0.0):
    num = 0
    for item in seq:
        total += item
        num += 1
    return total / num

if __name__ == '__main__':

    CSV_FILE_PATH = 'D:\\comma2k19\\Chunk_03\\99c94dc769b5d96e_2018-05-01--08-13-53.csv'
    df = pd.read_csv(CSV_FILE_PATH)
    values = df.to_numpy()
    times = values[:, -1]
    distance = values[:, -2]
    model = tf.keras.models.load_model('lstm.model')
    test_X = values[:, :3]
    # the input features were normalised during training, so normalise them for prediction as well
    scaler = MinMaxScaler(feature_range=(0, 1))
    test_X = scaler.fit_transform(test_X)
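    # NOTE (assumption): ideally the MinMaxScaler fitted on the training features in train.py
    # would be persisted (e.g. with joblib.dump) and reloaded here, then applied with
    # scaler.transform(test_X). Re-fitting a fresh scaler on this file only approximates the
    # training-time normalisation when both routes span similar value ranges.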
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
    # train_len = (int)(0.75 * len(values[:, 0]))
    # train = values[:train_len, :]
    # test = values[train_len:, :]
    test_y = distance
    yhat = model.predict(test_X)[:, 0]
    rmse = math.sqrt(mean_squared_error(yhat, test_y))
    print('Test RMSE: %.3f' % rmse)
    scores = model.evaluate(test_X, test_y)
    plt.plot(times, yhat, label='prediction')
    plt.plot(times, distance, label="ground_truth")
    plt.title('Comparison between truth and prediction', fontsize=18)
    plt.xlabel('Boot time (s)', fontsize=18)
    plt.ylabel('Distance travelled during a single timestep (m)', fontsize=12)
    plt.legend()
    plt.show()
    # smallest / largest (by absolute value) and mean prediction error
    min_error = min((distance - yhat), key=abs)
    max_error = max((distance - yhat), key=abs)
    avg_error = average(distance - yhat)
    print('Min:%f' % min_error)
    print('Max:%f' % max_error)
    print('average:%f' % avg_error)

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import pandas as pd
import math
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# use tf.keras consistently so that the saved model can be reloaded in evaluate&predict.py
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from tensorflow.keras import optimizers
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


if __name__ == '__main__':

    train_CSV_FILE_PATH = 'D:\\comma2k19\\Chunk_01\\b0c9d2329ad1606b_2018-08-02--08-34-47.csv'
    test_CSV_FILE_PATH = 'D:\\comma2k19\\Chunk_01\\b0c9d2329ad1606b_2018-08-01--21-13-49.csv'
    train_df = pd.read_csv(train_CSV_FILE_PATH)
    test_df = pd.read_csv(test_CSV_FILE_PATH)
    train_values = train_df.to_numpy()
    train_times = train_values[:, -1]
    train_distance = train_values[:, -2]
    test_values = test_df.to_numpy()
    test_times = test_values[:, -1]
    test_distance = test_values[:, -2]
    # normalise the input features; fit the scaler on the training data only and reuse it for the test data
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_X, train_y = scaler.fit_transform(train_values[:, :-2]), train_distance
    test_X, test_y = scaler.transform(test_values[:, :-2]), test_distance
    # # use three quarters of the data as the training set
    # train_len = len(times)
    # train = values[:train_len, :]
    # test = values[train_len:, :]
    # split into inputs (CAN_speed, steering_angle, acceleration_forward) and output (distance)
    # train_X, train_y = train, distance[:train_len]
    # test_X, test_y = test, distance[train_len:]
    # reshape the inputs (X) into the LSTM input format, i.e. [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

    # design the network
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    # optionally set the learning rate and other optimiser parameters
    # adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='mae', optimizer='adam')
    # fit network
    history = model.fit(train_X, train_y, epochs=100, batch_size=50, validation_data=(test_X, test_y), verbose=2,
                        shuffle=False)
    model.save('lstm.model')
    # full_X = values[:, :3]
    # full_X = full_X.reshape((full_X.shape[0], 1, full_X.shape[1]))
    train_yhat = model.predict(train_X)[:, 0]
    test_yhat = model.predict(test_X)[:, 0]
    rmse = math.sqrt(mean_squared_error(test_yhat, test_y))
    print('Test RMSE: %.3f' % rmse)
    # plot history
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    # plt.plot(times, yhat, label='prediction')
    # plt.plot(times, distance, label="ground_truth")
    # plt.title('Comparison between truth and prediction', fontsize=18)
    # plt.xlabel('Boot time (s)', fontsize=18)
    # plt.ylabel('Distance travelled during single timestamp (m) ', fontsize=12)
    plt.legend()
    plt.show()
--------------------------------------------------------------------------------
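The scripts above stop at comparing the LSTM's predicted travel distance with the distance derived from consecutive GNSS fixes; the spoofing decision itself is not implemented in this repository. The snippet below is a minimal sketch of that final step, assuming a fixed residual threshold in metres and a consecutive-violation count. The function name detect_spoofing and the values of threshold and k are illustrative assumptions, not taken from the referenced paper.

import numpy as np

def detect_spoofing(predicted_distance, gnss_distance, threshold=2.0, k=3):
    # residual between the distance implied by the GNSS fixes and the distance the LSTM
    # predicts from CAN speed, steering angle and forward acceleration
    residual = np.abs(np.asarray(gnss_distance, dtype=float) - np.asarray(predicted_distance, dtype=float))
    violations = residual > threshold
    alarms = np.zeros_like(violations, dtype=bool)
    run = 0
    for i, v in enumerate(violations):
        run = run + 1 if v else 0  # count consecutive threshold violations
        if run >= k:
            alarms[i] = True       # flag only sustained deviations, not single outliers
    return alarms

# example usage with the arrays produced in evaluate&predict.py:
# alarms = detect_spoofing(yhat, distance)
# print('Spoofing suspected at %d of %d timestamps' % (alarms.sum(), len(alarms)))

Both threshold and k would have to be tuned on benign drives so that sensor noise and interpolation error do not trigger false alarms.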