├── README.md ├── collect_data.py ├── dnn_test.py ├── atm2.txt ├── dnn_regression.py ├── ppo_test.py ├── ppo_model.py ├── ppo_train.py ├── missile_3d.py └── OAgent_sim.py /README.md: -------------------------------------------------------------------------------- 1 | # Computational-TSG 2 | 预测-校正学习计算制导律 3 | -------------------------------------------------------------------------------- /collect_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from OAgent_sim import OAgentSim 4 | 5 | oAgent = OAgentSim() 6 | dict_launch = {} 7 | 8 | pi = 3.141592653589793 9 | RAD = 180 / pi 10 | 11 | for launch_time in range(int(40)): 12 | oAgent.modify() 13 | print("========", launch_time + 1, "========") 14 | step = [] 15 | done = False 16 | while done is False: 17 | done = oAgent.step(action=0, h=0.02) 18 | v, theta, r, q, x, y, t = oAgent.collect() 19 | step.append([v, theta, r, q, x, y]) 20 | s = np.array(step) 21 | t = theta * RAD * np.ones([s.shape[0], 1]) # 换算为角度 22 | dict_launch['dnn{0}'.format(launch_time)] = np.concatenate([s, t], axis=1) 23 | flight_data = {'flight_data': dict_launch} 24 | scipy.io.savemat('./flight_data.mat', flight_data) 25 | -------------------------------------------------------------------------------- /dnn_test.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import random 3 | import tensorflow.compat.v1 as tf 4 | tf.compat.v1.disable_eager_execution() 5 | from dnn_regression import load_data, init_network 6 | 7 | 8 | class DeepNeuralNetwork: 9 | def __init__(self, x_num=6, path='./dnn_model'): 10 | self.graph = tf.Graph() 11 | with self.graph.as_default(): 12 | self.x, y, trainer, loss = init_network(x_num=x_num) 13 | self.sess = tf.Session(graph=self.graph) 14 | with self.sess.as_default(): 15 | with self.graph.as_default(): 16 | self.y = self.sess.graph.get_tensor_by_name(name='layers/n_outputs/BiasAdd:0') 17 | saver = tf.train.Saver() 18 | checkpoint_path = tf.train.latest_checkpoint(path) 19 | saver.restore(self.sess, checkpoint_path) 20 | 21 | # 转换为tf-lite模型 22 | convert = tf.lite.TFLiteConverter.from_session(self.sess, [self.x], [self.y]) 23 | tflite_model = convert.convert() 24 | with open('dnn_model.tflite', 'wb') as f: 25 | f.write(tflite_model) 26 | 27 | def predict(self, state): 28 | with self.sess.as_default(): 29 | with self.sess.graph.as_default(): 30 | return self.y.eval(feed_dict={self.x: state}) 31 | 32 | 33 | if __name__ == '__main__': 34 | dnn = DeepNeuralNetwork() 35 | x_train, y_train, x_test, y_test, x_vali, y_vali, train_set_size = load_data(shuffle_flag=False) 36 | # p = int(random.uniform(0, train_set_size - 1e6)) 37 | dict_DNN = {'y_hat': dnn.predict(x_test), 'y': y_test} 38 | dataDNN = r'dnn_test.mat' 39 | scipy.io.savemat(dataDNN, dict_DNN) 40 | -------------------------------------------------------------------------------- /atm2.txt: -------------------------------------------------------------------------------- 1 | 0 1.225e+00 3.403e+02 2 | 2000 1.007e+00 3.325e+02 3 | 4000 8.193e-01 3.246e+02 4 | 6000 6.601e-01 3.165e+02 5 | 8000 5.258e-01 3.081e+02 6 | 10000 4.135e-01 2.995e+02 7 | 12000 3.119e-01 2.951e+02 8 | 14000 2.279e-01 2.951e+02 9 | 16000 1.665e-01 2.951e+02 10 | 18000 1.216e-01 2.951e+02 11 | 20000 8.891e-02 2.951e+02 12 | 22000 6.451e-02 2.964e+02 13 | 24000 4.694e-02 2.977e+02 14 | 26000 3.426e-02 2.991e+02 15 | 28000 2.508e-02 3.004e+02 16 | 30000 1.841e-02 3.017e+02 17 | 32000 
1.355e-02 3.030e+02 18 | 34000 9.887e-03 3.065e+02 19 | 36000 7.257e-03 3.101e+02 20 | 38000 5.366e-03 3.137e+02 21 | 40000 3.995e-03 3.172e+02 22 | 42000 2.995e-03 3.207e+02 23 | 44000 2.259e-03 3.241e+02 24 | 46000 1.714e-03 3.275e+02 25 | 48000 1.317e-03 3.298e+02 26 | 50000 1.027e-03 3.298e+02 27 | 52000 8.055e-04 3.288e+02 28 | 54000 6.389e-04 3.254e+02 29 | 56000 5.044e-04 3.220e+02 30 | 58000 3.962e-04 3.186e+02 31 | 60000 3.096e-04 3.151e+02 32 | 62000 2.407e-04 3.115e+02 33 | 64000 1.860e-04 3.080e+02 34 | 66000 1.429e-04 3.044e+02 35 | 68000 1.091e-04 3.007e+02 36 | 70000 8.281e-05 2.971e+02 37 | 72000 6.236e-05 2.934e+02 38 | 74000 4.637e-05 2.907e+02 39 | 76000 3.430e-05 2.880e+02 40 | 78000 2.523e-05 2.853e+02 41 | 80000 1.845e-05 2.825e+02 42 | 82000 1.341e-05 2.797e+02 43 | 84000 9.690e-06 2.769e+02 44 | 86000 6.955e-06 2.741e+02 -------------------------------------------------------------------------------- /dnn_regression.py: -------------------------------------------------------------------------------- 1 | import random 2 | import scipy.io 3 | import numpy as np 4 | import tensorflow.compat.v1 as tf 5 | tf.compat.v1.disable_eager_execution() 6 | 7 | 8 | def load_data(collect_time=1000, shuffle_flag=True): 9 | # m = scipy.io.loadmat('./flight_data.mat') 10 | # data_raw = m['flight_data'][0, 0]['dnn0'] 11 | # for collect_i in range(1, collect_time): 12 | # data_raw = np.concatenate((data_raw, m['flight_data'][0, 0]['dnn{}'.format(collect_i)]), axis=0) 13 | m = scipy.io.loadmat('./flight_data_concatenate.mat') 14 | data_raw = np.array(m['data_raw']) 15 | data_raw = np.true_divide(data_raw, [1e2, -1, 1e4, -1, 1e4, 1e4, 1]) 16 | print(np.mean(data_raw, axis=0)) 17 | if shuffle_flag: 18 | np.random.shuffle(data_raw) 19 | size_raw = len(data_raw) 20 | size_train = round(size_raw * 0.96) 21 | x_train_set = data_raw[:size_train, :-1] 22 | y_train_set = data_raw[:size_train, -1:] 23 | 24 | size_test = size_train + round(size_raw * 0.02) 25 | x_test_set = data_raw[size_train:size_test, :-1] 26 | y_test_set = data_raw[size_train:size_test, -1:] 27 | 28 | x_validation_set = data_raw[size_test:, :-1] 29 | y_validation_set = data_raw[size_test:, -1:] 30 | print('data load DONE! 
Train data size=', size_train) 31 | return x_train_set, y_train_set, x_test_set, y_test_set, x_validation_set, y_validation_set, size_train 32 | 33 | 34 | def init_network(x_num=6, learn_rate=0.0001): 35 | with tf.name_scope("placeholder"): 36 | x = tf.placeholder(tf.float32, shape=[None, x_num], name='x') 37 | y = tf.placeholder(tf.float32, shape=[None, 1], name='y') 38 | with tf.name_scope("layers"): 39 | l1 = x 40 | for i in range(3): 41 | l1 = tf.layers.dense(l1, units=100, activation=tf.nn.relu, name="n_hidden_" + str(i)) 42 | y_hat = tf.layers.dense(l1, units=1, name="n_outputs") 43 | with tf.name_scope("loss"): 44 | loss_func = tf.reduce_mean(tf.square(y_hat - y), name="loss") 45 | with tf.name_scope("train"): 46 | optimizer = tf.train.AdamOptimizer(learning_rate=learn_rate) 47 | trainer = optimizer.minimize(loss_func) 48 | return x, y, trainer, loss_func 49 | 50 | 51 | if __name__ == '__main__': 52 | sess = tf.InteractiveSession() 53 | x, y, train_step, loss_mse = init_network() 54 | 55 | x_train, y_train, x_test, y_test, x_vali, y_vali, train_set_size = load_data() 56 | 57 | load_flag = True 58 | 59 | sess.run(tf.global_variables_initializer()) 60 | saver = tf.train.Saver() 61 | if load_flag is True: 62 | checkpoint_path = tf.train.latest_checkpoint('./dnn_model') 63 | saver.restore(sess, checkpoint_path) 64 | print("test loss mse = %g" % loss_mse.eval(feed_dict={x: x_test, y: y_test})) 65 | 66 | steps = int(1e5) 67 | batch_size = int(1e4) 68 | for i in range(steps): 69 | # batch = ([], []) 70 | # p = random.sample(range(train_set_size), batch_size) 71 | # for k in p: 72 | # batch[0].append(x_train[k]) 73 | # batch[1].append(y_train[k]) 74 | # train_step.run(feed_dict={x: batch[0], y: batch[1]}) 75 | p = int(random.uniform(0, train_set_size - batch_size)) 76 | train_step.run(feed_dict={x: x_train[p:p + batch_size, :], y: y_train[p:p + batch_size, :]}) 77 | if i % (steps / 10) == 0: 78 | train_loss_mse = loss_mse.eval(feed_dict={x: x_test, y: y_test}) 79 | print('Training...{:.1f}%'.format(i / steps * 100), 'train loss mse =', train_loss_mse) 80 | print("test loss mse = %g" % loss_mse.eval(feed_dict={x: x_vali, y: y_vali})) 81 | 82 | saver.save(sess, r'.\dnn_model\flight') 83 | -------------------------------------------------------------------------------- /ppo_test.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | import numpy as np 4 | import tensorflow.python.keras as keras 5 | import matplotlib.pyplot as plt 6 | from scipy.io import savemat 7 | from missile_3d import MISSILE 8 | from ppo_model import PPO 9 | from dnn_test import DeepNeuralNetwork 10 | 11 | BATCH_SIZE = 32 # update batch size 12 | GAMMA = 0.99 13 | TEST_EPISODES = 100 14 | REWARD_SAVE_CASE = 0 15 | dnny = DeepNeuralNetwork() # 纵向通道 16 | dnnz = keras.models.load_model("./dnn_model/flight.h5") # 侧向通道 17 | pi = 3.141592653589793 18 | RAD = 180 / pi # 弧度转角度 19 | 20 | 21 | class OMissile(MISSILE): 22 | def __init__(self): 23 | super().__init__() 24 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 25 | self.nav_theta = 0. 26 | self.nav_psiv = 0. 
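# Annotation (not in the original source): the two feature vectors built below are
# scaled with the same constants used in dnn_regression.load_data(), i.e. the raw
# samples are divided element-wise by [1e2, -1, 1e4, -1, 1e4, 1e4], so the loaded
# impact-angle networks see inputs in the range they were trained on; the velocity
# is first projected into the pitch/yaw guidance plane via cos(nav_psiv)/cos(nav_theta).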
27 | tmp_y = np.array([[v * math.cos(self.nav_psiv) / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 28 | tmp_z = np.array([[v * math.cos(self.nav_theta) / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 29 | self.iay = float(dnny.predict(tmp_y)) 30 | self.iaz = float(dnnz.predict(tmp_z)) 31 | self.tgo = r / v 32 | 33 | def get_ia(self): 34 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 35 | # tmp_y = np.array([[v / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 36 | # tmp_z = np.array([[v / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 37 | tmp_y = np.array([[v * math.cos(self.nav_psiv) / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 38 | tmp_z = np.array([[v * math.cos(self.nav_theta) / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 39 | self.iay = float(dnny.predict(tmp_y)) 40 | self.iaz = float(dnnz.predict(tmp_z)) 41 | return self.iay, self.iaz 42 | 43 | def get_tgo(self): 44 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 45 | self.tgo = r / v 46 | return self.tgo 47 | 48 | def get_state(self, y_target, z_target): 49 | iay, iaz = self.get_ia() 50 | tgo = self.get_tgo() 51 | state_y = np.array([(y_target - iay) / tgo]) 52 | state_z = np.array([(z_target - iaz) / tgo]) 53 | return state_y, state_z 54 | 55 | 56 | if __name__ == '__main__': 57 | env = OMissile() 58 | # set the init parameter 59 | state_dim = 1 60 | action_dim = 1 61 | action_bound = 2 * 9.81 # action limitation 62 | 63 | # test 64 | agent_y = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 65 | agent_z = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 66 | flight_data = {} 67 | for episode in range(TEST_EPISODES): 68 | desired_iay = [] # 期望的ia 69 | actual_iay = [] # 实际的ia 70 | impact_angle_errory = [] # ia的误差 71 | desired_iaz = [] # 期望的ia 72 | actual_iaz = [] # 实际的ia 73 | impact_angle_errorz = [] # ia的误差 74 | 75 | env.modify([200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.]) # [200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.] 76 | tdy = random.uniform(-60., -20.) 77 | tdz = random.uniform(-20., 20.) 
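# Annotation (not in the original source): tdy and tdz are the commanded terminal
# impact angles in degrees (pitch plane drawn from [-60, -20] deg, yaw plane from
# [-20, 20] deg). The DNN impact-angle predictors were trained on theta*RAD, so the
# PPO branch consumes the targets in degrees via get_state(), while the TSG branch
# below converts them to radians with tdy / RAD and tdz / RAD before calling env.step().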
78 | 79 | # expre = 1 80 | # tdy = [-20., -40., -60.][expre] 81 | # tdz = [20., 0., -20.][expre] 82 | 83 | t = [] 84 | state_y, state_z = env.get_state(tdy, tdz) 85 | done = False 86 | TSG = False 87 | while done is False: 88 | if TSG: 89 | done, env.nav_theta, env.nav_psiv = env.step(tdy / RAD, tdz / RAD, h=0.01) # TSG 90 | else: 91 | action_y = agent_y.get_action(state_y, greedy=True) # use the mean of distribution as action 92 | action_z = agent_z.get_action(state_z, greedy=True) # use the mean of distribution as action 93 | done, env.nav_theta, env.nav_psiv = env.step(action_y, -action_z, h=0.01) 94 | state_y, state_z = env.get_state(tdy, tdz) 95 | 96 | desired_iay.append(tdy) 97 | actual_iay.append(env.iay) 98 | impact_angle_errory.append(tdy - env.iay) 99 | 100 | desired_iaz.append(tdz) 101 | actual_iaz.append(env.iaz) 102 | impact_angle_errorz.append(tdz - env.iaz) 103 | 104 | # tmpenergy = 0 105 | # for e in range(len(env.reac)): 106 | # tmpenergy = tmpenergy + env.reac[e] ** 2 * 0.1 107 | # print(tmpenergy) 108 | # env.plot_data() 109 | # plt.ioff() 110 | # plt.show() 111 | 112 | flight_data['sim_{}'.format(episode)] = env.save_data() 113 | flight_data['time{}'.format(episode)] = {'desired_iay': np.array(desired_iay), 114 | 'actual_iay': np.array(actual_iay), 115 | 'impact_time_errory': np.array(impact_angle_errory), 116 | 'desired_iaz': np.array(desired_iaz), 117 | 'actual_iaz': np.array(actual_iaz), 118 | 'impact_time_errorz': np.array(impact_angle_errorz)} 119 | 120 | # print the result 121 | print('Testing | Episode: {}/{} | Target: {:.2f}/{:.2f} | Actual: {:.2f}/{:.2f} | Error: {:.2f}/{:.2f}' 122 | .format(episode + 1, TEST_EPISODES, 123 | tdy, tdz, 124 | env.Y[2] * RAD, env.Y[3] * RAD, 125 | tdy - env.Y[2] * RAD, tdz - env.Y[3] * RAD)) 126 | # plt.figure(3) 127 | # plt.ion() 128 | # plt.clf() 129 | # plt.subplots_adjust(hspace=0.6) 130 | # plt.subplot(2, 1, 1) 131 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_iay)[:-1], 'k--', label='desired ia') 132 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_iay)[:-1], 'k-', label='predict ia') 133 | # plt.plot(np.array(env.reY)[:, 0], np.array(env.reY)[:, 2] * RAD, 'r-', label='actual theta') 134 | # plt.xlabel('Time (s)') 135 | # plt.ylabel('impact angle of xoy') 136 | # plt.legend() 137 | # plt.grid() 138 | # 139 | # plt.subplot(2, 1, 2) 140 | # plt.plot() 141 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_iaz)[:-1], 'k--', label='desired ia') 142 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_iaz)[:-1], 'k-', label='predict ia') 143 | # plt.plot(np.array(env.reY)[:, 0], np.array(env.reY)[:, 3] * RAD, 'r-', label='actual psi') 144 | # plt.xlabel('Time (s)') 145 | # plt.ylabel('impact angle of xoz') 146 | # plt.grid() 147 | # 148 | # plt.pause(0.1) 149 | savemat('./mats/flight_sim_ppo_monte_3d.mat', flight_data) 150 | # savemat('./mats/flight_sim_tsg_{}.mat'.format(3), flight_data) 151 | -------------------------------------------------------------------------------- /ppo_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.compat.v1.disable_eager_execution() 3 | import numpy as np 4 | 5 | A_LR = 0.0001 # learning rate for actor 6 | C_LR = 0.0002 # learning rate for critic 7 | A_UPDATE_STEPS = 10 # actor update steps 8 | C_UPDATE_STEPS = 10 # critic update steps 9 | METHOD = [dict(name='kl_pen', kl_target=0.01, lam=0.5), # KL penalty 10 | dict(name='clip', epsilon=0.2) # Clipped surrogate objective 11 | ][1] 12 | 13 | 14 | 
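# Usage sketch (annotation inferred from ppo_train.py, not part of the original file):
#   agent = PPO(state_dim, action_dim, action_bound)   # build actor/critic graph
#   a = agent.get_action(s)                            # sample from the Gaussian policy
#   agent.store_transition(s, a, r)                    # fill the on-policy buffers
#   agent.finish_path(s_next, done, GAMMA=0.99)        # compute discounted returns
#   agent.update()                                     # PPO-clip actor/critic update
#   agent.save_model('./ppo_model/agent0')             # only saves when trained from scratch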
class PPO(object): 15 | def __init__(self, state_dim, action_dim, action_bound, load_path=None): 16 | self.load_flag = load_path 17 | graph = tf.Graph() 18 | with graph.as_default(): # 构建计算图 19 | self.state = tf.placeholder(tf.float32, [None, state_dim], 'state') 20 | # critic 21 | with tf.name_scope('critic'): 22 | l1 = tf.layers.dense(self.state, 64, tf.nn.relu) 23 | l1 = tf.layers.dense(l1, 64, tf.nn.relu) 24 | self.v = tf.layers.dense(l1, 1) 25 | with tf.name_scope('train_critic'): 26 | # Update actor network 27 | self.discounted_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r') # 折扣回报函数 28 | self.advantage = self.discounted_r - self.v 29 | self.critic_loss = tf.reduce_mean(tf.square(self.advantage)) 30 | self.critic_train_op = tf.train.AdamOptimizer(C_LR).minimize(self.critic_loss) 31 | 32 | # actor 33 | with tf.name_scope('actor'): 34 | def _build_anet(state, name, trainable): 35 | with tf.variable_scope(name): 36 | l1 = tf.layers.dense(state, 64, tf.nn.relu, trainable=trainable) 37 | l1 = tf.layers.dense(l1, 64, tf.nn.relu, trainable=trainable) 38 | mean = action_bound * tf.layers.dense(l1, action_dim, tf.nn.tanh, trainable=trainable) 39 | logstd = tf.Variable(np.zeros(action_dim, dtype=np.float32), trainable=trainable) 40 | norm_dist = tf.distributions.Normal(loc=mean, scale=tf.exp(logstd)) # 正态分布 41 | params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name) # 提取图中的元素 42 | return norm_dist, params 43 | 44 | pi, pi_params = _build_anet(self.state, 'pi', trainable=True) # 新策略 45 | old_pi, old_pi_params = _build_anet(self.state, 'oldpi', trainable=False) # 旧策略 46 | 47 | # 预先定义两个操作 48 | with tf.name_scope('update_oldpi'): # 将pi网络参数赋给oldpi网络 49 | self.update_oldpi_op = [oldp.assign(p) for p, oldp in zip(pi_params, old_pi_params)] 50 | with tf.name_scope('action'): # 执行动作 51 | self.act_op = pi.loc[0] # 直接选择均值作为动作 52 | self.sample_op = tf.squeeze(pi.sample(1), axis=0)[0] # 从正态分布中随机选择一个动作 53 | 54 | with tf.name_scope('train_critic'): 55 | self.action = tf.placeholder(tf.float32, [None, action_dim], 'action') # 创建action占位符 56 | self.adv = tf.placeholder(tf.float32, [None, 1], 'advantage') # 创建A(s,a)占位符 57 | ratio = tf.exp(pi.log_prob(self.action) - old_pi.log_prob(self.action)) # 比率函数r(θ) 58 | surr = ratio * self.adv # 目标函数的第一项 59 | 60 | if METHOD['name'] == 'kl_pen': # 第一类目标函数(KL散度型) 61 | self.tflam = tf.placeholder(tf.float32, None, 62 | 'lambda') # 新加入的参数,Adaptive KL Penalty Coefficient 63 | kl = tf.distributions.kl_divergence(old_pi, pi) # 计算KL散度,衡量两个分布的差异 64 | self.kl_mean = tf.reduce_mean(kl) # d参数,KL散度的均值 65 | self.action_loss = -tf.reduce_mean(surr - self.tflam * kl) # 目标函数 66 | 67 | else: # 第二类目标函数(CLIP型) 68 | self.action_loss = -tf.reduce_mean( 69 | tf.minimum(surr, 70 | tf.clip_by_value(ratio, 1. - METHOD['epsilon'], 71 | 1. 
+ METHOD['epsilon']) * self.adv) 72 | ) # 目标函数 73 | self.action_train_op = tf.train.AdamOptimizer(A_LR).minimize(self.action_loss) # 使用Adam优化目标函数 74 | 75 | self.sess = tf.Session(graph=graph) 76 | 77 | # 加载模型 78 | with self.sess.as_default(): 79 | with self.sess.graph.as_default(): 80 | if load_path is None: 81 | self.sess.run(tf.global_variables_initializer()) # 初始化计算图 82 | else: 83 | saver = tf.train.Saver() 84 | saver.restore(self.sess, tf.train.latest_checkpoint(checkpoint_dir=load_path)) 85 | 86 | # # 转换为tf-lite模型 87 | # convert = tf.lite.TFLiteConverter.from_session(self.sess, [self.state], [self.act_op]) 88 | # tflite_model = convert.convert() 89 | # with open('ppo_model.tflite', 'wb') as f: 90 | # f.write(tflite_model) 91 | 92 | self.state_buffer, self.action_buffer = [], [] 93 | self.reward_buffer, self.cumulative_reward_buffer = [], [] 94 | self.action_bound = action_bound 95 | 96 | def store_transition(self, state, action, reward): 97 | """ 98 | Store state, action, reward at each step 99 | :param state: 100 | :param action: 101 | :param reward: 102 | :return: None 103 | """ 104 | self.state_buffer.append(state) 105 | self.action_buffer.append(action) 106 | self.reward_buffer.append(reward) 107 | 108 | def finish_path(self, next_state, done, GAMMA=0.9): 109 | """ 110 | Calculate cumulative reward 111 | :param next_state: 112 | :param done: 113 | :param GAMMA: 114 | :return: None 115 | """ 116 | if done: 117 | v_s_ = 0 118 | else: 119 | v_s_ = self.get_v(np.array(next_state, np.float32)) 120 | discounted_r = [] 121 | for r in self.reward_buffer[::-1]: 122 | v_s_ = r + GAMMA * v_s_ 123 | discounted_r.append(v_s_) 124 | discounted_r.reverse() 125 | discounted_r = np.array(discounted_r)[:, np.newaxis] 126 | self.cumulative_reward_buffer.extend(discounted_r) 127 | self.reward_buffer.clear() 128 | 129 | def update(self): 130 | """ 131 | Update parameter with the constraint of KL divergent 132 | :return: None 133 | """ 134 | s = np.array(self.state_buffer, np.float32) 135 | a = np.array(self.action_buffer, np.float32) 136 | r = np.array(self.cumulative_reward_buffer, np.float32) 137 | with self.sess.as_default(): 138 | with self.sess.graph.as_default(): 139 | self.sess.run(self.update_oldpi_op) # oldpi=pi 140 | adv = self.sess.run(self.advantage, {self.state: s, self.discounted_r: r}) # 通过critic得到advantage value 141 | 142 | # update actor 143 | if METHOD['name'] == 'kl_pen': # KL散度型 144 | for _ in range(A_UPDATE_STEPS): # 145 | _, kl = self.sess.run([self.action_train_op, self.kl_mean], 146 | {self.state: s, self.action: a, self.adv: adv, self.tflam: METHOD['lam']}) 147 | if kl > 4 * METHOD['kl_target']: # 根据d和d目标的均值动态调整Adaptive KL Penalty Coefficient 148 | break 149 | elif kl < METHOD['kl_target'] / 1.5: # adaptive lambda, this is in OpenAI's paper 150 | METHOD['lam'] /= 2 # 放松KL约束限制 151 | elif kl > METHOD['kl_target'] * 1.5: 152 | METHOD['lam'] *= 2 # 增强KL约束限制 153 | METHOD['lam'] = np.clip(METHOD['lam'], 1e-4, 154 | 10) # sometimes explode, this clipping is my solution 155 | else: # clipping method, find this is better (OpenAI's paper) 156 | [self.sess.run(self.action_train_op, {self.state: s, self.action: a, self.adv: adv}) for _ in 157 | range(A_UPDATE_STEPS)] 158 | 159 | # update critic 160 | [self.sess.run(self.critic_train_op, {self.state: s, self.discounted_r: r}) for _ in 161 | range(C_UPDATE_STEPS)] 162 | 163 | self.state_buffer.clear() 164 | self.action_buffer.clear() 165 | self.cumulative_reward_buffer.clear() 166 | self.reward_buffer.clear() 167 | 168 | def 
get_action(self, s, greedy=False): 169 | s = s[np.newaxis, :] 170 | with self.sess.as_default(): 171 | with self.sess.graph.as_default(): 172 | if greedy: 173 | action = self.sess.run(self.act_op, {self.state: s}) 174 | else: 175 | action = self.sess.run(self.sample_op, {self.state: s}) 176 | return np.clip(action, -self.action_bound, self.action_bound) 177 | 178 | def get_v(self, s): 179 | with self.sess.as_default(): 180 | with self.sess.graph.as_default(): 181 | if s.ndim < 2: 182 | s = s[np.newaxis, :] 183 | return self.sess.run(self.v, {self.state: s})[0, 0] 184 | 185 | def save_model(self, save_path): 186 | with self.sess.as_default(): 187 | with self.sess.graph.as_default(): 188 | if self.load_flag is None: # 未加载模型 189 | saver = tf.train.Saver() 190 | saver.save(self.sess, save_path=save_path) 191 | # print("the model in {} save success".format(save_path)) 192 | -------------------------------------------------------------------------------- /ppo_train.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | import math 4 | import os 5 | import numpy as np 6 | import tensorflow.python.keras as keras 7 | import matplotlib.pyplot as plt 8 | from scipy.io import savemat 9 | from OAgent_sim import OAgentSim 10 | from ppo_model import PPO 11 | from dnn_test import DeepNeuralNetwork 12 | 13 | BATCH_SIZE = 32 # update batch size 14 | TRAIN_EPISODES = 1000 # total number of episodes for training 15 | TEST_EPISODES = 100 # total number of episodes for testing 16 | GAMMA = 0.99 17 | REWARD_SAVE_CASE = 0 18 | # dnn = DeepNeuralNetwork() 19 | dnn = keras.models.load_model("./dnn_model/flight.h5") 20 | # tgo = DeepNeuralNetwork(4, 'tgo_model') 21 | pi = 3.141592653589793 22 | RAD = 180 / pi # 弧度转角度 23 | 24 | 25 | class OMissile(OAgentSim): 26 | def __init__(self, no_=0, missile=None, target=None): 27 | super().__init__(no_=0, missile=None, target=None) 28 | v, theta, r, q, x, y, t = self.collect() 29 | self.ia = float(dnn.predict(np.array([[v / 1e2, theta / -1, r / 1e4, q / -1, x / 1e4, y / 1e4]]))) 30 | self.tgo = r / v # float(tgo.predict([[v / 315, theta / -0.6, x / -1e4, y / 1.5e4]])) 31 | 32 | def get_ia(self): 33 | v, theta, r, q, x, y, t = self.collect() 34 | dnn_state = np.array([[v / 1e2, theta / -1, r / 1e4, q / -1, x / 1e4, y / 1e4]]) 35 | self.ia = float(dnn.predict(dnn_state)) 36 | return self.ia 37 | 38 | def get_tgo(self, dnn_state=None): 39 | v, theta, r, q, x, y, t = self.collect() 40 | # dnn_state = [v / 315, theta / -0.6, x / -1e4, y / 1.5e4] 41 | # self.tgo = float(tgo.predict([dnn_state])) 42 | self.tgo = r / v 43 | return self.tgo 44 | 45 | def get_state(self, a_target): 46 | ia = self.get_ia() 47 | tgo = self.get_tgo() 48 | state_local = [(a_target - ia) / tgo] 49 | return np.array(state_local) 50 | 51 | def get_reward(self, t_target): 52 | v, theta, r, q, x, y, t = self.collect() 53 | tgo = self.get_tgo() 54 | e_local = (t_target - self.ia) / tgo 55 | # vy = v * math.sin(theta) # y向速度 56 | # zem = y / tgo + vy 57 | k1 = 0.8 58 | k2 = 0.2 59 | reward_local = k1 * math.exp(-(e_local / 1e0) ** 2) + \ 60 | k2 * math.exp(-(self.ac / 1e1) ** 2) 61 | return np.array(reward_local) 62 | 63 | 64 | if __name__ == '__main__': 65 | env = OMissile() 66 | 67 | # set the init parameter 68 | state_dim = 1 69 | action_dim = 1 70 | action_bound = 2 * 9.81 # action limitation 71 | t0 = time.time() 72 | model_num = 0 73 | 74 | train = False # choose train or test 75 | if train: 76 | agent = PPO(state_dim, action_dim, 
action_bound) 77 | dict_episode_reward = {'all_episode_reward': [], 'episode_reward': [], 'target_angle': [], 'actual_angle': []} 78 | dict_episode_angle = {'desired ia': [], 'actual ia': [], 'impact angle error': []} 79 | all_episode_reward = [] 80 | for episode in range(int(TRAIN_EPISODES)): 81 | env.modify() # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 82 | if episode % 10 == 0: 83 | td = random.choice([-10., -20., -30., -40., -50., -60., -70., -80., -90.]) + 50. 84 | desired_ia = [] # 期望的ia 85 | actual_ia = [] # 实际的ia 86 | impact_angle_error = [] # ia的误差 87 | state = env.get_state(td) 88 | episode_reward = 0 89 | done = False 90 | while done is False: 91 | # collect state, action and reward 92 | action = agent.get_action(state) # get new action with old state 93 | done = env.step(action=float(action), h=0.1) 94 | state_ = env.get_state(td) # get new state with new action 95 | reward = env.get_reward(td) # get new reward with new action 96 | agent.store_transition(state, action, reward) # train with old state 97 | state = state_ # update state 98 | episode_reward += reward 99 | 100 | desired_ia.append(td) 101 | actual_ia.append(env.ia) 102 | impact_angle_error.append(td - env.ia) 103 | 104 | # update ppo 105 | if len(agent.state_buffer) >= BATCH_SIZE: 106 | agent.finish_path(state_, done, GAMMA=GAMMA) 107 | agent.update() 108 | # end of one episode 109 | env.plot_data(figure_num=0) 110 | 111 | # use the terminal data to update once 112 | if len(agent.reward_buffer) != 0: 113 | agent.reward_buffer[-1] -= env.R + (td - env.Y[2] * RAD) ** 2 114 | agent.finish_path(state, done, GAMMA=GAMMA) 115 | agent.update() 116 | episode_reward -= env.R + (td - env.Y[2] * RAD) ** 2 117 | 118 | # print the result 119 | episode_reward = episode_reward / env.Y[0] # calculate the average episode reward 120 | print('Training | Episode: {}/{} | Average Episode Reward: {:.2f} | Running Time: {:.2f} | ' 121 | 'Target Angle: {:.2f} | Actual Angle: {:.2f} | Error Angle: {:.2f}' 122 | .format(episode + 1, TRAIN_EPISODES, episode_reward, time.time() - t0, 123 | td, env.Y[2] * RAD, td - env.Y[2] * RAD)) 124 | 125 | # plt.figure(1) 126 | # plt.ion() 127 | # plt.clf() 128 | # plt.subplots_adjust(hspace=0.6) 129 | # plt.subplot(2, 1, 1) 130 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_ia)[:-1], 'k--', label='desired ia') 131 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_ia)[:-1], 'k-', label='actual ia') 132 | # plt.xlabel('Time (s)') 133 | # plt.ylabel('angle(s)') 134 | # plt.legend() 135 | # plt.grid() 136 | # 137 | # plt.subplot(2, 1, 2) 138 | # plt.plot() 139 | # plt.plot(np.array(env.reY)[:, 0], np.array(impact_angle_error)[:-1], 'k-') 140 | # plt.xlabel('Time (s)') 141 | # plt.ylabel('error(s)') 142 | # plt.grid() 143 | # 144 | # plt.pause(0.1) 145 | # 146 | # calculate the discounted episode reward 147 | if episode == 0: 148 | all_episode_reward.append(episode_reward) 149 | else: 150 | all_episode_reward.append(all_episode_reward[-1] * .9 + episode_reward * .1) 151 | 152 | # save the episode data 153 | dict_episode_reward['episode_reward'].append(episode_reward) 154 | dict_episode_reward['all_episode_reward'] = all_episode_reward 155 | dict_episode_reward['target_angle'].append(td) 156 | dict_episode_reward['actual_angle'].append(env.Y[2] * RAD) 157 | 158 | # save model and data 159 | if episode_reward > REWARD_SAVE_CASE: 160 | REWARD_SAVE_CASE = episode_reward 161 | # if abs(td - env.Y[0]) < 0.5: 162 | agent.save_model('./ppo_model/agent{}'.format(model_num)) 163 | savemat('./ppo_reward.mat', 
dict_episode_reward) 164 | model_num = (model_num + 1) % 20 165 | 166 | agent.save_model('./ppo_model/agent_end') 167 | savemat('./ppo_reward.mat', dict_episode_reward) 168 | 169 | plt.figure(2) 170 | plt.plot(all_episode_reward) 171 | if not os.path.exists('image'): 172 | os.makedirs('image') 173 | plt.savefig(os.path.join('image', '_'.join(['PPO', time.strftime("%Y_%m%d_%H%M")]))) 174 | plt.ioff() 175 | plt.show() 176 | else: 177 | # test 178 | agent = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 179 | flight_data = {} 180 | for episode in range(100): # TEST_EPISODES 181 | env.modify() # [0., 200., 0 / RAD, -20000., 20000., 200.] 182 | # td = env.get_ia() * random.uniform(0.5, 1.5) 183 | td = random.uniform(-10., -90.) + 50. 184 | # td = -60.0 185 | desired_ia = [] # 期望的ia 186 | actual_ia = [] # 实际的ia 187 | impact_angle_error = [] # ia的误差 188 | state = env.get_state(td) 189 | # action = 0 190 | episode_reward = 0 191 | done = False 192 | t = [] 193 | PPO_flag = True 194 | while done is False: 195 | action = agent.get_action(state, greedy=True) # use the mean of distribution as action 196 | if PPO_flag: 197 | done = env.step(action=action, h=0.1) 198 | else: 199 | done = env.step(action=td / RAD, h=0.1) 200 | state = env.get_state(td) 201 | reward = env.get_reward(td) 202 | episode_reward += reward 203 | 204 | desired_ia.append(td) 205 | actual_ia.append(env.ia) 206 | impact_angle_error.append(td - env.ia) 207 | 208 | # tmpenergy = 0 209 | # for e in range(len(env.reac)): 210 | # tmpenergy = tmpenergy + env.reac[e] ** 2 * 0.1 211 | # print(tmpenergy) 212 | # env.plot_data(figure_num=0) 213 | # plt.ioff() 214 | # plt.show() 215 | 216 | flight_data['sim_{}'.format(episode)] = env.save_data() 217 | flight_data['time{}'.format(episode)] = {'desired_ia': np.array(desired_ia), 218 | 'actual_ia': np.array(actual_ia), 219 | 'impact_time_error': np.array(impact_angle_error)} 220 | 221 | # print the result 222 | episode_reward = episode_reward / env.Y[0] # calculate the average episode reward 223 | print('Testing | Episode: {}/{} | Average Episode Reward: {:.2f} | Running Time: {:.2f} | ' 224 | 'Target: {:.2f} | Actual: {:.2f} | Error: {:.2f}' 225 | .format(episode + 1, TEST_EPISODES, episode_reward, time.time() - t0, 226 | td, env.Y[2] * RAD, td - env.Y[2] * RAD)) # actual_ia[-1])) 227 | plt.figure(1) 228 | plt.ion() 229 | plt.clf() 230 | plt.subplots_adjust(hspace=0.6) 231 | plt.subplot(2, 1, 1) 232 | plt.plot(np.array(env.reY)[:, 0], np.array(desired_ia)[:-1], 'k--', label='desired ia') 233 | plt.plot(np.array(env.reY)[:, 0], np.array(actual_ia)[:-1], 'k-', label='actual ia') 234 | plt.xlabel('Time (s)') 235 | plt.ylabel('$impact angle(s)$') 236 | plt.legend() 237 | plt.grid() 238 | 239 | plt.subplot(2, 1, 2) 240 | plt.plot() 241 | plt.plot(np.array(env.reY)[:, 0], np.array(impact_angle_error)[:-1], 'k-') 242 | plt.xlabel('Time (s)') 243 | plt.ylabel('impact angle error(s)') 244 | plt.grid() 245 | 246 | plt.pause(0.1) 247 | 248 | savemat('./flight_sim_tsg_monte.mat', flight_data) 249 | -------------------------------------------------------------------------------- /missile_3d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import matplotlib.pyplot as plt # clf()清图 # cla()清坐标轴 # close()关窗口 4 | from scipy import interpolate 5 | from scipy.io import savemat 6 | from math import sin, cos, atan2, sqrt, asin 7 | 8 | # 常量 9 | pi = 3.141592653589793 10 | RAD = 180 / pi # 弧度转角度 11 | g = 9.81 # 重力加速度 12 | S = 
0.0572555 # 特征面积 13 | 14 | plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 15 | plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 16 | 17 | p_flag = False # 是否考虑推力 18 | 19 | Ma2 = np.array([[0.4, 39.056, 0.4604, 39.072], 20 | [0.6, 40.801, 0.4682, 39.735], 21 | [0.8, 41.372, 0.4635, 39.242], 22 | [0.9, 42.468, 0.4776, 40.351]]) 23 | 24 | 25 | def load_atm(path): 26 | file = open(path) 27 | atm_str = file.read().split() 28 | atm = [] 29 | for _ in range(0, len(atm_str), 3): 30 | atm.append([float(atm_str[_]), float(atm_str[_ + 1]), float(atm_str[_ + 2])]) 31 | return np.array(atm) 32 | 33 | 34 | class MISSILE: 35 | def __init__(self, missile=None, target=None): 36 | if missile is None: 37 | missile = [300., 38 | 0. / RAD, 39 | 0. / RAD, 40 | -20000., 41 | 20000., 42 | 0.] # 1.速度,2.弹道倾角,3.弹道偏角 4.导弹x,5.导弹y,6.导弹z y指向天 43 | if target is None: 44 | target = [0., 0., 0., -90. / RAD, 0. / RAD] # 目标x,y,z,theta,psi 45 | 46 | self.V = np.array(missile[:3]) # v,theta,psi 47 | self.Euler = np.array(missile[3:]) # x,y,z 48 | self.Y = np.concatenate([[0.], self.V, self.Euler, [200.]]) # 0.时间, 1.速度,2.位置,3.重量 49 | 50 | self.Rt = target[:3] # 目标信息 51 | self.Vt = target[3:] # 52 | 53 | self.X = 0. # 阻力drag force 54 | self.L = 0. # 升力lift force 55 | self.B = 0. # 侧向力 56 | R = self.Rt - self.Euler 57 | self.R = np.linalg.norm(R) # 弹目距离 58 | self.q_theta = atan2(R[1], sqrt(R[0] ** 2 + R[2] ** 2)) # 弹目视线与水平面夹角 59 | self.q_psi = atan2(R[2], R[0]) # 弹目视线与北向夹角,北偏西为正 60 | self.Rdot = 0. 61 | self.qdot_theta = 0. 62 | self.qdot_psi = 0. 63 | self.acy = 0. # 制导指令 64 | self.acz = 0. 65 | self.aby = 0. # 偏置项 66 | self.abz = 0. # 偏置项 67 | self.alpha = 0. # 攻角 68 | self.beta = 0. # 侧滑角 69 | 70 | # 创建插值函数 71 | atm = load_atm('atm2.txt') # 大气参数 72 | self.f_ma = interpolate.interp1d(atm[:, 0], atm[:, 2], 'linear') 73 | self.f_rho = interpolate.interp1d(atm[:, 0], atm[:, 1], 'linear') 74 | 75 | k = 1.0 76 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 77 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 78 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 79 | 80 | # 全弹道历史信息 81 | self.reY, self.reacy, self.reacz = [], [], [] 82 | 83 | def modify(self, missile=None): # 修改导弹初始状态 84 | if missile is None: 85 | missile = [random.uniform(200, 300), 86 | random.uniform(0, 45) / RAD, 87 | random.uniform(-5, 5) / RAD, 88 | random.uniform(-30000., -10000), 89 | random.uniform(10000, 30000), 90 | random.uniform(-2000, 2000)] # 1.速度,2.弹道倾角,3.弹道偏角 4.导弹x,5.导弹y,6.导弹z 91 | 92 | if missile[2] * missile[5] > 0: 93 | missile[2] *= -1 94 | self.V = np.array(missile[:3]) # v,theta,psi 95 | self.Euler = np.array(missile[3:]) # x,y,z 96 | self.Y = np.concatenate([[0.], self.V, self.Euler, [200.]]) # 0.时间, 1.速度,2.位置,3.重量 97 | 98 | R_nue = self.Rt - self.Euler 99 | self.R = np.linalg.norm(R_nue) # 弹目距离 100 | self.q_theta = atan2(R_nue[1], sqrt(R_nue[0] ** 2 + R_nue[2] ** 2)) # 弹目视线与水平面夹角 101 | self.q_psi = atan2(R_nue[2], R_nue[0]) # 弹目视线与北向夹角,北偏西为正 102 | 103 | k = random.uniform(0.8, 1.2) 104 | # k = 1.0 105 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 106 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 107 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 108 | 109 | self.reY, self.reacy, self.reacz = [], [], [] 110 | return self.collect() 111 | 112 | def get_ma(self, y, v): # 计算马赫数 113 | y = max(0, y) 114 | sonic = self.f_ma(y) 115 | return v / sonic 116 | 117 | def get_rho(self, y): 
# 计算空气密度 118 | y = max(0, y) 119 | return self.f_rho(y) 120 | 121 | def get_clalpha(self, ma): 122 | return self.f_clalpha(max(min(ma, 0.9), 0.4)) 123 | 124 | def get_cd0(self, ma): 125 | return self.f_cd0(max(min(ma, 0.9), 0.4)) 126 | 127 | def get_cdalpha(self, ma): 128 | return self.f_cdalpha(max(min(ma, 0.9), 0.4)) 129 | 130 | def dery(self, Y): # 右端子函数 131 | v, theta, psi = Y[1:4] # v,theta,psi 132 | x, y, z = Y[4:7] # x,y,z 133 | m = Y[-1] 134 | dy = np.array(Y) 135 | 136 | # 速度倾斜角gammav = 0 137 | dy[0] = 1 # t 138 | dy[1] = (-self.X / m) - g * sin(theta) # v 139 | dy[2] = (self.L - m * g * cos(theta)) / (m * v) # theta 140 | dy[3] = -self.B / (m * v * cos(theta)) # psi 141 | dy[4] = v * cos(theta) * cos(psi) # x北向 142 | dy[5] = v * sin(theta) # y天向 143 | dy[6] = -v * cos(theta) * sin(psi) # 东向 144 | dy[7] = 0 145 | return dy 146 | 147 | def step(self, action_y=0., action_z=0., h=0.001): 148 | if self.Y[0] < 400: 149 | self.V = v, theta, psi = self.Y[1:4] # v,theta,psi 150 | self.Euler = x, y, z = self.Y[4:7] # x,y,z 151 | m = self.Y[-1] # 弹重 152 | RHO = self.get_rho(y) # 大气密度 153 | ma = self.get_ma(y, v) # 马赫数 154 | 155 | Q = 0.5 * RHO * v ** 2 # 动压 156 | 157 | R_nue = self.Rt - self.Euler 158 | R = self.R # 上一周期的弹目距离 159 | self.R = np.linalg.norm(R_nue) # 弹目距离 160 | 161 | self.q_theta = q_theta = atan2(R_nue[1], sqrt(R_nue[0] ** 2 + R_nue[2] ** 2)) # 弹目视线与水平面夹角 162 | self.q_psi = q_psi = atan2(R_nue[2], R_nue[0]) # 弹目视线与北向夹角,北偏西为正 163 | 164 | v_nue = [v * cos(theta) * cos(psi), v * sin(theta), -v * cos(theta) * sin(psi)] 165 | nav_theta = atan2(v_nue[1], sqrt(v_nue[0] ** 2 + v_nue[2] ** 2)) # 根据卫星解算出的速度倾角(速度与水平面夹角) 166 | nav_psiv = atan2(v_nue[2], v_nue[0]) # 根据卫星解算出的速度与北向夹角 167 | 168 | # if R < 2: 169 | # return True, nav_theta, nav_psiv 170 | if y < 0: 171 | print("弹已落地, 弹目距离={:.1f}".format(self.R)) 172 | return True, nav_theta, nav_psiv 173 | elif self.R > R: 174 | print("远离目标, 弹目距离={:.1f}".format(self.R)) 175 | return True, nav_theta, nav_psiv 176 | 177 | R = self.R # 当前周期的弹目距离 178 | 179 | # 计算前置角 180 | eta1 = q_theta - nav_theta 181 | eta2 = q_psi - nav_psiv 182 | 183 | # 计算视线角变化率 184 | self.qdot_theta = qdot_theta = v * sin(eta1) / R 185 | self.qdot_psi = qdot_psi = v * cos(nav_theta) * sin(eta2) / R * cos(q_theta) 186 | 187 | # 实际项目中,飞行系数插值的三个维度分别为马赫数mach,舵偏角delta,攻角alpha 188 | cl_alpha = self.get_clalpha(ma) 189 | 190 | # self.aby = action_y 191 | # self.abz = action_z 192 | # self.acy = acy = 3 * v * qdot_theta + cos(nav_theta) * g + float(action_y) # 制导指令 193 | # self.acz = acz = 3 * v * cos(nav_theta) * qdot_psi + float(action_z) # 制导指令 194 | 195 | # 弹道成型 196 | tsg_n = 0.5 197 | Nv = 2 * (tsg_n + 2) 198 | Nq = (tsg_n + 1) * (tsg_n + 2) 199 | tgo = R / v 200 | acy = Nv * v * qdot_theta + cos(nav_theta) * g + \ 201 | Nq * v * (q_theta - action_y) / tgo 202 | acz = Nv * v * cos(nav_theta) * qdot_psi + \ 203 | Nq * v * cos(nav_theta) * (q_psi + action_z) / tgo 204 | 205 | m_max = 3 * g 206 | self.acy = acy = np.clip(acy, -m_max, m_max) 207 | self.acz = acz = np.clip(acz, -m_max, m_max) 208 | 209 | alpha = (m * acy) / (Q * S * cl_alpha) # 使用了sin(x)=x的近似,在10°以内满足这一关系 210 | beta = (m * acz) / (Q * S * cl_alpha) # 使用了sin(x)=x的近似,在10°以内满足这一关系 211 | 212 | a_max = 20. 
/ RAD 213 | self.alpha = alpha = np.clip(alpha, -a_max, a_max) 214 | self.beta = beta = np.clip(beta, -a_max, a_max) 215 | 216 | cd = self.get_cd0(ma) + self.get_cdalpha(ma) * (alpha ** 2 + beta ** 2) # 阻力系数 217 | cl = cl_alpha * alpha # 升力系数 218 | cb = cl_alpha * beta # 侧向力系数 219 | 220 | self.X = cd * Q * S # 阻力 221 | self.L = cl * Q * S # 升力 222 | self.B = cb * Q * S # 侧向力 223 | 224 | def rk4(func, Y, h=0.1): 225 | k1 = h * func(Y) 226 | k2 = h * func(Y + 0.5 * k1) 227 | k3 = h * func(Y + 0.5 * k2) 228 | k4 = h * func(Y + k3) 229 | output = Y + (k1 + 2 * k2 + 2 * k3 + k4) / 6 230 | return output 231 | 232 | self.Y = rk4(self.dery, self.Y, h) 233 | if self.Y[2] > 2 * pi: 234 | self.Y[2] = self.Y[2] - 2 * pi 235 | if self.Y[2] < -2 * pi: 236 | self.Y[2] = self.Y[2] + 2 * pi 237 | 238 | if self.Y[3] > 2 * pi: 239 | self.Y[3] = self.Y[3] - 2 * pi 240 | if self.Y[3] < -2 * pi: 241 | self.Y[3] = self.Y[3] + 2 * pi 242 | 243 | self.reY.append(self.Y) 244 | self.reacy.append(self.acy) 245 | self.reacz.append(self.acz) 246 | return False, nav_theta, nav_psiv 247 | else: 248 | print("超时!未击中目标!") 249 | # self.plot_data() 250 | return True, 0., 0. 251 | 252 | def plot_data(self): 253 | reY = np.array(self.reY) 254 | # plt.ion() 255 | plt.clf() 256 | fig = plt.axes(projection='3d') 257 | fig.plot3D(reY[:, 4] / 1000, reY[:, 6] / 1000, reY[:, 5] / 1000, "k-") 258 | fig.set_xlabel("x") 259 | fig.set_ylabel("y") 260 | fig.set_zlabel("z") 261 | 262 | fig = plt.figure(2) 263 | fig.clf() 264 | ax1 = fig.add_subplot(211) 265 | ax1.plot(reY[:, 4] / 1000, reY[:, 5] / 1000, 'k-') 266 | ax1.set_xlabel("x") 267 | ax1.set_ylabel("y") 268 | plt.grid() 269 | 270 | ax2 = fig.add_subplot(212) 271 | ax2.plot(reY[:, 4] / 1000, reY[:, 6] / 1000, 'k-') 272 | ax1.set_xlabel("x") 273 | ax1.set_ylabel("y") 274 | plt.grid() 275 | 276 | # plt.pause(0.1) 277 | plt.show() 278 | 279 | def collect(self): 280 | t = self.Y[0] # 时间 281 | v = self.Y[1] # 速度 282 | theta = self.Y[2] # 弹道倾角 283 | psi = self.Y[3] # 弹道偏角 284 | r = self.R # 弹目距离 285 | q_theta = self.q_theta # 弹目视线角 286 | q_psi = self.q_psi # 弹目视线角 287 | x = self.Y[4] # 弹轴向位置 288 | y = self.Y[5] # 弹纵向位置 289 | z = self.Y[6] # 弹横向位置 290 | return v, theta, psi, r, q_theta, q_psi, x, y, z, t 291 | 292 | def save_data(self): 293 | data = {'Y': np.array(self.reY), 294 | 'R': np.array(self.R), 295 | 'acy': np.array(self.reacy), 296 | 'acz': np.array(self.reacz)} 297 | return data 298 | 299 | 300 | if __name__ == '__main__': 301 | env = MISSILE() 302 | flight_data = {} 303 | for i in range(100): 304 | env.modify([200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.]) 305 | tdy = random.uniform(-60., -20.) / RAD 306 | tdz = random.uniform(-20., 20.) 
/ RAD 307 | done = False 308 | while done is False: 309 | done, a, b = env.step(tdy, tdz, h=0.01) # TSG 310 | env.plot_data() 311 | flight_data['sim_{}'.format(i)] = env.save_data() 312 | flight_data['time{}'.format(i)] = {'error_y': [(tdy - env.Y[2]) * RAD, 0], 313 | 'error_z': [(tdz - env.Y[3]) * RAD, 0]} 314 | print('Testing | Episode: {}/{} | Target: {:.2f}/{:.2f} | Actual: {:.2f}/{:.2f} | Error: {:.2f}/{:.2f}' 315 | .format(i + 1, 100, 316 | tdy * RAD, tdz * RAD, 317 | env.Y[2] * RAD, env.Y[3] * RAD, 318 | (tdy - env.Y[2]) * RAD, (tdz - env.Y[3]) * RAD)) 319 | savemat('./mats/flight_sim_tsg_monte_3d.mat', flight_data) 320 | -------------------------------------------------------------------------------- /OAgent_sim.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import random 4 | import matplotlib.pyplot as plt # clf()清图 # cla()清坐标轴 # close()关窗口 5 | from scipy import interpolate 6 | 7 | # 常量 8 | pi = 3.141592653589793 9 | RAD = 180 / pi # 弧度转角度 10 | g = 0. # 重力加速度 11 | S = 0.0572555 # 特征面积 12 | 13 | plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 14 | plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 15 | 16 | p_flag = False # 是否考虑推力 17 | 18 | Ma2 = np.array([[0.4, 39.056, 0.4604, 39.072], 19 | [0.6, 40.801, 0.4682, 39.735], 20 | [0.8, 41.372, 0.4635, 39.242], 21 | [0.9, 42.468, 0.4776, 40.351]]) 22 | 23 | 24 | def load_atm(path): 25 | file = open(path) 26 | atm_str = file.read().split() 27 | atm = [] 28 | for _ in range(0, len(atm_str), 3): 29 | atm.append([float(atm_str[_]), float(atm_str[_ + 1]), float(atm_str[_ + 2])]) 30 | return np.array(atm) 31 | 32 | 33 | class OAgentSim: 34 | def __init__(self, no_=0, missile=None, target=None): 35 | self.no_ = no_ # 导弹编号 36 | 37 | if missile is None: 38 | missile = [0., 39 | 300., 40 | 45. / RAD, 41 | -20000., 42 | 10000, 43 | 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 44 | if target is None: 45 | target = [0., 0., -150.] # 目标x,y,落角 46 | 47 | self.Y = np.array(missile) 48 | self.xt, self.yt, self.qt = target[0], target[1], target[2] / RAD # 目标信息 49 | 50 | self.X = 0. # 阻力drag force 51 | self.L = 0. # 升力lift force 52 | Rx = self.xt - self.Y[3] 53 | Ry = self.yt - self.Y[4] 54 | self.R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 55 | self.q = math.atan2(Ry, Rx) # 弹目视线角 56 | self.Rdot = 0. 57 | self.qdot = 0. 58 | self.ac = 0. # 制导指令 59 | self.ab = 0. # 偏置项 60 | self.alpha = 0. # 攻角 61 | if p_flag: 62 | self.P = 1000. # 推力push force 63 | self.mc = 0.4 # 每秒损耗质量 64 | else: 65 | self.P = 0. # 推力push force 66 | self.mc = 0. 
# 每秒损耗质量 67 | # self.tgo = (1 + (self.Y[2] - self.q) ** 2 / 10) * self.R / self.Y[1] # T_go = (1-(theta-lambda)^2/10)*R_go/V 68 | 69 | # 创建插值函数 70 | atm = load_atm('atm2.txt') # 大气参数 71 | self.f_ma = interpolate.interp1d(atm[:, 0], atm[:, 2], 'linear') 72 | self.f_rho = interpolate.interp1d(atm[:, 0], atm[:, 1], 'linear') 73 | 74 | k = 1.0 75 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 76 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 77 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 78 | 79 | # 全弹道历史信息 80 | self.reY, self.reac, self.reCX, self.reP, self.realpha, self.reR = [], [], [], [], [], [] 81 | 82 | def modify(self, missile=None): # 修改导弹初始状态 83 | if missile is None: 84 | missile = [0., 85 | random.uniform(200, 300), 86 | random.uniform(-20, 20) / RAD, 87 | random.uniform(-30000., -10000), 88 | random.uniform(-20000, 20000), 89 | 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 90 | # missile = [0., 91 | # random.uniform(200, 300), 92 | # random.uniform(0, 45) / RAD, 93 | # random.uniform(-30000., -10000), 94 | # random.uniform(10000, 30000), 95 | # 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 96 | self.Y = np.array(missile) 97 | Rx = self.xt - self.Y[3] 98 | Ry = self.yt - self.Y[4] 99 | self.R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 100 | self.q = math.atan2(Ry, Rx) # 弹目视线角 101 | 102 | k = random.uniform(0.8, 1.2) 103 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 104 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 105 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 106 | 107 | # self.tgo = (1 + (self.Y[2] - self.q) ** 2 / 10) * self.R / self.Y[1] # T_go = (1-(theta-lambda)^2/10)*R_go/V 108 | self.reY, self.reac, self.reCX, self.reP, self.realpha, self.reR = [], [], [], [], [], [] 109 | return self.collect() 110 | 111 | def terminate(self): 112 | self.X = 0. # 阻力drag force 113 | self.L = 0. # 升力lift force 114 | self.R = 0. # 弹目距离range 115 | self.q = 0. # 弹目视线角 116 | self.Rdot = 0. 117 | self.qdot = 0. 118 | self.ac = 0. # 制导指令 119 | self.alpha = 0. # 攻角 120 | if p_flag: 121 | self.P = 1000. # 推力push force 122 | self.mc = 0.4 # 每秒损耗质量 123 | else: 124 | self.P = 0. # 推力push force 125 | self.mc = 0. 
# 每秒损耗质量 126 | 127 | def get_ma(self, y, v): # 计算马赫数 128 | y = max(0, y) 129 | if g == 0.: 130 | y = 10000 131 | sonic = self.f_ma(y) 132 | return v / sonic 133 | 134 | def get_rho(self, y): # 计算空气密度 135 | y = max(0, y) 136 | if g == 0.: 137 | y = 10000 138 | return self.f_rho(y) 139 | 140 | def get_clalpha(self, ma): 141 | return self.f_clalpha(max(min(ma, 0.9), 0.4)) 142 | 143 | def get_cd0(self, ma): 144 | return self.f_cd0(max(min(ma, 0.9), 0.4)) 145 | 146 | def get_cdalpha(self, ma): 147 | return self.f_cdalpha(max(min(ma, 0.9), 0.4)) 148 | 149 | def dery(self, Y): # 右端子函数 150 | v = Y[1] 151 | theta = Y[2] 152 | m = Y[5] 153 | dy = np.array(Y) 154 | dy[0] = 1 155 | dy[1] = (self.P * math.cos(self.alpha) - self.X) / m - g * math.sin(theta) 156 | dy[2] = (self.P * math.sin(self.alpha) + self.L - m * g * math.cos(theta)) / (v * m) 157 | dy[3] = v * math.cos(theta) 158 | dy[4] = v * math.sin(theta) 159 | dy[5] = -self.mc 160 | return dy 161 | 162 | def step(self, action=0., h=0.001): 163 | if self.Y[0] < 600: 164 | x = self.Y[3] # 横向位置 165 | y = self.Y[4] # 纵向位置 166 | v = self.Y[1] # 速度 167 | theta = self.Y[2] # 弹道倾角 168 | m = self.Y[5] # 弹重 169 | RHO = self.get_rho(y) # 大气密度 170 | ma = self.get_ma(y, v) # 马赫数 171 | 172 | Q = 0.5 * RHO * v ** 2 # 动压 173 | 174 | Rx = self.xt - x 175 | Ry = self.yt - y 176 | vx = -v * math.cos(theta) # x向速度 177 | vy = -v * math.sin(theta) # y向速度 178 | self.R = R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 179 | self.qdot = qdot = (Rx * vy - Ry * vx) / R ** 2 180 | self.q = q = math.atan2(Ry, Rx) # 弹目视线角 181 | self.Rdot = Rdot = (Rx * vx + Ry * vy) / R 182 | # self.tgo = tgo = (1 + (theta - q) ** 2 / 10) * R / v 183 | 184 | if R < 20.: 185 | return True 186 | # elif y < 0: 187 | # print("弹已落地, 弹目距离={:.1f}".format(R)) 188 | # return True 189 | # elif Rdot >= 0: 190 | # print("逐渐远离目标...") 191 | # return True 192 | 193 | # 工作时序 194 | t = self.Y[0] 195 | if p_flag: 196 | if t <= 86: # 第86s燃料耗尽 197 | self.mc = 0.4 # 秒流量 198 | self.P = 1000 # 推力 199 | else: 200 | self.mc = 0. 201 | self.P = 0. 202 | 203 | # 实际项目中,飞行系数插值的三个维度分别为马赫数mach,舵偏角delta,攻角alpha 204 | cl_alpha = self.get_clalpha(ma) 205 | 206 | self.ab = action 207 | # self.ac = ac = 3 * v * qdot + math.cos(theta) * g + float(action) # 制导指令 208 | tsg_n = 0. 
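# Annotation (not in the original source): the next statement is the planar
# trajectory-shaping guidance (TSG) law
#     ac = 2*(n+2)*V*qdot + g*cos(theta) + (n+1)*(n+2)*V*(q - q_d)/tgo,  with tgo = R/V.
# With the shaping order tsg_n = 0 this reduces to
#     ac = 4*V*qdot + g*cos(theta) + 2*V*(q - action)/tgo,
# so `action` plays the role of the commanded terminal line-of-sight angle q_d (rad).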
209 | self.ac = ac = 2 * (tsg_n + 2) * v * qdot + math.cos(theta) * g + \ 210 | (tsg_n + 1) * (tsg_n + 2) * v * (q - action) / (R / v) # 弹道成型 211 | self.alpha = alpha = (m * ac) / (Q * S * cl_alpha + self.P) # 使用了sin(x)=x的近似,在10°以内满足这一关系 212 | if alpha > 15.0 / RAD: 213 | self.alpha = alpha = 15.0 / RAD 214 | elif alpha < -15.0 / RAD: 215 | self.alpha = alpha = -15.0 / RAD 216 | 217 | cd = self.get_cd0(ma) + self.get_cdalpha(ma) * alpha ** 2 # 阻力系数 218 | cl = cl_alpha * alpha # 升力系数 219 | 220 | self.X = cd * Q * S # 阻力 221 | self.L = cl * Q * S # 升力 222 | 223 | def rk4(func, Y, h=0.1): 224 | k1 = h * func(Y) 225 | k2 = h * func(Y + 0.5 * k1) 226 | k3 = h * func(Y + 0.5 * k2) 227 | k4 = h * func(Y + k3) 228 | output = Y + (k1 + 2 * k2 + 2 * k3 + k4) / 6 229 | return output 230 | 231 | self.Y = rk4(self.dery, self.Y, h) 232 | if self.Y[2] > 2 * pi: 233 | self.Y[2] = self.Y[2] - 2 * pi 234 | if self.Y[2] < -2 * pi: 235 | self.Y[2] = self.Y[2] + 2 * pi 236 | 237 | self.reY.append(self.Y) 238 | self.reac.append(self.ac) 239 | self.reCX.append(self.X) 240 | self.reP.append(action) 241 | self.realpha.append(self.alpha) 242 | self.reR.append(self.R) 243 | return False 244 | else: 245 | print("超时!未击中目标!") 246 | # self.plot_data() 247 | return True 248 | 249 | def collect(self): 250 | t = self.Y[0] # 时间 251 | v = self.Y[1] # 速度 252 | theta = self.Y[2] # 弹道倾角 253 | r = self.R # 弹目距离 254 | q = self.q # 弹目视线角 255 | x = self.Y[3] # 弹横向位置 256 | y = self.Y[4] # 弹纵向位置 257 | return v, theta, r, q, x, y, t 258 | 259 | def plot_data(self, figure_num=0): 260 | reY = np.array(self.reY) 261 | reac = np.array(self.reac) 262 | reCX = np.array(self.reCX) 263 | reP = np.array(self.reP) 264 | realpha = np.array(self.realpha) 265 | reR = np.array(self.reR) 266 | plt.figure(figure_num) 267 | plt.ion() 268 | plt.clf() 269 | # 弹道曲线 270 | plt.subplots_adjust(hspace=0.6) 271 | plt.subplot(3, 3, 1) 272 | plt.plot(reY[:, 3] / 1000, reY[:, 4] / 1000, 'k-') 273 | plt.xlabel('Firing Range (km)') 274 | plt.ylabel('altitude (km)') 275 | plt.title('弹道曲线') 276 | plt.grid() 277 | 278 | # 速度曲线 279 | plt.subplot(3, 3, 2) 280 | plt.plot(reY[:, 0], reY[:, 1], 'k-') 281 | plt.xlabel('Time (s)') 282 | plt.ylabel('Speed (m/s)') 283 | plt.title('速度') 284 | plt.grid() 285 | 286 | # 弹道倾角 287 | plt.subplot(3, 3, 3) 288 | plt.plot(reY[:, 0], reY[:, 2] * RAD, 'k-') 289 | plt.xlabel('Time (s)') 290 | plt.ylabel('Theta (°)') 291 | plt.title('弹道倾角') 292 | plt.grid() 293 | 294 | # 过载指令 295 | plt.subplot(3, 3, 4) 296 | plt.plot(reY[:, 0], reac, 'k-') 297 | plt.xlabel('time (s)') 298 | plt.ylabel('ac') 299 | plt.title('过载指令') 300 | plt.grid() 301 | 302 | # 推力 303 | plt.subplot(3, 3, 5) 304 | plt.plot(reY[:, 0], reP, 'k-') 305 | plt.xlabel('Time (s)') 306 | plt.ylabel('action') 307 | plt.title('偏置项') 308 | plt.grid() 309 | 310 | # 高度曲线 311 | plt.subplot(3, 3, 6) 312 | plt.plot(reY[:, 0], reY[:, 4] / 1000, 'k-') 313 | plt.xlabel('time (s)') 314 | plt.ylabel('altitude (km)') 315 | plt.title('高度') 316 | plt.grid() 317 | 318 | # 阻力变化曲线 319 | plt.subplot(3, 3, 7) 320 | plt.plot(reY[:, 0], reCX, 'k-') 321 | plt.title('阻力') 322 | plt.grid() 323 | 324 | # 平衡攻角曲线 325 | plt.subplot(3, 3, 8) 326 | plt.plot(reY[:, 0], realpha * RAD, 'k-') 327 | plt.title('平衡攻角') 328 | plt.grid() 329 | 330 | # 弹目距离 331 | plt.subplot(3, 3, 9) 332 | plt.plot(reY[:, 0], reR, 'k-') 333 | plt.title('弹目距离') 334 | plt.grid() 335 | 336 | plt.pause(0.1) 337 | 338 | def save_data(self): 339 | data = {'reY': np.array(self.reY), 340 | 'reac': np.array(self.reac), 341 | 'reCX': 
np.array(self.reCX), 342 | 'reP': np.array(self.reP), 343 | 'realpha': np.array(self.realpha), 344 | 'reR': np.array(self.reR)} 345 | return data 346 | 347 | 348 | if __name__ == '__main__': 349 | oAgent = OAgentSim() 350 | for i in range(1): 351 | oAgent.modify([0., 300., 0. / RAD, -20000., 20000, 200]) 352 | done = False 353 | while done is False: 354 | done = oAgent.step(-179. / RAD, 0.01) # 单步运行 355 | oAgent.plot_data() 356 | print(oAgent.Y[2] * RAD) 357 | plt.ioff() 358 | plt.show() 359 | --------------------------------------------------------------------------------
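Workflow note (inferred from the scripts above, not part of the repository): collect_data.py rolls out planar OAgent_sim trajectories and writes flight_data.mat; dnn_regression.py fits the impact-angle regressor on the collected data and saves it under ./dnn_model; dnn_test.py reloads that checkpoint and additionally exports dnn_model.tflite; ppo_train.py trains the PPO bias-command policy on OAgent_sim, using the error between the target and the DNN-predicted impact angle divided by time-to-go as the state; ppo_test.py evaluates the trained policy (or the pure TSG law) on the 3-D model in missile_3d.py and saves the Monte-Carlo results to ./mats/.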