├── README.md ├── collect_data.py ├── dnn_test.py ├── atm2.txt ├── dnn_regression.py ├── ppo_test.py ├── ppo_model.py ├── ppo_train.py ├── missile_3d.py └── OAgent_sim.py /README.md: -------------------------------------------------------------------------------- 1 | # Computational-TSG 2 | 预测-校正学习计算制导律 3 | -------------------------------------------------------------------------------- /collect_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from OAgent_sim import OAgentSim 4 | 5 | oAgent = OAgentSim() 6 | dict_launch = {} 7 | 8 | pi = 3.141592653589793 9 | RAD = 180 / pi 10 | 11 | for launch_time in range(int(40)): 12 | oAgent.modify() 13 | print("========", launch_time + 1, "========") 14 | step = [] 15 | done = False 16 | while done is False: 17 | done = oAgent.step(action=0, h=0.02) 18 | v, theta, r, q, x, y, t = oAgent.collect() 19 | step.append([v, theta, r, q, x, y]) 20 | s = np.array(step) 21 | t = theta * RAD * np.ones([s.shape[0], 1]) # 换算为角度 22 | dict_launch['dnn{0}'.format(launch_time)] = np.concatenate([s, t], axis=1) 23 | flight_data = {'flight_data': dict_launch} 24 | scipy.io.savemat('./flight_data.mat', flight_data) 25 | -------------------------------------------------------------------------------- /dnn_test.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import random 3 | import tensorflow.compat.v1 as tf 4 | tf.compat.v1.disable_eager_execution() 5 | from dnn_regression import load_data, init_network 6 | 7 | 8 | class DeepNeuralNetwork: 9 | def __init__(self, x_num=6, path='./dnn_model'): 10 | self.graph = tf.Graph() 11 | with self.graph.as_default(): 12 | self.x, y, trainer, loss = init_network(x_num=x_num) 13 | self.sess = tf.Session(graph=self.graph) 14 | with self.sess.as_default(): 15 | with self.graph.as_default(): 16 | self.y = self.sess.graph.get_tensor_by_name(name='layers/n_outputs/BiasAdd:0') 17 | saver = tf.train.Saver() 18 | checkpoint_path = tf.train.latest_checkpoint(path) 19 | saver.restore(self.sess, checkpoint_path) 20 | 21 | # 转换为tf-lite模型 22 | convert = tf.lite.TFLiteConverter.from_session(self.sess, [self.x], [self.y]) 23 | tflite_model = convert.convert() 24 | with open('dnn_model.tflite', 'wb') as f: 25 | f.write(tflite_model) 26 | 27 | def predict(self, state): 28 | with self.sess.as_default(): 29 | with self.sess.graph.as_default(): 30 | return self.y.eval(feed_dict={self.x: state}) 31 | 32 | 33 | if __name__ == '__main__': 34 | dnn = DeepNeuralNetwork() 35 | x_train, y_train, x_test, y_test, x_vali, y_vali, train_set_size = load_data(shuffle_flag=False) 36 | # p = int(random.uniform(0, train_set_size - 1e6)) 37 | dict_DNN = {'y_hat': dnn.predict(x_test), 'y': y_test} 38 | dataDNN = r'dnn_test.mat' 39 | scipy.io.savemat(dataDNN, dict_DNN) 40 | -------------------------------------------------------------------------------- /atm2.txt: -------------------------------------------------------------------------------- 1 | 0 1.225e+00 3.403e+02 2 | 2000 1.007e+00 3.325e+02 3 | 4000 8.193e-01 3.246e+02 4 | 6000 6.601e-01 3.165e+02 5 | 8000 5.258e-01 3.081e+02 6 | 10000 4.135e-01 2.995e+02 7 | 12000 3.119e-01 2.951e+02 8 | 14000 2.279e-01 2.951e+02 9 | 16000 1.665e-01 2.951e+02 10 | 18000 1.216e-01 2.951e+02 11 | 20000 8.891e-02 2.951e+02 12 | 22000 6.451e-02 2.964e+02 13 | 24000 4.694e-02 2.977e+02 14 | 26000 3.426e-02 2.991e+02 15 | 28000 2.508e-02 3.004e+02 16 | 30000 1.841e-02 3.017e+02 17 | 32000 
1.355e-02 3.030e+02 18 | 34000 9.887e-03 3.065e+02 19 | 36000 7.257e-03 3.101e+02 20 | 38000 5.366e-03 3.137e+02 21 | 40000 3.995e-03 3.172e+02 22 | 42000 2.995e-03 3.207e+02 23 | 44000 2.259e-03 3.241e+02 24 | 46000 1.714e-03 3.275e+02 25 | 48000 1.317e-03 3.298e+02 26 | 50000 1.027e-03 3.298e+02 27 | 52000 8.055e-04 3.288e+02 28 | 54000 6.389e-04 3.254e+02 29 | 56000 5.044e-04 3.220e+02 30 | 58000 3.962e-04 3.186e+02 31 | 60000 3.096e-04 3.151e+02 32 | 62000 2.407e-04 3.115e+02 33 | 64000 1.860e-04 3.080e+02 34 | 66000 1.429e-04 3.044e+02 35 | 68000 1.091e-04 3.007e+02 36 | 70000 8.281e-05 2.971e+02 37 | 72000 6.236e-05 2.934e+02 38 | 74000 4.637e-05 2.907e+02 39 | 76000 3.430e-05 2.880e+02 40 | 78000 2.523e-05 2.853e+02 41 | 80000 1.845e-05 2.825e+02 42 | 82000 1.341e-05 2.797e+02 43 | 84000 9.690e-06 2.769e+02 44 | 86000 6.955e-06 2.741e+02 -------------------------------------------------------------------------------- /dnn_regression.py: -------------------------------------------------------------------------------- 1 | import random 2 | import scipy.io 3 | import numpy as np 4 | import tensorflow.compat.v1 as tf 5 | tf.compat.v1.disable_eager_execution() 6 | 7 | 8 | def load_data(collect_time=1000, shuffle_flag=True): 9 | # m = scipy.io.loadmat('./flight_data.mat') 10 | # data_raw = m['flight_data'][0, 0]['dnn0'] 11 | # for collect_i in range(1, collect_time): 12 | # data_raw = np.concatenate((data_raw, m['flight_data'][0, 0]['dnn{}'.format(collect_i)]), axis=0) 13 | m = scipy.io.loadmat('./flight_data_concatenate.mat') 14 | data_raw = np.array(m['data_raw']) 15 | data_raw = np.true_divide(data_raw, [1e2, -1, 1e4, -1, 1e4, 1e4, 1]) 16 | print(np.mean(data_raw, axis=0)) 17 | if shuffle_flag: 18 | np.random.shuffle(data_raw) 19 | size_raw = len(data_raw) 20 | size_train = round(size_raw * 0.96) 21 | x_train_set = data_raw[:size_train, :-1] 22 | y_train_set = data_raw[:size_train, -1:] 23 | 24 | size_test = size_train + round(size_raw * 0.02) 25 | x_test_set = data_raw[size_train:size_test, :-1] 26 | y_test_set = data_raw[size_train:size_test, -1:] 27 | 28 | x_validation_set = data_raw[size_test:, :-1] 29 | y_validation_set = data_raw[size_test:, -1:] 30 | print('data load DONE! 
Train data size=', size_train) 31 | return x_train_set, y_train_set, x_test_set, y_test_set, x_validation_set, y_validation_set, size_train 32 | 33 | 34 | def init_network(x_num=6, learn_rate=0.0001): 35 | with tf.name_scope("placeholder"): 36 | x = tf.placeholder(tf.float32, shape=[None, x_num], name='x') 37 | y = tf.placeholder(tf.float32, shape=[None, 1], name='y') 38 | with tf.name_scope("layers"): 39 | l1 = x 40 | for i in range(3): 41 | l1 = tf.layers.dense(l1, units=100, activation=tf.nn.relu, name="n_hidden_" + str(i)) 42 | y_hat = tf.layers.dense(l1, units=1, name="n_outputs") 43 | with tf.name_scope("loss"): 44 | loss_func = tf.reduce_mean(tf.square(y_hat - y), name="loss") 45 | with tf.name_scope("train"): 46 | optimizer = tf.train.AdamOptimizer(learning_rate=learn_rate) 47 | trainer = optimizer.minimize(loss_func) 48 | return x, y, trainer, loss_func 49 | 50 | 51 | if __name__ == '__main__': 52 | sess = tf.InteractiveSession() 53 | x, y, train_step, loss_mse = init_network() 54 | 55 | x_train, y_train, x_test, y_test, x_vali, y_vali, train_set_size = load_data() 56 | 57 | load_flag = True 58 | 59 | sess.run(tf.global_variables_initializer()) 60 | saver = tf.train.Saver() 61 | if load_flag is True: 62 | checkpoint_path = tf.train.latest_checkpoint('./dnn_model') 63 | saver.restore(sess, checkpoint_path) 64 | print("test loss mse = %g" % loss_mse.eval(feed_dict={x: x_test, y: y_test})) 65 | 66 | steps = int(1e5) 67 | batch_size = int(1e4) 68 | for i in range(steps): 69 | # batch = ([], []) 70 | # p = random.sample(range(train_set_size), batch_size) 71 | # for k in p: 72 | # batch[0].append(x_train[k]) 73 | # batch[1].append(y_train[k]) 74 | # train_step.run(feed_dict={x: batch[0], y: batch[1]}) 75 | p = int(random.uniform(0, train_set_size - batch_size)) 76 | train_step.run(feed_dict={x: x_train[p:p + batch_size, :], y: y_train[p:p + batch_size, :]}) 77 | if i % (steps / 10) == 0: 78 | train_loss_mse = loss_mse.eval(feed_dict={x: x_test, y: y_test}) 79 | print('Training...{:.1f}%'.format(i / steps * 100), 'train loss mse =', train_loss_mse) 80 | print("test loss mse = %g" % loss_mse.eval(feed_dict={x: x_vali, y: y_vali})) 81 | 82 | saver.save(sess, r'.\dnn_model\flight') 83 | -------------------------------------------------------------------------------- /ppo_test.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | import numpy as np 4 | import tensorflow.python.keras as keras 5 | import matplotlib.pyplot as plt 6 | from scipy.io import savemat 7 | from missile_3d import MISSILE 8 | from ppo_model import PPO 9 | from dnn_test import DeepNeuralNetwork 10 | 11 | BATCH_SIZE = 32 # update batch size 12 | GAMMA = 0.99 13 | TEST_EPISODES = 100 14 | REWARD_SAVE_CASE = 0 15 | dnny = DeepNeuralNetwork() # 纵向通道 16 | dnnz = keras.models.load_model("./dnn_model/flight.h5") # 侧向通道 17 | pi = 3.141592653589793 18 | RAD = 180 / pi # 弧度转角度 19 | 20 | 21 | class OMissile(MISSILE): 22 | def __init__(self): 23 | super().__init__() 24 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 25 | self.nav_theta = 0. 26 | self.nav_psiv = 0. 
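# Annotation (not in the original source): the two feature vectors built below are
# scaled with the same constants used in dnn_regression.load_data(), i.e. the raw
# samples are divided element-wise by [1e2, -1, 1e4, -1, 1e4, 1e4], so the loaded
# impact-angle networks see inputs in the range they were trained on; the velocity
# is first projected into the pitch/yaw guidance plane via cos(nav_psiv)/cos(nav_theta).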
27 | tmp_y = np.array([[v * math.cos(self.nav_psiv) / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 28 | tmp_z = np.array([[v * math.cos(self.nav_theta) / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 29 | self.iay = float(dnny.predict(tmp_y)) 30 | self.iaz = float(dnnz.predict(tmp_z)) 31 | self.tgo = r / v 32 | 33 | def get_ia(self): 34 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 35 | # tmp_y = np.array([[v / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 36 | # tmp_z = np.array([[v / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 37 | tmp_y = np.array([[v * math.cos(self.nav_psiv) / 1e2, theta / -1, r / 1e4, q_theta / -1, x / 1e4, y / 1e4]]) 38 | tmp_z = np.array([[v * math.cos(self.nav_theta) / 1e2, psi / -1, r / 1e4, -q_psi / -1, x / 1e4, z / 1e4]]) 39 | self.iay = float(dnny.predict(tmp_y)) 40 | self.iaz = float(dnnz.predict(tmp_z)) 41 | return self.iay, self.iaz 42 | 43 | def get_tgo(self): 44 | v, theta, psi, r, q_theta, q_psi, x, y, z, t = self.collect() 45 | self.tgo = r / v 46 | return self.tgo 47 | 48 | def get_state(self, y_target, z_target): 49 | iay, iaz = self.get_ia() 50 | tgo = self.get_tgo() 51 | state_y = np.array([(y_target - iay) / tgo]) 52 | state_z = np.array([(z_target - iaz) / tgo]) 53 | return state_y, state_z 54 | 55 | 56 | if __name__ == '__main__': 57 | env = OMissile() 58 | # set the init parameter 59 | state_dim = 1 60 | action_dim = 1 61 | action_bound = 2 * 9.81 # action limitation 62 | 63 | # test 64 | agent_y = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 65 | agent_z = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 66 | flight_data = {} 67 | for episode in range(TEST_EPISODES): 68 | desired_iay = [] # 期望的ia 69 | actual_iay = [] # 实际的ia 70 | impact_angle_errory = [] # ia的误差 71 | desired_iaz = [] # 期望的ia 72 | actual_iaz = [] # 实际的ia 73 | impact_angle_errorz = [] # ia的误差 74 | 75 | env.modify([200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.]) # [200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.] 76 | tdy = random.uniform(-60., -20.) 77 | tdz = random.uniform(-20., 20.) 
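# Annotation (not in the original source): tdy and tdz are the commanded terminal
# impact angles in degrees (pitch plane drawn from [-60, -20] deg, yaw plane from
# [-20, 20] deg). The DNN impact-angle predictors were trained on theta*RAD, so the
# PPO branch consumes the targets in degrees via get_state(), while the TSG branch
# below converts them to radians with tdy / RAD and tdz / RAD before calling env.step().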
78 | 79 | # expre = 1 80 | # tdy = [-20., -40., -60.][expre] 81 | # tdz = [20., 0., -20.][expre] 82 | 83 | t = [] 84 | state_y, state_z = env.get_state(tdy, tdz) 85 | done = False 86 | TSG = False 87 | while done is False: 88 | if TSG: 89 | done, env.nav_theta, env.nav_psiv = env.step(tdy / RAD, tdz / RAD, h=0.01) # TSG 90 | else: 91 | action_y = agent_y.get_action(state_y, greedy=True) # use the mean of distribution as action 92 | action_z = agent_z.get_action(state_z, greedy=True) # use the mean of distribution as action 93 | done, env.nav_theta, env.nav_psiv = env.step(action_y, -action_z, h=0.01) 94 | state_y, state_z = env.get_state(tdy, tdz) 95 | 96 | desired_iay.append(tdy) 97 | actual_iay.append(env.iay) 98 | impact_angle_errory.append(tdy - env.iay) 99 | 100 | desired_iaz.append(tdz) 101 | actual_iaz.append(env.iaz) 102 | impact_angle_errorz.append(tdz - env.iaz) 103 | 104 | # tmpenergy = 0 105 | # for e in range(len(env.reac)): 106 | # tmpenergy = tmpenergy + env.reac[e] ** 2 * 0.1 107 | # print(tmpenergy) 108 | # env.plot_data() 109 | # plt.ioff() 110 | # plt.show() 111 | 112 | flight_data['sim_{}'.format(episode)] = env.save_data() 113 | flight_data['time{}'.format(episode)] = {'desired_iay': np.array(desired_iay), 114 | 'actual_iay': np.array(actual_iay), 115 | 'impact_time_errory': np.array(impact_angle_errory), 116 | 'desired_iaz': np.array(desired_iaz), 117 | 'actual_iaz': np.array(actual_iaz), 118 | 'impact_time_errorz': np.array(impact_angle_errorz)} 119 | 120 | # print the result 121 | print('Testing | Episode: {}/{} | Target: {:.2f}/{:.2f} | Actual: {:.2f}/{:.2f} | Error: {:.2f}/{:.2f}' 122 | .format(episode + 1, TEST_EPISODES, 123 | tdy, tdz, 124 | env.Y[2] * RAD, env.Y[3] * RAD, 125 | tdy - env.Y[2] * RAD, tdz - env.Y[3] * RAD)) 126 | # plt.figure(3) 127 | # plt.ion() 128 | # plt.clf() 129 | # plt.subplots_adjust(hspace=0.6) 130 | # plt.subplot(2, 1, 1) 131 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_iay)[:-1], 'k--', label='desired ia') 132 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_iay)[:-1], 'k-', label='predict ia') 133 | # plt.plot(np.array(env.reY)[:, 0], np.array(env.reY)[:, 2] * RAD, 'r-', label='actual theta') 134 | # plt.xlabel('Time (s)') 135 | # plt.ylabel('impact angle of xoy') 136 | # plt.legend() 137 | # plt.grid() 138 | # 139 | # plt.subplot(2, 1, 2) 140 | # plt.plot() 141 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_iaz)[:-1], 'k--', label='desired ia') 142 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_iaz)[:-1], 'k-', label='predict ia') 143 | # plt.plot(np.array(env.reY)[:, 0], np.array(env.reY)[:, 3] * RAD, 'r-', label='actual psi') 144 | # plt.xlabel('Time (s)') 145 | # plt.ylabel('impact angle of xoz') 146 | # plt.grid() 147 | # 148 | # plt.pause(0.1) 149 | savemat('./mats/flight_sim_ppo_monte_3d.mat', flight_data) 150 | # savemat('./mats/flight_sim_tsg_{}.mat'.format(3), flight_data) 151 | -------------------------------------------------------------------------------- /ppo_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.compat.v1.disable_eager_execution() 3 | import numpy as np 4 | 5 | A_LR = 0.0001 # learning rate for actor 6 | C_LR = 0.0002 # learning rate for critic 7 | A_UPDATE_STEPS = 10 # actor update steps 8 | C_UPDATE_STEPS = 10 # critic update steps 9 | METHOD = [dict(name='kl_pen', kl_target=0.01, lam=0.5), # KL penalty 10 | dict(name='clip', epsilon=0.2) # Clipped surrogate objective 11 | ][1] 12 | 13 | 14 | 
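# Usage sketch (annotation inferred from ppo_train.py, not part of the original file):
#   agent = PPO(state_dim, action_dim, action_bound)   # build actor/critic graph
#   a = agent.get_action(s)                            # sample from the Gaussian policy
#   agent.store_transition(s, a, r)                    # fill the on-policy buffers
#   agent.finish_path(s_next, done, GAMMA=0.99)        # compute discounted returns
#   agent.update()                                     # PPO-clip actor/critic update
#   agent.save_model('./ppo_model/agent0')             # only saves when trained from scratch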
class PPO(object): 15 | def __init__(self, state_dim, action_dim, action_bound, load_path=None): 16 | self.load_flag = load_path 17 | graph = tf.Graph() 18 | with graph.as_default(): # 构建计算图 19 | self.state = tf.placeholder(tf.float32, [None, state_dim], 'state') 20 | # critic 21 | with tf.name_scope('critic'): 22 | l1 = tf.layers.dense(self.state, 64, tf.nn.relu) 23 | l1 = tf.layers.dense(l1, 64, tf.nn.relu) 24 | self.v = tf.layers.dense(l1, 1) 25 | with tf.name_scope('train_critic'): 26 | # Update actor network 27 | self.discounted_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r') # 折扣回报函数 28 | self.advantage = self.discounted_r - self.v 29 | self.critic_loss = tf.reduce_mean(tf.square(self.advantage)) 30 | self.critic_train_op = tf.train.AdamOptimizer(C_LR).minimize(self.critic_loss) 31 | 32 | # actor 33 | with tf.name_scope('actor'): 34 | def _build_anet(state, name, trainable): 35 | with tf.variable_scope(name): 36 | l1 = tf.layers.dense(state, 64, tf.nn.relu, trainable=trainable) 37 | l1 = tf.layers.dense(l1, 64, tf.nn.relu, trainable=trainable) 38 | mean = action_bound * tf.layers.dense(l1, action_dim, tf.nn.tanh, trainable=trainable) 39 | logstd = tf.Variable(np.zeros(action_dim, dtype=np.float32), trainable=trainable) 40 | norm_dist = tf.distributions.Normal(loc=mean, scale=tf.exp(logstd)) # 正态分布 41 | params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name) # 提取图中的元素 42 | return norm_dist, params 43 | 44 | pi, pi_params = _build_anet(self.state, 'pi', trainable=True) # 新策略 45 | old_pi, old_pi_params = _build_anet(self.state, 'oldpi', trainable=False) # 旧策略 46 | 47 | # 预先定义两个操作 48 | with tf.name_scope('update_oldpi'): # 将pi网络参数赋给oldpi网络 49 | self.update_oldpi_op = [oldp.assign(p) for p, oldp in zip(pi_params, old_pi_params)] 50 | with tf.name_scope('action'): # 执行动作 51 | self.act_op = pi.loc[0] # 直接选择均值作为动作 52 | self.sample_op = tf.squeeze(pi.sample(1), axis=0)[0] # 从正态分布中随机选择一个动作 53 | 54 | with tf.name_scope('train_critic'): 55 | self.action = tf.placeholder(tf.float32, [None, action_dim], 'action') # 创建action占位符 56 | self.adv = tf.placeholder(tf.float32, [None, 1], 'advantage') # 创建A(s,a)占位符 57 | ratio = tf.exp(pi.log_prob(self.action) - old_pi.log_prob(self.action)) # 比率函数r(θ) 58 | surr = ratio * self.adv # 目标函数的第一项 59 | 60 | if METHOD['name'] == 'kl_pen': # 第一类目标函数(KL散度型) 61 | self.tflam = tf.placeholder(tf.float32, None, 62 | 'lambda') # 新加入的参数,Adaptive KL Penalty Coefficient 63 | kl = tf.distributions.kl_divergence(old_pi, pi) # 计算KL散度,衡量两个分布的差异 64 | self.kl_mean = tf.reduce_mean(kl) # d参数,KL散度的均值 65 | self.action_loss = -tf.reduce_mean(surr - self.tflam * kl) # 目标函数 66 | 67 | else: # 第二类目标函数(CLIP型) 68 | self.action_loss = -tf.reduce_mean( 69 | tf.minimum(surr, 70 | tf.clip_by_value(ratio, 1. - METHOD['epsilon'], 71 | 1. 
+ METHOD['epsilon']) * self.adv) 72 | ) # 目标函数 73 | self.action_train_op = tf.train.AdamOptimizer(A_LR).minimize(self.action_loss) # 使用Adam优化目标函数 74 | 75 | self.sess = tf.Session(graph=graph) 76 | 77 | # 加载模型 78 | with self.sess.as_default(): 79 | with self.sess.graph.as_default(): 80 | if load_path is None: 81 | self.sess.run(tf.global_variables_initializer()) # 初始化计算图 82 | else: 83 | saver = tf.train.Saver() 84 | saver.restore(self.sess, tf.train.latest_checkpoint(checkpoint_dir=load_path)) 85 | 86 | # # 转换为tf-lite模型 87 | # convert = tf.lite.TFLiteConverter.from_session(self.sess, [self.state], [self.act_op]) 88 | # tflite_model = convert.convert() 89 | # with open('ppo_model.tflite', 'wb') as f: 90 | # f.write(tflite_model) 91 | 92 | self.state_buffer, self.action_buffer = [], [] 93 | self.reward_buffer, self.cumulative_reward_buffer = [], [] 94 | self.action_bound = action_bound 95 | 96 | def store_transition(self, state, action, reward): 97 | """ 98 | Store state, action, reward at each step 99 | :param state: 100 | :param action: 101 | :param reward: 102 | :return: None 103 | """ 104 | self.state_buffer.append(state) 105 | self.action_buffer.append(action) 106 | self.reward_buffer.append(reward) 107 | 108 | def finish_path(self, next_state, done, GAMMA=0.9): 109 | """ 110 | Calculate cumulative reward 111 | :param next_state: 112 | :param done: 113 | :param GAMMA: 114 | :return: None 115 | """ 116 | if done: 117 | v_s_ = 0 118 | else: 119 | v_s_ = self.get_v(np.array(next_state, np.float32)) 120 | discounted_r = [] 121 | for r in self.reward_buffer[::-1]: 122 | v_s_ = r + GAMMA * v_s_ 123 | discounted_r.append(v_s_) 124 | discounted_r.reverse() 125 | discounted_r = np.array(discounted_r)[:, np.newaxis] 126 | self.cumulative_reward_buffer.extend(discounted_r) 127 | self.reward_buffer.clear() 128 | 129 | def update(self): 130 | """ 131 | Update parameter with the constraint of KL divergent 132 | :return: None 133 | """ 134 | s = np.array(self.state_buffer, np.float32) 135 | a = np.array(self.action_buffer, np.float32) 136 | r = np.array(self.cumulative_reward_buffer, np.float32) 137 | with self.sess.as_default(): 138 | with self.sess.graph.as_default(): 139 | self.sess.run(self.update_oldpi_op) # oldpi=pi 140 | adv = self.sess.run(self.advantage, {self.state: s, self.discounted_r: r}) # 通过critic得到advantage value 141 | 142 | # update actor 143 | if METHOD['name'] == 'kl_pen': # KL散度型 144 | for _ in range(A_UPDATE_STEPS): # 145 | _, kl = self.sess.run([self.action_train_op, self.kl_mean], 146 | {self.state: s, self.action: a, self.adv: adv, self.tflam: METHOD['lam']}) 147 | if kl > 4 * METHOD['kl_target']: # 根据d和d目标的均值动态调整Adaptive KL Penalty Coefficient 148 | break 149 | elif kl < METHOD['kl_target'] / 1.5: # adaptive lambda, this is in OpenAI's paper 150 | METHOD['lam'] /= 2 # 放松KL约束限制 151 | elif kl > METHOD['kl_target'] * 1.5: 152 | METHOD['lam'] *= 2 # 增强KL约束限制 153 | METHOD['lam'] = np.clip(METHOD['lam'], 1e-4, 154 | 10) # sometimes explode, this clipping is my solution 155 | else: # clipping method, find this is better (OpenAI's paper) 156 | [self.sess.run(self.action_train_op, {self.state: s, self.action: a, self.adv: adv}) for _ in 157 | range(A_UPDATE_STEPS)] 158 | 159 | # update critic 160 | [self.sess.run(self.critic_train_op, {self.state: s, self.discounted_r: r}) for _ in 161 | range(C_UPDATE_STEPS)] 162 | 163 | self.state_buffer.clear() 164 | self.action_buffer.clear() 165 | self.cumulative_reward_buffer.clear() 166 | self.reward_buffer.clear() 167 | 168 | def 
get_action(self, s, greedy=False): 169 | s = s[np.newaxis, :] 170 | with self.sess.as_default(): 171 | with self.sess.graph.as_default(): 172 | if greedy: 173 | action = self.sess.run(self.act_op, {self.state: s}) 174 | else: 175 | action = self.sess.run(self.sample_op, {self.state: s}) 176 | return np.clip(action, -self.action_bound, self.action_bound) 177 | 178 | def get_v(self, s): 179 | with self.sess.as_default(): 180 | with self.sess.graph.as_default(): 181 | if s.ndim < 2: 182 | s = s[np.newaxis, :] 183 | return self.sess.run(self.v, {self.state: s})[0, 0] 184 | 185 | def save_model(self, save_path): 186 | with self.sess.as_default(): 187 | with self.sess.graph.as_default(): 188 | if self.load_flag is None: # 未加载模型 189 | saver = tf.train.Saver() 190 | saver.save(self.sess, save_path=save_path) 191 | # print("the model in {} save success".format(save_path)) 192 | -------------------------------------------------------------------------------- /ppo_train.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | import math 4 | import os 5 | import numpy as np 6 | import tensorflow.python.keras as keras 7 | import matplotlib.pyplot as plt 8 | from scipy.io import savemat 9 | from OAgent_sim import OAgentSim 10 | from ppo_model import PPO 11 | from dnn_test import DeepNeuralNetwork 12 | 13 | BATCH_SIZE = 32 # update batch size 14 | TRAIN_EPISODES = 1000 # total number of episodes for training 15 | TEST_EPISODES = 100 # total number of episodes for testing 16 | GAMMA = 0.99 17 | REWARD_SAVE_CASE = 0 18 | # dnn = DeepNeuralNetwork() 19 | dnn = keras.models.load_model("./dnn_model/flight.h5") 20 | # tgo = DeepNeuralNetwork(4, 'tgo_model') 21 | pi = 3.141592653589793 22 | RAD = 180 / pi # 弧度转角度 23 | 24 | 25 | class OMissile(OAgentSim): 26 | def __init__(self, no_=0, missile=None, target=None): 27 | super().__init__(no_=0, missile=None, target=None) 28 | v, theta, r, q, x, y, t = self.collect() 29 | self.ia = float(dnn.predict(np.array([[v / 1e2, theta / -1, r / 1e4, q / -1, x / 1e4, y / 1e4]]))) 30 | self.tgo = r / v # float(tgo.predict([[v / 315, theta / -0.6, x / -1e4, y / 1.5e4]])) 31 | 32 | def get_ia(self): 33 | v, theta, r, q, x, y, t = self.collect() 34 | dnn_state = np.array([[v / 1e2, theta / -1, r / 1e4, q / -1, x / 1e4, y / 1e4]]) 35 | self.ia = float(dnn.predict(dnn_state)) 36 | return self.ia 37 | 38 | def get_tgo(self, dnn_state=None): 39 | v, theta, r, q, x, y, t = self.collect() 40 | # dnn_state = [v / 315, theta / -0.6, x / -1e4, y / 1.5e4] 41 | # self.tgo = float(tgo.predict([dnn_state])) 42 | self.tgo = r / v 43 | return self.tgo 44 | 45 | def get_state(self, a_target): 46 | ia = self.get_ia() 47 | tgo = self.get_tgo() 48 | state_local = [(a_target - ia) / tgo] 49 | return np.array(state_local) 50 | 51 | def get_reward(self, t_target): 52 | v, theta, r, q, x, y, t = self.collect() 53 | tgo = self.get_tgo() 54 | e_local = (t_target - self.ia) / tgo 55 | # vy = v * math.sin(theta) # y向速度 56 | # zem = y / tgo + vy 57 | k1 = 0.8 58 | k2 = 0.2 59 | reward_local = k1 * math.exp(-(e_local / 1e0) ** 2) + \ 60 | k2 * math.exp(-(self.ac / 1e1) ** 2) 61 | return np.array(reward_local) 62 | 63 | 64 | if __name__ == '__main__': 65 | env = OMissile() 66 | 67 | # set the init parameter 68 | state_dim = 1 69 | action_dim = 1 70 | action_bound = 2 * 9.81 # action limitation 71 | t0 = time.time() 72 | model_num = 0 73 | 74 | train = False # choose train or test 75 | if train: 76 | agent = PPO(state_dim, action_dim, 
action_bound) 77 | dict_episode_reward = {'all_episode_reward': [], 'episode_reward': [], 'target_angle': [], 'actual_angle': []} 78 | dict_episode_angle = {'desired ia': [], 'actual ia': [], 'impact angle error': []} 79 | all_episode_reward = [] 80 | for episode in range(int(TRAIN_EPISODES)): 81 | env.modify() # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 82 | if episode % 10 == 0: 83 | td = random.choice([-10., -20., -30., -40., -50., -60., -70., -80., -90.]) + 50. 84 | desired_ia = [] # 期望的ia 85 | actual_ia = [] # 实际的ia 86 | impact_angle_error = [] # ia的误差 87 | state = env.get_state(td) 88 | episode_reward = 0 89 | done = False 90 | while done is False: 91 | # collect state, action and reward 92 | action = agent.get_action(state) # get new action with old state 93 | done = env.step(action=float(action), h=0.1) 94 | state_ = env.get_state(td) # get new state with new action 95 | reward = env.get_reward(td) # get new reward with new action 96 | agent.store_transition(state, action, reward) # train with old state 97 | state = state_ # update state 98 | episode_reward += reward 99 | 100 | desired_ia.append(td) 101 | actual_ia.append(env.ia) 102 | impact_angle_error.append(td - env.ia) 103 | 104 | # update ppo 105 | if len(agent.state_buffer) >= BATCH_SIZE: 106 | agent.finish_path(state_, done, GAMMA=GAMMA) 107 | agent.update() 108 | # end of one episode 109 | env.plot_data(figure_num=0) 110 | 111 | # use the terminal data to update once 112 | if len(agent.reward_buffer) != 0: 113 | agent.reward_buffer[-1] -= env.R + (td - env.Y[2] * RAD) ** 2 114 | agent.finish_path(state, done, GAMMA=GAMMA) 115 | agent.update() 116 | episode_reward -= env.R + (td - env.Y[2] * RAD) ** 2 117 | 118 | # print the result 119 | episode_reward = episode_reward / env.Y[0] # calculate the average episode reward 120 | print('Training | Episode: {}/{} | Average Episode Reward: {:.2f} | Running Time: {:.2f} | ' 121 | 'Target Angle: {:.2f} | Actual Angle: {:.2f} | Error Angle: {:.2f}' 122 | .format(episode + 1, TRAIN_EPISODES, episode_reward, time.time() - t0, 123 | td, env.Y[2] * RAD, td - env.Y[2] * RAD)) 124 | 125 | # plt.figure(1) 126 | # plt.ion() 127 | # plt.clf() 128 | # plt.subplots_adjust(hspace=0.6) 129 | # plt.subplot(2, 1, 1) 130 | # plt.plot(np.array(env.reY)[:, 0], np.array(desired_ia)[:-1], 'k--', label='desired ia') 131 | # plt.plot(np.array(env.reY)[:, 0], np.array(actual_ia)[:-1], 'k-', label='actual ia') 132 | # plt.xlabel('Time (s)') 133 | # plt.ylabel('angle(s)') 134 | # plt.legend() 135 | # plt.grid() 136 | # 137 | # plt.subplot(2, 1, 2) 138 | # plt.plot() 139 | # plt.plot(np.array(env.reY)[:, 0], np.array(impact_angle_error)[:-1], 'k-') 140 | # plt.xlabel('Time (s)') 141 | # plt.ylabel('error(s)') 142 | # plt.grid() 143 | # 144 | # plt.pause(0.1) 145 | # 146 | # calculate the discounted episode reward 147 | if episode == 0: 148 | all_episode_reward.append(episode_reward) 149 | else: 150 | all_episode_reward.append(all_episode_reward[-1] * .9 + episode_reward * .1) 151 | 152 | # save the episode data 153 | dict_episode_reward['episode_reward'].append(episode_reward) 154 | dict_episode_reward['all_episode_reward'] = all_episode_reward 155 | dict_episode_reward['target_angle'].append(td) 156 | dict_episode_reward['actual_angle'].append(env.Y[2] * RAD) 157 | 158 | # save model and data 159 | if episode_reward > REWARD_SAVE_CASE: 160 | REWARD_SAVE_CASE = episode_reward 161 | # if abs(td - env.Y[0]) < 0.5: 162 | agent.save_model('./ppo_model/agent{}'.format(model_num)) 163 | savemat('./ppo_reward.mat', 
dict_episode_reward) 164 | model_num = (model_num + 1) % 20 165 | 166 | agent.save_model('./ppo_model/agent_end') 167 | savemat('./ppo_reward.mat', dict_episode_reward) 168 | 169 | plt.figure(2) 170 | plt.plot(all_episode_reward) 171 | if not os.path.exists('image'): 172 | os.makedirs('image') 173 | plt.savefig(os.path.join('image', '_'.join(['PPO', time.strftime("%Y_%m%d_%H%M")]))) 174 | plt.ioff() 175 | plt.show() 176 | else: 177 | # test 178 | agent = PPO(state_dim, action_dim, action_bound, r'./ppo_model') 179 | flight_data = {} 180 | for episode in range(100): # TEST_EPISODES 181 | env.modify() # [0., 200., 0 / RAD, -20000., 20000., 200.] 182 | # td = env.get_ia() * random.uniform(0.5, 1.5) 183 | td = random.uniform(-10., -90.) + 50. 184 | # td = -60.0 185 | desired_ia = [] # 期望的ia 186 | actual_ia = [] # 实际的ia 187 | impact_angle_error = [] # ia的误差 188 | state = env.get_state(td) 189 | # action = 0 190 | episode_reward = 0 191 | done = False 192 | t = [] 193 | PPO_flag = True 194 | while done is False: 195 | action = agent.get_action(state, greedy=True) # use the mean of distribution as action 196 | if PPO_flag: 197 | done = env.step(action=action, h=0.1) 198 | else: 199 | done = env.step(action=td / RAD, h=0.1) 200 | state = env.get_state(td) 201 | reward = env.get_reward(td) 202 | episode_reward += reward 203 | 204 | desired_ia.append(td) 205 | actual_ia.append(env.ia) 206 | impact_angle_error.append(td - env.ia) 207 | 208 | # tmpenergy = 0 209 | # for e in range(len(env.reac)): 210 | # tmpenergy = tmpenergy + env.reac[e] ** 2 * 0.1 211 | # print(tmpenergy) 212 | # env.plot_data(figure_num=0) 213 | # plt.ioff() 214 | # plt.show() 215 | 216 | flight_data['sim_{}'.format(episode)] = env.save_data() 217 | flight_data['time{}'.format(episode)] = {'desired_ia': np.array(desired_ia), 218 | 'actual_ia': np.array(actual_ia), 219 | 'impact_time_error': np.array(impact_angle_error)} 220 | 221 | # print the result 222 | episode_reward = episode_reward / env.Y[0] # calculate the average episode reward 223 | print('Testing | Episode: {}/{} | Average Episode Reward: {:.2f} | Running Time: {:.2f} | ' 224 | 'Target: {:.2f} | Actual: {:.2f} | Error: {:.2f}' 225 | .format(episode + 1, TEST_EPISODES, episode_reward, time.time() - t0, 226 | td, env.Y[2] * RAD, td - env.Y[2] * RAD)) # actual_ia[-1])) 227 | plt.figure(1) 228 | plt.ion() 229 | plt.clf() 230 | plt.subplots_adjust(hspace=0.6) 231 | plt.subplot(2, 1, 1) 232 | plt.plot(np.array(env.reY)[:, 0], np.array(desired_ia)[:-1], 'k--', label='desired ia') 233 | plt.plot(np.array(env.reY)[:, 0], np.array(actual_ia)[:-1], 'k-', label='actual ia') 234 | plt.xlabel('Time (s)') 235 | plt.ylabel('$impact angle(s)$') 236 | plt.legend() 237 | plt.grid() 238 | 239 | plt.subplot(2, 1, 2) 240 | plt.plot() 241 | plt.plot(np.array(env.reY)[:, 0], np.array(impact_angle_error)[:-1], 'k-') 242 | plt.xlabel('Time (s)') 243 | plt.ylabel('impact angle error(s)') 244 | plt.grid() 245 | 246 | plt.pause(0.1) 247 | 248 | savemat('./flight_sim_tsg_monte.mat', flight_data) 249 | -------------------------------------------------------------------------------- /missile_3d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import matplotlib.pyplot as plt # clf()清图 # cla()清坐标轴 # close()关窗口 4 | from scipy import interpolate 5 | from scipy.io import savemat 6 | from math import sin, cos, atan2, sqrt, asin 7 | 8 | # 常量 9 | pi = 3.141592653589793 10 | RAD = 180 / pi # 弧度转角度 11 | g = 9.81 # 重力加速度 12 | S = 
0.0572555 # 特征面积 13 | 14 | plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 15 | plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 16 | 17 | p_flag = False # 是否考虑推力 18 | 19 | Ma2 = np.array([[0.4, 39.056, 0.4604, 39.072], 20 | [0.6, 40.801, 0.4682, 39.735], 21 | [0.8, 41.372, 0.4635, 39.242], 22 | [0.9, 42.468, 0.4776, 40.351]]) 23 | 24 | 25 | def load_atm(path): 26 | file = open(path) 27 | atm_str = file.read().split() 28 | atm = [] 29 | for _ in range(0, len(atm_str), 3): 30 | atm.append([float(atm_str[_]), float(atm_str[_ + 1]), float(atm_str[_ + 2])]) 31 | return np.array(atm) 32 | 33 | 34 | class MISSILE: 35 | def __init__(self, missile=None, target=None): 36 | if missile is None: 37 | missile = [300., 38 | 0. / RAD, 39 | 0. / RAD, 40 | -20000., 41 | 20000., 42 | 0.] # 1.速度,2.弹道倾角,3.弹道偏角 4.导弹x,5.导弹y,6.导弹z y指向天 43 | if target is None: 44 | target = [0., 0., 0., -90. / RAD, 0. / RAD] # 目标x,y,z,theta,psi 45 | 46 | self.V = np.array(missile[:3]) # v,theta,psi 47 | self.Euler = np.array(missile[3:]) # x,y,z 48 | self.Y = np.concatenate([[0.], self.V, self.Euler, [200.]]) # 0.时间, 1.速度,2.位置,3.重量 49 | 50 | self.Rt = target[:3] # 目标信息 51 | self.Vt = target[3:] # 52 | 53 | self.X = 0. # 阻力drag force 54 | self.L = 0. # 升力lift force 55 | self.B = 0. # 侧向力 56 | R = self.Rt - self.Euler 57 | self.R = np.linalg.norm(R) # 弹目距离 58 | self.q_theta = atan2(R[1], sqrt(R[0] ** 2 + R[2] ** 2)) # 弹目视线与水平面夹角 59 | self.q_psi = atan2(R[2], R[0]) # 弹目视线与北向夹角,北偏西为正 60 | self.Rdot = 0. 61 | self.qdot_theta = 0. 62 | self.qdot_psi = 0. 63 | self.acy = 0. # 制导指令 64 | self.acz = 0. 65 | self.aby = 0. # 偏置项 66 | self.abz = 0. # 偏置项 67 | self.alpha = 0. # 攻角 68 | self.beta = 0. # 侧滑角 69 | 70 | # 创建插值函数 71 | atm = load_atm('atm2.txt') # 大气参数 72 | self.f_ma = interpolate.interp1d(atm[:, 0], atm[:, 2], 'linear') 73 | self.f_rho = interpolate.interp1d(atm[:, 0], atm[:, 1], 'linear') 74 | 75 | k = 1.0 76 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 77 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 78 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 79 | 80 | # 全弹道历史信息 81 | self.reY, self.reacy, self.reacz = [], [], [] 82 | 83 | def modify(self, missile=None): # 修改导弹初始状态 84 | if missile is None: 85 | missile = [random.uniform(200, 300), 86 | random.uniform(0, 45) / RAD, 87 | random.uniform(-5, 5) / RAD, 88 | random.uniform(-30000., -10000), 89 | random.uniform(10000, 30000), 90 | random.uniform(-2000, 2000)] # 1.速度,2.弹道倾角,3.弹道偏角 4.导弹x,5.导弹y,6.导弹z 91 | 92 | if missile[2] * missile[5] > 0: 93 | missile[2] *= -1 94 | self.V = np.array(missile[:3]) # v,theta,psi 95 | self.Euler = np.array(missile[3:]) # x,y,z 96 | self.Y = np.concatenate([[0.], self.V, self.Euler, [200.]]) # 0.时间, 1.速度,2.位置,3.重量 97 | 98 | R_nue = self.Rt - self.Euler 99 | self.R = np.linalg.norm(R_nue) # 弹目距离 100 | self.q_theta = atan2(R_nue[1], sqrt(R_nue[0] ** 2 + R_nue[2] ** 2)) # 弹目视线与水平面夹角 101 | self.q_psi = atan2(R_nue[2], R_nue[0]) # 弹目视线与北向夹角,北偏西为正 102 | 103 | k = random.uniform(0.8, 1.2) 104 | # k = 1.0 105 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 106 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 107 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 108 | 109 | self.reY, self.reacy, self.reacz = [], [], [] 110 | return self.collect() 111 | 112 | def get_ma(self, y, v): # 计算马赫数 113 | y = max(0, y) 114 | sonic = self.f_ma(y) 115 | return v / sonic 116 | 117 | def get_rho(self, y): 
# 计算空气密度 118 | y = max(0, y) 119 | return self.f_rho(y) 120 | 121 | def get_clalpha(self, ma): 122 | return self.f_clalpha(max(min(ma, 0.9), 0.4)) 123 | 124 | def get_cd0(self, ma): 125 | return self.f_cd0(max(min(ma, 0.9), 0.4)) 126 | 127 | def get_cdalpha(self, ma): 128 | return self.f_cdalpha(max(min(ma, 0.9), 0.4)) 129 | 130 | def dery(self, Y): # 右端子函数 131 | v, theta, psi = Y[1:4] # v,theta,psi 132 | x, y, z = Y[4:7] # x,y,z 133 | m = Y[-1] 134 | dy = np.array(Y) 135 | 136 | # 速度倾斜角gammav = 0 137 | dy[0] = 1 # t 138 | dy[1] = (-self.X / m) - g * sin(theta) # v 139 | dy[2] = (self.L - m * g * cos(theta)) / (m * v) # theta 140 | dy[3] = -self.B / (m * v * cos(theta)) # psi 141 | dy[4] = v * cos(theta) * cos(psi) # x北向 142 | dy[5] = v * sin(theta) # y天向 143 | dy[6] = -v * cos(theta) * sin(psi) # 东向 144 | dy[7] = 0 145 | return dy 146 | 147 | def step(self, action_y=0., action_z=0., h=0.001): 148 | if self.Y[0] < 400: 149 | self.V = v, theta, psi = self.Y[1:4] # v,theta,psi 150 | self.Euler = x, y, z = self.Y[4:7] # x,y,z 151 | m = self.Y[-1] # 弹重 152 | RHO = self.get_rho(y) # 大气密度 153 | ma = self.get_ma(y, v) # 马赫数 154 | 155 | Q = 0.5 * RHO * v ** 2 # 动压 156 | 157 | R_nue = self.Rt - self.Euler 158 | R = self.R # 上一周期的弹目距离 159 | self.R = np.linalg.norm(R_nue) # 弹目距离 160 | 161 | self.q_theta = q_theta = atan2(R_nue[1], sqrt(R_nue[0] ** 2 + R_nue[2] ** 2)) # 弹目视线与水平面夹角 162 | self.q_psi = q_psi = atan2(R_nue[2], R_nue[0]) # 弹目视线与北向夹角,北偏西为正 163 | 164 | v_nue = [v * cos(theta) * cos(psi), v * sin(theta), -v * cos(theta) * sin(psi)] 165 | nav_theta = atan2(v_nue[1], sqrt(v_nue[0] ** 2 + v_nue[2] ** 2)) # 根据卫星解算出的速度倾角(速度与水平面夹角) 166 | nav_psiv = atan2(v_nue[2], v_nue[0]) # 根据卫星解算出的速度与北向夹角 167 | 168 | # if R < 2: 169 | # return True, nav_theta, nav_psiv 170 | if y < 0: 171 | print("弹已落地, 弹目距离={:.1f}".format(self.R)) 172 | return True, nav_theta, nav_psiv 173 | elif self.R > R: 174 | print("远离目标, 弹目距离={:.1f}".format(self.R)) 175 | return True, nav_theta, nav_psiv 176 | 177 | R = self.R # 当前周期的弹目距离 178 | 179 | # 计算前置角 180 | eta1 = q_theta - nav_theta 181 | eta2 = q_psi - nav_psiv 182 | 183 | # 计算视线角变化率 184 | self.qdot_theta = qdot_theta = v * sin(eta1) / R 185 | self.qdot_psi = qdot_psi = v * cos(nav_theta) * sin(eta2) / R * cos(q_theta) 186 | 187 | # 实际项目中,飞行系数插值的三个维度分别为马赫数mach,舵偏角delta,攻角alpha 188 | cl_alpha = self.get_clalpha(ma) 189 | 190 | # self.aby = action_y 191 | # self.abz = action_z 192 | # self.acy = acy = 3 * v * qdot_theta + cos(nav_theta) * g + float(action_y) # 制导指令 193 | # self.acz = acz = 3 * v * cos(nav_theta) * qdot_psi + float(action_z) # 制导指令 194 | 195 | # 弹道成型 196 | tsg_n = 0.5 197 | Nv = 2 * (tsg_n + 2) 198 | Nq = (tsg_n + 1) * (tsg_n + 2) 199 | tgo = R / v 200 | acy = Nv * v * qdot_theta + cos(nav_theta) * g + \ 201 | Nq * v * (q_theta - action_y) / tgo 202 | acz = Nv * v * cos(nav_theta) * qdot_psi + \ 203 | Nq * v * cos(nav_theta) * (q_psi + action_z) / tgo 204 | 205 | m_max = 3 * g 206 | self.acy = acy = np.clip(acy, -m_max, m_max) 207 | self.acz = acz = np.clip(acz, -m_max, m_max) 208 | 209 | alpha = (m * acy) / (Q * S * cl_alpha) # 使用了sin(x)=x的近似,在10°以内满足这一关系 210 | beta = (m * acz) / (Q * S * cl_alpha) # 使用了sin(x)=x的近似,在10°以内满足这一关系 211 | 212 | a_max = 20. 
/ RAD 213 | self.alpha = alpha = np.clip(alpha, -a_max, a_max) 214 | self.beta = beta = np.clip(beta, -a_max, a_max) 215 | 216 | cd = self.get_cd0(ma) + self.get_cdalpha(ma) * (alpha ** 2 + beta ** 2) # 阻力系数 217 | cl = cl_alpha * alpha # 升力系数 218 | cb = cl_alpha * beta # 侧向力系数 219 | 220 | self.X = cd * Q * S # 阻力 221 | self.L = cl * Q * S # 升力 222 | self.B = cb * Q * S # 侧向力 223 | 224 | def rk4(func, Y, h=0.1): 225 | k1 = h * func(Y) 226 | k2 = h * func(Y + 0.5 * k1) 227 | k3 = h * func(Y + 0.5 * k2) 228 | k4 = h * func(Y + k3) 229 | output = Y + (k1 + 2 * k2 + 2 * k3 + k4) / 6 230 | return output 231 | 232 | self.Y = rk4(self.dery, self.Y, h) 233 | if self.Y[2] > 2 * pi: 234 | self.Y[2] = self.Y[2] - 2 * pi 235 | if self.Y[2] < -2 * pi: 236 | self.Y[2] = self.Y[2] + 2 * pi 237 | 238 | if self.Y[3] > 2 * pi: 239 | self.Y[3] = self.Y[3] - 2 * pi 240 | if self.Y[3] < -2 * pi: 241 | self.Y[3] = self.Y[3] + 2 * pi 242 | 243 | self.reY.append(self.Y) 244 | self.reacy.append(self.acy) 245 | self.reacz.append(self.acz) 246 | return False, nav_theta, nav_psiv 247 | else: 248 | print("超时!未击中目标!") 249 | # self.plot_data() 250 | return True, 0., 0. 251 | 252 | def plot_data(self): 253 | reY = np.array(self.reY) 254 | # plt.ion() 255 | plt.clf() 256 | fig = plt.axes(projection='3d') 257 | fig.plot3D(reY[:, 4] / 1000, reY[:, 6] / 1000, reY[:, 5] / 1000, "k-") 258 | fig.set_xlabel("x") 259 | fig.set_ylabel("y") 260 | fig.set_zlabel("z") 261 | 262 | fig = plt.figure(2) 263 | fig.clf() 264 | ax1 = fig.add_subplot(211) 265 | ax1.plot(reY[:, 4] / 1000, reY[:, 5] / 1000, 'k-') 266 | ax1.set_xlabel("x") 267 | ax1.set_ylabel("y") 268 | plt.grid() 269 | 270 | ax2 = fig.add_subplot(212) 271 | ax2.plot(reY[:, 4] / 1000, reY[:, 6] / 1000, 'k-') 272 | ax1.set_xlabel("x") 273 | ax1.set_ylabel("y") 274 | plt.grid() 275 | 276 | # plt.pause(0.1) 277 | plt.show() 278 | 279 | def collect(self): 280 | t = self.Y[0] # 时间 281 | v = self.Y[1] # 速度 282 | theta = self.Y[2] # 弹道倾角 283 | psi = self.Y[3] # 弹道偏角 284 | r = self.R # 弹目距离 285 | q_theta = self.q_theta # 弹目视线角 286 | q_psi = self.q_psi # 弹目视线角 287 | x = self.Y[4] # 弹轴向位置 288 | y = self.Y[5] # 弹纵向位置 289 | z = self.Y[6] # 弹横向位置 290 | return v, theta, psi, r, q_theta, q_psi, x, y, z, t 291 | 292 | def save_data(self): 293 | data = {'Y': np.array(self.reY), 294 | 'R': np.array(self.R), 295 | 'acy': np.array(self.reacy), 296 | 'acz': np.array(self.reacz)} 297 | return data 298 | 299 | 300 | if __name__ == '__main__': 301 | env = MISSILE() 302 | flight_data = {} 303 | for i in range(100): 304 | env.modify([200., 0. / RAD, 0. / RAD, -20000., 10000., 5000.]) 305 | tdy = random.uniform(-60., -20.) / RAD 306 | tdz = random.uniform(-20., 20.) 
/ RAD 307 | done = False 308 | while done is False: 309 | done, a, b = env.step(tdy, tdz, h=0.01) # TSG 310 | env.plot_data() 311 | flight_data['sim_{}'.format(i)] = env.save_data() 312 | flight_data['time{}'.format(i)] = {'error_y': [(tdy - env.Y[2]) * RAD, 0], 313 | 'error_z': [(tdz - env.Y[3]) * RAD, 0]} 314 | print('Testing | Episode: {}/{} | Target: {:.2f}/{:.2f} | Actual: {:.2f}/{:.2f} | Error: {:.2f}/{:.2f}' 315 | .format(i + 1, 100, 316 | tdy * RAD, tdz * RAD, 317 | env.Y[2] * RAD, env.Y[3] * RAD, 318 | (tdy - env.Y[2]) * RAD, (tdz - env.Y[3]) * RAD)) 319 | savemat('./mats/flight_sim_tsg_monte_3d.mat', flight_data) 320 | -------------------------------------------------------------------------------- /OAgent_sim.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import random 4 | import matplotlib.pyplot as plt # clf()清图 # cla()清坐标轴 # close()关窗口 5 | from scipy import interpolate 6 | 7 | # 常量 8 | pi = 3.141592653589793 9 | RAD = 180 / pi # 弧度转角度 10 | g = 0. # 重力加速度 11 | S = 0.0572555 # 特征面积 12 | 13 | plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 14 | plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 15 | 16 | p_flag = False # 是否考虑推力 17 | 18 | Ma2 = np.array([[0.4, 39.056, 0.4604, 39.072], 19 | [0.6, 40.801, 0.4682, 39.735], 20 | [0.8, 41.372, 0.4635, 39.242], 21 | [0.9, 42.468, 0.4776, 40.351]]) 22 | 23 | 24 | def load_atm(path): 25 | file = open(path) 26 | atm_str = file.read().split() 27 | atm = [] 28 | for _ in range(0, len(atm_str), 3): 29 | atm.append([float(atm_str[_]), float(atm_str[_ + 1]), float(atm_str[_ + 2])]) 30 | return np.array(atm) 31 | 32 | 33 | class OAgentSim: 34 | def __init__(self, no_=0, missile=None, target=None): 35 | self.no_ = no_ # 导弹编号 36 | 37 | if missile is None: 38 | missile = [0., 39 | 300., 40 | 45. / RAD, 41 | -20000., 42 | 10000, 43 | 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 44 | if target is None: 45 | target = [0., 0., -150.] # 目标x,y,落角 46 | 47 | self.Y = np.array(missile) 48 | self.xt, self.yt, self.qt = target[0], target[1], target[2] / RAD # 目标信息 49 | 50 | self.X = 0. # 阻力drag force 51 | self.L = 0. # 升力lift force 52 | Rx = self.xt - self.Y[3] 53 | Ry = self.yt - self.Y[4] 54 | self.R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 55 | self.q = math.atan2(Ry, Rx) # 弹目视线角 56 | self.Rdot = 0. 57 | self.qdot = 0. 58 | self.ac = 0. # 制导指令 59 | self.ab = 0. # 偏置项 60 | self.alpha = 0. # 攻角 61 | if p_flag: 62 | self.P = 1000. # 推力push force 63 | self.mc = 0.4 # 每秒损耗质量 64 | else: 65 | self.P = 0. # 推力push force 66 | self.mc = 0. 
# 每秒损耗质量 67 | # self.tgo = (1 + (self.Y[2] - self.q) ** 2 / 10) * self.R / self.Y[1] # T_go = (1-(theta-lambda)^2/10)*R_go/V 68 | 69 | # 创建插值函数 70 | atm = load_atm('atm2.txt') # 大气参数 71 | self.f_ma = interpolate.interp1d(atm[:, 0], atm[:, 2], 'linear') 72 | self.f_rho = interpolate.interp1d(atm[:, 0], atm[:, 1], 'linear') 73 | 74 | k = 1.0 75 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 76 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 77 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 78 | 79 | # 全弹道历史信息 80 | self.reY, self.reac, self.reCX, self.reP, self.realpha, self.reR = [], [], [], [], [], [] 81 | 82 | def modify(self, missile=None): # 修改导弹初始状态 83 | if missile is None: 84 | missile = [0., 85 | random.uniform(200, 300), 86 | random.uniform(-20, 20) / RAD, 87 | random.uniform(-30000., -10000), 88 | random.uniform(-20000, 20000), 89 | 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 90 | # missile = [0., 91 | # random.uniform(200, 300), 92 | # random.uniform(0, 45) / RAD, 93 | # random.uniform(-30000., -10000), 94 | # random.uniform(10000, 30000), 95 | # 200] # 0.时间,1.速度,2.弹道倾角,3.导弹x,4.导弹y,5.质量 96 | self.Y = np.array(missile) 97 | Rx = self.xt - self.Y[3] 98 | Ry = self.yt - self.Y[4] 99 | self.R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 100 | self.q = math.atan2(Ry, Rx) # 弹目视线角 101 | 102 | k = random.uniform(0.8, 1.2) 103 | self.f_clalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 1] * k, 'cubic') 104 | self.f_cd0 = interpolate.interp1d(Ma2[:, 0], Ma2[:, 2] * k, 'cubic') 105 | self.f_cdalpha = interpolate.interp1d(Ma2[:, 0], Ma2[:, 3] * k, 'cubic') 106 | 107 | # self.tgo = (1 + (self.Y[2] - self.q) ** 2 / 10) * self.R / self.Y[1] # T_go = (1-(theta-lambda)^2/10)*R_go/V 108 | self.reY, self.reac, self.reCX, self.reP, self.realpha, self.reR = [], [], [], [], [], [] 109 | return self.collect() 110 | 111 | def terminate(self): 112 | self.X = 0. # 阻力drag force 113 | self.L = 0. # 升力lift force 114 | self.R = 0. # 弹目距离range 115 | self.q = 0. # 弹目视线角 116 | self.Rdot = 0. 117 | self.qdot = 0. 118 | self.ac = 0. # 制导指令 119 | self.alpha = 0. # 攻角 120 | if p_flag: 121 | self.P = 1000. # 推力push force 122 | self.mc = 0.4 # 每秒损耗质量 123 | else: 124 | self.P = 0. # 推力push force 125 | self.mc = 0. 
# 每秒损耗质量 126 | 127 | def get_ma(self, y, v): # 计算马赫数 128 | y = max(0, y) 129 | if g == 0.: 130 | y = 10000 131 | sonic = self.f_ma(y) 132 | return v / sonic 133 | 134 | def get_rho(self, y): # 计算空气密度 135 | y = max(0, y) 136 | if g == 0.: 137 | y = 10000 138 | return self.f_rho(y) 139 | 140 | def get_clalpha(self, ma): 141 | return self.f_clalpha(max(min(ma, 0.9), 0.4)) 142 | 143 | def get_cd0(self, ma): 144 | return self.f_cd0(max(min(ma, 0.9), 0.4)) 145 | 146 | def get_cdalpha(self, ma): 147 | return self.f_cdalpha(max(min(ma, 0.9), 0.4)) 148 | 149 | def dery(self, Y): # 右端子函数 150 | v = Y[1] 151 | theta = Y[2] 152 | m = Y[5] 153 | dy = np.array(Y) 154 | dy[0] = 1 155 | dy[1] = (self.P * math.cos(self.alpha) - self.X) / m - g * math.sin(theta) 156 | dy[2] = (self.P * math.sin(self.alpha) + self.L - m * g * math.cos(theta)) / (v * m) 157 | dy[3] = v * math.cos(theta) 158 | dy[4] = v * math.sin(theta) 159 | dy[5] = -self.mc 160 | return dy 161 | 162 | def step(self, action=0., h=0.001): 163 | if self.Y[0] < 600: 164 | x = self.Y[3] # 横向位置 165 | y = self.Y[4] # 纵向位置 166 | v = self.Y[1] # 速度 167 | theta = self.Y[2] # 弹道倾角 168 | m = self.Y[5] # 弹重 169 | RHO = self.get_rho(y) # 大气密度 170 | ma = self.get_ma(y, v) # 马赫数 171 | 172 | Q = 0.5 * RHO * v ** 2 # 动压 173 | 174 | Rx = self.xt - x 175 | Ry = self.yt - y 176 | vx = -v * math.cos(theta) # x向速度 177 | vy = -v * math.sin(theta) # y向速度 178 | self.R = R = np.linalg.norm([Rx, Ry], ord=2) # 弹目距离 179 | self.qdot = qdot = (Rx * vy - Ry * vx) / R ** 2 180 | self.q = q = math.atan2(Ry, Rx) # 弹目视线角 181 | self.Rdot = Rdot = (Rx * vx + Ry * vy) / R 182 | # self.tgo = tgo = (1 + (theta - q) ** 2 / 10) * R / v 183 | 184 | if R < 20.: 185 | return True 186 | # elif y < 0: 187 | # print("弹已落地, 弹目距离={:.1f}".format(R)) 188 | # return True 189 | # elif Rdot >= 0: 190 | # print("逐渐远离目标...") 191 | # return True 192 | 193 | # 工作时序 194 | t = self.Y[0] 195 | if p_flag: 196 | if t <= 86: # 第86s燃料耗尽 197 | self.mc = 0.4 # 秒流量 198 | self.P = 1000 # 推力 199 | else: 200 | self.mc = 0. 201 | self.P = 0. 202 | 203 | # 实际项目中,飞行系数插值的三个维度分别为马赫数mach,舵偏角delta,攻角alpha 204 | cl_alpha = self.get_clalpha(ma) 205 | 206 | self.ab = action 207 | # self.ac = ac = 3 * v * qdot + math.cos(theta) * g + float(action) # 制导指令 208 | tsg_n = 0. 
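# Annotation (not in the original source): the next statement is the planar
# trajectory-shaping guidance (TSG) law
#     ac = 2*(n+2)*V*qdot + g*cos(theta) + (n+1)*(n+2)*V*(q - q_d)/tgo,  with tgo = R/V.
# With the shaping order tsg_n = 0 this reduces to
#     ac = 4*V*qdot + g*cos(theta) + 2*V*(q - action)/tgo,
# so `action` plays the role of the commanded terminal line-of-sight angle q_d (rad).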
209 | self.ac = ac = 2 * (tsg_n + 2) * v * qdot + math.cos(theta) * g + \ 210 | (tsg_n + 1) * (tsg_n + 2) * v * (q - action) / (R / v) # 弹道成型 211 | self.alpha = alpha = (m * ac) / (Q * S * cl_alpha + self.P) # 使用了sin(x)=x的近似,在10°以内满足这一关系 212 | if alpha > 15.0 / RAD: 213 | self.alpha = alpha = 15.0 / RAD 214 | elif alpha < -15.0 / RAD: 215 | self.alpha = alpha = -15.0 / RAD 216 | 217 | cd = self.get_cd0(ma) + self.get_cdalpha(ma) * alpha ** 2 # 阻力系数 218 | cl = cl_alpha * alpha # 升力系数 219 | 220 | self.X = cd * Q * S # 阻力 221 | self.L = cl * Q * S # 升力 222 | 223 | def rk4(func, Y, h=0.1): 224 | k1 = h * func(Y) 225 | k2 = h * func(Y + 0.5 * k1) 226 | k3 = h * func(Y + 0.5 * k2) 227 | k4 = h * func(Y + k3) 228 | output = Y + (k1 + 2 * k2 + 2 * k3 + k4) / 6 229 | return output 230 | 231 | self.Y = rk4(self.dery, self.Y, h) 232 | if self.Y[2] > 2 * pi: 233 | self.Y[2] = self.Y[2] - 2 * pi 234 | if self.Y[2] < -2 * pi: 235 | self.Y[2] = self.Y[2] + 2 * pi 236 | 237 | self.reY.append(self.Y) 238 | self.reac.append(self.ac) 239 | self.reCX.append(self.X) 240 | self.reP.append(action) 241 | self.realpha.append(self.alpha) 242 | self.reR.append(self.R) 243 | return False 244 | else: 245 | print("超时!未击中目标!") 246 | # self.plot_data() 247 | return True 248 | 249 | def collect(self): 250 | t = self.Y[0] # 时间 251 | v = self.Y[1] # 速度 252 | theta = self.Y[2] # 弹道倾角 253 | r = self.R # 弹目距离 254 | q = self.q # 弹目视线角 255 | x = self.Y[3] # 弹横向位置 256 | y = self.Y[4] # 弹纵向位置 257 | return v, theta, r, q, x, y, t 258 | 259 | def plot_data(self, figure_num=0): 260 | reY = np.array(self.reY) 261 | reac = np.array(self.reac) 262 | reCX = np.array(self.reCX) 263 | reP = np.array(self.reP) 264 | realpha = np.array(self.realpha) 265 | reR = np.array(self.reR) 266 | plt.figure(figure_num) 267 | plt.ion() 268 | plt.clf() 269 | # 弹道曲线 270 | plt.subplots_adjust(hspace=0.6) 271 | plt.subplot(3, 3, 1) 272 | plt.plot(reY[:, 3] / 1000, reY[:, 4] / 1000, 'k-') 273 | plt.xlabel('Firing Range (km)') 274 | plt.ylabel('altitude (km)') 275 | plt.title('弹道曲线') 276 | plt.grid() 277 | 278 | # 速度曲线 279 | plt.subplot(3, 3, 2) 280 | plt.plot(reY[:, 0], reY[:, 1], 'k-') 281 | plt.xlabel('Time (s)') 282 | plt.ylabel('Speed (m/s)') 283 | plt.title('速度') 284 | plt.grid() 285 | 286 | # 弹道倾角 287 | plt.subplot(3, 3, 3) 288 | plt.plot(reY[:, 0], reY[:, 2] * RAD, 'k-') 289 | plt.xlabel('Time (s)') 290 | plt.ylabel('Theta (°)') 291 | plt.title('弹道倾角') 292 | plt.grid() 293 | 294 | # 过载指令 295 | plt.subplot(3, 3, 4) 296 | plt.plot(reY[:, 0], reac, 'k-') 297 | plt.xlabel('time (s)') 298 | plt.ylabel('ac') 299 | plt.title('过载指令') 300 | plt.grid() 301 | 302 | # 推力 303 | plt.subplot(3, 3, 5) 304 | plt.plot(reY[:, 0], reP, 'k-') 305 | plt.xlabel('Time (s)') 306 | plt.ylabel('action') 307 | plt.title('偏置项') 308 | plt.grid() 309 | 310 | # 高度曲线 311 | plt.subplot(3, 3, 6) 312 | plt.plot(reY[:, 0], reY[:, 4] / 1000, 'k-') 313 | plt.xlabel('time (s)') 314 | plt.ylabel('altitude (km)') 315 | plt.title('高度') 316 | plt.grid() 317 | 318 | # 阻力变化曲线 319 | plt.subplot(3, 3, 7) 320 | plt.plot(reY[:, 0], reCX, 'k-') 321 | plt.title('阻力') 322 | plt.grid() 323 | 324 | # 平衡攻角曲线 325 | plt.subplot(3, 3, 8) 326 | plt.plot(reY[:, 0], realpha * RAD, 'k-') 327 | plt.title('平衡攻角') 328 | plt.grid() 329 | 330 | # 弹目距离 331 | plt.subplot(3, 3, 9) 332 | plt.plot(reY[:, 0], reR, 'k-') 333 | plt.title('弹目距离') 334 | plt.grid() 335 | 336 | plt.pause(0.1) 337 | 338 | def save_data(self): 339 | data = {'reY': np.array(self.reY), 340 | 'reac': np.array(self.reac), 341 | 'reCX': 
np.array(self.reCX), 342 | 'reP': np.array(self.reP), 343 | 'realpha': np.array(self.realpha), 344 | 'reR': np.array(self.reR)} 345 | return data 346 | 347 | 348 | if __name__ == '__main__': 349 | oAgent = OAgentSim() 350 | for i in range(1): 351 | oAgent.modify([0., 300., 0. / RAD, -20000., 20000, 200]) 352 | done = False 353 | while done is False: 354 | done = oAgent.step(-179. / RAD, 0.01) # 单步运行 355 | oAgent.plot_data() 356 | print(oAgent.Y[2] * RAD) 357 | plt.ioff() 358 | plt.show() 359 | --------------------------------------------------------------------------------
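Workflow note (inferred from the scripts above, not part of the repository): collect_data.py rolls out planar OAgent_sim trajectories and writes flight_data.mat; dnn_regression.py fits the impact-angle regressor on the collected data and saves it under ./dnn_model; dnn_test.py reloads that checkpoint and additionally exports dnn_model.tflite; ppo_train.py trains the PPO bias-command policy on OAgent_sim, using the error between the target and the DNN-predicted impact angle divided by time-to-go as the state; ppo_test.py evaluates the trained policy (or the pure TSG law) on the 3-D model in missile_3d.py and saves the Monte-Carlo results to ./mats/.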