├── Docs ├── plant.gif ├── simulink_model.png └── tracking.gif ├── IQL_conventional ├── __init__.py ├── agents │ ├── __init__.py │ ├── models.py │ ├── policies.py │ └── utils.py ├── config │ ├── config_ford.ini │ └── config_gym.ini ├── env │ ├── __init__.py │ ├── env_ford.py │ ├── test.py │ └── utils.py ├── main.py ├── trainer.py └── utils.py ├── README.md └── examples ├── PV ├── parameters.m ├── pv_inverter.slx ├── pv_inverter_pid.slx └── pv_inverter_pv.slx ├── plant_ex ├── plant.py └── plant.slx └── tracking ├── linear_controller.m ├── tracking.py ├── tracking.slx └── tracking1.slx /Docs/plant.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/Docs/plant.gif -------------------------------------------------------------------------------- /Docs/simulink_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/Docs/simulink_model.png -------------------------------------------------------------------------------- /Docs/tracking.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/Docs/tracking.gif -------------------------------------------------------------------------------- /IQL_conventional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/IQL_conventional/__init__.py -------------------------------------------------------------------------------- /IQL_conventional/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/IQL_conventional/agents/__init__.py -------------------------------------------------------------------------------- /IQL_conventional/agents/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .utils import * 3 | from .policies import * 4 | import logging 5 | import multiprocessing as mp 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | 10 | class A2C: 11 | def __init__(self, n_s, n_a, total_step, model_config, seed=0, n_f=None): 12 | # load parameters 13 | self.name = 'a2c' 14 | self.n_agent = 1 15 | # init reward norm/clip 16 | self.reward_clip = model_config.getfloat('reward_clip') 17 | self.reward_norm = model_config.getfloat('reward_norm') 18 | self.n_s = n_s 19 | self.n_a = n_a 20 | self.n_step = model_config.getint('batch_size') 21 | # init tf 22 | tf.reset_default_graph() 23 | tf.set_random_seed(seed) 24 | config = tf.ConfigProto(allow_soft_placement=True) 25 | self.sess = tf.Session(config=config) 26 | self.policy = self._init_policy(n_s, n_a, n_f, model_config) 27 | self.saver = tf.train.Saver(max_to_keep=15) 28 | if total_step: 29 | # training 30 | self.total_step = total_step 31 | self._init_scheduler(model_config) 32 | self._init_train(model_config) 33 | self.sess.run(tf.global_variables_initializer()) 34 | 35 | def _init_policy(self, n_s, n_a, n_f, model_config, agent_name=None): 36 | n_fw = model_config.getint('num_fw') 37 | n_ft = model_config.getint('num_ft') 38 | n_lstm = 
model_config.getint('num_lstm') 39 | policy = None 40 | return policy 41 | 42 | def _init_scheduler(self, model_config): 43 | lr_init = model_config.getfloat('lr_init') 44 | lr_decay = model_config.get('lr_decay') 45 | beta_init = model_config.getfloat('entropy_coef_init') 46 | beta_decay = model_config.get('entropy_decay') 47 | if lr_decay == 'constant': 48 | self.lr_scheduler = Scheduler(lr_init, decay=lr_decay) 49 | if beta_decay == 'constant': 50 | self.beta_scheduler = Scheduler(beta_init, decay=beta_decay) 51 | 52 | def _init_train(self, model_config): 53 | # init loss 54 | v_coef = model_config.getfloat('value_coef') 55 | max_grad_norm = model_config.getfloat('max_grad_norm') 56 | alpha = model_config.getfloat('rmsp_alpha') 57 | epsilon = model_config.getfloat('rmsp_epsilon') 58 | self.policy.prepare_loss(v_coef, max_grad_norm, alpha, epsilon) 59 | 60 | # init replay buffer 61 | gamma = model_config.getfloat('gamma') 62 | self.trans_buffer = OnPolicyBuffer(gamma) 63 | 64 | def save(self, model_dir, global_step): 65 | self.saver.save(self.sess, model_dir + 'checkpoint', 66 | global_step=global_step) 67 | 68 | def load(self, model_dir, checkpoint=None): 69 | save_file = None 70 | save_step = 0 71 | if os.path.exists(model_dir): 72 | if checkpoint is None: 73 | for file in os.listdir(model_dir): 74 | if file.startswith('checkpoint'): 75 | prefix = file.split('.')[0] 76 | tokens = prefix.split('-') 77 | if len(tokens) != 2: 78 | continue 79 | cur_step = int(tokens[1]) 80 | if cur_step > save_step: 81 | save_file = prefix 82 | save_step = cur_step 83 | else: 84 | save_file = 'checkpoint-' + str(int(checkpoint)) 85 | if save_file is not None: 86 | self.saver.restore(self.sess, model_dir + save_file) 87 | logging.info('Checkpoint loaded: %s' % save_file) 88 | return True 89 | logging.error('Can not find old checkpoint for %s' % model_dir) 90 | return False 91 | 92 | def reset(self): 93 | self.policy._reset() 94 | 95 | def backward(self, R, summary_writer=None, global_step=None): 96 | cur_lr = self.lr_scheduler.get(self.n_step) 97 | cur_beta = self.beta_scheduler.get(self.n_step) 98 | obs, acts, dones, Rs, Advs = self.trans_buffer.sample_transition(R) 99 | self.policy.backward(self.sess, obs, acts, dones, Rs, Advs, cur_lr, cur_beta, 100 | summary_writer=summary_writer, global_step=global_step) 101 | 102 | def forward(self, ob, done, out_type='pv'): 103 | return self.policy.forward(self.sess, ob, done, out_type) 104 | 105 | def add_transition(self, ob, action, reward, value, done): 106 | # Hard code the reward norm for negative reward only 107 | if (self.reward_norm): 108 | reward /= self.reward_norm 109 | if self.reward_clip: 110 | reward = np.clip(reward, -self.reward_clip, self.reward_clip) 111 | self.trans_buffer.add_transition(ob, action, reward, value, done) 112 | 113 | 114 | class IQL(A2C): 115 | def __init__(self, n_s_ls, n_a_ls, total_step, model_config, seed=0, model_type='dqn'): 116 | self.name = 'iql' 117 | self.model_type = model_type 118 | self.agents = [] 119 | self.n_agent = len(n_s_ls) 120 | self.reward_clip = model_config.getfloat('reward_clip') 121 | self.reward_norm = model_config.getfloat('reward_norm') 122 | self.n_s_ls = n_s_ls 123 | self.n_a_ls = n_a_ls 124 | self.n_step = model_config.getint('batch_size') 125 | # init tf 126 | tf.reset_default_graph() 127 | tf.set_random_seed(seed) 128 | config = tf.ConfigProto(allow_soft_placement=True) 129 | self.sess = tf.Session(config=config) 130 | self.policy_ls = [] 131 | for i, (n_s, n_a) in enumerate(zip(self.n_s_ls, 
self.n_a_ls)): 132 | # agent_name is needed to differentiate multi-agents 133 | self.policy_ls.append(self._init_policy(n_s, n_a, model_config, 134 | agent_name='{:d}a'.format(i))) 135 | self.saver = tf.train.Saver(max_to_keep=5) 136 | if total_step: 137 | # training 138 | self.total_step = total_step 139 | self._init_scheduler(model_config) 140 | self._init_train(model_config) 141 | self.cur_step = 0 142 | self.sess.run(tf.global_variables_initializer()) 143 | 144 | def _init_policy(self, n_s, n_a, model_config, agent_name=None): 145 | if self.model_type == 'dqn': 146 | n_h = model_config.getint('num_h') 147 | n_fc = model_config.getint('num_fc') 148 | policy = DeepQPolicy(n_s, n_a, self.n_step, n_fc0=n_fc, n_fc=n_h, 149 | name=agent_name) 150 | return policy 151 | 152 | def _init_scheduler(self, model_config): 153 | lr_init = model_config.getfloat('lr_init') 154 | lr_decay = model_config.get('lr_decay') 155 | eps_init = model_config.getfloat('epsilon_init') 156 | eps_decay = model_config.get('epsilon_decay') 157 | if lr_decay == 'constant': 158 | self.lr_scheduler = Scheduler(lr_init, decay=lr_decay) 159 | else: 160 | lr_min = model_config.getfloat('lr_min') 161 | self.lr_scheduler = Scheduler( 162 | lr_init, lr_min, self.total_step, decay=lr_decay) 163 | if eps_decay == 'constant': 164 | self.eps_scheduler = Scheduler(eps_init, decay=eps_decay) 165 | else: 166 | eps_min = model_config.getfloat('epsilon_min') 167 | eps_ratio = model_config.getfloat('epsilon_ratio') 168 | self.eps_scheduler = Scheduler(eps_init, eps_min, self.total_step * eps_ratio, 169 | decay=eps_decay) 170 | 171 | def _init_train(self, model_config): 172 | # init loss 173 | max_grad_norm = model_config.getfloat('max_grad_norm') 174 | gamma = model_config.getfloat('gamma') 175 | buffer_size = model_config.getfloat('buffer_size') 176 | self.trans_buffer_ls = [] 177 | for i in range(self.n_agent): 178 | self.policy_ls[i].prepare_loss(max_grad_norm, gamma) 179 | self.trans_buffer_ls.append(ReplayBuffer(buffer_size, self.n_step)) 180 | 181 | def backward(self, summary_writer=None, global_step=None): 182 | # update networks 183 | cur_lr = self.lr_scheduler.get(self.n_step) 184 | if self.trans_buffer_ls[0].size < self.trans_buffer_ls[0].batch_size: 185 | return 186 | for i in range(self.n_agent): 187 | for k in range(10): # update network 10 times 188 | obs, acts, next_obs, rs, dones = self.trans_buffer_ls[i].sample_transition() 189 | if i == 0: 190 | self.policy_ls[i].backward(self.sess, obs, np.squeeze(acts), next_obs, dones, rs, cur_lr, 191 | summary_writer=summary_writer, 192 | global_step=global_step + k) 193 | else: 194 | self.policy_ls[i].backward( 195 | self.sess, obs, acts, next_obs, dones, rs, cur_lr) 196 | 197 | def forward(self, obs, mode='act', stochastic=False): 198 | # get actions and policies 199 | if mode == 'explore': 200 | eps = self.eps_scheduler.get(1) 201 | action = [] 202 | qs_ls = [] 203 | for i in range(self.n_agent): 204 | qs = self.policy_ls[i].forward(self.sess, obs) # here we only have one agent, ori = obs[i] 205 | if (mode == 'explore') and (np.random.random() < eps): 206 | action.append(np.random.randint(self.n_a_ls[i])) 207 | else: 208 | if not stochastic: 209 | action.append(np.argmax(qs)) 210 | else: 211 | qs = qs / np.sum(qs) 212 | action.append(np.random.choice(np.arange(len(qs)), p=qs)) 213 | qs_ls.append(qs) 214 | return action, qs_ls 215 | 216 | def reset(self): 217 | # do nothing 218 | return 219 | 220 | def add_transition(self, obs, actions, rewards, next_obs, done): 221 | # add 
experiences to buffers accordingly 222 | if (self.reward_norm): 223 | rewards = rewards / self.reward_norm 224 | if self.reward_clip: 225 | rewards = np.clip(rewards, -self.reward_clip, self.reward_clip) 226 | for i in range(self.n_agent): 227 | self.trans_buffer_ls[i].add_transition(obs, actions, 228 | rewards, next_obs, done) -------------------------------------------------------------------------------- /IQL_conventional/agents/policies.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .utils import * 4 | 5 | 6 | class QPolicy: 7 | def __init__(self, n_a, n_s, n_step, policy_name, agent_name): 8 | self.name = policy_name 9 | if agent_name is not None: 10 | # for multi-agent system 11 | self.name += '_' + str(agent_name) 12 | self.n_a = n_a 13 | self.n_s = n_s 14 | self.n_step = n_step 15 | 16 | def forward(self, ob, *_args, **_kwargs): 17 | raise NotImplementedError() 18 | 19 | def _build_fc_net(self, h, n_fc_ls): 20 | for i, n_fc in enumerate(n_fc_ls): 21 | h = fc(h, 'q_fc_%d' % i, n_fc) 22 | q = fc(h, 'q', self.n_a, act=lambda x: x) 23 | return tf.squeeze(q) 24 | 25 | def _build_net(self): 26 | raise NotImplementedError() 27 | 28 | def prepare_loss(self, max_grad_norm, gamma): 29 | self.A = tf.placeholder(tf.int32, [self.n_step]) 30 | self.S1 = tf.placeholder( 31 | tf.float32, [self.n_step, self.n_s]) 32 | self.R = tf.placeholder(tf.float32, [self.n_step]) 33 | self.DONE = tf.placeholder(tf.bool, [self.n_step]) 34 | A_sparse = tf.one_hot(self.A, self.n_a) 35 | 36 | # backward, calculate loss 37 | with tf.variable_scope(self.name + '_q', reuse=True): 38 | q0s = self._build_net(self.S) 39 | q0 = tf.reduce_sum(q0s * A_sparse, axis=1) 40 | with tf.variable_scope(self.name + '_q', reuse=True): 41 | q1s = self._build_net(self.S1) 42 | q1 = tf.reduce_max(q1s, axis=1) 43 | tq = tf.stop_gradient(tf.where(self.DONE, self.R, self.R + gamma * q1)) 44 | self.loss = tf.reduce_mean(tf.square(q0 - tq)) 45 | 46 | wts = tf.trainable_variables(scope=self.name) 47 | grads = tf.gradients(self.loss, wts) 48 | if max_grad_norm > 0: 49 | grads, self.grad_norm = tf.clip_by_global_norm( 50 | grads, max_grad_norm) 51 | self.lr = tf.placeholder(tf.float32, []) 52 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) 53 | self._train = self.optimizer.apply_gradients(list(zip(grads, wts))) 54 | # monitor training 55 | if self.name.endswith('_0a'): 56 | summaries = [] 57 | summaries.append(tf.summary.scalar( 58 | 'train/%s_loss' % self.name, self.loss)) 59 | summaries.append(tf.summary.scalar('train/%s_q' % 60 | self.name, tf.reduce_mean(q0))) 61 | summaries.append(tf.summary.scalar('train/%s_tq' % 62 | self.name, tf.reduce_mean(tq))) 63 | summaries.append(tf.summary.scalar( 64 | 'train/%s_gradnorm' % self.name, self.grad_norm)) 65 | self.summary = tf.summary.merge(summaries) 66 | 67 | 68 | class DeepQPolicy(QPolicy): 69 | def __init__(self, n_s, n_a, n_step, n_fc0=128, n_fc=64, name=None): 70 | super().__init__(n_a, n_s, n_step, 'dqn', name) 71 | self.n_fc = n_fc 72 | self.n_fc0 = n_fc0 73 | self.S = tf.placeholder(tf.float32, [None, n_s]) 74 | with tf.variable_scope(self.name + '_q'): 75 | self.qvalues = self._build_net(self.S) 76 | 77 | def _build_net(self, S): 78 | h0 = fc(S[:, :self.n_s], 'q_fcw', self.n_fc0) 79 | h1 = fc(S[:, self.n_s:], 'q_fct', self.n_fc0 / 4) 80 | h = tf.concat([h0, h1], 1) 81 | return self._build_fc_net(h, [self.n_fc]) 82 | 83 | def forward(self, sess, ob): 84 | return 
sess.run(self.qvalues, {self.S: np.array([ob])}) 85 | 86 | def backward(self, sess, obs, acts, next_obs, dones, rs, cur_lr, 87 | summary_writer=None, global_step=None): 88 | # update networks 89 | if summary_writer is None: 90 | ops = self._train 91 | else: 92 | ops = [self.summary, self._train] 93 | outs = sess.run(ops, 94 | {self.S: obs, 95 | self.A: acts, 96 | self.S1: next_obs, 97 | self.DONE: dones, 98 | self.R: rs, 99 | self.lr: cur_lr}) 100 | if summary_writer is not None: 101 | summary_writer.add_summary(outs[0], global_step=global_step) -------------------------------------------------------------------------------- /IQL_conventional/agents/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import tensorflow as tf 4 | 5 | """ 6 | initializers 7 | """ 8 | DEFAULT_SCALE = np.sqrt(2) 9 | DEFAULT_MODE = 'fan_in' 10 | 11 | 12 | def ortho_init(scale=DEFAULT_SCALE, mode=None): 13 | def _ortho_init(shape, dtype, partition_info=None): 14 | # lasagne ortho init for tf 15 | shape = tuple(shape) 16 | if len(shape) == 2: # fc: in, out 17 | flat_shape = shape 18 | elif (len(shape) == 3) or (len(shape) == 4): # 1d/2dcnn: (in_h), in_w, in_c, out 19 | flat_shape = (np.prod(shape[:-1]), shape[-1]) 20 | a = np.random.standard_normal(flat_shape) 21 | u, _, v = np.linalg.svd(a, full_matrices=False) 22 | q = u if u.shape == flat_shape else v # pick the one with the correct shape 23 | q = q.reshape(shape) 24 | return (scale * q).astype(np.float32) 25 | return _ortho_init 26 | 27 | 28 | def norm_init(scale=DEFAULT_SCALE, mode=DEFAULT_MODE): 29 | def _norm_init(shape, dtype, partition_info=None): 30 | shape = tuple(shape) 31 | if len(shape) == 2: 32 | n_in = shape[0] 33 | elif (len(shape) == 3) or (len(shape) == 4): 34 | n_in = np.prod(shape[:-1]) 35 | a = np.random.standard_normal(shape) 36 | if mode == 'fan_in': 37 | n = n_in 38 | elif mode == 'fan_out': 39 | n = shape[-1] 40 | elif mode == 'fan_avg': 41 | n = 0.5 * (n_in + shape[-1]) 42 | return (scale * a / np.sqrt(n)).astype(np.float32) 43 | 44 | 45 | DEFAULT_METHOD = ortho_init 46 | """ 47 | layers 48 | """ 49 | 50 | 51 | def conv(x, scope, n_out, f_size, stride=1, pad='VALID', f_size_w=None, act=tf.nn.relu, 52 | conv_dim=1, init_scale=DEFAULT_SCALE, init_mode=None, init_method=DEFAULT_METHOD): 53 | with tf.variable_scope(scope): 54 | b = tf.get_variable( 55 | "b", [n_out], initializer=tf.constant_initializer(0.0)) 56 | if conv_dim == 1: 57 | n_c = x.shape[2].value 58 | w = tf.get_variable("w", [f_size, n_c, n_out], 59 | initializer=init_method(init_scale, init_mode)) 60 | z = tf.nn.conv1d(x, w, stride=stride, padding=pad) + b 61 | elif conv_dim == 2: 62 | n_c = x.shape[3].value 63 | if f_size_w is None: 64 | f_size_w = f_size 65 | w = tf.get_variable("w", [f_size, f_size_w, n_c, n_out], 66 | initializer=init_method(init_scale, init_mode)) 67 | z = tf.nn.conv2d( 68 | x, w, strides=[1, stride, stride, 1], padding=pad) + b 69 | return act(z) 70 | 71 | 72 | def fc(x, scope, n_out, act=tf.nn.relu, init_scale=DEFAULT_SCALE, 73 | init_mode=DEFAULT_MODE, init_method=DEFAULT_METHOD): 74 | with tf.variable_scope(scope): 75 | n_in = x.shape[1].value 76 | w = tf.get_variable("w", [n_in, n_out], 77 | initializer=init_method(init_scale, init_mode)) 78 | b = tf.get_variable( 79 | "b", [n_out], initializer=tf.constant_initializer(0.0)) 80 | z = tf.matmul(x, w) + b 81 | return act(z) 82 | 83 | 84 | def batch_to_seq(x): 85 | n_step = x.shape[0].value 86 | if len(x.shape) == 1: 87 | 
x = tf.expand_dims(x, -1) 88 | return tf.split(axis=0, num_or_size_splits=n_step, value=x) 89 | 90 | 91 | def seq_to_batch(x): 92 | return tf.concat(axis=0, values=x) 93 | 94 | 95 | def lstm(xs, dones, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE, 96 | init_method=DEFAULT_METHOD): 97 | xs = batch_to_seq(xs) 98 | # need dones to reset states 99 | dones = batch_to_seq(dones) 100 | n_in = xs[0].shape[1].value 101 | n_out = s.shape[0] // 2 102 | with tf.variable_scope(scope): 103 | wx = tf.get_variable("wx", [n_in, n_out*4], 104 | initializer=init_method(init_scale, init_mode)) 105 | wh = tf.get_variable("wh", [n_out, n_out*4], 106 | initializer=init_method(init_scale, init_mode)) 107 | b = tf.get_variable( 108 | "b", [n_out*4], initializer=tf.constant_initializer(0.0)) 109 | s = tf.expand_dims(s, 0) 110 | c, h = tf.split(axis=1, num_or_size_splits=2, value=s) 111 | for ind, (x, done) in enumerate(zip(xs, dones)): 112 | c = c * (1-done) 113 | h = h * (1-done) 114 | z = tf.matmul(x, wx) + tf.matmul(h, wh) + b 115 | i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z) 116 | i = tf.nn.sigmoid(i) 117 | f = tf.nn.sigmoid(f) 118 | o = tf.nn.sigmoid(o) 119 | u = tf.tanh(u) 120 | c = f*c + i*u 121 | h = o*tf.tanh(c) 122 | xs[ind] = h 123 | s = tf.concat(axis=1, values=[c, h]) 124 | return seq_to_batch(xs), tf.squeeze(s) 125 | 126 | 127 | def test_layers(): 128 | print(tf.__version__) 129 | tf.reset_default_graph() 130 | sess = tf.Session() 131 | n_step = 5 132 | fc_x = tf.placeholder(tf.float32, [None, 10]) 133 | lstm_x = tf.placeholder(tf.float32, [n_step, 2]) 134 | lstm_done = tf.placeholder(tf.float32, [n_step]) 135 | lstm_s = tf.placeholder(tf.float32, [20]) 136 | conv1_x = tf.placeholder(tf.float32, [None, 8, 1]) 137 | conv2_x = tf.placeholder(tf.float32, [None, 8, 8, 1]) 138 | fc_out = fc(fc_x, 'fc', 10) 139 | lstm_out, lstm_ns = lstm(lstm_x, lstm_done, lstm_s, 'lstm') 140 | conv1_out = conv(conv1_x, 'conv1', 10, 4, conv_dim=1) 141 | conv2_out = conv(conv2_x, 'conv2', 10, 4, conv_dim=2) 142 | sess.run(tf.global_variables_initializer()) 143 | inputs = {'fc': {fc_x: np.random.randn(n_step, 10)}, 144 | 'lstm_done': {lstm_x: np.zeros((n_step, 2)), 145 | lstm_done: np.ones(n_step), 146 | lstm_s: np.random.randn(20)}, 147 | 'lstm': {lstm_x: np.random.randn(n_step, 2), 148 | lstm_done: np.zeros(n_step), 149 | lstm_s: np.random.randn(20)}, 150 | 'conv1': {conv1_x: np.random.randn(n_step, 8, 1)}, 151 | 'conv2': {conv2_x: np.random.randn(n_step, 8, 8, 1)}} 152 | outputs = {'fc': [fc_out], 'lstm_done': [lstm_out, lstm_ns], 153 | 'conv1': [conv1_out], 'conv2': [conv2_out], 154 | 'lstm': [lstm_out, lstm_ns]} 155 | for scope in ['fc', 'lstm', 'conv1', 'conv2']: 156 | print(scope) 157 | wts = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 158 | for wt in wts: 159 | wt_val = wt.eval(sess) 160 | print(wt_val.shape) 161 | print(np.mean(wt_val), np.std(wt_val), 162 | np.min(wt_val), np.max(wt_val)) 163 | print('=====================================') 164 | for x_name in inputs: 165 | print(x_name) 166 | out = sess.run(outputs[x_name], inputs[x_name]) 167 | if x_name.startswith('lstm'): 168 | print(out[0]) 169 | print(out[1]) 170 | else: 171 | print(out[0].shape) 172 | 173 | 174 | """ 175 | buffers 176 | """ 177 | 178 | 179 | class TransBuffer: 180 | def reset(self): 181 | self.buffer = [] 182 | 183 | @property 184 | def size(self): 185 | return len(self.buffer) 186 | 187 | def add_transition(self, ob, a, r, *_args, **_kwargs): 188 | raise NotImplementedError() 189 | 190 | 
def sample_transition(self, *_args, **_kwargs): 191 | raise NotImplementedError() 192 | 193 | 194 | class OnPolicyBuffer(TransBuffer): 195 | def __init__(self, gamma): 196 | self.gamma = gamma 197 | self.reset() 198 | 199 | def reset(self, done=False): 200 | # the done before each step is required 201 | self.obs = [] 202 | self.acts = [] 203 | self.rs = [] 204 | self.vs = [] 205 | self.dones = [done] 206 | 207 | def add_transition(self, ob, a, r, v, done): 208 | self.obs.append(ob) 209 | self.acts.append(a) 210 | self.rs.append(r) 211 | self.vs.append(v) 212 | self.dones.append(done) 213 | 214 | def _add_R_Adv(self, R): 215 | Rs = [] 216 | Advs = [] 217 | # use post-step dones here 218 | for r, v, done in zip(self.rs[::-1], self.vs[::-1], self.dones[:0:-1]): 219 | R = r + self.gamma * R * (1.-done) 220 | Adv = R - v 221 | Rs.append(R) 222 | Advs.append(Adv) 223 | Rs.reverse() 224 | Advs.reverse() 225 | self.Rs = Rs 226 | self.Advs = Advs 227 | 228 | def sample_transition(self, R, discrete=True): 229 | self._add_R_Adv(R) 230 | obs = np.array(self.obs, dtype=np.float32) 231 | if discrete: 232 | acts = np.array(self.acts, dtype=np.int32) 233 | else: 234 | acts = np.array(self.acts, dtype=np.float32) 235 | Rs = np.array(self.Rs, dtype=np.float32) 236 | Advs = np.array(self.Advs, dtype=np.float32) 237 | # use pre-step dones here 238 | dones = np.array(self.dones[:-1], dtype=np.bool) 239 | self.reset(self.dones[-1]) 240 | return obs, acts, dones, Rs, Advs 241 | 242 | 243 | class ReplayBuffer(TransBuffer): 244 | def __init__(self, buffer_size, batch_size): 245 | self.buffer_size = buffer_size 246 | self.batch_size = batch_size 247 | self.cum_size = 0 248 | self.buffer = [] 249 | 250 | def add_transition(self, ob, a, r, next_ob, done): 251 | experience = (ob, a, r, next_ob, done) 252 | if self.cum_size < self.buffer_size: 253 | self.buffer.append(experience) 254 | else: 255 | ind = int(self.cum_size % self.buffer_size) 256 | self.buffer[ind] = experience 257 | self.cum_size += 1 258 | 259 | def reset(self): 260 | self.buffer = [] 261 | self.cum_size = 0 262 | 263 | def sample_transition(self): 264 | # Randomly sample batch_size examples 265 | minibatch = random.sample(self.buffer, self.batch_size) 266 | state_batch = np.asarray([data[0] for data in minibatch]) 267 | action_batch = np.asarray([data[1] for data in minibatch]) 268 | next_state_batch = np.asarray([data[3] for data in minibatch]) 269 | reward_batch = np.asarray([data[2] for data in minibatch]) 270 | done_batch = np.asarray([data[4] for data in minibatch]) 271 | return state_batch, action_batch, next_state_batch, reward_batch, done_batch 272 | 273 | @property 274 | def size(self): 275 | return min(self.buffer_size, self.cum_size) 276 | 277 | 278 | """ 279 | util functions 280 | """ 281 | 282 | 283 | class Scheduler: 284 | def __init__(self, val_init, val_min=0, total_step=0, decay='linear'): 285 | self.val = val_init 286 | self.N = float(total_step) 287 | self.val_min = val_min 288 | self.decay = decay 289 | self.n = 0 290 | 291 | def get(self, n_step): 292 | self.n += n_step 293 | if self.decay == 'linear': 294 | return max(self.val_min, self.val * (1 - self.n / self.N)) 295 | else: 296 | return self.val 297 | 298 | 299 | if __name__ == '__main__': 300 | test_layers() 301 | -------------------------------------------------------------------------------- /IQL_conventional/config/config_ford.ini: -------------------------------------------------------------------------------- 1 | [MODEL_CONFIG] 2 | max_grad_norm = 40 3 | gamma = 0.99 4 | 
lr_init = 1e-4 5 | lr_decay = constant 6 | epsilon_init = 0.9 7 | epsilon_min = 0.1 8 | epsilon_decay = linear 9 | epsilon_ratio = 0.5 10 | num_fc = 128 11 | num_h = 64 12 | batch_size = 64 13 | buffer_size = 1e6 14 | reward_norm = 1.0 15 | reward_clip = 5.0 16 | 17 | 18 | [TRAIN_CONFIG] 19 | total_step = 1.5e6 20 | test_interval = 1e4 21 | log_interval = 10000 22 | 23 | 24 | [ENV_CONFIG] 25 | sample_time = 0.2 26 | episode_length = 765 27 | # 1 for True and 0 for False 28 | discrete = 1 29 | rendering = 0 30 | # objective is used to choose different reward functions 31 | objective = max_flow 32 | seed = 42 33 | test_seeds = 10000,20000,30000 -------------------------------------------------------------------------------- /IQL_conventional/config/config_gym.ini: -------------------------------------------------------------------------------- 1 | [MODEL_CONFIG] 2 | max_grad_norm = 40 3 | gamma = 0.99 4 | lr_init = 1e-4 5 | lr_decay = constant 6 | epsilon_init = 0.9 7 | epsilon_min = 0.1 8 | epsilon_decay = linear 9 | epsilon_ratio = 0.5 10 | num_fc = 128 11 | num_h = 64 12 | batch_size = 128 13 | buffer_size = 1e6 14 | reward_norm = 1.0 15 | reward_clip = 5.0 16 | 17 | [TRAIN_CONFIG] 18 | total_step = 1e6 19 | test_interval = 2e4 20 | log_interval = 1e4 21 | rendering = 0 22 | 23 | [ENV_CONFIG] 24 | scenario = Acrobot-v1 25 | seed = 0 -------------------------------------------------------------------------------- /IQL_conventional/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DongChen06/Python2Simulink/f7660e4d2ef656e74e0cde6837b2652c432ef288/IQL_conventional/env/__init__.py -------------------------------------------------------------------------------- /IQL_conventional/env/env_ford.py: -------------------------------------------------------------------------------- 1 | import matlab.engine 2 | import numpy as np 3 | 4 | import sys 5 | sys.path.append("../") 6 | 7 | import gym 8 | import argparse 9 | import configparser 10 | import time 11 | import random 12 | from collections import deque 13 | from gym.utils import seeding 14 | from env.utils import * 15 | 16 | 17 | discrete_resolution = 10 18 | 19 | 20 | def parse_args(): 21 | default_base_dir = '/home/derek/PycharmProjects/Python2Simulink/DDQN_Ford/Data' 22 | default_config_dir = 'DDQN_Ford\config\config_ford.ini' 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--base-dir', type=str, required=False, 25 | default=default_base_dir, help="experiment base dir") 26 | parser.add_argument('--config-dir', type=str, required=False, 27 | default=default_config_dir, help="experiment config dir") 28 | parser.add_argument('--is_training', type=str, required=False, 29 | default=True, help="True=train, False=evaluation") 30 | parser.add_argument('--test-mode', type=str, required=False, 31 | default='no_test', 32 | help="test mode during training", 33 | choices=['no_test', 'in_train_test', 'after_train_test', 'all_test']) 34 | 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | class FordEnv(gym.Env): 40 | """ 41 | This is the environment for ford project which is built on Matlab and python. 
42 | 43 | Observation: 44 | Type: Box(7) 45 | Num Observation Min Max 46 | 0 VehicleSpd_mph 0 100 47 | 1 Engine_Spd_c__radps -1e4 1e4 48 | 2 MG1_Spd_radps -1e4 1e4 49 | 3 MG2_Spd_radps -1e4 1e4 50 | 4 Acc_pad 0 1 51 | 5 Dec_pad 0 1 52 | 6 WheelTqDemand_Nm -1e4 1e4 53 | 54 | Actions: 55 | Type: Discrete(discrete_resolution) 56 | Num Action 57 | 0 Push cart to the left 58 | 1 Push cart to the right 59 | """ 60 | 61 | def __init__(self, config, modelName='tracking', discrete=True, time_step=765): 62 | # Setup gym environment 63 | self.modelName = config.get('modelName') 64 | self.model_address = config.get('modelAddress') 65 | # file name of parameters, we need to run it first 66 | self.rendering = int(config.getfloat('rendering')) 67 | self.sample_time = config.getfloat('sample_time') 68 | self.episode_length = int(config.getfloat('episode_length')) 69 | self.seed(66) 70 | 71 | low = np.array([0, -1e4, -1e4, -1e4, 0, 0, -1e4]) 72 | high = np.array([100, 1e4, 1e4, 1e4, 1, 1, 1e4]) 73 | 74 | if discrete is True: 75 | self.action_space = gym.spaces.Discrete(discrete_resolution) 76 | self.observation_space = gym.spaces.Box( 77 | low, high, dtype=np.float32) 78 | else: 79 | self.action_space = gym.spaces.Box(np.array([-1, 0]), np.array([1, 1]), 80 | dtype=np.float32) 81 | self.observation_space = gym.spaces.Box( 82 | -high, high, dtype=np.float32) 83 | 84 | try: 85 | 86 | # initialize matlab and env 87 | self.engMAT = MatEng() 88 | 89 | except Exception as e: 90 | self.close() 91 | raise e 92 | 93 | def seed(self, seed=None): 94 | self.np_random, seed = seeding.np_random(seed) 95 | return [seed] 96 | 97 | def reset(self, ): 98 | self.steps = 0 99 | # reset the matlab model 100 | self.obs = self.engMAT.reset_env(self.rendering) 101 | 102 | def close(self): 103 | self.engMAT.disconnect() 104 | 105 | def render(self, ): 106 | self.engMAT.updateFig() 107 | 108 | def step(self, action): 109 | if action is not None: 110 | obs_new, self.last_reward, self.terminal_state, _ = self.engMAT.run_step( 111 | action) 112 | 113 | if self.rendering: 114 | self.render() 115 | 116 | if self.steps >= int(self.episode_length / self.sample_time) - 1: 117 | self.terminal_state = True 118 | 119 | self.steps += 1 120 | 121 | return obs_new, self.last_reward, self.terminal_state, _ 122 | 123 | 124 | if __name__ == "__main__": 125 | args = parse_args() 126 | base_dir = args.base_dir 127 | config_dir = args.config_dir 128 | config = configparser.ConfigParser() 129 | config.read(config_dir) 130 | epoch = 0 131 | # Example of using FordEnv with sample controller 132 | env = FordEnv(config['ENV_CONFIG']) 133 | action_size = env.action_space.n 134 | print('--------------') 135 | print("Simulation starting...") 136 | while True: 137 | env.reset() 138 | rewards = 0 139 | last_reward = 0 140 | while True: 141 | # print('--------------') 142 | # print("steps = ", env.steps) 143 | # print("rewards = ", last_reward) 144 | action = np.random.randint(action_size, size=1) 145 | # Take an action 146 | obs, last_reward, done, _ = env.step(4) # action[0], 4 147 | rewards += last_reward 148 | if done: 149 | break 150 | print('--------------') 151 | print("steps = ", env.steps) 152 | print("rewards = ", rewards) 153 | epoch += 1 154 | env.close() 155 | -------------------------------------------------------------------------------- /IQL_conventional/env/test.py: -------------------------------------------------------------------------------- 1 | import matlab.engine 2 | 3 | modelAddress = r'C:\Users\Dong\Google Drive\Dong 
Chen\Ford_proj\CX482_IVA_PDP_EncryptedSimulinkModel' 4 | modelName = 'Cx482_IVA_forPDP_wDriverModel_realtime_v27_ProtecModel' 5 | 6 | eng = matlab.engine.start_matlab() 7 | eng.cd(modelAddress, nargout=0) 8 | # eng.ls(nargout=0) 9 | eng.Run_Sim(nargout=0) 10 | try: 11 | print("Connected to Matlab") 12 | eng.eval("model = '{}'".format(modelName), nargout=0) 13 | eng.eval("load_system(model)", nargout=0) 14 | 15 | eng.set_param('{}/Optimal Controller/u1'.format(modelName), 16 | 'value', str(0), nargout=0) 17 | eng.set_param(modelName, 'SimulationCommand', 'start', 18 | 'SimulationCommand', 'pause', nargout=0) 19 | print('----') 20 | v_mph = eng.eval('v_mph') 21 | if(type(v_mph) == float): 22 | print(eng.eval('v_mph')) 23 | print(eng.eval('target_speed')) 24 | print(eng.eval('Fuel_kg')) 25 | print(eng.eval('Acc_pad')) 26 | else: 27 | target_speed = eng.eval('target_speed') 28 | print(v_mph[-1][0]) 29 | print(target_speed[-1][0]) 30 | Fuel_kg = eng.eval('Fuel_kg') 31 | print(Fuel_kg[-1][0]) 32 | Acc_pad = eng.eval('Acc_pad') 33 | print(Acc_pad[-1][0]) 34 | while (eng.get_param(modelName, 'SimulationStatus') != ('stopped' or 'terminating')): 35 | eng.set_param('{}/Optimal Controller/u1'.format(modelName), 36 | 'value', str(0), nargout=0) 37 | eng.set_param(modelName, 'SimulationCommand', 'continue', 38 | 'SimulationCommand', 'pause', nargout=0) 39 | print('----') 40 | v_mph = eng.eval('v_mph') 41 | if(type(v_mph) == float): 42 | print(eng.eval('v_mph')) 43 | print(eng.eval('target_speed')) 44 | print(eng.eval('Fuel_kg')) 45 | print(eng.eval('Acc_pad')) 46 | else: 47 | print(1) 48 | target_speed = eng.eval('target_speed') 49 | print(v_mph[-1][0]) 50 | print(target_speed[-1][0]) 51 | Fuel_kg = eng.eval('Fuel_kg') 52 | print(Fuel_kg[-1][0]) 53 | Acc_pad = eng.eval('Acc_pad') 54 | print(Acc_pad[-1][0]) 55 | 56 | except Exception as e: 57 | print("eng is closed") 58 | eng.set_param(modelName, 'SimulationCommand', 'stop', nargout=0) 59 | eng.quit() 60 | -------------------------------------------------------------------------------- /IQL_conventional/env/utils.py: -------------------------------------------------------------------------------- 1 | import matlab.engine 2 | import matplotlib.pyplot as plt 3 | import time 4 | 5 | 6 | class MatEng(): 7 | def __init__(self): 8 | self.model_address = r'C:\Users\Dong\Google Drive\Dong Chen\Ford_proj\CX482_IVA_PDP_EncryptedSimulinkModel' 9 | self.modelName = 'Cx482_IVA_forPDP_wDriverModel_realtime_v27_ProtecModel' 10 | self.eng = None 11 | 12 | def reset_env(self, rendering=False): 13 | self.terminal_state = False 14 | self.last_reward = 0 15 | self.t = 0 16 | self.tHist = [] 17 | self.x1Hist = [] 18 | self.x2Hist = [] 19 | # reuse last engine to save loading time 20 | if self.eng == None: 21 | print("Starting matlab") 22 | self.eng = matlab.engine.start_matlab() 23 | else: 24 | # reset matlab after one epoch 25 | self.eng.close("all", nargout=0) 26 | self.eng.bdclose("all", nargout=0) 27 | self.eng.clear("classes", nargout=0) 28 | if rendering: 29 | self.terminate_fig() 30 | 31 | # go to the model folder 32 | self.eng.cd(self.model_address, nargout=0) 33 | # run the simulation configurations (parameters) 34 | # eng.ls(nargout=0) 35 | self.eng.Run_Sim(nargout=0) 36 | # Load the model 37 | self.eng.eval("model = '{}'".format(self.modelName), nargout=0) 38 | self.eng.eval("load_system(model)", nargout=0) 39 | 40 | self.setControlAction(0) 41 | print("Initialized Model") 42 | # enable fast restart 43 | self.eng.set_param(self.modelName, 'FastRestart', 'on', 
nargout=0) 44 | # Start Simulation and then Instantly pause 45 | self.eng.set_param(self.modelName, 'SimulationCommand', 46 | 'start', 'SimulationCommand', 'pause', nargout=0) 47 | obs = self.getObservations() 48 | if rendering: 49 | # initialize plot 50 | self.initialize_plot() 51 | return obs 52 | 53 | def setControlAction(self, u1): 54 | # set value of control action 55 | self.eng.set_param( 56 | '{}/Optimal Controller/u1'.format(self.modelName), 'value', str(u1), nargout=0) 57 | 58 | def getObservations(self, ): 59 | # get system Output and Time History 60 | tHist = self.eng.eval('tHist') 61 | v_mph = self.eng.eval('v_mph') 62 | engine_spd = self.eng.eval('engine_spd') 63 | MG1_spd = self.eng.eval('MG1_spd') 64 | MG2_spd = self.eng.eval('MG2_spd') 65 | Acc_pad = self.eng.eval('Acc_pad') 66 | Dec_pad = self.eng.eval('Dec_pad') 67 | WheelTD = self.eng.eval('WheelTD') 68 | Fuel_kg = self.eng.eval('Fuel_kg') 69 | SOC_C = self.eng.eval('SOC_C') 70 | target_speed = self.eng.eval('target_speed') 71 | eng_ori = self.eng.eval('eng_ori') 72 | eng_new = self.eng.eval('eng_new') 73 | if(type(v_mph) == float): 74 | self.Fuel_kg = Fuel_kg 75 | self.SOC_C = SOC_C 76 | self.target_speed = target_speed 77 | # for plotting use 78 | self.tHist.append(tHist) 79 | # self.x1Hist.append(eng_ori) 80 | # self.x2Hist.append(eng_new) 81 | self.x1Hist.append(v_mph) 82 | self.x2Hist.append(target_speed * 0.621371192237334) 83 | # self.x1Hist.append(int(Fuel_kg) * 1000) 84 | return (v_mph, engine_spd, MG1_spd, MG2_spd, Acc_pad, Dec_pad, WheelTD) 85 | else: 86 | self.Fuel_kg = Fuel_kg[-1][0] 87 | self.SOC_C = SOC_C[-1][0] 88 | self.target_speed = target_speed[-1][0] 89 | # for plotting use 90 | self.tHist.append(tHist[-1][0]) 91 | # self.x1Hist.append(eng_ori[-1][0]) 92 | # self.x2Hist.append(eng_new[-1][0]) # target_speed[-1][0] * 0.621371192237334 93 | self.x1Hist.append(v_mph[-1][0]) 94 | self.x2Hist.append(target_speed[-1][0] * 0.621371192237334) 95 | # self.x1Hist.append(int(Fuel_kg[-1][0]) * 1000) 96 | return (v_mph[-1][0], engine_spd[-1][0], MG1_spd[-1][0], MG2_spd[-1][0], Acc_pad[-1][0], Dec_pad[-1][0], WheelTD[-1][0]) 97 | 98 | def run_step(self, action): 99 | u1 = -50 + (action + 1) * 10 100 | # u1 = -200 101 | # if u1 < 0: 102 | # u1 = 0 103 | # u1 = -10 + (action + 1) * 2 104 | # Set the Control Action 105 | self.setControlAction(u1) 106 | # start = time.time() 107 | # Pause the Simulation for each timestep 108 | # self.eng.workspace['Pause_time'] = self.t + 0.3 109 | self.eng.set_param(self.modelName, 'SimulationCommand', 110 | 'StepForward', nargout=0) 111 | # tHist = self.eng.eval('tHist') 112 | # if type(tHist) == float: 113 | # self.t = tHist 114 | # else: 115 | # self.t = tHist[-1][0] 116 | # print(self.t) 117 | # end = time.time() 118 | # print(end - start) 119 | # start = time.time() 120 | obs = self.getObservations() 121 | # end = time.time() 122 | # print(end - start) 123 | 124 | # compute the reward 125 | self.reward_fn() 126 | 127 | # if (self.eng.get_param(self.modelName, 'SimulationStatus') == ('stopped' or 'terminating')): 128 | # print(True) 129 | # self.terminal_state = True 130 | 131 | return obs, self.last_reward, self.terminal_state, True 132 | 133 | def reward_fn(self,): 134 | # reward = fuel_consumption + speed_tracking + SOC 135 | self.last_reward = self.Fuel_kg + self.SOC_C + self.target_speed 136 | 137 | def disconnect(self,): 138 | print("eng is closed") 139 | self.eng.set_param( 140 | self.modelName, 'SimulationCommand', 'stop', nargout=0) 141 | self.eng.quit() 142 | 143 | def 
initialize_plot(self, ): 144 | # Initialize the graph 145 | self.fig = plt.figure() 146 | self.fig1, = plt.plot(self.tHist, self.x1Hist, 147 | color='red', linewidth=1) 148 | self.fig2, = plt.plot(self.tHist, self.x2Hist, color='k', linewidth=1) 149 | # for speed tracking 150 | plt.xlim(0, 800) 151 | plt.ylim(-10, 100) 152 | # engine torque 153 | # plt.xlim(0, 800) 154 | # plt.ylim(-50, 400) 155 | # for fuel consumption 156 | # plt.xlim(0, 800) 157 | # plt.ylim(0, 3) 158 | plt.ylabel("Output") 159 | plt.xlabel("Time(s)") 160 | # plt.legend('x1', 'x2', loc='upper right') 161 | plt.title("System Response") 162 | 163 | def updateFig(self, ): 164 | # Update the Graph 165 | self.fig1.set_xdata(self.tHist) 166 | self.fig1.set_ydata(self.x1Hist) 167 | self.fig2.set_xdata(self.tHist) 168 | self.fig2.set_ydata(self.x2Hist) 169 | plt.ion() 170 | plt.pause(0.001) 171 | plt.show() 172 | 173 | def terminate_fig(self,): 174 | plt.close(self.fig) 175 | # plt.close(self.fig2) 176 | -------------------------------------------------------------------------------- /IQL_conventional/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | 4 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 5 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 6 | 7 | import argparse 8 | import configparser 9 | import gym 10 | import tensorflow as tf 11 | import tensorflow.contrib.layers as layers 12 | from env.env_ford import FordEnv 13 | from utils import * 14 | from trainer import * 15 | from agents.models import IQL 16 | 17 | 18 | def parse_args(): 19 | default_base_dir = 'Data' 20 | default_config_dir = 'config/config_gym.ini' 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--base-dir', type=str, required=False, 23 | default=default_base_dir, help="experiment base dir") 24 | parser.add_argument('--config-dir', type=str, required=False, 25 | default=default_config_dir, help="experiment config dir") 26 | parser.add_argument('--is_training', type=str, required=False, 27 | default=True, help="True=train, False=evaluation") 28 | parser.add_argument('--test-mode', type=str, required=False, 29 | default='no_test', 30 | help="test mode during training", 31 | choices=['no_test', 'in_train_test', 'after_train_test', 'all_test']) 32 | 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def train_fn(args): 38 | base_dir = args.base_dir 39 | dirs = init_dir(base_dir) 40 | init_log(dirs['log']) 41 | config_dir = args.config_dir 42 | # copy_file(config_dir, dirs['data']) 43 | config = configparser.ConfigParser() 44 | config.read(config_dir) 45 | 46 | # test during training, test after training 47 | in_test, post_test = init_test_flag(args.test_mode) 48 | 49 | # Initialize environment 50 | print("Initializing environment") 51 | # env = FordEnv(config['ENV_CONFIG']) 52 | env = gym.make("CartPole-v0") 53 | n_s = env.observation_space.shape 54 | logging.info('Training: s dim: %d, a dim %d' % 55 | (n_s[0], env.action_space.n)) 56 | n_s_ls = [n_s[0]] 57 | n_a_ls = [env.action_space.n] 58 | # init step counter 59 | total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step')) 60 | test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval')) 61 | log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval')) 62 | global_counter = Counter(total_step, test_step, log_step) 63 | 64 | seed = config.getint('ENV_CONFIG', 'seed') 65 | 66 | model = IQL(n_s_ls, n_a_ls, total_step, config['MODEL_CONFIG'], 67 | seed=0, 
model_type='dqn') 68 | 69 | summary_writer = tf.summary.FileWriter(dirs['log']) 70 | trainer = Trainer(env, model, global_counter, 71 | summary_writer, in_test, output_path=dirs['data']) 72 | trainer.run() 73 | 74 | # post-training test 75 | if post_test: 76 | tester = Tester(env, model, global_counter, 77 | summary_writer, dirs['data']) 78 | tester.run_offline(dirs['data']) 79 | 80 | # save model 81 | final_step = global_counter.cur_step 82 | logging.info('Training: save final model at step %d ...' % final_step) 83 | model.save(dirs['model'], final_step) 84 | 85 | 86 | def evaluate_fn(args): 87 | pass 88 | 89 | 90 | if __name__ == '__main__': 91 | args = parse_args() 92 | if args.is_training is True: 93 | train_fn(args) 94 | else: 95 | evaluate_fn(args) 96 | -------------------------------------------------------------------------------- /IQL_conventional/trainer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import tensorflow as tf 4 | import pandas as pd 5 | import time 6 | 7 | 8 | class Trainer(): 9 | def __init__(self, env, model, global_counter, summary_writer, run_test, output_path=None, rendering=False): 10 | self.cur_step = 0 11 | self.rendering = rendering 12 | self.global_counter = global_counter 13 | self.env = env 14 | self.agent = 'iql' # TODO 15 | self.model = model 16 | self.sess = self.model.sess 17 | self.n_step = self.model.n_step # bacth size 18 | self.summary_writer = summary_writer 19 | self.run_test = run_test # ToDo 20 | # assert self.env.T % self.n_step == 0 21 | self.data = [] 22 | self.output_path = output_path 23 | if run_test: 24 | self.test_num = self.env.test_num 25 | logging.info('Testing: total test num: %d' % self.test_num) 26 | self._init_summary() 27 | 28 | def _init_summary(self): 29 | self.train_reward = tf.placeholder(tf.float32, []) 30 | self.train_summary = tf.summary.scalar( 31 | 'train_reward', self.train_reward) 32 | self.test_reward = tf.placeholder(tf.float32, []) 33 | self.test_summary = tf.summary.scalar('test_reward', self.test_reward) 34 | 35 | def _add_summary(self, reward, global_step, is_train=True): 36 | if is_train: 37 | summ = self.sess.run(self.train_summary, { 38 | self.train_reward: reward}) 39 | else: 40 | summ = self.sess.run(self.test_summary, {self.test_reward: reward}) 41 | self.summary_writer.add_summary(summ, global_step=global_step) 42 | 43 | def take_action(self, prev_ob, prev_done): 44 | # take actions for a batch size 45 | ob = prev_ob 46 | done = prev_done 47 | rewards = 0 # ori = [] 48 | for _ in range(self.n_step): 49 | if self.agent.endswith('a2c'): 50 | policy, value = self.model.forward(ob, done) 51 | action = [] 52 | for pi in policy: 53 | action.append(np.random.choice(np.arange(len(pi)), p=pi)) 54 | else: 55 | action, policy = self.model.forward(ob, mode='explore') 56 | next_ob, reward, done, _ = self.env.step(action[0]) # ori = action, global_reward 57 | if self.rendering: 58 | self.env.render() 59 | rewards += reward 60 | global_step = self.global_counter.next() 61 | self.cur_step += 1 62 | self.model.add_transition(ob, action, reward, next_ob, done) 63 | if done: 64 | break 65 | ob = next_ob 66 | return ob, done, _, rewards 67 | 68 | def evaluate(self, test_ind, demo=False, policy_type='default'): 69 | # test function 70 | ob = self.env.reset(gui=demo, test_ind=test_ind) 71 | # note this done is pre-decision to reset LSTM states! 
72 | done = True 73 | self.model.reset() 74 | rewards = [] 75 | while True: 76 | if self.agent == 'greedy': 77 | action = self.model.forward(ob) 78 | elif self.agent.endswith('a2c'): 79 | # policy-based on-poicy learning 80 | policy = self.model.forward(ob, done, 'p') 81 | if self.agent == 'ma2c': 82 | self.env.update_fingerprint(policy) 83 | if self.agent == 'a2c': 84 | if policy_type != 'deterministic': 85 | action = np.random.choice( 86 | np.arange(len(policy)), p=policy) 87 | else: 88 | action = np.argmax(np.array(policy)) 89 | else: 90 | action = [] 91 | for pi in policy: 92 | if policy_type != 'deterministic': 93 | action.append(np.random.choice( 94 | np.arange(len(pi)), p=pi)) 95 | else: 96 | action.append(np.argmax(np.array(pi))) 97 | else: 98 | # value-based off-policy learning 99 | if policy_type != 'stochastic': 100 | action, _ = self.model.forward(ob) 101 | else: 102 | action, _ = self.model.forward(ob, stochastic=True) 103 | next_ob, reward, done, global_reward = self.env.step(action) 104 | rewards.append(global_reward) 105 | if done: 106 | break 107 | ob = next_ob 108 | mean_reward = np.mean(np.array(rewards)) 109 | std_reward = np.std(np.array(rewards)) 110 | return mean_reward, std_reward 111 | 112 | def run(self): 113 | while not self.global_counter.should_stop(): 114 | # test or not 115 | if self.run_test and self.global_counter.should_test(): 116 | rewards = [] 117 | global_step = self.global_counter.cur_step 118 | for test_ind in range(self.test_num): 119 | mean_reward, std_reward = self.evaluate(test_ind) 120 | self.env.terminate() 121 | rewards.append(mean_reward) 122 | log = {'agent': self.agent, 123 | 'step': global_step, 124 | 'test_id': test_ind, 125 | 'avg_reward': mean_reward, 126 | 'std_reward': std_reward} 127 | self.data.append(log) 128 | avg_reward = np.mean(np.array(rewards)) 129 | self._add_summary(avg_reward, global_step, is_train=False) 130 | logging.info('Testing: global step %d, avg R: %.2f' % 131 | (global_step, avg_reward)) 132 | 133 | # train 134 | ob = self.env.reset() 135 | done = True 136 | self.model.reset() 137 | self.cur_step = 0 138 | rewards = [] 139 | while True: 140 | ob, done, _, cur_rewards = self.take_action(ob, done) 141 | rewards.append(cur_rewards) # ori 142 | global_step = self.global_counter.cur_step 143 | # update network for each bach size steps 144 | self.model.backward(self.summary_writer, global_step) 145 | # termination 146 | if done: 147 | break 148 | rewards = np.array(rewards) # reward for one epoch 149 | mean_reward = np.mean(rewards) 150 | std_reward = np.std(rewards) 151 | log = {'agent': self.agent, 152 | 'step': global_step, 153 | 'test_id': -1, 154 | 'avg_reward': mean_reward, 155 | 'std_reward': std_reward} 156 | self.data.append(log) 157 | self._add_summary(mean_reward, global_step) 158 | self.summary_writer.flush() 159 | df = pd.DataFrame(self.data) # data: dictionary 160 | df.to_csv(self.output_path + 'train_reward.csv') 161 | 162 | 163 | class Tester(Trainer): 164 | def __init__(self, env, model, global_counter, summary_writer, output_path): 165 | super().__init__(env, model, global_counter, summary_writer) 166 | self.env.train_mode = False 167 | self.test_num = self.env.test_num 168 | self.output_path = output_path 169 | self.data = [] 170 | logging.info('Testing: total test num: %d' % self.test_num) 171 | 172 | def _init_summary(self): 173 | self.reward = tf.placeholder(tf.float32, []) 174 | self.summary = tf.summary.scalar('test_reward', self.reward) 175 | 176 | def run_offline(self): 177 | # enable traffic 
measurments for offline test 178 | is_record = True 179 | record_stats = False 180 | self.env.cur_episode = 0 181 | self.env.init_data(is_record, record_stats, self.output_path) 182 | rewards = [] 183 | for test_ind in range(self.test_num): 184 | rewards.append(self.evaluate(test_ind)) 185 | self.env.terminate() 186 | time.sleep(2) 187 | self.env.collect_tripinfo() 188 | avg_reward = np.mean(np.array(rewards)) 189 | logging.info('Offline testing: avg R: %.2f' % avg_reward) 190 | self.env.output_data() 191 | 192 | def run_online(self, coord): 193 | self.env.cur_episode = 0 194 | while not coord.should_stop(): 195 | time.sleep(30) 196 | if self.global_counter.should_test(): 197 | rewards = [] 198 | global_step = self.global_counter.cur_step 199 | for test_ind in range(self.test_num): 200 | cur_reward = self.evaluate(test_ind) 201 | self.env.terminate() 202 | rewards.append(cur_reward) 203 | log = {'agent': self.agent, 204 | 'step': global_step, 205 | 'test_id': test_ind, 206 | 'reward': cur_reward} 207 | self.data.append(log) 208 | avg_reward = np.mean(np.array(rewards)) 209 | self._add_summary(avg_reward, global_step) 210 | logging.info('Testing: global step %d, avg R: %.2f' % 211 | (global_step, avg_reward)) 212 | # self.global_counter.update_test(avg_reward) 213 | df = pd.DataFrame(self.data) 214 | df.to_csv(self.output_path + 'train_reward.csv') 215 | 216 | 217 | class Evaluator(Tester): 218 | def __init__(self, env, model, output_path, demo=False, policy_type='default'): 219 | self.env = env 220 | self.model = model 221 | self.agent = self.env.agent 222 | self.env.train_mode = False 223 | self.test_num = self.env.test_num 224 | self.output_path = output_path 225 | self.demo = demo 226 | self.policy_type = policy_type 227 | 228 | def run(self): 229 | is_record = True 230 | record_stats = False 231 | self.env.cur_episode = 0 232 | self.env.init_data(is_record, record_stats, self.output_path) 233 | time.sleep(1) 234 | for test_ind in range(self.test_num): 235 | reward, _ = self.evaluate( 236 | test_ind, demo=self.demo, policy_type=self.policy_type) 237 | self.env.terminate() 238 | logging.info('test %i, avg reward %.2f' % (test_ind, reward)) 239 | time.sleep(2) 240 | self.env.collect_tripinfo() 241 | self.env.output_data() -------------------------------------------------------------------------------- /IQL_conventional/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | import numpy as np 4 | import tensorflow as tf 5 | import time 6 | import os 7 | import pandas as pd 8 | import subprocess 9 | 10 | 11 | def check_dir(cur_dir): 12 | if not os.path.exists(cur_dir): 13 | return False 14 | return True 15 | 16 | 17 | def copy_file(src_dir, tar_dir): 18 | cmd = 'cp %s %s' % (src_dir, tar_dir) 19 | subprocess.check_call(cmd, shell=True) 20 | 21 | 22 | def find_file(cur_dir, suffix='.ini'): 23 | for file in os.listdir(cur_dir): 24 | if file.endswith(suffix): 25 | return cur_dir + '/' + file 26 | logging.error('Cannot find %s file' % suffix) 27 | return None 28 | 29 | 30 | def init_dir(base_dir, pathes=['log', 'data', 'model']): 31 | if not os.path.exists(base_dir): 32 | os.mkdir(base_dir) 33 | dirs = {} 34 | for path in pathes: 35 | cur_dir = base_dir + "/%s/" % path 36 | if not os.path.exists(cur_dir): 37 | os.mkdir(cur_dir) 38 | dirs[path] = cur_dir 39 | return dirs 40 | 41 | 42 | def init_log(log_dir): 43 | logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', 44 | level=logging.INFO, 45 | 
handlers=[ 46 | logging.FileHandler('%s/%d.log' % 47 | (log_dir, time.time())), 48 | logging.StreamHandler() 49 | ]) 50 | 51 | 52 | def init_test_flag(test_mode): 53 | if test_mode == 'no_test': 54 | return False, False 55 | if test_mode == 'in_train_test': 56 | return True, False 57 | if test_mode == 'after_train_test': 58 | return False, True 59 | if test_mode == 'all_test': 60 | return True, True 61 | return False, False 62 | 63 | 64 | def plot_train(data_dirs, labels): 65 | pass 66 | 67 | 68 | def plot_evaluation(data_dirs, labels): 69 | pass 70 | 71 | 72 | class Counter: 73 | def __init__(self, total_step, test_step, log_step): 74 | self.counter = itertools.count(1) 75 | self.cur_step = 0 76 | self.cur_test_step = 0 77 | self.total_step = total_step 78 | self.test_step = test_step 79 | self.log_step = log_step 80 | self.stop = False 81 | # self.init_test = True 82 | 83 | def next(self): 84 | self.cur_step = next(self.counter) 85 | return self.cur_step 86 | 87 | def should_test(self): 88 | # if self.init_test: 89 | # self.init_test = False 90 | # return True 91 | test = False 92 | if (self.cur_step - self.cur_test_step) >= self.test_step: 93 | test = True 94 | self.cur_test_step = self.cur_step 95 | return test 96 | 97 | # def update_test(self, reward): 98 | # if self.prev_reward is not None: 99 | # if abs(self.prev_reward - reward) <= self.delta_reward: 100 | # self.stop = True 101 | # self.prev_reward = reward 102 | 103 | def should_log(self): 104 | return (self.cur_step % self.log_step == 0) 105 | 106 | def should_stop(self): 107 | if self.cur_step >= self.total_step: 108 | return True 109 | return self.stop 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python2Simulink 2 | =============== 3 | #### Robotics and Intelligent Vehicle Automation Lab (RIVAL) 4 | - Built by Dong Chen 5 | - Started on Jan.11, 2020 6 | 7 | A bridge between Python and Simulink. 8 | This repository builds a bridge between Python and Simulink. At each time step, the Python script sends a command (input) to the Simulink model; the Simulink model then executes one step and returns the results to the Python script for decision making. 9 | 10 | ## Install MATLAB Engine API for Python 11 | ------- 12 | 13 | Install the MATLAB Engine API by following the instructions in [Installation](https://www.mathworks.com/help/matlab/matlab_external/install-the-matlab-engine-for-python.html). 14 | 15 | ## Applications 16 | ------- 17 | 18 | 1. plant example 19 | 20 | In this example, we build a PI controller to regulate a second-order system to a reference value (10 here). The Python script computes the control input and sends the value to the Simulink model. The Simulink model then runs for one step and returns the output value to the Python script. 21 | 22 |
Fig.1 Regulation result of the plant example (`Docs/plant.gif`)
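For reference, the control law in this plant example is a standard discrete PI update computed on the Python side. The sketch below is illustrative only: the class name, gains, and sample time are assumptions and are not taken from `examples/plant_ex/plant.py`; only the setpoint of 10 comes from the description above.

```python
class PIController:
    """Minimal discrete PI controller (illustrative; gains and sample time are assumed)."""

    def __init__(self, kp=1.0, ki=0.5, dt=0.1, setpoint=10.0):
        self.kp, self.ki, self.dt, self.setpoint = kp, ki, dt, setpoint
        self.integral = 0.0

    def step(self, y):
        error = self.setpoint - y           # regulation error w.r.t. the reference value
        self.integral += error * self.dt    # accumulate the integral term
        return self.kp * error + self.ki * self.integral  # control input sent to Simulink
```

At each time step, the Python side would call `step()` with the latest plant output returned by the Simulink model and write the returned control input back into the model.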
Fig.2 Simulink model (`Docs/simulink_model.png`)
Fig.3 Tracking result, x1 (blue) and xd1 (orange) (`Docs/tracking.gif`)
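The stepping pattern that both the examples and `IQL_conventional/env/utils.py` rely on is the same throughout: load the model through the MATLAB Engine, write the control input to a block with `set_param`, advance the simulation one step, and read the logged outputs back with `eval`. The snippet below is a minimal sketch of that loop; the model name `tracking`, the block path `tracking/u`, and the workspace variable `yout` are placeholders for whatever your Simulink model actually exposes.

```python
import matlab.engine

eng = matlab.engine.start_matlab()
eng.eval("model = 'tracking'", nargout=0)        # placeholder model name
eng.eval("load_system(model)", nargout=0)

# Start the simulation and pause immediately so it can be stepped from Python.
eng.set_param('tracking', 'SimulationCommand', 'start',
              'SimulationCommand', 'pause', nargout=0)

while eng.get_param('tracking', 'SimulationStatus') != 'stopped':
    # Write the control input into a constant block (placeholder block path).
    eng.set_param('tracking/u', 'value', str(0.0), nargout=0)
    # Advance the model by one step, then read an output logged to the MATLAB workspace.
    eng.set_param('tracking', 'SimulationCommand', 'StepForward', nargout=0)
    y = eng.eval('yout')                         # placeholder workspace variable

eng.quit()
```

The `MatEng` helper in `IQL_conventional/env/utils.py` wraps this same pattern behind `reset_env()` and `run_step()`, adding live plotting and Simulink's fast-restart mode on top.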