├── README.md
├── agent
│   ├── replay_buffer.py
│   └── trainer.py
├── config.py
├── env
│   ├── config
│   │   ├── 20200218_due
│   │   ├── 20200218_slices
│   │   ├── 20200218_wip
│   │   ├── paper_proccfg
│   │   ├── paper_proccfg_constant
│   │   └── paper_setupcfg
│   ├── job_generator.py
│   ├── simul_pms.py
│   ├── util_sim.py
│   └── wrapper.py
├── main.py
├── model
│   ├── __init__.py
│   ├── a2c.py
│   ├── dqn.py
│   ├── nn_ops.py
│   └── util_nn.py
├── test.py
└── utils
    ├── core
    │   ├── Job.py
    │   ├── __init__.py
    │   └── timeline.py
    ├── util.py
    └── visualize
        ├── logger.py
        └── viz_state.py

/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | 
3 | Distributed version of Deep Reinforcement Learning (DRL)-based scheduling agents for minimizing tardiness
4 | 
5 | The code is implemented with Python 3.6 and TensorFlow 1.14
6 | 
7 | (2023.07) A new branch, tf2-compatible, is available! Python 3.10 & TensorFlow 2.x were also tested without errors
8 | 
9 | When using this repository for academic purposes,
10 | please cite our paper "Deep Reinforcement Learning for Minimizing Tardiness in Parallel Machine Scheduling With Sequence Dependent Family Setups", IEEE Access (2021)
11 | 
12 | URL: https://ieeexplore.ieee.org/document/9486959
13 | 
14 | # Outline
15 | agent package: replay buffer, DRL agent trainer
16 | 
17 | env package: simulator environments with parallel machines, Wrapper() class for generating 2-D states
18 | 
19 | model package: Deep Q-Network (DQN) and relevant methods modified from other researchers' previous works
20 | 
21 | utils package: core objects, logging of experimental results, state visualization
22 | 
23 | config.py: specifies the experiment configuration, including DQN hyperparameters
24 | 
25 | main.py: trains the DQN and performs automatic validation (TensorBoard)
26 | 
27 | test.py: compares performance with heuristics and obtains schedules from trained DQNs
28 | 
29 | 
30 | # Requirements
31 | Python 3.6, TensorFlow 1.14
32 | 
33 | pip install opencv-python pandas
34 | 
35 | # Instruction for general users
36 | Run main.py to reproduce the experiments with Dataset 1 (paper).
37 | 
38 | All hyperparameters and simulation configurations are specified in config.py (args)
39 | 
40 | Users may modify each argument. For example, to reproduce the experiment of Dataset 7 (paper) with a learning rate of 0.1, use the following command:
41 | 
42 | python main.py --did=4 --F=7 --lr=0.1
43 | 
44 | For the usage of the arguments, check the annotations of the parser definition in config.py
45 | 
46 | 
47 | Experimental results are automatically logged in a new folder named 'results'
48 | 
49 | In the distributed version, no files are created in best_models and gantt
50 | 
51 | Every args.save_freq episodes, the trained DQN is saved as a checkpoint file in the 'results/models' folder.
52 | 
53 | Simultaneously, a TensorBoard summary is reported, including the following information:
54 | 
55 | 1. training loss (per train step), cumulative rewards (per episode), cumulative Q-values (per episode)
56 | 2. validation results (saved DQN) as cumulative rewards, which equal the total tardiness in this research.
57 | 
58 | # Instruction for reproducing the experiments
59 | The 8 datasets can be reproduced by referring to the annotations in config.py (see the example commands below)
60 | 
61 | Stochastic processing and setup times (Table 5 in the paper) can be reproduced by modifying the STOCHASTICITY variable in env/simul_pms.py.
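As annotated in config.py, the paper's datasets map to the --did and --F arguments as follows (a sketch of the commands; combine them with the bucket and save_freq recommendations below):

python main.py --did=0 --F=10   # Dataset 1 (sd2)
python main.py --did=3 --F=10   # Dataset 2 (sd2t5)
python main.py --did=0 --F=7    # Dataset 3
python main.py --did=3 --F=7    # Dataset 4
python main.py --did=4 --F=10   # Dataset 5 (sd5)
python main.py --did=7 --F=10   # Dataset 6 (sd5t5)
python main.py --did=4 --F=7    # Dataset 7
python main.py --did=7 --F=7    # Dataset 8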
62 | 
63 | For Datasets 1 to 4, I recommend args.bucket=7200 and args.save_freq=1000
64 | 
65 | For Datasets 5 to 8, use args.bucket=5400 and args.save_freq=20
66 | 
67 | Other hyperparameters do not need to be modified to reproduce the results.
68 | 
69 | To exclude parameter sharing (FBS-1D in the paper), set args.state_type=1D (the default is 2D)
70 | 
71 | For LBF-Q, set args.oopt=upm2007
72 | 
73 | For TPDQN, set args.oopt=fab2018
74 | 
75 | For heuristics including SSTEDD and COVERT, run test.py --test_mode=logic --ropt=tardiness, then check summary/test_performance_logic.csv (the unit is reward, not tardiness hours)
76 | 
77 | # For advanced users and future researchers
78 | As stated in the paper, DRL hyperparameters are essential but hard to optimize.
79 | 
80 | Try various values of args.GAMMA, freq_tar, freq_on, warmup, eps, lr, and so on.
81 | 
82 | As in line 150 of main.py, a new random seed is set at every new episode. This scheme can be reconsidered.
83 | 
84 | 
85 | After becoming acquainted with the code, modify util_sim.py and simul_pms.py to simulate your own scheduling problems.
86 | 
87 | (distribution of production requirements, due dates, initial machine status, time tables of processing and setup times, ...)
88 | 
89 | Modify wrapper.py to generate state vectors appropriate for your problems.
90 | 
--------------------------------------------------------------------------------
/agent/replay_buffer.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import random
3 | from model.util_nn import *
4 | 
5 | 
6 | class DataBuffer(object):
7 |     def __init__(self):
8 |         self.count = 0
9 |         self.buffer = deque()
10 |     def add(self, s, a, r, t, s2, f_a):
11 |         experience = (s, a, r, t, s2, f_a)
12 |         self.buffer.append(experience)
13 |         self.count += 1
14 |     def get(self):
15 |         batch = self.buffer
16 |         s_batch = np.array([_[0] for _ in batch])
17 |         a_batch = np.array([_[1] for _ in batch])
18 |         r_batch = np.array([_[2] for _ in batch])
19 |         t_batch = np.array([_[3] for _ in batch])
20 |         s2_batch = np.array([_[4] for _ in batch])
21 |         # a2_batch = np.array([_[5] for _ in batch])
22 |         return s_batch, a_batch, r_batch, t_batch, s2_batch
23 |     def clear(self):
24 |         self.buffer.clear()
25 |         self.count = 0
26 | 
27 | class Experience(object):
28 |     def __init__(self, state, action, reward, terminal, auxtarget=None, last_action=None, last_reward=None):
29 |         self.state = state
30 |         self.action = action
31 |         self.reward = reward
32 |         self.terminal = terminal
33 |         self.auxtarget = auxtarget
34 |         self.last_action = last_action
35 |         self.last_reward = last_reward
36 | 
37 |     def get_auxin(self, action_size):
38 |         """
39 |         Return one hot vectored last action + last reward.
40 |         """
41 |         return Experience.concat_action_and_reward(self.last_action, action_size,
42 |                                                    self.last_reward)
43 | 
44 |     def get_action_reward(self, action_size):
45 |         """
46 |         Return one hot vectored action + reward.
47 |         """
48 |         return Experience.concat_action_and_reward(self.action, action_size,
49 |                                                    self.reward)
50 | 
51 |     @staticmethod
52 |     def concat_action_and_reward(action, action_size, reward):
53 |         """
54 |         Return one hot vectored action and reward.
55 | """ 56 | action_reward = np.zeros([action_size + 1]) 57 | action_reward[action] = 1.0 58 | action_reward[-1] = float(reward) 59 | return action_reward 60 | 61 | 62 | class ReplayBuffer(object): 63 | 64 | def __init__(self, buffer_size, random_seed=123): 65 | """ 66 | The right side of the deque contains the most recent experiences 67 | """ 68 | self.buffer_size = buffer_size 69 | self.count = 0 70 | self.buffer = []#deque() 71 | self._next_idx = 0 72 | random.seed(random_seed) 73 | 74 | def add_experience(self, experience): 75 | if self.count < self.buffer_size: 76 | self.buffer.append(experience) 77 | self.count += 1 78 | else: 79 | # self.pop() 80 | self.buffer[self._next_idx] = experience 81 | self._next_idx = (self._next_idx+1) % self.buffer_size 82 | 83 | def add(self, s, s_, a, r, t, s2, s2_, f_a): 84 | experience = (s, s_, a, r, t, s2, s2_, f_a) 85 | if self.count < self.buffer_size: 86 | self.buffer.append(experience) 87 | self.count += 1 88 | else: 89 | # self.pop() 90 | self.buffer[self._next_idx] = experience 91 | self._next_idx = (self._next_idx+1) % self.buffer_size 92 | 93 | # def pop(self): 94 | # self.buffer.popleft() 95 | # self.count -= 1 96 | 97 | def size(self): 98 | return self.count 99 | 100 | def sample_batch(self, batch_size): 101 | batch = [] 102 | if self.count < batch_size: 103 | batch = random.sample(self.buffer, self.count) 104 | else: 105 | batch = random.sample(self.buffer, batch_size) 106 | 107 | return self.unwrap(batch) 108 | 109 | def sample_sequence(self, recent=True, seq_size=None): 110 | batch = [] 111 | if recent: 112 | idx = (self._next_idx-1)%self.buffer_size 113 | batch.append(self.buffer[idx]) 114 | while self.buffer[idx][4] is False: 115 | idx = (idx - 1) % self.buffer_size 116 | batch.append(self.buffer[idx]) 117 | else: 118 | idx = random.randint(0, self.count - 1) 119 | batch.append(self.buffer[idx]) 120 | while self.buffer[idx][4] is False and (seq_size>1 if seq_size is not None else True): 121 | if seq_size is not None: seq_size -= 1 122 | idx = (idx + 1) % self.buffer_size 123 | batch.append(self.buffer[idx]) 124 | return self.unwrap(batch) 125 | 126 | def unwrap(self, batch): 127 | if type(batch[0])==Experience: 128 | return batch 129 | else: 130 | s_batch = np.array([_[0] for _ in batch]) 131 | xs_batch = np.array([_[1] for _ in batch]) 132 | a_batch = np.array([_[2] for _ in batch]) 133 | r_batch = np.array([_[3] for _ in batch]) 134 | t_batch = np.array([_[4] for _ in batch]) 135 | s2_batch = np.array([_[5] for _ in batch]) 136 | xs2_batch = np.array([_[6] for _ in batch]) 137 | f_a_batch = np.array([_[7] for _ in batch]) 138 | # a2_batch = np.array([_[5] for _ in batch]) 139 | return s_batch, xs_batch, a_batch, r_batch, t_batch, s2_batch, xs2_batch, f_a_batch 140 | 141 | def clear(self): 142 | self.deque.clear() 143 | self.count = 0 144 | 145 | class FlatteningReplayBuffer(object): 146 | def __init__(self, buffer_size, buffer_num, random_seed=123): 147 | self.set = list() 148 | self.buffer_size = buffer_size 149 | self.buffer_num = buffer_num 150 | self.count = 0 151 | for i in range(buffer_num): 152 | self.set.append(ReplayBuffer(buffer_size)) 153 | def add(self, s, a, r, t, s2, f_a): 154 | action = 0 155 | for i in range(len(a)): 156 | if a[i] == 1: action = i 157 | buffer = self.set[action] 158 | if self.count < self.buffer_size: 159 | self.count += 1 160 | else: 161 | buffer.pop() 162 | self.count -= 1 163 | buffer.add(s, a, r, t, s2, f_a) 164 | def sample_batch(self, batch_size, flattenFlag : bool = True): 165 | 166 | 
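        # Sample proportionally from the per-action buffers: each of the
        # buffer_num ReplayBuffers in self.set contributes batch_size // buffer_num
        # experiences (the first buffer also absorbs the remainder), and the
        # per-buffer batches are concatenated so the resulting mini-batch stays
        # roughly balanced across actions.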
buffer_num = len(self.set) 167 | 168 | per_size = int(batch_size / buffer_num) 169 | remainder = batch_size % buffer_num 170 | s_batch, a_batch, r_batch, t_batch, s2_batch, f_a_batch = self.set[0].sample_batch(per_size+remainder) 171 | for i in range(1, buffer_num): 172 | buffer = self.set[i] 173 | t1,t2,t3,t4,t5,t6=buffer.sample_batch(per_size) 174 | s_batch = np.concatenate((s_batch, t1)) 175 | a_batch = np.concatenate((a_batch, t2)) 176 | r_batch = np.concatenate((r_batch, t3)) 177 | t_batch = np.concatenate((t_batch, t4)) 178 | s2_batch = np.concatenate((s2_batch, t5)) 179 | temp = f_a_batch.tolist() 180 | temp.extend(t6.tolist()) 181 | f_a_batch = np.array(temp) 182 | 183 | return s_batch, a_batch, r_batch, t_batch, s2_batch, f_a_batch 184 | -------------------------------------------------------------------------------- /agent/trainer.py: -------------------------------------------------------------------------------- 1 | from agent.replay_buffer import * 2 | from utils.visualize.logger import instance_log 3 | from config import * 4 | import numpy as np 5 | import tensorflow as tf 6 | import random 7 | from model import * 8 | 9 | 10 | class Trainer(object): 11 | 12 | def __init__(self, sess, optimizer, idx=0, exp_idx=0, global_step=None, use_buff=True, use_hist=False): 13 | self.Epsilon = args.eps 14 | self.eps = self.Epsilon 15 | self.Epsilon_epoch = args.max_episode * args.eps_ratio 16 | self.TAU = 1 17 | self.Episode = 1 18 | self.TimeStep = 0 19 | self.RememberFlag = False 20 | self.cutday = -1 21 | 22 | """For experiences""" 23 | self.dat = ReplayBuffer(args.max_buffersize) 24 | self.temp_buffer = None 25 | 26 | """For checking training performance""" 27 | self.reward_history = [] 28 | self.reward_total = 0 29 | self.trigger_cnt = 0 30 | self.decision_cnt = 0 31 | self.cumQ = 0 32 | self.cumV_real = 0 33 | self.setupNum = 0 34 | self.RewardMemo = "" 35 | 36 | """Network objects""" 37 | self.summary_writer = None 38 | self.summary_dir = args.summary_dir 39 | self.nn = None 40 | is_conv = True if type(args.state_dim) == list and len(args.state_dim)>2 else False 41 | print(args.auxin_dim) 42 | self.nn = PDQN(sess, action_dim=args.action_dim, input_dim=args.state_dim, auxin_dim=args.auxin_dim, 43 | optimizer=optimizer, tau=self.TAU, name="dqn_pms_{}".format(exp_idx), layers=args.hid_dims, 44 | is_train=args.is_train, is_duel = args.is_duel, 45 | global_step=global_step, summary_dir=self.summary_dir, weight_hist=use_hist) 46 | self.use_buff = use_buff 47 | if args.is_train: 48 | self.record = instance_log(args.gantt_dir, 'instances_{}'.format(args.timestamp)) 49 | else: 50 | self.record = instance_log(args.gantt_dir, 'test_instances_{}'.format(args.timestamp)) 51 | 52 | def getDecisionNum(self): 53 | return self.decision_cnt 54 | 55 | def SetEpisode(self, episode, num_decision=None): 56 | self.reward_total = 0 57 | if num_decision == None: 58 | self.is_terminated=False 59 | else: 60 | if self.trigger_cnt != 0: self.num_decision = self.trigger_cnt 61 | else: self.num_decision = num_decision 62 | self.trigger_cnt = 0 63 | self.decision_cnt = 0 64 | self.setupNum = 0 65 | self.cumQ = 0 66 | self.cumV_real = 0 67 | self.loss_history = np.zeros(300, dtype=float) 68 | self.Episode = episode 69 | self.eps = self.Epsilon 70 | self.record.clearInfo() 71 | self.targetupFlag=True 72 | 73 | # self.memory() 74 | 75 | def memory(self): 76 | import os, psutil 77 | pid = os.getpid() 78 | py = psutil.Process(pid) 79 | memoryUse = py.memory_info()[0] / 2. 
** 30 # memory use in GB...I think 80 | print('memory use:', memoryUse) 81 | 82 | def getSummary(self): 83 | return self.nn.getSummary() 84 | 85 | def writeSummary(self): 86 | summ = self.getSummary() 87 | if summ is False: 88 | print("There is no summary writer") 89 | return 90 | episode_summary = tf.Summary() 91 | episode_summary.value.add(simple_value=self.reward_total, node_name='reward/cumulative_reward', tag='reward/cumulative_reward') 92 | episode_summary.value.add(simple_value=self.cumQ, node_name='reward/cumulative_Q', tag='reward/cumulative_Q') 93 | L_avg = 0 94 | if len(self.loss_history) > 0: 95 | L_avg = np.mean(self.loss_history) 96 | episode_summary.value.add(simple_value=L_avg, tag='loss/episodic_loss') 97 | # if args.use_vp: 98 | # episode_summary.value.add(simple_value=self.nn.loss_tf.summary.scalar('loss/batch_loss_v', tf.subtract(self.loss_end, self.loss) 99 | summ.add_summary(episode_summary, self.Episode) 100 | summ.flush() 101 | 102 | def get_action(self, observe): 103 | 104 | ''' 105 | DRL agent recieves observations from a simulator or environments. 106 | :return: next observation, action vector, current scheduling time 107 | ''' 108 | 109 | self.TimeStep += 1 110 | self.trigger_cnt += 1 111 | state = observe['state'] 112 | feasible_action_index = observe['feasibility'] 113 | curr_time = observe['time'] 114 | if len(feasible_action_index) == 0: 115 | return observe, -1, curr_time 116 | 117 | if self.check_exploration(): 118 | return observe, random.Random().choice(feasible_action_index), curr_time # randrange(args.action_dim) 119 | 120 | if args.auxin_dim != 0: 121 | auxin = observe['auxin'] 122 | else: 123 | auxin = [] 124 | deterministric = True 125 | logits = list(self.nn.critic_predict([state],[auxin], feasibility=feasible_action_index)[0]) # numpy array predictions to list 126 | # print(logits) 127 | if args.is_duel: 128 | q, adv, val = self.nn.critic_predict([state],[auxin]) 129 | logits = list(q[0]) 130 | if self.TimeStep % 10000 == 0: print('q', logits, 'adv', adv[0], 'val', val[0]) 131 | if deterministric: 132 | if len(feasible_action_index) == args.action_dim: 133 | action = logits.index(max(logits)) 134 | else: 135 | max_logit = -1000000 136 | max_index = -1 137 | for i in feasible_action_index: 138 | now_logit = logits[i] 139 | if max_logit < now_logit: 140 | max_logit = now_logit 141 | max_index = i 142 | action = max_index 143 | else: # probabilistic action 144 | if sum(logits) != 1: 145 | logits[-1] += 1 - sum(logits) 146 | action = -1 147 | while action not in feasible_action_index: 148 | action = np.random.choice(len(logits), 1, p=logits)[0] 149 | 150 | for prod_idx in range(len(logits)): self.record.appendInfo('Qvalue_%03d' % prod_idx, logits[prod_idx]) 151 | # if args.use_nost and action % 10 == action // 10: action = 0 152 | now_value = logits[action] 153 | self.cumQ += float(now_value) 154 | self.decision_cnt += 1 155 | 156 | return observe, action, curr_time 157 | 158 | def check_exploration(self): 159 | if not args.is_train: 160 | return False 161 | if args.warmup > self.TimeStep: 162 | return True 163 | # epsilon-greedy policy 164 | epi = self.Episode 165 | if self.Epsilon_epoch != 0: 166 | eps = self.Epsilon * max(0, self.Epsilon_epoch - epi) / self.Epsilon_epoch# decaying eps 167 | self.eps=eps 168 | if np.random.rand() < self.eps: 169 | return True 170 | return False 171 | def getEps(self): return self.eps 172 | 173 | def remember_record(self, pre_observe, action, reward, terminalFlag): 174 | self.reward_history.append(reward) 175 | 
self.reward_total += reward 176 | state = pre_observe['state'] 177 | for state_idx in range(len(state)): 178 | self.record.appendInfo('state_{:04d}'.format(state_idx), state[state_idx]) 179 | self.record.appendInfo('action', np.where(action[0]==1)[0][0]) 180 | self.record.appendInfo('reward', reward) 181 | self.record.appendInfo('terminal', terminalFlag) 182 | self.record.saveInfo() 183 | def remember(self, pre_observe, action, observe, reward, terminalFlag): 184 | feasible_action_index_list = observe['feasibility'] 185 | curr_time = observe['time'] 186 | self.reward_history.append(reward) 187 | self.reward_total += reward 188 | if args.auxin_dim != 0: 189 | auxin = pre_observe['auxin'] 190 | next_auxin = observe['auxin'] 191 | else: 192 | auxin = [] 193 | next_auxin = [] 194 | state = pre_observe['state'] 195 | next_state= observe['state'] 196 | if 'upm' in args.oopt: 197 | self.train_step([np.array(state)], np.reshape(action, (args.action_dim,)), reward, 198 | np.array(feasible_action_index_list), [np.array(next_state)], terminalFlag) 199 | return 200 | for state_idx in range(len(state)): 201 | self.record.appendInfo('state_{:04d}'.format(state_idx), state[state_idx]) 202 | self.record.appendInfo('action', np.where(action[0]==1)[0][0]) 203 | self.record.appendInfo('reward', reward) 204 | self.record.appendInfo('terminal', terminalFlag) 205 | self.dat.add(np.reshape(state, (args.state_dim)), 206 | auxin, 207 | np.reshape(action, (args.action_dim,)), 208 | reward, 209 | terminalFlag, 210 | np.reshape(next_state, (args.state_dim)), 211 | next_auxin, 212 | np.array(feasible_action_index_list),) 213 | 214 | if self.TimeStep > args.warmup and (self.TimeStep - args.warmup) % args.freq_on == 0 and args.is_train: 215 | self.train_network(terminalFlag) 216 | if args.sampling == 'pretrain': 217 | if self.TimeStep <= args.warmup: 218 | if self.temp_buffer is None: self.temp_buffer = DataBuffer() 219 | self.temp_buffer.add(np.reshape(state, (args.state_dim,)), 220 | np.reshape(action, (args.action_dim,)), 221 | reward, terminalFlag, 222 | np.reshape(next_state, (args.state_dim,)), 223 | np.array(feasible_action_index_list)) 224 | if self.TimeStep == args.warmup and terminalFlag: 225 | self.pretrain() 226 | 227 | self.record.saveInfo() 228 | if args.freq_tar==0: #for tmu=0.01 in TPDQN 229 | if self.TimeStep % 100 == 0 and args.is_train: 230 | self.update_target_network() 231 | elif terminalFlag and self.targetupFlag: 232 | if self.Episode % args.freq_tar == 0 and args.is_train: 233 | self.update_target_network() 234 | self.targetupFlag= False 235 | 236 | def pretrain(self): 237 | s_batch, a_batch, r_batch, t_batch, s2_batch = self.temp_buffer.get() 238 | temp = 0 239 | found_flag=False 240 | real_cq = 0 241 | y_i = [] 242 | for i in range(self.temp_buffer.count): 243 | reverse_idx = -(i + 1) 244 | if t_batch[reverse_idx]: 245 | found_flag=True 246 | temp=0 247 | # print(reverse_idx) 248 | if found_flag is False: 249 | continue 250 | temp = temp * args.GAMMA + r_batch[reverse_idx] 251 | real_cq += temp 252 | y_i.insert(0, temp) 253 | # print(y_i, prob_next) 254 | self.cumV_real = real_cq 255 | epoch_size = len(y_i) 256 | y_i = np.reshape(y_i, (epoch_size, 1)) 257 | w_i = np.ones(epoch_size) 258 | epoch = 0 259 | # loss = 100 260 | while epoch < 2000: 261 | loss, train_op, predicted_Q, target_Q, action = self.critic_train( 262 | np.reshape(w_i, (epoch_size, 1)), s_batch[:epoch_size], a_batch[:epoch_size], 263 | np.reshape(y_i, (epoch_size, 1)), self.TimeStep) 264 | epoch += 1 265 | print(loss, 
epoch_size, epoch) 266 | 267 | def train_step(self, s, a, r, feas, s_, t): 268 | target_q = self.nn.critic_predict(s_, [], feas) 269 | 270 | if t: 271 | y = r 272 | else: 273 | if len(feas) == args.action_dim: 274 | y = r + args.GAMMA * np.amax(target_q) 275 | else: 276 | max_index = -1 277 | max_value = -1000000.0 278 | for act in feas: 279 | if target_q[act] > max_value: 280 | max_index = act 281 | max_value = target_q[act] 282 | y = r + args.GAMMA * target_q[max_index] 283 | 284 | w = np.reshape(np.ones(1), (1,1)) 285 | loss, train_op, predicted_Q, target_Q, action = self.nn.critic_train( 286 | [[1]], s, [], [a], [[y]], self.TimeStep) 287 | self.loss_history = np.roll(self.loss_history, 1) 288 | self.loss_history[0] = loss 289 | 290 | 291 | def train_network(self, terminal=False): 292 | if isinstance(self.dat, ReplayBuffer) or isinstance(self.dat, FlatteningReplayBuffer): 293 | s_batch, xs_batch, a_batch, r_batch, t_batch, s2_batch, xs2_batch, feasible_action = self.dat.sample_batch(args.batchsize) 294 | 295 | if isinstance(self.nn, BaseNetwork): 296 | target_q = self.nn.critic_target_predict(s2_batch, xs2_batch, feasible_action) 297 | if args.is_double: origin_q = self.nn.critic_predict(s2_batch, xs2_batch, feasible_action) 298 | y_i = [] 299 | dat_size = min(args.batchsize, len(s_batch)) 300 | # w_i = np.array(np.ones(self.MINIBATCH_SIZE)) 301 | w_i = np.ones(dat_size) 302 | for k in range(dat_size): 303 | if t_batch[k]: 304 | y_i.append(r_batch[k]) 305 | else: 306 | if args.is_double: 307 | max_index = -1 308 | max_value = -1000000.0 309 | for act in range(args.action_dim): 310 | if origin_q[k][act] > max_value: 311 | max_index = act 312 | max_value = origin_q[k][act] 313 | y_i.append(r_batch[k] + args.GAMMA * target_q[k][max_index]) 314 | else: 315 | if len(feasible_action) == args.action_dim: 316 | y_i.append(r_batch[k] + args.GAMMA * np.amax(target_q[k])) 317 | else: 318 | max_index = -1 319 | max_value = -1000000.0 320 | for act in feasible_action[k]: 321 | # if args.use_nost and act % 10 == act // 10: act = 0 322 | if target_q[k][act] > max_value: 323 | max_index = act 324 | max_value = target_q[k][act] 325 | y_i.append(r_batch[k] + args.GAMMA * target_q[k][max_index]) 326 | 327 | 328 | loss, train_op, predicted_Q, target_Q, action = self.nn.critic_train( 329 | np.reshape(w_i, (dat_size, 1)), s_batch, xs_batch, a_batch, 330 | np.reshape(y_i, (dat_size, 1)), self.TimeStep) 331 | # print(loss) 332 | 333 | self.loss_history = np.roll(self.loss_history, 1) 334 | self.loss_history[0] = loss 335 | elif isinstance(self.nn, CriticNetwork): 336 | pass 337 | 338 | def update_target_network(self): 339 | if isinstance(self.nn, PDQN): 340 | self.nn.update_critic() 341 | 342 | def toString(self, learning_instances, denominator): 343 | msg = '' 344 | info_col = ['state', 'action', 'reward', 't', 'next state'] 345 | for k in range(len(learning_instances[0])): 346 | for i in range(len(info_col)): 347 | msg += info_col[i] + ':[' 348 | if type(learning_instances[i][k]) == np.ndarray: 349 | for l in range(len(learning_instances[i][k])): 350 | msg += str(learning_instances[i][k][l]) + ',' 351 | else: 352 | msg += str(learning_instances[i][k]) 353 | msg += ']' + denominator 354 | msg += '\n' 355 | 356 | return msg 357 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os, argparse, time 2 | 3 | def str2bool(v): 4 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 5 | 
return True 6 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 7 | return False 8 | elif v.isdigit(): 9 | return int(v) 10 | else: 11 | raise argparse.ArgumentTypeError('Boolean value expected.') 12 | 13 | DATANAME = ['sd2','sd2e1','sd2e3','sd2t5','sd5','sd5e1','sd5e3','sd5t5','sd', 'sd10', 'sd15'] 14 | """ 15 | Dataset 1, 3: sd2 (args.did=0) with args.F=10, args.F=7 16 | Dataset 2, 4: sd2t5 (args.did=3) with args.F=10, args.F=7 17 | Dataset 5, 7: sd5 (args.did=4) with args.F=10, args.F=7 18 | Dataset 6, 8: sd5t5 (args.did=7) with args.F=10, args.F=7 19 | The others are experimental datasets. 20 | Dataset is determined by # of machines and jobs (a.k.a. "scales"), # of families, and due-date tightness in 21 | Refer the papers for details: https://ieeexplore.ieee.org/document/9486959 22 | """ 23 | # checking arguments 24 | def check_args(args): 25 | # --checkpoint_dir 26 | # for i in range(0, len(DatasetList)): 27 | 28 | folder_name = '{}_{}_{}'.format(DATANAME[args.did] if args.F==10 else DATANAME[args.did]+'f'+str(args.F), args.oopt, args.key) 29 | args.save_dir = args.root_dir + folder_name 30 | args.gantt_dir = os.path.join(args.save_dir, 'gantt') 31 | args.model_dir = os.path.join(args.save_dir, 'models') 32 | args.best_model_dir = os.path.join(args.save_dir, 'best_models') 33 | args.summary_dir = os.path.join(args.save_dir, 'summary') 34 | args.timestamp = time.strftime("%Y%m%d_%H%M%S", time.gmtime()) 35 | if type(args.auxin_dim)==list and len(args.auxin_dim)==1: args.auxin_dim = args.auxin_dim[0] 36 | if 'a3c' in args.key: 37 | args.state_dim = args.state_dim[0] 38 | if 'upm' in args.oopt: 39 | args.action_dim = 4 40 | args.hid_dims = [] 41 | args.bucket = 0 42 | args.auxin_dim = 0 43 | args.batchsize = 1 44 | args.warmup = 0 45 | if args.did == -1: 46 | args.max_episode = 40000 47 | args.save_freq = 400 48 | args.state_dim = [450] 49 | elif args.did < 4: 50 | args.max_episode = 20000 51 | args.save_freq = 200 52 | args.state_dim = [900] 53 | else: 54 | args.max_episode = 2500 55 | args.save_freq = 25 56 | args.state_dim = [2250] 57 | if args.oopt =='upm2012': 58 | args.state_dim = [110] 59 | elif args.oopt=='upm2007': 60 | args.GAMMA = 0.002 61 | args.lr = 0.001 62 | elif 'fab' in args.oopt: 63 | args.action_dim = args.F 64 | args.auxin_dim = 0 65 | args.hid_dims = [512, 128, 21] 66 | args.bucket = 0 67 | if args.did == -1: 68 | args.state_dim = [210*3 + args.F*(221)] 69 | args.max_episode = 2000 70 | args.save_freq = 20 71 | elif args.did < 4: 72 | args.state_dim = [420*3 + args.F*(441)] 73 | args.max_episode = 800 74 | args.save_freq = 8 75 | else: 76 | args.state_dim = [1050 * 3 + args.F * (1101)] 77 | args.max_episode = 50 78 | args.save_freq = 1 79 | 80 | args.ropt = 'tardiness' 81 | args.GAMMA = 0.9 82 | args.warmup = 32 83 | args.batchsize = 32 84 | args.lr= 0.000001 85 | args.freq_tar = 0 86 | args.eps_ratio = 0 87 | elif args.oopt == 'ours2007': 88 | args.action_dim = args.F*args.F 89 | args.auxin_dim = 0 90 | if args.did == -1: args.state_dim = [450] 91 | elif args.did < 4: args.state_dim = [900] 92 | else: args.state_dim = [2250] 93 | else: 94 | args.action_dim = args.F*args.F 95 | if args.state_type == '1D': 96 | args.state_dim = [args.F * (args.F*2+42) + 2] 97 | elif args.state_type == 'manual': 98 | pass 99 | else: 100 | args.state_dim = [args.F, args.F*2+42] # default 101 | if not os.path.exists(args.save_dir): 102 | os.makedirs(args.save_dir) 103 | if not os.path.exists(args.gantt_dir): 104 | os.mkdir(args.gantt_dir) 105 | if not os.path.exists(args.model_dir): 106 
| os.mkdir(args.model_dir) 107 | if not os.path.exists(args.best_model_dir): 108 | os.mkdir(args.best_model_dir) 109 | if not os.path.exists(args.summary_dir): 110 | os.mkdir(args.summary_dir) 111 | # --batch_size 112 | assert args.batchsize >= 1, 'batch size must be larger than or equal to one' 113 | return args 114 | 115 | desc = "PMS experiment" 116 | parser = argparse.ArgumentParser(description=desc) 117 | parser.add_argument('--env', type=str, default='pms') 118 | parser.add_argument('--result_dir', type=str, default='results', 119 | help='Directory name to save the generated images') 120 | parser.add_argument('--key', type=str, default='default') # key for identifying experiments, results file 121 | parser.add_argument('--did', type=int, default=0) 122 | parser.add_argument('--eid', type=int, default=0) 123 | parser.add_argument('--test_mode', type=str, default='logic') 124 | parser.add_argument('--root_dir', type=str, default='./results/') 125 | parser.add_argument('--save_freq', type=int, default=5000) 126 | parser.add_argument('--viz', type=str2bool, default=False) 127 | parser.add_argument('--is_load', type=str2bool, default=False) 128 | parser.add_argument('--use_hist', type=str2bool, default=False) 129 | 130 | # For Reinforcement Learning 131 | parser.add_argument('--lr', type=float, default=0.0025) # learning rate 132 | parser.add_argument('--nn', type=str, default='keep') 133 | parser.add_argument('--is_duel', type=str2bool, default=False) # Dueling DQN option can be used 134 | parser.add_argument('--is_noisy', type=str2bool, default=False) 135 | parser.add_argument('--is_double', type=str2bool, default=False) 136 | parser.add_argument('--is_train', type=str2bool, default=True) 137 | parser.add_argument('--is_first', type=str2bool, default=True) 138 | parser.add_argument('--eps', type=float, default=0.2) # Initial epsilon value 139 | parser.add_argument('--eps_ratio', type=float, default=0.9) # portion of exploration episodes 140 | parser.add_argument('--warmup', type=int, default=24000) # number of time stpes for random exploration 141 | parser.add_argument('--GAMMA', type=float, default=1) 142 | parser.add_argument('--batchsize', type=int, default=32) 143 | parser.add_argument('--freq_tar', type=int, default=50) # Target network update frequency (unit: episodes) 144 | parser.add_argument('--freq_on', type=int, default=1) # Target network update frequency (unit: time steps) 145 | parser.add_argument('--max_episode', type=int, default=100000) 146 | parser.add_argument('--max_buffersize', type=int, default=100000) 147 | 148 | parser.add_argument('--policy', type=str, default='dqn_logic') 149 | parser.add_argument('--sampling', type=str, default='td') 150 | parser.add_argument('--F', type=int, default=10) 151 | parser.add_argument('--action_dim', type=int, default=10) # action dim is automatically set to args.F * args.F 152 | parser.add_argument('--auxin_dim', type=int, default=[0], nargs='+') # auxin is S_inv in the paper 153 | parser.add_argument('--state_dim', '-s', type=int, default=[40], nargs='+') # -s 10 52 means 10 X 52 2-D state 154 | parser.add_argument('--state_type', type=str, default='2D') # use args.state_type=manual for changing args.state_dim 155 | parser.add_argument('--share_num', type=int, default=0) 156 | parser.add_argument('--chg_freq', type=int, default=1) 157 | parser.add_argument('--hid_dims', type=int, default=[64, 32, 16], nargs='+', 158 | help='hidden dimensions (default: [64, 32])') 159 | 160 | parser.add_argument('--oopt', type=str, 
default='default') # You can modify oopt for other baseline models 161 | parser.add_argument('--sopt', type=str, default='default') # For advanced users 162 | parser.add_argument('--use', type=int, default=[1,2,4,5,6,7], nargs='+') # selectively choose state features 163 | parser.add_argument('--K', type=int, default=0) # state concatenation (obsolute) 164 | parser.add_argument('--ropt', type=str, default='epochtard') # Option for reward generation. 165 | 166 | parser.add_argument('--change_qty', type=str2bool, default=False) 167 | parser.add_argument('--qty', type=str, default=None) 168 | parser.add_argument('--cutday', type=int, default=-1) 169 | parser.add_argument('--repeat', type=int, default=1) 170 | parser.add_argument('--config_load', type=str, default=None) 171 | parser.add_argument('--bucket', type=int, default=0) # Time intervals of period (T in the paper) 172 | parser.add_argument('--equality_flag', type=bool, default=False) 173 | 174 | args = check_args(parser.parse_args()) 175 | args.DATASET = DATANAME 176 | 177 | -------------------------------------------------------------------------------- /env/config/20200218_due: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/20200218_due -------------------------------------------------------------------------------- /env/config/20200218_slices: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/20200218_slices -------------------------------------------------------------------------------- /env/config/20200218_wip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/20200218_wip -------------------------------------------------------------------------------- /env/config/paper_proccfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/paper_proccfg -------------------------------------------------------------------------------- /env/config/paper_proccfg_constant: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/paper_proccfg_constant -------------------------------------------------------------------------------- /env/config/paper_setupcfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BohyungPaeng/DRL-Scheduling-tf/f310a3f57acfceb48cc128e791a4832a9270290e/env/config/paper_setupcfg -------------------------------------------------------------------------------- /env/job_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from config import * 3 | from random import randint, uniform, shuffle, Random, sample, choice 4 | 5 | def generate_jobs(num_stream_jobs): 6 | 7 | # time and job size 8 | all_t = [] 9 | all_size = [] 10 | 11 | # generate streaming sequence 12 | t = 0 13 | for _ in range(num_stream_jobs): 14 | if args.job_distribution == 'uniform': 15 | size = 
int(np.random.uniform( 16 | args.job_size_min, args.job_size_max)) 17 | elif args.job_distribution == 'pareto': 18 | size = int((np.random.pareto( 19 | args.job_size_pareto_shape) + 1) * \ 20 | args.job_size_pareto_scale) 21 | else: 22 | print('Job distribution', args.job_distribution, 'does not exist') 23 | 24 | if args.cap_job_size: 25 | size = min(int(args.job_size_max), size) 26 | 27 | t += int(np.random.exponential(args.job_interval)) 28 | 29 | all_t.append(t) 30 | all_size.append(size) 31 | 32 | return all_t, all_size 33 | 34 | def divide_uniform(number, parts_number, ratio=0, criteria=0): 35 | if criteria >= number: 36 | d = randint(0, parts_number - 1) 37 | parts = [0]*parts_number 38 | parts[d] = number 39 | return parts 40 | if parts_number > number: 41 | raise ValueError("Number of parts can't be higher than the number"); 42 | 43 | parts = [] 44 | number_rest = number 45 | average = number / parts_number 46 | 47 | for i in range(1, parts_number + 1): 48 | if (i == parts_number): 49 | parts.append(number_rest) 50 | break 51 | else: 52 | new_number = int(uniform(1-ratio, 1+ratio) * average) 53 | number_rest -= new_number 54 | parts.append(new_number) 55 | 56 | shuffle(parts) 57 | return parts 58 | 59 | def divide_geometric(number, parts_number, allow_zero=False, criteria=0): 60 | """ 61 | Divide a given number into bunch of digits. 62 | Better way) 63 | 1.Push N zeroes in it. 64 | 2.Push K-1 ones in it. 65 | 3.Shuffle the array. 66 | 4.#s of Zero are bucket size. 67 | """ 68 | if criteria >= number: 69 | d = randint(0, parts_number - 1) 70 | parts = [0]*parts_number 71 | parts[d] = number 72 | return parts 73 | if parts_number > number: 74 | raise ValueError("Number of parts can't be higher than the number"); 75 | 76 | parts = [] 77 | number_rest = number 78 | average = number / parts_number 79 | 80 | for i in range(1, parts_number + 1): 81 | if (i == parts_number): 82 | parts.append(number_rest) 83 | break 84 | else: 85 | new_number = np.random.randint(0, number_rest) if allow_zero else np.random.randint(1, (number_rest - (parts_number - i)) // 2) 86 | 87 | number_rest -= new_number 88 | parts.append(new_number) 89 | 90 | return parts 91 | 92 | 93 | import env.util_sim as util 94 | import sys 95 | 96 | def generate_wip_type(slices, wip_op='wall', swapFlag=False): 97 | wip_type = [-1] * util.M 98 | type_order = list() 99 | for i in range(len(slices)): 100 | type_order.append(i) 101 | type_order.sort(key=lambda t: slices[t], reverse=True) 102 | 103 | 104 | for i in range(util.M): 105 | if wip_op == 'wall': 106 | wip_type[i] = i % util.F 107 | elif wip_op == 'w1': 108 | wip_type[i] = type_order[0] 109 | elif wip_op == 'w2': 110 | if i < util.M * 0.2: 111 | wip_type[i] = 1#type_order[1] 112 | else: 113 | wip_type[i] = 0#type_order[0] 114 | elif wip_op == 'w4': 115 | if i < util.M * 0.5: 116 | wip_type[i] = type_order[0] 117 | elif i < util.M * 0.8: 118 | wip_type[i] = type_order[1] 119 | elif i < util.M * 0.9: 120 | wip_type[i] = type_order[2] 121 | else: 122 | wip_type[i] = type_order[3] 123 | if swapFlag: 124 | n = randint(1, int(round(util.M * 0.2))) 125 | selected_resource_list = sample(range(util.M), n) 126 | for i in selected_resource_list: 127 | wip_type[i] = choice(type_order) 128 | print("swap initial setup status of machines: ", selected_resource_list) 129 | return wip_type 130 | 131 | estL = None 132 | def get_cmax_estimated(policy=None, slices=None, p_mj_list=None): 133 | global estL 134 | if estL is None: 135 | # computing L 136 | if policy == 'uniform': 137 | estL = 0 
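            # 'uniform' estimate: L = (sum over families of the minimum processing
            # time across machines + N * the smallest non-zero setup time) / M.
            # estL is later used by get_due_list() to spread due dates around the
            # estimated makespan.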
138 | for i in range(util.F): 139 | min_p = min([util.getProcessingTime(j, i) for j in range(util.M)]) 140 | estL += min_p 141 | min_s = 1000000000 142 | for i in range(util.M): 143 | for j in range(util.F): 144 | for k in range(util.F): 145 | st = util.getSetupTime(i, j, k) 146 | if st != 0 and st < min_s: 147 | min_s = st 148 | print(min_p, min_s, estL) 149 | estL += min_s * util.N 150 | estL /= util.M # tabu setting 151 | print('uniform style L', estL) 152 | elif policy == 'TABU': 153 | ''' 154 | job의 type을 정하는 logic을 따라해서 각 job의 min processing time과 min setup time을 계산 (type은 밑에서 정하므로) 155 | ''' 156 | estL=0 157 | for p in range(util.F): 158 | for d in range(util.Horizon): 159 | quantity = slices[p][d] 160 | for j in range(quantity): 161 | min_p = sys.maxsize 162 | min_s = sys.maxsize 163 | for m in range(util.M): 164 | pt = util.getProcessingTime(m, p) 165 | if pt < min_p: 166 | min_p = pt 167 | for p_ in range(util.F): 168 | st = util.getSetupTime(m, p, p_) 169 | if st != 0 and st < min_s: 170 | min_s = st 171 | estL += min_p + min_s 172 | estL /= util.M 173 | print('Tabu Style L', estL) 174 | elif policy == 'Chen': 175 | jobID_type = list() 176 | for p in range(util.F): 177 | for d in range(util.Horizon): 178 | quantity = slices[p][d] 179 | for j in range(quantity): 180 | jobID_type.append(p) 181 | C_max = 0 182 | # for j in range(util.N): 183 | # for m in range(util.M): 184 | # C_max += util.PTable[m][jobID_type[j]] * p_mj_list[m][j] / (util.M * util.M) 185 | # for i in range(util.N): 186 | # C_max += util.STable[m][jobID_type[i]][jobID_type[j]] / (util.N * util.M * util.M) 187 | for j in range(util.N): 188 | prod = jobID_type[j] 189 | for m in range(util.M): 190 | C_max += util.getProcessingTime(m, prod) / (util.M * util.M) 191 | for f in range(util.F): 192 | C_max += util.getSetupTime(m, f, prod) / (util.F * util.M * util.M) 193 | print('Chen Style L', C_max) 194 | estL = C_max 195 | 196 | def get_due_list(job_num=1, tightness=0.4, R=0.8, L=None, slices=None, info=None): 197 | R = 0.4 # tabu setting 198 | T = tightness # tabu setting (loose) 199 | # T = 0.5 # tabu setting (tight) 200 | if L is None: 201 | # get_cmax_estimated('uniform', slices, info) 202 | get_cmax_estimated('Chen', slices, info) 203 | L = estL 204 | 205 | if job_num == 1: return uniform(L*(1-T-R), L*(1-T+R)) // util.TimeUnit * util.TimeUnit 206 | due_list = list() 207 | for i in range(job_num): 208 | due_list.append( int(uniform(L*(1-T-R), L*(1-T+R))) ) 209 | return due_list 210 | 211 | total_proc_time = 0 212 | def get_due_VNS(B=6, alpha=0.25, beta=0.25): 213 | ''' 214 | B : max batch size [3,6] 215 | alpha : arrival factor [0,25, 0.5, 0.750 216 | beta : due date factor 217 | ''' 218 | global total_proc_time 219 | if total_proc_time == 0: 220 | print('total_proc_time calculation') 221 | ''' 222 | VNS 논문에서는 기계별로 job type에 따라 processing time이 다르지 않음 223 | ''' 224 | # # identical parallel machine 225 | # for p in range(util.F): 226 | # total_proc_time += util.PTable[0][p] * (8 * util.M) 227 | # total_proc_time = total_proc_time / (util.M * B) 228 | ''' 229 | 기계별로 job type에 따라 processing time이 다를 경우 230 | ''' 231 | for m in range(util.M): 232 | for p in range(util.F): 233 | total_proc_time += util.getProcessingTime(macID=m, type=p) * (8 * util.M) 234 | total_proc_time = total_proc_time / (util.M * util.M * B) 235 | arrival = uniform(0, alpha * total_proc_time)// util.TimeUnit * util.TimeUnit 236 | due = arrival + uniform(0, beta * total_proc_time)// util.TimeUnit * util.TimeUnit 237 | return arrival, due 238 | 
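The due-date rule above reduces to drawing each due date uniformly from [L*(1-T-R), L*(1-T+R)], where L is the estimated makespan, T the tightness, and R the range factor (get_due_list() fixes R=0.4 internally). A minimal standalone sketch of that sampling, using a hypothetical helper and an illustrative value of L:

import random

def sample_due_dates(L, job_num, T=0.4, R=0.4):
    # Draw job_num due dates uniformly from [L*(1-T-R), L*(1-T+R)],
    # mirroring the interval used by get_due_list() above.
    low, high = L * (1 - T - R), L * (1 - T + R)
    return [int(random.uniform(low, high)) for _ in range(job_num)]

# Example: with an estimated makespan L = 100000 time units,
# sample_due_dates(100000, job_num=5) returns five due dates in [20000, 100000].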
-------------------------------------------------------------------------------- /env/util_sim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from config import * 3 | from collections import defaultdict 4 | import pickle, random, sys, math 5 | 6 | STable = defaultdict(list) 7 | PTable = defaultdict(list) 8 | TimeUnit = 60 9 | Horizon = 3 10 | UnitTime = 24 * 60 * TimeUnit 11 | # UnitTime = 180 * TimeUnit * 1.53 12 | # UnitTime = 60 * TimeUnit * 29.47 13 | """RI setting""" 14 | # UnitTime = 60 * TimeUnit * 23 15 | # Horizon = 4 16 | ObserveRange = 2880* TimeUnit 17 | ST = 360* TimeUnit # setup time 18 | PT = 180* TimeUnit # processing time 19 | PTMODE = 'default' 20 | # PTMODE = 'identical' 21 | # PTMODE = 'constant' 22 | M = 20 # of machine 23 | '''new''' 24 | N = 420 # of job 25 | '''old''' 26 | # N = 150 # of job 27 | F = 10 # number of product type (a.k.a. family) args.action_dim 28 | eta = 2.0 29 | ddt = 0.4 30 | AC = UnitTime / PT # average capacity(job #) for one day 31 | weight = [1.1, 0.9, 1.3, 1.25, 1.22, 0.77, 0.65, 0.80, 0.99, 1.0] 32 | 33 | MIX_AVG = [67, 43, 29, 21, 16, 12, 9, 7, 4, 2] 34 | 35 | BEFORE_AS_STOCKER_FLAG = False 36 | """ Env Static parameters """ 37 | dailyMaxSetup = 0 38 | UTILCUTTIME = 86400 39 | avgLotSize = 1000 40 | 41 | """Decision related Env""" 42 | # Simulation Day로 끊을것인지 결정 43 | timeCut = True 44 | datCutDay = 1 45 | 46 | # M, F, N이 config에 따라 바뀌므로 바꿔주는 세팅 47 | def setM(m): 48 | global M 49 | M = m 50 | # VNS 논문의 세팅 51 | # global M, N 52 | # M = m 53 | # N = 8 * M * F 54 | 55 | def setP(p): 56 | global F 57 | F = p 58 | # VNS 논문의 세팅 59 | # global F, N 60 | # F = p 61 | # N = 8 * M * F 62 | 63 | def setN(n): 64 | global N 65 | N = n 66 | 67 | def saveTable(file_name): 68 | f = open(file_name+'_setupcfg', 'wb') 69 | pickle.dump(STable, f) 70 | f = open(file_name+'_proccfg', 'wb') 71 | pickle.dump(PTable, f) 72 | print(STable) 73 | print(PTable) 74 | 75 | def loadTable(file_name): 76 | fullname = 'env/config/{}_setupcfg'.format(file_name) 77 | print('load STable, PTable : ', fullname) 78 | f = open(fullname, 'rb') 79 | global STable, PTable 80 | STable = pickle.load(f) 81 | fullname = 'env/config/{}_proccfg'.format(file_name) 82 | f = open(fullname, 'rb') 83 | PTable = pickle.load(f) 84 | # print(STable) 85 | # for m in range(M): 86 | # print(m, np.mean(np.array(STable[m])), np.std(np.array(STable[m]))) 87 | # exit() 88 | 89 | def setSetupTable(opt='constant'): 90 | for m in range(M): 91 | table = np.zeros((F, F)) 92 | for f in range(F): 93 | for t in range(F): 94 | st = 0 95 | if f!=t: 96 | if opt=='constant': st = ST 97 | elif opt=='uniform': st = ST*random.uniform(0.7,1.2) 98 | elif opt=='VNS': st = 0 99 | elif opt=='Chen': st = TimeUnit*random.uniform(10, 100) 100 | table[f][t]=int(st) 101 | STable[m]=table 102 | 103 | def setProcTable(opt='constant'): 104 | tt = [[1.1, 0.9, 1.3, 1.25, 1.22, 0.77, 0.65, 0.80, 0.99, 1.0], 105 | [1.0, 1.0, 1.25, 1.3, 1.22, 0.77, 0.65, 0.99, 0.80, 1.0], 106 | [1.0, 1.0, 1.25, 1.3, 1.22, 0.77, 0.65, 0.99, 0.80, 1.0], 107 | [1.0, 1.0, 1.25, 1.3, 1.22, 0.77, 0.65, 0.99, 0.80, 1.0], 108 | [1.1, 0.9, 1.3, 1.25, 1.22, 0.77, 0.65, 0.80, 0.99, 1.0], 109 | [1.05, 0.95, 1.3, 1.25, 0.77, 1.22, 0.65, 0.80, 0.99, 1.0], 110 | [0.95, 1.05, 1.3, 1.25, 0.77, 1.22, 0.65, 0.80, 0.99, 1.0], 111 | [1.1, 0.9, 1.3, 1.25, 1.22, 0.77, 0.65, 0.80, 0.99, 1.0], 112 | [1.1, 0.9, 1.3, 1.25, 1.22, 0.77, 0.65, 0.80, 0.99, 1.0], 113 | [0.9, 1.1, 1.3, 1.25, 0.77, 1.22, 0.65, 1.0, 0.99, 0.8]] 
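    # tt above holds per-machine processing-time multipliers used by the 'manual'
    # option (row m % 10, one factor per product family). The loop below fills
    # PTable[m] according to opt: 'constant' keeps PT, 'uniform' scales PT by
    # U(0.9, 1.1), 'manual' scales by tt, and 'VNS' draws discrete factors for
    # machine 0 and reuses that table for all machines (identical parallel machines).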
114 | for m in range(M): 115 | if opt == 'VNS' and m > 0: 116 | PTable[m] = table 117 | # VNS paper has identical processing time for each machine (IPMS problem) 118 | continue 119 | else: 120 | table = np.zeros(F) 121 | for p in range(F): 122 | if opt == 'constant': 123 | pt = PT 124 | elif opt == 'uniform': 125 | pt = PT*random.uniform(0.9,1.1) 126 | elif opt == 'manual': 127 | pt = PT*tt[m%10][p] 128 | elif opt == 'VNS': 129 | temp = random.Random().random() 130 | if temp < 0.2: 131 | pt = 0.2 * PT 132 | elif temp < 0.4: 133 | pt = 0.4 * PT 134 | elif temp < 0.7: 135 | pt = 1 * PT 136 | elif temp < 0.9: 137 | pt = 1.6 * PT 138 | else: 139 | pt = 2.0 * PT 140 | else: 141 | pt = 0 142 | table[p] = int(pt) 143 | PTable[m]=table 144 | # print('SETTING NEW PROC', PTable) 145 | 146 | def getSetupTime(mach=0, from_type=0, to_type=0): 147 | mach=0 148 | st = STable[mach % F][from_type][to_type] 149 | if st < ST*0.05: st *= TimeUnit 150 | return st * (eta/2) 151 | 152 | def getProcessingTime(macID=0, type=0, factor=1): 153 | pt = PTable[macID % F][type] 154 | # Chen setting에서는 TimeUnit이 기준이라 주석 처리 155 | if pt < PT * 0.05: 156 | pt *= TimeUnit 157 | if PTMODE == 'default': 158 | return pt * factor 159 | if PTMODE == 'constant': 160 | return PT 161 | if PTMODE == 'identical': 162 | pt = weight 163 | return pt[type]*PT 164 | 165 | 166 | def getMaxTime(): 167 | maxst = max([np.max(STable[x]) for x in STable.keys()]) 168 | maxpt = max([np.max(PTable[x]) for x in PTable.keys()]) 169 | if maxst < ST*0.05: maxst *= TimeUnit 170 | if maxpt < PT*0.05: maxpt *= TimeUnit 171 | return maxst, maxpt 172 | 173 | def getIndexGap(job_type, plan, machine_setup_type, curr_time, fin_job=None): 174 | """ 175 | EQP GAP: BEST_EQP - NOW_EQP 176 | BEST_EQP: target plan * avg_proc / rolling time 177 | 178 | Progress rate gap in PKG: (best fin - fin) / plan for rolling time(1-day) 179 | """ 180 | avg_proc = 0 181 | num_mac = 0 182 | plan_type = [j for j in plan if j.type == job_type] 183 | num_job = len(plan_type) 184 | if num_job == 0: return -num_mac 185 | 186 | if fin_job is None: 187 | for mach, setup in enumerate(machine_setup_type): 188 | if setup == job_type: avg_proc += PTable[mach][job_type] 189 | num_mac += 1 190 | avg_proc /= num_mac 191 | rolling_time = max(j.due for j in plan_type) - curr_time 192 | UPED = rolling_time / avg_proc 193 | BEST_EQP = num_job / UPED 194 | return BEST_EQP - num_mac 195 | else: 196 | best_fin = (num_job + fin_job) / max(j.due for j in plan_type) * curr_time 197 | return (best_fin - fin_job) / num_job 198 | 199 | def getIndexNeeds(job_type, plan, machine_setup_type, option='division'): 200 | num_job = 0 201 | for j in plan: 202 | if j.type == job_type: 203 | num_job += 1 204 | num_mac = 0 205 | for t in machine_setup_type: 206 | if t == job_type: 207 | num_mac += 1 208 | 209 | if option == 'division': 210 | # if num_job != 0 and num_job<=3: num_job = 3 211 | if num_mac == 0: return num_job * 2 212 | return num_job / num_mac 213 | else: 214 | return num_job - num_mac 215 | 216 | def getReqEQP(): 217 | """ 218 | BEST_EQP_REQ = (목표치 – 실제치) / (차수 * target_per_day) 219 | :return: 220 | """ 221 | 222 | 223 | k1 = 0 224 | k2 = 0 225 | s_bar = ST 226 | def setParamATCS(due_list): 227 | eta = ST / PT 228 | beta = eta * 30 / N # 30 is expected number of setup 229 | cmax = N * PT * (1+beta) / M #4860 for default 230 | 231 | R_due = (np.max(due_list)-np.min(due_list))/cmax 232 | tau = 1 - (np.mean(due_list) / cmax) 233 | global k1, k2, s_bar 234 | if R_due<0.5: k1 = 4.5+R_due 235 | else: k1 = 
6-2*R_due 236 | k2 = tau/2/np.sqrt(eta) 237 | s_bar = np.mean(np.array(list(STable.values()))) * TimeUnit 238 | # k2 = 1 239 | print(k1, k2, s_bar) 240 | return k1, k2 241 | 242 | def getIndexATCS(due, proc, curr, setup): 243 | WSPT = 1 / proc 244 | slackness = max(due-proc-curr, 0) 245 | MSR = math.exp(-slackness/(k1*PT)) 246 | SU = math.exp(-setup/(k2*s_bar)) 247 | 248 | # print(WSPT, MSR, SU) 249 | 250 | return WSPT*MSR*SU 251 | 252 | # VNS 논문에서처럼 k1, k2 값을 찾기위해 만듬 253 | def getIndexATCS_(due, proc, curr, setup, max_arrival, k_1, k_2): 254 | WSPT = 1 / proc 255 | slackness = max(due - proc - curr + max(max_arrival - curr, 0), 0) 256 | MSR = math.exp(-slackness / (k_1 * PT)) 257 | SU = math.exp(-setup / (k_2 * s_bar)) 258 | 259 | return WSPT * MSR * SU 260 | 261 | def getIndexBATCS(dues, proc, curr, setup, max_arrival, k_1, k_2): 262 | BATCS = 0 263 | for i in range(len(dues)): 264 | WSPT = 1 / proc 265 | slackness = max(dues[i] - proc - curr + max(max_arrival - curr, 0), 0) 266 | MSR = math.exp(-slackness/(k_1*PT)) 267 | SU = math.exp(-setup/(k_2*s_bar)) 268 | BATCS += WSPT*MSR*SU 269 | 270 | return BATCS * len(dues) 271 | 272 | # Calculate k1 as in VNS paper 273 | def getIndexATC(due, proc, curr, max_arrival, k): 274 | WSPT = 1 / proc 275 | slackness = max(due - proc - curr + max(max_arrival - curr, 0), 0) 276 | MSR = math.exp(-slackness/(k*PT)) 277 | return WSPT*MSR 278 | 279 | def getIndexBATC(dues, proc, curr, max_arrival, k): 280 | BATC = 0 281 | for i in range(len(dues)): 282 | WSPT = 1 / proc 283 | slackness = max(dues[i] - proc - curr + max(max_arrival - curr, 0), 0) 284 | MSR = math.exp(-slackness/(k*PT)) 285 | BATC += WSPT*MSR 286 | return BATC * len(dues) 287 | -------------------------------------------------------------------------------- /env/wrapper.py: -------------------------------------------------------------------------------- 1 | from collections import deque, defaultdict 2 | import sys, copy, enum 3 | import numpy as np 4 | import cv2 5 | 6 | from utils.core.Job import Job 7 | import env.util_sim as util 8 | 9 | printFlag = True 10 | debugFlag = False 11 | 12 | class Location(enum.Enum): 13 | Before = 0 14 | Reenter = 4 15 | Waiting = 1 16 | Resource = 2 17 | Finished = 3 18 | 19 | class ProductCounter(object): 20 | def __init__(self, prod, initial_map): 21 | self.prod = prod 22 | self.sn = 0 23 | self.an = 0 24 | self.info_map = dict() # key is Location, value is List of information in that location 25 | for name, member in Location.__members__.items(): 26 | self.info_map[member]= list() 27 | for map in initial_map: 28 | l, v = map 29 | self.info_map[l].append(v) 30 | self.total = len(initial_map) 31 | def __len__(self): return self.total # Tot Job Plan per Prod, Same index with prod_list 32 | 33 | def count(self, loc): 34 | return len(self.info_map[loc]) 35 | def info(self, loc): 36 | return self.info_map[loc] 37 | 38 | def push(self, loc, info): 39 | self.info_map[loc].append(info) 40 | def pop(self, loc, info): 41 | self.info_map[loc].remove(info) 42 | 43 | class Wrapper(object): 44 | def __init__(self): 45 | 46 | '''Model statics : independent of certain product''' 47 | self.env_name = "SimEnvSim" 48 | self.prod_list = list() 49 | self.StateGroupList = [] # 200307 deprecated, pt will not be grouped for paper, 200428 [] means no group summation 50 | self.MaxProcTime = 0 51 | self.MaxSetupTime = 0 52 | self.ObserveRange = util.UnitTime 53 | 54 | '''Model dynamics''' 55 | self.plan_prod_list = list() # Tot Job Plan per Prod, Same index with prod_list 56 | 
self.due_prod_list = defaultdict(list) 57 | self._prodTimeCube = [] 58 | 59 | '''Dynamic status''' 60 | self.from_type = None 61 | self.prod_counter = defaultdict(ProductCounter) # dict from F to cnt 62 | #Cumulative status 63 | self.max_decision = 0 64 | self.machine_setup_type = [0] * util.M 65 | 66 | '''State Utility''' 67 | self.action_history = [] 68 | self.state_history = deque() 69 | self.reward_history = [] 70 | self.episode = 0 71 | self.reward_dict = defaultdict(float) 72 | 73 | def SetStateParameters(self, action_dim, auxin_dim, state_dim, bucket, use=[0,1,2,3], auxin_use=[4], 74 | sopt='mat', ropt='targetlag', normopt='total'): 75 | self.action_dim = action_dim 76 | self.auxin_dim = auxin_dim 77 | self.auxin_use = auxin_use 78 | if type(state_dim) is not list: self.state_dim = [state_dim] 79 | else: self.state_dim = state_dim 80 | self.bucket = bucket 81 | self.time_window = bucket if bucket!=0 else sys.maxsize 82 | # self.num_window = 5 83 | self.Hw = 6 84 | self.Hp = 5 85 | self.sopt = sopt 86 | self.ropt = ropt 87 | self.use = use 88 | self.normopt = normopt 89 | 90 | self.policy_mach = 'SPT' 91 | # self.policy_mach = 'FIFO' 92 | self.state_interval_time = 1800 93 | self.state_max_time = 7200 94 | self.use_scaling_factor = True 95 | 96 | # if self.episode % 100 == 0: 97 | # print(self.__dict__) 98 | # print(len(self.prod_list), ) 99 | def set_counter(self, codelist:list, plan:list, printFlag=False): 100 | ''' 101 | ProdCodeQtyList: [Code, Plan] value insert 102 | :param prodQtyList: 103 | :return: 104 | ''' 105 | self.prod_list = codelist 106 | self.plan_prod_list.clear() 107 | self.due_prod_list.clear() 108 | self.prod_counter.clear() 109 | self.curr_time = 0 110 | self.episode += 1 111 | self.state_history.clear() 112 | self.action_history.clear() 113 | self.reward_history.clear() 114 | 115 | #FIXME : Comparement for Whle machine is required 116 | if self.MaxProcTime == 0: # Calculate once when Max processing time is not specified 117 | max_st, max_pt = util.getMaxTime() 118 | self.MaxProcTime = max_st + max_pt 119 | self.MaxSetupTime = max_st 120 | print('Max setup time is', self.MaxSetupTime, self.MaxProcTime) 121 | tuplelist_byprod=defaultdict(list) 122 | for job in plan: 123 | if job.type in self.prod_list: 124 | if job.arrival > 0: 125 | tupleinfo = (Location.Before, job) #dummy before cnter 126 | if util.BEFORE_AS_STOCKER_FLAG: 127 | tuplelist_byprod[job.type].append((Location.Waiting, job)) 128 | else: tupleinfo = (Location.Waiting, job) 129 | tuplelist_byprod[job.type].append(tupleinfo) 130 | self.due_prod_list[job.type].append(job.due) 131 | self.tot_plan_qty = 0 132 | for idx in range(len(self.prod_list)): 133 | prod = self.prod_list[idx] 134 | self.prod_counter[prod]=ProductCounter(prod, initial_map=tuplelist_byprod[prod]) 135 | self.plan_prod_list.append(len(self.prod_counter[prod]))#({prod: len(self.prod_counter[prod])}) 136 | self.tot_plan_qty += len(self.prod_counter[prod]) 137 | 138 | self.from_type = self.get_from_type(plan) 139 | if self.normopt is None: 140 | self.normalization_factor = 1 141 | elif self.normopt == 'total': 142 | # To minimize the loss of information, constant normalization factor is the best 143 | self.normalization_factor = self.tot_plan_qty 144 | else: 145 | self.normalization_factor = self.plan_prod_list[:] 146 | self.max_plan_factor = max(self.plan_prod_list) / util.Horizon 147 | if printFlag: 148 | # print('Loading TotPlanQty of {} products'.format(len(self.prod_list)), self.prod_list) 149 | print('Total is {} set Remained 
prodCodeLPSTList as '.format(self.tot_plan_qty), self.plan_prod_list, self.due_prod_list) 150 | # printFlag = False 151 | 152 | def update(self, j, loc_from, loc_to): 153 | assert isinstance(j, Job) 154 | counter= self.prod_counter[j.type] 155 | assert isinstance(counter, ProductCounter) 156 | # counter.pop(loc_from, j.__getattribute__(self.loc_to_infostr(loc_from))) 157 | # counter.push(loc_to, j.__getattribute__(self.loc_to_infostr(loc_to))) 158 | counter.pop(loc_from, j) 159 | if loc_to is not None: counter.push(loc_to, j) 160 | def loc_to_infostr(self, loc): 161 | if loc == Location.Finished: 162 | return 'due' 163 | elif loc == Location.Waiting: 164 | return 'arrival' 165 | elif loc == Location.Resource: 166 | return 'mac' 167 | else: 168 | return 'idx' 169 | 170 | def get_count(self, prod, loc): return self.prod_counter[prod].count(loc) 171 | def get_info(self, prod, loc, attr=None): 172 | info_list = self.prod_counter[prod].info(loc) 173 | if attr is None: return info_list 174 | attr_list = [info_list[j].__getattribute__(attr) for j in range(len(info_list))] 175 | return attr_list 176 | 177 | def set_decision_time(self, curr_time): self.curr_time = curr_time 178 | # f3. remaining tim 179 | def getState(self, recent_job, mac_dict, plan=None, filter=None, res_gantt=None): 180 | if recent_job is not None: 181 | mac_dict.update({self.curr_time:recent_job}) 182 | self.from_type = recent_job.type 183 | 184 | state = self._getFamilyBasedState(mac_dict=mac_dict, res_gantt=res_gantt, filter=filter) 185 | 186 | if 7 in self.use: 187 | # 7. setup count 188 | # state.extend([self.prod_counter[prod].sn/24 for prod in self.prod_list]) 189 | # rslt = list(self._get_res_state(res_gantt).reshape(-1)) 190 | # state.extend(rslt) 191 | pass 192 | if 8 in self.use: 193 | # 8. 
Action history ((F+1) * decision num) 194 | rslt = self._getHistoryVector(10) # self.max_decision-1) 195 | state.extend(rslt) 196 | # print('history', rslt) 197 | 198 | if len(self.state_dim)==1: 199 | auxin = [1] if len(plan) <= util.M else [0] 200 | auxin.extend(self._get_auxin()) 201 | state.extend(auxin) 202 | observation = {'state': state, 'reward': self.reward_dict} 203 | 204 | if self.auxin_dim != 0: 205 | auxin = [1] if len(plan)<=util.M else [0] 206 | auxin.extend(self._get_auxin()) 207 | observation.update({'auxin':auxin}) 208 | return observation 209 | 210 | def get_from_type(self, plan, mac_dict=None): 211 | if self.action_dim == util.F * util.F: return None 212 | machine_type_list = self.get_from_idlist(mac_dict) 213 | if len(machine_type_list)==0: 214 | machine_type_list = self.get_from_idlist() 215 | print('all machine is not idle in this bucket:', machine_type_list) 216 | machine_type_list.sort(key=lambda job_type: util.getIndexGap(job_type, plan, self.machine_setup_type, self.curr_time, 217 | fin_job=self.get_count(job_type, Location.Finished))) 218 | 219 | from_type = machine_type_list[0] 220 | self.from_type = from_type 221 | return from_type 222 | 223 | def set_from_mach(self, mac_dict=None): 224 | self.from_mach = -np.ones((util.F, util.F)) 225 | for from_type in self.prod_list: 226 | if self.policy_mach is None or self.policy_mach == 'RANDOM' or self.policy_mach == 'FIFO': 227 | self.from_mach[from_type] = [self.get_from_mach(from_type, mac_dict)] * util.F 228 | elif self.policy_mach == 'SPT': 229 | self.from_mach[from_type] = self.get_from_mach(from_type, mac_dict) 230 | 231 | def get_from_mach(self, from_type=None, mac_dict=None): 232 | if self.policy_mach is None or self.policy_mach == 'RANDOM': 233 | mach_list = list() 234 | for end_time, job in mac_dict.items(): 235 | if job.type == from_type: mach_list.append(job.mac) 236 | return np.random.choice(mach_list) 237 | elif self.policy_mach == 'FIFO': 238 | for end_time, job in sorted(mac_dict.items()): 239 | if job.type == from_type and end_time-self.curr_time 3: 264 | machine_setup_type.append(self.machine_setup_type[job.mac]) 265 | # print('feasi check', job.type, self.get_count(job.type, Location.Waiting)) 266 | if remain_num == 0: #setup should be occured 267 | prior_setup_type.append(self.machine_setup_type[job.mac]) 268 | elif remain_num == 1 and end_time + job.pt - self.curr_time < self.bucket: 269 | prior_setup_type.append(self.machine_setup_type[job.mac]) 270 | if len(prior_setup_type) != 0: 271 | return list(set(prior_setup_type)) 272 | return list(set(machine_setup_type)) 273 | 274 | def _get_last_action(self, from_action=False, to_action=True, flatten=True): 275 | rslt_action = np.zeros((len(self.prod_list), 2)) 276 | if len(self.action_history) != 0: 277 | last_action_from = self.action_history[-1] // len(self.prod_list) 278 | rslt_action[last_action_from,0] += 1 279 | last_action = self.action_history[-1] % len(self.prod_list) # always history means to-type action 280 | rslt_action[last_action,1] += 1 281 | if from_action and to_action: 282 | pass 283 | elif from_action: 284 | rslt_action = rslt_action[:,0] 285 | elif to_action: 286 | rslt_action = rslt_action[:,1] 287 | else: 288 | pass 289 | if flatten: return list(rslt_action.reshape(-1)) 290 | else: return rslt_action 291 | def _get_auxin(self): 292 | 293 | # 0 ~ 2 : from type related state 294 | if 0 in self.auxin_use: 295 | rslt = self._get_from_state() 296 | else: rslt = [[0]] * 3 #null 2D from-type state 297 | 298 | share_auxin = True if 
type(self.auxin_dim) is list and len(self.auxin_dim) == 2 else False 299 | 300 | # 3 : last action vector 301 | rslt_action = self._get_last_action(True, True, False) 302 | # rslt_action = self._get_last_action() 303 | if share_auxin: rslt.append([rslt_action] * len(self.prod_list)) 304 | else: rslt.append(rslt_action) 305 | 306 | state = [] 307 | for filter_idx in self.prod_list: 308 | state_by_filter = [] 309 | for i in self.auxin_use: 310 | if i >= 4: continue 311 | elif type(rslt[i][0]) is list or type(rslt[i][0]) is np.ndarray: 312 | state_by_filter.extend(rslt[i][filter_idx]) 313 | else: 314 | state_by_filter.append(rslt[i][filter_idx]) 315 | 316 | if share_auxin: 317 | state.append(state_by_filter) 318 | else: 319 | state.extend(state_by_filter) 320 | 321 | # 4 : last reward 322 | if 4 in self.auxin_use: 323 | last_reward = 0 if len(self.reward_history) == 0 else self.reward_history[-1] 324 | last_reward /= 20 325 | if share_auxin: 326 | for state_by_f in state: 327 | state_by_f.append(last_reward) 328 | else: 329 | state.append(last_reward) 330 | return state 331 | def _get_from_state(self, mac_dict=None): # machine or from-type related state 332 | rslt = [] 333 | 334 | # 4. Setup time (S * 1) 335 | rslt4 = self.normalization_maxabs(self._getSetupTimeVector(), self.MaxSetupTime) 336 | 337 | # 5. Setup state (S * 1, one-hot encoding) 338 | if mac_dict is not None: 339 | rslt5 = self._get_last_action(True, True, flatten=False) 340 | else: 341 | rslt5 = self._getSetupTypeVector() 342 | 343 | # 6. Processing time (S * 1) 344 | rslt6 = self.normalization_maxabs(self._getProcTimeVector(), self.MaxSetupTime) 345 | 346 | rslt.append(rslt4) 347 | rslt.append(rslt5) 348 | rslt.append(rslt6) 349 | return rslt 350 | def _getFamilyBasedState(self, mac_dict, res_gantt=None, filter=None): 351 | """ 352 | Get the FBS (family-based state) in the paper. 353 | 1. Calculate state features using the various methods contained in the Wrapper() class. 354 | 2. Generate a list of the state vectors named 'rslt'. 355 | 3. Stack rslt as a 2-D matrix based on the family type """ 356 | 357 | # 0. Processed job count (F * 1) 358 | rslt0 = self._getTrackOutCnt() 359 | 360 | # 1. Processing job count (F * 5), discretized by remaining time. 361 | # [[20%], [40%], [60%], [80%], [80%+], 362 | # [20%], [40%], [60%], [80%], [80%+] ... ] 363 | 364 | if self.sopt == 'time': 365 | """ S_p (paper) """ 366 | rslt1 = self._getProcessingCnt(mac_dict) 367 | elif self.sopt == 'proc': 368 | # These are experimental states (processing job attributes) 369 | rslt1 = self._get_proc_info(mac_dict) 370 | else: 371 | rslt1 = self._getProcessingCnt(mac_dict) 372 | rslt1 = self.normalization_maxabs(rslt1, util.M) 373 | rslt1 = np.concatenate((rslt1, self._get_proc_info(mac_dict)), axis=1) 374 | 375 | # 2. Waiting job (N_F * 2H_W) 376 | # rslt2 = self._getWaitingCnt() # These are experimental states (based on observation time range) 377 | """ S_w (paper) """ 378 | rslt2 = self._getTimeCnt(Location.Waiting, job_info=True) 379 | 380 | # 3.
Before entrance (F * 4). This is experimental (used when ready time is not zero) 381 | rslt3 = self._getEnterCnt() 382 | 383 | rslt0 = self.normalization_maxabs(rslt0, self.normalization_factor) 384 | rslt2 = self.normalization_maxabs(rslt2, self.normalization_factor) 385 | rslt3 = self.normalization_maxabs(rslt3, self.normalization_factor) 386 | 387 | rslt = [] 388 | rslt.append(rslt0) 389 | rslt.append(rslt1) 390 | rslt.append(rslt2) 391 | rslt.append(rslt3) 392 | rslt.extend(self._get_from_state(mac_dict=mac_dict)) 393 | rslt7 = self._get_res_gantt(res_gantt) 394 | # rslt7 = self._get_res_state(res_gantt) 395 | rslt.append(rslt7) 396 | #-------------------------------------------- 397 | 398 | return self._stackFamily(rslt, filter) 399 | 400 | def _stackFamily(self, rslt, filter): 401 | """ 402 | Stack the rslt list into the 2-D state. 403 | Two functions of this method: 404 | 1. filter (list of product codes or product family names) 405 | By default, the filter function is not used. 406 | 2. feature usage 407 | Developers can omit some state features by controlling self.use (specified by args.use) 408 | """ 409 | 410 | state = [] 411 | filter_idx_list = [] 412 | # FIXME : Is it possible to generate only the state vectors included in filter? (Short time, Short code) 413 | if filter is None: 414 | filter_idx_list = list(range(len(self.prod_list))) 415 | else: 416 | for code in filter: 417 | if code in self.prod_list: 418 | filter_idx_list.append(self.prod_list.index(code)) 419 | else: 420 | filter_idx_list.append(code) 421 | for filter_idx in filter_idx_list: 422 | state_by_filter = [] 423 | for i in self.use: 424 | if i >= 8: continue 425 | 426 | if type(rslt[i][0]) is list or type(rslt[i][0]) is np.ndarray: 427 | state_by_filter.extend(rslt[i][filter_idx]) 428 | else: 429 | state_by_filter.append(rslt[i][filter_idx]) 430 | 431 | if len(self.state_dim)==2: 432 | # print(filter_idx, state_by_filter) 433 | state.append(state_by_filter) 434 | else: state.extend(state_by_filter) 435 | return state 436 | 437 | def addHistory(self, last_action, last_reward): 438 | self.action_history.append(last_action) 439 | self.reward_history.append(last_reward) 440 | 441 | def _getTimeCnt(self, loc, job_info = False, mac_dict = None): 442 | """ 443 | S_w (paper): count jobs with respect to their due-dates, as in the paper 444 | H_w (paper): hyperparameter that specifies the number of time windows to consider 445 | """ 446 | if loc == Location.Finished: num_window=2 #if fin2 mode 447 | elif loc == Location.Before: num_window= self.Hw 448 | else: num_window=self.Hw*2 449 | if job_info: num_window += 4 450 | rslt = np.zeros((len(self.prod_list), num_window)) 451 | #F * Loc_num * Window_num 452 | if mac_dict is not None: 453 | for end_time, job in mac_dict.items(): 454 | slack = job.due-end_time 455 | rslt[job.type][self.time_to_window(slack)]+=1 456 | else: 457 | for prod in self.prod_list: 458 | if loc == Location.Waiting: 459 | due_list = self.get_info(prod, loc, 'due') 460 | pt_list = self.get_info(prod, loc, 'pt') 461 | for due, pt in zip(due_list,pt_list): 462 | slack = due - self.curr_time #- pt#util.PT 463 | rslt[prod][self.time_to_window(slack)]+=1 464 | if job_info: 465 | idx = -4 466 | for due in sorted(due_list): 467 | slack = ((due - self.curr_time) / self.time_window) / (self.Hw) * 2 468 | rslt[prod][idx] = min(1, max(0, slack + 0.5)) 469 | idx += 1 470 | if idx == 0: break 471 | 472 | elif loc == Location.Finished: 473 | late_list = self.get_info(prod, loc, 'late') 474 | for late in late_list: 475 | if num_window==2:
rslt[prod][0 if late<0 else 1] += 1 476 | else: rslt[prod][self.time_to_window(-late)] += 1 477 | elif loc == Location.Before: 478 | arr_list = self.get_info(prod, loc, 'arrival') 479 | for arr in arr_list: 480 | time_till_arrive = arr - self.curr_time #should always positive number 481 | if time_till_arrive<=0: print('ERROR! Before Location should be updated') 482 | rslt[prod][self.time_to_window(time_till_arrive)-num_window] += 1 483 | 484 | return rslt 485 | 486 | def time_to_window(self, time): 487 | w = int(np.floor(time / self.time_window)) 488 | if 'target' in self.ropt: w += 1 489 | else: w += 1 #self.num_window+1 490 | if w > (self.Hw-1)*2: return self.Hw*2-1 # max value 491 | elif w <= 0: return 0 492 | else: return w 493 | 494 | 495 | def _calTardiness(self, loc, job): 496 | assert isinstance(job, Job) 497 | if loc == Location.Resource: 498 | job.mac 499 | self.curr_time 500 | 501 | def _getTrackOutCnt(self): 502 | """ # of finished jobs, third column of S_h (paper) """ 503 | if self.sopt == 'time': return self._getTimeCnt(Location.Finished) 504 | rslt = [] 505 | for type in self.prod_list: 506 | rslt.append(self.get_count(type, Location.Finished)) 507 | return rslt 508 | 509 | def _get_proc_info(self, mac_dict : dict): 510 | p = len(self.prod_list) 511 | n = 4 512 | l = 3+1 # info num: slack, rem, setup, proc 513 | rslt = np.zeros((p, n*l)) 514 | prod_cnt = [0] * p 515 | for end_time, job in sorted(mac_dict.items()): 516 | temp_cnt = prod_cnt[job.type] 517 | if temp_cnt >= n: continue 518 | prod_cnt[job.type]+=1 519 | remain = (end_time - self.curr_time) / self.MaxProcTime 520 | rslt[job.type][temp_cnt*l] = remain 521 | if job.idx < 0: continue 522 | slack = ((end_time - job.due) / self.time_window) / (self.Hw)*2 523 | rslt[job.type][temp_cnt*l+1] = min(1,max(0,slack+0.5)) 524 | rslt[job.type][temp_cnt*l+2] = job.st / self.MaxProcTime 525 | rslt[job.type][temp_cnt*l+3] = job.pt / self.MaxProcTime 526 | 527 | return rslt 528 | 529 | 530 | def _getProcessingCnt(self, mac_dict : dict): 531 | """ 532 | S_p (paper) : processing job count (N_F * H_p) 533 | H_p (paper) : self.Hp, # of time window to consider 534 | """ 535 | # rslt = [[0] * lvl for _ in range(len(self.prod_list))] 536 | rslt = np.zeros((len(self.prod_list), self.Hp)) 537 | for end_time, job in mac_dict.items(): 538 | remain = end_time - self.curr_time 539 | # remain_lvl = int(np.floor(lvl * remain / self.MaxProcTime)) 540 | remain_lvl = int(np.floor(remain / self.time_window)) 541 | if remain_lvl >= self.Hp: remain_lvl = self.Hp-1 542 | rslt[job.type][remain_lvl] += 1 543 | # if remain > job.pt: #setup going-on state 544 | # rslt[job.type][lvl] = util.M #util.M is right? 
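# Added worked example (not part of the original source) to clarify the S_p windowing above,
# assuming an illustrative bucket of 5400 s (so self.time_window == 5400) and self.Hp == 5:
#   a job whose processing ends 12000 s after curr_time gives remain_lvl = floor(12000 / 5400) = 2,
#   so rslt[job.type][2] += 1; any remaining time of 5 * 5400 s or more is clipped to the last
#   window, index self.Hp - 1 == 4. Each family row therefore counts its in-process jobs per window.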
545 | # if self.sopt == 'time': 546 | # tc= self._getTimeCnt(Location.Resource, mac_dict=mac_dict) 547 | # rslt = np.concatenate((rslt, tc), axis=1) 548 | # # return tc 549 | return rslt 550 | 551 | def _get_res_gantt(self, res_gantt : dict): 552 | """ S_h (paper) : Machine history of each family """ 553 | res_state = np.zeros((util.F, 3)) 554 | for res_id, res_history in res_gantt.items(): 555 | if len(res_history) > 0: 556 | for his in res_history: 557 | res_state[his.job_type, 1] += his.time_setup 558 | res_state[his.job_type, 2] += his.time_proc 559 | his = res_history[-1] 560 | til_setup = his.decision_time + his.time_setup - self.curr_time 561 | til_proc = til_setup + his.time_proc 562 | if til_setup > 0: 563 | res_state[his.job_type, 0] = 1 # setup going on 564 | res_state[his.job_type, 1] -= til_setup 565 | if til_proc: res_state[his.job_type, 2] -= til_proc 566 | 567 | max_time = self.curr_time * util.M if self.curr_time != 0 else 1 568 | res_state[:, 1] /= max_time 569 | res_state[:, 2] /= max_time 570 | return res_state 571 | def _get_res_state(self, res_gantt : dict): 572 | """ 0514. experimental features 573 | 1. setup going-on 574 | 2. setup history 575 | 3. pass job history (proc) 576 | """ 577 | 578 | res_state = np.zeros((util.M, 3)) 579 | for res_id, res_history in res_gantt.items(): 580 | proc, setup, idle = 0, 0, 0 581 | if len(res_history) > 0: 582 | for his in res_history: 583 | proc += his.time_proc 584 | setup += his.time_setup 585 | his = res_history[-1] 586 | til_setup = his.decision_time + his.time_setup - self.curr_time 587 | til_proc = til_setup + his.time_proc 588 | if til_setup > 0: 589 | res_state[res_id, 0] = 1 # setup going on 590 | setup -= til_setup 591 | if til_proc: proc -= til_proc 592 | 593 | res_state[res_id, 1] = setup / self.curr_time 594 | res_state[res_id, 2] = proc / self.curr_time 595 | return res_state 596 | 597 | 598 | def normalization_maxabs(self, rslt:list, factor=None): 599 | if debugFlag: return rslt 600 | is_list = (type(rslt[0]) == list) or (type(rslt[0]) == np.ndarray) 601 | # print(is_list, type(rslt[0])) 602 | if type(factor) is list: factor = factor[:] 603 | 604 | if factor is None: #default minmax 605 | factor = max(rslt) 606 | for prod_idx in range(len(rslt)): 607 | temp = rslt[prod_idx] 608 | temp_factor = factor 609 | if type(factor) is list: temp_factor = factor[prod_idx] 610 | if temp_factor == 0: temp_factor = max(max(temp),1) if is_list else max(temp,1) # better smoothing than 0.0001 611 | if is_list: 612 | temp_rslt = np.array(temp) / temp_factor 613 | rslt[prod_idx] = list(temp_rslt) 614 | elif type(rslt[0])==np.ndarray: 615 | rslt[prod_idx] = temp / temp_factor 616 | else: 617 | rslt[prod_idx] = float( temp / temp_factor ) 618 | 619 | return rslt 620 | 621 | def fab2018(self, mac_dict): 622 | """state of TPDQN in V-C. Performance Comparisons in the paper""" 623 | rslt1 = np.zeros((util.M, util.F)) 624 | # 1. res setup type M * F (one-hot) 625 | for _, job in mac_dict.items(): 626 | rslt1[job.mac][job.type] = 1 627 | rslt2 = np.zeros((util.N, util.F)) 628 | rslt3 = np.zeros((util.N, 3)) 629 | # 2. job setup type N * F (one-hot) 630 | # 3. job location N * loc (one-hot) 631 | 632 | rslt4 = np.zeros(util.F) 633 | # 4. 
normalzied deviation of set due date for current operation (here, family) 634 | for prod in self.prod_list: 635 | for loc in [Location.Waiting, Location.Resource, Location.Finished]: 636 | job_list = self.get_info(prod, loc) 637 | if loc == Location.Waiting: 638 | set_due = np.array(self.get_info(prod,loc,attr='due')) 639 | if len(set_due)==0: rslt4[prod]=0 640 | else: rslt4[prod] = np.std(set_due) / util.UnitTime 641 | for j in job_list: 642 | if j.idx >= 0: 643 | rslt2[int(j.idx)][prod] = 1 644 | rslt3[int(j.idx)][loc.value-1] = 1 645 | state = [] 646 | state.extend(rslt1.reshape(-1)) 647 | state.extend(rslt2.reshape(-1)) 648 | state.extend(rslt3.reshape(-1)) 649 | state.extend(rslt4.reshape(-1)) 650 | return state 651 | 652 | def upm2007(self, res_gantt): 653 | """state of LBF-Q in V-C. Performance Comparisons in the paper""" 654 | state = list() 655 | rslt1 = np.zeros((util.N, 2)) 656 | loc_idx_dict = {Location.Waiting:1, Location.Finished:-1, Location.Resource:0} 657 | max_due = 0 658 | for prod in self.prod_list: 659 | for loc in [Location.Waiting, Location.Resource, Location.Finished]: 660 | job_list = self.get_info(prod, loc) 661 | for j in job_list: 662 | if j.idx >= 0: 663 | rslt1[int(j.idx)][0] = j.due-self.curr_time #ej 664 | rslt1[int(j.idx)][1] = loc_idx_dict[loc] #qj 665 | 666 | if j.due > max_due: max_due = j.due 667 | rslt1[:,0] /= max_due 668 | rslt2 = np.zeros((util.M, 3)) 669 | b = self.curr_time #latest release time in original paper 670 | res_state = np.zeros((util.F, 3)) 671 | for res_id, res_history in res_gantt.items(): 672 | lst = 0 673 | recent_job = -1 674 | proc_job = -1 675 | if len(res_history) > 0: 676 | for i in range(1,len(res_history)+1): 677 | his = res_history[-i] 678 | if his.time_setup > 0: 679 | lst = self.curr_time-his.decision_time 680 | break 681 | his = res_history[-1] 682 | if his.decision_time + his.time_setup + his.time_proc > self.curr_time: 683 | proc_job = his.job_id 684 | if len(res_history) > 1: 685 | recent_job = res_history[-2].job_id 686 | else: 687 | recent_job = his.job_id 688 | rslt2[res_id][0] = (recent_job+1) / util.N # T0i latest job processed 689 | rslt2[res_id][1] = (proc_job+1) / util.N # Ti job processing 690 | if proc_job==-1: lst = 0 691 | rslt2[res_id][2] = lst / self.MaxProcTime # ti time from latest setup start 692 | state.extend(rslt1.reshape(-1)) 693 | state.extend(rslt2.reshape(-1)) 694 | return state 695 | 696 | def upm2012(self, mac_dict, from_type): 697 | state = [] 698 | proc_dict = defaultdict(list) 699 | mean_pt = [0] * util.F 700 | # f1. waiting jobs, pow(2, -1/NJ) 701 | for prod in self.prod_list: 702 | nj = self.get_count(prod, Location.Waiting) 703 | v = 0 if nj==0 else pow(2, -1/nj) 704 | state.append(v) 705 | for mach in range(util.M): 706 | proc_dict[prod].append(util.getProcessingTime(mach, prod)+util.getSetupTime(mach, from_type, prod)) 707 | mean_pt[prod] = float(np.mean(proc_dict[prod]) / util.M) 708 | """ 709 | util.M features 710 | f2. prod type 711 | f3. average proc 712 | f4. average slack 713 | """ 714 | temp = np.zeros((util.M, 3)) 715 | for end_time, job in mac_dict.items(): 716 | temp[job.mac][0] = float((job.type+1) / util.F) 717 | temp[job.mac][1] = float((end_time-self.curr_time)/mean_pt[job.type]) 718 | temp[job.mac][2] = float((job.due-self.curr_time)/mean_pt[job.type]) 719 | state.extend(temp.reshape(-1)) 720 | """ 721 | util.F features 722 | f5. min tightness 723 | f6. max tightness 724 | f7. average tightness 725 | f8. 
time interval number of tightness 726 | """ 727 | temp = np.zeros((util.F, 3 + 4)) 728 | for prod in self.prod_list: 729 | due_list = self.get_info(prod, Location.Waiting, 'due') 730 | if len(due_list) == 0: continue 731 | temp[prod][0] = float((np.min(due_list)-self.curr_time)/mean_pt[prod]) 732 | temp[prod][1] = float((np.max(due_list)-self.curr_time)/mean_pt[prod]) 733 | temp[prod][2] = float((np.average(due_list)-self.curr_time)/mean_pt[prod]) 734 | g_cnt = [0] * 4 735 | for due in due_list: 736 | if due-self.curr_time > np.max(proc_dict[prod]): g_cnt[0] += 1 737 | elif due-self.curr_time > np.min(proc_dict[prod]): g_cnt[1] += 1 738 | elif due-self.curr_time > 0: g_cnt[2] += 1 739 | else: g_cnt[3] += 1 740 | for g in range(4): 741 | temp[prod][3+g] = 0 if g_cnt[g] == 0 else pow(2, -1/g_cnt[g]) 742 | state.extend(temp.reshape(-1)) 743 | 744 | return state 745 | 746 | """ From here, methods related to experimental state features""" 747 | def _getWaitingCnt(self): 748 | """ 749 | S_w (experimental) : waiting job count ( dimension: F * 3) 750 | 751 | """ 752 | rslt = [[0, 0, 0] for _ in range(len(self.prod_list))] 753 | 754 | for type in range(len(self.prod_list)): 755 | temp_lpst_list = self.get_info(type, Location.Waiting, 'due') 756 | for lpst in temp_lpst_list: 757 | if lpst <= self.curr_time: #already late job 758 | rslt[type][0] += 1 759 | elif lpst <= self.curr_time + self.ObserveRange: 760 | rslt[type][1] += 1 761 | else: # future job more than observ range 762 | rslt[type][2] += 1 763 | pass 764 | 765 | return rslt 766 | 767 | def _getEnterCnt(self): 768 | """Given distribution of ready time/reentrant time to State vector""" 769 | """relative arrival time based""" 770 | if 'time' in self.sopt: return self._getTimeCnt(Location.Before) 771 | 772 | """absolute due time based""" 773 | rslt = np.zeros((len(self.prod_list), 3)) 774 | for type in range(len(self.prod_list)): 775 | temp_lpst_list = self.get_info(type, Location.Before, 'arrival') 776 | for lpst in temp_lpst_list: 777 | if lpst <= self.curr_time: #already late 778 | rslt[type][0] += 1 779 | elif lpst <= self.curr_time + self.ObserveRange: 780 | rslt[type][1] += 1 781 | else: # future job more than observ range 782 | rslt[type][2] += 1 783 | pass 784 | return rslt 785 | 786 | def _getProcTimeVector(self): 787 | state = [] 788 | if self.from_type is None: 789 | for to_type in self.prod_list: 790 | temp_list = list() 791 | for from_type in self.prod_list: 792 | if self.policy_mach == 'RANDOM': 793 | from_mach_list = [i for i, x in enumerate(self.machine_setup_type) if x == from_type] 794 | temp_tot = 0 795 | temp_cnt = 0 796 | for from_mach in from_mach_list: 797 | temp = float(util.getProcessingTime(from_mach, to_type)) 798 | if temp < 0: temp = 0 799 | temp_tot += temp 800 | temp_cnt += 1 801 | temp_list.append(0 if temp_cnt == 0 else float(temp_tot / temp_cnt)) 802 | else: # machine was dispatched(determined) by set_from_mach 803 | from_mach = self.from_mach[from_type][to_type] 804 | temp_list.append(0 if from_mach == -1 else float(util.getProcessingTime(from_mach, to_type))) 805 | state.append(temp_list) 806 | return state 807 | 808 | from_mach_list = self.from_mach[self.from_type] if self.from_type is not None else [0] 809 | for to_type in self.prod_list: 810 | temp_tot, temp_cnt = 0, 0 811 | for mach in from_mach_list: 812 | temp_tot += util.getProcessingTime(mach, to_type) 813 | temp_cnt += 1 814 | state.append([float(temp_tot/temp_cnt)]) 815 | return state 816 | def _getSetupTimeVector(self): 817 | state = [] 818 | 819 
| if self.from_type is None: 820 | for to_type in self.prod_list: 821 | temp_list = list() 822 | for from_type in self.prod_list: 823 | if self.policy_mach == 'RANDOM': 824 | from_mach_list = [i for i, x in enumerate(self.machine_setup_type) if x == from_type] 825 | temp_tot = 0 826 | temp_cnt = 0 827 | for from_mach in from_mach_list: 828 | temp = float(util.getSetupTime(from_mach, from_type, to_type)) 829 | if temp < 0: temp = 0 830 | temp_tot += temp 831 | temp_cnt += 1 832 | temp_list.append(self.MaxSetupTime if temp_cnt == 0 else float(temp_tot / temp_cnt)) 833 | else: # machine was dispatched(determined) by set_from_mach 834 | from_mach = self.from_mach[from_type][to_type] 835 | temp_list.append(self.MaxSetupTime if from_mach == -1 else float(util.getSetupTime(from_mach, from_type, to_type))) 836 | # temp_list.append(0 if from_mach == -1 else float(util.getSetupTime(from_mach, from_type, to_type))) 837 | state.append(temp_list) 838 | return state 839 | state = [] 840 | 841 | for to_type in self.prod_list: 842 | temp_tot = 0 843 | temp_cnt = 0 844 | for from_type in [self.from_type]: 845 | for from_mach in self.from_mach[from_type] : 846 | temp = float(util.getSetupTime(from_mach, from_type, to_type)) 847 | if temp < 0: temp = 0 848 | temp_tot += temp 849 | temp_cnt += 1 850 | state.append([float(temp_tot / temp_cnt)]) 851 | return state 852 | def _getSetupTypeVector(self): 853 | if self.from_type is None: # from_type is decided by agent 854 | rslt_macst = np.diag([1]*len(self.prod_list)) 855 | else: 856 | # 4. Setup State (S * 1, one hot encoding) 857 | mac_setup = self.from_type 858 | rslt_macst = [0] * len(self.prod_list) 859 | rslt_macst[mac_setup] += 1 860 | return self.normalization_maxabs(rslt_macst, 1) 861 | 862 | def _getHistoryVector(self, window_size=1): 863 | # vector_size = 1+len(self.prod_list) 864 | vector_size = 1+self.action_dim 865 | his_vector = [0] * (window_size * vector_size) 866 | his = self.action_history 867 | for i in range(window_size): 868 | if i>=len(his): 869 | his_vector[vector_size * i + vector_size - 1] += 1 870 | continue 871 | codeIdx = his[-(i+1)] 872 | his_vector[vector_size*i+codeIdx] += 1 873 | return his_vector -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse, datetime, numpy, os, sys, csv, random 2 | import tensorflow as tf 3 | from agent.trainer import Trainer 4 | from env.simul_pms import PMSSim 5 | from test import test_online, test_procedure, call_config_list 6 | from config import * 7 | from utils.util import * 8 | import pandas as pd 9 | 10 | class PerformanceRecord(object): 11 | def __init__(self, save_dir='C:/results', filename='performance', format = 'csv'): 12 | self.save_dir = save_dir 13 | self.filename = filename+'.'+format 14 | self.log_columns = ['Episode', 'idx', 'Util', 'Reward', 'cQ', 'real_cV', 'Lot Choice', 'Setup', 'Setup Time', 15 | 'Makespan', 'Time', 'Loss', 'Satisfaction Rate'] 16 | # self.exp_key = vars(args)['key'] 17 | with open(os.path.join(self.save_dir, self.filename), mode='a', newline='\n') as f: 18 | csv.writer(f).writerow(vars(args)) 19 | csv.writer(f).writerow(vars(args).values()) 20 | f_writer = csv.DictWriter(f, fieldnames=self.log_columns) 21 | temp_dict = dict() 22 | for column in self.log_columns: 23 | temp_dict.update({column: column}) 24 | f_writer.writerow(temp_dict) 25 | f.close() 26 | self.KPIs = [] 27 | self.best = None 28 | self.current_episode=0 29 | 
self.best_episodes=[] 30 | self.best_model_idx_valid = 0 31 | self.best_dict=dict() 32 | def update_best(self, key, value): 33 | if key not in self.best_dict: 34 | self.best_dict[key]=value 35 | elif self.best_dict[key] < value: 36 | self.best_dict[key] = value 37 | if key == 'single': 38 | self.best_model_idx_valid = int(self.current_episode) // args.save_freq - 1 39 | else: 40 | pass 41 | def get_best(self, key, listFlag=True): 42 | if listFlag: # key:valid -> get average of every best results on validation problems 'valid##' 43 | best_list = [self.best_dict[k] for k in self.best_dict.keys() if key in k] 44 | return sum(best_list) / len(best_list) 45 | else: 46 | return self.best_dict[key] 47 | 48 | def write(self, performance, stat, reverse=True): 49 | with open(os.path.join(self.save_dir, self.filename), mode='a', newline='\n') as f: 50 | f_writer = csv.DictWriter(f, fieldnames=self.log_columns) 51 | temp_dict = dict() 52 | for i, column in enumerate(self.log_columns): 53 | temp_dict.update({column: performance[i]}) 54 | f_writer.writerow(temp_dict) 55 | f.close() 56 | # 0: epi, 1:dataid, 2:util, 3:cR, 4:cQ, 5:cV, 6:total_tardiness 57 | # 7:decisions, 8:setupnum, 9:setup time, 10:makespan, 11: elapsed_time, 12:L_avg 58 | kpi = float(performance[6]) 59 | self.current_episode = performance[0] 60 | if stat: self.KPIs.append(kpi) 61 | if self.best is None or (kpi>self.best if reverse else kpi=100: agentObj.SetEpisode(args.max_episode-episode/2) #make epsilon 142 | ST_TIME = datetime.datetime.now() 143 | if idx >= 300: 144 | env.set_random_seed(idx) 145 | else: 146 | # if (episode-1) % args.chg_freq==0: 147 | # env = random.Random().choice(envlist) 148 | # env.set_random_seed(random.Random().randint(0,300)) 149 | # env.set_random_seed(random.Random().randint(0,100)) 150 | env.set_random_seed(episode+500) 151 | # if agentObj.eps == 0: env.set_random_seed(random.Random().randint(0,300)) 152 | # env.set_random_seed((episode//3) % 300) 153 | # if args.max_episode-episode<1000: 154 | # env = test_env 155 | env.reset() 156 | done = False 157 | observe = env.observe(args.oopt) 158 | # run experiment 159 | while not done: 160 | pre_observe, action, curr_time = agentObj.get_action(observe) 161 | act_vec = np.zeros([1, args.action_dim]) 162 | act_vec[0, action] = 1 163 | # interact with environment 164 | if args.bucket == 0: 165 | observe, reward, done = env.step(action) 166 | agentObj.remember(pre_observe, act_vec, observe, reward, done) 167 | else: 168 | observe, reward, done = env.step_bucket(action) 169 | # if env.wall_time.curr_bucket <= 1: 170 | # agentObj.remember_record(pre_observe, act_vec, reward, done) 171 | # else: 172 | agentObj.remember(pre_observe, act_vec, observe, reward, done) 173 | 174 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 175 | performance = get_performance(episode, agentObj, env, elapsed_time) 176 | bestFlag = sr.write(performance, stat=True if episode > args.max_episode * 0.9 else False) 177 | 178 | exp_idx = 0 179 | if episode % args.save_freq == 0: 180 | model_dir = '{}/models/{}_{}_{:07d}/'.format(args.save_dir, str(idx), str(exp_idx), episode) 181 | save(sess, model_dir, saver) 182 | agentObj.record.fileWrite(episode) 183 | perform_summary = tf.Summary() 184 | perform_summary.value.add(simple_value=sr.best, node_name="reward/train_bestR",tag="reward/train_bestR") 185 | if len(valid_seed_list) == 0: # single test 186 | performance = test_online(agentObj=agentObj, env=env, episode=episode, showFlag=True) 187 | reward_test = float(performance[6]) 
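# Added note (assumption, not in the original source): index 6 of the performance tuple is the
# same scalar that PerformanceRecord.write() treats as its KPI (annotated there as the
# tardiness-related cumulative reward), so this single value drives update_best() and the
# "reward/test" TensorBoard summary written just below.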
188 | sr.update_best('single', reward_test) 189 | perform_summary.value.add(simple_value=reward_test, node_name="reward/test", tag="reward/test") 190 | if episode == args.max_episode: 191 | sr.writeSummary('{:2f},{:2f}'.format(reward_test,sr.get_best('single', listFlag=False))) 192 | else: # normal validation 193 | # if last save_freq uses single_env strategy 194 | # if episode == args.max_episode: 195 | if idx == -1: 196 | performance = test_online(agentObj=agentObj, env=env, episode=episode, showFlag=True) 197 | reward_test = float(performance[6]) 198 | perform_summary.value.add(simple_value=reward_test, node_name="reward/test", tag="reward/test") 199 | env = valid_env 200 | reward_avg = list() 201 | for valid_seed in valid_seed_list: 202 | env.set_random_seed(valid_seed) 203 | reward_valid = test_online(agentObj=agentObj, env=env, episode=episode, showFlag=False) 204 | reward_avg.append(reward_valid) 205 | sr.update_best('valid{}'.format(valid_seed), reward_valid) 206 | if episode + args.save_freq * 10 >= args.max_episode: sr.update_best( 207 | '10last{}'.format(valid_seed), reward_valid) 208 | if (episode * 10) % args.max_episode == 0: sr.update_best('10sample{}'.format(valid_seed), 209 | reward_valid) 210 | reward_avg = sum(reward_avg) / len(reward_avg) 211 | sr.update_best('single', reward_avg) 212 | best_avg = sr.get_best('valid') 213 | print('Validation result : ', reward_avg) 214 | perform_summary.value.add(simple_value=reward_avg, node_name="reward/valid_avgR", tag="reward/valid_avgR") 215 | perform_summary.value.add(simple_value=best_avg, node_name="reward/valid_bestR", tag="reward/valid_bestR") 216 | 217 | if agentObj.getSummary(): agentObj.getSummary().add_summary(perform_summary, episode) 218 | print('Best Validation result : ', sr.best_dict) 219 | if bestFlag and agentObj.reward_total>181: # FIXME : Change Time 220 | model_dir = '{}/best_models/{}_{}_{:07d}/'.format(args.save_dir, str(idx), str(exp_idx), episode) 221 | save(sess, model_dir, saver) 222 | if episode % args.save_freq != 0: agentObj.record.fileWrite(episode) 223 | 224 | if agentObj.getSummary() and args.is_train: agentObj.writeSummary() 225 | 226 | sess.close() 227 | 228 | tf.reset_default_graph() 229 | total_training_time = datetime.datetime.now() - FIRST_ST_TIME 230 | tt_hour = (total_training_time.days * 86400 + total_training_time.seconds) / 3600 231 | test_multi_rslt = test_procedure(tf_config=tf_config, best_model_idx=sr.best_model_idx_valid) 232 | sr.writeSummary('{:2f},{:2f},{:2f},{:2f},{:2f},{:2f},{}'. 
233 | format(reward_avg, sr.get_best('single', listFlag=False), sr.get_best('10last'), 234 | sr.get_best('10sample'), best_avg, tt_hour, test_multi_rslt)) 235 | print("Total elapsed time: {}\t hour: {} sec ".format(MAX_EPISODE, total_training_time)) 236 | 237 | if __name__ == "__main__": 238 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333, allow_growth=True) 239 | config = tf.ConfigProto(device_count={'GPU': 0}, gpu_options=gpu_options) 240 | if args.is_train: 241 | # for i in range(-1, 0): 242 | # for i in range(args.repeat): 243 | # for i in range(324, 325): 244 | # with tf.device('/cpu:0'): 245 | # args.did=9 246 | # args.bucket = 5400 247 | # args.save_freq = int(20 * (args.bucket / 5400)) * 4 248 | # args.max_episode = 100 * args.save_freq 249 | train(args.eid, config) 250 | else: 251 | from test import test_model_singleprocesser 252 | test_model_singleprocesser(1, config) -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | from .dqn import PDQN, BaseNetwork -------------------------------------------------------------------------------- /model/a2c.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from utils import * 4 | from config import * 5 | from model.nn_ops import * 6 | import sys 7 | 8 | class CriticAgent(object): 9 | def __init__(self, sess, 10 | input_dim, 11 | hid_dims=args.hid_dims, output_dim=1, 12 | eps=args.entropy_eps, act_fn=leaky_relu, 13 | optimizer=tf.train.AdamOptimizer, 14 | scope='critic_agent'): 15 | 16 | self.sess = sess 17 | self.scope = scope 18 | 19 | self.eps = eps 20 | self.act_fn = act_fn 21 | self.optimizer = optimizer 22 | 23 | self.input_dim = input_dim 24 | self.hid_dims = hid_dims 25 | self.output_dim = output_dim 26 | 27 | # input dimension: [batch_size, num_workers + 1] 28 | self.inputs = tf.placeholder(tf.float32, [None, self.input_dim]) 29 | 30 | # initialize nn parameters 31 | self.weights, self.bias = self.nn_init( 32 | self.input_dim, self.hid_dims, self.output_dim) 33 | 34 | # actor network 35 | self.values = self.critic_network( 36 | self.inputs, self.weights, self.bias) 37 | 38 | # groundtruth for training 39 | self.actual_values = tf.placeholder(tf.float32, [None, 1]) 40 | 41 | # define loss 42 | self.loss = tf.reduce_sum(tf.square(self.actual_values - self.values)) 43 | 44 | # get training parameters 45 | self.params = tf.get_collection( 46 | tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope) 47 | 48 | # operations for setting parameters 49 | self.input_params, self.set_params_op = \ 50 | self.define_params_op() 51 | 52 | # actor gradients 53 | self.critic_gradients = tf.gradients(self.loss, self.params) 54 | 55 | # adaptive learning rate 56 | self.lr_rate = tf.placeholder(tf.float32, shape=[]) 57 | 58 | # actor optimizer 59 | self.critic_opt = self.optimizer(self.lr_rate).minimize(self.loss) 60 | 61 | # apply gradient directly to update parameters 62 | self.apply_grads = self.optimizer(self.lr_rate).\ 63 | apply_gradients(zip(self.critic_gradients, self.params)) 64 | 65 | def nn_init(self, input_dim, hid_dims, output_dim): 66 | weights = [] 67 | bias = [] 68 | 69 | curr_in_dim = input_dim 70 | 71 | # hidden layers 72 | for hid_dim in hid_dims: 73 | weights.append( 74 | glorot([curr_in_dim, hid_dim], scope=self.scope)) 75 | bias.append( 76 | zeros([hid_dim], scope=self.scope)) 77 | curr_in_dim = 
hid_dim 78 | 79 | # output layer 80 | weights.append(glorot([curr_in_dim, output_dim], scope=self.scope)) 81 | bias.append(zeros([output_dim], scope=self.scope)) 82 | 83 | return weights, bias 84 | 85 | def critic_network(self, inputs, weights, bias): 86 | 87 | # non-linear feed forward 88 | x = inputs 89 | 90 | for (w, b) in zip(weights[:-1], bias[:-1]): 91 | x = tf.matmul(x, w) 92 | x += b 93 | x = self.act_fn(x) 94 | 95 | # final linear output layer 96 | x = tf.matmul(x, weights[-1]) 97 | x += bias[-1] 98 | 99 | return x 100 | 101 | def apply_gradients(self, gradients, lr_rate): 102 | self.sess.run(self.apply_grads, feed_dict={ 103 | i: d for i, d in zip( 104 | self.critic_gradients + [self.lr_rate], 105 | gradients + [lr_rate]) 106 | }) 107 | 108 | def define_params_op(self): 109 | # define operations for setting network parameters 110 | input_params = [] 111 | for param in self.params: 112 | input_params.append( 113 | tf.placeholder(tf.float32, shape=param.get_shape())) 114 | set_params_op = [] 115 | for idx, param in enumerate(input_params): 116 | set_params_op.append(self.params[idx].assign(param)) 117 | return input_params, set_params_op 118 | 119 | def get_params(self): 120 | return self.sess.run(self.params) 121 | 122 | def set_params(self, input_params): 123 | self.sess.run(self.set_params_op, feed_dict={ 124 | i: d for i, d in zip(self.input_params, input_params) 125 | }) 126 | 127 | def predict(self, inputs): 128 | return self.sess.run(self.values, feed_dict={ 129 | self.inputs: inputs 130 | }) 131 | 132 | def get_gradients(self, inputs, actual_values): 133 | return self.sess.run( 134 | [self.critic_gradients, self.loss], 135 | feed_dict={ 136 | self.inputs: inputs, 137 | self.actual_values: actual_values 138 | }) 139 | 140 | def compute_gradients(self, batch_inputs, batch_actual_values): 141 | # sopt into batch format 142 | inputs = np.vstack(batch_inputs) 143 | actual_values = np.vstack(batch_actual_values) 144 | 145 | # invoke learning model 146 | gradients, loss = self.get_gradients( 147 | inputs, actual_values) 148 | 149 | return gradients, loss 150 | 151 | class ActorAgent(object): 152 | def __init__(self, sess, eps=args.entropy_eps, act_fn=leaky_relu, 153 | optimizer=tf.train.AdamOptimizer, 154 | scope='actor_agent'): 155 | 156 | self.sess = sess 157 | self.scope = scope 158 | 159 | self.eps = eps 160 | self.act_fn = act_fn 161 | self.optimizer = optimizer 162 | 163 | self.input_dim = args.state_dim 164 | self.hid_dims = args.hid_dims 165 | self.output_dim = args.action_dim # priority (to softmax over) 166 | 167 | # input dimension: [batch_size, num_workers + 1] 168 | self.inputs = tf.placeholder(tf.float32, [None, self.input_dim]) 169 | 170 | # initialize nn parameters 171 | self.weights, self.bias = self.nn_init( 172 | self.input_dim, self.hid_dims, self.output_dim) 173 | 174 | # actor network 175 | self.act_probs = self.actor_network( 176 | self.inputs, self.weights, self.bias) 177 | 178 | # sample an action (from OpenAI baselines) 179 | logits = tf.log(self.act_probs) 180 | noise = tf.random_uniform(tf.shape(logits)) 181 | self.act = tf.argmax(logits - tf.log(-tf.log(noise)), 1) 182 | 183 | # selected action: [batch_size, num_workers] 184 | self.act_vec = tf.placeholder(tf.float32, [None, self.output_dim]) 185 | 186 | # advantage term 187 | self.adv = tf.placeholder(tf.float32, [None, 1]) 188 | 189 | # use entropy to promote exploration, this term decays over time 190 | self.entropy_weight = tf.placeholder(tf.float32, ()) 191 | 192 | # select action probability 193 
| self.selected_act_prob = tf.reduce_sum(tf.multiply( 194 | self.act_probs, self.act_vec), 195 | reduction_indices=1, keep_dims=True) 196 | 197 | # actor loss due to advantge (negated) 198 | self.adv_loss = tf.reduce_sum(tf.multiply( 199 | tf.log(self.selected_act_prob + \ 200 | self.eps), -self.adv)) 201 | 202 | # entropy loss (normalized) 203 | self.entropy_loss = tf.reduce_sum(tf.multiply( 204 | self.act_probs, tf.log(self.act_probs + self.eps))) / \ 205 | np.log(args.action_dim) 206 | 207 | # define combined loss 208 | self.loss = self.adv_loss + self.entropy_weight * self.entropy_loss 209 | 210 | # get training parameters 211 | self.params = tf.get_collection( 212 | tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope) 213 | 214 | # operations for setting parameters 215 | self.input_params, self.set_params_op = \ 216 | self.define_params_op() 217 | 218 | # actor gradients 219 | self.act_gradients = tf.gradients(self.loss, self.params) 220 | 221 | # adaptive learning rate 222 | self.lr_rate = tf.placeholder(tf.float32, shape=[]) 223 | 224 | # actor optimizer 225 | self.act_opt = self.optimizer(self.lr_rate).minimize(self.loss) 226 | 227 | # apply gradient directly to update parameters 228 | self.apply_grads = self.optimizer(self.lr_rate).\ 229 | apply_gradients(zip(self.act_gradients, self.params)) 230 | 231 | def nn_init(self, input_dim, hid_dims, output_dim): 232 | weights = [] 233 | bias = [] 234 | 235 | curr_in_dim = input_dim 236 | 237 | # hidden layers 238 | for hid_dim in hid_dims: 239 | weights.append( 240 | glorot([curr_in_dim, hid_dim], scope=self.scope)) 241 | bias.append( 242 | zeros([hid_dim], scope=self.scope)) 243 | curr_in_dim = hid_dim 244 | 245 | # output layer 246 | weights.append(glorot([curr_in_dim, output_dim], scope=self.scope)) 247 | bias.append(zeros([output_dim], scope=self.scope)) 248 | 249 | return weights, bias 250 | 251 | def actor_network(self, inputs, weights, bias): 252 | 253 | # non-linear feed forward 254 | x = inputs 255 | 256 | for (w, b) in zip(weights[:-1], bias[:-1]): 257 | x = tf.matmul(x, w) 258 | x += b 259 | x = self.act_fn(x) 260 | 261 | # final linear output layer 262 | x = tf.matmul(x, weights[-1]) 263 | x += bias[-1] 264 | 265 | # softmax 266 | x = tf.nn.softmax(x, dim=-1) 267 | 268 | return x 269 | 270 | def apply_gradients(self, gradients, lr_rate): 271 | self.sess.run(self.apply_grads, feed_dict={ 272 | i: d for i, d in zip( 273 | self.act_gradients + [self.lr_rate], 274 | gradients + [lr_rate]) 275 | }) 276 | 277 | def define_params_op(self): 278 | # define operations for setting network parameters 279 | input_params = [] 280 | for param in self.params: 281 | input_params.append( 282 | tf.placeholder(tf.float32, shape=param.get_shape())) 283 | set_params_op = [] 284 | for idx, param in enumerate(input_params): 285 | set_params_op.append(self.params[idx].assign(param)) 286 | return input_params, set_params_op 287 | 288 | def get_params(self): 289 | return self.sess.run(self.params) 290 | 291 | def set_params(self, input_params): 292 | self.sess.run(self.set_params_op, feed_dict={ 293 | i: d for i, d in zip(self.input_params, input_params) 294 | }) 295 | 296 | def predict(self, inputs): 297 | return self.sess.run(self.act, feed_dict={ 298 | self.inputs: inputs 299 | }) 300 | 301 | def get_gradients(self, inputs, act_vec, adv, entropy_weight): 302 | return self.sess.run( 303 | [self.act_gradients, [self.adv_loss, self.entropy_loss]], 304 | feed_dict={ 305 | self.inputs: inputs, 306 | self.act_vec: act_vec, 307 | self.adv: adv, 308 | 
self.entropy_weight: entropy_weight 309 | }) 310 | 311 | def compute_gradients(self, batch_inputs, batch_act_vec, \ 312 | batch_adv, entropy_weight): 313 | # sopt into batch format 314 | inputs = np.vstack(batch_inputs) 315 | act_vec = np.vstack(batch_act_vec) 316 | # invoke learning model 317 | gradients, loss = self.get_gradients( 318 | inputs, act_vec, batch_adv, entropy_weight) 319 | # append baseline loss 320 | loss.append(np.mean(batch_adv ** 2)) 321 | 322 | return gradients, loss 323 | 324 | # def get_action(self, state): 325 | # 326 | # workers, job, _ = state 327 | # 328 | # inputs = np.zeros([1, args.action_dim + 1]) 329 | # 330 | # for worker in workers: 331 | # inputs[0, worker.worker_id] = \ 332 | # min(sum(j.size for j in worker.queue) / \ 333 | # args.job_size_norm_factor / 5.0, # normalization 334 | # 20.0) 335 | # inputs[0, -1] = min(job.size / args.job_size_norm_factor, 10.0) # normalization 336 | # 337 | # action = self.predict(inputs) 338 | # 339 | # return action[0] 340 | 341 | def get_action_feasibility(self, observe): 342 | 343 | temp = observe['state'] 344 | feasibility = observe['feasibility'] 345 | curr_time = observe['time'] 346 | state = np.zeros([1, len(temp)]) 347 | state[0] = temp 348 | prob = self.sess.run(self.act_probs, feed_dict={self.inputs: state})[0] 349 | max_logit = -sys.maxsize 350 | action = 0 351 | for a in feasibility: 352 | if prob[a]>=max_logit: 353 | max_logit = prob[a] 354 | action = a 355 | return state, action, curr_time 356 | -------------------------------------------------------------------------------- /model/dqn.py: -------------------------------------------------------------------------------- 1 | from model.util_nn import * 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | class BaseNetwork(object): 6 | def __init__(self, sess, input_dim, action_dim, update_option, name, optimizer, tau, 7 | initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN")): 8 | """ 9 | Abstarct class for creating networks 10 | :param input_dim: 11 | :param action_dim: 12 | :param stddev: 13 | """ 14 | 15 | # if use soft update, tau should not be None 16 | self.tau = tau 17 | 18 | self.update_option = update_option 19 | self.input_dim = input_dim 20 | self.action_dim = action_dim 21 | self.initializer = initializer 22 | self.sess = sess 23 | 24 | # build network 25 | self.build(name) 26 | self.network_param = [v for v in tf.get_collection(tf.GraphKeys.MODEL_VARIABLES) if name in v.name and 27 | "target" not in v.name] 28 | 29 | # build target 30 | self.build_target("target_%s" % name) 31 | self.target_param = [v for v in tf.get_collection(tf.GraphKeys.MODEL_VARIABLES) if name in v.name and 32 | "target" in v.name] 33 | 34 | self.gradients = None 35 | 36 | # optimizer 37 | self.optimizer = optimizer 38 | 39 | def create_update_op(self): 40 | import time 41 | st = time.time() 42 | print('start time', st) 43 | if self.update_option == "soft_update": 44 | update_op = [tf.assign(target_param, (1 - self.tau) * target_param + self.tau * network_param) 45 | for target_param, network_param in zip(self.target_param, self.network_param)] 46 | else: 47 | update_op = [tf.assign(target_param, network_param) 48 | for target_param, network_param in zip(self.target_param, self.network_param)] 49 | print('elapsed time', time.time()-st) 50 | return update_op 51 | 52 | def create_train_op(self): 53 | return self.optimizer.apply_gradients([(g, v) for g, v in zip(self.gradients, self.network_param)]) 54 | 55 | def build(self, name): 56 | """ 57 | Abstract 
method, to be implemented by child classes 58 | """ 59 | raise NotImplementedError("Not implemented") 60 | 61 | def build_target(self, name): 62 | """ 63 | Abstract method, to be implemented by child classes 64 | """ 65 | raise NotImplementedError("Not implemented") 66 | 67 | def compute_gradient(self): 68 | """ 69 | Abstract method, compute gradient in order to be used by self.optimizer 70 | """ 71 | raise NotImplementedError("Not implemented") 72 | def init_summary(self, weight_hist, summary_dir, name): 73 | # Writes Tensorboard summaries to disk 74 | self.summary_writer = None 75 | self.summary_writer_hist = None 76 | self.hist_freq = weight_hist 77 | summary_list = [] 78 | if summary_dir: 79 | if self.is_train: 80 | summary_dir = os.path.join(summary_dir, "summaries_{}".format(name)) 81 | else: 82 | summary_dir = os.path.join(summary_dir, "summaries_test_{}".format(name)) 83 | 84 | if not os.path.exists(summary_dir): 85 | os.makedirs(summary_dir) 86 | self.summary_writer = tf.summary.FileWriter(summary_dir+'/scalar') 87 | if self.is_train and self.hist_freq: self.summary_writer_hist = tf.summary.FileWriter(summary_dir+'/hist') 88 | # Summaries for Tensorboard 89 | if weight_hist: 90 | var_list = tf.trainable_variables() 91 | # self.grads = optimizer.compute_gradients(self.loss, var_list=var_list) 92 | # for grad, var in self.grads: 93 | # if var.op.name.split("/")[0] != name: 94 | # continue 95 | # if grad is not None: 96 | # if 'weights' in var.op.name: 97 | # summary_list.append(tf.summary.histogram(var.op.name + "/gradients", grad)) 98 | 99 | for var in var_list: 100 | if var.op.name.split("/")[0] != name: 101 | continue 102 | if 'weights' in var.op.name or 'kernel' in var.op.name or 'bias' in var.op.name: 103 | weight_name = var.op.name.split("/")[1] + "/" + var.op.name.split("/")[2] 104 | with tf.variable_scope(name, reuse=True): 105 | weight = tf.get_variable(weight_name) 106 | summary_list.append(tf.summary.histogram(var.op.name, weight)) 107 | summary_list.extend([ 108 | # tf.summary.histogram('fc1_weight ' % name , (weights)), 109 | # tf.summary.histogram('fc1_bias' , b), 110 | tf.summary.scalar('logits/max_predicted_q_value', tf.reduce_max(self.predicted_Q)), 111 | # tf.summary.scalar('loss/batch_loss_%s' % name, self.loss), 112 | tf.summary.scalar('loss/batch_loss', self.loss), 113 | ]) 114 | if self.is_duel: 115 | summary_list.extend([ 116 | tf.summary.scalar('logits/value_estimated', tf.reduce_max(self.value[0])), 117 | tf.summary.scalar('logits/max_predicted_a_value', tf.reduce_max(self.adv[0])) 118 | ]) 119 | 120 | print('Setting {} length of default summary'.format(len(summary_list))) 121 | self.summary = tf.summary.merge(summary_list) 122 | 123 | 124 | class PDQN(BaseNetwork): 125 | def __init__(self, sess, input_dim, action_dim, auxin_dim, tau, optimizer, name, global_step, 126 | is_duel=False, is_train=True, layers=[64,32], summary_dir=None, weight_hist=False): 127 | """ 128 | Initialize critic network. The critic network maintains a copy of itself and target updating ops 129 | Args 130 | input_dim: dimension of input space, if is length one, we assume it is low dimension. 131 | action_dim: dimension of action space. 132 | stddev: standard deviation for initializing network params. 
133 | """ 134 | self.is_train = is_train 135 | self.is_duel = is_duel 136 | self.auxin_dim = auxin_dim 137 | self.layers=layers 138 | self.name = name 139 | # else: 140 | # self.name = name+'_test' 141 | 142 | super(PDQN, self).__init__(sess, input_dim, action_dim, update_option="soft_update", 143 | name=self.name, optimizer=optimizer, tau=tau) 144 | self.update_op = self.create_update_op() 145 | 146 | self.is_training = tf.placeholder(dtype=tf.bool, name="bn_is_train") 147 | 148 | # for critic network, the we need one more input variable: y to compute the loss 149 | # this input variable is fed by: r + gamma * target(s_t+1, action(s_t+1)) 150 | self.target_Q = tf.placeholder(tf.float32, shape=[None, 1], name="target_q") 151 | 152 | self.action = tf.placeholder(tf.float32, shape=[None, self.action_dim], name="selected_action") 153 | # self.tp = tf.transpose(self.action) 154 | 155 | self.predicted_Q = tf.reduce_sum(tf.multiply(self.action, self.net), axis=1, keep_dims=True) # batch_size 156 | 157 | self.global_step = global_step 158 | self.loss = self._loss() 159 | self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step) 160 | self.init_summary(weight_hist, summary_dir, name) 161 | 162 | def _loss(self): 163 | # bellman loss 164 | diff = self.target_Q - self.predicted_Q 165 | delta = 0.5 166 | loss = tf.where(tf.abs(diff) < delta, 0.5 * tf.square(diff), delta * tf.abs(diff) - 0.5 * (delta ** 2)) 167 | # For Prioritized Replay buffer(Importance Sampling) 168 | self.weight = tf.placeholder(tf.float32, shape=[None, 1], name="weight_is") 169 | loss= tf.reduce_mean(tf.multiply(self.weight, loss)) 170 | self.loss_comp=[loss] 171 | return loss 172 | 173 | def base_encoder_cells(self, x, name='', reuse=False): 174 | if len(self.input_dim)==2: 175 | net = None 176 | for i in range(self.input_dim[0]): 177 | _input = tf.squeeze(tf.slice(x, [0,i,0],[-1,1,-1]),axis=1) 178 | # print(i, self.sess.run([_input, tf.shape(_input)], feed_dict={self.x:np.zeros((2,10,13))})) 179 | net_temp = self._base_encoder_cells(_input, name=name, reuse=tf.AUTO_REUSE) 180 | if net is None: net = net_temp 181 | else: net = tf.concat([net,net_temp],1) 182 | else: net = self._base_encoder_cells(x, name=name, reuse=reuse) 183 | return net 184 | def _base_encoder_cells(self, x, name='', reuse=False): 185 | with tf.variable_scope(name, reuse=reuse): 186 | net = tf.identity(x) 187 | # Default initializer is xavier_initializer 188 | for l in range(len(self.layers)): 189 | h_dim = self.layers[l] 190 | # net = tf.contrib.layers.fully_connected(net, num_outputs=h_dim, activation_fn=tf.nn.relu, scope="fc{}".format(l) 191 | # # ,normalizer_fn=tf.contrib.layers.layer_norm 192 | # ) 193 | net = dense_layer(net, output_dim=h_dim, activation_fn=tf.nn.relu, scope="fc{}".format(l)) 194 | 195 | return net 196 | 197 | def value_layer(self, net, name='', reuse=False): 198 | with tf.variable_scope(name, reuse=reuse): 199 | # net = dense_layer(net, output_dim=8, activation_fn=tf.sigmoid, scope="fc_val") 200 | # net = dense_layer(net, output_dim=1, activation_fn=None, scope="value") 201 | net = tf.contrib.layers.fully_connected(net, activation_fn=None, num_outputs=self.action_dim, 202 | # weights_regularizer=regularizer, 203 | weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3), 204 | biases_initializer=tf.constant_initializer(.0, dtype=tf.float32), 205 | scope="q", reuse=reuse) 206 | return net 207 | def fusion_layer(self, net, auxin, name='', reuse=False): 208 | net = tf.concat([net, auxin], 1) 209 | # with 
tf.variable_scope(name, reuse=reuse): 210 | # net = dense_layer(net, output_dim=100, activation_fn=tf.nn.relu, scope="fc_fus") 211 | # net = dense_layer(net, output_dim=16, activation_fn=tf.nn.relu, scope="fc_fus2") 212 | return net 213 | 214 | def build(self, name): 215 | self.x = tf.placeholder(dtype=tf.float32, shape=[None, *self.input_dim], name="%s_input" % name) 216 | net = self.base_encoder_cells(self.x, name) 217 | if self.auxin_dim > 0: 218 | self.auxin = tf.placeholder(dtype=tf.float32, shape=[None, self.auxin_dim], name="%s_auxin" % name) 219 | # net = tf.concat([net, self.auxin], 1) 220 | # net = dense_layer(net, output_dim=64, activation_fn=tf.nn.relu, scope="%s_fc_fus" % name) 221 | net = self.fusion_layer(net, self.auxin, name) 222 | 223 | # last layer 224 | if self.is_duel: 225 | with tf.variable_scope(name): 226 | # net1 = tf.identity(net); net2 = tf.identity(net) 227 | net1 = dense_layer(net, output_dim=8, activation_fn=tf.sigmoid, scope="fc_val") 228 | self.value = dense_layer(net1, output_dim=1, activation_fn=None, scope="value") 229 | net2 = dense_layer(net, output_dim=8, activation_fn=tf.sigmoid, scope="fc_adv") 230 | # value = dense_layer(net1, 1, 231 | # weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3), 232 | # scope="value", use_bias=True) 233 | # adv = dense_layer(net2, self.action_dim, 234 | # weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3), 235 | # scope="advantage", use_bias=True) 236 | # value = self.value_layer(net, name) 237 | self.adv = dense_layer(net2, self.action_dim, activation_fn = None, scope="advantage") 238 | self.net = self.value + (self.adv - tf.reduce_mean(self.adv, reduction_indices=[1, ], keep_dims=True)) 239 | 240 | else: 241 | self.net = self.value_layer(net, name=name) 242 | def build_target(self, name): 243 | self.target_x = tf.placeholder(dtype=tf.float32, shape=[None, *self.input_dim], name="%s_input" % name) 244 | net = self.base_encoder_cells(self.target_x, name) 245 | if self.auxin_dim > 0: 246 | self.target_auxin = tf.placeholder(dtype=tf.float32, shape=[None, self.auxin_dim], name="%s_auxin" % name) 247 | # net = tf.concat([net, self.target_auxin], 1) 248 | # net = dense_layer(net, output_dim=16, activation_fn=tf.nn.relu, scope="%s_fc_fus" % name) 249 | net = self.fusion_layer(net, self.target_auxin, name) 250 | # last layer 251 | if self.is_duel: 252 | 253 | with tf.variable_scope(name): 254 | # net1 = tf.identity(net); net2 = tf.identity(net) 255 | net1 = dense_layer(net, output_dim=8, activation_fn=tf.sigmoid, scope="fc_val") 256 | value = dense_layer(net1, output_dim=1, activation_fn=None, scope="value") 257 | net2 = dense_layer(net, output_dim=8, activation_fn=tf.sigmoid, scope="fc_adv") 258 | # value = dense_layer(net1, 1, 259 | # weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3), 260 | # scope="value", use_bias=True) 261 | # adv = dense_layer(net2, self.action_dim, 262 | # weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3), 263 | # scope="advantage", use_bias=True) 264 | # value = self.value_layer(net, name) 265 | adv = dense_layer(net2, self.action_dim, activation_fn=None, scope="advantage") 266 | self.target_net = value + (adv - tf.reduce_mean(adv, reduction_indices=[1, ], keep_dims=True)) 267 | 268 | else: 269 | self.target_net = self.value_layer(net, name=name) 270 | 271 | 272 | def compute_gradient(self): 273 | grad = tf.gradients(self.loss, self.network_param, name="critic_gradients") 274 | # action_grad = tf.gradients(self.net, self.action, name="action_gradient") 
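# Added note (assumption, not in the original source): the list returned here is meant to be
# stored in self.gradients so that BaseNetwork.create_train_op() can zip it with
# self.network_param and apply it via optimizer.apply_gradients(); in the training path shown
# in critic_train(), however, the loss is minimized directly through self.train_op, so this
# explicit gradient computation appears to matter only for an apply-gradients style of update.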
275 | return grad 276 | 277 | # def action_gradients(self, inputs, actions): 278 | # return self.sess.run(self.action_grads, {self.x: inputs, self.action: actions}) 279 | def critic_predict(self, state, auxin, feasibility=None): 280 | return self._critic_predict(state, auxin) 281 | 282 | def critic_target_predict(self, state, auxin, feasibility=None): 283 | return self._critic_target_predict(state, auxin) 284 | 285 | def _critic_predict(self, state, auxin, summary=False): 286 | """ 287 | If summary is True, we also get the q value. This is used for logging. 288 | """ 289 | # if summary: 290 | # return self.sess.run(self.q_summary, feed_dict={self.critic.action: action, self.critic.x: state}) 291 | # else: 292 | feed_dict = {self.x: state} 293 | if self.auxin_dim > 0: feed_dict.update({self.auxin: auxin}) 294 | 295 | if self.is_duel: 296 | net, adv, value = self.sess.run([self.net, self.adv, self.value], 297 | feed_dict=feed_dict) 298 | # print("adv", net[1], "value", net[2], "Q", net[0]) 299 | # net = self.sess.run(self.net, feed_dict={self.x: state}) 300 | # print(self.sess.run(self.advantage)) 301 | # print(net) 302 | return net, adv, value 303 | else: return self.sess.run(self.net, feed_dict=feed_dict) 304 | 305 | def _critic_target_predict(self, state, auxin): 306 | feed_dict = {self.target_x: state} 307 | if self.auxin_dim > 0: feed_dict.update({self.target_auxin: auxin}) 308 | # if self.use_lstm: feed_dict.update({self.target_c_in: self.lstm_state_out_target[0], self.target_h_in:self.lstm_state_out_target[1]}) 309 | return self.sess.run(self.target_net, feed_dict=feed_dict) 310 | 311 | def critic_train(self, weight, inputs, auxin, action, target_q_value, train_step): 312 | feed_dict = {self.weight: weight, self.target_Q: target_q_value, self.action: action} 313 | if self.auxin_dim > 0: 314 | feed_dict.update({self.auxin:auxin}) 315 | feed_dict.update({self.x:inputs}) 316 | 317 | summary, loss, loss_comp, train_op, predicted_Q, target_Q, action = \ 318 | self.sess.run( 319 | [self.summary, self.loss, self.loss_comp, self.train_op, self.predicted_Q, self.target_Q, self.action], 320 | feed_dict=feed_dict) 321 | 322 | # print(loss, loss_b, loss_r) 323 | # print(self.sess.run()) 324 | if self.summary_writer_hist: 325 | if train_step % self.hist_freq == 0: 326 | self.summary_writer_hist.add_summary(summary, global_step=train_step) 327 | print('write hist summary', len(summary)) 328 | return loss, train_op, predicted_Q, target_Q, action 329 | # def critic_train(self, estimated_q_value, predicted_q_value): 330 | # return self.sess.run([self.loss, self.train], 331 | # feed_dict={self.y: predicted_q_value, self.q: estimated_q_value}) 332 | def update_critic(self): 333 | self.sess.run(self.update_op) 334 | # print(self.sess.run(self.target_param)) 335 | def getSummary(self): 336 | return self.summary_writer 337 | 338 | def get_action(self): 339 | raise NotImplementedError("Not implemented") -------------------------------------------------------------------------------- /model/nn_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.python.framework import ops 4 | from tensorflow.python.ops import math_ops 5 | 6 | 7 | def fc_nn_init(input_dim, hid_dims, output_dim, scope='default'): 8 | weights = [] 9 | bias = [] 10 | 11 | curr_in_dim = input_dim 12 | 13 | # hidden layers 14 | for hid_dim in hid_dims: 15 | weights.append( 16 | glorot([curr_in_dim, hid_dim], scope=scope)) 17 | bias.append( 
18 | zeros([hid_dim], scope=scope)) 19 | curr_in_dim = hid_dim 20 | 21 | # output layer 22 | weights.append(glorot([curr_in_dim, output_dim], scope=scope)) 23 | bias.append(zeros([output_dim], scope=scope)) 24 | 25 | return weights, bias 26 | 27 | 28 | def fc_nn(inputs, weights, bias, act_fn): 29 | # non-linear feed forward 30 | x = inputs 31 | 32 | for (w, b) in zip(weights[:-1], bias[:-1]): 33 | x = tf.matmul(x, w) 34 | x += b 35 | x = act_fn(x) 36 | 37 | # final linear output layer 38 | x = tf.matmul(x, weights[-1]) 39 | x += bias[-1] 40 | 41 | return x 42 | 43 | 44 | def glorot(shape, dtype=tf.float32, scope='default'): 45 | # Xavier Glorot & Yoshua Bengio (AISTATS 2010) initialization (Eqn 16) 46 | with tf.variable_scope(scope): 47 | init_range = np.sqrt(6.0 / (shape[0] + shape[1])) 48 | init = tf.random_uniform( 49 | shape, minval=-init_range, maxval=init_range, dtype=dtype) 50 | return tf.Variable(init) 51 | 52 | 53 | def leaky_relu(features, alpha=0.2, name=None): 54 | """Compute the Leaky ReLU activation function. 55 | "Rectifier Nonlinearities Improve Neural Network Acoustic Models" 56 | AL Maas, AY Hannun, AY Ng - Proc. ICML, 2013 57 | http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf 58 | Args: 59 | features: A `Tensor` representing preactivation values. 60 | alpha: Slope of the activation function at x < 0. 61 | name: A name for the operation (optional). 62 | Returns: 63 | The activation value. 64 | """ 65 | with ops.name_scope(name, "LeakyRelu", [features, alpha]): 66 | features = ops.convert_to_tensor(features, name="features") 67 | alpha = ops.convert_to_tensor(alpha, name="alpha") 68 | return math_ops.maximum(alpha * features, features) 69 | 70 | 71 | def normalize(inputs, activation, reuse, scope, norm): 72 | if norm == 'batch_norm': 73 | return tf.contrib.layers.batch_norm( 74 | inputs, activation_fn=activation, reuse=reuse, scope=scope) 75 | elif norm == 'layer_norm': 76 | return tf.contrib.layers.layer_norm( 77 | inputs, activation_fn=activation, reuse=reuse, scope=scope) 78 | elif norm == 'None': 79 | if activation is not None: 80 | return activation(inputs) 81 | else: 82 | return inputs 83 | 84 | 85 | def ones(shape, dtype=tf.float32, scope='default'): 86 | with tf.variable_scope(scope): 87 | init = tf.ones(shape, dtype=dtype) 88 | return tf.Variable(init) 89 | 90 | 91 | def zeros(shape, dtype=tf.float32, scope='default'): 92 | with tf.variable_scope(scope): 93 | init = tf.zeros(shape, dtype=dtype) 94 | return tf.Variable(init) -------------------------------------------------------------------------------- /model/util_nn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import numpy as np 4 | import math 5 | from tensorflow.contrib.layers.python.layers import initializers 6 | from config import args 7 | is_train = tf.constant(args.is_train) 8 | 9 | def dense_layer(x, output_dim, scope, 10 | weights_initializer=initializers.xavier_initializer(), 11 | biases_initializer=tf.zeros_initializer, 12 | use_bias=True, activation_fn=None): 13 | """ 14 | A convenient function for constructing fully connected layers 15 | """ 16 | 17 | shape = x.get_shape().as_list() 18 | if len(shape) == 2: # if the previous layer is fully connected, the shape of X is (N, D) 19 | D = shape[1] 20 | else: # if the previous layer is convolutional, the shape of X is (N, H, W, C) 21 | N, H, W, C = shape 22 | D = H * W * C 23 | x = tf.reshape(x, (-1, D)) 24 | 25 | with tf.variable_scope(scope): 26 | w = 
tf.get_variable("weights", shape=(D, output_dim), initializer=weights_initializer) 27 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, w) 28 | # calculate 29 | x = tf.matmul(x, w) 30 | 31 | if use_bias: 32 | b = tf.get_variable("biases", shape=output_dim, initializer=biases_initializer) 33 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, b) 34 | x = tf.nn.bias_add(x, b) 35 | # x = batch_norm(x, is_train=is_train, scope='bn' ) 36 | if activation_fn != None: return activation_fn(x) 37 | else: return x 38 | 39 | 40 | def conv2d(x, filter_size, stride, output_size, initializer, scope, use_bias, padding="VALID"): 41 | """ 42 | A convenient function for constructing convolutional layer 43 | """ 44 | 45 | # input x should be (N, H, W, C) 46 | N, H, W, C = x.get_shape().as_list() 47 | stride = (1, stride, stride, 1) 48 | 49 | with tf.variable_scope(scope): 50 | w = tf.get_variable("W", shape=(filter_size, filter_size, C, output_size), initializer=initializer, 51 | dtype=tf.float32) 52 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, w) 53 | x = tf.nn.conv2d(x, w, strides=stride, padding=padding) 54 | 55 | if use_bias: 56 | b = tf.get_variable("b", shape=output_size, initializer=tf.constant_initializer(0.01), dtype=tf.float32) 57 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, b) 58 | x = tf.nn.bias_add(x, b) 59 | 60 | return x 61 | 62 | 63 | def batch_norm(x, is_train, scope): 64 | """ 65 | A wrapper for batch normalization layer 66 | """ 67 | train_time = tf.contrib.layers.batch_norm(x, decay=0.9, scope="%s/bn" % scope, center=True, scale=False, 68 | updates_collections=None, is_training=True, reuse=None) 69 | test_time = tf.contrib.layers.batch_norm(x, decay=0.9, scope="%s/bn" % scope, center=True, scale=False, 70 | updates_collections=None, is_training=False, reuse=True) 71 | 72 | x = tf.cond(is_train, lambda: train_time, lambda: test_time) 73 | return x 74 | def noisy_dense(x, isTrain, size, scope, bias=True, activation_fn=tf.identity): 75 | 76 | # the function used in eq.7,8 77 | def f(x): 78 | return tf.multiply(tf.sign(x), tf.pow(tf.abs(x), 0.5)) 79 | # Initializer of \mu and \sigma 80 | mu_init = tf.random_uniform_initializer(minval=-1*1/np.power(x.get_shape().as_list()[1], 0.5), 81 | maxval=1*1/np.power(x.get_shape().as_list()[1], 0.5)) 82 | sigma_init = tf.constant_initializer(0.4/np.power(x.get_shape().as_list()[1], 0.5)) 83 | # Sample noise from gaussian 84 | p = sample_noise([x.get_shape().as_list()[1], 1]) 85 | q = sample_noise([1, size]) 86 | f_p = f(p); f_q = f(q) 87 | w_epsilon = f_p*f_q; b_epsilon = tf.squeeze(f_q) 88 | 89 | # w = w_mu + w_sigma*w_epsilon 90 | with tf.variable_scope(scope): 91 | w_mu = tf.get_variable("w_mu", [x.get_shape()[1], size], initializer=mu_init) 92 | w_sigma = tf.get_variable("w_sigma", [x.get_shape()[1], size], initializer=sigma_init) 93 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, w_mu) 94 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, w_sigma) 95 | 96 | if isTrain: 97 | w = w_mu + tf.multiply(w_sigma, w_epsilon) 98 | else: 99 | w = w_mu 100 | 101 | ret = tf.matmul(x, w) 102 | if bias: 103 | # b = b_mu + b_sigma*b_epsilon 104 | b_mu = tf.get_variable("b_mu", [size], initializer=mu_init) 105 | b_sigma = tf.get_variable("b_sigma", [size], initializer=sigma_init) 106 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, b_mu) 107 | tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, b_sigma) 108 | 109 | if isTrain: 110 | b = b_mu + tf.multiply(b_sigma, b_epsilon) 111 | else: 112 | b = b_mu 113 | 114 | return activation_fn(ret 
+ b) 115 | else: 116 | return activation_fn(ret) 117 | 118 | def sample_noise(shape): 119 | noise = tf.random_normal(shape) 120 | return noise 121 | 122 | def save(sess, save_dir, saver): 123 | """ 124 | Save all model parameters and replay memory to self.save_dir folder. 125 | The save_path should be models/env_name/name_of_agent. 126 | """ 127 | # path to the checkpoint name 128 | if not os.path.exists(save_dir): 129 | os.makedirs(save_dir) 130 | path = os.path.join(save_dir, "AkC") 131 | print("Saving the model to path %s" % path) 132 | # self.memory.save(self.save_dir) 133 | print(saver.save(sess, path)) 134 | print("Done saving!") 135 | 136 | 137 | def restore(sess, save_dir, saver): 138 | """ 139 | Restore model parameters and replay memory from self.save_dir folder. 140 | The name of the folder should be models/env_name 141 | """ 142 | # TODO: Need to find a better way to store memory data. Storing all states is not efficient. 143 | ckpts = tf.train.get_checkpoint_state(save_dir) 144 | if ckpts and ckpts.model_checkpoint_path: 145 | ckpt = ckpts.model_checkpoint_path 146 | saver.restore(sess, ckpt) 147 | # self.memory.restore(save_dir) 148 | print("Successfully load the model %s" % ckpt) 149 | # print("Memory size is:") 150 | # self.memory.size() 151 | else: 152 | print("Model Restore Failed %s" % save_dir) 153 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse, datetime, os, sys, csv 2 | import tensorflow as tf 3 | import numpy as np 4 | from collections import defaultdict 5 | 6 | from agent.trainer import Trainer 7 | from env.simul_pms import PMSSim 8 | from config import * 9 | from utils.util import * 10 | from utils.visualize.logger import instance_log 11 | 12 | if args.viz: 13 | from utils.visualize.viz_state import VizState 14 | viz = VizState(file_path=args.summary_dir, 15 | colormap=[ list(range(12)), list(range(12,20)) ], 16 | imshow=True) 17 | # colormap=[[0, 1], list(range(2, 7)), list(range(7, 15)), list(range(15,19)), [19,20,21], list(range(22,40))]) 18 | 19 | class TestRecord(object): 20 | def __init__(self, save_dir='C:/results', filename='test_performance', format = 'csv'): 21 | self.save_dir = save_dir 22 | self.filename = filename+'.'+format 23 | # self.log_columns = list() 24 | self.KPIs = defaultdict(list) 25 | def add_performance(self, column, value): 26 | self.KPIs[column].append(value) 27 | def __len__(self): 28 | if len(self.KPIs.keys())==0: return 0 29 | firstkey = list(self.KPIs.keys())[0] 30 | return len(self.KPIs[firstkey]) 31 | def summary_performance(self, msg=None): 32 | if len(self) == 0: return 33 | idx = len(self)-1 34 | temp = list() 35 | for c, v in self.KPIs.items(): 36 | if type(c) is str: continue 37 | temp.append(float(v[idx])) 38 | avg = np.mean(temp) 39 | self.add_performance('avg', avg) 40 | if msg is None: print('average : ' , avg) 41 | else: print(msg, avg) 42 | def summary_performance_last10(self): 43 | if len(self) == 0: return 44 | best_dict_last = defaultdict(list) 45 | for idx in range(len(self)-1): 46 | for c, v in self.KPIs.items(): 47 | if type(c) is str: continue 48 | temp_performance = float(v[idx]) 49 | if idx < len(self)-2: best_dict_last[c].append(temp_performance) 50 | for c, temp_list in best_dict_last.items(): 51 | self.add_performance(c,np.max(temp_list)) 52 | self.summary_performance('10last average : ') 53 | 54 | def summary_performance_whole(self): 55 | if len(self) == 0: return 
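# Aggregates the recorded results twice per (non-string) column: over the last
# ten rows and over a ten-row sample (the first nine rows plus the final row).
# For each subset, the per-column maximum is appended and its average is
# reported through summary_performance().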
56 | best_dict_last = defaultdict(list) 57 | best_dict_sample = defaultdict(list) 58 | for idx in range(len(self)): 59 | for c, v in self.KPIs.items(): 60 | if type(c) is str: continue 61 | temp_performance = float(v[idx]) 62 | if idx >= len(self)-10: best_dict_last[c].append(temp_performance) 63 | if idx == len(self)-1 or idx<9: best_dict_sample[c].append(temp_performance) 64 | for c, temp_list in best_dict_sample.items(): 65 | self.add_performance(c,np.max(temp_list)) 66 | self.summary_performance('sample average : ') 67 | for c, temp_list in best_dict_last.items(): 68 | self.add_performance(c,np.max(temp_list)) 69 | self.summary_performance('10last average : ') 70 | 71 | def write(self): 72 | with open(os.path.join(self.save_dir, self.filename), mode='a', newline='\n') as f: 73 | f_writer = csv.DictWriter(f, fieldnames=self.KPIs.keys()) 74 | temp_dict = dict() 75 | for column in self.KPIs.keys(): 76 | temp_dict.update({column: column}) 77 | f_writer.writerow(temp_dict) 78 | for row in range(len(self)): 79 | temp_dict.clear() 80 | for column in self.KPIs.keys(): 81 | temp_dict.update({column: self.KPIs[column][row]}) 82 | f_writer.writerow(temp_dict) 83 | f.close() 84 | 85 | def test_logic_seed_pkg(logic): 86 | for did in [3]: 87 | record = instance_log(args.gantt_dir, 'test_logic_instances_{}'.format(args.timestamp)) 88 | rslt = TestRecord(save_dir=args.summary_dir, filename='test_performance_logic') 89 | env = PMSSim(config_load=None, record=record, opt_mix='geomsort', data_name=args.DATASET[did]) 90 | 91 | seed_list = range(10000,10030) 92 | ST_TIME = datetime.datetime.now() 93 | 94 | rslt.add_performance(str(did)+'logic', logic) 95 | for seed in seed_list: 96 | env.set_random_seed(seed) 97 | env.reset() 98 | 99 | done = False 100 | observe = env.observe() 101 | # run experiment 102 | total_reward = 0 103 | while not done: 104 | # interact with environment 105 | observe, reward, done = env.step_logic(pol=logic) 106 | total_reward += reward 107 | 108 | rslt.add_performance(seed, total_reward) 109 | 110 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 111 | print('elapsed time per problem: ', elapsed_time/len(seed_list)) 112 | rslt.summary_performance() 113 | rslt.write() 114 | 115 | def test_logic_seed(): 116 | # logics = ['ssu', 'seq_needs', 'seq_lst', 'seq_spt', 'wcovert'] 117 | logics = ['ssu', 'lst', 'wcovert', 'seq_lst'] 118 | # logics = ['ssu', 'seq_needs'] # for stoch 119 | for did in [args.did]: 120 | record = instance_log(args.gantt_dir, 'test_logic_instances_{}'.format(args.timestamp)) 121 | rslt = TestRecord(save_dir=args.summary_dir, filename='test_performance_logic') 122 | env = PMSSim(config_load=None, log=record, opt_mix='geomsort', data_name=args.DATASET[did]) 123 | 124 | seed_list = list(range(300,330)) * 1 125 | args.bucket = 0 126 | args.auxin_dim=0 127 | for logic in logics: 128 | 129 | ST_TIME = datetime.datetime.now() 130 | 131 | rslt.add_performance(str(did)+'logic', logic) 132 | cnt = 0 133 | for seed in seed_list: 134 | env.set_random_seed(seed) 135 | env.reset() 136 | 137 | done = False 138 | observe = env.observe() 139 | # run experiment 140 | total_reward = 0 141 | record.clearInfo() 142 | while not done: 143 | # interact with environment 144 | observe, reward, done = env.step_logic(pol=logic) 145 | total_reward += reward 146 | record.saveInfo() 147 | # record.fileWrite(cnt//30*30+seed, logic) 148 | # rslt.add_performance(cnt//30*30+seed, total_reward) 149 | record.fileWrite(cnt, logic) 150 | rslt.add_performance(cnt, total_reward) 151 | 
cnt += 1 152 | 153 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 154 | print('elapsed time per problem: ', elapsed_time/len(seed_list)) 155 | rslt.summary_performance() 156 | rslt.write() 157 | 158 | def test_logic(config: str): 159 | # logics = ['ssu', 'seq_needs', 'atcs', 'seq_lst', 'seq_spt', 'wcovert'] 160 | # logics = ['seq_needs'] 161 | logics = ['ssu', 'seq_needs', 'lst', 'spt', 'wcovert'] 162 | record = instance_log(args.gantt_dir, 'test_logic_instances_{}'.format(args.timestamp)) 163 | if len(config)<4: 164 | env = PMSSim(config_load=None, log=record, opt_mix='geomsort') 165 | env.set_random_seed(int(config)) 166 | else: 167 | env = PMSSim(config_load=config, log=record) 168 | env.set_random_seed(0) 169 | ST_TIME = datetime.datetime.now() 170 | 171 | args.bucket=0 172 | for logic in logics: 173 | record.clearInfo() 174 | env.reset() 175 | done = False 176 | observe = env.observe() 177 | # run experiment 178 | total_reward = 0 179 | while not done: 180 | # interact with environment 181 | observe, reward, done = env.step_logic(pol=logic) 182 | total_reward += reward 183 | record.saveInfo() 184 | 185 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 186 | L_avg = 0 187 | kpi = record.get_KPI() 188 | util, cmax, total_setup_time, avg_satisfaction_rate = kpi.get_util(), kpi.get_makespan(), kpi.get_total_setup(), kpi.get_throughput_rate() 189 | 190 | performance_msg = 'Run: %s(%s) / Util: %.5f / Reward: %5.2f / cQ : %5.2f(real:%5.2f) / Lot Choice: %d(Call %d) / Setup : %d / Setup Time : %.2f hour / Makespan : %s / Elapsed t: %5.2f sec / loss: %3.5f / Demand Rate : %.5f(%.2f)' % ( 191 | logic, config, util, total_reward, 0, 0, env.decision_number, 0, env.setup_cnt, total_setup_time//60, str(cmax), elapsed_time, L_avg, avg_satisfaction_rate, kpi.get_total_tardiness()) 192 | print(performance_msg) 193 | return elapsed_time 194 | 195 | # record.fileWrite(logic, 'viewer') 196 | 197 | def test_online(agentObj, env, episode, showFlag=False): 198 | args.is_train = False 199 | # env = SimEnvSim(agentObj.record) 200 | 201 | agentObj.SetEpisode(episode) 202 | if args.viz: viz.new_episode() 203 | ST_TIME = datetime.datetime.now() 204 | 205 | env.reset() 206 | done = False 207 | observe = env.observe(args.oopt) 208 | # run experiment 209 | while not done: 210 | state, action, curr_time = agentObj.get_action(observe) 211 | act_vec = np.zeros([1, args.action_dim]) 212 | act_vec[0, action] = 1 213 | # interact with environment 214 | if args.bucket == 0 or (isinstance(env, TDSim) and 'learner_decision_' in DARTSPolicy): 215 | observe, reward, done = env.step(action) 216 | else: 217 | observe, reward, done = env.step_bucket(action) 218 | if args.viz: 219 | if type(args.state_dim) is int or len(args.state_dim)<=2: 220 | viz.viz_img_2d(state['state'], prod=args.action_dim) 221 | else: 222 | viz.viz_img_3d(state['state']) 223 | # agentObj.remember(state, act_vec, observe, reward, done) 224 | agentObj.remember_record(state, act_vec, reward, done) 225 | 226 | if showFlag is False: 227 | args.is_train = True 228 | if isinstance(env, PMSSim): 229 | return env.get_mean_tardiness(env.get_tardiness_hour(agentObj.reward_total)) 230 | elif isinstance(env, TDSim): 231 | return agentObj.reward_total 232 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 233 | 234 | if isinstance(env, PMSSim): performance = get_performance(episode, agentObj, env, elapsed_time, True) 235 | else: performance = get_performance_pkg(episode, agentObj, env, elapsed_time) 236 | # 
L_avg = 0 237 | # if len(agentObj.loss_history) > 0: L_avg = np.mean(agentObj.loss_history) 238 | agentObj.record.fileWrite(episode, 'test_viewer') 239 | kpi = agentObj.record.get_KPI() 240 | # util=kpi.get_util(); 241 | # cmax_str = '%02dday %dmin' % (kpi.get_makespan() // (24 * 60), (kpi.get_makespan() % (24 * 60))) 242 | # performance_msg = 'TEST: %07d(%d) / Util: %.5f / cR: %5.2f / cQ : %5.2f(real:%5.2f) / Setup : %d / Setup Time : %.2f ' \ 243 | # 'hour / cmax : %s / loss: %3.5f / Demand Rate : %.5f / Elapsed t: %5.2f sec / Decision: %d(Call %d) ' % ( 244 | # episode, 0, kpi.get_util(), agentObj.reward_total, agentObj.cumQ, agentObj.cumV_real, 245 | # env.setup_cnt, kpi.get_total_setup(), cmax_str, L_avg, kpi.get_throughput_rate(), 246 | # elapsed_time, agentObj.getDecisionNum(), agentObj.trigger_cnt) 247 | # print(performance_msg) 248 | # performance = ['%07d' % episode, 249 | # '%d' % 0, 250 | # '%.5f' % kpi.get_util(), 251 | # '%5.2f' % agentObj.reward_total, 252 | # '%5.2f' % agentObj.cumQ, 253 | # '%5.2f' % agentObj.cumV_real, 254 | # '%d' % agentObj.getDecisionNum(), 255 | # '%d' % env.setup_cnt, 256 | # '%d' % int(kpi.get_total_setup() / 3600), 257 | # '%s' % str(kpi.get_makespan()), 258 | # '%5.2f' % elapsed_time, 259 | # '%3.5f' % L_avg, 260 | # '%.5f' % kpi.get_throughput_rate()] 261 | # agentObj.writeSummary() 262 | perform_summary = tf.Summary() 263 | perform_summary.value.add(simple_value=agentObj.reward_total, node_name="reward/test_cR", tag="reward/test_cR") 264 | perform_summary.value.add(simple_value=agentObj.cumQ, node_name="reward/test_cQ", tag="reward/test_cQ") 265 | perform_summary.value.add(simple_value=agentObj.cumV_real, node_name="reward/test_cV_real", tag="reward/test_cV_real") 266 | perform_summary.value.add(simple_value=agentObj.setupNum, node_name="KPI/test_nst", tag="KPI/test_nst") 267 | perform_summary.value.add(simple_value=kpi.get_total_setup() / 60, node_name="KPI/test_tst", tag="KPI/test_tst") 268 | perform_summary.value.add(simple_value=kpi.get_makespan(), node_name="KPI/test_cmax", tag="KPI/test_cmax") 269 | perform_summary.value.add(simple_value=kpi.get_throughput_rate(), node_name="KPI/test_thr", tag="KPI/test_thr") 270 | perform_summary.value.add(simple_value=kpi.get_total_tardiness() / 60, node_name="KPI/total_tard", tag="KPI/total_tard") 271 | if agentObj.getSummary(): 272 | agentObj.getSummary().add_summary(perform_summary, episode) 273 | agentObj.getSummary().flush() 274 | 275 | total_time = datetime.datetime.now() - ST_TIME 276 | # performances.writeSummary() 277 | print("Online test elapsed time: {}\t hour: {} sec ".format(episode, total_time)) 278 | args.is_train = True 279 | return performance 280 | 281 | def test_procedure(tf_config, key=None, best_model_idx=None): 282 | MAX_EPISODE = 1 283 | episode = 0 284 | args.is_train = False 285 | exp_idx = args.eid 286 | if key is None: key = args.key 287 | rslt = TestRecord(save_dir=args.summary_dir, filename='test_performance' + str(key)) 288 | with tf.Session(config=tf_config) as sess: 289 | FIRST_ST_TIME = datetime.datetime.now() 290 | print('Activate Neural network start ...') 291 | global_step = tf.Variable(0, trainable=False) 292 | lr = args.lr 293 | if 'upm' in args.oopt: 294 | agentObj = Trainer(sess, tf.train.GradientDescentOptimizer(lr), 295 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 296 | elif 'fab' in args.oopt: 297 | agentObj = Trainer(sess, tf.train.AdamOptimizer(lr), 298 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 299 | else: 300 | agentObj = 
Trainer(sess, tf.train.RMSPropOptimizer(lr, 0.99, 0.0, 1e-6), 301 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 302 | sess.run(tf.global_variables_initializer()) 303 | 304 | # config_list = call_config_list() 305 | config_list = list(range(300, 330)) 306 | # config_list=[300] 307 | # config_list.append(args.config_load) 308 | # config_list = [args.config_load]#[485] 309 | 310 | saver = tf.train.Saver(max_to_keep=args.max_episode) 311 | 312 | model_files = os.listdir(args.model_dir) 313 | model_files.sort() 314 | length = len(model_files) 315 | if best_model_idx is not None: 316 | best_model_file = model_files[best_model_idx] 317 | model_files = [model_files[k] for k in range(length) if k >= length - 10] # last 10 selection 318 | # model_files = model_files[-1:] 319 | test_did = list() 320 | if best_model_idx is not None: # automatic test procedure 321 | model_files.append(best_model_file) 322 | if args.did == 0 or args.did == 4: 323 | # add DID: te_tau, te_eta 324 | test_did.extend([args.did + 1, args.did + 2, args.did + 3]) 325 | # only add te_base 326 | elif args.did < 4: 327 | test_did.append(0) 328 | elif args.did < 8: 329 | test_did.append(4) 330 | else: # pilot large scale 331 | test_did.extend([0,4]) 332 | # add DID: te_Nm 333 | if args.did >= 4: 334 | test_did.append(args.did - 4) 335 | else: 336 | test_did.append(args.did + 4) 337 | test_did.append(100) 338 | else: # manual test procedure 339 | # test_did = [0, 3, 4, 7] 340 | # test_did = [99] 341 | test_did = [4,5,6,7,100] 342 | summary_str = '' 343 | # test_did = [10] 344 | print("CHECK LENGTH", args.model_dir, len(model_files)) 345 | for data_idx in test_did: 346 | # print('START', args.DATASET[data_idx]) 347 | if data_idx == 100: 348 | env = PMSSim(config_load=None, log=agentObj.record, opt_mix='geomsort',data_name=args.DATASET[args.did]) 349 | else: 350 | env = PMSSim(config_load=None, log=agentObj.record, opt_mix='geomsort', data_name=args.DATASET[data_idx]) 351 | elapsed_total = 0 352 | for model_file_name in model_files: 353 | if args.is_train is False: 354 | # model_saved_dir = os.path.join(os.curdir, 'results', args.key) 355 | # model_file_name = os.listdir(os.path.join(model_saved_dir, 'models'))[0] 356 | model_dir = '{}/{}/'.format(args.model_dir, model_file_name) # str((episode)*freq_save)) 357 | restore(sess, model_dir, saver) 358 | rslt.add_performance('models', 'DS{}_{}'.format(data_idx, str(model_file_name))) 359 | for config_load in config_list: 360 | episode += 1 361 | agentObj.SetEpisode(episode) 362 | ST_TIME = datetime.datetime.now() 363 | 364 | if args.env == 'pms': 365 | if type(config_load) == int: 366 | env.set_random_seed(config_load) 367 | else: 368 | env = PMSSim(config_load=config_load, log=agentObj.record) 369 | elif args.env == 'pkg': 370 | from utils.problemIO.problem_reader import ProblemReaderDB 371 | pr = ProblemReaderDB("problemSet_PCG_darts_bh") 372 | pi = pr.generateProblem(1, False) 373 | pi.twistInTarget(0.1, 0.1) 374 | # pi.setInTarget('-SDP_01 16000 22000 27000 -2MCP_01 27000 21000 12000 -3MCP_01 15000 6000 9000') # 148 375 | env = TDSim(pi, agentObj.record) 376 | env.reset() 377 | done = False 378 | observe = env.observe(args.oopt) 379 | # run experiment 380 | while not done: 381 | state, action, curr_time = agentObj.get_action(observe) 382 | act_vec = np.zeros([1, args.action_dim]) 383 | act_vec[0, action] = 1 384 | # interact with environment 385 | if args.bucket == 0 or (isinstance(env, TDSim) and 'learner_decision_' in DARTSPolicy): 386 | observe, reward, done = 
env.step(action) 387 | else: 388 | observe, reward, done = env.step_bucket(action) 389 | agentObj.remember_record(state, act_vec, reward, done) # test에서는 불필요 390 | 391 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 392 | elapsed_total += elapsed_time 393 | if isinstance(env, PMSSim): 394 | performance = get_performance(episode, agentObj, env, elapsed_time, True) 395 | else: 396 | performance = get_performance_pkg(episode, agentObj, env, elapsed_time) 397 | rslt.add_performance(column=config_load, value=performance[6]) 398 | agentObj.writeSummary() 399 | if True: agentObj.record.fileWrite(episode, 'viewer') 400 | print('average elapsed time: ', elapsed_total / len(config_list)) 401 | rslt.summary_performance() 402 | if best_model_idx is None: 403 | rslt.add_performance('models', 'DS{}_last10'.format(data_idx)) 404 | rslt.summary_performance_last10() 405 | # rslt.summary_performance_whole() 406 | # rslt.add_performance('models', 'DS{}_sample10'.format(data_idx)) 407 | # rslt.add_performance('models', 'DS{}_last10'.format(data_idx)) 408 | else: 409 | rslt.add_performance('models', 'DS{}_last10'.format(data_idx)) 410 | rslt.summary_performance_last10() 411 | print(rslt.KPIs['avg']) 412 | best_avg = rslt.KPIs['avg'][-2] 413 | last10_avg = rslt.KPIs['avg'][-1] 414 | summary_str += '{:.3f}|{:.3f},'.format(best_avg,last10_avg) 415 | # rslt_list.extend(rslt.KPIs['avg'][-2:]) 416 | # print('Final results print', rslt_list) 417 | 418 | rslt.write() 419 | rslt.KPIs.clear() 420 | total_time = datetime.datetime.now() - FIRST_ST_TIME 421 | # performances.writeSummary() 422 | print("Total elapsed time: {}\t hour: {} sec ".format(MAX_EPISODE, total_time)) 423 | sess.close() 424 | return summary_str 425 | 426 | def test_model_multiprocesser(tf_config, key=None): 427 | MAX_EPISODE = 1 428 | episode = 0 429 | args.is_train = False 430 | exp_idx = args.eid 431 | if key is None: key = args.key 432 | rslt = TestRecord(save_dir=args.summary_dir,filename='test_performance'+str(key)) 433 | with tf.Session(config=tf_config) as sess: 434 | FIRST_ST_TIME = datetime.datetime.now() 435 | 436 | print('Activate Neural network start ...') 437 | global_step = tf.Variable(0, trainable=False) 438 | lr = args.lr 439 | if 'upm' in args.oopt: 440 | agentObj = Trainer(sess, tf.train.GradientDescentOptimizer(lr), 441 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 442 | elif 'fab' in args.oopt: 443 | agentObj = Trainer(sess, tf.train.AdamOptimizer(lr), 444 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 445 | else: 446 | agentObj = Trainer(sess, tf.train.RMSPropOptimizer(lr, 0.99, 0.0, 1e-6), 447 | global_step=global_step, use_hist=False, exp_idx=exp_idx) 448 | sess.run(tf.global_variables_initializer()) 449 | 450 | # config_list = call_config_list() 451 | config_list = list(range(300, 330)) * 1 452 | # config_list = list(range(300, 330)) * 30 453 | # config_list=[300] 454 | # config_list.append(args.config_load) 455 | # config_list = [args.config_load]#[485] 456 | 457 | saver = tf.train.Saver(max_to_keep=args.max_episode) 458 | if args.is_train is False: 459 | freq_save = args.save_freq 460 | # model_saved_dir = args.save_dir + key 461 | # model_files = os.listdir(os.path.join(model_saved_dir, 'models')) 462 | model_files= os.listdir(args.model_dir) 463 | model_files.sort() 464 | length = len(model_files) 465 | # model_files = [model_files[k] for k in range(length) if (k+1) % (length/10) == 0 or k>=length-10] 466 | model_files = [model_files[k] for k in range(length) if k>=length-10] 
# last 10 selection 467 | # model_files = model_files[-1:] 468 | print("CHECK LENGTH", args.model_dir, len(model_files)) 469 | for data_idx in [4]: 470 | print('START', args.DATASET[data_idx]) 471 | env = PMSSim(config_load=None, log=agentObj.record, opt_mix='geomsort', data_name=args.DATASET[data_idx]) 472 | elapsed_total = 0 473 | for model_file_name in model_files: 474 | episode = 0 475 | if args.is_train is False: 476 | # model_saved_dir = os.path.join(os.curdir, 'results', args.key) 477 | # model_file_name = os.listdir(os.path.join(model_saved_dir, 'models'))[0] 478 | model_dir = '{}/{}/'.format(args.model_dir, model_file_name) # str((episode)*freq_save)) 479 | restore(sess, model_dir, saver) 480 | rslt.add_performance('models','DS{}_{}'.format(data_idx,str(model_file_name))) 481 | for config_load in config_list: 482 | agentObj.SetEpisode(episode) 483 | ST_TIME = datetime.datetime.now() 484 | 485 | if args.env == 'pms': 486 | if type(config_load) == int: 487 | env.set_random_seed(config_load) 488 | else: 489 | env = PMSSim(config_load=config_load, log=agentObj.record) 490 | elif args.env == 'pkg': 491 | from utils.problemIO.problem_reader import ProblemReaderDB 492 | pr = ProblemReaderDB("problemSet_PCG_darts_bh") 493 | pi = pr.generateProblem(1, False) 494 | pi.twistInTarget(0.1, 0.1) 495 | # pi.setInTarget('-SDP_01 16000 22000 27000 -2MCP_01 27000 21000 12000 -3MCP_01 15000 6000 9000') # 148 496 | env = TDSim(pi, agentObj.record) 497 | env.reset() 498 | done = False 499 | observe = env.observe(args.oopt) 500 | # run experiment 501 | while not done: 502 | state, action, curr_time = agentObj.get_action(observe) 503 | act_vec = np.zeros([1, args.action_dim]) 504 | act_vec[0, action] = 1 505 | # interact with environment 506 | 507 | if args.bucket == 0 or (isinstance(env, TDSim) and 'learner_decision_' in DARTSPolicy): 508 | observe, reward, done = env.step(action) 509 | else: 510 | observe, reward, done = env.step_bucket(action) 511 | agentObj.remember_record(state, act_vec, reward, done) # test에서는 불필요 512 | 513 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 514 | elapsed_total += elapsed_time 515 | if isinstance(env, PMSSim): 516 | performance = get_performance(episode, agentObj, env, elapsed_time, True) 517 | else: 518 | performance = get_performance_pkg(episode, agentObj, env, elapsed_time) 519 | # rslt.add_performance(column=episode//30*30+config_load, value=performance[6]) 520 | rslt.add_performance(column=episode, value=performance[6]) 521 | agentObj.writeSummary() 522 | if True: agentObj.record.fileWrite(episode, 'viewer') 523 | episode += 1 524 | print('average elapsed time: ', elapsed_total/len(config_list)) 525 | rslt.summary_performance() 526 | rslt.summary_performance_whole() 527 | rslt.add_performance('models', 'DS{}_sample10'.format(data_idx)) 528 | rslt.add_performance('models', 'DS{}_last10'.format(data_idx)) 529 | rslt.write() 530 | rslt.KPIs.clear() 531 | total_time = datetime.datetime.now() - FIRST_ST_TIME 532 | # performances.writeSummary() 533 | print("Total elapsed time: {}\t hour: {} sec ".format(MAX_EPISODE, total_time)) 534 | sess.close() 535 | def test_model_singleprocesser(idx: int, tf_config=None, config_load=None): 536 | rslt = TestRecord(save_dir=args.summary_dir, filename='test_performance' + str(idx)) 537 | with tf.Session(config=tf_config) as sess: 538 | episode = 0 539 | args.is_train=False 540 | FIRST_ST_TIME = datetime.datetime.now() 541 | print('Activate Neural network start ...') 542 | global_step = tf.Variable(0, 
trainable=False) 543 | agentObj = Trainer(sess, tf.train.RMSPropOptimizer(args.lr, 0.99, 0.0, 1e-6), global_step, use_hist=False) 544 | sess.run(tf.global_variables_initializer()) 545 | saver = tf.train.Saver(max_to_keep=args.max_episode) 546 | model_saved_dir = 'D:\BH-PAPER/2105_PKG\mixinf_d4f9_da_BLnoswap' # args.save_dir # args.save_dir + args.key 547 | if config_load is None: config_load = args.config_load 548 | 549 | model_files = os.listdir(os.path.join(model_saved_dir, 'models')) 550 | model_files.sort() 551 | model_files = model_files[-10:] 552 | for model_file_name in model_files: 553 | episode += 1 554 | if args.is_train is False: 555 | # model_saved_dir = os.path.join(os.curdir, 'results', args.key) 556 | # model_file_name = os.listdir(os.path.join(model_saved_dir, 'models'))[0] 557 | model_dir = '{}/models/{}/'.format(model_saved_dir, model_file_name) #str((episode)*freq_save)) 558 | restore(sess, model_dir, saver) 559 | rslt.add_performance('models','singleTDS_{}'.format(str(model_file_name))) 560 | if args.env == 'pms': 561 | env = PMSSim(config_load=config_load, log=agentObj.record) 562 | elif args.env == 'pkg': 563 | from utils.problemIO.problem_reader import ProblemReaderDB 564 | from main_pkg import set_problem 565 | pr = ProblemReaderDB("problemSet_darts_bh") 566 | pi = pr.generateProblem(8, False) 567 | set_problem(8, pi) 568 | # pi.twistInTarget(0.1, 0.1) 569 | # pi.setInTarget('-SDP_01 16000 22000 27000 -2MCP_01 27000 21000 12000 -3MCP_01 15000 6000 9000') # 148 570 | env = TDSim(pi, agentObj.record) 571 | agentObj.SetEpisode(episode) 572 | ST_TIME = datetime.datetime.now() 573 | 574 | env.reset() 575 | done = False 576 | observe = env.observe(args.oopt) 577 | # run experiment 578 | while not done: 579 | state, action, curr_time = agentObj.get_action(observe) 580 | act_vec = np.zeros([1, args.action_dim]) 581 | act_vec[0, action] = 1 582 | # interact with environment 583 | if args.bucket == 0: 584 | observe, reward, done = env.step(action) 585 | else: 586 | observe, reward, done = env.step_bucket(action) 587 | agentObj.remember(state,act_vec,observe, reward, done) # test에서는 불필요 588 | 589 | 590 | elapsed_time = (datetime.datetime.now() - ST_TIME).total_seconds() 591 | if isinstance(env, PMSSim): 592 | performance = get_performance(episode, agentObj, env, elapsed_time, True) 593 | else: 594 | performance = get_performance_pkg(episode, agentObj, env, elapsed_time) 595 | # L_avg = 0 596 | # if len(agentObj.loss_history) > 0: L_avg = np.mean(agentObj.loss_history) 597 | # kpi = agentObj.record.get_KPI() 598 | # util, cmax, total_setup_time, avg_satisfaction_rate = kpi.get_util(), kpi.get_makespan(), kpi.get_total_setup(), kpi.get_throughput_rate() 599 | # 600 | # performance_msg = 'Run: %07d(%d) / Util: %.5f / Reward: %5.2f / cQ : %5.2f(real:%5.2f) / Lot Choice: %d(Call %d) / Setup : %d / Setup Time : %.2f hour / Makespan : %s / Elapsed t: %5.2f sec / loss: %3.5f / Demand Rate : %.5f' % ( 601 | # episode, idx, util, agentObj.reward_total, agentObj.cumQ, agentObj.cumV_real, agentObj.getDecisionNum(), agentObj.trigger_cnt, 602 | # env.setup_cnt, total_setup_time, str(cmax), elapsed_time, L_avg, avg_satisfaction_rate) 603 | # print(performance_msg) 604 | agentObj.writeSummary() 605 | rslt.add_performance(column='cR', value=performance[3]) 606 | rslt.add_performance(column='int', value=sum(list(env.counters.cumulativeInTargetCompletion.values())) / 1000.0) 607 | if True: agentObj.record.fileWrite(episode, 'viewer') 608 | # perform_summary = tf.Summary() 609 | # 
perform_summary.value.add(simple_value=agentObj.reward_total, node_name="cR", tag="cR") 610 | # perform_summary.value.add(simple_value=agentObj.cumQ, node_name="cQ", tag="cQ") 611 | # perform_summary.value.add(simple_value=L_avg, node_name="L_episode", tag="L_episode") 612 | # if agentObj.getSummary() and args.is_train: 613 | # agentObj.getSummary().add_summary(perform_summary, episode) 614 | # agentObj.getSummary().flush() 615 | rslt.write() 616 | total_time = datetime.datetime.now() - FIRST_ST_TIME 617 | # performances.writeSummary() 618 | print("Total elapsed time: {}\t hour: {} sec ".format(len(model_files), total_time)) 619 | 620 | if __name__ == "__main__": 621 | # from tensorflow.python.client import device_lib 622 | # for x in device_lib.list_local_devices(): 623 | # print(x.name, x.device_type) 624 | """ 200229 625 | comment: I realized that my code had a call to an undocumented method (device_lib.list_local_devices)[ 626 | https://github.com/tensorflow/tensorflow/blob/d42facc3cc9611f0c9722c81551a7404a0bd3f6b/tensorflow/python/client/device_lib.py#L27] 627 | which was creating a default session. 628 | device_count{} opt still doesn't work 629 | 'GPU':0 has same effect with 'CUDA_V...':-1 630 | """ 631 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333, allow_growth=True) 632 | tf_config = tf.ConfigProto(device_count={'GPU': 0}, gpu_options=gpu_options) 633 | # with tf.device('/cpu:0'): 634 | # for i in range(args.repeat): 635 | if args.test_mode == 'single_logic': 636 | config_list = [] 637 | # config_list = call_config_list() 638 | config_list.append(args.config_load) 639 | for config in config_list: 640 | test_logic(config) 641 | elif args.test_mode == 'logic': 642 | test_logic_seed() 643 | elif args.test_mode == 'single': 644 | # for i in range(30): 645 | test_model_singleprocesser(1,tf_config) 646 | elif args.test_mode == 'multi_pool': 647 | # TypeError: can't pickle _thread.RLock objects (Maybe tensorflow doesn't support POOL) 648 | from functools import partial 649 | import multiprocessing as mp 650 | p = mp.Pool() 651 | with tf.Session(config=tf_config) as sess: 652 | aa = 1 653 | ab = sess 654 | func = partial(test_model_singleprocesser, aa, ab) 655 | p.map(func, ['o1_wall', 'o2_wall']) 656 | # test_model_multiprocesser(1,tf_config) 657 | p.close() 658 | p.join() 659 | sess.close() 660 | elif args.test_mode =='multi': 661 | # for key in os.listdir(args.save_dir): 662 | # test_model_multiprocesser(tf_config=tf_config, key=key) 663 | test_model_multiprocesser(tf_config=tf_config) 664 | elif args.test_mode == 'manual': 665 | test_procedure(tf_config) 666 | elif args.test_mode == 'ig': 667 | from IG import run_env, read_sequence 668 | import copy 669 | # ig_dir = args.gantt_dir 670 | ig_dir = 'D:\PythonSpace\TDSA/results\mixinf_sd5_default_ri\ig_dir' 671 | file_list = os.listdir(ig_dir) 672 | rslt = TestRecord(save_dir='./', filename='test_performance_ig') 673 | rslt.add_performance('ig','ig') 674 | for file in file_list: 675 | if 'csv' not in file: continue 676 | config_load = file.split('.')[0].split('_')[-1] 677 | print(config_load) 678 | test_sequence = read_sequence(os.path.join(ig_dir,file)) 679 | for iter in range(30): 680 | reward = run_env(config=str(int(config_load)+iter), sequence=copy.deepcopy(test_sequence)) 681 | print(reward) 682 | rslt.add_performance(int(config_load)+30*iter,reward) 683 | rslt.summary_performance() 684 | rslt.write() -------------------------------------------------------------------------------- /utils/core/Job.py: 
-------------------------------------------------------------------------------- 1 | class Job(object): 2 | def __init__(self, idx, type, pt, arrival, due = 0, late=100000): 3 | self.idx = idx 4 | self.type = type 5 | self.pt = pt 6 | self.due = due 7 | self.late = late 8 | self.arrival = arrival 9 | self.mac = -1 10 | self.st = 0 -------------------------------------------------------------------------------- /utils/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .timeline import WallTime 2 | -------------------------------------------------------------------------------- /utils/core/timeline.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import itertools 3 | 4 | class WallTime(object): 5 | """ 6 | A global time object distributed to all workers 7 | """ 8 | def __init__(self, bucket_size, window_num=1): 9 | self.curr_time = 0.0 10 | self.curr_bucket = 0.0 11 | self.bucket_size = bucket_size 12 | self.window_size = bucket_size / window_num 13 | self.timestep = bucket_size 14 | 15 | def update(self, new_time): 16 | if self.bucket_size == 0: 17 | self.timestep = new_time - self.curr_time 18 | self.curr_time = new_time 19 | # if self.bucket_size!=0: 20 | # self.curr_bucket = int(self.curr_time / self.bucket_size) 21 | def get_now_bucket(self): return self.curr_bucket * self.bucket_size 22 | def get_next_bucket(self): return (self.curr_bucket+1)*self.bucket_size 23 | 24 | def update_bucket(self, terminal): 25 | assert self.bucket_size != 0 26 | self.curr_bucket += 1 27 | new_time = self.curr_bucket * self.bucket_size 28 | if new_time < self.curr_time: 29 | if not terminal: print("Warning: Bucket was skipped!!!!", new_time, self.curr_time) 30 | self.curr_bucket = int(self.curr_time / self.bucket_size) 31 | # self.curr_time = self.curr_bucket * self.bucket_size 32 | # print("Sett curr_time to ", self.curr_time) 33 | else: 34 | self.curr_time = new_time 35 | def update_window(self, plus=True): 36 | if plus: self.curr_time += self.window_size 37 | else: self.curr_time -= self.window_size 38 | 39 | def check_bucket(self, new_time): 40 | now_bucket = int(new_time / self.bucket_size) 41 | return True if now_bucket == self.curr_bucket else False 42 | 43 | def reset(self): 44 | self.curr_time = 0.0 45 | self.curr_bucket = 0.0 46 | 47 | class Timeline(object): 48 | def __init__(self): 49 | # priority queue 50 | self.pq = [] 51 | # tie breaker 52 | self.counter = itertools.count() 53 | 54 | def __len__(self): 55 | return len(self.pq) 56 | 57 | def peek(self): 58 | if len(self.pq) > 0: 59 | (key, counter, item) = self.pq[0] 60 | # print("peek", key, counter, item) 61 | return key, item 62 | else: 63 | return None, None 64 | 65 | def push(self, key, item): 66 | heapq.heappush(self.pq, 67 | (key, next(self.counter), item)) 68 | # print("push", key, self.counter, item) 69 | 70 | def pop(self): 71 | if len(self.pq) > 0: 72 | (key, counter, item) = heapq.heappop(self.pq) 73 | # print("pop", key, counter, item) 74 | return key, item 75 | else: 76 | return None, None 77 | def to_dict(self): 78 | dict = {} 79 | for i in range(len(self.pq)): 80 | info = self.pq[i] 81 | if info[2] is None: continue 82 | dict.update({info[0]:info[2]}) 83 | return dict 84 | 85 | def reset(self): 86 | self.pq = [] 87 | self.counter = itertools.count() 88 | -------------------------------------------------------------------------------- /utils/util.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os, csv 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def get_performance(episode, agentObj, env, elapsed_time, test=False): 8 | L_avg = 0 9 | if len(agentObj.loss_history) > 0: L_avg = np.mean(agentObj.loss_history) 10 | kpi = agentObj.record.get_KPI() 11 | util, cmax, total_setup_time, avg_satisfaction_rate = kpi.get_util(), kpi.get_makespan(), kpi.get_total_setup(), kpi.get_throughput_rate() 12 | cmax_str = '%02dd%dmin' % (cmax // (24 * 60 * 60), (cmax % (24 * 60 * 60)) // 60) 13 | 14 | tardiness_hour = -kpi.get_total_tardiness()/60.0 15 | 16 | flag = 'Test' if test else 'Run' 17 | performance_msg = '%s: %07d(%s) / Util: %.5f / cR: %5.2f / cQ : %5.2f(real:%5.2f) / Setup: %d(%.2f hour)' \ 18 | '/ cmax: %s / loss: %3.5f / Demand: %.5f(%.2f) / Elapsed t: %5.2fsec / Decision: %d, %dstep ' % ( 19 | flag, episode, env.config_load, util, agentObj.reward_total, agentObj.cumQ, agentObj.cumV_real, 20 | env.setup_cnt, total_setup_time/60, cmax_str, L_avg, avg_satisfaction_rate, tardiness_hour, 21 | elapsed_time, agentObj.getDecisionNum(), agentObj.trigger_cnt) 22 | print(performance_msg) 23 | performance = ['%07d' % episode, 24 | '%s' % env.config_load, 25 | '%.5f' % util, 26 | '%5.2f' % agentObj.reward_total, 27 | '%5.2f' % agentObj.cumQ, 28 | '%5.2f' % agentObj.cumV_real, 29 | '%5.2f' % env.get_mean_tardiness(tardiness_hour), 30 | '%d' % agentObj.getDecisionNum(), 31 | '%d' % env.setup_cnt, 32 | '%d' % int(total_setup_time / 3600), 33 | '%s' % str(cmax), 34 | '%5.2f' % elapsed_time, 35 | '%3.5f' % L_avg, 36 | '%.5f' % avg_satisfaction_rate] 37 | return performance 38 | def call_config_list_dir(dir_name): 39 | file_list=os.listdir('env/config/'+dir_name) 40 | config_list = list() 41 | for name in file_list: 42 | config_str = '_'.join(name.split('_')[:-1]) 43 | config_list.append(dir_name+'/'+config_str) 44 | return sorted(list(set(config_list))) 45 | def call_config_list(multiprocess=None, is_valid=False): 46 | if is_valid: 47 | wip_list = ['w4']#, 'wall', 'w2'] 48 | slice_type = ['o'] # ,'e'] 49 | slice_idx = list(range(1, 6)) 50 | else: 51 | wip_list = ['w4', 'wall', 'w2'] 52 | slice_type = ['oo'] # ,'e'] 53 | slice_idx = list(range(1, 16)) 54 | config_list = list() 55 | for config_wip in wip_list: 56 | config_sublist = list() 57 | for t in slice_type: 58 | for i in slice_idx: 59 | config_slice = '{}{}_{}'.format(t,i,config_wip) 60 | config_sublist.append(config_slice) 61 | if multiprocess is None: config_list.extend(config_sublist) 62 | else: config_list.append(config_sublist) 63 | 64 | return config_list 65 | 66 | def save(sess, save_dir, saver): 67 | """ 68 | Save all model parameters and replay memory to self.save_dir folder. 69 | The save_path should be models/env_name/name_of_agent. 70 | """ 71 | # path to the checkpoint name 72 | if not os.path.exists(save_dir): 73 | os.makedirs(save_dir) 74 | path = os.path.join(save_dir, "AkC") 75 | print("Saving the model to path %s" % path) 76 | # self.memory.save(self.save_dir) 77 | print(saver.save(sess, path)) 78 | print("Done saving!") 79 | 80 | 81 | def restore(sess, save_dir, saver): 82 | """ 83 | Restore model parameters and replay memory from self.save_dir folder. 84 | The name of the folder should be models/env_name 85 | """ 86 | # TODO: Need to find a better way to store memory data. Storing all states is not efficient. 
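# tf.train.get_checkpoint_state reads the 'checkpoint' index file under save_dir;
# when it holds a valid model_checkpoint_path the saver restores every tracked
# variable into the current session, otherwise only a failure message is printed.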
87 | ckpts = tf.train.get_checkpoint_state(save_dir) 88 | if ckpts and ckpts.model_checkpoint_path: 89 | ckpt = ckpts.model_checkpoint_path 90 | saver.restore(sess, ckpt) 91 | # self.memory.restore(save_dir) 92 | print("Successfully load the model %s" % ckpt) 93 | # print("Memory size is:") 94 | # self.memory.size() 95 | else: 96 | print("Model Restore Failed %s" % save_dir) 97 | -------------------------------------------------------------------------------- /utils/visualize/logger.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, OrderedDict 2 | from copy import copy 3 | import json 4 | import env.util_sim as util 5 | 6 | 7 | class log_decision(object): 8 | def __init__(self, idx, job_id, job_type, job_due, res_id, res_type, decision_time, time_setup, time_proc, reward 9 | ,action_vector=None, predicted_Q=None): 10 | self.idx =idx 11 | self.job_id=job_id 12 | self.job_type=job_type 13 | self.job_due=job_due 14 | self.res_id=res_id 15 | self.res_type=res_type 16 | self.decision_time=decision_time 17 | self.time_setup=time_setup 18 | self.time_proc=time_proc 19 | self.reward=reward 20 | 21 | class instance_log(object): 22 | def __init__(self, dir, filename): 23 | self.rts_info = list(dict()) 24 | self.rts_info_dict = defaultdict(dict) 25 | self.key_list = [] 26 | self.filepath = '{}/{}'.format(dir, str(filename)) 27 | self.decision_list = list() 28 | self.decision_list_recent = list() 29 | self.res_record_dict = defaultdict(list) 30 | self.job_record_dict = defaultdict(list) 31 | 32 | def __len__(self): return len(self.decision_list) 33 | 34 | def appendInfo(self, key, value): 35 | if key in self.rts_info_dict.keys(): 36 | if self.rts_info_dict[key] > value: #Do not replace the smaller value 37 | return 38 | else: 39 | self.key_list.append(key) 40 | self.rts_info_dict[key] = value 41 | 42 | def append_history(self, dec: log_decision): 43 | self.decision_list.append(dec) 44 | self.decision_list_recent.append(dec) 45 | self.res_record_dict[dec.res_id].append(dec) 46 | self.job_record_dict[dec.job_id].append(dec) 47 | #FIXME: calculate KPI here if necessary 48 | 49 | def get_res_history(self, idx): return self.res_record_dict[idx] 50 | def get_job_history(self, idx): return self.job_record_dict[idx] 51 | def saveInfo(self): 52 | self.rts_info.append(copy(self.rts_info_dict)) 53 | self.rts_info_dict.clear() 54 | self.decision_list_recent.clear() 55 | 56 | def clearInfo(self): 57 | self.rts_info.clear() 58 | self.key_list.clear() 59 | self.rts_info_dict.clear() 60 | self.decision_list.clear() 61 | self.decision_list_recent.clear() 62 | self.res_record_dict.clear() 63 | self.job_record_dict.clear() 64 | 65 | def get_timestamp(self, raw_time): 66 | result = raw_time * 1000 * 60/util.TimeUnit - 32400000 67 | return result 68 | 69 | def fileWrite(self, simul_iter, viewer_key=None): 70 | if len(self.rts_info) == 0: 71 | return 72 | f = open(self.filepath+'.csv', 'a') 73 | msg = "" 74 | keylist = sorted(set(self.key_list)) 75 | for row in range(len(self.rts_info)+1): 76 | for col in range(len(keylist)): 77 | if row == 0: 78 | msg += '%s,' % (keylist[col]) 79 | else: 80 | info = self.rts_info[row - 1][keylist[col]] 81 | if type(info) is str: 82 | msg += '%s,' % (info) 83 | elif type(info) is float: 84 | msg += '%f,' % (info) 85 | elif type(info) is int: 86 | msg += '%d,' % (info) 87 | elif type(info) is bool: 88 | msg += '%s,' % str(info) 89 | elif type(info) is list: 90 | float_flag=False 91 | for temp in info: 92 | if 
type(temp) == float: 93 | float_flag=True 94 | if float_flag: 95 | # msg += '[' 96 | # for temp in info: 97 | # msg += '(%3.3f) ' % temp 98 | # msg += ']' 99 | msg += '%s,' % str(info) 100 | else: 101 | msg += '[' 102 | for temp in info: 103 | msg += str(temp) + ' ' 104 | msg += ']' 105 | msg += ',' 106 | else: 107 | msg += '%s,' % str(info) #''not pre defined type,' 108 | msg += '\n' 109 | msg += '%s\n' % ('above result from iteration {}'.format(simul_iter)) 110 | f.write(msg) 111 | f.close() 112 | 113 | def get_KPI(self): 114 | rslt = KPI(self.__len__()) 115 | for decision_row in range(self.__len__()): 116 | decision_info = self.decision_list[decision_row] 117 | assert isinstance(decision_info, log_decision) 118 | res = str(decision_info.res_id) 119 | if float(decision_info.time_setup) > 0: 120 | st = decision_info.decision_time + decision_info.time_setup 121 | rslt.tst += decision_info.time_setup / util.TimeUnit 122 | else: 123 | st = decision_info.decision_time 124 | et = st + decision_info.time_proc 125 | if et > rslt.max_dict[res]: 126 | rslt.max_dict[res] = et 127 | rslt.tpt += decision_info.time_proc 128 | # print('process', res, decision_info['time_proc'], st, et) 129 | prod = decision_info.job_type 130 | if et <= decision_info.job_due: 131 | rslt.thr_dict[prod] += 1 132 | rslt.thr_dict_per_mach[res] += 1 133 | else: 134 | rslt.tard_dict[prod] += (et - decision_info.job_due) / util.TimeUnit 135 | 136 | rslt.calculate() 137 | return rslt 138 | 139 | class KPI(object): 140 | def __init__(self, decision_num): 141 | self.total = decision_num 142 | self.thr_dict = defaultdict(int) 143 | self.tard_dict = defaultdict(int) 144 | self.thr_dict_per_mach = defaultdict(int) 145 | self.max_dict = defaultdict(float) 146 | self.nst=0 147 | self.tst=0 #total setup time, min 148 | self.tpt=0 #total processing duration, min 149 | self.cmax=0 150 | self.util=0 151 | def calculate(self): 152 | max_list = list(self.max_dict.values()) 153 | self.cmax = max(max_list) 154 | self.util = self.tpt / sum(max_list) 155 | def get_total_setup(self): 156 | return self.tst 157 | def get_num_setup(self): 158 | return self.nst 159 | def get_makespan(self): 160 | return self.cmax 161 | def get_util(self): 162 | return self.util 163 | def get_throughput_rate(self): 164 | return sum(self.thr_dict.values()) / self.total 165 | def get_total_tardiness(self): 166 | return sum(self.tard_dict.values()) 167 | -------------------------------------------------------------------------------- /utils/visualize/viz_state.py: -------------------------------------------------------------------------------- 1 | import cv2, os 2 | import numpy as np 3 | from collections import defaultdict 4 | 5 | class VizState(object): 6 | def __init__(self, file_path, imshow=False, colormap=None): 7 | self.file_path = file_path 8 | self.epi = 0 9 | self.cnt = 0 10 | self.imshow = imshow 11 | if colormap is None: 12 | self.colormap = defaultdict(cv2.COLORMAP_TWILIGHT_SHIFTED) 13 | else: 14 | self.colormap = dict() 15 | colormap_list = [cv2.COLORMAP_SPRING, cv2.COLORMAP_AUTUMN, cv2.COLORMAP_WINTER, cv2.COLORMAP_COOL, 16 | cv2.COLORMAP_BONE, cv2.COLORMAP_MAGMA] 17 | for i in range(len(colormap)): 18 | temp_colormap = colormap_list[i%len(colormap_list)] 19 | idx_list = colormap[i] 20 | for j in idx_list: self.colormap[j]=temp_colormap 21 | 22 | def new_episode(self): 23 | self.epi += 1 24 | self.cnt = 0 25 | 26 | def viz_img_2d(self, state, prod=10): 27 | if type(state) is not np.ndarray: mat2d = np.array(state) 28 | if len(mat2d.shape)==1: mat2d = 
np.reshape(mat2d, (prod,-1)) 29 | elif len(mat2d.shape)==3: pass 30 | mat2d = np.array(mat2d * 255, dtype=np.uint8) 31 | h, w = mat2d.shape 32 | img = None 33 | for i in range(w): 34 | img_line = cv2.applyColorMap(mat2d[:,i], self.colormap[i]) 35 | if img is None: img = img_line 36 | else: img = np.concatenate((img, img_line),axis=1) 37 | img = cv2.resize(img, (w*10, h*10)) 38 | # img = cv2.resize(mat2d, (w * 10, h * 10)) 39 | if self.imshow: 40 | cv2.imshow('',img) 41 | cv2.waitKey(100) 42 | else: 43 | cv2.imwrite(os.path.join(self.file_path, 'epi{}_{}.png'.format(self.epi, self.cnt)), img) 44 | self.cnt+=1 45 | 46 | def viz_img_3d(self, state, col_num=5): 47 | if type(state) is not np.ndarray: state = np.array(state) 48 | h, w, c = state.shape 49 | line_space = 2 50 | full_w = (w+line_space)*col_num 51 | full_h = (h+line_space)*(c//col_num+1) 52 | canvas = np.ones((full_h, full_w), dtype=np.uint8) * 125 53 | for ch in range(c): 54 | top = (h+line_space) * (ch//col_num) 55 | left = (w+line_space) * (ch%col_num) 56 | mat2d = np.array(state[:,:,ch] * 255, dtype=np.uint8) 57 | canvas[top:top+h,left:left+w] = mat2d 58 | img = cv2.resize(canvas, (full_w*5, full_h*5)) 59 | if self.imshow: 60 | cv2.imshow('',img) 61 | cv2.waitKey(100) 62 | else: 63 | cv2.imwrite(os.path.join(self.file_path, 'epi{}_{}.png'.format(self.epi, self.cnt)), img) 64 | self.cnt+=1 65 | 66 | 67 | --------------------------------------------------------------------------------
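The following is a minimal, self-contained numpy sketch (illustrative only, not part of the repository) of the two computations at the core of PDQN in model/dqn.py: the dueling aggregation Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)) from build()/build_target(), and the importance-weighted Huber loss (delta = 0.5) from _loss(). Array shapes and the example numbers are assumptions chosen only for demonstration.

import numpy as np

def dueling_q(value, advantage):
    # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)), as aggregated in PDQN.build()
    return value + (advantage - advantage.mean(axis=1, keepdims=True))

def weighted_huber_loss(target_q, predicted_q, is_weight, delta=0.5):
    # elementwise Huber loss scaled by importance-sampling weights, as in PDQN._loss()
    diff = target_q - predicted_q
    quadratic = 0.5 * np.square(diff)
    linear = delta * np.abs(diff) - 0.5 * delta ** 2
    loss = np.where(np.abs(diff) < delta, quadratic, linear)
    return float(np.mean(is_weight * loss))

value = np.array([[1.0], [0.5]])                                # (batch, 1)
advantage = np.array([[0.2, -0.1, 0.3], [0.0, 0.4, -0.4]])      # (batch, action_dim)
q = dueling_q(value, advantage)                                 # (batch, action_dim)
action_onehot = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])    # selected actions
predicted_q = np.sum(action_onehot * q, axis=1, keepdims=True)  # mirrors self.predicted_Q
target_q = np.array([[0.9], [0.3]])                             # fed as r + gamma * Q_target(s', a')
print(q)
print(weighted_huber_loss(target_q, predicted_q, np.ones((2, 1))))

Subtracting the mean advantage before adding V keeps the value and advantage streams identifiable, which is the standard dueling-network aggregation the code above follows.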