├── DQN.py
├── Job_shop.py
├── Object_for_FJSP.py
└── README.md

/DQN.py:
--------------------------------------------------------------------------------
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
import random
from collections import deque
from tensorflow.keras import layers, models
from Job_shop import Situation
from tensorflow.keras.optimizers import Adam


class DQN:
    def __init__(self, Hid_Size1, Hid_Size2, Hid_Size3):
        self.Hid_Size1 = Hid_Size1
        self.Hid_Size2 = Hid_Size2
        self.Hid_Size3 = Hid_Size3

        # ---- Phase-1 network (reward-rule selection): 2 hidden layers, Hid_Size1 nodes each ----
        model = models.Sequential()
        model.add(layers.Input(shape=(5,)))
        model.add(layers.Dense(self.Hid_Size1, name='l1'))
        model.add(layers.Dense(self.Hid_Size1, name='l2'))
        # model.add(layers.Dense(self.Hid_Size1, name='l3'))
        # model.add(layers.Dense(self.Hid_Size1, name='l4'))
        # model.add(layers.Dense(self.Hid_Size1, name='l5'))
        model.add(layers.Dense(3, name='l6'))
        model.compile(loss='mse',
                      optimizer=Adam(learning_rate=0.001))
        # ---- Phase-2 network (job dispatching rule selection): 2 hidden layers, Hid_Size2 nodes each ----
        model1 = models.Sequential()
        model1.add(layers.Input(shape=(6,)))
        model1.add(layers.Dense(self.Hid_Size2, name='l1'))
        model1.add(layers.Dense(self.Hid_Size2, name='l2'))
        # model1.add(layers.Dense(self.Hid_Size2, name='l3'))
        # model1.add(layers.Dense(self.Hid_Size2, name='l4'))
        # model1.add(layers.Dense(self.Hid_Size2, name='l5'))
        model1.add(layers.Dense(5, name='l6'))
        model1.compile(loss='mse',
                       optimizer=Adam(learning_rate=0.001))
        # ---- Phase-3 network (machine dispatching rule selection): 2 hidden layers, Hid_Size3 nodes each ----
        model2 = models.Sequential()
        model2.add(layers.Input(shape=(7,)))
        model2.add(layers.Dense(self.Hid_Size3, name='l1'))
        model2.add(layers.Dense(self.Hid_Size3, name='l2'))
        # model2.add(layers.Dense(self.Hid_Size3, name='l3'))
        # model2.add(layers.Dense(self.Hid_Size3, name='l4'))
        # model2.add(layers.Dense(self.Hid_Size3, name='l5'))
        model2.add(layers.Dense(3, name='l6'))
        model2.compile(loss='mse',
                       optimizer=Adam(learning_rate=0.001))
        # model.summary()
        self.model = model
        self.model1 = model1
        self.model2 = model2

        # ------------ Q-network parameters -------------
        self.gama = 0.95  # discount factor γ
        # self.lr = 0.001  # learning rate
        self.global_step = 0
        self.update_target_steps = 200  # interval (in steps) for updating the target networks
        # The target networks must be independent copies of the online networks,
        # otherwise replace_target() below would copy weights onto themselves.
        self.target_model = models.clone_model(self.model)
        self.target_model.set_weights(self.model.get_weights())
        self.target_model1 = models.clone_model(self.model1)
        self.target_model1.set_weights(self.model1.get_weights())
        self.target_model2 = models.clone_model(self.model2)
        self.target_model2.set_weights(self.model2.get_weights())

        # -------------------Agent-------------------
        self.e_greedy = 0.6
        self.e_greedy_decrement = 0.0001
        self.L = 20  # Number of training episodes L

        # ---------------Replay Buffer---------------
        self.buffer = deque(maxlen=2000)
        self.Batch_size = 8  # batch size of samples used for each gradient-descent step

    def replace_target(self):
        # Copy the online networks' weights into the corresponding target networks.
        self.target_model.get_layer(name='l1').set_weights(self.model.get_layer(name='l1').get_weights())
        self.target_model.get_layer(name='l2').set_weights(self.model.get_layer(name='l2').get_weights())
        # self.target_model.get_layer(name='l3').set_weights(self.model.get_layer(name='l3').get_weights())
        # self.target_model.get_layer(name='l4').set_weights(self.model.get_layer(name='l4').get_weights())
        # self.target_model.get_layer(name='l5').set_weights(self.model.get_layer(name='l5').get_weights())
        self.target_model.get_layer(name='l6').set_weights(self.model.get_layer(name='l6').get_weights())

        self.target_model1.get_layer(name='l1').set_weights(self.model1.get_layer(name='l1').get_weights())
        self.target_model1.get_layer(name='l2').set_weights(self.model1.get_layer(name='l2').get_weights())
        # self.target_model1.get_layer(name='l3').set_weights(self.model1.get_layer(name='l3').get_weights())
        # self.target_model1.get_layer(name='l4').set_weights(self.model1.get_layer(name='l4').get_weights())
        # self.target_model1.get_layer(name='l5').set_weights(self.model1.get_layer(name='l5').get_weights())
        self.target_model1.get_layer(name='l6').set_weights(self.model1.get_layer(name='l6').get_weights())

        self.target_model2.get_layer(name='l1').set_weights(self.model2.get_layer(name='l1').get_weights())
        self.target_model2.get_layer(name='l2').set_weights(self.model2.get_layer(name='l2').get_weights())
        # self.target_model2.get_layer(name='l3').set_weights(self.model2.get_layer(name='l3').get_weights())
        # self.target_model2.get_layer(name='l4').set_weights(self.model2.get_layer(name='l4').get_weights())
        # self.target_model2.get_layer(name='l5').set_weights(self.model2.get_layer(name='l5').get_weights())
        self.target_model2.get_layer(name='l6').set_weights(self.model2.get_layer(name='l6').get_weights())

    def replay(self):
        if self.global_step % self.update_target_steps == 0:
            self.replace_target()
        # replay the history and train the three networks
        minibatch = random.sample(self.buffer, self.Batch_size)
        for state, action1, action2, reward, next_state, reward_id, done in minibatch:
            target = reward
            target1 = reward
            target2 = reward
            if not done:
                # The Q-learning targets use the maximum Q-value of the next state
                # (np.max, not np.argmax, which would return an action index).
                output = self.target_model.predict(next_state, verbose=0)
                k = np.max(output)
                target = reward + self.gama * k
                next_state1 = np.expand_dims(np.append(next_state[0], k), 0)
                output1 = self.target_model1.predict(next_state1, verbose=0)
                k1 = np.max(output1)
                target1 = reward + self.gama * k1
                next_state2 = np.expand_dims(np.append(next_state1[0], k1), 0)
                target2 = reward + self.gama * np.max(self.target_model2.predict(next_state2, verbose=0))
            target_f = self.model.predict(state, verbose=0)
            k = np.max(target_f)
            state1 = np.expand_dims(np.append(state[0], k), 0)
            target_f1 = self.model1.predict(state1, verbose=0)
            k1 = np.max(target_f1)
            state2 = np.expand_dims(np.append(state1[0], k1), 0)
            target_f2 = self.model2.predict(state2, verbose=0)
            target_f[0][reward_id] = target
            target_f1[0][action1] = target1
            target_f2[0][action2] = target2
            self.model.fit(state, target_f, epochs=1, verbose=0)
            self.model1.fit(state1, target_f1, epochs=1, verbose=0)
            self.model2.fit(state2, target_f2, epochs=1, verbose=0)
        self.global_step += 1

    def Select_action(self, obs):
        # obs = np.expand_dims(obs, 0)
        if random.random()>
        # ... (a block of DQN.py is missing from this snapshot at this point: the remainder
        # of Select_action and the beginning of main, original lines 133-255) ...
                      ', 'reward rule:', rt, 'job dispatching rule:', act1, ', machine rule', act2, ' ', 'assign job', at_trans[0], 'to machine', at_trans[1])
                Sit.scheduling(at_trans)
                obs_t = Sit.Features()
                TR_a_t1 = Sit.TR_a
                TR_e_t1 = Sit.TR_e
                if j == O_num - 1:
                    done = True
                # obs = obs_t
                obs_t = np.expand_dims(obs_t, 0)
                # obs = np.expand_dims(obs, 0)
                # print(obs, obs_t)
                Job = Sit.Jobs
                for Ji in range(len(Job)):
                    if len(Job[Ji].End) > 0:
                        endTime = max(Job[Ji].End)
                    else:
                        endTime = 0
                    makespan_t1 = max(makespan_t1, endTime)
                if 0 == rt:
                    # r_t = Sit.reward1(obs[0][9], obs[0][7], obs_t[0][9], obs_t[0][7])
                    r_t = Sit.reward1(TR_a_t, TR_e_t, TR_a_t1, TR_e_t1)
                elif 1 == rt:
                    r_t = Sit.reward2(obs[0][0], obs_t[0][0])
                else:
                    # r_t = Sit.reward3(obs[0][7], obs_t[0][7])
                    r_t = Sit.reward3(makespan_t, makespan_t1)
                makespan_t = makespan_t1
                TR_a_t = TR_a_t1
                TR_e_t = TR_e_t1
                # --------------------
                # if l == self.L - 1:
                #     total_tadiness = 0
                #     if len(Job[Ji].End) > 0:
                #         for Ji in range(len(Job)):
                #             if max(Job[Ji].End) > D[Ji]:
                #                 total_tadiness += abs(max(Job[Ji].End) - D[Ji])
                #     T_d = total_tadiness
                #     U_k = sum(Sit.UK) / M_num
                #     M_s = makespan_t
                #     file.write(str(obs))
                #     file.write("," + str(T_d) + "," + str(U_k) + "," + str(M_s) + "," + str(rt) + "," + str(r_t))
                #     file.write("\n")
                #     file.flush()
                #     if done == True:
                #         file.close()
                # --------------------
                self._append((obs, act1, act2, r_t, obs_t, rt, done))
                if k > self.Batch_size:
                    # batch_obs, batch_action, batch_reward, batch_next_obs, done = self.sample()
                    self.replay()
                Total_reward += r_t
                obs = obs_t
            total_tadiness = 0
            makespan = makespan_t
            uk_ave = sum(Sit.UK) / M_num
            Job = Sit.Jobs
            # E = 0
            # K = [i for i in range(len(Job))]
            for Ji in range(len(Job)):
                if max(Job[Ji].End) > D[Ji]:
                    total_tadiness += abs(max(Job[Ji].End) - D[Ji])
            print('<<<<<<<<<-----------------total_tardiness:', total_tadiness, '------------------->>>>>>>>>>')
            Total_tard.append(total_tadiness)
            print('<<<<<<<<<-----------------uk_ave:', uk_ave, '------------------->>>>>>>>>>')
            Total_uk_ave.append(uk_ave)
            print('<<<<<<<<<-----------------makespan:', makespan, '------------------->>>>>>>>>>')
            Total_makespan.append(makespan)
            print('<<<<<<<<<-----------------reward:', Total_reward, '------------------->>>>>>>>>>')
            TR.append(Total_reward)
            # plt.plot(K, End, color='y')
            # plt.plot(K, D, color='r')
            # plt.show()
        # plt.plot(x, Total_tard)
        # plt.show()
        return Total_tard, Total_uk_ave, Total_makespan

    def Instance_Generator(self, M_num, E_ave, New_insert):
        '''
        :param M_num: machine number
        :param E_ave: mean of the exponential distribution of new-job inter-arrival times
        :param New_insert: number of newly inserted jobs
        :return: Processing_time,
                 A: arrival time of each job (0 for the initial jobs),
                 D: delivery (due) time of each job,
                 M_num: machine number,
                 Op_num: operation number of each job,
                 J: job dictionary, O_num: total operation number, J_num: job number,
                 Change_cutter_time, Repair_time
        '''
        Initial_Job_num = 5
        Op_num = [random.randint(1, 20) for i in range(New_insert + Initial_Job_num)]
        Processing_time = []
        for i in range(Initial_Job_num + New_insert):
            Job_i = []
            for j in range(Op_num[i]):
                k = random.randint(1, M_num - 2)
                T = list(range(M_num))
                random.shuffle(T)
                T = T[0:k + 1]
                O_i = list(np.ones(M_num) * (-1))
                for M_i in range(len(O_i)):
                    if M_i in T:
                        O_i[M_i] = random.randint(1, 50)
                Job_i.append(O_i)
            Processing_time.append(Job_i)
        A1 = [0 for i in range(Initial_Job_num)]
        A = np.random.exponential(E_ave, size=New_insert)
        A = [int(A[i]) for i in range(len(A))]  # arrival times of the newly inserted jobs
        A1.extend(A)
        T_ijave = []
        for i in range(Initial_Job_num + New_insert):
            Tad = []
            for j in range(Op_num[i]):
                T_ijk = [k for k in Processing_time[i][j] if k != -1]
                Tad.append(sum(T_ijk) / len(T_ijk))
            T_ijave.append(sum(Tad))
        # random.choice([0.5, 1.0, 1.5])
        D1 = [int(T_ijave[i] * random.choice([0.5, 1.0, 1.5])) for i in range(Initial_Job_num)]
        D = [int(A1[i] + T_ijave[i] * random.choice([0.5, 1.0, 1.5])) for i in range(Initial_Job_num, Initial_Job_num + New_insert)]
        D1.extend(D)
        # ?DDT=0.5
        O_num = sum(Op_num)
        J = dict(enumerate(Op_num))
        J_num = Initial_Job_num + New_insert

        # tool-change time of each machine
        Change_cutter_time = list(np.zeros(M_num))
        # repair time of each machine after a breakdown
        Repair_time = list(np.zeros(M_num))
        for i in range(M_num):
            Change_cutter_time[i] = random.randint(1, 20)
            Repair_time[i] = random.randint(1, 99)

        return Processing_time, A1, D1, M_num, Op_num, J, O_num, J_num, Change_cutter_time, Repair_time


Total_Machine = [10, 20, 30]  # numbers of machines
Job_insert = [20, 30, 40]     # numbers of newly arriving jobs
# Initial_job = [20, 30, 40]  # numbers of initial jobs
DDT = [0.5, 1.0, 1.5]         # due-date tightness (job urgency)
E_ave = [50, 100, 200]        # means of the exponential inter-arrival distribution


def train(e_ave, machine, job_insert):
    curr_dir = os.getcwd()
    # Results go to the 'LS' sub-directory; create it if it does not exist yet.
    result_dir = os.path.join(curr_dir, 'LS')
    os.makedirs(result_dir, exist_ok=True)
    file = open(os.path.join(result_dir, str(e_ave) + str(machine) + str(job_insert) + 'result.txt'), 'w+', encoding='utf-8')
    d = DQN(30, 30, 30)
    Processing_time, A, D, M_num, Op_num, J, O_num, J_num, Change_cutter_time, Repair_time = d.Instance_Generator(machine, e_ave, job_insert)
    Total_tard, Total_uk_ave, Total_makespan = d.main(J_num, M_num, O_num, J, Processing_time, D, A, Change_cutter_time, Repair_time)

    tard_ave = sum(Total_tard) / d.L
    uk_ave = sum(Total_uk_ave) / d.L
    makespan_ave = sum(Total_makespan) / d.L
    std1 = 0
    std2 = 0
    std3 = 0
    for ta in Total_tard:
        std1 += np.square(ta - tard_ave)
    for ua in Total_uk_ave:
        std2 += np.square(ua - uk_ave)
    for ma in Total_makespan:
        std3 += np.square(ma - makespan_ave)
    # standard deviations
    std1 = np.sqrt(std1 / d.L)
    std2 = np.sqrt(std2 / d.L)
    std3 = np.sqrt(std3 / d.L)

    file.write(str("{:.2e}".format(tard_ave)) + "/" + str("{:.2e}".format(std1)) + "," + str(
        "{:.2e}".format(uk_ave)) + "/" + str("{:.2e}".format(std2)) + "," + str(
        "{:.2e}".format(makespan_ave)) + "/" + str("{:.2e}".format(std3)))
    file.write("\n")
    file.flush()
    file.close()

# def call_back(v):
#     print('----> callback pid:', os.getpid(), ',tid:', threading.currentThread().ident, ',v:', v)


if __name__ == '__main__':
    Total_Machine = [10, 20, 30]  # numbers of machines
    Job_insert = [20, 30, 40]     # numbers of newly arriving jobs
    E_ave = [50, 100, 200]        # means of the exponential inter-arrival distribution

    for e_ave in E_ave:
        for machine in Total_Machine:
            for job_insert in Job_insert:
                train(e_ave, machine, job_insert)
    # pool = multiprocessing.Pool(27)
    # results = [pool.apply_async(train, args=(e_ave, machine, job_insert), callback=call_back) for e_ave in E_ave for machine in Total_Machine for job_insert in Job_insert]
    # pool.close()
    # pool.join()
--------------------------------------------------------------------------------
/Job_shop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kpzhang/DTPDQN/bf13970f74e06d1270d230c42b962c4b95d7b300/Job_shop.py
--------------------------------------------------------------------------------
/Object_for_FJSP.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kpzhang/DTPDQN/bf13970f74e06d1270d230c42b962c4b95d7b300/Object_for_FJSP.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Dynamic Three-phase Parallel Deep Q-Network (DTPDQN)

## Dynamic Multi-Objective Flexible Job Shop Scheduling: A Three-Phase Deep Reinforcement Learning Approach


This repo has three Python files: DQN.py, Job_shop.py, and Object_for_FJSP.py. The following is a brief explanation of how the code is linked to our work.

- DQN.py is the main script to run. In this file, we construct the dynamic three-phase parallel deep Q-network (DTPDQN) and provide the instance-generation algorithm (the function `Instance_Generator` in the code). The uncertain events, including Job Insertion, Job Delivery Deadline Modification, Job Cancellation, Job Operation Modification, and Machine Addition, are reflected in this file, and the three optimization objectives are also calculated here (the function `main` in the code).

- Job_shop.py contains the feature-calculation function (the function `Features` in the code) that extracts the five state features used as the input of DTPDQN. Five job dispatching rules (the functions `job_rule1`--`job_rule5` in the code), three machine dispatching rules (the functions `machine_rule1`--`machine_rule3` in the code), and three reward functions (the functions `reward1`, `reward2`, `reward3` in the code) are provided in this file. It also has a scheduling function (the function `scheduling` in the code) that assigns jobs to machines; the other two uncertain events, Machine Breakdown and Machine Switching, are embedded in this scheduling function.

- Object_for_FJSP.py defines the classes that store the scheduling information of the scheduled jobs and machines. It also provides a local search algorithm (the function `idle_time` in the code) to further optimize the result of DTPDQN (LSDTPDQN).


## [Requirement]
- Python 3.x
- Numpy https://numpy.org/
- Keras (via TensorFlow 2.x; the code imports `tensorflow.keras`) https://keras.io/
--------------------------------------------------------------------------------
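
## Three-phase selection at a glance (illustrative)

The three networks built in `DQN.__init__` are chained: the phase-1 network maps the 5 state features to one of the 3 reward rules, its best Q-value is appended to the state (6 inputs) for the phase-2 network that picks one of the 5 job dispatching rules, and that network's best Q-value is appended again (7 inputs) for the phase-3 network that picks one of the 3 machine dispatching rules. The snippet below is a minimal, self-contained sketch of this chaining only; `build_q`, `select_actions`, and `q1`/`q2`/`q3` are illustrative names rather than objects from this repository, and the purely greedy selection shown ignores the ε-greedy exploration used during training.

```python
import numpy as np
from tensorflow.keras import layers, models

def build_q(in_dim, out_dim, hidden=30):
    # Two hidden layers, mirroring the network shapes used in DQN.__init__.
    m = models.Sequential()
    m.add(layers.Input(shape=(in_dim,)))
    m.add(layers.Dense(hidden))
    m.add(layers.Dense(hidden))
    m.add(layers.Dense(out_dim))
    m.compile(loss='mse', optimizer='adam')
    return m

q1 = build_q(5, 3)  # phase 1: 5 state features -> 3 reward rules
q2 = build_q(6, 5)  # phase 2: 5 features + best Q of phase 1 -> 5 job rules
q3 = build_q(7, 3)  # phase 3: previous 6 inputs + best Q of phase 2 -> 3 machine rules

def select_actions(state):
    # state has shape (1, 5); each phase appends the previous phase's best Q-value.
    out1 = q1.predict(state, verbose=0)
    reward_rule = int(np.argmax(out1))
    state2 = np.expand_dims(np.append(state[0], np.max(out1)), 0)   # shape (1, 6)
    out2 = q2.predict(state2, verbose=0)
    job_rule = int(np.argmax(out2))
    state3 = np.expand_dims(np.append(state2[0], np.max(out2)), 0)  # shape (1, 7)
    machine_rule = int(np.argmax(q3.predict(state3, verbose=0)))
    return reward_rule, job_rule, machine_rule

# Greedy choice for a random (untrained) 5-feature state:
print(select_actions(np.random.rand(1, 5)))
```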