# ----------------------------------------------------------------------------
# NOTE(review): this file is a flattened dump of a small repository:
#   DQN.py, Instance_Generator.py, Job_Shop.py, Object_for_FJSP.py, README.md
# Several spans were lost in extraction (the embedded original line numbers
# jump, e.g. 82 -> 125); lost code is reconstructed below and every
# reconstruction is marked with a NOTE(review) comment.
# ----------------------------------------------------------------------------

# ================================ DQN.py ================================
import numpy as np
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # force CPU-only TensorFlow
import random
from collections import deque

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

from Job_Shop import Situation
from Instance_Generator import Processing_time, A, D, M_num, Op_num, J, O_num, J_num


# Original author note (translated from Chinese): "The replay memory part
# probably has a problem and needs to be fixed."


class DQN:
    """Deep Q-learning agent that picks one of six composite dispatching
    rules per scheduling step for the dynamic flexible job shop (Luo 2020)."""

    def __init__(self):
        self.Hid_Size = 30

        # ----- Q-network: input = 7 state features, output = 6 rule Q-values.
        # NOTE(review): the original specifies no activation functions, so the
        # stacked Dense layers collapse to a single linear map; kept as-is —
        # confirm upstream whether 'relu' was intended.
        model = models.Sequential()
        model.add(layers.Input(shape=(7,)))
        model.add(layers.Dense(self.Hid_Size, name='l1'))
        model.add(layers.Dense(self.Hid_Size, name='l2'))
        model.add(layers.Dense(self.Hid_Size, name='l3'))
        model.add(layers.Dense(self.Hid_Size, name='l4'))
        model.add(layers.Dense(self.Hid_Size, name='l5'))
        model.add(layers.Dense(6, name='l6'))
        model.compile(loss='mse',
                      optimizer=Adam(learning_rate=0.001))
        self.model = model

        # ----- Q-learning parameters -----
        self.act_dim = [1, 2, 3, 4, 5, 6]   # one output node per dispatching rule
        self.obs_n = [0, 0, 0, 0, 0, 0, 0]  # 7 input features
        self.gama = 0.95                    # discount factor γ
        self.global_step = 0
        self.update_target_steps = 200      # sync target net every N replay() calls

        # BUG FIX: the original did `self.target_model = self.model`, aliasing
        # the target network to the online network so replace_target() was a
        # no-op and the bootstrap target always tracked the online net.
        # Clone the architecture (layer names preserved) and copy the weights.
        self.target_model = models.clone_model(model)
        self.target_model.set_weights(model.get_weights())

        # ----- agent / exploration -----
        self.e_greedy = 0.6                 # initial exploration probability ε
        self.e_greedy_decrement = 0.0001    # linear ε decay per action
        self.L = 40                         # number of training episodes

        # ----- replay buffer -----
        self.buffer = deque(maxlen=2000)
        self.Batch_size = 10                # minibatch size for gradient descent

    def replace_target(self):
        """Copy the online network's weights into the target network."""
        for name in ('l1', 'l2', 'l3', 'l4', 'l5', 'l6'):
            self.target_model.get_layer(name=name).set_weights(
                self.model.get_layer(name=name).get_weights())

    def replay(self):
        """Sample a minibatch from the buffer and run one fitted-Q update
        per transition; periodically sync the target network."""
        if self.global_step % self.update_target_steps == 0:
            self.replace_target()
        minibatch = random.sample(self.buffer, self.Batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # BUG FIX: the original bootstrapped with np.argmax(...) — an
                # action INDEX in 0..5 — instead of the maximum Q-VALUE.
                target = reward + self.gama * np.max(
                    self.target_model.predict(next_state))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        self.global_step += 1

    def Select_action(self, obs):
        """ε-greedy selection over the six dispatching rules; returns an
        action index in 0..5.

        NOTE(review): the original body was lost in extraction; this is a
        standard ε-greedy reconstruction with linear decay — verify upstream.
        """
        if random.random() < self.e_greedy:
            act = random.randint(0, 5)                       # explore
        else:
            act = int(np.argmax(self.model.predict(obs)))    # exploit
        # decay exploration but keep a small floor
        self.e_greedy = max(0.01, self.e_greedy - self.e_greedy_decrement)
        return act

    def _append(self, exp):
        """Store one (state, action, reward, next_state, done) transition."""
        self.buffer.append(exp)

    def main(self, J_num, M_num, O_num, J, Processing_time, D, A):
        """Train for self.L episodes on a fresh Situation each episode and
        plot per-job completion vs. due dates plus total tardiness per episode.
        Returns the last episode's cumulative reward.

        NOTE(review): the head of this method was lost in extraction; the
        reconstruction below is driven by the surviving inner-loop fragment.
        """
        k = 0                 # global step counter gating replay()
        x = []                # episode indices (for the tardiness plot)
        Total_tard = []       # total tardiness per episode
        TR = []               # total reward per episode
        for episode in range(self.L):
            Total_reward = 0
            x.append(episode + 1)
            # fresh shop-floor simulation for this episode
            Sit = Situation(J_num, M_num, O_num, J, Processing_time, D, A)
            obs = np.expand_dims(np.zeros(7), 0)
            done = False
            for i in range(O_num):
                k += 1
                at = self.Select_action(obs)
                # map the chosen action index to a composite dispatching rule
                rules = (Sit.rule1, Sit.rule2, Sit.rule3,
                         Sit.rule4, Sit.rule5, Sit.rule6)
                at_trans = rules[at]()
                print('>>', '执行action:', at, ' ', '将工件', at_trans[0], '安排到机器', at_trans[1])
                Sit.scheduling(at_trans)
                obs_t = Sit.Features()
                if i == O_num - 1:
                    done = True
                obs_t = np.expand_dims(obs_t, 0)
                # reward compares Tard_a (idx 6), Tard_e (idx 5), U_ave (idx 0)
                r_t = Sit.reward(obs[0][6], obs[0][5],
                                 obs_t[0][6], obs_t[0][5],
                                 obs[0][0], obs_t[0][0])
                self._append((obs, at, r_t, obs_t, done))
                if k > self.Batch_size:
                    self.replay()
                Total_reward += r_t
                obs = obs_t
            # ----- episode statistics -----
            total_tadiness = 0
            Job = Sit.Jobs
            K = [i for i in range(len(Job))]
            End = []
            for Ji in range(len(Job)):
                End.append(max(Job[Ji].End))
                if max(Job[Ji].End) > D[Ji]:
                    total_tadiness += abs(max(Job[Ji].End) - D[Ji])
            print('<<<<<<<<<-----------------total_tardiness:', total_tadiness, '------------------->>>>>>>>>>')
            Total_tard.append(total_tadiness)
            print('<<<<<<<<<-----------------reward:', Total_reward, '------------------->>>>>>>>>>')
            TR.append(Total_reward)
            plt.plot(K, End, color='y')
            plt.plot(K, D, color='r')
            plt.show()
        plt.plot(x, Total_tard)
        plt.show()
        return Total_reward


d = DQN()
d.main(J_num, M_num, O_num, J, Processing_time, D, A)


# ======================== Instance_Generator.py ========================
import random
import numpy as np

Total_Machine = [10, 20, 30, 40, 50]   # candidate machine counts
Initial_Job_num = 20                   # initial number of jobs (module default)
Job_insert = [50, 100, 200]            # candidate numbers of newly arriving jobs
DDT = [0.5, 1.0, 1.5]                  # due-date tightness factors
E_ave = [50, 100, 200]                 # means of the exponential arrival distribution


def Instance_Generator(M_num, E_ave, New_insert, DDT):
    '''
    :param M_num: Machine Number
    :param E_ave: exponetional distribution
    :param New_insert: New Job insert
    :param DDT:DDT
    :return: Processing time,A:New Job arrive time,
             D:Deliver time,
             M_num: Machine Number,
             Op_num: Operation Number,
             J_num:Job NUMBER
    '''
    # NOTE(review): shadows the module-level Initial_Job_num=20 with 5;
    # kept as in the original.
    Initial_Job_num = 5
    Op_num = [random.randint(1, 5) for i in range(New_insert + Initial_Job_num)]
    Processing_time = []
    for i in range(Initial_Job_num + New_insert):
        Job_i = []
        for j in range(Op_num[i]):
            # each operation is runnable on k+1 randomly chosen machines
            k = random.randint(1, M_num - 2)
            T = list(range(M_num))
            random.shuffle(T)
            T = T[0:k + 1]
            O_i = list(np.ones(M_num) * (-1))  # -1 marks "machine not eligible"
            for M_i in range(len(O_i)):
                if M_i in T:
                    O_i[M_i] = random.randint(1, 50)
            Job_i.append(O_i)
        Processing_time.append(Job_i)
    A1 = [0 for i in range(Initial_Job_num)]   # initial jobs arrive at t=0
    A = np.random.exponential(E_ave, size=New_insert)
    A = [int(A[i]) for i in range(len(A))]     # new-job arrival times
    A1.extend(A)
    # mean total processing time per job (excluding -1 "ineligible" entries)
    T_ijave = []
    for i in range(Initial_Job_num + New_insert):
        Tad = []
        for j in range(Op_num[i]):
            T_ijk = [k for k in Processing_time[i][j] if k != -1]
            Tad.append(sum(T_ijk) / len(T_ijk))
        T_ijave.append(sum(Tad))
    # due dates: mean workload scaled by tightness DDT (+ arrival for inserts)
    D1 = [int(T_ijave[i] * DDT) for i in range(Initial_Job_num)]
    D = [int(A1[i] + T_ijave[i] * DDT)
         for i in range(Initial_Job_num, Initial_Job_num + New_insert)]
    D1.extend(D)
    O_num = sum(Op_num)
    J = dict(enumerate(Op_num))
    J_num = Initial_Job_num + New_insert

    return Processing_time, A1, D1, M_num, Op_num, J, O_num, J_num


Processing_time, A, D, M_num, Op_num, J, O_num, J_num = Instance_Generator(10, 50, 10, 0.5)
print(Processing_time, A, D, M_num, Op_num, J, O_num, J_num)
# ============================ Job_Shop.py ============================
import numpy as np
import random
from Instance_Generator import Processing_time, A, D, M_num, Op_num, J, O_num, J_num
from Object_for_FJSP import Object


class Situation:
    """Shop-floor state for the dynamic FJSP: per-machine and per-job
    schedules plus the derived statistics used as DQN state features."""

    def __init__(self, J_num, M_num, O_num, J, Processing_time, D, Ai):
        self.Ai = Ai                      # job arrival times
        self.D = D                        # due dates
        self.O_num = O_num                # total number of operations
        self.M_num = M_num                # number of machines
        self.J_num = J_num                # number of jobs
        self.J = J                        # operation count per job
        self.Processing_time = Processing_time  # PT[i][j][k]; -1 = ineligible
        self.CTK = [0 for i in range(M_num)]    # last completion time per machine
        self.OP = [0 for i in range(J_num)]     # scheduled-operation count per job
        self.UK = [0 for i in range(M_num)]     # machine utilisation
        self.CRJ = [0 for i in range(J_num)]    # job completion rate
        # job set
        self.Jobs = []
        for i in range(J_num):
            F = Object(i)
            self.Jobs.append(F)
        # machine set
        self.Machines = []
        for i in range(M_num):
            F = Object(i)
            self.Machines.append(F)

    def _Update(self, Job, Machine):
        """Refresh derived statistics after scheduling one operation."""
        self.CTK[Machine] = max(self.Machines[Machine].End)
        self.OP[Job] += 1
        self.UK[Machine] = sum(self.Machines[Machine].T) / self.CTK[Machine]
        self.CRJ[Job] = self.OP[Job] / self.J[Job]

    def Features(self):
        """Return the 7-feature state vector:
        (U_ave, U_std, CRO_ave, CRJ_ave, CRJ_std, Tard_e, Tard_a)."""
        # 1) mean machine utilisation
        U_ave = sum(self.UK) / self.M_num
        K = 0
        for uk in self.UK:
            K += np.square(uk - U_ave)
        # 2) std-dev of machine utilisation
        U_std = np.sqrt(K / self.M_num)
        # 3) mean operation completion rate
        CRO_ave = sum(self.OP) / self.O_num
        # 4) mean job completion rate
        CRJ_ave = sum(self.CRJ) / self.J_num
        K = 0
        for crj in self.CRJ:
            K += np.square(crj - CRJ_ave)
        # 5) std-dev of job completion rate
        CRJ_std = np.sqrt(K / self.J_num)
        # 6) estimated tardiness rate Tard_e
        T_cur = sum(self.CTK) / self.M_num   # mean machine completion time
        N_tard, N_left = 0, 0
        for i in range(self.J_num):
            # consistency fix: use self.J instead of the module-level J
            if self.J[i] > self.OP[i]:
                N_left += self.J[i] - self.OP[i]
                T_left = 0
                for j in range(self.OP[i] + 1, self.J[i]):
                    # BUG FIX: dump read "k>0 or k<999", which is vacuously
                    # true; -1 marks an ineligible machine and must be
                    # excluded from the mean processing time.
                    M_ij = [k for k in self.Processing_time[i][j] if k != -1]
                    T_left += sum(M_ij) / len(M_ij)
                    if T_left + T_cur > self.D[i]:
                        N_tard += self.J[i] - j + 1
        try:
            Tard_e = N_tard / N_left
        except:
            Tard_e = 9999
        # 7) actual tardiness rate Tard_a
        N_tard, N_left = 0, 0
        for i in range(self.J_num):
            if self.J[i] > self.OP[i]:
                N_left += self.J[i] - self.OP[i]
                try:
                    # NOTE(review): the original indexes CTK (a machine list)
                    # by the JOB index i and reuses the stale loop variable j;
                    # kept byte-for-byte to preserve behaviour — confirm
                    # against the upstream repository.
                    if self.CTK[i] > self.D[i]:
                        N_tard += self.J[i] - j
                except:
                    pass
        try:
            Tard_a = N_tard / N_left
        except:
            Tard_a = 9999
        return U_ave, U_std, CRO_ave, CRJ_ave, CRJ_std, Tard_e, Tard_a

    # ------------------------------------------------------------------
    # Composite dispatching rules (Luo 2020). Each returns (Job, Machine).
    # NOTE(review): the bodies of rule1()..rule6() were lost in extraction
    # (original lines ~94-305). The implementations below are reconstructions
    # guided by the paper and the surviving rule1 fragment (T_cur / Tard_Job)
    # — verify against the upstream repository.
    # ------------------------------------------------------------------
    def _uncompleted_jobs(self):
        # jobs that still have at least one unscheduled operation
        return [i for i in range(self.J_num) if self.OP[i] < self.J[i]]

    def _eligible_machines(self, Job):
        # machines able to process the next operation of Job (PT != -1)
        PT = self.Processing_time[Job][self.OP[Job]]
        return [k for k in range(self.M_num) if PT[k] != -1]

    def _job_ready(self, Job):
        # earliest time Job can start its next operation
        return max(self.Jobs[Job].End) if self.Jobs[Job].End else self.Ai[Job]

    def _earliest_finish_machine(self, Job):
        # eligible machine finishing the next operation of Job the soonest
        PT = self.Processing_time[Job][self.OP[Job]]
        ready = self._job_ready(Job)
        return min(self._eligible_machines(Job),
                   key=lambda k: max(self.CTK[k], ready) + PT[k])

    # Composite dispatching rule 1
    def rule1(self):
        """Prefer estimated-tardy jobs, earliest due date first;
        machine = earliest finish."""
        # T_cur: mean machine completion time (fragment survived in the dump)
        T_cur = sum(self.CTK) / self.M_num
        UC = self._uncompleted_jobs()
        # Tard_Job: uncompleted jobs estimated to miss their due date
        Tard_Job = [i for i in UC if T_cur > self.D[i]]
        pool = Tard_Job if Tard_Job else UC
        Job = min(pool, key=lambda i: self.D[i])
        return Job, self._earliest_finish_machine(Job)

    def rule2(self):
        """Least slack per remaining operation; machine = lowest utilisation."""
        T_cur = sum(self.CTK) / self.M_num
        UC = self._uncompleted_jobs()
        Job = min(UC, key=lambda i: (self.D[i] - T_cur) / (self.J[i] - self.OP[i]))
        Machine = min(self._eligible_machines(Job), key=lambda k: self.UK[k])
        return Job, Machine

    def rule3(self):
        """Most estimated remaining work first; machine = earliest finish."""
        def t_left(i):
            total = 0
            for j in range(self.OP[i], self.J[i]):
                M_ij = [k for k in self.Processing_time[i][j] if k != -1]
                total += sum(M_ij) / len(M_ij)
            return total
        Job = max(self._uncompleted_jobs(), key=t_left)
        return Job, self._earliest_finish_machine(Job)

    def rule4(self):
        """Random uncompleted job; machine = earliest finish."""
        Job = random.choice(self._uncompleted_jobs())
        return Job, self._earliest_finish_machine(Job)

    def rule5(self):
        """Least-completed job first (minimum CRJ); machine = earliest finish."""
        Job = min(self._uncompleted_jobs(), key=lambda i: self.CRJ[i])
        return Job, self._earliest_finish_machine(Job)

    def rule6(self):
        """Random uncompleted job on a random eligible machine."""
        Job = random.choice(self._uncompleted_jobs())
        Machine = random.choice(self._eligible_machines(Job))
        return Job, Machine

    def scheduling(self, action):
        """Schedule the next operation of action[0] on machine action[1],
        filling an earlier idle gap on the machine when one fits.

        NOTE(review): the head of this method was lost in extraction; the
        reconstruction follows the surviving idle-gap fragment.
        """
        Job, Machine = action
        O_n = self.OP[Job]                              # index of the job's next operation
        PT = self.Processing_time[Job][O_n][Machine]
        last_ot = self._job_ready(Job)                  # job-side earliest start
        # default: append after the machine's current last operation
        Start_time = max(self.CTK[Machine], last_ot)
        # BUG FIX: the dump assigned the gap start to lowercase `start_time`
        # but used `Start_time` below, so gap insertion never took effect;
        # one consistent name is used throughout.
        Idle = self.Machines[Machine].idle_time()
        for i in range(len(Idle)):
            if Idle[i][1] - Idle[i][0] > PT:
                if Idle[i][0] > last_ot:
                    Start_time = Idle[i][0]
                    pass
                if Idle[i][0] < last_ot and Idle[i][1] - last_ot > PT:
                    Start_time = last_ot
                    pass
        end_time = Start_time + PT
        self.Machines[Machine]._add(Start_time, end_time, Job, PT)
        self.Jobs[Job]._add(Start_time, end_time, Machine, PT)
        self._Update(Job, Machine)

    def reward(self, Ta_t, Te_t, Ta_t1, Te_t1, U_t, U_t1):
        '''
        :param Ta_t: Tard_a(t)
        :param Te_t: Tard_e(t)
        :param Ta_t1: Tard_a(t+1)
        :param Te_t1: Tard_e(t+1)
        :param U_t: U_ave(t)
        :param U_t1: U_ave(t+1)
        :return: reward
        '''
        # NOTE(review): several comparison operators were eaten in the dump
        # ("if Ta_t1Ta_t:"); reconstructed per the paper's reward definition:
        # compare actual tardiness, then estimated tardiness, then utilisation.
        if Ta_t1 < Ta_t:
            rt = 1
        else:
            if Ta_t1 > Ta_t:
                rt = -1
            else:
                if Te_t1 < Te_t:
                    rt = 1
                else:
                    if Te_t1 > Te_t:
                        rt = -1
                    else:
                        if U_t1 > U_t:
                            rt = 1
                        else:
                            if U_t1 > 0.95 * U_t:
                                rt = 0
                            else:
                                rt = -1
        return rt


Sit = Situation(J_num, M_num, O_num, J, Processing_time, D, A)


# ========================= Object_for_FJSP.py =========================
class Object:
    """A job or a machine: parallel lists describing its scheduled spans."""

    def __init__(self, I):
        self.I = I              # index of this job/machine
        self.Start = []         # operation start times, kept sorted
        self.End = []           # operation end times, kept sorted
        self.T = []             # processing durations, append order
        self.assign_for = []    # counterpart object index per span

    def _add(self, S, E, obs, t):
        """Record one scheduled span [S, E] of duration t assigned to obs
        (obs = the job for a machine, or the machine for a job)."""
        self.Start.append(S)
        self.End.append(E)
        self.Start.sort()
        self.End.sort()
        self.T.append(t)
        # keep assign_for aligned with the sorted End list
        self.assign_for.insert(self.End.index(E), obs)

    def idle_time(self):
        """Return the idle gaps [[s, e], ...] between scheduled spans,
        including a leading gap before the first span when it exists."""
        Idle = []
        try:
            if self.Start[0] != 0:
                Idle.append([0, self.Start[0]])
            # BUG FIX: the original iterated range(len(self.End)) and indexed
            # Start[i+1], raising IndexError on the last span; the bare except
            # then silently discarded every interior gap.
            K = [[self.End[i], self.Start[i + 1]]
                 for i in range(len(self.End) - 1)
                 if self.Start[i + 1] - self.End[i] > 0]
            Idle.extend(K)
        except:
            pass
        return Idle


# ============================ README.md ============================
# DRL_to_DFJSP
# This repository reproduces the thesis "Dynamic scheduling for flexible job
# shop with new job insertions by deep reinforcement learning".