├── README.md └── new ├── A2C ├── A2C_fact.py ├── ActorCritic.py ├── __pycache__ │ ├── ActorCritic.cpython-37.pyc │ └── factory_sim.cpython-37.pyc ├── actor.h5 ├── critic.h5 ├── factory_sim.py ├── rollout_A2C.py └── training_graph.png ├── DQN ├── DQN_fact.py ├── DQN_model.h5 ├── DeepQNet.py ├── __pycache__ │ ├── DeepQNet.cpython-37.pyc │ └── factory_sim.cpython-37.pyc ├── factory_sim.py ├── results.txt └── rollout_DQN.py ├── V-PG ├── PG_Class.py ├── PG_fact.py ├── PG_model.h5 ├── __pycache__ │ ├── PG_Class.cpython-37.pyc │ └── factory_sim.cpython-37.pyc ├── factory_sim.py └── rollout_PG.py └── critical_Ratio ├── __pycache__ └── factory_sim.cpython-37.pyc ├── critical_ratio.py └── factory_sim.py /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning for Smart Factory Optimization 2 | 3 | In this work we describe an approach to using reinforcement learning techniques to optimize manufacturing processes. As a case study, the manufacturing system at the Western Digital Corporation facility in San Jose is used to model the factory system examined here. By first building a simulation of the factory system and then applying reinforcement learning techniques to it, reinforcement algorithms involving q-learning, Deep Q-Networks, Policy Gradients and Policy Gradient Search were developed and implemented on the simulation. Results for these methods are compared. 4 | 5 | ## Simulation 6 | Reinforcement learning algorithms are trained through experience by interacting with an environment and updating the policy in response to reward signals. However it is not feasible to do this training by on the actual factory system so a simulated factory environment was created for the Reinforcement learning algorithms to train on. This simulated environment was created in Python using the simulation package SimPy. 7 | 8 | In the simulated environment a python object is maintained for each machine and each cassette of wafers in the factory. The machine objects have methods which correspond to processing wafers on that machine. The machine objects also record the current operational status of the machines including whether or not they are currently processing a part and whether or not the machine is broken. Each machine can only process one cassette of wafers at a time and the processing time for that cassette is determined by the head type and sequence step for that cassette as well as the number of wafers within the cassette. All the experiments done so far assume there is the same number of wafers in each cassette, but in future work this may be generalized to allow for variable numbers of wafers. 9 | 10 | The wafer cassette objects represent cassettes of wafers and maintain information about the cassette such as the number 11 | of wafers in the cassette, the head type, and the sequence step of the wafers in that cassette. In the simulation the machines are organized into stations which each contain a set of machines which are all capable of performing the same operation. There is a recipe corresponding to each head type which indicates the sequence of stations that the cassette must be processed at in order to complete a cassette of wafers of that head type. Also included in the recipe are parameters which are used to calculate the processing time for each step in the sequence for that head type. 12 | -------------------------------------------------------------------------------- /new/A2C/A2C_fact.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | import ActorCritic 11 | 12 | sim_time = 3e5 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:10000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | #################################################### 91 | ########## CREATING THE STATE SPACE ############### 92 | #################################################### 93 | def get_state(sim): 94 | # Calculate the state space representation. 95 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 96 | # step 97 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 98 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 99 | list(range(len(sim.recipes[ht]) + 1))] 100 | # b is a one-hot encoded list indicating which machine the next action will correspond to 101 | b = np.zeros(len(sim.machines_list)) 102 | b[sim.machines_list.index(sim.next_machine)] = 1 103 | state_rep.extend(b) 104 | # Append the due dates list to the state space for making the decision 105 | rolling_window = [] # This is the rolling window that will be appended to state space 106 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 107 | current_time = sim.env.now # Calculating the current time 108 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 109 | 110 | for key, value in sim.due_wafers.items(): 111 | rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length 112 | buffer_list = [] # This list stores value of previous unfinished wafers count 113 | buffer_list.append(sum(value[:current_week])) 114 | rolling_window.extend([buffer_list]) 115 | 116 | c = sum(rolling_window, []) 117 | state_rep.extend(c) # Appending the rolling window to state space 118 | return state_rep 119 | 120 | 121 | 122 | # Create the factory simulation object 123 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 124 | # start the simulation 125 | my_sim.start() 126 | # Retrieve machine object for first action choice 127 | mach = my_sim.next_machine 128 | # Save the state and allowed actions at the start for later use in training examples 129 | state = get_state(my_sim) 130 | allowed_actions = my_sim.allowed_actions 131 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 132 | # types and sequence steps for all allowed actions. 133 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 134 | action_size = len(action_space) 135 | state_size = len(state) 136 | 137 | # Creating the A2C agent 138 | a2c_agent = ActorCritic.A2CAgent(state_size= state_size, action_space= action_space) 139 | 140 | order_count = 0 141 | 142 | while my_sim.env.now < sim_time: 143 | action = a2c_agent.choose_action(state, allowed_actions) 144 | 145 | my_sim.run_action(mach, action[0], action[1]) 146 | print('Step Reward:'+ str(my_sim.step_reward)) 147 | 148 | # Record the machine, state, allowed actions and reward at the new time step 149 | reward = my_sim.step_reward 150 | next_mach = my_sim.next_machine 151 | next_state = get_state(my_sim) 152 | next_allowed_actions = my_sim.allowed_actions 153 | 154 | 155 | print(f"state dimension: {len(state)}") 156 | print(f"next state dimension: {len(next_state)}") 157 | print("action space dimension:", action_size) 158 | print("State:", state) 159 | 160 | # Train the A2C Agent 161 | a2c_agent.train_model(state, action, reward, next_state) 162 | 163 | # Record the information for use again in the next training example 164 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 165 | 166 | # Save the trained A2C Actor and Critic Models 167 | a2c_agent.save_model("actor.h5", "critic.h5") 168 | 169 | # Total wafers produced 170 | print("Total wafers produced:", len(my_sim.cycle_time)) 171 | 172 | 173 | #Wafers of each head type 174 | print("### Wafers of each head type ###") 175 | print(my_sim.complete_wafer_dict) 176 | 177 | # Plot the time taken to complete each wafer 178 | plt.plot(my_sim.cycle_time) 179 | plt.xlabel("Wafers") 180 | plt.ylabel("Cycle time") 181 | plt.title("The time taken to complete each wafer") 182 | plt.show() 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /new/A2C/ActorCritic.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense, Dropout 3 | from keras.optimizers import Adam 4 | from collections import deque 5 | import numpy as np 6 | import random 7 | 8 | 9 | ######################################################################################################################################## 10 | #################################################################### CREATING A2C Class ################################################ 11 | ######################################################################################################################################## 12 | 13 | # Advantage Actor-Critic agent 14 | class A2CAgent: 15 | def __init__(self, state_size, action_space, epsilon_decay=0.8): 16 | self.state_size = state_size 17 | self.action_space = action_space 18 | self.action_size = len(action_space) 19 | self.value_size = 1 20 | self.epsilon = 1.0 21 | self.epsilon_min = 0.0 22 | self.epsilon_decay = epsilon_decay 23 | 24 | # Hyperparameters for Policy Gradient 25 | self.discount_factor = 0.99 26 | self.actor_lr = 0.001 27 | self.critic_lr = 0.005 28 | 29 | # Create model for policy network 30 | self.actor = self.build_actor() 31 | self.critic = self.build_critic() 32 | 33 | 34 | # Approximate policy and value using Neural Network 35 | # actor: state is input and probability of each action is output of model 36 | def build_actor(self): 37 | actor = Sequential() 38 | actor.add(Dense(400, input_dim= self.state_size, activation= 'relu', kernel_initializer='he_uniform')) 39 | actor.add(Dense(250, activation= 'relu', kernel_initializer='he_uniform')) 40 | actor.add(Dense(125, activation= 'relu', kernel_initializer='he_uniform')) 41 | actor.add(Dense(self.action_size, activation= 'softmax', kernel_initializer= 'he_uniform')) 42 | actor.summary() 43 | actor.compile(loss='categorical_crossentropy', optimizer=Adam(lr=self.actor_lr)) 44 | return actor 45 | 46 | # critic: state is input and value of state is the output of model 47 | def build_critic(self): 48 | critic = Sequential() 49 | critic.add(Dense(400, input_dim= self.state_size, activation= 'relu', kernel_initializer='he_uniform')) 50 | critic.add(Dense(250, activation= 'relu', kernel_initializer='he_uniform')) 51 | critic.add(Dense(125, activation= 'relu', kernel_initializer='he_uniform')) 52 | critic.add(Dense(50, activation= 'relu', kernel_initializer= 'he_uniform')) 53 | critic.add(Dense(self.value_size, activation= 'linear', kernel_initializer='he_uniform')) 54 | critic.summary() 55 | critic.compile(loss="mse", optimizer=Adam(lr=self.critic_lr)) 56 | return critic 57 | 58 | # Using the output of the policy network, pick action stochastically 59 | def choose_action(self, state, allowed_actions): 60 | self.epsilon *= self.epsilon_decay 61 | self.epsilon = max(self.epsilon_min, self.epsilon) 62 | r = np.random.random() 63 | 64 | if r < self.epsilon: 65 | print("******* CHOOSING A RANDOM ACTION *******") 66 | return random.choice(allowed_actions) 67 | 68 | state = np.array(state).reshape(1, self.state_size) 69 | pred = self.actor.predict(state) 70 | pred = sum(pred.tolist(), []) 71 | temp = [] 72 | for item in allowed_actions: 73 | temp.append(pred[self.action_space.index(item)]) 74 | print(" ********************* CHOOSING A PREDICTED ACTION **********************") 75 | return allowed_actions[np.argmax(temp)] 76 | 77 | 78 | # Update the policy network every episode 79 | def train_model(self, state, action, reward, next_state): 80 | target = np.zeros((1, self.value_size)) 81 | advantages = np.zeros((1, self.action_size)) 82 | state = np.array(state).reshape(1, self.state_size) 83 | next_state = np.array(state).reshape(1, self.state_size) 84 | value = self.critic.predict(state)[0] 85 | next_value = self.critic.predict(next_state)[0] 86 | 87 | action_id = self.action_space.index(action) 88 | 89 | advantages[0][action_id] = reward + self.discount_factor * (next_value) - value 90 | target[0][0] = reward + self.discount_factor*next_value 91 | 92 | self.actor.fit(state, advantages, epochs=1) 93 | self.critic.fit(state, target, epochs=1) 94 | 95 | 96 | # Save the actor and critic models 97 | def save_model(self, fn1, fn2): 98 | self.actor.save(fn1) 99 | self.critic.save(fn2) 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /new/A2C/__pycache__/ActorCritic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/__pycache__/ActorCritic.cpython-37.pyc -------------------------------------------------------------------------------- /new/A2C/__pycache__/factory_sim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/__pycache__/factory_sim.cpython-37.pyc -------------------------------------------------------------------------------- /new/A2C/actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/actor.h5 -------------------------------------------------------------------------------- /new/A2C/critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/critic.h5 -------------------------------------------------------------------------------- /new/A2C/factory_sim.py: -------------------------------------------------------------------------------- 1 | 2 | import simpy 3 | from collections import namedtuple, Counter 4 | from itertools import count, filterfalse 5 | import random 6 | import math 7 | 8 | #################################################### 9 | ########## CREATING THE WAFER CLASS ############### 10 | #################################################### 11 | class wafer_box(object): 12 | def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict): 13 | self.env = sim_inst.env 14 | self.name = f"w{wafer_index}" 15 | self.start_time = sim_inst.env.now 16 | self.number_wafers = number_wafers 17 | self.HT = HT 18 | self.seq = 0 19 | self.due_time = self.start_time + lead_dict[self.HT] 20 | 21 | #################################################### 22 | ########## CREATING THE MACHINE CLASS ############## 23 | #################################################### 24 | class Machine(object): 25 | def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None): 26 | self.env = sim_inst.env 27 | self.name = name 28 | self.station = station 29 | self.available = True 30 | self.broken = False 31 | self.wafer_being_proc = None 32 | self.parts_made = 0 33 | self.break_mean = break_mean 34 | 35 | if break_mean is not None: 36 | self.time_to_fail = self.time_to_failure() 37 | 38 | self.process = None 39 | self.repair_mean = repair_mean 40 | 41 | def time_to_failure(self): 42 | """Return time until next failure for a machine.""" 43 | return random.expovariate(1/self.break_mean) 44 | 45 | def time_to_repair(self): 46 | """Return time until next failure for a machine.""" 47 | return random.expovariate(1/self.repair_mean) 48 | 49 | def break_machine(self): 50 | """Break the machine after break_time""" 51 | assert not self.broken 52 | start = self.env.now 53 | try: 54 | yield self.env.timeout(self.time_to_fail) 55 | self.process.interrupt() 56 | self.time_to_fail = self.time_to_failure() 57 | except: 58 | self.time_to_fail -= self.env.now-start 59 | 60 | def get_proc_time(self, wafer, sim_inst): 61 | proc_step = sim_inst.recipes[wafer.HT][wafer.seq] 62 | A = proc_step[1] 63 | B = proc_step[2] 64 | LS = proc_step[3] 65 | include_load = proc_step[4] 66 | load = proc_step[5] 67 | include_unload = proc_step[6] 68 | unload = proc_step[7] 69 | proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS) 70 | 71 | if include_load == -1: 72 | proc_t += load 73 | if include_unload == -1: 74 | proc_t += unload 75 | return proc_t 76 | 77 | def part_process(self, wafer, sim_inst): 78 | # This function defines a process where a part of head type HT and sequence step seq is processed on the machine 79 | 80 | # get the amount of time it takes for the operation to run 81 | proc_t = self.get_proc_time(wafer, sim_inst) 82 | 83 | done_in = proc_t 84 | while done_in: 85 | try: 86 | if self.break_mean is not None: 87 | break_process = self.env.process(self.break_machine()) 88 | start = self.env.now 89 | print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start)) 90 | # wait until the process is done 91 | yield sim_inst.env.timeout(done_in) 92 | # set the wafer being processed to None 93 | self.wafer_being_proc = None 94 | # set machine to be available to process part 95 | self.available = True 96 | print("Completed the process step of wafer %s on machine %s at %s and sent to " 97 | "next machine."%(wafer.name, self.name, self.env.now)) 98 | # set the wafer to be at the next step in the sequence 99 | wafer.seq += 1 100 | # if seq is not the last sequence step then find the next station and choose actions for each of the 101 | # available machines in that station 102 | if wafer.seq < (len(sim_inst.recipes[wafer.HT])): 103 | # add the part to the corresponding queue for the next operation in the sequence 104 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer) 105 | else: 106 | # # add the part to the list of completed parts 107 | # sim_inst.queue_lists['complete'].append(wafer) 108 | sim_inst.cycle_time.append(self.env.now - wafer.start_time) 109 | print("Finished processing wafer %s at %s"%(wafer.name, self.env.now)) 110 | sim_inst.complete_wafer_dict[wafer.HT]+=1 111 | sim_inst.order_completed = True 112 | # Update the due_wafers dictionary to indicate that wafers of this head type were completed 113 | 114 | # Find the index of the earliest week for which there are one or more wafers of the given head type 115 | # due. 116 | week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None) 117 | 118 | # Subtract wafer,number_wafers wafers from the corresponding list element 119 | sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers 120 | 121 | new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index, 122 | sim_inst.lead_dict) 123 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer) 124 | lead_time = sim_inst.lead_dict[wafer.HT] 125 | total_processing_time = new_wafer.start_time + lead_time 126 | week_number = int(total_processing_time / (7 * 24 * 60)) 127 | sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers 128 | sim_inst.wafer_index += 1 129 | 130 | 131 | 132 | if self.break_mean is not None: 133 | break_process.interrupt() 134 | done_in = 0 135 | 136 | except simpy.Interrupt: 137 | self.broken = True 138 | done_in -= self.env.now - start 139 | yield self.env.timeout(self.time_to_repair()) 140 | self.broken = False 141 | 142 | # Parts completed by this machine 143 | self.parts_made += 1 144 | 145 | def get_allowed_actions(self, sim_inst): 146 | #find all (HT, seq) tuples with non zero queues at the station of this machine 147 | return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station]))) 148 | 149 | #################################################### 150 | ########## CREATING THE FACTORY CLASS ############## 151 | #################################################### 152 | class FactorySim(object): 153 | #Initialize simpy environment and set the amount of time the simulation will run for 154 | def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None): 155 | self.break_mean = break_mean 156 | self.repair_mean = repair_mean 157 | self.order_completed = False 158 | self.allowed_actions = None 159 | self.env = simpy.Environment() 160 | self.Sim_time = sim_time 161 | self.next_machine = None 162 | # self.dgr = dgr_dict 163 | self.lead_dict = lead_dict 164 | self.num_wafers = wafers_per_box 165 | self.wip_levels = wip_levels 166 | # self.machine_failure = False 167 | 168 | # Number of future weeks we want to look into for calculating due dates 169 | self.FUTURE_WEEKS = 100 170 | 171 | # Initialize an index that will be used to name each wafer box 172 | self.wafer_index = 0 173 | 174 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 175 | self.machine_dict = m_dict 176 | 177 | self.machines_list = [Machine(self, mach[0], mach[1], self.break_mean, self.repair_mean) for mach in self.machine_dict.items()] 178 | 179 | # create a list of all the station names 180 | self.stations = list(set(list(self.machine_dict.values()))) 181 | 182 | # sim_inst.recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 183 | self.recipes = recipes 184 | 185 | # create a list to store the number of complete wafers for each head type 186 | self.complete_wafer_dict = {} 187 | for ht in self.recipes.keys(): 188 | d = {ht:0} 189 | self.complete_wafer_dict.update(d) 190 | 191 | self.number_of_machines = len(self.machine_dict) 192 | 193 | # Create a dictionary which holds lists that will contain 194 | # the queues of wafer_box objects at each station and that have been completed 195 | self.queue_lists = {station: [] for station in self.stations} 196 | # self.queue_lists['complete'] = [] 197 | 198 | self.order_complete_time = 0 199 | self.cycle_time = [] 200 | self.step_reward = 0 201 | 202 | # Create a dictionary which holds the number of wafers due in a given week of each head type 203 | self.due_wafers = {} 204 | for ht in self.recipes.keys(): 205 | list_of_wafers_due_each_week = [0]*self.FUTURE_WEEKS 206 | d = {ht:list_of_wafers_due_each_week} 207 | self.due_wafers.update(d) 208 | 209 | # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and 210 | # sequence step combinations which may be processed at that station 211 | self.station_HT_seq = {station: [] for station in self.stations} 212 | 213 | for HT in self.recipes.keys(): 214 | for seq, step in enumerate(self.recipes[HT]): 215 | self.station_HT_seq[step[0]].append((HT, seq)) 216 | 217 | 218 | def start(self): 219 | for ht in self.wip_levels.keys(): 220 | for i in range(self.wip_levels[ht]): 221 | new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict) 222 | self.queue_lists[self.recipes[ht][0][0]].append(new_wafer) 223 | lead_time = self.lead_dict[ht] 224 | total_processing_time = new_wafer.start_time + lead_time 225 | week_number = int(total_processing_time / (7*24*60)) 226 | self.due_wafers[ht][week_number] += self.num_wafers 227 | self.wafer_index += 1 228 | 229 | for machine in self.machines_list: 230 | if machine.available: 231 | allowed_actions = machine.get_allowed_actions(self) 232 | if len(allowed_actions) > 0: 233 | self.next_machine = machine 234 | self.allowed_actions = allowed_actions 235 | return 236 | while True: 237 | self.env.step() 238 | for machine in self.machines_list: 239 | if machine.available: 240 | allowed_actions = machine.get_allowed_actions(self) 241 | if len(allowed_actions) > 0: 242 | self.next_machine = machine 243 | self.allowed_actions = allowed_actions 244 | return 245 | 246 | 247 | def run_action(self, machine, ht, seq): 248 | self.order_completed = False 249 | self.step_reward = 0 250 | # Set the machine to be unavailable to process parts because it is now busy 251 | assert machine.available 252 | machine.available = False 253 | # Find the wafer that has that HT and seq 254 | wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq) 255 | # set the wafer being processed on this machine to wafer_choice 256 | machine.wafer_being_proc = wafer_choice 257 | # Remove the part from it's queue 258 | self.queue_lists[machine.station].remove(wafer_choice) 259 | # Begin processing the part on the machine 260 | machine.process = self.env.process(machine.part_process(wafer_choice, self)) 261 | 262 | for machine in self.machines_list: 263 | if machine.available: 264 | allowed_actions = machine.get_allowed_actions(self) 265 | if len(allowed_actions) > 0: 266 | self.next_machine = machine 267 | self.allowed_actions = allowed_actions 268 | return 269 | while True: 270 | before_time = self.env.now 271 | self.env.step() 272 | time_change = self.env.now-before_time 273 | current_week = math.ceil(self.env.now / (7 * 24 * 60)) # Calculating the current week 274 | for key, value in self.due_wafers.items(): 275 | buffer_list = [] # This list stores value of previous unfinished wafers count 276 | buffer_list.append(sum(value[:current_week])) 277 | self.step_reward -= time_change*sum(buffer_list) 278 | 279 | for machine in self.machines_list: 280 | if machine.available: 281 | allowed_actions = machine.get_allowed_actions(self) 282 | if len(allowed_actions) > 0: 283 | self.next_machine = machine 284 | self.allowed_actions = allowed_actions 285 | return 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /new/A2C/rollout_A2C.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | from keras.models import load_model 11 | 12 | sim_time = 1e5 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:10000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | #################################################### 91 | ########## CREATING THE STATE SPACE ############### 92 | #################################################### 93 | def get_state(sim): 94 | # Calculate the state space representation. 95 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 96 | # step 97 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 98 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 99 | list(range(len(sim.recipes[ht]) + 1))] 100 | # b is a one-hot encoded list indicating which machine the next action will correspond to 101 | b = np.zeros(len(sim.machines_list)) 102 | b[sim.machines_list.index(sim.next_machine)] = 1 103 | state_rep.extend(b) 104 | # Append the due dates list to the state space for making the decision 105 | rolling_window = [] # This is the rolling window that will be appended to state space 106 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 107 | current_time = sim.env.now # Calculating the current time 108 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 109 | 110 | for key, value in sim.due_wafers.items(): 111 | rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length 112 | buffer_list = [] # This list stores value of previous unfinished wafers count 113 | buffer_list.append(sum(value[:current_week])) 114 | rolling_window.extend([buffer_list]) 115 | 116 | c = sum(rolling_window, []) 117 | state_rep.extend(c) # Appending the rolling window to state space 118 | return state_rep 119 | 120 | 121 | 122 | ##################################################################### 123 | ######################### LOADING THE TRAINED POLICY ################ 124 | ##################################################################### 125 | actor = load_model("actor.h5") # Model used for choosing actions 126 | critic = load_model("critic.h5") # Not using critic anywhere though (just used for training A2C) 127 | 128 | 129 | # Action function to choose the best action given the q-function if not exploring based on epsilon 130 | def choose_action(state, allowed_actions, action_space): 131 | state = np.array(state).reshape(1, state_size) 132 | pred = actor.predict(state) 133 | pred = sum(pred.tolist(), []) 134 | temp = [] 135 | for item in allowed_actions: 136 | temp.append(pred[action_space.index(item)]) 137 | print(" ********************* CHOOSING A PREDICTED ACTION **********************") 138 | return allowed_actions[np.argmax(temp)] 139 | 140 | 141 | 142 | # Create the factory simulation object 143 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 144 | # start the simulation 145 | my_sim.start() 146 | # Retrieve machine object for first action choice 147 | mach = my_sim.next_machine 148 | # Save the state and allowed actions at the start for later use in training examples 149 | state = get_state(my_sim) 150 | allowed_actions = my_sim.allowed_actions 151 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 152 | # types and sequence steps for all allowed actions. 153 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 154 | action_size = len(action_space) 155 | state_size = len(state) 156 | 157 | 158 | 159 | while my_sim.env.now < sim_time: 160 | action = choose_action(state, allowed_actions, action_space) 161 | 162 | my_sim.run_action(mach, action[0], action[1]) 163 | print('Step Reward:'+ str(my_sim.step_reward)) 164 | 165 | # Record the machine, state, allowed actions and reward at the new time step 166 | reward = my_sim.step_reward 167 | next_mach = my_sim.next_machine 168 | next_state = get_state(my_sim) 169 | next_allowed_actions = my_sim.allowed_actions 170 | 171 | print(f"state dimension: {len(state)}") 172 | print(f"next state dimension: {len(next_state)}") 173 | print("action space dimension:", action_size) 174 | print("State:", state) 175 | 176 | # Record the information for use again in the next training example 177 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 178 | 179 | 180 | 181 | # Total wafers produced 182 | print("Total wafers produced:", len(my_sim.cycle_time)) 183 | 184 | 185 | #Wafers of each head type 186 | print("### Wafers of each head type ###") 187 | print(my_sim.complete_wafer_dict) 188 | 189 | # Plot the time taken to complete each wafer 190 | plt.plot(my_sim.cycle_time) 191 | plt.xlabel("Wafers") 192 | plt.ylabel("Cycle time") 193 | plt.title("The time taken to complete each wafer") 194 | plt.show() 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /new/A2C/training_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/training_graph.png -------------------------------------------------------------------------------- /new/DQN/DQN_fact.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | import DeepQNet 11 | 12 | sim_time = 3e5 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:10000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | #################################################### 91 | ########## CREATING THE STATE SPACE ############### 92 | #################################################### 93 | def get_state(sim): 94 | # Calculate the state space representation. 95 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 96 | # step 97 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 98 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 99 | list(range(len(sim.recipes[ht]) + 1))] 100 | # b is a one-hot encoded list indicating which machine the next action will correspond to 101 | b = np.zeros(len(sim.machines_list)) 102 | b[sim.machines_list.index(sim.next_machine)] = 1 103 | state_rep.extend(b) 104 | # Append the due dates list to the state space for making the decision 105 | rolling_window = [] # This is the rolling window that will be appended to state space 106 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 107 | current_time = sim.env.now # Calculating the current time 108 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 109 | 110 | for key, value in sim.due_wafers.items(): 111 | rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length 112 | buffer_list = [] # This list stores value of previous unfinished wafers count 113 | buffer_list.append(sum(value[:current_week])) 114 | rolling_window.extend([buffer_list]) 115 | 116 | c = sum(rolling_window, []) 117 | state_rep.extend(c) # Appending the rolling window to state space 118 | return state_rep 119 | 120 | 121 | 122 | # Create the factory simulation object 123 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 124 | # start the simulation 125 | my_sim.start() 126 | # Retrieve machine object for first action choice 127 | mach = my_sim.next_machine 128 | # Save the state and allowed actions at the start for later use in training examples 129 | state = get_state(my_sim) 130 | allowed_actions = my_sim.allowed_actions 131 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 132 | # types and sequence steps for all allowed actions. 133 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 134 | action_size = len(action_space) 135 | state_size = len(state) 136 | 137 | # Creating the DQN agent 138 | dqn_agent = DeepQNet.DQN(state_space_dim= state_size, action_space= action_space) 139 | 140 | order_count = 0 141 | 142 | while my_sim.env.now < sim_time: 143 | action = dqn_agent.choose_action(state, allowed_actions) 144 | 145 | my_sim.run_action(mach, action[0], action[1]) 146 | print('Step Reward:'+ str(my_sim.step_reward)) 147 | # Record the machine, state, allowed actions and reward at the new time step 148 | next_mach = my_sim.next_machine 149 | next_state = get_state(my_sim) 150 | next_allowed_actions = my_sim.allowed_actions 151 | reward = my_sim.step_reward 152 | 153 | print(f"state dimension: {len(state)}") 154 | print(f"next state dimension: {len(next_state)}") 155 | print("action space dimension:", action_size) 156 | # record the information for use again in the next training example 157 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 158 | print("State:", state) 159 | 160 | # Save the example for later training 161 | dqn_agent.remember(state, action, reward, next_state, next_allowed_actions) 162 | 163 | if my_sim.order_completed: 164 | # After each wafer completed, train the policy network 165 | dqn_agent.replay() 166 | order_count+= 1 167 | if order_count >= 20: 168 | # After every 20 processes update the target network and reset the order count 169 | dqn_agent.train_target() 170 | order_count = 0 171 | 172 | # Record the information for use again in the next training example 173 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 174 | 175 | 176 | # Save the trained DQN policy network 177 | dqn_agent.save_model("DQN_model.h5") 178 | 179 | # Total wafers produced 180 | print("Total wafers produced:", len(my_sim.cycle_time)) 181 | 182 | 183 | #Wafers of each head type 184 | print("### Wafers of each head type ###") 185 | print(my_sim.complete_wafer_dict) 186 | 187 | # Plot the time taken to complete each wafer 188 | plt.plot(my_sim.cycle_time) 189 | plt.xlabel("Wafers") 190 | plt.ylabel("Cycle time") 191 | plt.title("The time taken to complete each wafer") 192 | plt.show() 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /new/DQN/DQN_model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/DQN_model.h5 -------------------------------------------------------------------------------- /new/DQN/DeepQNet.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense, Dropout 3 | from keras.optimizers import Adam 4 | from collections import deque 5 | import numpy as np 6 | import random 7 | 8 | 9 | ######################################################################################################################################## 10 | #################################################################### CREATING Deep Q-learning Class #################################### 11 | ######################################################################################################################################## 12 | 13 | class DQN: 14 | def __init__(self, state_space_dim, action_space, gamma=0.9, epsilon_decay=0.8, tau=0.125, learning_rate=0.005): 15 | self.state_space_dim = state_space_dim 16 | self.action_space = action_space 17 | self.gamma = gamma 18 | self.epsilon = 1.0 19 | self.epsilon_min = 0.0 20 | self.epsilon_decay = epsilon_decay 21 | self.tau = tau 22 | self.learning_rate = learning_rate 23 | self.memory = deque(maxlen=2000) 24 | self.model = self.create_model() 25 | self.target_model = self.create_model() 26 | 27 | # Create the neural network model to train the q function 28 | def create_model(self): 29 | model = Sequential() 30 | model.add(Dense(400, input_dim= self.state_space_dim, activation='relu')) 31 | model.add(Dense(250, activation='relu')) 32 | model.add(Dense(125, activation='relu')) 33 | model.add(Dense(len(self.action_space))) 34 | model.compile(loss='mean_squared_error', optimizer=Adam(lr=self.learning_rate)) 35 | return model 36 | 37 | # Action function to choose the best action given the q-function if not exploring based on epsilon 38 | def choose_action(self, state, allowed_actions): 39 | self.epsilon *= self.epsilon_decay 40 | self.epsilon = max(self.epsilon_min, self.epsilon) 41 | r = np.random.random() 42 | if r < self.epsilon: 43 | print("******* CHOOSING A RANDOM ACTION *******") 44 | return random.choice(allowed_actions) 45 | # print(state) 46 | # print(len(state)) 47 | state = np.array(state).reshape(1, self.state_space_dim) 48 | pred = self.model.predict(state) 49 | pred = sum(pred.tolist(), []) 50 | temp = [] 51 | for item in allowed_actions: 52 | temp.append(pred[self.action_space.index(item)]) 53 | print(" ********************* CHOOSING A PREDICTED ACTION **********************") 54 | return allowed_actions[np.argmax(temp)] 55 | 56 | # Create replay buffer memory to sample randomly 57 | def remember(self, state, action, reward, next_state, next_allowed_actions): 58 | self.memory.append([state, action, reward, next_state, next_allowed_actions]) 59 | 60 | # Build the replay buffer 61 | def replay(self): 62 | batch_size = 32 63 | if len(self.memory) < batch_size: 64 | return 65 | samples = random.sample(self.memory, batch_size) 66 | for sample in samples: 67 | state, action, reward, new_state, new_allowed_actions = sample 68 | state = np.array(state).reshape(1, self.state_space_dim) 69 | target = self.target_model.predict(state) 70 | action_id = self.action_space.index(action) 71 | # if done: 72 | # target[0][action_id] = reward 73 | # else: 74 | # take max only from next_allowed_actions 75 | new_state = np.array(new_state).reshape(1,self.state_space_dim) 76 | next_pred = self.target_model.predict(new_state)[0] 77 | next_pred = next_pred.tolist() 78 | t = [] 79 | print("new_allowed_actions:", new_allowed_actions) 80 | for it in new_allowed_actions: 81 | t.append(next_pred[self.action_space.index(it)]) 82 | Q_future = max(t) 83 | target[0][action_id] = reward + self.gamma * Q_future 84 | self.model.fit(state, target, epochs=1, verbose=1) 85 | 86 | 87 | # Update our target network 88 | def train_target(self): 89 | weights = self.model.get_weights() 90 | target_weights = self.target_model.get_weights() 91 | for i in range(len(target_weights)): 92 | target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau) 93 | self.target_model.set_weights(target_weights) 94 | 95 | # Save our model 96 | def save_model(self, fn): 97 | self.model.save(fn) -------------------------------------------------------------------------------- /new/DQN/__pycache__/DeepQNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/__pycache__/DeepQNet.cpython-37.pyc -------------------------------------------------------------------------------- /new/DQN/__pycache__/factory_sim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/__pycache__/factory_sim.cpython-37.pyc -------------------------------------------------------------------------------- /new/DQN/factory_sim.py: -------------------------------------------------------------------------------- 1 | 2 | import simpy 3 | from collections import namedtuple, Counter 4 | from itertools import count, filterfalse 5 | import random 6 | import math 7 | 8 | #################################################### 9 | ########## CREATING THE WAFER CLASS ############### 10 | #################################################### 11 | class wafer_box(object): 12 | def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict): 13 | self.env = sim_inst.env 14 | self.name = f"w{wafer_index}" 15 | self.start_time = sim_inst.env.now 16 | self.number_wafers = number_wafers 17 | self.HT = HT 18 | self.seq = 0 19 | self.due_time = self.start_time + lead_dict[self.HT] 20 | 21 | #################################################### 22 | ########## CREATING THE MACHINE CLASS ############## 23 | #################################################### 24 | class Machine(object): 25 | def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None): 26 | self.env = sim_inst.env 27 | self.name = name 28 | self.station = station 29 | self.available = True 30 | self.broken = False 31 | self.wafer_being_proc = None 32 | self.parts_made = 0 33 | self.break_mean = break_mean 34 | 35 | if break_mean is not None: 36 | self.time_to_fail = self.time_to_failure() 37 | 38 | self.process = None 39 | self.repair_mean = repair_mean 40 | 41 | def time_to_failure(self): 42 | """Return time until next failure for a machine.""" 43 | return random.expovariate(1/self.break_mean) 44 | 45 | def time_to_repair(self): 46 | """Return time until next failure for a machine.""" 47 | return random.expovariate(1/self.repair_mean) 48 | 49 | def break_machine(self): 50 | """Break the machine after break_time""" 51 | assert not self.broken 52 | start = self.env.now 53 | try: 54 | yield self.env.timeout(self.time_to_fail) 55 | self.process.interrupt() 56 | self.time_to_fail = self.time_to_failure() 57 | except: 58 | self.time_to_fail -= self.env.now-start 59 | 60 | def get_proc_time(self, wafer, sim_inst): 61 | proc_step = sim_inst.recipes[wafer.HT][wafer.seq] 62 | A = proc_step[1] 63 | B = proc_step[2] 64 | LS = proc_step[3] 65 | include_load = proc_step[4] 66 | load = proc_step[5] 67 | include_unload = proc_step[6] 68 | unload = proc_step[7] 69 | proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS) 70 | 71 | if include_load == -1: 72 | proc_t += load 73 | if include_unload == -1: 74 | proc_t += unload 75 | return proc_t 76 | 77 | def part_process(self, wafer, sim_inst): 78 | # This function defines a process where a part of head type HT and sequence step seq is processed on the machine 79 | 80 | # get the amount of time it takes for the operation to run 81 | proc_t = self.get_proc_time(wafer, sim_inst) 82 | 83 | done_in = proc_t 84 | while done_in: 85 | try: 86 | if self.break_mean is not None: 87 | break_process = self.env.process(self.break_machine()) 88 | start = self.env.now 89 | print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start)) 90 | # wait until the process is done 91 | yield sim_inst.env.timeout(done_in) 92 | # set the wafer being processed to None 93 | self.wafer_being_proc = None 94 | # set machine to be available to process part 95 | self.available = True 96 | print("Completed the process step of wafer %s on machine %s at %s and sent to " 97 | "next machine."%(wafer.name, self.name, self.env.now)) 98 | # set the wafer to be at the next step in the sequence 99 | wafer.seq += 1 100 | # if seq is not the last sequence step then find the next station and choose actions for each of the 101 | # available machines in that station 102 | if wafer.seq < (len(sim_inst.recipes[wafer.HT])): 103 | # add the part to the corresponding queue for the next operation in the sequence 104 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer) 105 | else: 106 | # # add the part to the list of completed parts 107 | # sim_inst.queue_lists['complete'].append(wafer) 108 | sim_inst.cycle_time.append(self.env.now - wafer.start_time) 109 | print("Finished processing wafer %s at %s"%(wafer.name, self.env.now)) 110 | sim_inst.complete_wafer_dict[wafer.HT]+=1 111 | sim_inst.order_completed = True 112 | # Update the due_wafers dictionary to indicate that wafers of this head type were completed 113 | 114 | # Find the index of the earliest week for which there are one or more wafers of the given head type 115 | # due. 116 | week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None) 117 | 118 | # Subtract wafer,number_wafers wafers from the corresponding list element 119 | sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers 120 | 121 | new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index, 122 | sim_inst.lead_dict) 123 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer) 124 | lead_time = sim_inst.lead_dict[wafer.HT] 125 | total_processing_time = new_wafer.start_time + lead_time 126 | week_number = int(total_processing_time / (7 * 24 * 60)) 127 | sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers 128 | sim_inst.wafer_index += 1 129 | 130 | 131 | 132 | if self.break_mean is not None: 133 | break_process.interrupt() 134 | done_in = 0 135 | 136 | except simpy.Interrupt: 137 | self.broken = True 138 | done_in -= self.env.now - start 139 | yield self.env.timeout(self.time_to_repair()) 140 | self.broken = False 141 | 142 | # Parts completed by this machine 143 | self.parts_made += 1 144 | 145 | def get_allowed_actions(self, sim_inst): 146 | #find all (HT, seq) tuples with non zero queues at the station of this machine 147 | return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station]))) 148 | 149 | #################################################### 150 | ########## CREATING THE FACTORY CLASS ############## 151 | #################################################### 152 | class FactorySim(object): 153 | #Initialize simpy environment and set the amount of time the simulation will run for 154 | def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None): 155 | self.break_mean = break_mean 156 | self.repair_mean = repair_mean 157 | self.order_completed = False 158 | self.allowed_actions = None 159 | self.env = simpy.Environment() 160 | self.Sim_time = sim_time 161 | self.next_machine = None 162 | # self.dgr = dgr_dict 163 | self.lead_dict = lead_dict 164 | self.num_wafers = wafers_per_box 165 | self.wip_levels = wip_levels 166 | # self.machine_failure = False 167 | 168 | # Number of future weeks we want to look into for calculating due dates 169 | self.FUTURE_WEEKS = 100 170 | 171 | # Initialize an index that will be used to name each wafer box 172 | self.wafer_index = 0 173 | 174 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 175 | self.machine_dict = m_dict 176 | 177 | self.machines_list = [Machine(self, mach[0], mach[1], self.break_mean, self.repair_mean) for mach in self.machine_dict.items()] 178 | 179 | # create a list of all the station names 180 | self.stations = list(set(list(self.machine_dict.values()))) 181 | 182 | # sim_inst.recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 183 | self.recipes = recipes 184 | 185 | # create a list to store the number of complete wafers for each head type 186 | self.complete_wafer_dict = {} 187 | for ht in self.recipes.keys(): 188 | d = {ht:0} 189 | self.complete_wafer_dict.update(d) 190 | 191 | self.number_of_machines = len(self.machine_dict) 192 | 193 | # Create a dictionary which holds lists that will contain 194 | # the queues of wafer_box objects at each station and that have been completed 195 | self.queue_lists = {station: [] for station in self.stations} 196 | # self.queue_lists['complete'] = [] 197 | 198 | self.order_complete_time = 0 199 | self.cycle_time = [] 200 | self.step_reward = 0 201 | 202 | # Create a dictionary which holds the number of wafers due in a given week of each head type 203 | self.due_wafers = {} 204 | for ht in self.recipes.keys(): 205 | list_of_wafers_due_each_week = [0]*self.FUTURE_WEEKS 206 | d = {ht:list_of_wafers_due_each_week} 207 | self.due_wafers.update(d) 208 | 209 | # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and 210 | # sequence step combinations which may be processed at that station 211 | self.station_HT_seq = {station: [] for station in self.stations} 212 | 213 | for HT in self.recipes.keys(): 214 | for seq, step in enumerate(self.recipes[HT]): 215 | self.station_HT_seq[step[0]].append((HT, seq)) 216 | 217 | 218 | def start(self): 219 | for ht in self.wip_levels.keys(): 220 | for i in range(self.wip_levels[ht]): 221 | new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict) 222 | self.queue_lists[self.recipes[ht][0][0]].append(new_wafer) 223 | lead_time = self.lead_dict[ht] 224 | total_processing_time = new_wafer.start_time + lead_time 225 | week_number = int(total_processing_time / (7*24*60)) 226 | self.due_wafers[ht][week_number] += self.num_wafers 227 | self.wafer_index += 1 228 | 229 | for machine in self.machines_list: 230 | if machine.available: 231 | allowed_actions = machine.get_allowed_actions(self) 232 | if len(allowed_actions) > 0: 233 | self.next_machine = machine 234 | self.allowed_actions = allowed_actions 235 | return 236 | while True: 237 | self.env.step() 238 | for machine in self.machines_list: 239 | if machine.available: 240 | allowed_actions = machine.get_allowed_actions(self) 241 | if len(allowed_actions) > 0: 242 | self.next_machine = machine 243 | self.allowed_actions = allowed_actions 244 | return 245 | 246 | 247 | def run_action(self, machine, ht, seq): 248 | self.order_completed = False 249 | self.step_reward = 0 250 | # Set the machine to be unavailable to process parts because it is now busy 251 | assert machine.available 252 | machine.available = False 253 | # Find the wafer that has that HT and seq 254 | wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq) 255 | # set the wafer being processed on this machine to wafer_choice 256 | machine.wafer_being_proc = wafer_choice 257 | # Remove the part from it's queue 258 | self.queue_lists[machine.station].remove(wafer_choice) 259 | # Begin processing the part on the machine 260 | machine.process = self.env.process(machine.part_process(wafer_choice, self)) 261 | 262 | for machine in self.machines_list: 263 | if machine.available: 264 | allowed_actions = machine.get_allowed_actions(self) 265 | if len(allowed_actions) > 0: 266 | self.next_machine = machine 267 | self.allowed_actions = allowed_actions 268 | return 269 | while True: 270 | before_time = self.env.now 271 | self.env.step() 272 | time_change = self.env.now-before_time 273 | current_week = math.ceil(self.env.now / (7 * 24 * 60)) # Calculating the current week 274 | for key, value in self.due_wafers.items(): 275 | buffer_list = [] # This list stores value of previous unfinished wafers count 276 | buffer_list.append(sum(value[:current_week])) 277 | self.step_reward -= time_change*sum(buffer_list) 278 | 279 | for machine in self.machines_list: 280 | if machine.available: 281 | allowed_actions = machine.get_allowed_actions(self) 282 | if len(allowed_actions) > 0: 283 | self.next_machine = machine 284 | self.allowed_actions = allowed_actions 285 | return 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /new/DQN/results.txt: -------------------------------------------------------------------------------- 1 | Critical Ratio 2 | 3 | #### For 3e5 time steps ### 4 | 5 | Total wafers produced: 4796 6 | ### Wafers of each head type ### 7 | {'ASGA': 540, 'CGUE': 540, 'FGUF': 536, 'GOUC': 530, 'H7UP': 530, 'LAUB': 530, 'LEUA': 530, 'MCUG': 530, 'MMUP': 530} 8 | 9 | 10 | 11 | 12 | 13 | DQN 14 | 15 | for ### for 3e5 timesteps ### 16 | Total wafers produced: 4796 17 | ### Wafers of each head type ### 18 | {'ASGA': 401, 'CGUE': 558, 'FGUF': 546, 'GOUC': 565, 'H7UP': 497, 'LAUB': 504, 'LEUA': 592, 'MCUG': 571, 'MMUP': 562} 19 | 20 | -------------------------------------------------------------------------------- /new/DQN/rollout_DQN.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | from keras.models import load_model 11 | 12 | sim_time = 1e5 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:10000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | #################################################### 91 | ########## CREATING THE STATE SPACE ############### 92 | #################################################### 93 | def get_state(sim): 94 | # Calculate the state space representation. 95 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 96 | # step 97 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 98 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 99 | list(range(len(sim.recipes[ht]) + 1))] 100 | # b is a one-hot encoded list indicating which machine the next action will correspond to 101 | b = np.zeros(len(sim.machines_list)) 102 | b[sim.machines_list.index(sim.next_machine)] = 1 103 | state_rep.extend(b) 104 | # Append the due dates list to the state space for making the decision 105 | rolling_window = [] # This is the rolling window that will be appended to state space 106 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 107 | current_time = sim.env.now # Calculating the current time 108 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 109 | 110 | for key, value in sim.due_wafers.items(): 111 | rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length 112 | buffer_list = [] # This list stores value of previous unfinished wafers count 113 | buffer_list.append(sum(value[:current_week])) 114 | rolling_window.extend([buffer_list]) 115 | 116 | c = sum(rolling_window, []) 117 | state_rep.extend(c) # Appending the rolling window to state space 118 | return state_rep 119 | 120 | 121 | 122 | 123 | ##################################################################### 124 | ######################### LOADING THE TRAINED POLICY ################ 125 | ##################################################################### 126 | model = load_model("DQN_model.h5") 127 | 128 | 129 | # Action function to choose the best action given the q-function if not exploring based on epsilon 130 | def choose_action(state, allowed_actions, action_space): 131 | state = np.array(state).reshape(1, state_size) 132 | pred = model.predict(state) 133 | pred = sum(pred.tolist(), []) 134 | temp = [] 135 | for item in allowed_actions: 136 | temp.append(pred[action_space.index(item)]) 137 | print(" ********************* CHOOSING A PREDICTED ACTION **********************") 138 | return allowed_actions[np.argmax(temp)] 139 | 140 | 141 | 142 | 143 | # Create the factory simulation object 144 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 145 | # start the simulation 146 | my_sim.start() 147 | # Retrieve machine object for first action choice 148 | mach = my_sim.next_machine 149 | # Save the state and allowed actions at the start for later use in training examples 150 | state = get_state(my_sim) 151 | allowed_actions = my_sim.allowed_actions 152 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 153 | # types and sequence steps for all allowed actions. 154 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 155 | action_size = len(action_space) 156 | state_size = len(state) 157 | 158 | 159 | order_count = 0 160 | 161 | while my_sim.env.now < sim_time: 162 | action = choose_action(state, allowed_actions, action_space) 163 | 164 | my_sim.run_action(mach, action[0], action[1]) 165 | print('Step Reward:'+ str(my_sim.step_reward)) 166 | # Record the machine, state, allowed actions and reward at the new time step 167 | next_mach = my_sim.next_machine 168 | next_state = get_state(my_sim) 169 | next_allowed_actions = my_sim.allowed_actions 170 | reward = my_sim.step_reward 171 | 172 | print(f"state dimension: {len(state)}") 173 | print(f"next state dimension: {len(next_state)}") 174 | print("action space dimension:", action_size) 175 | # record the information for use again in the next training example 176 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 177 | # print("State:", state) 178 | 179 | 180 | # Record the information for use again in the next training example 181 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 182 | 183 | 184 | # Total wafers produced 185 | print("Total wafers produced:", len(my_sim.cycle_time)) 186 | 187 | 188 | #Wafers of each head type 189 | print("### Wafers of each head type ###") 190 | print(my_sim.complete_wafer_dict) 191 | 192 | # Plot the time taken to complete each wafer 193 | plt.plot(my_sim.cycle_time) 194 | plt.xlabel("Wafers") 195 | plt.ylabel("Cycle time") 196 | plt.title("The time taken to complete each wafer") 197 | plt.show() 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /new/V-PG/PG_Class.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from keras.layers import Dense 6 | 7 | from keras import optimizers 8 | from keras.models import Model 9 | from keras.layers import Input 10 | 11 | class PolGrad: 12 | def __init__(self, action_space, state_size, gamma = 0.9, 13 | epsilon = 1.0, epsilon_min = 0.00, epsilon_decay = 0.9999): 14 | self.gamma = gamma 15 | self.epsilon = epsilon 16 | self.epsilon_min = epsilon_min 17 | self.epsilon_decay = epsilon_decay 18 | self.action_space = action_space 19 | self.action_size = len(action_space) 20 | self.state_size = state_size 21 | 22 | self.model = self.create_model() 23 | self.target_model = self.create_model() 24 | 25 | @staticmethod 26 | def custom_loss(y_pred, y_true, discounted_episode_rewards, allowed_actions): 27 | neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y_true) 28 | loss = tf.reduce_mean(neg_log_prob * discounted_episode_rewards) 29 | return loss 30 | 31 | # Create the neural network model to train the q function 32 | def create_model(self): 33 | x = Input(shape=(self.state_size,), name='input') 34 | y_true = Input(shape=(self.action_size,), name='y_true') 35 | discounted_episode_rewards = Input(shape=(1,), name='rewards') 36 | allowed_actions = Input(shape=(self.action_size,), name='allowed_a') 37 | f = Dense(400, activation = 'sigmoid', kernel_initializer='glorot_uniform')(x) 38 | f = Dense(250, activation = 'sigmoid', kernel_initializer='glorot_uniform')(f) 39 | f = Dense(125, activation = 'sigmoid', kernel_initializer='glorot_uniform')(f) 40 | #logits = K.layers.Activation('linear')(f) 41 | y_pred = Dense(self.action_size, activation = 'softmax', kernel_initializer='glorot_uniform')(f) 42 | model = Model(inputs=[x, y_true, discounted_episode_rewards, allowed_actions], outputs = [y_pred]) 43 | model.add_loss(self.custom_loss(y_pred, y_true, discounted_episode_rewards, allowed_actions)) 44 | adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False) 45 | model.compile(loss = None, optimizer=adam, metrics=['mae']) 46 | return model 47 | 48 | 49 | # Action function to choose the best action given the q-function if not exploring based on epsilon 50 | def choose_action(self, state, allowed_actions): 51 | self.epsilon *= self.epsilon_decay 52 | self.epsilon = max(self.epsilon_min, self.epsilon) 53 | 54 | n = 0 55 | allowed_act_prob = np.zeros((1, self.action_size)) 56 | for i in self.action_space: 57 | for j in allowed_actions: 58 | if i == j: 59 | allowed_act_prob[0][n] = 1 60 | n+=1 61 | 62 | r = np.random.random() 63 | if r > self.epsilon: 64 | print(" ************* CHOOSING A PREDICTED ACTION *************") 65 | actions = np.ones((1, self.action_size)) 66 | rewards = np.ones((1, 1)) 67 | state = np.array(state).reshape(1, self.state_size) 68 | pred = self.model.predict([state, actions, rewards, allowed_act_prob]) 69 | allowed_act_prob_aux = allowed_act_prob * pred 70 | if np.sum(allowed_act_prob_aux) != 0: 71 | allowed_act_prob = allowed_act_prob_aux 72 | else: 73 | print("******* CHOOSING A RANDOM ACTION *******") 74 | all_sum = np.sum(allowed_act_prob) 75 | multiply = 1/all_sum 76 | allowed_act_prob *= multiply 77 | # select action w.r.t the actions prob 78 | action = np.random.choice(range(allowed_act_prob.shape[1]), p=allowed_act_prob.ravel()) 79 | return action 80 | 81 | # training our PG network 82 | def train_policy_gradient(self, states, actions, discounted_episode_rewards, allowed_actions): 83 | n = 0 84 | allowed_act_prob = np.zeros((states.shape[0], self.action_size)) 85 | for i in self.action_space: 86 | for idx, val in enumerate(allowed_actions): 87 | for j in val: 88 | if i == j: 89 | allowed_act_prob[idx][n] = 1.0 90 | n+=1 91 | self.model.fit([states, actions, discounted_episode_rewards, allowed_act_prob]) 92 | 93 | # Save our model 94 | def save_model(self, fn): 95 | self.model.save(fn) 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /new/V-PG/PG_fact.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | import PG_Class 11 | 12 | sim_time = 1e6 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:40000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | # Simple pad utility function 91 | def pad(l, content, width): 92 | l.extend([content] * (width - len(l))) 93 | return l 94 | 95 | 96 | #################################################### 97 | ########## CREATING THE STATE SPACE ############### 98 | #################################################### 99 | def get_state(sim): 100 | # Calculate the state space representation. 101 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 102 | # step 103 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 104 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 105 | list(range(len(sim.recipes[ht]) + 1))] 106 | # b is a one-hot encoded list indicating which machine the next action will correspond to 107 | b = np.zeros(len(sim.machines_list)) 108 | b[sim.machines_list.index(sim.next_machine)] = 1 109 | state_rep.extend(b) 110 | # Append the due dates list to the state space for making the decision 111 | rolling_window = [] # This is the rolling window that will be appended to state space 112 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 113 | current_time = sim.env.now # Calculating the current time 114 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 115 | 116 | for key, value in sim.due_wafers.items(): 117 | k = value[current_week:current_week+max_length_of_window] #Adding only the values from current week up till the window length 118 | if len(k) < max_length_of_window: #if list is less than length of window, then append 0's 119 | k = pad(k, 0, max_length_of_window) 120 | 121 | rolling_window.append(k) 122 | buffer_list = [] # This list stores value of previous unfinished wafers count 123 | buffer_list.append(sum(value[:current_week])) 124 | rolling_window.extend([buffer_list]) 125 | print("rolling_window: ", rolling_window) 126 | c = sum(rolling_window, []) 127 | state_rep.extend(c) # Appending the rolling window to state space 128 | return state_rep 129 | 130 | 131 | 132 | # Create the factory simulation object 133 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 134 | # start the simulation 135 | my_sim.start() 136 | # Retrieve machine object for first action choice 137 | mach = my_sim.next_machine 138 | # Save the state and allowed actions at the start for later use in training examples 139 | state = get_state(my_sim) 140 | allowed_actions = my_sim.allowed_actions 141 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 142 | # types and sequence steps for all allowed actions. 143 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 144 | action_size = len(action_space) 145 | state_size = len(state) 146 | 147 | # create the pol_grad object with the appropriate lenght of state and action space 148 | pol_grad = PG_Class.PolGrad(action_space, len(state)) 149 | 150 | episode_states, episode_actions, allRewards, episode_allowed_a = [],[],[],[] 151 | 152 | 153 | while my_sim.env.now < sim_time: 154 | episode_states.append(state) 155 | episode_allowed_a.append(allowed_actions) 156 | print("State shape is :", len(state)) 157 | action = pol_grad.choose_action(state, allowed_actions) 158 | action_ = np.zeros(action_size) 159 | action_[action] = 1 160 | episode_actions.append(action_) 161 | 162 | action = action_space[action] 163 | 164 | if my_sim.order_completed: 165 | # Calculate discounted reward 166 | episode_rewards_ = np.ones(np.asarray(episode_states).shape[0]) 167 | episode_rewards_ *= my_sim.step_reward 168 | pol_grad.train_policy_gradient(np.asarray(episode_states), np.asarray(episode_actions), episode_rewards_, episode_allowed_a) 169 | 170 | # Reset the transition stores 171 | episode_states, episode_actions, episode_allowed_a = [],[],[] 172 | 173 | my_sim.run_action(mach, action[0], action[1]) 174 | state = get_state(my_sim) 175 | allowed_actions = my_sim.allowed_actions 176 | mach = my_sim.next_machine 177 | 178 | print(my_sim.order_completed) 179 | print(state) 180 | print(my_sim.step_reward) 181 | 182 | 183 | # Save the trained PG policy network 184 | pol_grad.save_model("PG_model.h5") 185 | 186 | # Total wafers produced 187 | print("Total wafers produced:", len(my_sim.cycle_time)) 188 | 189 | 190 | #Wafers of each head type 191 | print("### Wafers of each head type ###") 192 | print(my_sim.complete_wafer_dict) 193 | 194 | # Plot the time taken to complete each wafer 195 | plt.plot(my_sim.cycle_time) 196 | plt.xlabel("Wafers") 197 | plt.ylabel("Cycle time") 198 | plt.title("The time taken to complete each wafer") 199 | plt.show() 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /new/V-PG/PG_model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/PG_model.h5 -------------------------------------------------------------------------------- /new/V-PG/__pycache__/PG_Class.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/__pycache__/PG_Class.cpython-37.pyc -------------------------------------------------------------------------------- /new/V-PG/__pycache__/factory_sim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/__pycache__/factory_sim.cpython-37.pyc -------------------------------------------------------------------------------- /new/V-PG/factory_sim.py: -------------------------------------------------------------------------------- 1 | 2 | import simpy 3 | from collections import namedtuple, Counter 4 | from itertools import count, filterfalse 5 | import random 6 | import math 7 | 8 | #################################################### 9 | ########## CREATING THE WAFER CLASS ############### 10 | #################################################### 11 | class wafer_box(object): 12 | def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict): 13 | self.env = sim_inst.env 14 | self.name = f"w{wafer_index}" 15 | self.start_time = sim_inst.env.now 16 | self.number_wafers = number_wafers 17 | self.HT = HT 18 | self.seq = 0 19 | self.due_time = self.start_time + lead_dict[self.HT] 20 | 21 | #################################################### 22 | ########## CREATING THE MACHINE CLASS ############## 23 | #################################################### 24 | class Machine(object): 25 | def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None): 26 | self.env = sim_inst.env 27 | self.name = name 28 | self.station = station 29 | self.available = True 30 | self.broken = False 31 | self.wafer_being_proc = None 32 | self.parts_made = 0 33 | self.break_mean = break_mean 34 | 35 | if break_mean is not None: 36 | self.time_to_fail = self.time_to_failure() 37 | 38 | self.process = None 39 | self.repair_mean = repair_mean 40 | 41 | def time_to_failure(self): 42 | """Return time until next failure for a machine.""" 43 | return random.expovariate(1/self.break_mean) 44 | 45 | def time_to_repair(self): 46 | """Return time until next failure for a machine.""" 47 | return random.expovariate(1/self.repair_mean) 48 | 49 | def break_machine(self): 50 | """Break the machine after break_time""" 51 | assert not self.broken 52 | start = self.env.now 53 | try: 54 | yield self.env.timeout(self.time_to_fail) 55 | self.process.interrupt() 56 | self.time_to_fail = self.time_to_failure() 57 | except: 58 | self.time_to_fail -= self.env.now-start 59 | 60 | def get_proc_time(self, wafer, sim_inst): 61 | proc_step = sim_inst.recipes[wafer.HT][wafer.seq] 62 | A = proc_step[1] 63 | B = proc_step[2] 64 | LS = proc_step[3] 65 | include_load = proc_step[4] 66 | load = proc_step[5] 67 | include_unload = proc_step[6] 68 | unload = proc_step[7] 69 | proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS) 70 | 71 | if include_load == -1: 72 | proc_t += load 73 | if include_unload == -1: 74 | proc_t += unload 75 | return proc_t 76 | 77 | def part_process(self, wafer, sim_inst): 78 | # This function defines a process where a part of head type HT and sequence step seq is processed on the machine 79 | 80 | # get the amount of time it takes for the operation to run 81 | proc_t = self.get_proc_time(wafer, sim_inst) 82 | 83 | done_in = proc_t 84 | while done_in: 85 | try: 86 | if self.break_mean is not None: 87 | break_process = self.env.process(self.break_machine()) 88 | start = self.env.now 89 | print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start)) 90 | # wait until the process is done 91 | yield sim_inst.env.timeout(done_in) 92 | # set the wafer being processed to None 93 | self.wafer_being_proc = None 94 | # set machine to be available to process part 95 | self.available = True 96 | print("Completed the process step of wafer %s on machine %s at %s and sent to " 97 | "next machine."%(wafer.name, self.name, self.env.now)) 98 | # set the wafer to be at the next step in the sequence 99 | wafer.seq += 1 100 | # if seq is not the last sequence step then find the next station and choose actions for each of the 101 | # available machines in that station 102 | if wafer.seq < (len(sim_inst.recipes[wafer.HT])): 103 | # add the part to the corresponding queue for the next operation in the sequence 104 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer) 105 | else: 106 | # # add the part to the list of completed parts 107 | # sim_inst.queue_lists['complete'].append(wafer) 108 | sim_inst.cycle_time.append(self.env.now - wafer.start_time) 109 | print("Finished processing wafer %s at %s"%(wafer.name, self.env.now)) 110 | sim_inst.complete_wafer_dict[wafer.HT]+=1 111 | sim_inst.order_completed = True 112 | # Update the due_wafers dictionary to indicate that wafers of this head type were completed 113 | 114 | # Find the index of the earliest week for which there are one or more wafers of the given head type 115 | # due. 116 | week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None) 117 | 118 | # Subtract wafer,number_wafers wafers from the corresponding list element 119 | sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers 120 | 121 | new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index, 122 | sim_inst.lead_dict) 123 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer) 124 | lead_time = sim_inst.lead_dict[wafer.HT] 125 | total_processing_time = new_wafer.start_time + lead_time 126 | week_number = int(total_processing_time / (7 * 24 * 60)) 127 | sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers 128 | sim_inst.wafer_index += 1 129 | 130 | 131 | 132 | if self.break_mean is not None: 133 | break_process.interrupt() 134 | done_in = 0 135 | 136 | except simpy.Interrupt: 137 | self.broken = True 138 | done_in -= self.env.now - start 139 | yield self.env.timeout(self.time_to_repair()) 140 | self.broken = False 141 | 142 | # Parts completed by this machine 143 | self.parts_made += 1 144 | 145 | def get_allowed_actions(self, sim_inst): 146 | #find all (HT, seq) tuples with non zero queues at the station of this machine 147 | return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station]))) 148 | 149 | #################################################### 150 | ########## CREATING THE FACTORY CLASS ############## 151 | #################################################### 152 | class FactorySim(object): 153 | #Initialize simpy environment and set the amount of time the simulation will run for 154 | def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None): 155 | self.break_mean = break_mean 156 | self.repair_mean = repair_mean 157 | self.order_completed = False 158 | self.allowed_actions = None 159 | self.env = simpy.Environment() 160 | self.Sim_time = sim_time 161 | self.next_machine = None 162 | # self.dgr = dgr_dict 163 | self.lead_dict = lead_dict 164 | self.num_wafers = wafers_per_box 165 | self.wip_levels = wip_levels 166 | # self.machine_failure = False 167 | 168 | # Number of future weeks we want to look into for calculating due dates 169 | self.FUTURE_WEEKS = 1000 170 | 171 | # Initialize an index that will be used to name each wafer box 172 | self.wafer_index = 0 173 | 174 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 175 | self.machine_dict = m_dict 176 | 177 | self.machines_list = [Machine(self, mach[0], mach[1], self.break_mean, self.repair_mean) for mach in self.machine_dict.items()] 178 | 179 | # create a list of all the station names 180 | self.stations = list(set(list(self.machine_dict.values()))) 181 | 182 | # sim_inst.recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 183 | self.recipes = recipes 184 | 185 | # create a list to store the number of complete wafers for each head type 186 | self.complete_wafer_dict = {} 187 | for ht in self.recipes.keys(): 188 | d = {ht:0} 189 | self.complete_wafer_dict.update(d) 190 | 191 | self.number_of_machines = len(self.machine_dict) 192 | 193 | # Create a dictionary which holds lists that will contain 194 | # the queues of wafer_box objects at each station and that have been completed 195 | self.queue_lists = {station: [] for station in self.stations} 196 | # self.queue_lists['complete'] = [] 197 | 198 | self.order_complete_time = 0 199 | self.cycle_time = [] 200 | self.step_reward = 0 201 | 202 | # Create a dictionary which holds the number of wafers due in a given week of each head type 203 | self.due_wafers = {} 204 | for ht in self.recipes.keys(): 205 | list_of_wafers_due_each_week = [0]*self.FUTURE_WEEKS 206 | d = {ht:list_of_wafers_due_each_week} 207 | self.due_wafers.update(d) 208 | 209 | # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and 210 | # sequence step combinations which may be processed at that station 211 | self.station_HT_seq = {station: [] for station in self.stations} 212 | 213 | for HT in self.recipes.keys(): 214 | for seq, step in enumerate(self.recipes[HT]): 215 | self.station_HT_seq[step[0]].append((HT, seq)) 216 | 217 | 218 | def start(self): 219 | for ht in self.wip_levels.keys(): 220 | for i in range(self.wip_levels[ht]): 221 | new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict) 222 | self.queue_lists[self.recipes[ht][0][0]].append(new_wafer) 223 | lead_time = self.lead_dict[ht] 224 | total_processing_time = new_wafer.start_time + lead_time 225 | due_week_number = int(total_processing_time / (7*24*60)) 226 | self.due_wafers[ht][due_week_number] += self.num_wafers 227 | self.wafer_index += 1 228 | 229 | for machine in self.machines_list: 230 | if machine.available: 231 | allowed_actions = machine.get_allowed_actions(self) 232 | if len(allowed_actions) > 0: 233 | self.next_machine = machine 234 | self.allowed_actions = allowed_actions 235 | return 236 | while True: 237 | self.env.step() 238 | for machine in self.machines_list: 239 | if machine.available: 240 | allowed_actions = machine.get_allowed_actions(self) 241 | if len(allowed_actions) > 0: 242 | self.next_machine = machine 243 | self.allowed_actions = allowed_actions 244 | return 245 | 246 | 247 | def run_action(self, machine, ht, seq): 248 | self.order_completed = False 249 | self.step_reward = 0 250 | # Set the machine to be unavailable to process parts because it is now busy 251 | assert machine.available 252 | machine.available = False 253 | # Find the wafer that has that HT and seq 254 | wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq) 255 | # set the wafer being processed on this machine to wafer_choice 256 | machine.wafer_being_proc = wafer_choice 257 | # Remove the part from it's queue 258 | self.queue_lists[machine.station].remove(wafer_choice) 259 | # Begin processing the part on the machine 260 | machine.process = self.env.process(machine.part_process(wafer_choice, self)) 261 | 262 | for machine in self.machines_list: 263 | if machine.available: 264 | allowed_actions = machine.get_allowed_actions(self) 265 | if len(allowed_actions) > 0: 266 | self.next_machine = machine 267 | self.allowed_actions = allowed_actions 268 | return 269 | while True: 270 | before_time = self.env.now 271 | self.env.step() 272 | time_change = self.env.now-before_time 273 | current_week = math.ceil(self.env.now / (7 * 24 * 60)) # Calculating the current week 274 | for key, value in self.due_wafers.items(): 275 | buffer_list = [] # This list stores value of previous unfinished wafers count 276 | buffer_list.append(sum(value[:current_week])) 277 | self.step_reward -= time_change*sum(buffer_list) 278 | 279 | for machine in self.machines_list: 280 | if machine.available: 281 | allowed_actions = machine.get_allowed_actions(self) 282 | if len(allowed_actions) > 0: 283 | self.next_machine = machine 284 | self.allowed_actions = allowed_actions 285 | return 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /new/V-PG/rollout_PG.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | from keras.models import load_model 11 | 12 | sim_time = 1e5 13 | WEEK = 24*7 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 15 | num_seq_steps = 10 16 | 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 19 | 20 | recipes = recipes[recipes.MAXIMUMLS != 0] 21 | 22 | # Create the machine dictionary (machine:station) 23 | machine_d = dict() 24 | for index, row in machines.iterrows(): 25 | d = {row[0]:row[1]} 26 | machine_d.update(d) 27 | 28 | # Modifying the above list to match the stations from the two datasets 29 | a = machines.TOOLSET.unique() 30 | b = recipes.TOOLSET.unique() 31 | common_stations = (set(a) & set(b)) 32 | ls = list(common_stations) 33 | 34 | # This dictionary has the correct set of stations 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 36 | 37 | # Removing unncommon rows from recipes 38 | for index, row in recipes.iterrows(): 39 | if row[2] not in ls: 40 | recipes.drop(index, inplace=True) 41 | 42 | recipes = recipes.dropna() 43 | recipe_dict = dict() 44 | for ht in list(recipes.HT.unique()): 45 | temp = recipes.loc[recipes['HT'] == ht] 46 | if len(temp) > 1: 47 | ls = [] 48 | for index, row in temp.iterrows(): 49 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 50 | d = {ht:ls} 51 | recipe_dict.update(d) 52 | else: 53 | ls = [] 54 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 55 | d = {ht:ls} 56 | recipe_dict.update(d) 57 | 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 59 | for ht, step in recipe_dict.items(): 60 | recipe_dict[ht] = step[0:num_seq_steps] 61 | 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 64 | machine_dict = modified_machine_dict 65 | 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 68 | recipes = recipe_dict 69 | 70 | wafers_per_box = 4 71 | 72 | break_mean = 1e5 73 | 74 | repair_mean = 20 75 | 76 | # average lead time for each head type 77 | head_types = recipes.keys() 78 | lead_dict = {} 79 | 80 | wip_levels = {} 81 | 82 | for ht in head_types: 83 | d = {ht:40000} 84 | lead_dict.update(d) 85 | 86 | w = {ht:10} 87 | wip_levels.update(w) 88 | 89 | 90 | # Simple pad utility function 91 | def pad(l, content, width): 92 | l.extend([content] * (width - len(l))) 93 | return l 94 | 95 | 96 | #################################################### 97 | ########## CREATING THE STATE SPACE ############### 98 | #################################################### 99 | def get_state(sim): 100 | # Calculate the state space representation. 101 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 102 | # step 103 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 104 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 105 | list(range(len(sim.recipes[ht]) + 1))] 106 | # b is a one-hot encoded list indicating which machine the next action will correspond to 107 | b = np.zeros(len(sim.machines_list)) 108 | b[sim.machines_list.index(sim.next_machine)] = 1 109 | state_rep.extend(b) 110 | # Append the due dates list to the state space for making the decision 111 | rolling_window = [] # This is the rolling window that will be appended to state space 112 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 113 | current_time = sim.env.now # Calculating the current time 114 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 115 | 116 | for key, value in sim.due_wafers.items(): 117 | k = value[current_week:current_week+max_length_of_window] #Adding only the values from current week up till the window length 118 | if len(k) < max_length_of_window: #if list is less than length of window, then append 0's 119 | k = pad(k, 0, max_length_of_window) 120 | 121 | rolling_window.append(k) 122 | buffer_list = [] # This list stores value of previous unfinished wafers count 123 | buffer_list.append(sum(value[:current_week])) 124 | rolling_window.extend([buffer_list]) 125 | print("rolling_window: ", rolling_window) 126 | c = sum(rolling_window, []) 127 | state_rep.extend(c) # Appending the rolling window to state space 128 | return state_rep 129 | 130 | 131 | 132 | 133 | 134 | ##################################################################### 135 | ######################### LOADING THE TRAINED POLICY ################ 136 | ##################################################################### 137 | def my_custom_loss(): 138 | def custom_loss(y_pred, y_true, discounted_episode_rewards): 139 | neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y_true) 140 | loss = tf.reduce_mean(neg_log_prob * discounted_episode_rewards) 141 | return loss 142 | 143 | 144 | model = load_model("PG_model.h5", custom_objects={'custom_loss': my_custom_loss}) 145 | 146 | 147 | # Action function to choose the best action given the q-function if not exploring based on epsilon 148 | def choose_action(state, allowed_actions, action_space, state_size): 149 | n = 0 150 | allowed_act_prob = np.zeros((1, len(action_space))) 151 | for i in action_space: 152 | for j in allowed_actions: 153 | if i == j: 154 | allowed_act_prob[0][n] = 1 155 | n+=1 156 | print(" ************* CHOOSING A PREDICTED ACTION *************") 157 | actions = np.ones((1, len(action_space))) 158 | rewards = np.ones((1, 1)) 159 | state = np.array(state).reshape(1, state_size) 160 | pred = model.predict([state, actions, rewards, allowed_act_prob]) 161 | allowed_act_prob_aux = allowed_act_prob * pred 162 | if np.sum(allowed_act_prob_aux) != 0: 163 | allowed_act_prob = allowed_act_prob_aux 164 | 165 | all_sum = np.sum(allowed_act_prob) 166 | multiply = 1/all_sum 167 | allowed_act_prob *= multiply 168 | # select action w.r.t the actions prob 169 | action = np.random.choice(range(allowed_act_prob.shape[1]), p=allowed_act_prob.ravel()) 170 | return action 171 | 172 | 173 | # Create the factory simulation object 174 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 175 | # start the simulation 176 | my_sim.start() 177 | # Retrieve machine object for first action choice 178 | mach = my_sim.next_machine 179 | # Save the state and allowed actions at the start for later use in training examples 180 | state = get_state(my_sim) 181 | allowed_actions = my_sim.allowed_actions 182 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 183 | # types and sequence steps for all allowed actions. 184 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 185 | action_size = len(action_space) 186 | state_size = len(state) 187 | 188 | 189 | order_count = 0 190 | 191 | while my_sim.env.now < sim_time: 192 | 193 | print("State shape is :", len(state)) 194 | action = pol_grad.choose_action(state, allowed_actions) 195 | 196 | action = action_space[action] 197 | 198 | my_sim.run_action(mach, action[0], action[1]) 199 | 200 | state = get_state(my_sim) 201 | allowed_actions = my_sim.allowed_actions 202 | mach = my_sim.next_machine 203 | 204 | print(my_sim.order_completed) 205 | print(state) 206 | print(my_sim.step_reward) 207 | 208 | # Total wafers produced 209 | print("Total wafers produced:", len(my_sim.cycle_time)) 210 | 211 | 212 | #Wafers of each head type 213 | print("### Wafers of each head type ###") 214 | print(my_sim.complete_wafer_dict) 215 | 216 | # Plot the time taken to complete each wafer 217 | plt.plot(my_sim.cycle_time) 218 | plt.xlabel("Wafers") 219 | plt.ylabel("Cycle time") 220 | plt.title("The time taken to complete each wafer") 221 | plt.show() 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /new/critical_Ratio/__pycache__/factory_sim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/critical_Ratio/__pycache__/factory_sim.cpython-37.pyc -------------------------------------------------------------------------------- /new/critical_Ratio/critical_ratio.py: -------------------------------------------------------------------------------- 1 | import factory_sim as fact_sim 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import matplotlib 6 | import random 7 | matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | from itertools import chain 10 | 11 | sim_time = 3e5 12 | WEEK = 24*7 13 | NO_OF_WEEKS = math.ceil(sim_time/WEEK) 14 | num_seq_steps = 10 15 | 16 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv') 17 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv') 18 | 19 | recipes = recipes[recipes.MAXIMUMLS != 0] 20 | 21 | # Create the machine dictionary (machine:station) 22 | machine_d = dict() 23 | for index, row in machines.iterrows(): 24 | d = {row[0]:row[1]} 25 | machine_d.update(d) 26 | 27 | # Modifying the above list to match the stations from the two datasets 28 | a = machines.TOOLSET.unique() 29 | b = recipes.TOOLSET.unique() 30 | common_stations = (set(a) & set(b)) 31 | ls = list(common_stations) 32 | 33 | # This dictionary has the correct set of stations 34 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls} 35 | 36 | # Removing unncommon rows from recipes 37 | for index, row in recipes.iterrows(): 38 | if row[2] not in ls: 39 | recipes.drop(index, inplace=True) 40 | 41 | recipes = recipes.dropna() 42 | recipe_dict = dict() 43 | for ht in list(recipes.HT.unique()): 44 | temp = recipes.loc[recipes['HT'] == ht] 45 | if len(temp) > 1: 46 | ls = [] 47 | for index, row in temp.iterrows(): 48 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 49 | d = {ht:ls} 50 | recipe_dict.update(d) 51 | else: 52 | ls = [] 53 | ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]]) 54 | d = {ht:ls} 55 | recipe_dict.update(d) 56 | 57 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation. 58 | for ht, step in recipe_dict.items(): 59 | recipe_dict[ht] = step[0:num_seq_steps] 60 | 61 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 62 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'} 63 | machine_dict = modified_machine_dict 64 | 65 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 66 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]} 67 | recipes = recipe_dict 68 | 69 | wafers_per_box = 4 70 | 71 | break_mean = 1e5 72 | 73 | repair_mean = 20 74 | 75 | # average lead time for each head type 76 | head_types = recipes.keys() 77 | lead_dict = {} 78 | 79 | wip_levels = {} 80 | 81 | for ht in head_types: 82 | d = {ht:10000} 83 | lead_dict.update(d) 84 | 85 | w = {ht:10} 86 | wip_levels.update(w) 87 | 88 | 89 | #################################################### 90 | ########## CREATING THE STATE SPACE ############### 91 | #################################################### 92 | def get_state(sim): 93 | # Calculate the state space representation. 94 | # This returns a list containing the number of` parts in the factory for each combination of head type and sequence 95 | # step 96 | state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT 97 | == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in 98 | list(range(len(sim.recipes[ht]) + 1))] 99 | # b is a one-hot encoded list indicating which machine the next action will correspond to 100 | b = np.zeros(len(sim.machines_list)) 101 | b[sim.machines_list.index(sim.next_machine)] = 1 102 | state_rep.extend(b) 103 | # Append the due dates list to the state space for making the decision 104 | rolling_window = [] # This is the rolling window that will be appended to state space 105 | max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 106 | current_time = sim.env.now # Calculating the current time 107 | current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 108 | 109 | for key, value in sim.due_wafers.items(): 110 | rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length 111 | buffer_list = [] # This list stores value of previous unfinished wafers count 112 | buffer_list.append(sum(value[:current_week])) 113 | rolling_window.extend([buffer_list]) 114 | 115 | c = sum(rolling_window, []) 116 | state_rep.extend(c) # Appending the rolling window to state space 117 | return state_rep 118 | 119 | 120 | 121 | #################################################### 122 | ########## CHOOSING AN ACTION HERE ################ 123 | #################################################### 124 | 125 | ''' 126 | Critical Ratio. The critical ratio (CR) is calculated by dividing the time remaining until 127 | a job’s due date by the total shop time remaining for the job, which is defined as the 128 | setup, processing, move, and expected waiting times of all remaining operations, 129 | including the operation being scheduled. 130 | 131 | CR = (Due date - Today’s date) / (Total shop time remaining) 132 | 133 | The difference between the due date and today’s date must be in the same time units as 134 | the total shop time remaining. A ratio less than 1.0 implies that the job is behind schedule, 135 | and a ratio greater than 1.0 implies that the job is ahead of schedule. The job with 136 | the lowest CR is scheduled next. 137 | 138 | ''' 139 | 140 | def choose_action(sim, state, sim_time): 141 | wafer_list = sim.queue_lists[sim.next_machine.station] 142 | 143 | if len(wafer_list) == 1: 144 | waf_ = wafer_list[0] 145 | return (waf_.HT, waf_.seq) 146 | 147 | else: 148 | cr_ratio = {} 149 | for waf in wafer_list: 150 | cr_ = abs((waf.due_time - sim.env.now) / (sim_time - sim.env.now)) 151 | cr_ratio[waf] = cr_ 152 | waf_to_choose = min(cr_ratio, key=cr_ratio.get) 153 | best_action = (waf_to_choose.HT, waf_to_choose.seq) 154 | return best_action 155 | 156 | 157 | 158 | # Create the factory simulation object 159 | my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels) 160 | # start the simulation 161 | my_sim.start() 162 | # Retrieve machine object for first action choice 163 | mach = my_sim.next_machine 164 | # Save the state and allowed actions at the start for later use in training examples 165 | state = get_state(my_sim) 166 | # The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head 167 | # types and sequence steps for all allowed actions. 168 | action_space = list(chain.from_iterable(my_sim.station_HT_seq.values())) 169 | action_size = len(action_space) 170 | 171 | while my_sim.env.now < sim_time: 172 | action = choose_action(my_sim, state, sim_time) 173 | 174 | my_sim.run_action(mach, action[0], action[1]) 175 | print('Step Reward:'+ str(my_sim.step_reward)) 176 | # Record the machine, state, allowed actions and reward at the new time step 177 | next_mach = my_sim.next_machine 178 | next_state = get_state(my_sim) 179 | next_allowed_actions = my_sim.allowed_actions 180 | reward = my_sim.step_reward 181 | 182 | print(f"state dimension: {len(state)}") 183 | print(f"next state dimension: {len(next_state)}") 184 | print("action space dimension:", action_size) 185 | # record the information for use again in the next training example 186 | mach, allowed_actions, state = next_mach, next_allowed_actions, next_state 187 | print("State:", state) 188 | 189 | 190 | # Total wafers produced 191 | print("Total wafers produced:", len(my_sim.cycle_time)) 192 | 193 | 194 | #Wafers of each head type 195 | print("### Wafers of each head type ###") 196 | print(my_sim.complete_wafer_dict) 197 | 198 | # Plot the time taken to complete each wafer 199 | plt.plot(my_sim.cycle_time) 200 | plt.xlabel("Wafers") 201 | plt.ylabel("Cycle time") 202 | plt.title("The time taken to complete each wafer") 203 | plt.show() 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /new/critical_Ratio/factory_sim.py: -------------------------------------------------------------------------------- 1 | 2 | import simpy 3 | from collections import namedtuple, Counter 4 | from itertools import count, filterfalse 5 | import random 6 | import math 7 | 8 | #################################################### 9 | ########## CREATING THE WAFER CLASS ############### 10 | #################################################### 11 | class wafer_box(object): 12 | def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict): 13 | self.env = sim_inst.env 14 | self.name = f"w{wafer_index}" 15 | self.start_time = sim_inst.env.now 16 | self.number_wafers = number_wafers 17 | self.HT = HT 18 | self.seq = 0 19 | self.due_time = self.start_time + lead_dict[self.HT] 20 | 21 | #################################################### 22 | ########## CREATING THE MACHINE CLASS ############## 23 | #################################################### 24 | class Machine(object): 25 | def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None): 26 | self.env = sim_inst.env 27 | self.name = name 28 | self.station = station 29 | self.available = True 30 | self.broken = False 31 | self.wafer_being_proc = None 32 | self.parts_made = 0 33 | self.break_mean = break_mean 34 | 35 | if break_mean is not None: 36 | self.time_to_fail = self.time_to_failure() 37 | 38 | self.process = None 39 | self.repair_mean = repair_mean 40 | 41 | def time_to_failure(self): 42 | """Return time until next failure for a machine.""" 43 | return random.expovariate(1/self.break_mean) 44 | 45 | def time_to_repair(self): 46 | """Return time until next failure for a machine.""" 47 | return random.expovariate(1/self.repair_mean) 48 | 49 | def break_machine(self): 50 | """Break the machine after break_time""" 51 | assert not self.broken 52 | start = self.env.now 53 | try: 54 | yield self.env.timeout(self.time_to_fail) 55 | self.process.interrupt() 56 | self.time_to_fail = self.time_to_failure() 57 | except: 58 | self.time_to_fail -= self.env.now-start 59 | 60 | def get_proc_time(self, wafer, sim_inst): 61 | proc_step = sim_inst.recipes[wafer.HT][wafer.seq] 62 | A = proc_step[1] 63 | B = proc_step[2] 64 | LS = proc_step[3] 65 | include_load = proc_step[4] 66 | load = proc_step[5] 67 | include_unload = proc_step[6] 68 | unload = proc_step[7] 69 | proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS) 70 | 71 | if include_load == -1: 72 | proc_t += load 73 | if include_unload == -1: 74 | proc_t += unload 75 | return proc_t 76 | 77 | def part_process(self, wafer, sim_inst): 78 | # This function defines a process where a part of head type HT and sequence step seq is processed on the machine 79 | 80 | # get the amount of time it takes for the operation to run 81 | proc_t = self.get_proc_time(wafer, sim_inst) 82 | 83 | done_in = proc_t 84 | while done_in: 85 | try: 86 | if self.break_mean is not None: 87 | break_process = self.env.process(self.break_machine()) 88 | start = self.env.now 89 | print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start)) 90 | # wait until the process is done 91 | yield sim_inst.env.timeout(done_in) 92 | # set the wafer being processed to None 93 | self.wafer_being_proc = None 94 | # set machine to be available to process part 95 | self.available = True 96 | print("Completed the process step of wafer %s on machine %s at %s and sent to " 97 | "next machine."%(wafer.name, self.name, self.env.now)) 98 | # set the wafer to be at the next step in the sequence 99 | wafer.seq += 1 100 | # if seq is not the last sequence step then find the next station and choose actions for each of the 101 | # available machines in that station 102 | if wafer.seq < (len(sim_inst.recipes[wafer.HT])): 103 | # add the part to the corresponding queue for the next operation in the sequence 104 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer) 105 | else: 106 | # # add the part to the list of completed parts 107 | # sim_inst.queue_lists['complete'].append(wafer) 108 | sim_inst.cycle_time.append(self.env.now - wafer.start_time) 109 | print("Finished processing wafer %s at %s"%(wafer.name, self.env.now)) 110 | sim_inst.complete_wafer_dict[wafer.HT]+=1 111 | # Update the due_wafers dictionary to indicate that wafers of this head type were completed 112 | 113 | # Find the index of the earliest week for which there are one or more wafers of the given head type 114 | # due. 115 | week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None) 116 | 117 | # Subtract wafer,number_wafers wafers from the corresponding list element 118 | sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers 119 | 120 | new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index, 121 | sim_inst.lead_dict) 122 | sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer) 123 | lead_time = sim_inst.lead_dict[wafer.HT] 124 | total_processing_time = new_wafer.start_time + lead_time 125 | week_number = int(total_processing_time / (7 * 24 * 60)) 126 | sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers 127 | sim_inst.wafer_index += 1 128 | 129 | 130 | 131 | if self.break_mean is not None: 132 | break_process.interrupt() 133 | done_in = 0 134 | 135 | except simpy.Interrupt: 136 | self.broken = True 137 | done_in -= self.env.now - start 138 | yield self.env.timeout(self.time_to_repair()) 139 | self.broken = False 140 | 141 | # Parts completed by this machine 142 | self.parts_made += 1 143 | 144 | def get_allowed_actions(self, sim_inst): 145 | #find all (HT, seq) tuples with non zero queues at the station of this machine 146 | return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station]))) 147 | 148 | #################################################### 149 | ########## CREATING THE FACTORY CLASS ############## 150 | #################################################### 151 | class FactorySim(object): 152 | #Initialize simpy environment and set the amount of time the simulation will run for 153 | def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None): 154 | self.break_mean = break_mean 155 | self.repair_mean = repair_mean 156 | self.order_completed = False 157 | self.allowed_actions = None 158 | self.env = simpy.Environment() 159 | self.Sim_time = sim_time 160 | self.next_machine = None 161 | # self.dgr = dgr_dict 162 | self.lead_dict = lead_dict 163 | self.num_wafers = wafers_per_box 164 | self.wip_levels = wip_levels 165 | # self.machine_failure = False 166 | 167 | # Number of future weeks we want to look into for calculating due dates 168 | self.FUTURE_WEEKS = 100 169 | 170 | # Initialize an index that will be used to name each wafer box 171 | self.wafer_index = 0 172 | 173 | # Dictionary where the key is the name of the machine and the value is [station, proc_t] 174 | self.machine_dict = m_dict 175 | 176 | self.machines_list = [Machine(self, mach[0], mach[1], self.break_mean, self.repair_mean) for mach in self.machine_dict.items()] 177 | 178 | # create a list of all the station names 179 | self.stations = list(set(list(self.machine_dict.values()))) 180 | 181 | # sim_inst.recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed 182 | self.recipes = recipes 183 | 184 | # create a list to store the number of complete wafers for each head type 185 | self.complete_wafer_dict = {} 186 | for ht in self.recipes.keys(): 187 | d = {ht:0} 188 | self.complete_wafer_dict.update(d) 189 | 190 | self.number_of_machines = len(self.machine_dict) 191 | 192 | # Create a dictionary which holds lists that will contain 193 | # the queues of wafer_box objects at each station and that have been completed 194 | self.queue_lists = {station: [] for station in self.stations} 195 | # self.queue_lists['complete'] = [] 196 | 197 | self.order_complete_time = 0 198 | self.cycle_time = [] 199 | self.step_reward = 0 200 | 201 | # Create a dictionary which holds the number of wafers due in a given week of each head type 202 | self.due_wafers = {} 203 | for ht in self.recipes.keys(): 204 | list_of_wafers_due_each_week = [0]*self.FUTURE_WEEKS 205 | d = {ht:list_of_wafers_due_each_week} 206 | self.due_wafers.update(d) 207 | 208 | # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and 209 | # sequence step combinations which may be processed at that station 210 | self.station_HT_seq = {station: [] for station in self.stations} 211 | 212 | for HT in self.recipes.keys(): 213 | for seq, step in enumerate(self.recipes[HT]): 214 | self.station_HT_seq[step[0]].append((HT, seq)) 215 | 216 | 217 | def start(self): 218 | for ht in self.wip_levels.keys(): 219 | for i in range(self.wip_levels[ht]): 220 | new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict) 221 | self.queue_lists[self.recipes[ht][0][0]].append(new_wafer) 222 | lead_time = self.lead_dict[ht] 223 | total_processing_time = new_wafer.start_time + lead_time 224 | week_number = int(total_processing_time / (7*24*60)) 225 | self.due_wafers[ht][week_number] += self.num_wafers 226 | self.wafer_index += 1 227 | 228 | for machine in self.machines_list: 229 | if machine.available: 230 | allowed_actions = machine.get_allowed_actions(self) 231 | if len(allowed_actions) > 0: 232 | self.next_machine = machine 233 | self.allowed_actions = allowed_actions 234 | return 235 | while True: 236 | self.env.step() 237 | for machine in self.machines_list: 238 | if machine.available: 239 | allowed_actions = machine.get_allowed_actions(self) 240 | if len(allowed_actions) > 0: 241 | self.next_machine = machine 242 | self.allowed_actions = allowed_actions 243 | return 244 | 245 | 246 | def run_action(self, machine, ht, seq): 247 | self.order_completed = False 248 | self.step_reward = 0 249 | # Set the machine to be unavailable to process parts because it is now busy 250 | assert machine.available 251 | machine.available = False 252 | # Find the wafer that has that HT and seq 253 | wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq) 254 | # set the wafer being processed on this machine to wafer_choice 255 | machine.wafer_being_proc = wafer_choice 256 | # Remove the part from it's queue 257 | self.queue_lists[machine.station].remove(wafer_choice) 258 | # Begin processing the part on the machine 259 | machine.process = self.env.process(machine.part_process(wafer_choice, self)) 260 | 261 | for machine in self.machines_list: 262 | if machine.available: 263 | allowed_actions = machine.get_allowed_actions(self) 264 | if len(allowed_actions) > 0: 265 | self.next_machine = machine 266 | self.allowed_actions = allowed_actions 267 | return 268 | while True: 269 | before_time = self.env.now 270 | self.env.step() 271 | time_change = self.env.now-before_time 272 | current_week = math.ceil(self.env.now / (7 * 24 * 60)) # Calculating the current week 273 | for key, value in self.due_wafers.items(): 274 | buffer_list = [] # This list stores value of previous unfinished wafers count 275 | buffer_list.append(sum(value[:current_week])) 276 | self.step_reward -= time_change*sum(buffer_list) 277 | 278 | for machine in self.machines_list: 279 | if machine.available: 280 | allowed_actions = machine.get_allowed_actions(self) 281 | if len(allowed_actions) > 0: 282 | self.next_machine = machine 283 | self.allowed_actions = allowed_actions 284 | return 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | --------------------------------------------------------------------------------