├── README.md
└── new
    ├── A2C
        ├── A2C_fact.py
        ├── ActorCritic.py
        ├── __pycache__
        │   ├── ActorCritic.cpython-37.pyc
        │   └── factory_sim.cpython-37.pyc
        ├── actor.h5
        ├── critic.h5
        ├── factory_sim.py
        ├── rollout_A2C.py
        └── training_graph.png
    ├── DQN
        ├── DQN_fact.py
        ├── DQN_model.h5
        ├── DeepQNet.py
        ├── __pycache__
        │   ├── DeepQNet.cpython-37.pyc
        │   └── factory_sim.cpython-37.pyc
        ├── factory_sim.py
        ├── results.txt
        └── rollout_DQN.py
    ├── V-PG
        ├── PG_Class.py
        ├── PG_fact.py
        ├── PG_model.h5
        ├── __pycache__
        │   ├── PG_Class.cpython-37.pyc
        │   └── factory_sim.cpython-37.pyc
        ├── factory_sim.py
        └── rollout_PG.py
    └── critical_Ratio
        ├── __pycache__
            └── factory_sim.cpython-37.pyc
        ├── critical_ratio.py
        └── factory_sim.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Deep Reinforcement Learning for Smart Factory Optimization
 2 | 
 3 | In this work, we describe an approach to using reinforcement learning techniques to optimize manufacturing processes. As a case study, the factory system examined here is modeled on the manufacturing system at the Western Digital Corporation facility in San Jose. By first building a simulation of the factory system and then applying reinforcement learning techniques to it, reinforcement learning algorithms involving Q-learning, Deep Q-Networks, Policy Gradients and Policy Gradient Search were developed and implemented on the simulation. Results for these methods are compared.
 4 | 
 5 | ## Simulation
 6 | Reinforcement learning algorithms are trained through experience by interacting with an environment and updating the policy in response to reward signals. However, it is not feasible to do this training on the actual factory system, so a simulated factory environment was created for the reinforcement learning algorithms to train on. This simulated environment was created in Python using the simulation package SimPy.
 7 | 
 8 | In the simulated environment a python object is maintained for each machine and each cassette of wafers in the factory. The machine objects have methods which correspond to processing wafers on that machine. The machine objects also record the current operational status of the machines including whether or not they are currently processing a part and whether or not the machine is broken. Each machine can only process one cassette of wafers at a time and the processing time for that cassette is determined by the head type and sequence step for that cassette as well as the number of wafers within the cassette. All the experiments done so far assume there is the same number of wafers in each cassette, but in future work this may be generalized to allow for variable numbers of wafers.
 9 | 
10 | The wafer cassette objects represent cassettes of wafers and maintain information about the cassette such as the number
11 | of wafers in the cassette, the head type, and the sequence step of the wafers in that cassette. In the simulation the machines are organized into stations which each contain a set of machines which are all capable of performing the same operation. There is a recipe corresponding to each head type which indicates the sequence of stations that the cassette must be processed at in order to complete a cassette of wafers of that head type. Also included in the recipe are parameters which are used to calculate the processing time for each step in the sequence for that head type.
12 | 


--------------------------------------------------------------------------------
/new/A2C/A2C_fact.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | import ActorCritic
 11 | 
 12 | sim_time = 3e5
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:10000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | ####################################################
 91 | ########## CREATING THE STATE SPACE  ###############
 92 | ####################################################
 93 | def get_state(sim):
 94 |     # Calculate the state space representation.
 95 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
 96 |     # step
 97 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
 98 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
 99 |                  list(range(len(sim.recipes[ht]) + 1))]
100 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
101 |     b = np.zeros(len(sim.machines_list))
102 |     b[sim.machines_list.index(sim.next_machine)] = 1
103 |     state_rep.extend(b)
104 |     # Append the due dates list to the state space for making the decision
105 |     rolling_window = [] # This is the rolling window that will be appended to state space
106 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
107 |     current_time = sim.env.now # Calculating the current time
108 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
109 | 
110 |     for key, value in sim.due_wafers.items():
111 |         rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length
112 |         buffer_list = [] # This list stores value of previous unfinished wafers count
113 |         buffer_list.append(sum(value[:current_week]))
114 |         rolling_window.extend([buffer_list])
115 | 
116 |     c = sum(rolling_window, [])
117 |     state_rep.extend(c) # Appending the rolling window to state space 
118 |     return state_rep
119 | 
120 | 
121 | 
# Create the factory simulation object.
# break_mean / repair_mean are not passed, so FactorySim uses its defaults
# (None) and machines are created without a failure process.
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
# start the simulation: releases the initial wafer boxes and selects the first
# machine that needs a decision
my_sim.start()
# Retrieve machine object for first action choice
mach = my_sim.next_machine
# Save the state and allowed actions at the start for later use in training examples
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions
# The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] listing every
# (head type, sequence step) combination that can be processed at any station.
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
state_size = len(state)

# Creating the A2C agent
a2c_agent = ActorCritic.A2CAgent(state_size= state_size, action_space= action_space)

# NOTE(review): order_count is never read or updated below.
order_count = 0

# One iteration = one decision: choose an action for `mach`, advance the
# simulation until another machine needs a decision, then train on the
# resulting (state, action, reward, next_state) transition.
while my_sim.env.now < sim_time:
    action = a2c_agent.choose_action(state, allowed_actions)

    # action is a (head type, sequence step) tuple
    my_sim.run_action(mach, action[0], action[1])
    print('Step Reward:'+ str(my_sim.step_reward))
    
    # Record the machine, state, allowed actions and reward at the new time step
    reward = my_sim.step_reward
    next_mach = my_sim.next_machine
    next_state = get_state(my_sim)
    next_allowed_actions = my_sim.allowed_actions
    

    # Diagnostic output, printed on every step
    print(f"state dimension: {len(state)}")
    print(f"next state dimension: {len(next_state)}")
    print("action space dimension:", action_size)
    print("State:", state)

    # Train the A2C Agent
    a2c_agent.train_model(state, action, reward, next_state)

    # Record the information for use again in the next training example
    mach, allowed_actions, state = next_mach, next_allowed_actions, next_state

# Save the trained A2C Actor and Critic Models
a2c_agent.save_model("actor.h5", "critic.h5")

# Total wafers produced (cycle_time receives one entry per completed wafer box)
print("Total wafers produced:", len(my_sim.cycle_time))


#Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 


--------------------------------------------------------------------------------
/new/A2C/ActorCritic.py:
--------------------------------------------------------------------------------
  1 | from keras.models import Sequential
  2 | from keras.layers import Dense, Dropout
  3 | from keras.optimizers import Adam
  4 | from collections import deque
  5 | import numpy as np
  6 | import random
  7 | 
  8 | 
  9 | ########################################################################################################################################
 10 | #################################################################### CREATING A2C Class ################################################
 11 | ########################################################################################################################################
 12 | 
 13 | # Advantage Actor-Critic agent 
 14 | class A2CAgent:
 15 |     def __init__(self, state_size, action_space, epsilon_decay=0.8):
 16 |         self.state_size = state_size
 17 |         self.action_space = action_space
 18 |         self.action_size = len(action_space)
 19 |         self.value_size = 1
 20 |         self.epsilon = 1.0
 21 |         self.epsilon_min = 0.0
 22 |         self.epsilon_decay = epsilon_decay
 23 | 
 24 |         # Hyperparameters for Policy Gradient
 25 |         self.discount_factor = 0.99
 26 |         self.actor_lr = 0.001
 27 |         self.critic_lr = 0.005
 28 | 
 29 |         # Create model for policy network 
 30 |         self.actor = self.build_actor()
 31 |         self.critic = self.build_critic()
 32 | 
 33 | 
 34 |     # Approximate policy and value using Neural Network 
 35 |     # actor: state is input and probability of each action is output of model 
 36 |     def build_actor(self):
 37 |         actor = Sequential()
 38 |         actor.add(Dense(400, input_dim= self.state_size, activation= 'relu', kernel_initializer='he_uniform'))
 39 |         actor.add(Dense(250, activation= 'relu', kernel_initializer='he_uniform'))
 40 |         actor.add(Dense(125, activation= 'relu', kernel_initializer='he_uniform'))
 41 |         actor.add(Dense(self.action_size, activation= 'softmax', kernel_initializer= 'he_uniform'))
 42 |         actor.summary()
 43 |         actor.compile(loss='categorical_crossentropy', optimizer=Adam(lr=self.actor_lr))
 44 |         return actor
 45 | 
 46 |     # critic: state is input and value of state is the output of model 
 47 |     def build_critic(self):
 48 |         critic = Sequential()
 49 |         critic.add(Dense(400, input_dim= self.state_size, activation= 'relu', kernel_initializer='he_uniform'))
 50 |         critic.add(Dense(250, activation= 'relu', kernel_initializer='he_uniform'))
 51 |         critic.add(Dense(125, activation= 'relu', kernel_initializer='he_uniform'))
 52 |         critic.add(Dense(50, activation= 'relu', kernel_initializer= 'he_uniform'))
 53 |         critic.add(Dense(self.value_size, activation= 'linear', kernel_initializer='he_uniform'))
 54 |         critic.summary()
 55 |         critic.compile(loss="mse", optimizer=Adam(lr=self.critic_lr))
 56 |         return critic
 57 | 
 58 |     # Using the output of the policy network, pick action stochastically 
 59 |     def choose_action(self, state, allowed_actions):
 60 |         self.epsilon *= self.epsilon_decay
 61 |         self.epsilon = max(self.epsilon_min, self.epsilon)
 62 |         r = np.random.random()
 63 | 
 64 |         if r < self.epsilon:
 65 |             print("******* CHOOSING A RANDOM ACTION *******")
 66 |             return random.choice(allowed_actions)
 67 | 
 68 |         state = np.array(state).reshape(1, self.state_size)
 69 |         pred = self.actor.predict(state)
 70 |         pred = sum(pred.tolist(), [])
 71 |         temp = []
 72 |         for item in allowed_actions:
 73 |             temp.append(pred[self.action_space.index(item)])
 74 |         print(" ********************* CHOOSING A PREDICTED ACTION **********************")
 75 |         return allowed_actions[np.argmax(temp)]
 76 | 
 77 | 
 78 |     # Update the policy network every episode 
 79 |     def train_model(self, state, action, reward, next_state):
 80 |         target = np.zeros((1, self.value_size))
 81 |         advantages = np.zeros((1, self.action_size))
 82 |         state = np.array(state).reshape(1, self.state_size)
 83 |         next_state = np.array(state).reshape(1, self.state_size)
 84 |         value = self.critic.predict(state)[0]
 85 |         next_value = self.critic.predict(next_state)[0]
 86 | 
 87 |         action_id = self.action_space.index(action)
 88 | 
 89 |         advantages[0][action_id] = reward + self.discount_factor * (next_value) - value
 90 |         target[0][0] = reward + self.discount_factor*next_value
 91 | 
 92 |         self.actor.fit(state, advantages, epochs=1)
 93 |         self.critic.fit(state, target, epochs=1)
 94 | 
 95 | 
 96 |     # Save the actor and critic models
 97 |     def save_model(self, fn1, fn2):
 98 |         self.actor.save(fn1)
 99 |         self.critic.save(fn2)
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/new/A2C/__pycache__/ActorCritic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/__pycache__/ActorCritic.cpython-37.pyc


--------------------------------------------------------------------------------
/new/A2C/__pycache__/factory_sim.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/__pycache__/factory_sim.cpython-37.pyc


--------------------------------------------------------------------------------
/new/A2C/actor.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/actor.h5


--------------------------------------------------------------------------------
/new/A2C/critic.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/critic.h5


--------------------------------------------------------------------------------
/new/A2C/factory_sim.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import simpy
  3 | from collections import namedtuple, Counter
  4 | from itertools import count, filterfalse
  5 | import random
  6 | import math
  7 | 
  8 | ####################################################
  9 | ########## CREATING THE WAFER CLASS  ###############
 10 | ####################################################
 11 | class wafer_box(object):
 12 |     def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict):
 13 |         self.env = sim_inst.env
 14 |         self.name = f"w{wafer_index}"
 15 |         self.start_time = sim_inst.env.now
 16 |         self.number_wafers = number_wafers
 17 |         self.HT = HT
 18 |         self.seq = 0
 19 |         self.due_time = self.start_time + lead_dict[self.HT]
 20 | 
 21 | ####################################################
 22 | ########## CREATING THE MACHINE CLASS ##############
 23 | ####################################################
 24 | class Machine(object):
 25 |     def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None):
 26 |         self.env = sim_inst.env
 27 |         self.name = name
 28 |         self.station = station
 29 |         self.available = True
 30 |         self.broken = False
 31 |         self.wafer_being_proc = None
 32 |         self.parts_made = 0
 33 |         self.break_mean = break_mean
 34 | 
 35 |         if break_mean is not None:
 36 |             self.time_to_fail = self.time_to_failure()
 37 | 
 38 |         self.process = None
 39 |         self.repair_mean = repair_mean
 40 | 
 41 |     def time_to_failure(self):
 42 |         """Return time until next failure for a machine."""
 43 |         return random.expovariate(1/self.break_mean)
 44 | 
 45 |     def time_to_repair(self):
 46 |         """Return time until next failure for a machine."""
 47 |         return random.expovariate(1/self.repair_mean)
 48 | 
 49 |     def break_machine(self):
 50 |         """Break the machine after break_time"""
 51 |         assert not self.broken
 52 |         start = self.env.now
 53 |         try:
 54 |             yield self.env.timeout(self.time_to_fail)
 55 |             self.process.interrupt()
 56 |             self.time_to_fail = self.time_to_failure()
 57 |         except:
 58 |             self.time_to_fail -= self.env.now-start
 59 | 
 60 |     def get_proc_time(self, wafer, sim_inst):
 61 |         proc_step = sim_inst.recipes[wafer.HT][wafer.seq]
 62 |         A = proc_step[1]
 63 |         B = proc_step[2]
 64 |         LS = proc_step[3]
 65 |         include_load = proc_step[4]
 66 |         load = proc_step[5]
 67 |         include_unload = proc_step[6]
 68 |         unload = proc_step[7]
 69 |         proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS)
 70 | 
 71 |         if include_load == -1:
 72 |             proc_t += load
 73 |         if include_unload == -1:
 74 |             proc_t += unload
 75 |         return proc_t
 76 | 
 77 |     def part_process(self, wafer, sim_inst):
 78 |         # This function defines a process where a part of head type HT and sequence step seq is processed on the machine
 79 | 
 80 |         # get the amount of time it takes for the operation to run
 81 |         proc_t = self.get_proc_time(wafer, sim_inst)
 82 | 
 83 |         done_in = proc_t
 84 |         while done_in:
 85 |             try:
 86 |                 if self.break_mean is not None:
 87 |                     break_process = self.env.process(self.break_machine())
 88 |                 start = self.env.now
 89 |                 print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start))
 90 |                 # wait until the process is done
 91 |                 yield sim_inst.env.timeout(done_in)
 92 |                 # set the wafer being processed to None
 93 |                 self.wafer_being_proc = None
 94 |                 # set machine to be available to process part
 95 |                 self.available = True
 96 |                 print("Completed the process step of wafer %s on machine %s at %s and sent to "
 97 |                       "next machine."%(wafer.name, self.name, self.env.now))
 98 |                 # set the wafer to be at the next step in the sequence
 99 |                 wafer.seq += 1
100 |                 # if seq is not the last sequence step then find the next station and choose actions for each of the
101 |                 # available machines in that station
102 |                 if wafer.seq < (len(sim_inst.recipes[wafer.HT])):
103 |                     # add the part to the corresponding queue for the next operation in the sequence
104 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer)
105 |                 else:
106 |                     # # add the part to the list of completed parts
107 |                     # sim_inst.queue_lists['complete'].append(wafer)
108 |                     sim_inst.cycle_time.append(self.env.now - wafer.start_time)
109 |                     print("Finished processing wafer %s at %s"%(wafer.name, self.env.now))
110 |                     sim_inst.complete_wafer_dict[wafer.HT]+=1
111 |                     sim_inst.order_completed = True
112 |                     # Update the due_wafers dictionary to indicate that wafers of this head type were completed
113 | 
114 |                     # Find the index of the earliest week for which there are one or more wafers of the given head type
115 |                     # due.
116 |                     week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None)
117 | 
118 |                     # Subtract wafer,number_wafers wafers from the corresponding list element 
119 |                     sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers
120 | 
121 |                     new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index,
122 |                                           sim_inst.lead_dict)
123 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer)
124 |                     lead_time = sim_inst.lead_dict[wafer.HT]
125 |                     total_processing_time = new_wafer.start_time + lead_time
126 |                     week_number = int(total_processing_time / (7 * 24 * 60))
127 |                     sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers
128 |                     sim_inst.wafer_index += 1
129 | 
130 | 
131 | 
132 |                 if self.break_mean is not None:
133 |                     break_process.interrupt()
134 |                 done_in = 0
135 | 
136 |             except simpy.Interrupt:
137 |                 self.broken = True
138 |                 done_in -= self.env.now - start
139 |                 yield self.env.timeout(self.time_to_repair())
140 |                 self.broken = False
141 | 
142 |         # Parts completed by this machine
143 |         self.parts_made += 1
144 | 
145 |     def get_allowed_actions(self, sim_inst):
146 |         #find all (HT, seq) tuples with non zero queues at the station of this machine
147 |         return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station])))
148 | 
149 | ####################################################
150 | ########## CREATING THE FACTORY CLASS ##############
151 | ####################################################
class FactorySim(object):
    """SimPy-based factory simulation driven one scheduling decision at a time.

    Args:
        sim_time: stored as ``Sim_time``; not otherwise used by this class.
        m_dict: mapping of machine name to station name.
        recipes: mapping of head type to its ordered list of recipe steps; the
            first element of each step is the station name.
        lead_dict: mapping of head type to lead time.
        wafers_per_box: number of wafers in every wafer box.
        wip_levels: mapping of head type to the number of boxes released by
            :meth:`start`.
        break_mean: mean time between machine failures, or None for no failures.
        repair_mean: mean machine repair time.
    """

    # Initialize simpy environment and set the amount of time the simulation will run for
    def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None):
        self.break_mean = break_mean
        self.repair_mean = repair_mean
        self.order_completed = False
        self.allowed_actions = None
        self.env = simpy.Environment()
        self.Sim_time = sim_time
        self.next_machine = None
        self.lead_dict = lead_dict
        self.num_wafers = wafers_per_box
        self.wip_levels = wip_levels

        # Number of future weeks we want to look into for calculating due dates.
        # NOTE(review): a due week at or beyond this value would raise an
        # IndexError when the due_wafers lists are updated.
        self.FUTURE_WEEKS = 100

        # Initialize an index that will be used to name each wafer box
        self.wafer_index = 0

        # Dictionary where the key is the name of the machine and the value is its station
        self.machine_dict = m_dict

        self.machines_list = [Machine(self, name, station, self.break_mean, self.repair_mean)
                              for name, station in self.machine_dict.items()]

        # create a list of all the station names
        self.stations = list(set(self.machine_dict.values()))

        # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
        self.recipes = recipes

        # number of complete wafer boxes for each head type
        self.complete_wafer_dict = {ht: 0 for ht in self.recipes}

        self.number_of_machines = len(self.machine_dict)

        # Queues of wafer_box objects waiting at each station
        self.queue_lists = {station: [] for station in self.stations}

        self.order_complete_time = 0
        self.cycle_time = []
        self.step_reward = 0

        # For each head type, the number of wafers due in each of the next FUTURE_WEEKS weeks
        self.due_wafers = {ht: [0]*self.FUTURE_WEEKS for ht in self.recipes}

        # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and
        # sequence step combinations which may be processed at that station
        self.station_HT_seq = {station: [] for station in self.stations}

        for HT in self.recipes:
            for seq, step in enumerate(self.recipes[HT]):
                self.station_HT_seq[step[0]].append((HT, seq))

    def _select_next_machine(self):
        """Pick the first available machine that has queued work.

        On success ``next_machine`` and ``allowed_actions`` are updated and
        True is returned; otherwise nothing changes and False is returned.
        """
        for candidate in self.machines_list:
            if candidate.available:
                allowed_actions = candidate.get_allowed_actions(self)
                if len(allowed_actions) > 0:
                    self.next_machine = candidate
                    self.allowed_actions = allowed_actions
                    return True
        return False

    def start(self):
        """Release the initial wafer boxes and advance to the first decision."""
        for ht in self.wip_levels.keys():
            for i in range(self.wip_levels[ht]):
                new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict)
                # Every new box queues at the first station of its recipe
                self.queue_lists[self.recipes[ht][0][0]].append(new_wafer)
                lead_time = self.lead_dict[ht]
                total_processing_time = new_wafer.start_time + lead_time
                week_number = int(total_processing_time / (7*24*60))
                self.due_wafers[ht][week_number] += self.num_wafers
                self.wafer_index += 1

        # Step the simulation until some available machine has work to choose from
        while not self._select_next_machine():
            self.env.step()

    def run_action(self, machine, ht, seq):
        """Start a box of (``ht``, ``seq``) on ``machine`` and advance to the next decision.

        ``step_reward`` is reset to 0 and then, for every simulation step
        taken while waiting for the next decision, reduced by the elapsed time
        multiplied by the number of wafers due in weeks before the current
        week index, summed over all head types.

        Args:
            machine: an available machine whose station queue holds a matching box.
            ht: head type of the box to process.
            seq: sequence step of the box to process.
        """
        self.order_completed = False
        self.step_reward = 0
        # Set the machine to be unavailable to process parts because it is now busy
        assert machine.available
        machine.available = False
        # Find the wafer that has that HT and seq
        wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq)
        # set the wafer being processed on this machine to wafer_choice
        machine.wafer_being_proc = wafer_choice
        # Remove the part from its queue
        self.queue_lists[machine.station].remove(wafer_choice)
        # Begin processing the part on the machine
        machine.process = self.env.process(machine.part_process(wafer_choice, self))

        while not self._select_next_machine():
            before_time = self.env.now
            self.env.step()
            time_change = self.env.now-before_time
            current_week = math.ceil(self.env.now / (7 * 24 * 60))  # Calculating the current week
            for value in self.due_wafers.values():
                # wafers that were due in weeks before the current week index
                self.step_reward -= time_change*sum(value[:current_week])
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 


--------------------------------------------------------------------------------
/new/A2C/rollout_A2C.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | from keras.models import load_model
 11 | 
 12 | sim_time = 1e5
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:10000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | ####################################################
 91 | ########## CREATING THE STATE SPACE  ###############
 92 | ####################################################
 93 | def get_state(sim):
 94 |     # Calculate the state space representation.
 95 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
 96 |     # step
 97 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
 98 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
 99 |                  list(range(len(sim.recipes[ht]) + 1))]
100 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
101 |     b = np.zeros(len(sim.machines_list))
102 |     b[sim.machines_list.index(sim.next_machine)] = 1
103 |     state_rep.extend(b)
104 |     # Append the due dates list to the state space for making the decision
105 |     rolling_window = [] # This is the rolling window that will be appended to state space
106 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
107 |     current_time = sim.env.now # Calculating the current time
108 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
109 | 
110 |     for key, value in sim.due_wafers.items():
111 |         rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length
112 |         buffer_list = [] # This list stores value of previous unfinished wafers count
113 |         buffer_list.append(sum(value[:current_week]))
114 |         rolling_window.extend([buffer_list])
115 | 
116 |     c = sum(rolling_window, [])
117 |     state_rep.extend(c) # Appending the rolling window to state space 
118 |     return state_rep
119 | 
120 | 
121 | 
122 | #####################################################################
123 | ######################### LOADING THE TRAINED POLICY ################
124 | #####################################################################
actor = load_model("actor.h5") # Model used for choosing actions
critic = load_model("critic.h5") # Loaded but not used below (the critic is only needed while training A2C)


def choose_action(state, allowed_actions, action_space):
    """Pick the allowed action that the loaded actor network scores highest.

    ``state`` is reshaped into a single-row batch of width ``state_size`` and
    passed to ``actor.predict``.  Only the outputs at the positions of
    ``allowed_actions`` within ``action_space`` are compared; the allowed
    action with the largest output is returned.  No random exploration takes
    place here.
    """
    batch = np.array(state).reshape(1, state_size)
    # Flatten the (1, n) prediction into a plain list of n scores
    scores = sum(actor.predict(batch).tolist(), [])
    allowed_scores = [scores[action_space.index(candidate)] for candidate in allowed_actions]
    print(" ********************* CHOOSING A PREDICTED ACTION **********************")
    return allowed_actions[np.argmax(allowed_scores)]
139 | 
140 | 
141 | 
# Build the factory simulation from the prepared inputs and start it
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
my_sim.start()

# Machine, state and allowed actions for the first decision
mach = my_sim.next_machine
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions

# The action space is the flattened list of the tuples stored in
# my_sim.station_HT_seq, e.g. [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...]
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
# choose_action reads state_size when reshaping the state for the actor
state_size = len(state)


# Roll the loaded policy forward until the simulation clock reaches sim_time
while my_sim.env.now < sim_time:
    action = choose_action(state, allowed_actions, action_space)

    my_sim.run_action(mach, action[0], action[1])
    reward = my_sim.step_reward
    print('Step Reward:'+ str(reward))

    # Snapshot the simulation at the new decision point
    next_mach = my_sim.next_machine
    next_state = get_state(my_sim)
    next_allowed_actions = my_sim.allowed_actions

    print(f"state dimension: {len(state)}")
    print(f"next state dimension: {len(next_state)}")
    print("action space dimension:", action_size)
    print("State:", state)

    # The new decision point becomes the current one for the next iteration
    mach = next_mach
    allowed_actions = next_allowed_actions
    state = next_state



# Total wafers produced
print("Total wafers produced:", len(my_sim.cycle_time))


# Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 


--------------------------------------------------------------------------------
/new/A2C/training_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/A2C/training_graph.png


--------------------------------------------------------------------------------
/new/DQN/DQN_fact.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | import DeepQNet
 11 | 
 12 | sim_time = 3e5
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:10000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | ####################################################
 91 | ########## CREATING THE STATE SPACE  ###############
 92 | ####################################################
 93 | def get_state(sim):
 94 |     # Calculate the state space representation.
 95 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
 96 |     # step
 97 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
 98 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
 99 |                  list(range(len(sim.recipes[ht]) + 1))]
100 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
101 |     b = np.zeros(len(sim.machines_list))
102 |     b[sim.machines_list.index(sim.next_machine)] = 1
103 |     state_rep.extend(b)
104 |     # Append the due dates list to the state space for making the decision
105 |     rolling_window = [] # This is the rolling window that will be appended to state space
106 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
107 |     current_time = sim.env.now # Calculating the current time
108 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
109 | 
110 |     for key, value in sim.due_wafers.items():
111 |         rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length
112 |         buffer_list = [] # This list stores value of previous unfinished wafers count
113 |         buffer_list.append(sum(value[:current_week]))
114 |         rolling_window.extend([buffer_list])
115 | 
116 |     c = sum(rolling_window, [])
117 |     state_rep.extend(c) # Appending the rolling window to state space 
118 |     return state_rep
119 | 
120 | 
121 | 
# Create the factory simulation object
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
# start the simulation
my_sim.start()
# Retrieve machine object for first action choice
mach = my_sim.next_machine
# Save the state and allowed actions at the start for later use in training examples
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions
# The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head
# types and sequence steps for all allowed actions.
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
state_size = len(state)

# Creating the DQN agent
dqn_agent = DeepQNet.DQN(state_space_dim= state_size, action_space= action_space)

order_count = 0

while my_sim.env.now < sim_time:
    action = dqn_agent.choose_action(state, allowed_actions)

    my_sim.run_action(mach, action[0], action[1])
    print('Step Reward:'+ str(my_sim.step_reward))
    # Record the machine, state, allowed actions and reward at the new time step
    next_mach = my_sim.next_machine
    next_state = get_state(my_sim)
    next_allowed_actions = my_sim.allowed_actions
    reward = my_sim.step_reward

    print(f"state dimension: {len(state)}")
    print(f"next state dimension: {len(next_state)}")
    print("action space dimension:", action_size)
    print("State:", state)

    # Save the example for later training.
    # `state` must still be the state in which `action` was chosen: it is only
    # advanced to `next_state` at the end of the iteration.  Advancing it
    # before this call would store transitions whose state and next state are
    # the same vector.
    dqn_agent.remember(state, action, reward, next_state, next_allowed_actions)

    if my_sim.order_completed:
        # After each wafer completed, train the policy network
        dqn_agent.replay()
        order_count += 1
        if order_count >= 20:
            # After every 20 completed orders update the target network and reset the order count
            dqn_agent.train_target()
            order_count = 0

    # Record the information for use again in the next training example
    mach, allowed_actions, state = next_mach, next_allowed_actions, next_state


# Save the trained DQN policy network
dqn_agent.save_model("DQN_model.h5")

# Total wafers produced
print("Total wafers produced:", len(my_sim.cycle_time))


#Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 


--------------------------------------------------------------------------------
/new/DQN/DQN_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/DQN_model.h5


--------------------------------------------------------------------------------
/new/DQN/DeepQNet.py:
--------------------------------------------------------------------------------
 1 | from keras.models import Sequential
 2 | from keras.layers import Dense, Dropout
 3 | from keras.optimizers import Adam
 4 | from collections import deque
 5 | import numpy as np
 6 | import random
 7 | 
 8 | 
 9 | ########################################################################################################################################
10 | #################################################################### CREATING Deep Q-learning Class ####################################
11 | ########################################################################################################################################
12 | 
class DQN:
    """Deep Q-learning agent with a replay memory and a separate target network.

    ``action_space`` is the list of all actions; the position of an action in
    that list is the index of its output in the networks.
    """

    def __init__(self, state_space_dim, action_space, gamma=0.9, epsilon_decay=0.8, tau=0.125, learning_rate=0.005):
        # Problem dimensions
        self.state_space_dim = state_space_dim
        self.action_space = action_space
        # Discount factor and soft-update rate for the target network
        self.gamma = gamma
        self.tau = tau
        # Exploration schedule: epsilon starts at 1.0 and is multiplied by
        # epsilon_decay on every call to choose_action, never below epsilon_min
        self.epsilon = 1.0
        self.epsilon_min = 0.0
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        # Replay memory holding at most the 2000 most recent transitions
        self.memory = deque(maxlen=2000)
        # Online network first, then the target network
        self.model = self.create_model()
        self.target_model = self.create_model()

    def create_model(self):
        """Build and compile the fully connected network used for the q function."""
        model = Sequential([
            Dense(400, input_dim=self.state_space_dim, activation='relu'),
            Dense(250, activation='relu'),
            Dense(125, activation='relu'),
            Dense(len(self.action_space)),
        ])
        model.compile(loss='mean_squared_error', optimizer=Adam(lr=self.learning_rate))
        return model

    def choose_action(self, state, allowed_actions):
        """Return a random allowed action with probability epsilon, else the best-scored one.

        Epsilon is decayed before the random draw.  When not exploring, the
        online network's outputs are compared only at the positions of
        ``allowed_actions``.
        """
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        if np.random.random() < self.epsilon:
            print("******* CHOOSING A RANDOM ACTION *******")
            return random.choice(allowed_actions)

        batch = np.array(state).reshape(1, self.state_space_dim)
        # Flatten the (1, n) prediction into a plain list of n values
        q_values = sum(self.model.predict(batch).tolist(), [])
        allowed_q = [q_values[self.action_space.index(item)] for item in allowed_actions]
        print(" ********************* CHOOSING A PREDICTED ACTION **********************")
        return allowed_actions[np.argmax(allowed_q)]

    def remember(self, state, action, reward, next_state, next_allowed_actions):
        """Append one transition to the replay memory."""
        transition = [state, action, reward, next_state, next_allowed_actions]
        self.memory.append(transition)

    def replay(self):
        """Fit the online network on 32 transitions sampled from the replay memory.

        Does nothing while fewer than 32 transitions are stored.  Each sampled
        transition is fitted individually; the target for the taken action is
        ``reward + gamma * max`` of the target network's outputs over the
        actions allowed in the next state.
        """
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        for state, action, reward, new_state, new_allowed_actions in random.sample(self.memory, batch_size):
            state = np.array(state).reshape(1, self.state_space_dim)
            target = self.target_model.predict(state)
            action_id = self.action_space.index(action)

            new_state = np.array(new_state).reshape(1, self.state_space_dim)
            next_pred = self.target_model.predict(new_state)[0].tolist()
            print("new_allowed_actions:", new_allowed_actions)
            # Only actions allowed in the next state take part in the max
            Q_future = max([next_pred[self.action_space.index(it)] for it in new_allowed_actions])

            target[0][action_id] = reward + self.gamma * Q_future
            self.model.fit(state, target, epochs=1, verbose=1)

    def train_target(self):
        """Move the target network's weights a fraction ``tau`` toward the online network's."""
        online_weights = self.model.get_weights()
        blended = [
            online_weights[i] * self.tau + target_w * (1 - self.tau)
            for i, target_w in enumerate(self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def save_model(self, fn):
        """Save the online network to the file ``fn``."""
        self.model.save(fn)


--------------------------------------------------------------------------------
/new/DQN/__pycache__/DeepQNet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/__pycache__/DeepQNet.cpython-37.pyc


--------------------------------------------------------------------------------
/new/DQN/__pycache__/factory_sim.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/DQN/__pycache__/factory_sim.cpython-37.pyc


--------------------------------------------------------------------------------
/new/DQN/factory_sim.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import simpy
  3 | from collections import namedtuple, Counter
  4 | from itertools import count, filterfalse
  5 | import random
  6 | import math
  7 | 
  8 | ####################################################
  9 | ########## CREATING THE WAFER CLASS  ###############
 10 | ####################################################
 11 | class wafer_box(object):
 12 |     def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict):
 13 |         self.env = sim_inst.env
 14 |         self.name = f"w{wafer_index}"
 15 |         self.start_time = sim_inst.env.now
 16 |         self.number_wafers = number_wafers
 17 |         self.HT = HT
 18 |         self.seq = 0
 19 |         self.due_time = self.start_time + lead_dict[self.HT]
 20 | 
 21 | ####################################################
 22 | ########## CREATING THE MACHINE CLASS ##############
 23 | ####################################################
 24 | class Machine(object):
 25 |     def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None):
 26 |         self.env = sim_inst.env
 27 |         self.name = name
 28 |         self.station = station
 29 |         self.available = True
 30 |         self.broken = False
 31 |         self.wafer_being_proc = None
 32 |         self.parts_made = 0
 33 |         self.break_mean = break_mean
 34 | 
 35 |         if break_mean is not None:
 36 |             self.time_to_fail = self.time_to_failure()
 37 | 
 38 |         self.process = None
 39 |         self.repair_mean = repair_mean
 40 | 
 41 |     def time_to_failure(self):
 42 |         """Return time until next failure for a machine."""
 43 |         return random.expovariate(1/self.break_mean)
 44 | 
 45 |     def time_to_repair(self):
 46 |         """Return time until next failure for a machine."""
 47 |         return random.expovariate(1/self.repair_mean)
 48 | 
 49 |     def break_machine(self):
 50 |         """Break the machine after break_time"""
 51 |         assert not self.broken
 52 |         start = self.env.now
 53 |         try:
 54 |             yield self.env.timeout(self.time_to_fail)
 55 |             self.process.interrupt()
 56 |             self.time_to_fail = self.time_to_failure()
 57 |         except:
 58 |             self.time_to_fail -= self.env.now-start
 59 | 
 60 |     def get_proc_time(self, wafer, sim_inst):
 61 |         proc_step = sim_inst.recipes[wafer.HT][wafer.seq]
 62 |         A = proc_step[1]
 63 |         B = proc_step[2]
 64 |         LS = proc_step[3]
 65 |         include_load = proc_step[4]
 66 |         load = proc_step[5]
 67 |         include_unload = proc_step[6]
 68 |         unload = proc_step[7]
 69 |         proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS)
 70 | 
 71 |         if include_load == -1:
 72 |             proc_t += load
 73 |         if include_unload == -1:
 74 |             proc_t += unload
 75 |         return proc_t
 76 | 
 77 |     def part_process(self, wafer, sim_inst):
 78 |         # This function defines a process where a part of head type HT and sequence step seq is processed on the machine
 79 | 
 80 |         # get the amount of time it takes for the operation to run
 81 |         proc_t = self.get_proc_time(wafer, sim_inst)
 82 | 
 83 |         done_in = proc_t
 84 |         while done_in:
 85 |             try:
 86 |                 if self.break_mean is not None:
 87 |                     break_process = self.env.process(self.break_machine())
 88 |                 start = self.env.now
 89 |                 print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start))
 90 |                 # wait until the process is done
 91 |                 yield sim_inst.env.timeout(done_in)
 92 |                 # set the wafer being processed to None
 93 |                 self.wafer_being_proc = None
 94 |                 # set machine to be available to process part
 95 |                 self.available = True
 96 |                 print("Completed the process step of wafer %s on machine %s at %s and sent to "
 97 |                       "next machine."%(wafer.name, self.name, self.env.now))
 98 |                 # set the wafer to be at the next step in the sequence
 99 |                 wafer.seq += 1
100 |                 # if seq is not the last sequence step then find the next station and choose actions for each of the
101 |                 # available machines in that station
102 |                 if wafer.seq < (len(sim_inst.recipes[wafer.HT])):
103 |                     # add the part to the corresponding queue for the next operation in the sequence
104 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer)
105 |                 else:
106 |                     # # add the part to the list of completed parts
107 |                     # sim_inst.queue_lists['complete'].append(wafer)
108 |                     sim_inst.cycle_time.append(self.env.now - wafer.start_time)
109 |                     print("Finished processing wafer %s at %s"%(wafer.name, self.env.now))
110 |                     sim_inst.complete_wafer_dict[wafer.HT]+=1
111 |                     sim_inst.order_completed = True
112 |                     # Update the due_wafers dictionary to indicate that wafers of this head type were completed
113 | 
114 |                     # Find the index of the earliest week for which there are one or more wafers of the given head type
115 |                     # due.
116 |                     week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None)
117 | 
118 |                     # Subtract wafer,number_wafers wafers from the corresponding list element 
119 |                     sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers
120 | 
121 |                     new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index,
122 |                                           sim_inst.lead_dict)
123 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer)
124 |                     lead_time = sim_inst.lead_dict[wafer.HT]
125 |                     total_processing_time = new_wafer.start_time + lead_time
126 |                     week_number = int(total_processing_time / (7 * 24 * 60))
127 |                     sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers
128 |                     sim_inst.wafer_index += 1
129 | 
130 | 
131 | 
132 |                 if self.break_mean is not None:
133 |                     break_process.interrupt()
134 |                 done_in = 0
135 | 
136 |             except simpy.Interrupt:
137 |                 self.broken = True
138 |                 done_in -= self.env.now - start
139 |                 yield self.env.timeout(self.time_to_repair())
140 |                 self.broken = False
141 | 
142 |         # Parts completed by this machine
143 |         self.parts_made += 1
144 | 
145 |     def get_allowed_actions(self, sim_inst):
146 |         #find all (HT, seq) tuples with non zero queues at the station of this machine
147 |         return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station])))
148 | 
149 | ####################################################
150 | ########## CREATING THE FACTORY CLASS ##############
151 | ####################################################
class FactorySim(object):
    """Factory simulation that is advanced one scheduling decision at a time.

    After ``start()`` and after every ``run_action()`` the simulation stops at
    the next point where an available machine has at least one wafer box
    queued at its station. That machine is exposed as ``next_machine`` and the
    (HT, seq) pairs it may process as ``allowed_actions``.
    """

    # Divisor used to turn a simulation time into a week index
    # (7 * 24 * 60, i.e. one week if the clock is counted in minutes).
    _TIME_UNITS_PER_WEEK = 7 * 24 * 60

    def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None):
        """Build the SimPy environment, the machines and the bookkeeping tables.

        Args:
            sim_time: Intended simulation horizon (stored as ``Sim_time``; also
                used to size the due-date table).
            m_dict: Mapping of machine name -> station name.
            recipes: Mapping of head type -> list of steps; ``step[0]`` of each
                step is the station the step runs on.
            lead_dict: Mapping of head type -> lead time added to a box's start
                time to obtain its due time.
            wafers_per_box: Number of wafers in each wafer box.
            wip_levels: Mapping of head type -> number of boxes created by
                ``start()``.
            break_mean: Passed to every ``Machine``; ``None`` disables breakdowns.
            repair_mean: Passed to every ``Machine``.
        """
        self.break_mean = break_mean
        self.repair_mean = repair_mean
        self.order_completed = False
        self.allowed_actions = None
        self.env = simpy.Environment()
        self.Sim_time = sim_time
        self.next_machine = None
        self.lead_dict = lead_dict
        self.num_wafers = wafers_per_box
        self.wip_levels = wip_levels

        # Number of future weeks tracked in the due-date table. The table used
        # to be fixed at 100 weeks, which raised IndexError as soon as a box's
        # due week (start time + lead time) fell beyond week 99. Keep 100 as
        # the minimum, but grow the table to cover the requested horizon plus
        # the longest lead time, with a small margin for the last step.
        longest_lead = max(lead_dict.values(), default=0)
        needed_weeks = int((sim_time + longest_lead) / self._TIME_UNITS_PER_WEEK) + 2
        self.FUTURE_WEEKS = max(100, needed_weeks)

        # Initialize an index that will be used to name each wafer box
        self.wafer_index = 0

        # Dictionary where the key is the name of the machine and the value is its station
        self.machine_dict = m_dict

        self.machines_list = [
            Machine(self, name, station, self.break_mean, self.repair_mean)
            for name, station in self.machine_dict.items()
        ]

        # Unique station names in first-seen order. A plain set() would make
        # the order (and therefore the order of station_HT_seq and of any
        # action space derived from it) depend on string hash randomisation,
        # so it could differ between the process that trains a network and
        # the process that later loads it.
        self.stations = list(dict.fromkeys(self.machine_dict.values()))

        # recipes give the sequence of stations that a wafer of each head type must be processed at
        self.recipes = recipes

        # Number of completed wafers for each head type
        self.complete_wafer_dict = {ht: 0 for ht in self.recipes}

        self.number_of_machines = len(self.machine_dict)

        # Queue of wafer_box objects waiting at each station
        self.queue_lists = {station: [] for station in self.stations}

        self.order_complete_time = 0
        self.cycle_time = []
        self.step_reward = 0

        # Number of wafers due in each week, per head type
        self.due_wafers = {ht: [0] * self.FUTURE_WEEKS for ht in self.recipes}

        # For every station, the (head type, sequence step) combinations
        # which may be processed at that station
        self.station_HT_seq = {station: [] for station in self.stations}
        for HT, steps in self.recipes.items():
            for seq, step in enumerate(steps):
                self.station_HT_seq[step[0]].append((HT, seq))

    def _select_next_machine(self):
        """Point ``next_machine``/``allowed_actions`` at the first available machine with queued work.

        Returns True when such a machine was found, False otherwise (in which
        case both attributes are left untouched).
        """
        for machine in self.machines_list:
            if machine.available:
                allowed_actions = machine.get_allowed_actions(self)
                if allowed_actions:
                    self.next_machine = machine
                    self.allowed_actions = allowed_actions
                    return True
        return False

    def start(self):
        """Create the initial wafer boxes and advance to the first decision point."""
        for ht, box_count in self.wip_levels.items():
            for _ in range(box_count):
                new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict)
                # Every new box enters the queue of the first station in its recipe
                self.queue_lists[self.recipes[ht][0][0]].append(new_wafer)
                # Register the box's wafers in the week they become due
                total_processing_time = new_wafer.start_time + self.lead_dict[ht]
                week_number = int(total_processing_time / self._TIME_UNITS_PER_WEEK)
                self.due_wafers[ht][week_number] += self.num_wafers
                self.wafer_index += 1

        # Step the environment until some available machine has work queued.
        # NOTE(review): env.step() is called unconditionally, so an empty event
        # queue surfaces as whatever error SimPy raises for it.
        while not self._select_next_machine():
            self.env.step()

    def run_action(self, machine, ht, seq):
        """Start processing a queued (ht, seq) box on ``machine`` and advance to the next decision.

        ``step_reward`` is reset to 0 and then, for every environment step
        taken while waiting for the next decision, reduced by
        ``elapsed time * sum(due_wafers[HT][:current_week])`` for each head type.

        Raises:
            AssertionError: if ``machine`` is not available.
            StopIteration: if no box with that head type and sequence step is
                queued at the machine's station.
        """
        self.order_completed = False
        self.step_reward = 0
        # Set the machine to be unavailable to process parts because it is now busy
        assert machine.available
        machine.available = False
        # Find the first queued wafer box that has that HT and seq
        station_queue = self.queue_lists[machine.station]
        wafer_choice = next(wafer for wafer in station_queue if wafer.HT == ht and wafer.seq == seq)
        machine.wafer_being_proc = wafer_choice
        # Remove the part from its queue and begin processing it on the machine
        station_queue.remove(wafer_choice)
        machine.process = self.env.process(machine.part_process(wafer_choice, self))

        if self._select_next_machine():
            return
        while True:
            before_time = self.env.now
            self.env.step()
            time_change = self.env.now - before_time
            current_week = math.ceil(self.env.now / self._TIME_UNITS_PER_WEEK)
            # Penalise the time that passed, weighted by the wafers registered
            # in the weeks before current_week, one head type at a time.
            for due_by_week in self.due_wafers.values():
                self.step_reward -= time_change * sum(due_by_week[:current_week])

            if self._select_next_machine():
                return
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 


--------------------------------------------------------------------------------
/new/DQN/results.txt:
--------------------------------------------------------------------------------
 1 | Critical Ratio 
 2 | 
 3 | #### For 3e5 time steps ###
 4 | 
 5 | Total wafers produced: 4796
 6 | ### Wafers of each head type ###
 7 | {'ASGA': 540, 'CGUE': 540, 'FGUF': 536, 'GOUC': 530, 'H7UP': 530, 'LAUB': 530, 'LEUA': 530, 'MCUG': 530, 'MMUP': 530}
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | DQN
14 | 
 15 | #### For 3e5 time steps ###
16 | Total wafers produced: 4796
17 | ### Wafers of each head type ###
18 | {'ASGA': 401, 'CGUE': 558, 'FGUF': 546, 'GOUC': 565, 'H7UP': 497, 'LAUB': 504, 'LEUA': 592, 'MCUG': 571, 'MMUP': 562}
19 | 
20 | 


--------------------------------------------------------------------------------
/new/DQN/rollout_DQN.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | from keras.models import load_model
 11 | 
 12 | sim_time = 1e5
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:10000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | ####################################################
 91 | ########## CREATING THE STATE SPACE  ###############
 92 | ####################################################
 93 | def get_state(sim):
 94 |     # Calculate the state space representation.
 95 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
 96 |     # step
 97 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
 98 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
 99 |                  list(range(len(sim.recipes[ht]) + 1))]
100 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
101 |     b = np.zeros(len(sim.machines_list))
102 |     b[sim.machines_list.index(sim.next_machine)] = 1
103 |     state_rep.extend(b)
104 |     # Append the due dates list to the state space for making the decision
105 |     rolling_window = [] # This is the rolling window that will be appended to state space
106 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
107 |     current_time = sim.env.now # Calculating the current time
108 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
109 | 
110 |     for key, value in sim.due_wafers.items():
111 |         rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length
112 |         buffer_list = [] # This list stores value of previous unfinished wafers count
113 |         buffer_list.append(sum(value[:current_week]))
114 |         rolling_window.extend([buffer_list])
115 | 
116 |     c = sum(rolling_window, [])
117 |     state_rep.extend(c) # Appending the rolling window to state space 
118 |     return state_rep
119 | 
120 | 
121 | 
122 | 
123 | #####################################################################
124 | ######################### LOADING THE TRAINED POLICY ################
125 | #####################################################################
# Load the saved Keras model once at import time; choose_action() below reads
# this module-level `model`. The file name is relative, so it is resolved
# against the current working directory.
model = load_model("DQN_model.h5")
127 | 
128 | 
129 | # Action function to choose the best action given the q-function if not exploring based on epsilon
def choose_action(state, allowed_actions, action_space):
    """Return the allowed action with the largest model output for ``state``.

    ``state`` is reshaped to a single row of width ``state_size`` and fed to
    the module-level ``model``; the outputs are indexed by each action's
    position in ``action_space``.
    """
    net_input = np.array(state).reshape(1, state_size)
    output_rows = model.predict(net_input).tolist()
    flat_outputs = sum(output_rows, [])
    candidate_scores = [flat_outputs[action_space.index(candidate)] for candidate in allowed_actions]
    print(" ********************* CHOOSING A PREDICTED ACTION **********************")
    best_position = np.argmax(candidate_scores)
    return allowed_actions[best_position]
139 | 
140 | 
141 | 
142 | 
# Create the factory simulation object
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
# Start the simulation; it stops at the first scheduling decision
my_sim.start()
# Machine, state and allowed actions for the first decision
mach = my_sim.next_machine
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions
# The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head
# types and sequence steps for all allowed actions.
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
# state_size is read by choose_action() when it reshapes the state
state_size = len(state)

# Roll the loaded policy forward until the simulation clock reaches sim_time
while my_sim.env.now < sim_time:
    action = choose_action(state, allowed_actions, action_space)

    my_sim.run_action(mach, action[0], action[1])
    print('Step Reward:'+ str(my_sim.step_reward))
    next_state = get_state(my_sim)

    print(f"state dimension: {len(state)}")
    print(f"next state dimension: {len(next_state)}")
    print("action space dimension:", action_size)

    # Carry the new decision point over to the next iteration (this hand-off
    # used to be written twice in a row)
    mach, allowed_actions, state = my_sim.next_machine, my_sim.allowed_actions, next_state


# Total wafers produced
print("Total wafers produced:", len(my_sim.cycle_time))


#Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
198 | 
199 | 
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 


--------------------------------------------------------------------------------
/new/V-PG/PG_Class.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | from keras.layers import Dense
 6 | 
 7 | from keras import optimizers
 8 | from keras.models import Model
 9 | from keras.layers import Input
10 | 
class PolGrad:
    """Policy-gradient agent over a fixed, ordered action space.

    The network outputs a softmax distribution over ``action_space``. Actions
    are always sampled among the currently allowed actions: uniformly while
    exploring (probability ``epsilon``), otherwise in proportion to the
    network's probabilities.
    """

    def __init__(self, action_space, state_size, gamma = 0.9,
                 epsilon = 1.0, epsilon_min = 0.00, epsilon_decay = 0.9999):
        """Store the hyper-parameters and build the networks.

        Args:
            action_space: Ordered list of all actions; network output ``i``
                corresponds to ``action_space[i]``.
            state_size: Length of the state vector fed to the network.
            gamma: Stored on the instance; not used by the methods of this class.
            epsilon: Initial exploration probability.
            epsilon_min: Lower bound for ``epsilon``.
            epsilon_decay: Factor applied to ``epsilon`` on every ``choose_action`` call.
        """
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.action_space = action_space
        self.action_size = len(action_space)
        self.state_size = state_size

        self.model = self.create_model()
        # NOTE(review): target_model is built but never used by this class;
        # kept so existing attribute access keeps working.
        self.target_model = self.create_model()

    @staticmethod
    def custom_loss(y_pred, y_true, discounted_episode_rewards, allowed_actions):
        """Return the reward-weighted cross-entropy of the taken actions.

        ``y_pred`` is the softmax output of the network, i.e. probabilities.
        The loss is therefore computed with a cross-entropy that expects
        probabilities; feeding them to a ``*_with_logits`` op would apply a
        second softmax. The rewards arrive with shape (batch, 1) and are
        flattened so that each sample's loss is multiplied by its own reward
        instead of broadcasting to a (batch, batch) matrix.

        ``allowed_actions`` is accepted for interface compatibility and is not used.
        """
        # Local import: the module only imports selected Keras names at file level
        from keras import backend as K

        neg_log_prob = K.categorical_crossentropy(y_true, y_pred)
        per_sample_reward = K.flatten(discounted_episode_rewards)
        return K.mean(neg_log_prob * per_sample_reward)

    def create_model(self):
        """Build and compile the policy network.

        The model takes four inputs (state, one-hot taken action, reward,
        allowed-action mask) because the loss is attached with ``add_loss``
        and needs the extra tensors; only the state feeds the dense layers.
        """
        x = Input(shape=(self.state_size,), name='input')
        y_true = Input(shape=(self.action_size,), name='y_true')
        discounted_episode_rewards = Input(shape=(1,), name='rewards')
        allowed_actions = Input(shape=(self.action_size,), name='allowed_a')
        f = Dense(400, activation = 'sigmoid', kernel_initializer='glorot_uniform')(x)
        f = Dense(250, activation = 'sigmoid', kernel_initializer='glorot_uniform')(f)
        f = Dense(125, activation = 'sigmoid', kernel_initializer='glorot_uniform')(f)
        y_pred = Dense(self.action_size, activation = 'softmax', kernel_initializer='glorot_uniform')(f)
        model = Model(inputs=[x, y_true, discounted_episode_rewards, allowed_actions], outputs = [y_pred])
        model.add_loss(self.custom_loss(y_pred, y_true, discounted_episode_rewards, allowed_actions))
        adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
        model.compile(loss = None, optimizer=adam, metrics=['mae'])
        return model

    def _action_mask(self, allowed_actions):
        """Return a 0/1 vector over ``action_space`` marking the allowed actions."""
        return np.array([1.0 if action in allowed_actions else 0.0
                         for action in self.action_space])

    def choose_action(self, state, allowed_actions):
        """Sample an allowed action and return its index in ``action_space``.

        ``epsilon`` is decayed on every call. With probability ``epsilon`` the
        action is drawn uniformly among the allowed actions; otherwise it is
        drawn in proportion to the network's probabilities restricted to the
        allowed actions (falling back to uniform if those are all zero).

        Raises:
            ValueError: if none of ``allowed_actions`` is in ``action_space``.
        """
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

        allowed_act_prob = self._action_mask(allowed_actions).reshape(1, self.action_size)

        r = np.random.random()
        if r > self.epsilon:
            print(" ************* CHOOSING A PREDICTED ACTION *************")
            # The action and reward inputs only feed the loss, so dummy
            # values are enough for prediction
            actions = np.ones((1, self.action_size))
            rewards = np.ones((1, 1))
            state = np.array(state).reshape(1, self.state_size)
            pred = self.model.predict([state, actions, rewards, allowed_act_prob])
            allowed_act_prob_aux = allowed_act_prob * pred
            if np.sum(allowed_act_prob_aux) != 0:
                allowed_act_prob = allowed_act_prob_aux
        else:
            print("******* CHOOSING A RANDOM ACTION *******")

        all_sum = np.sum(allowed_act_prob)
        if all_sum == 0:
            # Normalising would divide by zero and produce NaN probabilities
            raise ValueError("none of the allowed actions is part of the action space")
        allowed_act_prob = allowed_act_prob / all_sum
        # select action w.r.t the actions prob
        action = np.random.choice(range(allowed_act_prob.shape[1]), p=allowed_act_prob.ravel())
        return action

    def train_policy_gradient(self, states, actions, discounted_episode_rewards, allowed_actions):
        """Fit the network on one batch of recorded decisions.

        Args:
            states: Array with one state per row.
            actions: Array with the one-hot encoded taken action per row.
            discounted_episode_rewards: Reward weight per row.
            allowed_actions: For every row, the list of actions that were allowed.
        """
        allowed_act_prob = np.zeros((states.shape[0], self.action_size))
        for idx, step_allowed in enumerate(allowed_actions):
            allowed_act_prob[idx] = self._action_mask(step_allowed)
        self.model.fit([states, actions, discounted_episode_rewards, allowed_act_prob])

    def save_model(self, fn):
        """Save the policy network to the file ``fn``."""
        self.model.save(fn)
96 | 
97 | 
98 | 
99 |         


--------------------------------------------------------------------------------
/new/V-PG/PG_fact.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | import PG_Class
 11 | 
 12 | sim_time = 1e6
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:40000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | # Simple pad utility function
 91 | def pad(l, content, width):
 92 |     l.extend([content] * (width - len(l)))
 93 |     return l
 94 | 
 95 | 
 96 | ####################################################
 97 | ########## CREATING THE STATE SPACE  ###############
 98 | ####################################################
 99 | def get_state(sim):
100 |     # Calculate the state space representation.
101 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
102 |     # step
103 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
104 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
105 |                  list(range(len(sim.recipes[ht]) + 1))]
106 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
107 |     b = np.zeros(len(sim.machines_list))
108 |     b[sim.machines_list.index(sim.next_machine)] = 1
109 |     state_rep.extend(b)
110 |     # Append the due dates list to the state space for making the decision
111 |     rolling_window = [] # This is the rolling window that will be appended to state space
112 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
113 |     current_time = sim.env.now # Calculating the current time
114 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
115 | 
116 |     for key, value in sim.due_wafers.items():
117 |         k = value[current_week:current_week+max_length_of_window] #Adding only the values from current week up till the window length
118 |         if len(k) < max_length_of_window: #if list is less than length of window, then append 0's 
119 |             k = pad(k, 0, max_length_of_window)
120 | 
121 |         rolling_window.append(k) 
122 |         buffer_list = [] # This list stores value of previous unfinished wafers count
123 |         buffer_list.append(sum(value[:current_week]))
124 |         rolling_window.extend([buffer_list])
125 |     print("rolling_window: ", rolling_window)
126 |     c = sum(rolling_window, [])
127 |     state_rep.extend(c) # Appending the rolling window to state space 
128 |     return state_rep
129 | 
130 | 
131 | 
# Build the simulator and advance it to the first scheduling decision
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
my_sim.start()
# Machine, state and allowed actions of the first decision
mach = my_sim.next_machine
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions
# Ordered list of every (head type, sequence step) pair, e.g.
# [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...]; network output i maps to action_space[i]
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size, state_size = len(action_space), len(state)

# Policy-gradient agent sized to the state vector and the action space
pol_grad = PG_Class.PolGrad(action_space, len(state))

# Transitions recorded since the last training call
episode_states, episode_actions, allRewards, episode_allowed_a = [], [], [], []


while my_sim.env.now < sim_time:
    # Record the decision point before acting on it
    episode_states.append(state)
    episode_allowed_a.append(allowed_actions)
    print("State shape is :", len(state))
    chosen_index = pol_grad.choose_action(state, allowed_actions)
    one_hot_action = np.zeros(action_size)
    one_hot_action[chosen_index] = 1
    episode_actions.append(one_hot_action)

    picked = action_space[chosen_index]

    if my_sim.order_completed:
        # Every recorded transition is weighted with the latest step reward
        states_batch = np.asarray(episode_states)
        reward_batch = np.ones(states_batch.shape[0])
        reward_batch *= my_sim.step_reward
        pol_grad.train_policy_gradient(states_batch, np.asarray(episode_actions), reward_batch, episode_allowed_a)

        # Reset the transition stores
        episode_states, episode_actions, episode_allowed_a = [], [], []

    # Apply the chosen (head type, sequence step) and move to the next decision
    my_sim.run_action(mach, picked[0], picked[1])
    state = get_state(my_sim)
    allowed_actions = my_sim.allowed_actions
    mach = my_sim.next_machine

    print(my_sim.order_completed)
    print(state)
    print(my_sim.step_reward)


# Save the trained PG policy network
pol_grad.save_model("PG_model.h5")

# Total wafers produced
print("Total wafers produced:", len(my_sim.cycle_time))


#Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 


--------------------------------------------------------------------------------
/new/V-PG/PG_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/PG_model.h5


--------------------------------------------------------------------------------
/new/V-PG/__pycache__/PG_Class.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/__pycache__/PG_Class.cpython-37.pyc


--------------------------------------------------------------------------------
/new/V-PG/__pycache__/factory_sim.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/V-PG/__pycache__/factory_sim.cpython-37.pyc


--------------------------------------------------------------------------------
/new/V-PG/factory_sim.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import simpy
  3 | from collections import namedtuple, Counter
  4 | from itertools import count, filterfalse
  5 | import random
  6 | import math
  7 | 
  8 | ####################################################
  9 | ########## CREATING THE WAFER CLASS  ###############
 10 | ####################################################
 11 | class wafer_box(object):
 12 |     def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict):
 13 |         self.env = sim_inst.env
 14 |         self.name = f"w{wafer_index}"
 15 |         self.start_time = sim_inst.env.now
 16 |         self.number_wafers = number_wafers
 17 |         self.HT = HT
 18 |         self.seq = 0
 19 |         self.due_time = self.start_time + lead_dict[self.HT]
 20 | 
 21 | ####################################################
 22 | ########## CREATING THE MACHINE CLASS ##############
 23 | ####################################################
 24 | class Machine(object):
 25 |     def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None):
 26 |         self.env = sim_inst.env
 27 |         self.name = name
 28 |         self.station = station
 29 |         self.available = True
 30 |         self.broken = False
 31 |         self.wafer_being_proc = None
 32 |         self.parts_made = 0
 33 |         self.break_mean = break_mean
 34 | 
 35 |         if break_mean is not None:
 36 |             self.time_to_fail = self.time_to_failure()
 37 | 
 38 |         self.process = None
 39 |         self.repair_mean = repair_mean
 40 | 
 41 |     def time_to_failure(self):
 42 |         """Return time until next failure for a machine."""
 43 |         return random.expovariate(1/self.break_mean)
 44 | 
 45 |     def time_to_repair(self):
 46 |         """Return time until next failure for a machine."""
 47 |         return random.expovariate(1/self.repair_mean)
 48 | 
 49 |     def break_machine(self):
 50 |         """Break the machine after break_time"""
 51 |         assert not self.broken
 52 |         start = self.env.now
 53 |         try:
 54 |             yield self.env.timeout(self.time_to_fail)
 55 |             self.process.interrupt()
 56 |             self.time_to_fail = self.time_to_failure()
 57 |         except:
 58 |             self.time_to_fail -= self.env.now-start
 59 | 
 60 |     def get_proc_time(self, wafer, sim_inst):
 61 |         proc_step = sim_inst.recipes[wafer.HT][wafer.seq]
 62 |         A = proc_step[1]
 63 |         B = proc_step[2]
 64 |         LS = proc_step[3]
 65 |         include_load = proc_step[4]
 66 |         load = proc_step[5]
 67 |         include_unload = proc_step[6]
 68 |         unload = proc_step[7]
 69 |         proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS)
 70 | 
 71 |         if include_load == -1:
 72 |             proc_t += load
 73 |         if include_unload == -1:
 74 |             proc_t += unload
 75 |         return proc_t
 76 | 
 77 |     def part_process(self, wafer, sim_inst):
 78 |         # This function defines a process where a part of head type HT and sequence step seq is processed on the machine
 79 | 
 80 |         # get the amount of time it takes for the operation to run
 81 |         proc_t = self.get_proc_time(wafer, sim_inst)
 82 | 
 83 |         done_in = proc_t
 84 |         while done_in:
 85 |             try:
 86 |                 if self.break_mean is not None:
 87 |                     break_process = self.env.process(self.break_machine())
 88 |                 start = self.env.now
 89 |                 print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start))
 90 |                 # wait until the process is done
 91 |                 yield sim_inst.env.timeout(done_in)
 92 |                 # set the wafer being processed to None
 93 |                 self.wafer_being_proc = None
 94 |                 # set machine to be available to process part
 95 |                 self.available = True
 96 |                 print("Completed the process step of wafer %s on machine %s at %s and sent to "
 97 |                       "next machine."%(wafer.name, self.name, self.env.now))
 98 |                 # set the wafer to be at the next step in the sequence
 99 |                 wafer.seq += 1
100 |                 # if seq is not the last sequence step then find the next station and choose actions for each of the
101 |                 # available machines in that station
102 |                 if wafer.seq < (len(sim_inst.recipes[wafer.HT])):
103 |                     # add the part to the corresponding queue for the next operation in the sequence
104 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer)
105 |                 else:
106 |                     # # add the part to the list of completed parts
107 |                     # sim_inst.queue_lists['complete'].append(wafer)
108 |                     sim_inst.cycle_time.append(self.env.now - wafer.start_time)
109 |                     print("Finished processing wafer %s at %s"%(wafer.name, self.env.now))
110 |                     sim_inst.complete_wafer_dict[wafer.HT]+=1
111 |                     sim_inst.order_completed = True
112 |                     # Update the due_wafers dictionary to indicate that wafers of this head type were completed
113 | 
114 |                     # Find the index of the earliest week for which there are one or more wafers of the given head type
115 |                     # due.
116 |                     week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None)
117 | 
118 |                     # Subtract wafer,number_wafers wafers from the corresponding list element 
119 |                     sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers
120 | 
121 |                     new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index,
122 |                                           sim_inst.lead_dict)
123 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer)
124 |                     lead_time = sim_inst.lead_dict[wafer.HT]
125 |                     total_processing_time = new_wafer.start_time + lead_time
126 |                     week_number = int(total_processing_time / (7 * 24 * 60))
127 |                     sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers
128 |                     sim_inst.wafer_index += 1
129 | 
130 | 
131 | 
132 |                 if self.break_mean is not None:
133 |                     break_process.interrupt()
134 |                 done_in = 0
135 | 
136 |             except simpy.Interrupt:
137 |                 self.broken = True
138 |                 done_in -= self.env.now - start
139 |                 yield self.env.timeout(self.time_to_repair())
140 |                 self.broken = False
141 | 
142 |         # Parts completed by this machine
143 |         self.parts_made += 1
144 | 
145 |     def get_allowed_actions(self, sim_inst):
146 |         #find all (HT, seq) tuples with non zero queues at the station of this machine
147 |         return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station])))
148 | 
149 | ####################################################
150 | ########## CREATING THE FACTORY CLASS ##############
151 | ####################################################
class FactorySim(object):
    """Factory simulation that advances until a scheduling decision is needed.

    ``start`` fills the factory with work in progress and ``run_action``
    dispatches one wafer box to a machine.  Both return as soon as some
    available machine has at least one wafer box queued at its station; that
    machine and its allowed ``(head type, sequence step)`` choices are exposed
    through ``next_machine`` and ``allowed_actions``.
    """

    #Initialize simpy environment and set the amount of time the simulation will run for
    def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None):
        """Set up machines, station queues and due-date bookkeeping.

        Args:
            sim_time: stored as ``Sim_time``; not otherwise used by this class.
            m_dict: mapping of machine name -> station name.
            recipes: mapping of head type -> list of process steps; element 0
                of each step is the name of the station that performs it.
            lead_dict: mapping of head type -> lead time added to a box's
                creation time to obtain its due time.
            wafers_per_box: number of wafers in every wafer box.
            wip_levels: mapping of head type -> number of boxes released by ``start``.
            break_mean: mean time between failures passed to every machine
                (None disables failures).
            repair_mean: mean repair time passed to every machine.
        """
        self.break_mean = break_mean
        self.repair_mean = repair_mean
        self.order_completed = False
        self.allowed_actions = None
        self.env = simpy.Environment()
        self.Sim_time = sim_time
        self.next_machine = None
        self.lead_dict = lead_dict
        self.num_wafers = wafers_per_box
        self.wip_levels = wip_levels

        # Number of future weeks we want to look into for calculating due dates
        self.FUTURE_WEEKS = 1000

        # Initialize an index that will be used to name each wafer box
        self.wafer_index = 0

        # Dictionary where the key is the name of the machine and the value is its station
        self.machine_dict = m_dict

        self.machines_list = [Machine(self, name, station, self.break_mean, self.repair_mean)
                              for name, station in self.machine_dict.items()]

        # List of distinct station names in first-seen order.  A plain set would
        # make the order (and with it the order of station_HT_seq, from which
        # callers derive their action space) depend on string hash randomization.
        self.stations = list(dict.fromkeys(self.machine_dict.values()))

        # sim_inst.recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
        self.recipes = recipes

        # Number of completed wafer boxes for each head type
        self.complete_wafer_dict = {ht: 0 for ht in self.recipes}

        self.number_of_machines = len(self.machine_dict)

        # Queue of waiting wafer_box objects for every station
        self.queue_lists = {station: [] for station in self.stations}

        self.order_complete_time = 0
        self.cycle_time = []
        self.step_reward = 0

        # Number of wafers due in each week, per head type (one list entry per week)
        self.due_wafers = {ht: [0] * self.FUTURE_WEEKS for ht in self.recipes}

        # Creates a dictionary where the key is the toolset name and the value is a list of tuples of all head type and
        # sequence step combinations which may be processed at that station
        self.station_HT_seq = {station: [] for station in self.stations}

        for HT, steps in self.recipes.items():
            for seq, step in enumerate(steps):
                self.station_HT_seq[step[0]].append((HT, seq))

    def _release_wafer(self, ht):
        """Create a wafer box of head type ``ht``, queue it at its first station and book its due week."""
        new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict)
        self.queue_lists[self.recipes[ht][0][0]].append(new_wafer)
        lead_time = self.lead_dict[ht]
        total_processing_time = new_wafer.start_time + lead_time
        due_week_number = int(total_processing_time / (7*24*60))
        self.due_wafers[ht][due_week_number] += self.num_wafers
        self.wafer_index += 1

    def _select_next_machine(self):
        """Publish the first available machine that has work queued.

        Sets ``next_machine`` and ``allowed_actions`` and returns True when
        such a machine exists; otherwise leaves both untouched and returns False.
        """
        for candidate in self.machines_list:
            if not candidate.available:
                continue
            allowed_actions = candidate.get_allowed_actions(self)
            if len(allowed_actions) > 0:
                self.next_machine = candidate
                self.allowed_actions = allowed_actions
                return True
        return False

    def start(self):
        """Release the initial work in progress and advance to the first decision point."""
        for ht, level in self.wip_levels.items():
            for _ in range(level):
                self._release_wafer(ht)

        # Step the simulation until some available machine has something to process.
        while not self._select_next_machine():
            self.env.step()

    def run_action(self, machine, ht, seq):
        """Start processing a queued wafer box on ``machine`` and advance to the next decision point.

        The first wafer box in the machine's station queue with head type ``ht``
        at sequence step ``seq`` is chosen.  ``order_completed`` and
        ``step_reward`` are reset at the start.  For every simulation step taken
        while waiting, ``step_reward`` is reduced by the elapsed time multiplied,
        per head type, by the number of wafers due in weeks before the current one.
        """
        self.order_completed = False
        self.step_reward = 0
        # Set the machine to be unavailable to process parts because it is now busy
        assert machine.available
        machine.available = False
        # Find the wafer that has that HT and seq
        wafer_choice = next(wafer for wafer in self.queue_lists[machine.station] if wafer.HT == ht and wafer.seq == seq)
        # set the wafer being processed on this machine to wafer_choice
        machine.wafer_being_proc = wafer_choice
        # Remove the part from its queue
        self.queue_lists[machine.station].remove(wafer_choice)
        # Begin processing the part on the machine
        machine.process = self.env.process(machine.part_process(wafer_choice, self))

        while not self._select_next_machine():
            before_time = self.env.now
            self.env.step()
            time_change = self.env.now - before_time
            current_week = math.ceil(self.env.now / (7 * 24 * 60))  # Calculating the current week
            for value in self.due_wafers.values():
                # Wafers whose due week lies before the current week
                self.step_reward -= time_change * sum(value[:current_week])
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 


--------------------------------------------------------------------------------
/new/V-PG/rollout_PG.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | from keras.models import load_model
 11 | 
 12 | sim_time = 1e5
 13 | WEEK = 24*7
 14 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 15 | num_seq_steps = 10
 16 | 
 17 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 18 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 19 | 
 20 | recipes = recipes[recipes.MAXIMUMLS != 0]
 21 | 
 22 | # Create the machine dictionary (machine:station)
 23 | machine_d = dict()
 24 | for index, row in machines.iterrows():
 25 |     d = {row[0]:row[1]}
 26 |     machine_d.update(d)
 27 | 
 28 | # Modifying the above list to match the stations from the two datasets 
 29 | a = machines.TOOLSET.unique()
 30 | b = recipes.TOOLSET.unique()
 31 | common_stations = (set(a) & set(b))
 32 | ls = list(common_stations)
 33 | 
 34 | # This dictionary has the correct set of stations
 35 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 36 | 
 37 | # Removing unncommon rows from recipes 
 38 | for index, row in recipes.iterrows():
 39 |     if row[2] not in ls:
 40 |         recipes.drop(index, inplace=True)
 41 | 
 42 | recipes = recipes.dropna()
 43 | recipe_dict = dict()
 44 | for ht in list(recipes.HT.unique()):
 45 |     temp = recipes.loc[recipes['HT'] == ht]
 46 |     if len(temp) > 1:
 47 |         ls = []
 48 |         for index, row in temp.iterrows():
 49 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 50 |         d  = {ht:ls}
 51 |         recipe_dict.update(d)
 52 |     else:
 53 |         ls = []
 54 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 55 |         d = {ht:ls}
 56 |         recipe_dict.update(d)
 57 | 
 58 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 59 | for ht, step in recipe_dict.items():
 60 |     recipe_dict[ht] = step[0:num_seq_steps]
 61 | 
 62 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 63 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 64 | machine_dict = modified_machine_dict
 65 | 
 66 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 67 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 68 | recipes = recipe_dict
 69 | 
 70 | wafers_per_box = 4
 71 | 
 72 | break_mean = 1e5
 73 | 
 74 | repair_mean = 20
 75 | 
 76 | # average lead time for each head type
 77 | head_types = recipes.keys()
 78 | lead_dict = {}
 79 | 
 80 | wip_levels = {}
 81 | 
 82 | for ht in head_types:
 83 |     d = {ht:40000}
 84 |     lead_dict.update(d)
 85 | 
 86 |     w = {ht:10}
 87 |     wip_levels.update(w)
 88 | 
 89 | 
 90 | # Simple pad utility function
 91 | def pad(l, content, width):
 92 |     l.extend([content] * (width - len(l)))
 93 |     return l
 94 | 
 95 | 
 96 | ####################################################
 97 | ########## CREATING THE STATE SPACE  ###############
 98 | ####################################################
 99 | def get_state(sim):
100 |     # Calculate the state space representation.
101 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
102 |     # step
103 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
104 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
105 |                  list(range(len(sim.recipes[ht]) + 1))]
106 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
107 |     b = np.zeros(len(sim.machines_list))
108 |     b[sim.machines_list.index(sim.next_machine)] = 1
109 |     state_rep.extend(b)
110 |     # Append the due dates list to the state space for making the decision
111 |     rolling_window = [] # This is the rolling window that will be appended to state space
112 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
113 |     current_time = sim.env.now # Calculating the current time
114 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
115 | 
116 |     for key, value in sim.due_wafers.items():
117 |         k = value[current_week:current_week+max_length_of_window] #Adding only the values from current week up till the window length
118 |         if len(k) < max_length_of_window: #if list is less than length of window, then append 0's 
119 |             k = pad(k, 0, max_length_of_window)
120 | 
121 |         rolling_window.append(k) 
122 |         buffer_list = [] # This list stores value of previous unfinished wafers count
123 |         buffer_list.append(sum(value[:current_week]))
124 |         rolling_window.extend([buffer_list])
125 |     print("rolling_window: ", rolling_window)
126 |     c = sum(rolling_window, [])
127 |     state_rep.extend(c) # Appending the rolling window to state space 
128 |     return state_rep
129 | 
130 | 
131 | 
132 | 
133 | 
134 | #####################################################################
135 | ######################### LOADING THE TRAINED POLICY ################
136 | #####################################################################
def my_custom_loss():
    """Build and return the reward-weighted cross-entropy loss function.

    Returns:
        A function ``custom_loss(y_pred, y_true, discounted_episode_rewards)``
        that computes the mean of the softmax cross-entropy between ``y_pred``
        (logits) and ``y_true`` (labels), weighted by the discounted rewards.
    """
    def custom_loss(y_pred, y_true, discounted_episode_rewards):
        # NOTE(review): `tf` is not imported in this script, so calling this
        # function raises NameError until tensorflow is imported as `tf`.
        neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y_true)
        loss = tf.reduce_mean(neg_log_prob * discounted_episode_rewards)
        return loss

    # Hand the inner function back to the caller; without this the factory returned None.
    return custom_loss
142 | 
143 | 
# Load the trained policy network from disk.
# NOTE(review): 'custom_loss' is mapped to the factory `my_custom_loss` itself,
# not to a loss function built by calling it -- confirm this matches how
# PG_model.h5 was saved.
model = load_model("PG_model.h5", custom_objects={'custom_loss': my_custom_loss})
145 | 
146 | 
# Sample an action index from the policy network's predicted probabilities, restricted to the currently allowed actions
def choose_action(state, allowed_actions, action_space, state_size):
    """Sample the index of an allowed action from the model's prediction.

    Args:
        state: state vector with ``state_size`` entries.
        allowed_actions: the actions that may be taken now.
        action_space: all actions; the returned value indexes this sequence.
        state_size: length of ``state``.

    Returns:
        An index into ``action_space``, drawn at random with the model's
        predicted values over the allowed actions as weights.  When those
        masked predictions sum to zero, the allowed actions are drawn
        uniformly instead.
    """
    num_actions = len(action_space)

    # Mask with a 1 for every entry of the action space that is currently allowed.
    allowed_act_prob = np.zeros((1, num_actions))
    for position, candidate in enumerate(action_space):
        if candidate in allowed_actions:
            allowed_act_prob[0][position] = 1

    print(" ************* CHOOSING A PREDICTED ACTION *************")
    # The model is called with four inputs; actions and rewards are filled with ones.
    filler_actions = np.ones((1, num_actions))
    filler_rewards = np.ones((1, 1))
    state = np.array(state).reshape(1, state_size)
    pred = model.predict([state, filler_actions, filler_rewards, allowed_act_prob])

    # Prefer the masked prediction unless it carries no weight at all.
    masked_pred = allowed_act_prob * pred
    if np.sum(masked_pred) != 0:
        allowed_act_prob = masked_pred

    # Rescale so the weights sum to one, then sample an action index.
    allowed_act_prob *= 1 / np.sum(allowed_act_prob)
    return np.random.choice(range(allowed_act_prob.shape[1]), p=allowed_act_prob.ravel())
171 | 
172 | 
# Create the factory simulation object
my_sim = fact_sim.FactorySim(sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels)
# start the simulation
my_sim.start()
# Retrieve machine object for first action choice
mach = my_sim.next_machine
# State and allowed actions at the first decision point
state = get_state(my_sim)
allowed_actions = my_sim.allowed_actions
# The action space is a list of tuples of the form [('ht1',0), ('ht1',1), ..., ('ht2', 0), ...] indicating the head
# types and sequence steps for all allowed actions.
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
state_size = len(state)


order_count = 0

while my_sim.env.now < sim_time:

    print("State shape is :", len(state))
    # Use the choose_action defined in this script (the previous call went through
    # an undefined `pol_grad` object); it returns an index into action_space.
    action = choose_action(state, allowed_actions, action_space, state_size)

    # Translate the index into its (head type, sequence step) tuple.
    action = action_space[action]

    my_sim.run_action(mach, action[0], action[1])

    # Observe the next decision point.
    state = get_state(my_sim)
    allowed_actions = my_sim.allowed_actions
    mach = my_sim.next_machine

    print(my_sim.order_completed)
    print(state)
    print(my_sim.step_reward)

# Total wafers produced
print("Total wafers produced:", len(my_sim.cycle_time))


#Wafers of each head type
print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
222 | 
223 | 
224 | 
225 | 
226 | 
227 | 
228 | 
229 | 


--------------------------------------------------------------------------------
/new/critical_Ratio/__pycache__/factory_sim.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/svikramank/DeepRLForFactoryOptimization/f129a460849f1d487ccea620fdcf553afbbd2e6d/new/critical_Ratio/__pycache__/factory_sim.cpython-37.pyc


--------------------------------------------------------------------------------
/new/critical_Ratio/critical_ratio.py:
--------------------------------------------------------------------------------
  1 | import factory_sim as fact_sim
  2 | import numpy as np
  3 | import pandas as pd
  4 | import math 
  5 | import matplotlib
  6 | import random
  7 | matplotlib.use('TkAgg')
  8 | import matplotlib.pyplot as plt
  9 | from itertools import chain
 10 | 
 11 | sim_time = 3e5
 12 | WEEK = 24*7
 13 | NO_OF_WEEKS = math.ceil(sim_time/WEEK)
 14 | num_seq_steps = 10
 15 | 
 16 | recipes = pd.read_csv('~/Desktop/GSR/fall19/random/test/recipes.csv')
 17 | machines = pd.read_csv('~/Desktop/GSR/fall19/random/test/machines.csv')
 18 | 
 19 | recipes = recipes[recipes.MAXIMUMLS != 0]
 20 | 
 21 | # Create the machine dictionary (machine:station)
 22 | machine_d = dict()
 23 | for index, row in machines.iterrows():
 24 |     d = {row[0]:row[1]}
 25 |     machine_d.update(d)
 26 | 
 27 | # Modifying the above list to match the stations from the two datasets 
 28 | a = machines.TOOLSET.unique()
 29 | b = recipes.TOOLSET.unique()
 30 | common_stations = (set(a) & set(b))
 31 | ls = list(common_stations)
 32 | 
 33 | # This dictionary has the correct set of stations
 34 | modified_machine_dict = {k:v for k,v in machine_d.items() if v in ls}
 35 | 
 36 | # Removing unncommon rows from recipes 
 37 | for index, row in recipes.iterrows():
 38 |     if row[2] not in ls:
 39 |         recipes.drop(index, inplace=True)
 40 | 
 41 | recipes = recipes.dropna()
 42 | recipe_dict = dict()
 43 | for ht in list(recipes.HT.unique()):
 44 |     temp = recipes.loc[recipes['HT'] == ht]
 45 |     if len(temp) > 1:
 46 |         ls = []
 47 |         for index, row in temp.iterrows():
 48 |             ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 49 |         d  = {ht:ls}
 50 |         recipe_dict.update(d)
 51 |     else:
 52 |         ls = []
 53 |         ls.append([row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]])
 54 |         d = {ht:ls}
 55 |         recipe_dict.update(d)
 56 | 
 57 | # take only the first num_seq_steps sequence steps for each recipe to reduce the complexity of the simulation.
 58 | for ht, step in recipe_dict.items():
 59 |     recipe_dict[ht] = step[0:num_seq_steps]
 60 | 
 61 | # Dictionary where the key is the name of the machine and the value is [station, proc_t]
 62 | # machine_dict = {'m0': 's1', 'm2': 's2', 'm1': 's1', 'm3': 's2'}
 63 | machine_dict = modified_machine_dict
 64 | 
 65 | # recipes give the sequence of stations that must be processed at for the wafer of that head type to be completed
 66 | # recipes = {"ht1": [["s1", 5, 0]], "ht2": [["s1", 5, 0], ["s2", 5, 0]]}
 67 | recipes = recipe_dict
 68 | 
 69 | wafers_per_box = 4
 70 | 
 71 | break_mean = 1e5
 72 | 
 73 | repair_mean = 20
 74 | 
 75 | # average lead time for each head type
 76 | head_types = recipes.keys()
 77 | lead_dict = {}
 78 | 
 79 | wip_levels = {}
 80 | 
 81 | for ht in head_types:
 82 |     d = {ht:10000}
 83 |     lead_dict.update(d)
 84 | 
 85 |     w = {ht:10}
 86 |     wip_levels.update(w)
 87 | 
 88 | 
 89 | ####################################################
 90 | ########## CREATING THE STATE SPACE  ###############
 91 | ####################################################
 92 | def get_state(sim):
 93 |     # Calculate the state space representation.
 94 |     # This returns a list containing the number of` parts in the factory for each combination of head type and sequence
 95 |     # step
 96 |     state_rep = [len([wafer for queue in sim.queue_lists.values() for wafer in queue if wafer.HT
 97 |                  == ht and wafer.seq == s]) for ht in list(sim.recipes.keys()) for s in
 98 |                  list(range(len(sim.recipes[ht]) + 1))]
 99 |     # b is a one-hot encoded list indicating which machine the next action will correspond to
100 |     b = np.zeros(len(sim.machines_list))
101 |     b[sim.machines_list.index(sim.next_machine)] = 1
102 |     state_rep.extend(b)
103 |     # Append the due dates list to the state space for making the decision
104 |     rolling_window = [] # This is the rolling window that will be appended to state space
105 |     max_length_of_window = math.ceil(max(sim.lead_dict.values()) / (7*24*60)) # Max length of the window to roll 
106 |     current_time = sim.env.now # Calculating the current time
107 |     current_week = math.ceil(current_time / (7*24*60)) #Calculating the current week 
108 | 
109 |     for key, value in sim.due_wafers.items():
110 |         rolling_window.append(value[current_week:current_week+max_length_of_window]) #Adding only the values from current week up till the window length
111 |         buffer_list = [] # This list stores value of previous unfinished wafers count
112 |         buffer_list.append(sum(value[:current_week]))
113 |         rolling_window.extend([buffer_list])
114 | 
115 |     c = sum(rolling_window, [])
116 |     state_rep.extend(c) # Appending the rolling window to state space 
117 |     return state_rep
118 | 
119 | 
120 | 
121 | ####################################################
122 | ########## CHOOSING AN ACTION HERE  ################
123 | ####################################################
124 | 
125 | '''
126 | Critical Ratio. The critical ratio (CR) is calculated by dividing the time remaining until
127 | a job’s due date by the total shop time remaining for the job, which is defined as the
128 | setup, processing, move, and expected waiting times of all remaining operations,
129 | including the operation being scheduled. 
130 | 
131 | CR = (Due date - Today’s date) / (Total shop time remaining)
132 | 
133 | The difference between the due date and today’s date must be in the same time units as
134 | the total shop time remaining. A ratio less than 1.0 implies that the job is behind schedule, 
135 | and a ratio greater than 1.0 implies that the job is ahead of schedule. The job with
136 | the lowest CR is scheduled next.
137 | 
138 | '''
139 | 
def choose_action(sim, state, sim_time):
    """Choose the queued wafer box with the lowest critical ratio.

    The candidates are the wafer boxes queued at the station of
    ``sim.next_machine``.  The ratio used here is
    ``(due_time - now) / (sim_time - now)``, i.e. the remaining simulation
    time stands in for the total shop time remaining.  The ratio is signed,
    so an overdue box has a negative ratio and is scheduled before boxes that
    are still ahead of their due time.

    Args:
        sim: the factory simulation.
        state: unused; kept for interface compatibility.
        sim_time: total simulation time.

    Returns:
        The ``(HT, seq)`` tuple of the chosen wafer box.

    Raises:
        ValueError: if no wafer box is queued at the station.
    """
    wafer_list = sim.queue_lists[sim.next_machine.station]

    # With a single candidate there is nothing to compare.
    if len(wafer_list) == 1:
        only_wafer = wafer_list[0]
        return (only_wafer.HT, only_wafer.seq)

    now = sim.env.now
    time_left = sim_time - now
    # min() returns the first wafer box among equal ratios, i.e. queue order breaks ties.
    waf_to_choose = min(wafer_list, key=lambda waf: (waf.due_time - now) / time_left)
    return (waf_to_choose.HT, waf_to_choose.seq)
155 | 
156 | 
157 | 
# Build the simulation and advance it to the first decision point.
my_sim = fact_sim.FactorySim(
    sim_time, machine_dict, recipes, lead_dict, wafers_per_box, wip_levels
)
my_sim.start()

# Machine and state for the first action choice.
mach = my_sim.next_machine
state = get_state(my_sim)

# Flatten the per-station (head type, sequence step) lists into one action list.
action_space = [pair for pairs in my_sim.station_HT_seq.values() for pair in pairs]
action_size = len(action_space)

while my_sim.env.now < sim_time:
    # Pick the wafer box by critical ratio and dispatch it on the waiting machine.
    action = choose_action(my_sim, state, sim_time)
    my_sim.run_action(mach, *action)
    print("Step Reward:" + str(my_sim.step_reward))

    # Observe the machine, state, allowed actions and reward at the new decision point.
    next_state = get_state(my_sim)
    next_mach = my_sim.next_machine
    next_allowed_actions = my_sim.allowed_actions
    reward = my_sim.step_reward

    print("state dimension: {}".format(len(state)))
    print("next state dimension: {}".format(len(next_state)))
    print("action space dimension:", action_size)

    # Carry the observation over to the next iteration.
    mach = next_mach
    allowed_actions = next_allowed_actions
    state = next_state
    print("State:", state)


# Summary: number of completed wafer boxes, overall and per head type.
print("Total wafers produced:", len(my_sim.cycle_time))

print("### Wafers of each head type ###")
print(my_sim.complete_wafer_dict)

# Plot the time taken to complete each wafer
plt.plot(my_sim.cycle_time)
plt.xlabel("Wafers")
plt.ylabel("Cycle time")
plt.title("The time taken to complete each wafer")
plt.show()
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 


--------------------------------------------------------------------------------
/new/critical_Ratio/factory_sim.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import simpy
  3 | from collections import namedtuple, Counter
  4 | from itertools import count, filterfalse
  5 | import random
  6 | import math
  7 | 
  8 | ####################################################
  9 | ########## CREATING THE WAFER CLASS  ###############
 10 | ####################################################
 11 | class wafer_box(object):
 12 |     def __init__(self, sim_inst, number_wafers, HT, wafer_index, lead_dict):
 13 |         self.env = sim_inst.env
 14 |         self.name = f"w{wafer_index}"
 15 |         self.start_time = sim_inst.env.now
 16 |         self.number_wafers = number_wafers
 17 |         self.HT = HT
 18 |         self.seq = 0
 19 |         self.due_time = self.start_time + lead_dict[self.HT]
 20 | 
 21 | ####################################################
 22 | ########## CREATING THE MACHINE CLASS ##############
 23 | ####################################################
 24 | class Machine(object):
 25 |     def __init__(self, sim_inst, name, station, break_mean=None, repair_mean=None):
 26 |         self.env = sim_inst.env
 27 |         self.name = name
 28 |         self.station = station
 29 |         self.available = True
 30 |         self.broken = False
 31 |         self.wafer_being_proc = None
 32 |         self.parts_made = 0
 33 |         self.break_mean = break_mean
 34 | 
 35 |         if break_mean is not None:
 36 |             self.time_to_fail = self.time_to_failure()
 37 | 
 38 |         self.process = None
 39 |         self.repair_mean = repair_mean
 40 | 
 41 |     def time_to_failure(self):
 42 |         """Return time until next failure for a machine."""
 43 |         return random.expovariate(1/self.break_mean)
 44 | 
 45 |     def time_to_repair(self):
 46 |         """Return time until next failure for a machine."""
 47 |         return random.expovariate(1/self.repair_mean)
 48 | 
 49 |     def break_machine(self):
 50 |         """Break the machine after break_time"""
 51 |         assert not self.broken
 52 |         start = self.env.now
 53 |         try:
 54 |             yield self.env.timeout(self.time_to_fail)
 55 |             self.process.interrupt()
 56 |             self.time_to_fail = self.time_to_failure()
 57 |         except:
 58 |             self.time_to_fail -= self.env.now-start
 59 | 
 60 |     def get_proc_time(self, wafer, sim_inst):
 61 |         proc_step = sim_inst.recipes[wafer.HT][wafer.seq]
 62 |         A = proc_step[1]
 63 |         B = proc_step[2]
 64 |         LS = proc_step[3]
 65 |         include_load = proc_step[4]
 66 |         load = proc_step[5]
 67 |         include_unload = proc_step[6]
 68 |         unload = proc_step[7]
 69 |         proc_t = A * wafer.number_wafers + B * math.ceil(wafer.number_wafers/LS)
 70 | 
 71 |         if include_load == -1:
 72 |             proc_t += load
 73 |         if include_unload == -1:
 74 |             proc_t += unload
 75 |         return proc_t
 76 | 
 77 |     def part_process(self, wafer, sim_inst):
 78 |         # This function defines a process where a part of head type HT and sequence step seq is processed on the machine
 79 | 
 80 |         # get the amount of time it takes for the operation to run
 81 |         proc_t = self.get_proc_time(wafer, sim_inst)
 82 | 
 83 |         done_in = proc_t
 84 |         while done_in:
 85 |             try:
 86 |                 if self.break_mean is not None:
 87 |                     break_process = self.env.process(self.break_machine())
 88 |                 start = self.env.now
 89 |                 print("started processing wafer %s on machine %s at %s"%(wafer.name, self.name, start))
 90 |                 # wait until the process is done
 91 |                 yield sim_inst.env.timeout(done_in)
 92 |                 # set the wafer being processed to None
 93 |                 self.wafer_being_proc = None
 94 |                 # set machine to be available to process part
 95 |                 self.available = True
 96 |                 print("Completed the process step of wafer %s on machine %s at %s and sent to "
 97 |                       "next machine."%(wafer.name, self.name, self.env.now))
 98 |                 # set the wafer to be at the next step in the sequence
 99 |                 wafer.seq += 1
100 |                 # if seq is not the last sequence step then find the next station and choose actions for each of the
101 |                 # available machines in that station
102 |                 if wafer.seq < (len(sim_inst.recipes[wafer.HT])):
103 |                     # add the part to the corresponding queue for the next operation in the sequence
104 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][wafer.seq][0]].append(wafer)
105 |                 else:
106 |                     # # add the part to the list of completed parts
107 |                     # sim_inst.queue_lists['complete'].append(wafer)
108 |                     sim_inst.cycle_time.append(self.env.now - wafer.start_time)
109 |                     print("Finished processing wafer %s at %s"%(wafer.name, self.env.now))
110 |                     sim_inst.complete_wafer_dict[wafer.HT]+=1
111 |                     # Update the due_wafers dictionary to indicate that wafers of this head type were completed
112 | 
113 |                     # Find the index of the earliest week for which there are one or more wafers of the given head type
114 |                     # due.
115 |                     week_index = next((i for i, x in enumerate(sim_inst.due_wafers[wafer.HT]) if x), None)
116 | 
117 |                     # Subtract wafer,number_wafers wafers from the corresponding list element 
118 |                     sim_inst.due_wafers[wafer.HT][week_index] -= wafer.number_wafers
119 | 
120 |                     new_wafer = wafer_box(sim_inst, sim_inst.num_wafers, wafer.HT, sim_inst.wafer_index,
121 |                                           sim_inst.lead_dict)
122 |                     sim_inst.queue_lists[sim_inst.recipes[wafer.HT][0][0]].append(new_wafer)
123 |                     lead_time = sim_inst.lead_dict[wafer.HT]
124 |                     total_processing_time = new_wafer.start_time + lead_time
125 |                     week_number = int(total_processing_time / (7 * 24 * 60))
126 |                     sim_inst.due_wafers[wafer.HT][week_number] += sim_inst.num_wafers
127 |                     sim_inst.wafer_index += 1
128 | 
129 | 
130 | 
131 |                 if self.break_mean is not None:
132 |                     break_process.interrupt()
133 |                 done_in = 0
134 | 
135 |             except simpy.Interrupt:
136 |                 self.broken = True
137 |                 done_in -= self.env.now - start
138 |                 yield self.env.timeout(self.time_to_repair())
139 |                 self.broken = False
140 | 
141 |         # Parts completed by this machine
142 |         self.parts_made += 1
143 | 
144 |     def get_allowed_actions(self, sim_inst):
145 |         #find all (HT, seq) tuples with non zero queues at the station of this machine
146 |         return sorted(list(set((wafer.HT, wafer.seq) for wafer in sim_inst.queue_lists[self.station])))
147 | 
148 | ####################################################
149 | ########## CREATING THE FACTORY CLASS ##############
150 | ####################################################
class FactorySim(object):
    """Discrete-event simulation of the factory, driven one scheduling decision at a time.

    ``start()`` seeds the queues and advances the simulation until some idle
    machine has work to choose from; ``run_action()`` applies a choice for
    that machine and advances to the next decision point. After either call,
    ``next_machine`` and ``allowed_actions`` describe the pending decision,
    and after ``run_action()`` ``step_reward`` holds the reward accumulated
    while advancing.

    Args:
        sim_time: amount of time the simulation is meant to run for (stored
            as ``Sim_time``; not otherwise used by this class).
        m_dict: maps machine name -> station name.
        recipes: maps head type -> list of recipe steps; element 0 of each
            step is the station at which the step is processed.
        lead_dict: maps head type -> lead time used to compute due dates.
        wafers_per_box: number of wafers in every wafer box.
        wip_levels: maps head type -> number of boxes released by ``start()``.
        break_mean: mean time between machine failures, or None for none.
        repair_mean: mean machine repair time.
    """

    # The due-date bookkeeping divides the clock by this to get a week
    # number, i.e. the simulation clock is treated as minutes.
    _MINUTES_PER_WEEK = 7 * 24 * 60

    #Initialize simpy environment and set the amount of time the simulation will run for
    def __init__(self, sim_time, m_dict, recipes, lead_dict, wafers_per_box, wip_levels, break_mean=None, repair_mean=None):
        self.break_mean = break_mean
        self.repair_mean = repair_mean
        self.order_completed = False
        self.allowed_actions = None
        self.env = simpy.Environment()
        self.Sim_time = sim_time
        self.next_machine = None
        self.lead_dict = lead_dict
        self.num_wafers = wafers_per_box
        self.wip_levels = wip_levels

        # Number of future weeks we want to look into for calculating due dates
        self.FUTURE_WEEKS = 100

        # Initialize an index that will be used to name each wafer box
        self.wafer_index = 0

        # Dictionary where the key is the name of the machine and the value is
        # the name of the station the machine belongs to
        self.machine_dict = m_dict

        self.machines_list = [Machine(self, name, station, self.break_mean, self.repair_mean)
                              for name, station in self.machine_dict.items()]

        # create a list of all the (distinct) station names
        self.stations = list(set(self.machine_dict.values()))

        # recipes give the sequence of stations that must be processed at for
        # the wafer of that head type to be completed
        self.recipes = recipes

        # number of completed wafer boxes for each head type
        self.complete_wafer_dict = {ht: 0 for ht in self.recipes}

        self.number_of_machines = len(self.machine_dict)

        # Queue of waiting wafer_box objects at each station
        self.queue_lists = {station: [] for station in self.stations}

        self.order_complete_time = 0
        # cycle time of every completed wafer box, in completion order
        self.cycle_time = []
        self.step_reward = 0

        # Number of wafers due in each week, per head type (one independent
        # list of FUTURE_WEEKS counters for every head type)
        self.due_wafers = {ht: [0] * self.FUTURE_WEEKS for ht in self.recipes}

        # Maps station name -> list of all (head type, sequence step)
        # combinations which may be processed at that station
        self.station_HT_seq = {station: [] for station in self.stations}

        for HT, steps in self.recipes.items():
            for seq, step in enumerate(steps):
                self.station_HT_seq[step[0]].append((HT, seq))

    def _select_next_machine(self):
        """Record the first idle machine that has queued work; return whether one exists.

        On success ``next_machine`` and ``allowed_actions`` are updated.
        Machines are examined in ``machines_list`` order.
        """
        for candidate in self.machines_list:
            if candidate.available:
                allowed_actions = candidate.get_allowed_actions(self)
                if len(allowed_actions) > 0:
                    self.next_machine = candidate
                    self.allowed_actions = allowed_actions
                    return True
        return False

    def _release_wafer(self, ht):
        """Create a new wafer box of head type ``ht``, queue it and book its due week."""
        new_wafer = wafer_box(self, self.num_wafers, ht, self.wafer_index, self.lead_dict)
        # a new box always starts at the station of the first recipe step
        self.queue_lists[self.recipes[ht][0][0]].append(new_wafer)
        due_time = new_wafer.start_time + self.lead_dict[ht]
        due_by_week = self.due_wafers[ht]
        # Due dates beyond the tracked horizon are booked in the last tracked
        # week rather than indexing past the end of the list.
        week_number = min(int(due_time / self._MINUTES_PER_WEEK), len(due_by_week) - 1)
        due_by_week[week_number] += self.num_wafers
        self.wafer_index += 1

    def start(self):
        """Release the initial work-in-progress and advance to the first decision point."""
        for ht, level in self.wip_levels.items():
            for _ in range(level):
                self._release_wafer(ht)

        # step the simulation only while no idle machine has anything to do
        while not self._select_next_machine():
            self.env.step()

    def run_action(self, machine, ht, seq):
        """Start processing a queued (``ht``, ``seq``) wafer on ``machine`` and advance.

        The simulation is then stepped until some idle machine has queued
        work. For every step, ``step_reward`` is decreased by the elapsed
        time multiplied by the number of wafers booked as due in the weeks
        before the current one (summed over all head types).

        Raises:
            AssertionError: if ``machine`` is not available.
            ValueError: if no wafer with the given head type and sequence
                step is queued at the machine's station. The machine is left
                available in that case.
        """
        self.order_completed = False
        self.step_reward = 0
        assert machine.available

        # Find the wafer before touching any state, so a bad action cannot
        # leave the machine marked busy with nothing on it.
        station_queue = self.queue_lists[machine.station]
        wafer_choice = next(
            (wafer for wafer in station_queue if wafer.HT == ht and wafer.seq == seq), None)
        if wafer_choice is None:
            raise ValueError(
                "no wafer with head type %r at sequence step %r is queued at station %r"
                % (ht, seq, machine.station))

        # The machine is now busy with the chosen wafer
        machine.available = False
        machine.wafer_being_proc = wafer_choice
        station_queue.remove(wafer_choice)
        # Begin processing the part on the machine
        machine.process = self.env.process(machine.part_process(wafer_choice, self))

        while not self._select_next_machine():
            before_time = self.env.now
            self.env.step()
            time_change = self.env.now - before_time
            # week the clock is currently in (1-based because of the ceil)
            current_week = math.ceil(self.env.now / self._MINUTES_PER_WEEK)
            for due_by_week in self.due_wafers.values():
                # penalise wafers still booked in weeks before the current one
                self.step_reward -= time_change * sum(due_by_week[:current_week])
285 | 
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 


--------------------------------------------------------------------------------