├── .gitignore
├── 1st_on.npy
├── 2nd_on.npy
├── both_on.npy
├── both_off.npy
├── mip-solver.xlsx
├── real-case parameters-experimental-use.xlsx
├── MDP_paper_20220220_AppliedEnergyERevise.docx
├── requirements.txt
├── projectionSimplex.py
├── README.md
├── experiments_comparison.py
├── Simple_Manufacturing_System_routine_strategy.py
├── Simple_Manufacturing_System-Pure_Q-Learning.py
├── reinforcement_learning.py
└── microgrid_manufacturing_system.py


/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | *.txt
3 | !requirements.txt
4 | *.png
5 | __pycache__
6 | 


--------------------------------------------------------------------------------
/1st_on.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/1st_on.npy


--------------------------------------------------------------------------------
/2nd_on.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/2nd_on.npy


--------------------------------------------------------------------------------
/both_on.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/both_on.npy


--------------------------------------------------------------------------------
/both_off.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/both_off.npy


--------------------------------------------------------------------------------
/mip-solver.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/mip-solver.xlsx


--------------------------------------------------------------------------------
/real-case parameters-experimental-use.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/real-case parameters-experimental-use.xlsx


--------------------------------------------------------------------------------
/MDP_paper_20220220_AppliedEnergyERevise.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huwenqing0606/RL-manufacturing/HEAD/MDP_paper_20220220_AppliedEnergyERevise.docx


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | pandas==1.1.5
 2 | matplotlib==3.3.4
 3 | scikit-learn==1.0.2
 4 | retry==0.9.2
 5 | tables==3.7.0
 6 | keras==2.3.1
 7 | tensorflow==1.14.0
 8 | h5py<3.0.0
 9 | python-decouple==3.6
10 | click==8.0.4
11 | statsmodels==0.13.2
12 | protobuf==3.20.*
13 | mip


--------------------------------------------------------------------------------
/projectionSimplex.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Fri Jan  3 10:38:06 2020
 4 | 
 5 | @author: huwenqing
 6 | 
 7 | Title: Projection operator onto the 2-dim simplex {(\lambda_1, \lambda_2): \lambda_1\geq 0, \lambda_2\geq 0, 0\leq \lambda_1+\lambda_2\leq 1}
 8 | """
 9 | import numpy as np
10 | import operator
11 | from functools import reduce
12 | import matplotlib.pyplot as plt
13 | 
def projectionSimplex(theta_0):
    """Project a 2-d point onto the triangle with vertices (0,0), (1,0), (0,1).

    The triangle is {(l1, l2): l1 >= 0, l2 >= 0, l1 + l2 <= 1}.  The plane is
    split into the regions that map to a vertex, to an edge, or (inside the
    triangle) to the point itself.

    theta_0: indexable holding the two coordinates at positions 0 and 1.
    Returns a new two-element list, or ``theta_0`` itself (same object) when
    no region outside the triangle matches.
    """
    first = theta_0[0]
    second = theta_0[1]
    gap = first - second

    # Both coordinates negative: the nearest point is the origin.
    if first < 0 and second < 0:
        return [0, 0]
    # Below the horizontal edge: drop straight up onto it.
    if 0 <= first <= 1 and second < 0:
        return [first, 0]
    # Beyond the right-hand vertex.
    if first > 1 and gap > 1:
        return [1, 0]
    # Beyond the hypotenuse: orthogonal projection onto l1 + l2 = 1.
    if first > 0 and second > 0 and first + second >= 1 and -1 <= gap <= 1:
        return [(1 + first - second) / 2, (1 - first + second) / 2]
    # Beyond the top vertex.
    if second > 1 and gap < -1:
        return [0, 1]
    # Left of the vertical edge: move horizontally onto it.
    if 0 <= second <= 1 and first < 0:
        return [0, second]
    # Already inside (or on the boundary of) the triangle.
    return theta_0
31 | 
32 | 
def projection(theta):
    """Project a 6-vector pairwise onto the 2-dim simplex.

    The entries (0,1), (2,3) and (4,5) of ``theta`` are each passed to
    projectionSimplex as a two-element list; the three results are
    concatenated in that order with ``+``.
    """
    projected_pairs = [
        projectionSimplex([theta[start], theta[start + 1]])
        for start in (0, 2, 4)
    ]
    return reduce(operator.add, projected_pairs)
36 | 
37 | 
if __name__ == "__main__":

    # Draw the three edges of the simplex in green.
    simplex_edges = (
        ([0, 0], [0, 1]),
        ([0, 1], [1, 0]),
        ([1, 0], [0, 0]),
    )
    for edge_x, edge_y in simplex_edges:
        plt.plot(edge_x, edge_y, color='g')

    # Sample a random 6-vector and project each coordinate pair.
    theta = np.random.uniform(-1, 1, size=6)
    print(theta)
    projected = projection(theta)
    print(projected)

    # Join every original point to its projection (red segment, blue ends).
    for start in (0, 2, 4):
        seg_x = [theta[start], projected[start]]
        seg_y = [theta[start + 1], projected[start + 1]]
        plt.plot(seg_x, seg_y, color='r')
        plt.scatter(seg_x, seg_y, color='b')

    plt.xlim(-1, 2)
    plt.ylim(-1, 2)
    plt.show()
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RL-manufacturing
 2 | Source code for the paper 
 3 | 
 4 | Joint Control of Manufacturing and Onsite Microgrid System via Novel Neural-Network Integrated Reinforcement Learning Algorithms
 5 | 
 6 | by Yang, J., Sun, Z., Hu, W. and Steimeister, L.
 7 | 
 8 | Accepted at <i>Applied Energy</i>.
 9 | 
10 | <b>The paper with Supplementary Materials is available here as the file MDP_paper_20220220_AppliedEnergyERevise.docx</b>
11 | 
12 | The run files are 
13 | 
14 | 1. <b>experiments_comparison.py</b>
15 | 
 16 |   compares the efficiency of the policy selected by reinforcement learning, the routine strategy obtained by mixed-integer programming, and the benchmark random policy.
17 |   
18 | 2. <b>mip_plot.ipynb, plot_average_experiments.ipynb</b>
19 | 
20 |   plot the comparison of total energy cost and total production throughput in units for the optimal policy and mixed-integer programming policy; also plot the average over 3 times of these experiments.
21 | 
22 | 
23 | The main files are
24 | 
25 | 3. <b>microgrid_manufacturing_system.py</b>
26 | 
27 |   simulates the joint operation of microgrid and manufacturing system.
28 | 
29 | 4. <b>reinforcement_learning.py</b>
30 | 
31 |   reinforcement learning via two layer fully connected neural network. 
32 | 
33 | 5. <b>Simple_Manufacturing_System-Pure_Q-Learning.py, 1st_on.npy, 2nd_on.npy, both_off.npy, both_on.npy</b>
34 | 
35 |   learn the microgrid-manufacturing system using pure Q-learning. This is to compare with our new method.
36 | 
37 | 6. <b>Simple_Manufacturing_System_routine_strategy.py</b>
38 | 
39 |   learn the microgrid-manufacturing system using routine strategy via linear mixed-integer programming.
40 |   
41 | 7. <b>mip-solver.xlsx</b>
42 | 
 43 |   computes the total cumulative energy cost and the total production units for a given mixed-integer programming solution.
44 | 
45 | 
46 | The auxiliary files are
47 | 
48 | 8. <b>projectionSimplex.py</b>
49 | 
 50 |   projection operator onto the simplex D^c={(x_1, x_2): 0\leq x_i\leq 1, x_1+x_2\leq 1}.
51 | 
52 | 9. <b>SolarIrradiance.csv, WindSpeed.csv, rate_consumption_charge.csv</b>
53 | 
54 |   1 year data in 8640 hours (360 days * 24 hours) for solar irradiance, wind speed and rate of consumption charge.
55 | 
56 | 10. <b>real-case parameters-experimental-use.xlsx</b>
57 |   
58 |   the scaled real-case parameters for the manufacturing system and the microgrid used in the experiment.
59 | 


--------------------------------------------------------------------------------
/experiments_comparison.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Mar 21 15:18:07 2020
  5 | 
  6 | @author: Wenqing Hu (Missouri S&T)
  7 | 
  8 | Title: Experiment for the paper <<Novel Reinforcement Learning Algorithms applied to Joint Control 
  9 |                                     of Manufacturing and Onsite Microgrid System>>
 10 |                                     
 11 | #################################### MAIN FILE FOR RUNNING ALL TESTS #####################################
 12 | 
 13 | Experiment consists of 
 14 | 1. Comparison of the total cost, total throughput and total energy demand for the 
 15 | optimal policy selected by reinforcement learning and the random policy; 
 16 | 2. Comparison of the total cost, total throughput and total energy demand for the 
 17 | optimal policy selected by reinforcement learning and the routine strategy via mixed-integer programming.
 18 | """
 19 | 
 20 | 
 21 | from microgrid_manufacturing_system import SystemInitialize
 22 | from reinforcement_learning import Reinforcement_Learning_Training, Reinforcement_Learning_Testing, Benchmark_RandomAction_Testing
 23 | from Simple_Manufacturing_System_routine_strategy import RoutineStrategy_Testing
 24 | 
 25 | 
 26 | import numpy as np
 27 | import matplotlib.pyplot as plt
 28 | import time
 29 | 
 30 | 
# ---------------------------------------------------------------------------
# Experiment driver: train the reinforcement-learning policy, test it against
# a random-action benchmark, plot the comparison, then run the mixed-integer
# routine strategy with the output reached by the learned policy as target.
# ---------------------------------------------------------------------------

#set the number of machines
number_machines=5
#set the unit reward of production
unit_reward_production=10000/10000
#the unit reward for each unit of production (10^4$/unit produced), i.e. the r^p, this applies to the end of the machine sequence#

#the initial learning rates for the theta and omega iterations#
lr_theta_initial=0.003
lr_omega_initial=0.0003

#number of training and testing iterations#
training_number_iteration=5000
testing_number_iteration=100

#set the initial machine states, machine control actions and buffer states
#(one state/action per machine, one buffer between consecutive machines)
initial_machine_states=["Opr" for _ in range(number_machines)]
initial_machine_actions=["K" for _ in range(number_machines)]
initial_buffer_states=[100 for _ in range(number_machines-1)]
    
#initialize the system
System=SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)
    
#randomly generate an initial theta and plot the boundary of the simplex where theta moves#
r=np.random.uniform(0,1,size=6)
    
#initialize the theta variable#
#each pair (r_a*r_b, r_a*(1-r_b)) is non-negative and sums to r_a<=1, so it starts inside the simplex
theta=[r[0]*r[1], r[0]*(1-r[1]), r[2]*r[3], r[2]*(1-r[3]), r[4]*r[5], r[4]*(1-r[5])] 
#record the initial theta applied before training
#NOTE: this binds the same list object; theta is rebound by the training call below
thetainit=theta
    
#draw the three edges of the simplex in green
x = [[0, 0], [0, 1], [1, 0]] 
y = [[0, 1], [1, 0], [0, 0]]
plt.figure(figsize = (14,10))
for i in range(len(x)): 
    plt.plot(x[i], y[i], color='g')
    
#time.process_time() counts CPU time of this process, not wall-clock time
RL_start = time.process_time()    

theta, omega, my_critic = Reinforcement_Learning_Training(System, 
                                                          thetainit, 
                                                          lr_theta_initial, 
                                                          lr_omega_initial, 
                                                          training_number_iteration)
    
RL_end = time.process_time()
    
#with the optimal theta and optimal omega at hand, run the system at a certain time horizon#
#output the optimal theta and optimal omega#
thetaoptimal=theta
omegaoptimal=omega  
my_critic_optimal=my_critic

#initialize the system
#(a fresh system is built so that testing starts from the same initial states as training)
System=SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)

totalcostlist_optimal, totalthroughputlist_optimal, totalenergydemandlist_optimal, RL_target_output = Reinforcement_Learning_Testing(System, 
                                                                                                                                     thetainit, 
                                                                                                                                     thetaoptimal, 
                                                                                                                                     omegaoptimal, 
                                                                                                                                     my_critic_optimal, 
                                                                                                                                     testing_number_iteration, 
                                                                                                                                     unit_reward_production
                                                                                                                                     )
    



#As benchmark, with initial theta and randomly simulated actions, run the system at a certain time horizon#
    
#initialize the system
System=SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)

totalcostlist_benchmark, totalthroughputlist_benchmark, totalenergydemandlist_benchmark, random_target_output = Benchmark_RandomAction_Testing(System, 
                                                                                                                                               thetainit, 
                                                                                                                                               testing_number_iteration, 
                                                                                                                                               unit_reward_production 
                                                                                                                                               )



#if target output is not enough, simply quit, else, continue with comparison and further experiments
#NOTE(review): the factor 0 makes the right-hand side zero for finite values, so this
#branch is only taken when RL_target_output is negative -- confirm this is intended
if RL_target_output<0*random_target_output:
    print("Not enough production! Test Ended Without Plotting the Comparison...")
else:    
    #plot and compare the total cost, the total throughput and the total energy demand for optimal control and random control (benchmark)#
    #values are multiplied by 10000 for the dollar plots (presumably stored in units of 10^4$ -- see unit_reward_production)
    #plot the total cost#
    plt.figure(figsize = (14,10))
    plt.plot([value*10000 for value in totalcostlist_optimal], '-', color='r')
    plt.plot([value*10000 for value in totalcostlist_benchmark], '--', color='b')
    plt.xlabel('iteration')
    plt.ylabel('total cost ($)')
    plt.title('Total cost under optimal policy (red, solid) and benchmark random policy (blue, dashed)')
    plt.savefig('totalcost.png')
    plt.show()  

    #plot the total throughput, in dollar amount#
    plt.figure(figsize = (14,10))
    plt.plot([value*10000 for value in totalthroughputlist_optimal], '-', color='r')
    plt.plot([value*10000 for value in totalthroughputlist_benchmark], '--', color='b')
    plt.xlabel('iteration')
    plt.ylabel('total throughput ($)')
    plt.title('Total throughput under optimal policy (red, solid) and benchmark random policy (blue, dashed)')
    plt.savefig('totalthroughput.png')
    plt.show()  
    
    #plot the total throughput, in production units#
    #dividing by the unit reward converts the throughput value back into a unit count
    plt.figure(figsize = (14,10))
    plt.plot([value/unit_reward_production for value in totalthroughputlist_optimal], '-', color='r')
    plt.plot([value/unit_reward_production for value in totalthroughputlist_benchmark], '--', color='b')
    plt.xlabel('iteration')
    plt.ylabel('total throughput (production unit)')
    plt.title('Total throughput (production unit) under optimal policy (red, solid) and benchmark random policy (blue, dashed)')
    plt.savefig('totalthroughput_unit.png')
    plt.show()  

    #plot the total energy demand#
    plt.figure(figsize = (14,10))
    plt.plot([value*10000 for value in totalenergydemandlist_optimal], '-', color='r')
    plt.plot([value*10000 for value in totalenergydemandlist_benchmark], '--', color='b')
    plt.xlabel('iteration')
    plt.ylabel('total energy cost ($)')
    plt.title('Total energy cost under optimal policy (red, solid) and benchmark random policy (blue, dashed)')
    plt.savefig('totalenergycost.png')
    plt.show()  
    
    
    """
    The 2nd Comparision Test: Comparison of the total cost, total throughput and total energy demand for the 
        optimal policy selected by reinforcement learning and the routine strategy selected by the mixed-integer programming;        
    """
    #the mixed-integer program is asked to reach the (truncated) output achieved by the learned policy
    target_output=int(RL_target_output)

    routine_start = time.process_time()
    
    #results are written to a file by RoutineStrategy_Testing; its return value is not used here
    RoutineStrategy_Testing(testing_number_iteration, target_output)
    
    routine_end = time.process_time()
    
    print("Reinforcement Learning Training Time =", RL_end-RL_start, " seconds \n")
    print("Mixed-Integer Programming Training Time =", routine_end-routine_start, " seconds")
171 | 
172 | 


--------------------------------------------------------------------------------
/Simple_Manufacturing_System_routine_strategy.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat Mar  7 21:16:27 2020
  4 | """
  5 | 
  6 | import numpy as np
  7 | 
  8 | """
  9 | *************************************Linear and Mixed-Integer Programming*************************************
 10 | @author: Louis Steimeister (Missouri S&T)
 11 | """
 12 | from scipy import optimize as opt
 13 | import scipy as sp
 14 | from matplotlib import pyplot as plt
 15 | import mip
 16 | 
# Length of one time step; it scales the rated power in the cost vector
# (time unit not stated here -- TODO confirm).
dt = 1
# Number of machines in the production line.
num_machines = 5
# Number of time steps covered by the optimisation.
time_horizon = 100
# Maximum content of each buffer (the "buffermax"); there is one buffer
# between each pair of consecutive machines, hence num_machines-1 entries.
capacity_of_buffer = [1000]*(num_machines-1)
# Rated power of each machine, measured in megawatts (1 MW = 1000 kW).
rated_power_of_machine = [99.0357/1000, 87.0517/1000, 91.7212/1000, 139.3991/1000, 102.8577/1000]
# Units each machine moves per time step when it is producing.
production_rate_of_machine = [1]*num_machines
 25 | 
 26 | 
 27 | def Mixed_Integer_Program(target_output):
 28 |     ###########################################
 29 |     # Set up Condition
 30 |     # Bx <= C
 31 |     ###########################################
 32 |     # Buffer Condition
 33 |     DELTA       =  (sp.sparse.hstack([sp.sparse.diags(production_rate_of_machine[0:(num_machines-1)]),
 34 |                                       sp.sparse.csr_matrix((num_machines-1,1))])
 35 |                     -sp.sparse.hstack([sp.sparse.csr_matrix((num_machines-1,1)),
 36 |                                        sp.sparse.diags(production_rate_of_machine[1:num_machines])
 37 |                                        ]))
 38 |     #print("Delta shape: ", DELTA.shape)
 39 |     ZERO        =  sp.sparse.csr_matrix(DELTA.shape)
 40 |     B1_descr    =  [["D" if i<=j else "Z" for i in range(num_machines)] for j in range(num_machines)]
 41 |     B1_mat_vec  =  [[DELTA if i<=j else ZERO for i in range(time_horizon)] for j in range(time_horizon)]
 42 |     B1          =  sp.sparse.bmat(B1_mat_vec)
 43 |     B2          = -B1
 44 |     C1          =  np.array([capacity_of_buffer for i in range(time_horizon)]).flatten()
 45 |     C2          =  np.zeros(B2.shape[0])
 46 |     #print("BufferMat:", B1.todense())
 47 |     #print("BufferMat >= 0:", B2.todense())
 48 |     #print("1 shape: ", B1.shape, C1.shape)
 49 |     #print("2 shape: ", B2.shape, C2.shape)
 50 |     del B1_mat_vec
 51 |     #print(B1_descr)
 52 |     ###########################################
 53 |     # Production Condition
 54 |     #B3          = sp.sparse.eye(num_machines*time_horizon)
 55 |     #B4          = -B3
 56 |     #C3          = np.ones(num_machines*time_horizon)
 57 |     #C4          = np.zeros(num_machines*time_horizon)
 58 |     ###########################################
 59 |     # Minimal Production Condition
 60 |     B5  = -np.concatenate([np.array([0]*(num_machines-1)+[1]) for _ in range(time_horizon)]).reshape((1,num_machines*time_horizon))
 61 |     C5          = -np.array([target_output])
 62 |     ###########################################
 63 |     # Finalize Conditions
 64 |     #print([B1,B2,B3,B4,B5])
 65 |     #B           = sp.sparse.vstack([B1,B2,B3,B4,B5])
 66 |     #C           = np.concatenate([C1,C2,C3,C4,C5])
 67 |     B           = sp.sparse.vstack([B1,B2,B5])
 68 |     C           = np.concatenate([C1,C2,C5])
 69 |     #print(B, "dim: ", B.shape)
 70 |     #print(C, "dim: ", C.shape)
 71 |     
 72 |     ###########################################
 73 |     # Linear programming
 74 |     # Formulate Minimization
 75 |     # min! Ax
 76 |     
 77 |     A           = np.transpose(np.array(rated_power_of_machine*time_horizon))*dt
 78 |     Bounds = np.hstack([np.zeros((num_machines*time_horizon,1)),
 79 |                         np.ones((num_machines*time_horizon,1))])                         
 80 |     res         = opt.linprog(c=A,A_ub=B,b_ub = C,bounds=Bounds,options = {"maxiter": 100000, "rr": False})                
 81 |     prod_mat    = np.round(np.array(res.x).reshape((num_machines,time_horizon),order = "F"),decimals=5)
 82 |     #print(prod_mat)
 83 |     #print("output is:",np.round(-B5 * res.x,5))
 84 |     #print("Buffer is:",np.round(B1 * res.x ,5))
 85 |     
 86 |                 
 87 | 
 88 |     #fig, ax = plt.subplots(num_machines,1, figsize=(10,20))
 89 |     #for k, a in enumerate(ax):
 90 |     #  a.plot(prod_mat[k,:])
 91 |     #  a.set_ylim(-.01,1)
 92 |     #  a.axhline(0)
 93 |     #  # plt.plot()
 94 |     
 95 |     #Mixed integer programming
 96 |     
 97 |     # set up optimization model
 98 |     m = mip.Model()
 99 |     # create decision/optimization variables
100 |     # x = "Production" in {0,1}
101 |     x = [m.add_var(var_type=mip.BINARY) for _ in range(time_horizon*num_machines)]
102 |     #print(x)
103 |     
104 |     #define matrix multiplication which outputs linear combinations of the optimization variable
105 |     def mipMatMult(Mat,Vec):
106 |         if isinstance(Mat,sp.sparse.coo_matrix):
107 |             my_MAT = Mat.tocsr()
108 |         else: my_MAT = Mat
109 |         out=[]
110 |         for i in range(Mat.shape[0]):
111 |             temp = mip.entities.LinExpr()
112 |             for j in range(Mat.shape[1]):
113 |                 if my_MAT[i,j] != 0:
114 |                     temp += my_MAT[i,j]*x[j]
115 |             out.append(temp)
116 |         return out
117 | 
118 | 
119 |     # add constraints to the optimization model
120 |     ineq_constraint_lst = mipMatMult(B,x)
121 |     for k in range(B.shape[0]):
122 |         m += ineq_constraint_lst[k]<=C[k]
123 | 
124 |     # define objective function
125 |     objective = mip.entities.LinExpr()
126 |     for k in range(len(x)):
127 |         if C[k]!=0:
128 |             objective += C[k]*x[k]
129 |     m.objective= mip.minimize(objective)
130 | 
131 |     # run optimization
132 |     m.integer_tol = .0001
133 |     m.start = [(x[k], 1.0) for k in range(len(x))]
134 |     status = m.optimize()
135 |     if status == mip.OptimizationStatus.OPTIMAL:
136 |         print('optimal solution cost {} found'.format(m.objective_value))
137 |     elif status == mip.OptimizationStatus.FEASIBLE:
138 |         print('sol.cost {} found, best possible: {}'.format(m.objective_value, m.objective_bound))
139 |     elif status == mip.OptimizationStatus.NO_SOLUTION_FOUND:
140 |         print('no feasible solution found, lower bound is: {}'.format(m.objective_bound))
141 |     if status == mip.OptimizationStatus.OPTIMAL or status == mip.OptimizationStatus.FEASIBLE:
142 |         print('solution:')
143 |         for v in m.vars:
144 |             if abs(v.x) > 1e-6:# only printing non-zeros
145 |                 print('{} : {}'.format(v.name, v.x))
146 | 
147 | 
148 | 
149 |     m.num_solutions
150 | 
151 |     prod_vec    = np.array([v.x for v in m.vars])
152 |     prod_mat    = np.array(prod_vec).reshape((num_machines,time_horizon),order = "F")
153 | 
154 |     # compute output
155 |     #print("Output")
156 |     #print(B5*prod_vec)
157 | 
158 |     # compute Buffers
159 |     Buffer_vec = B1*prod_vec+1
160 |     Buffer_mat = np.array(Buffer_vec).reshape((num_machines-1,time_horizon),order = "F")
161 |     #print("Buffer")
162 |     #print(Buffer_mat)
163 | 
164 | 
165 |     # plots
166 |     #fig, ax = plt.subplots(num_machines,2, figsize=(10,10),facecolor=(1,1,1))
167 |     #for k, a in enumerate(ax):
168 |     #  # plot production
169 |     #  a[0].step(y=prod_mat[k,:],x =range(time_horizon))
170 |     #  a[0].set_ylim(-.01,1.01)
171 |     #  a[0].set_xticks(range(time_horizon))
172 |     #  a[0].set_title(f"Prduction of Machine {k}")
173 |     #  # plot Buffers
174 |     #  if k == num_machines-1: break
175 |     #  a[1].step(y=Buffer_mat[k,:],x =range(time_horizon))
176 |     #  a[1].set_ylim(1-.01,np.max(capacity_of_buffer)+.01)
177 |     #  a[1].set_xticks(range(time_horizon))
178 |     #  a[1].set_title(f"Buffer of Machine {k}")
179 |   
180 |     #range(time_horizon)
181 | 
182 |     #print("Optimal Production Matrix is ", prod_mat)
183 |     return prod_mat
184 | 
185 | 
186 | 
187 | 
188 | 
189 | """
190 | Testing for the Routine Strategy Selected by Mixed Integer Programming at Given Horizon
191 | """
def RoutineStrategy_Testing(number_iteration, #the number of testing iterations
                            target_output     #the target output
                            ):
    """Solve the routine strategy for a target output and log it to a file.

    The report is written to ``routine_output.txt`` in the current directory,
    overwriting any previous content.

    number_iteration: the number of testing iterations; it is only written to
        the report, since Mixed_Integer_Program takes the target output alone.
    target_output: the target output; it is truncated to an integer before use.
    Returns 0.
    """
    target_output = int(target_output)

    # The context manager closes the report even when the solver raises.
    with open('routine_output.txt', 'w') as rtoutput:
        #Calculate and output the total cost, total throughput and total energy demand for mixed-integer programming with target output as the one given by the optimal strategy
        print("\n************************* Mixed Integer Programming with given Target Output *************************", file=rtoutput)
        print("***Run the system on routine policy by mixed-integer programming at a time horizon=", number_iteration,"***", file=rtoutput)
        print("Target Output =", target_output, file=rtoutput)
        routine_sol = Mixed_Integer_Program(target_output)
        print("Optimal solution from mixed-integer programming is given by \n", routine_sol.T, file=rtoutput)

    return 0
211 | 
212 | 
213 | 
214 | """
215 | ######################## MAIN TESTING FILE ##############################
216 | ######################## FOR DEBUGGING ONLY #############################
217 | 
218 | """
219 | 
if __name__=="__main__":
    # Debug run: solve the program for each listed target output and write the
    # resulting 0-1 production matrix (printed transposed) to a text file.
    # The context manager closes the file even when the solver raises.
    with open('sample_mixed_integer_programming_solution.txt', 'w') as mipsol:
        print("******************** Optimal Strategy for Simple Mixed-Integer Programming with Target Output from 0-100 ********************\n", file=mipsol)
        for i in [73]:
            print("\n------------- Target="+str(i)+" -------------")
            print("\n------------- Target="+str(i)+" -------------", file=mipsol)
            print("\nTarget=", i, file=mipsol)
            x = Mixed_Integer_Program(i)
            print("\noptimal solution is \n", x.T, file=mipsol)


--------------------------------------------------------------------------------
/Simple_Manufacturing_System-Pure_Q-Learning.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | code for the manufacturing system
  4 | Author: Yunzhao Zhang and Wenqing Hu
  5 | 
  6 | """
  7 | 
  8 | import numpy as np
  9 | 
 10 | class Machine(object):
 11 |     debugMode = True
 12 |     def __init__(self, 
 13 |                  fail_rate=0.0333, 
 14 |                  mttr=10, 
 15 |                  prod_rate=1, 
 16 |                  input_rate=1, 
 17 |                  pow_on=1000, 
 18 |                  pow_idle=600, 
 19 |                  pow_off=0, 
 20 |                  switch=0, 
 21 |                  states="OFF", 
 22 |                  t_repair=0, 
 23 |                  source=None, 
 24 |                  output=None, name = "Machine"):
 25 |         """
 26 |         fail_rate: probability that machine fails
 27 |         mttr: mean time for the machine to be repared
 28 |         prod_rate: production rate at each time interval
 29 |         pow_on: power consumption when state is ON 
 30 |         pow_idle: power consumption when state is IDLE
 31 |         pow_off: power consumtion when state is OFF
 32 |         states: machine states (ON, IDLE, OFF)
 33 |         source: from where it takes input, if none then infinite resource 
 34 |         output: to where it outputs products, if none then infinite storage capacity
 35 |         """
 36 | #       print("A machine is built!")
 37 |         self.fail_rate = fail_rate
 38 |         self.mttr = mttr
 39 |         self.prod_rate = prod_rate
 40 |         self.pow_on = pow_on
 41 |         self.pow_idle = pow_idle 
 42 |         self.pow_off = pow_off 
 43 |         self.switch = switch
 44 |         self.states = states
 45 |         self.source = source
 46 |         self.output = output
 47 |         self.t_repair = t_repair
 48 |         self.input_rate = input_rate
 49 |         self.name = name
 50 |     
 51 |     def set_debug(self, debugMode=True):
 52 |         self.debugMode = debugMode
 53 |         
 54 |     def on_action(self, switch):
 55 |         """
 56 |         switch: turn the machine on or off
 57 |         """
 58 |         self.switch = switch
 59 |         if self.debugMode:
 60 |             print()
 61 |             print("Machine",self.name, "switch status: ", self.switch)
 62 |             print()
 63 |     def on_cycle(self, cycle_time = 2):
 64 |         """
 65 |         cycle_time: time takes to run one cycle 
 66 |         """
 67 |         source = self.source
 68 |         output = self.output
 69 |         consumed = 0
 70 |         produced = 0
 71 |         # check for switch status, if off then state is OFF, else check for buffer
 72 |         if self.switch == 0 or self.t_repair>0:
 73 |             self.states = "OFF"
 74 |         else:
 75 |             if source == None and output == None:
 76 |                 self.states = "ON"
 77 |             elif source and output:
 78 |                 if source.hasEnough(self.input_rate) and not output.willFull(self.prod_rate):
 79 |                     self.states = "ON"
 80 |                 else:
 81 | #                     print("case 1")
 82 |                     self.states = "IDLE"
 83 |             elif source and output == None:
 84 |                 if source.hasEnough(self.input_rate):
 85 |                     self.states = "ON"
 86 |                 else:
 87 | #                     print("case 2")
 88 |                     self.states = "IDLE"
 89 |             elif source == None and output:
 90 |                 if not output.willFull(self.prod_rate):
 91 |                     self.states = "ON"
 92 |                 else:
 93 | #                     print("case 3")
 94 |                     self.states = "IDLE"
 95 |                     
 96 |         # check if it is being repaired
 97 |         if self.t_repair > 0:
 98 |             self.t_repair -= cycle_time
 99 |         else:
100 |             # determine if the machine will fail 
101 |             randn = np.random.uniform(0,1)
102 |             if randn < self.fail_rate:
103 |                 self.t_repair = int((self.mttr/4)*np.random.randn()+self.mttr)
104 |                 self.states = "OFF"
105 |                 if self.debugMode:
106 |                     print("OH NO!!! The machine fails... Need repair time: ", self.t_repair)
107 |             elif self.states == "ON":
108 |                 if source:
109 |                     consumed = source.consume(self.input_rate)
110 |                 if output:
111 |                     produced = output.store(self.prod_rate)
112 |                 
113 |                 
114 | 
115 |         if self.debugMode:
116 |             self.print_status(produced=produced, consumed=consumed)
117 |             
118 |     def print_status(self, produced=0, consumed=0):
119 |         print()
120 |         print("Machine",self.name, " is currently", self.states)            
121 |         print("Remaining repairing time: ", max(0,self.t_repair))
122 |         print("Resource consumed: ", consumed)
123 |         print("Product produced: ", produced)
124 |         print()
125 | 
class Buffer(object):
    """
    Stock held between machines.

    A capacity of -1 marks an unbounded buffer: it always reports enough
    stock and never reports being full. consume/store adjust the amount
    without any availability or capacity check.
    """
    def __init__(self, initial_amt = 0, capacity = 20):
        self.capacity = capacity
        self.amount = initial_amt

    def hasEnough(self, taken):
        """True when `taken` units can be withdrawn (always for capacity -1)."""
        return True if self.capacity == -1 else self.amount >= taken

    def willFull(self, gotten):
        """True when adding `gotten` units would exceed the capacity (never for capacity -1)."""
        return False if self.capacity == -1 else self.amount+gotten > self.capacity

    def consume(self, taken):
        """Withdraw `taken` units and return the withdrawn amount."""
        self.amount -= taken
        return taken

    def store(self, gotten):
        """Add `gotten` units and return the added amount."""
        self.amount += gotten
        return gotten
144 | 
145 | import pandas as pd
146 | 
147 | class ManufacturingSystem(object):
148 |     def __init__(self, cycle_time = 2, num_machines = 2, state_time = 60, solar_irr=None, wind_sp=None, 
149 |                  battery_cap=800, generator_cap=400, period = 8640, e_c=0.9, e_dc=0.9, debugMode=True):
150 |         self.time = 0
151 |         self.cycle_time = cycle_time
152 |         self.num_machines = num_machines
153 |         self.buf0 = Buffer(capacity=-1)
154 |         self.buf1 = Buffer(initial_amt=10, capacity=20)
155 |         self.buf2 = Buffer(capacity=-1)
156 |         self.mac0 = Machine(source=self.buf0, output=self.buf1, name="Machine 1")
157 |         self.mac1 = Machine(source=self.buf1, output=self.buf2, name="Machine 2")
158 |         self.machines = [self.mac0, self.mac1]
159 |         self.buffers = [self.buf1, self.buf2]
160 |         self.state_time = state_time
161 |         self.solar_irr = None
162 |         self.wind_sp = None
163 |         self.battery_cap = battery_cap
164 |         self.generator_cap = generator_cap
165 |         self.period = period
166 |         self.e_c = e_c
167 |         self.e_dc = e_dc
168 |         
169 |         
170 |         self.cost_solar = 0.02
171 |         self.cost_wind = 0.03
172 |         self.cost_battery = 0.1
173 |         self.cost_generator = 0.2
174 |         self.cost_utility = [0.35,0.19,0.06]
175 |         self.price_soldback = [0.17,0.07,0]
176 |         
177 |         
178 |         '''
179 |         Energy components
180 |         ''' 
181 |         # Solar charging state
182 |         self.solar_irr = solar_irr
183 |         self.wind_sp = wind_sp
184 |             
185 |         
186 |         '''
187 |         Initial states
188 |         '''
189 |         self.states = []
190 |         # M_i: states of machines, 3 vars for each (ON, IDLE, OFF)                                              
191 |         for i in range(self.num_machines):
192 |             self.states.append(0)
193 |             self.states.append(0)
194 |             self.states.append(1)
195 |         # B_i: states of intermediate buffers
196 |         for i in range(self.num_machines-1):
197 |             self.states.append(self.buffers[i].amount)
198 |         # Y: total production
199 |         self.states.append(self.buf2.amount)
200 |         # S: current solar energy charging rate
201 |         self.states.append(0)
202 |         # W: current wind energy charging rate 
203 |         self.states.append(0)
204 |         # G: current generator rate
205 |         self.states.append(0)
206 |         # SOC: state of charge of the batter
207 |         self.states.append( 0.6)
208 |         # SB: sold back rate
209 |         self.states.append(0)
210 |         # U: utility purchase
211 |         self.states.append(0)
212 |         # I: current solar irradiance
213 |         self.states.append(self.solar_irr[0])
214 |         # F: current wind speed
215 |         self.states.append(self.wind_sp[0])
216 |         # t: time of period 
217 |         self.states.append(self.time//60)
218 |         
219 |         
220 |         
221 |     
222 |     def on_cycle(self):
223 |         self.time += self.cycle_time
224 |         self.mac0.on_cycle(cycle_time=self.cycle_time)
225 |         self.mac1.on_cycle(cycle_time=self.cycle_time)
226 |         
227 |         energy = 0
228 |         for mac in self.machines:
229 |             if mac.states == "ON":
230 |                 energy += mac.pow_on * self.cycle_time / self.state_time
231 |             elif mac.states == "IDLE":
232 |                 energy += mac.pow_idle * self.cycle_time / self.state_time
233 |             elif mac.states == "OFF":
234 |                 energy += mac.pow_off * self.cycle_time / self.state_time
235 |         return energy
236 |     
237 |     def get_distribution(self, actions):
238 |         demand = 0
239 |         
240 |         for i, mac in enumerate(self.machines):
241 |             mac.on_action(actions[i])        
242 | 
243 |             
244 |         for t in range(0, self.state_time//self.cycle_time):
245 | #             print("------------------------------- Cycle {} -------------------------------".format(t))
246 |             demand += self.on_cycle()
247 | #             print()
248 | #             self.print_status()
249 |         
250 | #         print("Energy Demand:",demand)
251 |         return demand
252 |         
253 |         
254 |         
255 |         
256 |     def on_action(self, actions):
257 |         '''
258 |         Actions:
259 |         [0.. num_machines]: control actions of the machines
260 |         [num_machines]: solar energy to manufacturing system
261 |         [num_machines+1]: solar charge to battery 
262 |         [num_machines+2]: solar energy sold back 
263 |         [num_machines+3]: wind energy to manufacturing system
264 |         [num_machines+4]: wind charge to battery 
265 |         [num_machines+5]: wind energy to sold back 
266 |         [num_machines+6]: generator energy to manufacturing system
267 |         [num_machines+7]: discharging from battery to manufacturing system
268 |         [num_machines+8]: utility purchased to manufacturing system 
269 |         '''
270 |         demand = 0
271 |         
272 |         for i, mac in enumerate(self.machines):
273 |             mac.on_action(actions[i])
274 |         
275 |         
276 |         for t in range(0, self.state_time//self.cycle_time):
277 | #             print("------------------------------- Cycle {} -------------------------------".format(t))
278 |             demand += self.on_cycle()
279 | #             print()
280 | #             self.print_status()
281 |         
282 | #         print("Energy Demand:",demand)
283 |         # update the states of the manufacturing system
284 |         pre_state = self.states
285 |         self.update_states(actions)
286 |         post_state = self.states 
287 |         acts = self.get_actions()
288 |         energy_cost = self.calc_cost(demand, actions)
289 |         
290 | #         print("Cost: ",energy_cost)
291 |         return post_state, energy_cost, acts
292 |         
293 |     
294 |     def get_actions(self):
295 |         '''
296 |         put after state is updated
297 |         '''
298 |         
299 |         actions = []
300 |         cost_solar = self.cost_solar
301 |         cost_wind = self.cost_wind
302 |         cost_battery = self.cost_battery
303 |         cost_generator = self.cost_generator
304 | 
305 |         tf = (self.states[-1]) % 24
306 |         if tf > 13 and tf <= 19:
307 |             tf = 0
308 |         elif (tf > 10 and tf <=13) or (tf > 18 and tf <= 21):
309 |             tf = 1
310 |         else:
311 |             tf = 2
312 |         cost_utility = self.cost_utility[tf]
313 |         price_soldback = self.price_soldback[tf]
314 |         
315 |         both_on = np.load("both_on.npy")
316 |         both_off = np.load("both_off.npy")
317 |         st_on = np.load("1st_on.npy")
318 |         nd_on = np.load("2nd_on.npy")
319 |         demands = [both_on, both_off, st_on, nd_on]
320 |         
321 |         
322 |         for mac1 in range(2):
323 |             for mac2 in range(2):
324 |                 demand = demands[0][self.states[-1]]
325 |                 if mac1 == 0 and mac2 == 0:
326 |                     demand = demands[1][self.states[-1]]
327 |                 elif mac1 == 1 and mac2 == 0:
328 |                     demand = demands[2][self.states[-1]]
329 |                 elif mac1 == 0 and mac2 == 1:
330 |                     demand = demands[3][self.states[-1]]
331 |                 use_wind = 0
332 |                 use_solar = 0
333 |                 use_battery = 0
334 |                 use_generator = 0
335 |                 use_utility = 0
336 |                 wind_energy = self.wind_sp[self.states[-1]]
337 |                 solar_energy = self.solar_irr[self.states[-1]]
338 |                 battery_energy = self.states[self.num_machines*3+self.num_machines+3]*self.battery_cap
339 |                 generator_energy = self.generator_cap
340 |                 
341 | #                 print("Demand: ", demand, "Solar: ", solar_energy, "Wind: ", wind_energy)
342 |                 
343 |                 # use up cheapest until demand energy is satisfied
344 |                 if demand > 0 and cost_solar < cost_utility and cost_solar < cost_wind:
345 |                     if solar_energy > demand:
346 |                         use_solar = demand
347 |                         solar_energy -= demand
348 |                         demand = 0
349 |                     else:
350 |                         use_solar = solar_energy
351 |                         demand -= solar_energy
352 |                         solar_energy = 0
353 |                 if demand > 0 and cost_wind < cost_utility and cost_wind < cost_solar:
354 |                     if wind_energy > demand:
355 |                         use_wind = demand
356 |                         wind_energy -= demand 
357 |                         demand = 0
358 |                     else:
359 |                         use_wind = wind_energy
360 |                         demand -= wind_energy
361 |                         wind_energy = 0
362 |                 if demand > 0 and cost_solar < cost_utility and solar_energy>0:
363 |                     if solar_energy > demand:
364 |                         use_solar = demand
365 |                         solar_energy -= demand
366 |                         demand = 0
367 |                     else:
368 |                         use_solar = solar_energy
369 |                         demand -= solar_energy
370 |                         solar_energy = 0
371 |                 if demand > 0 and cost_wind < cost_utility and wind_energy>0:
372 |                     if wind_energy > demand:
373 |                         use_wind = demand
374 |                         wind_energy -= demand 
375 |                         demand = 0
376 |                     else:
377 |                         use_wind = wind_energy
378 |                         demand -= wind_energy
379 |                         wind_energy = 0
380 |                 use_batery = 0
381 |                 use_generator = 0
382 |                 if demand > 0 and cost_battery < cost_utility and cost_battery < cost_generator:
383 |                     if battery_energy > demand:
384 |                         use_battery += demand
385 |                         battery_energy -= demand 
386 |                         demand = 0
387 |                     else:
388 |                         use_battery += battery_energy
389 |                         demand -= battery_energy
390 |                         battery_energy = 0
391 |                 if demand > 0 and cost_generator < cost_utility and cost_generator < cost_battery:
392 |                     if generator_energy > demand:
393 |                         use_generator += demand
394 |                         generator_energy -= demand 
395 |                         demand = 0
396 |                     else:
397 |                         use_generator += generator_energy
398 |                         demand -= generator_energy
399 |                         generator_energy = 0
400 |                         
401 |                 if demand > 0 and cost_battery < cost_utility and battery_energy > 0:
402 |                     if battery_energy > demand:
403 |                         use_battery += demand
404 |                         battery_energy -= demand 
405 |                         demand = 0
406 |                     else:
407 |                         use_battery += battery_energy
408 |                         demand -= battery_energy
409 |                         battery_energy = 0      
410 |                         
411 |                 if demand > 0 and cost_generator < cost_utility and generator_energy > 0:
412 |                     if generator_energy > demand:
413 |                         use_generator += demand
414 |                         generator_energy -= demand 
415 |                         demand = 0
416 |                     else:
417 |                         use_generator += generator_energy
418 |                         demand -= generator_energy
419 |                         generator_energy = 0
420 |                     
421 |                 remain_SOC = (self.battery_cap - battery_energy) / self.battery_cap
422 |                 use_utility = demand
423 |                 
424 | #                 print("remain_SOC: ", remain_SOC)
425 |                 for sb in range(0,int(remain_SOC*100),5):
426 |                     solar_charge = sb*self.battery_cap/100 
427 |                     if solar_charge <= solar_energy:
428 |                         for wb in range(0,int(remain_SOC*100-sb), 5):
429 |                             wind_charge = wb*self.battery_cap/100
430 |                             if wind_charge <= wind_energy:
431 |                                 temp_solar = solar_energy - solar_charge
432 |                                 temp_wind = wind_energy - wind_charge
433 |                                 solar_sold = 0
434 |                                 wind_sold = 0
435 |                                 if cost_solar < price_soldback: 
436 |                                     solar_sold = temp_solar
437 |                                 if cost_wind < price_soldback:
438 |                                     wind_sold = temp_wind
439 |                                 action = [mac1, mac2, use_solar, solar_charge, solar_sold, use_wind, wind_charge, wind_sold, use_generator, use_battery, use_utility]    
440 |                                 actions.append(action)
441 |                 
442 |                 
443 |         
444 |         
445 |                 
446 |         
447 |         return actions
448 |        
449 |             
450 |     
451 |     def calc_cost(self, demand, actions):
452 |         cost_solar = self.cost_solar
453 |         cost_wind = self.cost_wind
454 |         cost_battery = self.cost_battery
455 |         cost_generator = self.cost_generator
456 |         cost_utility = self.cost_utility
457 |         price_soldback = self.price_soldback
458 |         cost = 0
459 |         
460 |         # time frame (on-peak, mid-peak, off-peak)
461 |         tf = self.states[-1] % 24
462 |         if tf > 13 and tf <= 19:
463 |             tf = 0
464 |         elif (tf > 10 and tf <=13) or (tf > 18 and tf < 21):
465 |             tf = 1
466 |         else:
467 |             tf = 2
468 |         
469 |         # cost of solar energy
470 |         cost += (actions[self.num_machines]+actions[self.num_machines+1]+actions[self.num_machines+2])*cost_solar
471 |         # cost of wind energy
472 |         cost += (actions[self.num_machines+3]+actions[self.num_machines+4]+actions[self.num_machines+5])*cost_wind
473 |         # cost of generator energy
474 |         cost += actions[self.num_machines+6]*cost_generator 
475 |         # cost of battery
476 |         cost += (actions[self.num_machines+1]+actions[self.num_machines+4]+actions[self.num_machines+7])*cost_battery
477 |         # cost of utility
478 |         cost += actions[self.num_machines+8]*cost_utility[tf]
479 |         # earned of sold back
480 |         cost -= (actions[self.num_machines+2]+actions[self.num_machines+5])*price_soldback[tf]
481 |         
482 | 
483 |         
484 |         return cost
485 | 
486 |     
487 |         
488 |     def update_states(self, actions):
489 |         # M_i: states of machines, 3 vars for each                                                 
490 |         for i,mac in enumerate(self.machines):
491 |             if mac.states == "ON":
492 |                 self.states[i*3] = 1
493 |                 self.states[i*3+1] = 0
494 |                 self.states[i*3+2] = 0
495 |             elif mac.states == "IDLE":
496 |                 self.states[i*3] = 0
497 |                 self.states[i*3+1] = 1
498 |                 self.states[i*3+2] = 0    
499 |             elif mac.states == "OFF":
500 |                 self.states[i*3] = 0
501 |                 self.states[i*3+1] = 0
502 |                 self.states[i*3+2] = 1 
503 |         # B_i: states of intermediate buffers
504 |         for i in range(self.num_machines-1):
505 |             self.states[self.num_machines*3+i] = self.buffers[i].amount 
506 |         # Y: total production
507 | #         self.states[self.num_machines*3+self.num_machines-1] = self.buffers[-1].amount
508 |         # S: next solar energy charging rate
509 |         self.states[self.num_machines*3+self.num_machines] = actions[self.num_machines]+actions[self.num_machines+1]+actions[self.num_machines+2]
510 |         # W: next wind energy charging rate 
511 |         self.states[self.num_machines*3+self.num_machines+1] = actions[self.num_machines+3]+actions[self.num_machines+4]+actions[self.num_machines+5]
512 |         # G: next generator rate
513 |         self.states[self.num_machines*3+self.num_machines+2] = actions[self.num_machines+6]
514 |         # SOC: state of charge of the battery
515 |         total_charging = actions[self.num_machines+1]+actions[self.num_machines+4]
516 |         self.states[self.num_machines*3+self.num_machines+3] = (self.states[self.num_machines*3+self.num_machines+3]*self.battery_cap+self.e_c*total_charging-actions[self.num_machines+7]/self.e_dc) / self.battery_cap
517 |         # SB: sold back rate
518 |         self.states[self.num_machines*3+self.num_machines+4] = actions[self.num_machines+2]+actions[self.num_machines+5]
519 |         # U: utility purchase
520 |         self.states[self.num_machines*3+self.num_machines+5] = actions[self.num_machines+8]
521 |         # t: time of period 
522 |         self.states[self.num_machines*3+self.num_machines+8] += 1
523 |         self.states[self.num_machines*3+self.num_machines+8] = self.states[self.num_machines*3+self.num_machines+8] % self.period
524 | #         if self.states[self.num_machines*3+self.num_machines+8] == 0:
525 | #             self.states[self.num_machines*3+self.num_machines-1] = 0  
526 | #         elif self.states[self.num_machines*3+self.num_machines+8] % 720 == 0:
527 | #             self.states[self.num_machines*3+self.num_machines-1] -= 15000
528 | 
529 |         # I: current solar irradiance
530 |         self.states[self.num_machines*3+self.num_machines+6] = self.solar_irr[self.states[self.num_machines*3+self.num_machines+8]]
531 |         # F: current wind speed
532 |         self.states[self.num_machines*3+self.num_machines+7] = self.wind_sp[self.states[self.num_machines*3+self.num_machines+8]]
533 | 
534 |         
535 |             
536 |     def print_status(self):
537 |         print("Buffer 0: ", self.buf0.amount)
538 |         print("Buffer 1: ", self.buf1.amount)
539 |         print("Buffer 2: ", self.buf2.amount)
540 | 
541 | from keras.models import Sequential
542 | from keras.layers import Dense
543 | from sklearn.preprocessing import MinMaxScaler
544 | import keras 
545 | import seaborn as sns
546 | import numpy as np
547 | import matplotlib.pyplot as plt 
548 | 
# ---- Input data: solar and wind series read from CSV files in the working directory ----
file_si = "SolarIrradiance.csv"
file_ws = "WindSpeed.csv"

# Solar: third CSV column scaled by panel area and efficiency, then divided by 1000
# (presumably W -> kW; TODO confirm the units of the CSV column)
area = 300
e_solar = 0.2
# NOTE(review): ManufacturingSystem sets its own cost_solar; this module-level
# value is not read by the class
cost_solar = 0.02
dt_solar = pd.read_csv(file_si)
solar_irr = np.array(dt_solar.iloc[:,2])*area*e_solar/1000

# Wind: 0.5 * air density * rotor area (pi * radius^2) * speed^3 * power coefficient,
# scaled by the gear and electrical efficiencies and by h, then divided by 1000
# NOTE(review): the meaning of h is not evident from this file - confirm
density_air = 1.225
radius = 20
power_coef = 0.593
e_gear = 0.9
e_elec = 0.9
# NOTE(review): ManufacturingSystem sets its own cost_wind; this module-level
# value is not read by the class
cost_wind = 0.03
h = 3
dt_wind = pd.read_csv(file_ws)
wind_sp = (np.array(dt_wind.iloc[:,2])**3) * 0.5 * density_air * np.pi * (radius**2) * power_coef * e_gear * e_elec * h / 1000 
        

# alpha: learning rate; overwritten with 1/(n+1) inside the training loop below
alpha = 1
# lamb: weight of the best next-state Q-value in the training target
lamb = 0.1
570 | 
571 | 
572 | # print(actions[idx], len(actions[idx]))
573 | # print(factory.states, len(factory.states))
574 | # x = np.concatenate((actions[idx], factory.states))
575 | # scaler = MinMaxScaler()
576 | # x = scaler.fit_transform(np.reshape(x, [1,28]))
577 | # print(x, x.shape)
578 | # print(model.predict(x))
579 | # model.train_on_batch(x_batch, y_batch)
580 | # loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)
581 | # classes = model.predict(x_test, batch_size=128)
582 | 
# Per-run results: training targets, training losses and weight-change norms
yss = []
historys = []
weights = []
for r in range(1):

    #build the neural network, weights are stored in weights
    # input = 11 action entries + 17 state entries, output = one Q-value
    model = Sequential()
    model.add(Dense(32, input_dim=11+17, activation='sigmoid',kernel_initializer='normal'))
    model.add(Dense(32, activation='sigmoid', kernel_initializer='normal'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mse', optimizer='adam')

    factory = ManufacturingSystem(solar_irr=solar_irr, wind_sp=wind_sp)
    factory.mac0.set_debug(False)
    factory.mac1.set_debug(False)
    actions = factory.get_actions()

    ys = []
    history = []
    weight = []
    for n in range(100000):
        # pick a uniformly random candidate action
        # NOTE(review): np.random.randint raises when actions is empty;
        # get_actions emits nothing for a machine combination whose battery
        # headroom is below 1 % - confirm the list can never be empty here
        idx = np.random.randint(len(actions))
        alpha = 1/(n+1)
        
        #Set up the states and actions
        x = np.concatenate((actions[idx], factory.states))
        # NOTE(review): the scaler is fitted on a single 1x28 row, so every
        # feature has min == max; MinMaxScaler presumably maps all features to
        # the same value - confirm the network actually sees distinct inputs
        scaler  = MinMaxScaler()
        x = scaler.fit_transform(np.reshape(x, [1,28]))
        
        #current Q-value predicted by the neural network
        current =model.predict(x)
        # production entry (Y) of the state vector before the step
        prod_before = factory.states[factory.num_machines*3+factory.num_machines-1]
        
        #Output the current state, optimal policy and optimal cost 
        best_current= 999999
        best_action_current = None
        remainder=n % 1000
        # every 1000 iterations: report the action with the lowest predicted value
        if remainder==0:
            for a in actions:
                x_curr = np.concatenate((a, factory.states))
                x_curr = scaler.fit_transform(np.reshape(x_curr, [1,28]))
                predicted_reward_current = model.predict(x_curr)
                if predicted_reward_current < best_current:
                    best_current = predicted_reward_current
                    best_action_current = a
            print("Epoch: ", n)
            print("Current State: ", factory.states)
            print("Best Action: ", best_action_current, "Best Reward: ", best_current)
        
        #Calculate the reward for one step transition of MDP
        # on_action also returns the candidate actions for the next state
        post, cost, actions = factory.on_action(actions[idx])
        # NOTE(review): update_states does not write the Y entry, so prod is
        # presumably always 0 and the reward reduces to the energy cost - confirm
        prod = post[factory.num_machines*3+factory.num_machines-1] - prod_before
        reward = cost-prod
        
        #Find the optimal cost under current model for the Q-values in the next state
        # (the reward is a cost, so the best value is the minimum)
        bestFuture = 999999
        for a in actions:
            xp = np.concatenate((a, post))
            xp = scaler.fit_transform(np.reshape(xp, [1,28]))
            predicted_reward = model.predict(xp)
            if predicted_reward < bestFuture:
                bestFuture = predicted_reward
        
        #Update the new Q-value
        y = (1-alpha)*current+alpha*(reward+lamb*bestFuture)
        
        ys.append(y)

        #Calculate the difference in weights of the neural network
        #Calculate previous weights        
        pre_weight = []
        for layer in model.layers:
            w = layer.get_weights()
            for each in w:
                pre_weight += list(each.flatten())
        
        #Train the neural network
        history.append(model.train_on_batch(x, y))
        
        #Calculate the weights after training
        post_weight = []
        for layer in model.layers:
            w = layer.get_weights()
            for each in w:
                post_weight += list(each.flatten())
        
        #Difference in weights
        diff_weights = np.array(post_weight) - np.array(pre_weight)
        
        #L2 norm of the difference in weight vector are stored in weight
        weight.append(np.linalg.norm(diff_weights, 2))
        
        if remainder==0:
            print("Weight difference: ", weight[-1])
            print("----------------------------------------------")

        # every 10000 iterations: plot the most recent weight-change norms
        # (plt.show() presumably blocks until the window is closed - confirm for the backend in use)
        if n % 10000 == 0:
            plt.plot(weight[-10000:])
            plt.show()
    # keep this run's targets (flattened), losses and weight-change norms
    yss.append(np.reshape(ys, (len(ys),)))
    historys.append(history)
    weights.append(weight)
685 | 
686 | 
687 | #Plot the weight difference
688 |     
# Weight-change norm per iteration, aggregated over the runs in `weights`.
# sns.tsplot only exists in old seaborn releases, so it is used when the
# installed seaborn still provides it and replaced by plain matplotlib otherwise.
plt.figure(figsize=(12,8))
if hasattr(sns, "tsplot"):
    sns.tsplot(weights,time=range(len(weights[0])), ci=[68,95], condition="Weights")
else:
    runs = np.asarray(weights, dtype=float)
    steps = range(runs.shape[1])
    plt.plot(steps, runs.mean(axis=0), label="Weights")
    # Shade the central 95 % and 68 % spread of the runs; this is a rough
    # stand-in for the bands tsplot drew, not the same statistic.
    for level in (95, 68):
        lower, upper = np.percentile(runs, [50 - level/2, 50 + level/2], axis=0)
        plt.fill_between(steps, lower, upper, alpha=0.2)
    plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Wight Difference")

plt.show()
695 | 
696 | 
697 | 


--------------------------------------------------------------------------------
/reinforcement_learning.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Jan  3 14:33:36 2020
  4 | Modified on Fri, May  5, 15:34:52 2020
  5 | @author: Wenqing Hu and Louis Steinmeister
  6 | Title: Reinforcement Learning for the joint control of onsite microgrid and manufacturing system
  7 | """
  8 | 
  9 | from microgrid_manufacturing_system import Microgrid, ManufacturingSystem, ActionSimulation, MicrogridActionSet_Discrete_Remainder, MachineActionTree, SystemInitialize
 10 | from projectionSimplex import projection
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | import tensorflow as tf
 14 | import math
 15 | #import tensorflow.keras.backend as K
 16 | 
#set the number of machines
number_machines=5
#set the unit reward of production
unit_reward_production=1000/10000
#the unit reward for each unit of production (10^4$/unit produced), i.e. the r^p, this applies to the end of the machine sequence#

#the discount factor gamma when calculating the total cost#
gamma=0.999

#the seed for reinforcement training initialization of the network weights and biases
seed=2

#the probability of using random actions vs. on-policy optimal actions in each step of training
p_choose_random_action=0.9

import pandas as pd
#read the solar irradiance, wind speed and the rate of consumption charge data from file#
#the three CSV files are looked up relative to the working directory
file_SolarIrradiance = "SolarIrradiance.csv"
file_WindSpeed = "WindSpeed.csv"
file_rateConsumptionCharge = "rate_consumption_charge.csv"
#read the solar irradiance (fourth CSV column)
data_solar = pd.read_csv(file_SolarIrradiance)
solarirradiance = np.array(data_solar.iloc[:,3])
#solar irradiance measured by MegaWatt/km^2
#read the windspeed (fourth CSV column), multiplied by 3.6 to convert m/s to km/h
data_wind = pd.read_csv(file_WindSpeed)
windspeed = np.array(data_wind.iloc[:,3])*3.6
#windspeed measured by km/h=1/3.6 m/s
#read the rate of consumption charge (fifth CSV column), divided by 10
data_rate_consumption_charge = pd.read_csv(file_rateConsumptionCharge)
rate_consumption_charge = np.array(data_rate_consumption_charge.iloc[:,4])/10
#rate of consumption charge measured by 10^4$/MegaWatt=10 $/kWh
 48 | #rate of consumption charge measured by 10^4$/MegaWatt=10 $/kWh
 49 | 
 50 | 
 51 | 
 52 | """
 53 | Provide the structure of the action-value function Q(S, A^d, A^c, A^r; omega), 
 54 | also provide its gradients with respect to A^c and to omega
Here we assume that Q is a 2-hidden layer neural network with parameters omega, 
 56 | this structure is written into class critic
 57 | """ 
 58 | class action_value(object):
 59 |     def __init__(self,
 60 |                  System=ManufacturingSystem(machine_states=["Off" for _ in range(number_machines)],
 61 |                                             machine_control_actions=["K" for _ in range(number_machines)],
 62 |                                             buffer_states=[0 for _ in range(number_machines-1)],
 63 |                                             grid=Microgrid(workingstatus=[0,0,0],
 64 |                                                            SOC=0,
 65 |                                                            actions_adjustingstatus=[0,0,0],
 66 |                                                            actions_solar=[0,0,0],
 67 |                                                            actions_wind=[0,0,0],
 68 |                                                            actions_generator=[0,0,0],
 69 |                                                            actions_purchased=[0,0],
 70 |                                                            actions_discharged=0,
 71 |                                                            solarirradiance=0,
 72 |                                                            windspeed=0
 73 |                                                            )
 74 |                                             ),
 75 |                 critic = None
 76 |                 ):
 77 |         #define neural network with 2 hidden layers for the Q function
 78 |         self.critic = critic
 79 |         self.System=System
 80 |         
 81 |     def num_list_States_Actions(self):
 82 |         #return the states and actions as a numerical list#
 83 |         #"Off"=0, "Brk"=-2, "Idl"=-1, "Blk"=1, "Opr"=2#
 84 |         #"H"=-1, "K"=0, "W"=1#
 85 |         list=[  [0 for _ in range(number_machines)], 
 86 |                 [0 for _ in range(number_machines-1)], 
 87 |                 [0 for _ in range(3)],
 88 |                 [0 for _ in range(number_machines)],
 89 |                 [0 for _ in range(3)],
 90 |                 [0 for _ in range(9)],
 91 |                 [0 for _ in range(2)],
 92 |                 [0] ]
 93 |         for i in range(number_machines):
 94 |             if self.System.machine_states[i]=="Off":
 95 |                 list[1-1][i]=0
 96 |             elif self.System.machine_states[i]=="Brk":
 97 |                 list[1-1][i]=-2
 98 |             elif self.System.machine_states[i]=="Idl":
 99 |                 list[1-1][i]=-1
100 |             elif self.System.machine_states[i]=="Blo":
101 |                 list[1-1][i]=1
102 |             else:
103 |                 list[1-1][i]=2
104 |         for i in range(number_machines-1):
105 |             list[2-1][i]=self.System.buffer_states[i]
106 |         for i in range(3):
107 |             list[3-1][i]=self.System.grid.workingstatus[i]
108 |         for i in range(number_machines):
109 |             if self.System.machine_control_actions[i]=="H":
110 |                 list[4-1][i]=-1
111 |             elif self.System.machine_control_actions[i]=="K":
112 |                 list[4-1][i]=0
113 |             else:
114 |                 list[4-1][i]=1
115 |         for i in range(3):
116 |             list[5-1][i]=self.System.grid.actions_adjustingstatus[i]
117 |         for i in range(3):
118 |             list[6-1][i]=self.System.grid.actions_solar[i]
119 |             list[6-1][i+3]=self.System.grid.actions_wind[i]
120 |             list[6-1][i+6]=self.System.grid.actions_generator[i]
121 |         for i in range(2):
122 |             list[7-1][i]=self.System.grid.actions_purchased[i]
123 |         list[8-1][0]=self.System.grid.actions_discharged #needs to be a list for later convenience
124 |         return list
125 |         
126 |     
127 |     def Q(self, num_list_States_Actions):
128 |         flat_inputs = np.array([item for sublist in num_list_States_Actions for item in sublist],dtype = "float32")
129 |         q = self.critic(flat_inputs)[0,0]
130 |         #print("Evaluation of Critic:", q)
131 |         return q
132 |     
133 |     #has to be called after Q
134 |     def Q_grad_A_c(self):
135 |         #print("DEBUG DQ_Dinput",self.critic.__Q_grad_input__)
136 |         #print("DEBUG DQ_DAc",self.critic.__Q_grad_A_c__)
137 |         return self.critic.__Q_grad_A_c__.numpy()
138 | 
139 | 
140 |     def Q_grad_omega(self, num_list_States_Actions):
141 |         #print("Q_grad_omega:", [tensor for tensor in self.critic.__Q_grad_omega__])
142 |         return [tensor.numpy() for tensor in self.critic.__Q_grad_omega__]
143 | 
144 |     
145 |     def update_weights(self, factor):
146 |         self.critic.update_weights(factor)
147 |         
148 | 
149 | 
150 | 
151 | """
152 | implements everything related to the Q function
153 | """
class critic():
    """Dense network with two hidden layers used as the action-value function Q.

    Calling the instance evaluates Q for one flattened (state, action) vector
    and, as a side effect, stores the gradients of that evaluation on the
    instance (``__Q_grad_omega__``, ``__Q_grad_input__``, ``__Q_grad_A_c__``,
    ``__Q__``). ``update_weights`` then applies a scaled step along the stored
    weight gradients. Every call overwrites the stored gradients, so
    ``update_weights`` always uses those of the most recent evaluation.
    """

    def __init__(self):
        self.run_eagerly = True
        # length of the flattened (state, action) vector
        self.dim_input = 3*number_machines-1+18
        print("inputs expected:",self.dim_input)
        #hidden layers
        self.layer1     = tf.keras.layers.Dense(100, activation='sigmoid', input_shape = (1,self.dim_input))
        self.layer2     = tf.keras.layers.Dense(100, activation='relu')
        #output layer
        self.layer_out  = tf.keras.layers.Dense(1, activation='linear')
        #flag making sure that gradients are computed before they are applied
        self.ready_to_apply_gradients = False
        #filled on the first call, once the layers have created their variables
        self.trainable_variables = None

    def __call__(self, inputs):
        """Evaluate Q for one flattened (state, action) vector.

        ``inputs`` is reshaped to ``(1, dim_input)``. Returns the output of the
        last layer (a ``(1, 1)`` tensor) and stores the gradients of that
        output with respect to the weights and to the input on the instance.
        """
        input_tensor = tf.reshape(inputs,shape = (1,self.dim_input))
        with tf.GradientTape(persistent=True) as tape:
            # the input is not a variable, so it has to be watched explicitly
            tape.watch(input_tensor)
            hidden  = self.layer1(input_tensor)
            hidden  = self.layer2(hidden)
            Q_value = self.layer_out(hidden)
        if self.trainable_variables is None:
            self.trainable_variables = self.layer1.variables + self.layer2.variables + self.layer_out.variables

        #gradients of Q wrt omega and wrt the input
        Q_grad_omega = tape.gradient(Q_value, self.trainable_variables)
        Q_grad_input = tape.gradient(Q_value, input_tensor)[0]
        # a persistent tape keeps its resources until it is dropped
        del tape
        #there are 3 entries after A_c and A_c is 9 long
        Q_grad_A_c   = Q_grad_input[-12:-3]

        self.__Q_grad_omega__ = Q_grad_omega
        self.__Q_grad_input__ = Q_grad_input
        self.__Q_grad_A_c__   = Q_grad_A_c
        self.__Q__            = Q_value
        self.ready_to_apply_gradients = True

        return Q_value

    def update_weights(self, factor):
        """Apply ``var <- var - factor*grad`` to every weight using the stored gradients.

        All updated values are checked for NaN (every element, not only the
        first one) before any variable is modified, so a failed update leaves
        the weights untouched.

        Raises
        ------
        Exception
            If no gradients have been computed since the last update, or if
            the update would produce a NaN in any weight.
        """
        if not self.ready_to_apply_gradients:
            raise Exception("not ready to train the critic. Compute gradients first!")
        #form (gradient, variable)-tuples; materialised because they are walked twice
        grad_var_tuples = list(zip(self.__Q_grad_omega__, self.trainable_variables))
        #validate the complete update first
        for grad, var in grad_var_tuples:
            if np.isnan((var-factor*grad).numpy()).any():
                raise Exception("NaN Produced! Exiting training.")
        #apply update
        for grad, var in grad_var_tuples:
            var.assign_add(-factor*grad)
        self.ready_to_apply_gradients = False
224 | 
225 | 
226 | 
227 | 
228 | 
229 | """
230 | Provide the necessary functions for deterministic policy gradient updates for the theta
231 | """
class update_theta(object):
    """Helpers for the deterministic policy gradient update of theta.

    Parameters
    ----------
    System : ManufacturingSystem, optional
        System carrying the state and action (S_t, A_t). When omitted, a fresh
        system with all machines "Off", all control actions "K", empty buffers
        and a zeroed microgrid is built for this instance.
    theta : sequence of 6 numbers, optional
        Current theta, with A_t^c = A_t^c(theta). Defaults to a new list of
        six zeros (one list per instance, never shared between instances).
    """

    def __init__(self, System=None, theta=None):
        if System is None:
            System = ManufacturingSystem(machine_states=["Off" for _ in range(number_machines)],
                                         machine_control_actions=["K" for _ in range(number_machines)],
                                         buffer_states=[0 for _ in range(number_machines-1)],
                                         grid=Microgrid(workingstatus=[0,0,0],
                                                        SOC=0,
                                                        actions_adjustingstatus=[0,0,0],
                                                        actions_solar=[0,0,0],
                                                        actions_wind=[0,0,0],
                                                        actions_generator=[0,0,0],
                                                        actions_purchased=[0,0],
                                                        actions_discharged=0,
                                                        solarirradiance=0,
                                                        windspeed=0
                                                        )
                                         )
        if theta is None:
            theta = [0,0,0,0,0,0]
        #System has state and action (S_t, A_t), theta corresponds to A_t^c=A_t^c(theta)#
        self.System = System
        self.theta = theta

    def A_c_gradient_theta(self):
        """Return the 9x6 array of derivatives of A^c with respect to theta.

        Rows follow the order of A^c (three solar, three wind, three generator
        entries); columns follow theta. For each source with generated energy
        ``e`` and its two theta components, the three rows are ``e`` on the
        first component, ``e`` on the second, and ``-e`` on both.
        """
        grid = self.System.grid
        #energy generated by the solar PV (e_t^s), the wind turbine (e_t^w) and the generator (e_t^g)#
        energy_generated_solar = grid.energy_generated_solar()
        energy_generated_wind = grid.energy_generated_wind()
        energy_generated_generator = grid.energy_generated_generator()

        grad = []
        sources = (energy_generated_solar, energy_generated_wind, energy_generated_generator)
        for source_index, energy in enumerate(sources):
            first = 2*source_index
            second = first+1
            grad.append([energy if column == first else 0 for column in range(6)])
            grad.append([energy if column == second else 0 for column in range(6)])
            grad.append([-energy if column in (first, second) else 0 for column in range(6)])
        return np.array(grad)

    def deterministic_policygradient(self, A_c_grad_theta, Q_grad_A_c):
        """Return ``np.dot(Q_grad_A_c, A_c_grad_theta)``, the gradient of the cost with respect to theta.

        Also prints ``Q_grad_A_c``.
        """
        print("Policy gradient; Q_grad_A_c:",Q_grad_A_c)
        return np.dot(Q_grad_A_c, A_c_grad_theta)

    def update(self, policygradient, lr_theta):
        """Return ``projection(theta - lr_theta*policygradient)``; ``self.theta`` is not modified."""
        theta_old = self.theta
        return projection(theta_old-lr_theta*policygradient)
286 | 
287 | 
288 | 
289 | """
290 | Given the current theta and omega, determine the next continuous and discrete/remainder actions by finding 
291 | (1) A^c(theta)=energy distributed in [solar, wind, generator]
292 | (2) with probability probability_randomaction: A^{*,d}_{t+1}=randomly sampled action
293 |     with probability 1-probability_randomaction: A^{*,d}_{t+1}=argmin_{A^d}Q(S_{t+1}, A^d, A^c(theta), A^r(A^d, A^c(theta)); omega)
294 | (3) A^{*,r}_{t+1}=A^r(A^{*,d}_{t+1}, A^c(theta))
295 | """
def NextAction_OnPolicySimulation(next_machine_states, next_buffer_states, next_workingstatus, next_SOC, t, my_critic, theta, probability_randomaction):
    """Choose the actions to take in the next state.

    A Bernoulli draw with success probability ``probability_randomaction``
    decides between the two modes:

    * draw == 0: the continuous actions are A^c(theta) (generated energy split
      according to theta) and the discrete/remainder actions are the
      combination with the smallest critic value among all combinations of
      machine actions, adjusting-status actions and purchased/discharged actions;
    * otherwise: every action is obtained from ``ActionSimulation``.

    Returns the tuple (machine actions, solar actions, wind actions,
    generator actions, adjusting-status actions, purchased actions,
    discharged action).
    """

    def assemble(machine_actions, adjusting, solar, wind, generator, purchased, discharged):
        #system in the next state carrying the given actions#
        return ManufacturingSystem(machine_states=next_machine_states,
                                   machine_control_actions=machine_actions,
                                   buffer_states=next_buffer_states,
                                   grid=Microgrid(workingstatus=next_workingstatus,
                                                  SOC=next_SOC,
                                                  actions_adjustingstatus=adjusting,
                                                  actions_solar=solar,
                                                  actions_wind=wind,
                                                  actions_generator=generator,
                                                  actions_purchased=purchased,
                                                  actions_discharged=discharged,
                                                  solarirradiance=solarirradiance[t//8640],
                                                  windspeed=windspeed[t//8640]
                                                  )
                                   )

    def assemble_without_actions():
        #system in the next state with "K" machine actions and all microgrid actions zero#
        return assemble(["K" for _ in range(number_machines)],
                        [0,0,0], [0,0,0], [0,0,0], [0,0,0], [0,0], 0)

    #auxiliary system with the next system and grid states#
    AuxiliarySystem = assemble_without_actions()
    #energy generated under the next states by the solar PV, the wind turbine and the generator#
    e_solar = AuxiliarySystem.grid.energy_generated_solar()
    e_wind = AuxiliarySystem.grid.energy_generated_wind()
    e_generator = AuxiliarySystem.grid.energy_generated_generator()
    #continuous actions A^c(theta): each source is split by two theta components plus the remainder#
    next_actions_solar = [e_solar*theta[0], e_solar*theta[1], e_solar*(1-theta[0]-theta[1])]
    next_actions_wind = [e_wind*theta[2], e_wind*theta[3], e_wind*(1-theta[2]-theta[3])]
    next_actions_generator = [e_generator*theta[4], e_generator*theta[5], e_generator*(1-theta[4]-theta[5])]

    #1 means: use randomly simulated actions; 0 means: use on-policy actions#
    indicator = np.random.binomial(n=1, p=probability_randomaction, size=1)

    if indicator != 0:
        #randomly sampled actions#
        next_action = ActionSimulation(System=assemble_without_actions())
        next_microgrid_actions_adjustingstatus = next_action.MicroGridActions_adjustingstatus()
        next_actions_solar, next_actions_wind, next_actions_generator = next_action.MicroGridActions_SolarWindGenerator(theta)
        next_microgrid_actions_purchased, next_microgrid_actions_discharged = next_action.MicroGridActions_PurchasedDischarged(next_actions_solar,
                                                                                                                             next_actions_wind,
                                                                                                                             next_actions_generator)
        next_machine_actions = next_action.MachineActions()
        return (next_machine_actions, next_actions_solar, next_actions_wind, next_actions_generator,
                next_microgrid_actions_adjustingstatus, next_microgrid_actions_purchased, next_microgrid_actions_discharged)

    #on-policy actions: enumerate all admissible machine actions#
    machine_action_tree = MachineActionTree(machine_action="ROOT")
    machine_action_tree.BuildTree(AuxiliarySystem, level=0, tree=machine_action_tree)
    machine_action_tree.TraverseTree(level=0, tree=machine_action_tree, machine_action_list=[])
    machine_candidates = machine_action_tree.machine_action_set_list
    #all admissible microgrid actions for adjusting the status and for purchase/discharge#
    microgrid_action_set_DR = MicrogridActionSet_Discrete_Remainder(AuxiliarySystem)
    adjusting_candidates = microgrid_action_set_DR.List_AdjustingStatus()
    purchase_discharge_candidates = microgrid_action_set_DR.List_PurchasedDischarged(actions_solar=next_actions_solar,
                                                                                     actions_wind=next_actions_wind,
                                                                                     actions_generator=next_actions_generator)

    #values returned when there is no candidate combination at all#
    next_machine_actions = []
    next_microgrid_actions_adjustingstatus = []
    next_microgrid_actions_purchased = []
    next_microgrid_actions_discharged = 0
    #None until the first candidate has been evaluated#
    smallest_Q = None
    for machine_candidate in machine_candidates:
        for adjusting_candidate in adjusting_candidates:
            for purchase_discharge in purchase_discharge_candidates:
                candidate_system = assemble(machine_candidate,
                                            adjusting_candidate,
                                            next_actions_solar,
                                            next_actions_wind,
                                            next_actions_generator,
                                            purchase_discharge[0],
                                            purchase_discharge[1])
                av = action_value(candidate_system, my_critic)
                candidate_Q = av.Q(av.num_list_States_Actions())
                #the first candidate is always kept, later ones only if strictly smaller#
                if smallest_Q is None or candidate_Q < smallest_Q:
                    smallest_Q = candidate_Q
                    next_machine_actions = machine_candidate
                    next_microgrid_actions_adjustingstatus = adjusting_candidate
                    next_microgrid_actions_purchased = purchase_discharge[0]
                    next_microgrid_actions_discharged = purchase_discharge[1]

    return (next_machine_actions, next_actions_solar, next_actions_wind, next_actions_generator,
            next_microgrid_actions_adjustingstatus, next_microgrid_actions_purchased, next_microgrid_actions_discharged)
406 | 
407 | 
408 | 
409 | 
410 | """
 411 | Reinforcement Learning Algorithm: On-policy TD control combined with actor-critic
412 | Algorithm 1 in the paper
413 | The Training Process of the Reinforcement Learning Algorithm
414 | """
def Reinforcement_Learning_Training(System_init, #the initial system
                                    theta_init,  #the initial theta value
                                    lr_theta_init, #the initial learning rate in the theta iteration
                                    lr_omega_init, #the initial learning rate in the omega iteration
                                    number_iteration, #the total number of training iterations
                                    ):
    """Run the training loop (TD control combined with a critic and a policy-gradient step on theta).

    Each iteration evaluates the cost and the critic at the current
    (state, action), takes a projected policy-gradient step on theta, moves the
    system to the next state, selects the next actions with
    ``NextAction_OnPolicySimulation``, forms the temporal difference
    ``E + gamma*Q_new - Q_old`` and updates the critic weights with the factor
    ``lr_omega*TD``. The omega learning rate is multiplied by 0.999 after every
    iteration; the theta learning rate is left unchanged.

    Uses the module-level names ``seed``, ``gamma``, ``p_choose_random_action``,
    ``rate_consumption_charge``, ``solarirradiance`` and ``windspeed``.

    Side effects: enables TensorFlow eager execution and sets its random seed,
    prints progress, shows and saves the figures ``theta.png``, ``Q.png``,
    ``TD.png``, ``rewards.png`` and ``weightdifference.png``, and writes a
    summary to ``train_output.txt``.

    Returns
    -------
    tuple
        ``(theta, omega, my_critic)``: the final theta, the final critic
        weights as a list of numpy arrays (empty if no iteration was run), and
        the critic itself.
    """
    tf.enable_eager_execution()
    #set seed for tensorflow (including initialization of weights and bias) for reproducibility
    tf.set_random_seed(seed)

    #initialize#
    theta=theta_init
    System=System_init
    lr_tht=lr_theta_init
    lr_omg=lr_omega_init

    #the critic is with parameters given by omega
    my_critic = critic()
    omega = []
    #defined up front so that the summary below also works when number_iteration is 0
    omega_init = []

    #sequences of Q values, temporal differences, accumulated rewards and weight changes
    Q=[]
    TemporalDifference=[]
    rewardseq=[]
    reward=0
    diff_omega=[]

    ################### reinforcement learning training process ###################
    for t in range(number_iteration):
        print("---------- iteration", t, "----------")
        #calculate the total cost at S_t, A_t: E(S_t, A_t)
        E=System.average_total_cost(rate_consumption_charge[t//8640])
        #calculate the old Q-value Q(S_t, A_t; omega_t); the critic stores its gradients as a side effect
        av=action_value(System, my_critic)
        num_list_SA=av.num_list_States_Actions()
        Q_old=av.Q(num_list_SA)
        Q.append(Q_old)
        #update the theta using deterministic policy gradient#
        #(Q_grad_A_c is read right after Q_old, so it belongs to (S_t, A_t))#
        upd_tht=update_theta(System, theta)
        policy_grad=upd_tht.deterministic_policygradient(upd_tht.A_c_gradient_theta(), av.Q_grad_A_c())
        theta=upd_tht.update(policy_grad, lr_tht)
        #calculate the next states: S_{t+1}#
        next_machine_states, next_buffer_states=System.transition_manufacturing()
        next_workingstatus, next_SOC=System.grid.transition()
        #determine A^c(theta)=energy distributed in [solar, wind, generator]
        #determine A^{*,d}_{t+1}=argmin_{A^d}Q(S_{t+1}, A^d, A^c(theta), A^r(A^d, A^c(theta)); omega)
        #determine A^{*,r}_{t+1}=A^r(A^{*,d}_{t+1}, A^c(theta))
        (next_machine_actions,
         next_actions_solar,
         next_actions_wind,
         next_actions_generator,
         next_microgrid_actions_adjustingstatus,
         next_microgrid_actions_purchased,
         next_microgrid_actions_discharged) = NextAction_OnPolicySimulation(next_machine_states,
                                                                            next_buffer_states,
                                                                            next_workingstatus,
                                                                            next_SOC,
                                                                            t,
                                                                            my_critic,
                                                                            theta,
                                                                            probability_randomaction=p_choose_random_action)
        grid=Microgrid(workingstatus=next_workingstatus,
                       SOC=next_SOC,
                       actions_adjustingstatus=next_microgrid_actions_adjustingstatus,
                       actions_solar=next_actions_solar,
                       actions_wind=next_actions_wind,
                       actions_generator=next_actions_generator,
                       actions_purchased=next_microgrid_actions_purchased,
                       actions_discharged=next_microgrid_actions_discharged,
                       solarirradiance=solarirradiance[t//8640],
                       windspeed=windspeed[t//8640]
                       )
        System=ManufacturingSystem(machine_states=next_machine_states,
                                   machine_control_actions=next_machine_actions,
                                   buffer_states=next_buffer_states,
                                   grid=grid
                                   )

        #TD-control SARSA#
        av=action_value(System, my_critic)
        num_list_SA=av.num_list_States_Actions()
        Q_new=av.Q(num_list_SA)
        TD=E+gamma*Q_new-Q_old
        TemporalDifference.append(TD)
        #calculate the up-to-date reward#
        reward=reward+np.power(gamma, t)*E
        rewardseq.append(reward)
        #update omega#
        # NOTE(review): update_weights applies the gradients the critic stored on
        # its most recent call, which here is the Q_new evaluation above (not
        # Q_old) -- confirm this matches the intended update rule.
        factor=lr_omg*TD
        my_critic.update_weights(factor)
        #snapshot the weights and record how far they moved in this iteration#
        omega_new = [var.numpy() for var in my_critic.trainable_variables]
        if t==0:
            diff_omega.append(0)
            omega_init = omega_new
        else:
            diff_omega.append(np.sum([np.linalg.norm(new_var-old_var) for new_var, old_var in zip(omega_new, omega)]))
        omega = omega_new

        #discount the learning rate#
        lr_tht=lr_tht*1
        lr_omg=lr_omg*0.999

        #plot the theta values#
        plt.scatter(theta[0], theta[1], color='b')
        plt.scatter(theta[2], theta[3], marker='+', color='m')
        plt.scatter(theta[4], theta[5], marker='*', color='r')
        #end of the iteration loop for reinforcement learning training process#

    #plot the theta dynamics#
    plt.savefig('theta.png')
    plt.show()

    #plot the Q values#
    plt.figure(figsize = (14,10))
    plt.plot(Q)
    plt.xlabel('iteration')
    plt.ylabel('action-value-function')
    plt.savefig('Q.png')
    plt.show()

    #plot the temporal differences#
    plt.figure(figsize = (14,10))
    plt.plot(TemporalDifference)
    plt.xlabel('iteration')
    plt.ylabel('temporal difference function')
    plt.savefig('TD.png')
    plt.show()

    #plot the reward sequences#
    plt.figure(figsize = (14,10))
    plt.plot([value*10000 for value in rewardseq])
    plt.xlabel('iteration')
    plt.ylabel('Sum of rewards during episode ($)')
    plt.savefig('rewards.png')
    plt.show()

    #plot the weight difference sequences#
    plt.figure(figsize = (14,10))
    plt.plot(diff_omega)
    plt.xlabel('iteration')
    plt.ylabel('L2 norm of the difference in the weights')
    plt.savefig('weightdifference.png')
    plt.show()

    #record the training process onto a file train_output.txt
    #(the with-block closes the file even if one of the prints raises)
    with open('train_output.txt', 'w') as troutput:
        print("************************ Reinforcement Learning Training at "+str(number_iteration)+" steps ************************", file=troutput)

        #print the initial theta and optimal theta after training
        print("\nthe initial theta is given by: ", theta_init, file=troutput)
        print("\nthe optimal theta after training is given by: ", theta, file=troutput)
        #print the distance between initial and final theta (sum of component-wise norms)
        print("\nthe theta moves at a L^2 distance: ", np.sum([np.linalg.norm(new_var-old_var) for new_var, old_var in zip(theta, theta_init)]), file=troutput)

        #print the initial omega and optimal omega after training for the critic parameters
        print("\nthe seed for choosing initial weight and bias parameter is given by: ", seed, file=troutput)
        print("\nthe initial omega for the critic is given by: ", omega_init, file=troutput)
        print("\nthe optimal omega for the critic after training is given by: ", omega, file=troutput)
        #print the distance between initial and final omega (sum of per-variable norms)
        print("\nthe omega moves at a L^2 distance: ", np.sum([np.linalg.norm(new_var-old_var) for new_var, old_var in zip(omega, omega_init)]), file=troutput)

    return theta, omega, my_critic
579 |     
580 | 
581 | 
582 | 
583 | 
584 | """
 585 | Reinforcement Learning Algorithm: Off-policy TD control combined with actor-critic
586 | Algorithm 1 in the paper
587 | The Testing Process of the Reinforcement Learning Algorithm
588 | Output at a given horizon the system dynamics under the optimal action selected by Reinforcement Learning training
589 | select the best action-value function at each iteration
590 | """
def Reinforcement_Learning_Testing(System_init, #the initial point of the system dynamics
                                   thetainit,   #the initial theta before training
                                   thetaoptimal, #the optimal theta obtained after RL training
                                   omegaoptimal,    #the optimal parameter of critic obtained after training
                                   my_critic_optimal, #the optimal critic obtained after training
                                   number_iteration, #the total number of testing iterations
                                   unit_reward_production #the unit reward of production used to calculate target output
                                   ):
    """
    Run the system for number_iteration steps under the trained policy and log the run.

    The log is written to 'test_output.txt' (overwritten on every call). At each
    step the current system is printed, its throughput, total cost and energy
    demand are accumulated, and the next state/action pair is built with
    NextAction_OnPolicySimulation called with probability_randomaction=0.

    Returns a 4-tuple:
        totalcostlist_optimal          cumulative total cost, starts with 0, one entry per step
        totalthroughputlist_optimal    cumulative throughput, starts with 0, one entry per step
        totalenergydemandlist_optimal  cumulative energy demand, starts with 0, one entry per step
        RL_target_output               sum over steps of int(throughput/unit_reward_production)
    """
    #output the result to a file test_output.txt#
    #the with-block guarantees the file is closed even if a step of the simulation raises#
    with open('test_output.txt', 'w') as testoutput:
        print("************************* Optimal System with optimal policy *************************", file=testoutput)
        print("***Run the system on optimal policy at a time horizon=", number_iteration,"***", file=testoutput)
        print("\n", file=testoutput)
        print("initial proportion of energy supply=", thetainit, file=testoutput)
        print("optimal proportion of energy supply=", thetaoptimal, file=testoutput)
        print("optimal parameter for the neural-network integrated action-value function=", omegaoptimal, file=testoutput)
        print("\n", file=testoutput)
        #run the MDP under optimal theta and optimal omega#
        #at every step search among all discrete actions to find A^d_*=argmin_{A^d}Q(A^d, A^c(thetaoptimal), A^r(A^d, A^c(thetaoptimal)))#
        #Calculate 1. Total cost (E) and throughput in given time horizon that the algorithm is used to guide the bilateral control#
        #Calculate 2. Total energy demand across all time periods of the given time horizon#

        #set the initial point of running the system
        System=System_init
        #initialize the list of total cost, total throughput and total energy demand that will be returned
        totalcostlist_optimal=[0]
        totalthroughputlist_optimal=[0]
        totalenergydemandlist_optimal=[0]

        #set the total cost, total throughput and the total energy demand#
        totalcost=0
        totalthroughput=0
        totalenergydemand=0
        RL_target_output=0
        #reinforcement learning testing loop
        for t in range(number_iteration):
            #start of the iteration loop for reinforcement learning testing process#
            #current states and actions S_t and A_t are stored in class System#
            #Print (S_t, A_t)#
            print("*********************Time Step", t, "*********************", file=testoutput)
            System.PrintSystem(testoutput, t)
            #accumulate the total throughput#
            totalthroughput+=System.throughput()
            RL_target_output+=int(System.throughput()/unit_reward_production)
            totalthroughputlist_optimal.append(totalthroughput)
            #calculate the total cost at S_t, A_t: E(S_t, A_t)#
            E=System.average_total_cost(rate_consumption_charge[t//8640])
            #accumulate the total cost#
            totalcost+=E
            totalcostlist_optimal.append(totalcost)
            #accumulate the total energy demand#
            totalenergydemand+=System.energydemand(rate_consumption_charge[t//8640])
            totalenergydemandlist_optimal.append(totalenergydemand)
            #determine the next system and grid states#
            next_machine_states, next_buffer_states=System.transition_manufacturing()
            next_workingstatus, next_SOC=System.grid.transition()
            #determine the next continuous actions A^c(thetaoptimal)=energy distributed to [solar, wind, generator]#
            #determine the next discrete actions by finding A^{*,d}_{t+1}=argmin_{A^d}Q(S_{t+1}, A^d, A^c(thetaoptimal), A^r(A^d, A^c(thetaoptimal)); omegaoptimal)#
            #determine the next remainder actions A^{*,r}_{t+1}=A^r(A^{*,d}_{t+1}, A^c(thetaoptimal))
            (optimal_next_machine_actions,
             optimal_next_actions_solar,
             optimal_next_actions_wind,
             optimal_next_actions_generator,
             optimal_next_microgrid_actions_adjustingstatus,
             optimal_next_microgrid_actions_purchased,
             optimal_next_microgrid_actions_discharged) = NextAction_OnPolicySimulation(
                next_machine_states,
                next_buffer_states,
                next_workingstatus,
                next_SOC,
                t,
                my_critic_optimal,
                thetaoptimal,
                probability_randomaction=0)
            #update the manufacturing system and the grid according to S_{t+1}, A^{*,d}_{t+1}, A^c(thetaoptimal), A^{*,r}_{t+1}#
            grid=Microgrid(workingstatus=next_workingstatus,
                           SOC=next_SOC,
                           actions_adjustingstatus=optimal_next_microgrid_actions_adjustingstatus,
                           actions_solar=optimal_next_actions_solar,
                           actions_wind=optimal_next_actions_wind,
                           actions_generator=optimal_next_actions_generator,
                           actions_purchased=optimal_next_microgrid_actions_purchased,
                           actions_discharged=optimal_next_microgrid_actions_discharged,
                           solarirradiance=solarirradiance[t//8640],
                           windspeed=windspeed[t//8640]
                           )
            System=ManufacturingSystem(machine_states=next_machine_states,
                                       machine_control_actions=optimal_next_machine_actions,
                                       buffer_states=next_buffer_states,
                                       grid=grid
                                       )
            #end of the iteration loop for reinforcement learning testing process#

        print("\n****************** SUMMARY *******************", file=testoutput)
        print("\ntotal cost list (10^4$) =", totalcostlist_optimal, file=testoutput)
        print("\ntotal throughput list (10^4$) =", totalthroughputlist_optimal, file=testoutput)
        print("\ntotal energy demand list (10^4$) =", totalenergydemandlist_optimal, file=testoutput)

        print("\ntotal cost ($) =", totalcost*10000, file=testoutput)
        print("\ntotal throughput ($) =", totalthroughput*10000, file=testoutput)
        print("\ntotal energy demand ($) =", totalenergydemand*10000, file=testoutput)
        print("\ntarget output (unit) =", RL_target_output, file=testoutput)

    return totalcostlist_optimal, totalthroughputlist_optimal, totalenergydemandlist_optimal, RL_target_output
693 | 
694 | 
695 | 
696 | 
697 | 
698 | """
699 | Benchmark Testing Process of the Manufacturing System Dynamics under randomly selected actions
700 | Output at a given horizon the system dynamics under randomly selected actions
701 | """
def Benchmark_RandomAction_Testing(System_init, #the initial point of running the system dynamics
                                   thetainit, #the initial theta used before RL training
                                   number_iteration, #the number of benchmark iterations
                                   unit_reward_production #the unit reward of production used to calculate target output
                                   ):
    """
    Run the system for number_iteration steps under random actions and log the run.

    The log is written to 'benchmark_output.txt' (overwritten on every call). At
    each step the current system is printed, its throughput, total cost and
    energy demand are accumulated, and the next state/action pair is built with
    NextAction_OnPolicySimulation called with theta=thetainit and
    probability_randomaction=1.

    Returns a 4-tuple:
        totalcostlist_benchmark          cumulative total cost, starts with 0, one entry per step
        totalthroughputlist_benchmark    cumulative throughput, starts with 0, one entry per step
        totalenergydemandlist_benchmark  cumulative energy demand, starts with 0, one entry per step
        random_target_output             sum over steps of int(throughput/unit_reward_production)
    """
    #output the result to a file benchmark_output.txt
    #the with-block guarantees the file is closed even if a step of the simulation raises#
    with open('benchmark_output.txt', 'w') as bmoutput:
        #As benchmark, with initial theta and randomly simulated actions, run the system at a certain time horizon#
        print("************************* BenchMark System with initial theta and random actions *************************", file=bmoutput)
        print("***Run the system on random policy at a time horizon=", number_iteration,"***", file=bmoutput)
        print("\n", file=bmoutput)
        print("initial proportion of energy supply=", thetainit, file=bmoutput)
        print("\n", file=bmoutput)

        #set the initial point of running the system
        System=System_init

        #cumulative lists that will be returned and compared against the optimal control#
        totalcostlist_benchmark=[0]
        totalthroughputlist_benchmark=[0]
        totalenergydemandlist_benchmark=[0]

        #set the total cost, total throughput and the total energy demand#
        totalcost=0
        totalthroughput=0
        totalenergydemand=0
        random_target_output=0
        #benchmark system iteration loop
        for t in range(number_iteration):
            #start of the iteration loop for a benchmark system with initial theta and random actions#
            #current states and actions S_t and A_t are stored in class System#
            #Print (S_t, A_t)#
            print("*********************Time Step", t, "*********************", file=bmoutput)
            System.PrintSystem(bmoutput, t)
            #accumulate the total throughput#
            totalthroughput+=System.throughput()
            random_target_output+=int(System.throughput()/unit_reward_production)
            totalthroughputlist_benchmark.append(totalthroughput)
            #calculate the total cost at S_t, A_t: E(S_t, A_t)#
            E=System.average_total_cost(rate_consumption_charge[t//8640])
            #accumulate the total cost#
            totalcost+=E
            totalcostlist_benchmark.append(totalcost)
            #accumulate the total energy demand#
            totalenergydemand+=System.energydemand(rate_consumption_charge[t//8640])
            totalenergydemandlist_benchmark.append(totalenergydemand)
            #determine the next system and grid states#
            next_machine_states, next_buffer_states=System.transition_manufacturing()
            next_workingstatus, next_SOC=System.grid.transition()
            #update the manufacturing system and the grid according to S_{t+1}, A^{d}_{t+1}, A^c(thetainit), A^{r}_{t+1}#
            #NOTE(review): a fresh critic is built on every step; it looks loop-invariant, but it is
            #kept here because the side effects of critic() are not visible from this function - confirm before hoisting#
            my_critic = critic()
            (next_machine_actions,
             next_actions_solar,
             next_actions_wind,
             next_actions_generator,
             next_microgrid_actions_adjustingstatus,
             next_microgrid_actions_purchased,
             next_microgrid_actions_discharged) = NextAction_OnPolicySimulation(
                next_machine_states,
                next_buffer_states,
                next_workingstatus,
                next_SOC,
                t,
                my_critic,
                theta=thetainit,
                probability_randomaction=1)

            grid=Microgrid(workingstatus=next_workingstatus,
                           SOC=next_SOC,
                           actions_adjustingstatus=next_microgrid_actions_adjustingstatus,
                           actions_solar=next_actions_solar,
                           actions_wind=next_actions_wind,
                           actions_generator=next_actions_generator,
                           actions_purchased=next_microgrid_actions_purchased,
                           actions_discharged=next_microgrid_actions_discharged,
                           solarirradiance=solarirradiance[t//8640],
                           windspeed=windspeed[t//8640]
                           )
            System=ManufacturingSystem(machine_states=next_machine_states,
                                       machine_control_actions=next_machine_actions,
                                       buffer_states=next_buffer_states,
                                       grid=grid
                                       )
            #end of the iteration loop for a benchmark system with initial theta and random actions#

        print("\n****************** SUMMARY *******************", file=bmoutput)
        print("\ntotal cost list (10^4$) =", totalcostlist_benchmark, file=bmoutput)
        print("\ntotal throughput list (10^4$) =", totalthroughputlist_benchmark, file=bmoutput)
        print("\ntotal energy demand list (10^4$) =", totalenergydemandlist_benchmark, file=bmoutput)

        print("\ntotal cost ($) =", totalcost*10000, file=bmoutput)
        print("\ntotal throughput ($) =", totalthroughput*10000, file=bmoutput)
        print("\ntotal energy demand ($) =", totalenergydemand*10000, file=bmoutput)
        print("\ntarget output (unit) =", random_target_output, file=bmoutput)

    return totalcostlist_benchmark, totalthroughputlist_benchmark, totalenergydemandlist_benchmark, random_target_output
796 | 
797 | 
798 | 
799 | 
800 | """
801 | ################################ MAIN TESTING FILE #####################################
802 | ################################ FOR DEBUGGING ONLY #####################################
803 | 
804 | Testing the Reinforcement Learning Algorithm: On-policy TD control combined with actor-critic
805 | Algorithm 1 in the paper
806 | 
807 | Compare its behavior with randomly selected actions
808 | 
809 | When optimal policy is found, must add
810 | 1. Total cost and throughput in given time horizon that the 
811 |    algorithm is used to guide the bilateral control.
812 | 2. Total energy demand across all time periods of the given 
813 |    time horizon and the proportion of the energy supply to satisfy the demand. 
814 | """
815 | 
if __name__ == "__main__":
    #debug driver: train briefly, then compare the trained policy against random actions#

    #initial learning rates for the theta and omega iterations#
    lr_theta_initial = 0.003
    lr_omega_initial = 0.0003

    #number of training and testing iterations#
    training_number_iteration = 5
    testing_number_iteration = 100

    #initial machine states, machine control actions and buffer states#
    initial_machine_states = ["Opr"] * number_machines
    initial_machine_actions = ["K"] * number_machines
    initial_buffer_states = [2] * (number_machines - 1)

    #initialize the system#
    System = SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)

    #randomly generate an initial theta: each consecutive pair (r[2k], r[2k+1]) gives#
    #the two entries r[2k]*r[2k+1] and r[2k]*(1-r[2k+1])#
    r = np.random.uniform(0, 1, size=6)
    theta = []
    for scale, split in zip(r[0::2], r[1::2]):
        theta.append(scale * split)
        theta.append(scale * (1 - split))
    #record the initial theta applied before training#
    thetainit = theta

    #plot the boundary of the simplex where theta moves#
    x = [[0, 0], [0, 1], [1, 0]]
    y = [[0, 1], [1, 0], [0, 0]]
    plt.figure(figsize=(14, 10))
    for edge_x, edge_y in zip(x, y):
        plt.plot(edge_x, edge_y, color='g')

    #training#
    theta, omega, my_critic = Reinforcement_Learning_Training(
        System,
        thetainit,
        lr_theta_initial,
        lr_omega_initial,
        training_number_iteration,
    )

    #keep the trained theta, omega and critic under explicit "optimal" names#
    thetaoptimal = theta
    omegaoptimal = omega
    my_critic_optimal = my_critic

    #run the system under the trained policy from a freshly initialized system#
    System = SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)
    (totalcostlist_optimal,
     totalthroughputlist_optimal,
     totalenergydemandlist_optimal,
     RL_target_output) = Reinforcement_Learning_Testing(
        System,
        thetainit,
        thetaoptimal,
        omegaoptimal,
        my_critic_optimal,
        testing_number_iteration,
        unit_reward_production,
    )

    #as benchmark, run the system with initial theta and random actions from a freshly initialized system#
    System = SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)
    (totalcostlist_benchmark,
     totalthroughputlist_benchmark,
     totalenergydemandlist_benchmark,
     random_target_output) = Benchmark_RandomAction_Testing(
        System,
        thetainit,
        testing_number_iteration,
        unit_reward_production,
    )

    def _plot_comparison(series_optimal, series_benchmark, y_label, title, filename):
        #draw optimal (red, solid) against benchmark (blue, dashed), save the figure, then show it#
        plt.figure(figsize=(14, 10))
        plt.plot(series_optimal, '-', color='r')
        plt.plot(series_benchmark, '--', color='b')
        plt.xlabel('iteration')
        plt.ylabel(y_label)
        plt.title(title)
        plt.savefig(filename)
        plt.show()

    #total cost, scaled by 10000#
    _plot_comparison([value * 10000 for value in totalcostlist_optimal],
                     [value * 10000 for value in totalcostlist_benchmark],
                     'total cost ($)',
                     'Total cost under optimal policy (red, solid) and benchmark random policy (blue, dashed)',
                     'totalcost.png')

    #total throughput, in dollar amount#
    _plot_comparison([value * 10000 for value in totalthroughputlist_optimal],
                     [value * 10000 for value in totalthroughputlist_benchmark],
                     'total throughput ($)',
                     'Total throughput under optimal policy (red, solid) and benchmark random policy (blue, dashed)',
                     'totalthroughput.png')

    #total throughput, in production units#
    _plot_comparison([value / unit_reward_production for value in totalthroughputlist_optimal],
                     [value / unit_reward_production for value in totalthroughputlist_benchmark],
                     'total throughput (production unit)',
                     'Total throughput (production unit) under optimal policy (red, solid) and benchmark random policy (blue, dashed)',
                     'totalthroughput_unit.png')

    #total energy demand, scaled by 10000#
    _plot_comparison([value * 10000 for value in totalenergydemandlist_optimal],
                     [value * 10000 for value in totalenergydemandlist_benchmark],
                     'total energy cost ($)',
                     'Total energy cost under optimal policy (red, solid) and benchmark random policy (blue, dashed)',
                     'totalenergycost.png')


--------------------------------------------------------------------------------
/microgrid_manufacturing_system.py:
--------------------------------------------------------------------------------
   1 | # -*- coding: utf-8 -*-
   2 | """
   3 | Created on Fri Jan 3 14:33:36 2020
   4 | @author: Wenqing Hu (Missouri S&T)
   5 | Title: MDP for joint control of microgrid and manufacturing system
   6 | """
   7 | 
   8 | import numpy as np
   9 | from random import choice
  10 | from projectionSimplex import projection
  11 | 
  12 | """
  13 | Set up all parameters that are constant throughout the system
  14 | units of measurement: hour, km, MegaWatt(10^6Watt), 10^4 us dollar ($)
  15 | """
  16 | Delta_t=1
  17 | #the actual time measured in one decision epoch unit, in hours#
  18 | cutin_windspeed=3*3.6
  19 | #the cut-in windspeed (km/h=1/3.6 m/s), v^ci#
  20 | cutoff_windspeed=11*3.6
  21 | #the cut-off windspeed (km/h=1/3.6 m/s), v^co#
  22 | rated_windspeed=7*3.6
  23 | #the rated windspeed (km/h=1/3.6 m/s), v^r#
  24 | charging_discharging_efficiency=0.95
  25 | #the charging-discharging efficiency, eta#
  26 | rate_battery_discharge=2/1000
  27 | #the rate for discharging the battery (MegaWatt), b#
  28 | unit_operational_cost_solar=0.17/10
  29 | #the unit operational and maintanance cost for generating power from solar PV (10^4$/MegaWattHour=10 $/kWHour), r_omc^s#
  30 | unit_operational_cost_wind=0.08/10
  31 | #the unit operational and maintanance cost for generating power from wind turbine (10^4$/MegaWattHour=10 $/kWHour), r_omc^w#
  32 | unit_operational_cost_generator=0.45/10
  33 | #the unit opeartional and maintanance cost for generating power from generator (10^4$/MegaWattHour=10 $/kWHour), r_omc^g#
  34 | unit_operational_cost_battery=0.9/10
  35 | #the unit operational and maintanance cost for battery storage system per unit charging/discharging cycle (10^4$/MegaWattHour=10 $/kWHour), r_omc^b#
  36 | capacity_battery_storage=350/1000
  37 | #the capacity of battery storage system (MegaWatt Hour=1000 kWHour), e#
  38 | SOC_max=0.95*capacity_battery_storage
  39 | #the maximum state of charge of battery system#
  40 | SOC_min=0.05*capacity_battery_storage
  41 | #the minimum state of charge of battery system#
  42 | area_solarPV=1400/(1000*1000)
  43 | #the area of the solar PV system (km^2=1000*1000 m^2), a#
  44 | efficiency_solarPV=0.2
  45 | #the efficiency of the solar PV system, delta#
  46 | density_of_air=1.225
  47 | #calculate the rated power of the wind turbine, density of air (10^6kg/km^3=1 kg/m^3), rho#
  48 | radius_wind_turbine_blade=25/1000
  49 | #calculate the rated power of the wind turbine, radius of the wind turbine blade (km=1000 m), r#
  50 | average_wind_speed=3.952*3.6
  51 | #calculate the rated power of the wind turbine, average wind speed (km/h=1/3.6 m/s), v_avg (from the windspeed table)#
  52 | power_coefficient=0.593
  53 | #calculate the rated power of the wind turbine, power coefficient, theta#
  54 | gearbox_transmission_efficiency=0.9
  55 | #calculate the rated power of the wind turbine, gearbox transmission efficiency, eta_t#
  56 | electrical_generator_efficiency=0.9
  57 | #calculate the rated power of the wind turbine, electrical generator efficiency, eta_g#
  58 | rated_power_wind_turbine_original=0.5*density_of_air*np.pi*radius_wind_turbine_blade*radius_wind_turbine_blade*average_wind_speed*average_wind_speed*average_wind_speed*power_coefficient*gearbox_transmission_efficiency*electrical_generator_efficiency
  59 | rated_power_wind_turbine=rated_power_wind_turbine_original/(3.6*3.6*3.6)
  60 | #the rated power of the wind turbine, RP_w (MegaWatt=10^6 W), 
  61 | #with the radius_wind_turbine_blade measured in km=10^3m, average wind speed measured in km/hour=3.6m/s, RP_w will be calculated as RP_w_numerical
  62 | #then RP_w in MegaWatt=(1 kg/m^3)*(10^3 m)*(10^3 m)*(3.6 m/s)*(3.6 m/s)*(3.6 m/s)*RP_w_numerical=3.6^3*10^6 RP_w_numerical W=3.6^3 RP_w_numerical MegaWatt#
  63 | number_windturbine=1
  64 | #the number of wind turbine in the onsite generation system, N_w#
  65 | number_generators=1
  66 | #the number of generators, n_g#
  67 | rated_output_power_generator=65/1000
  68 | #the rated output power of the generator (MegaWatt=1000kW), G_p#
  69 | unit_reward_production=10000/10000
  70 | #the unit reward for each unit of production (10^4$/unit produced), i.e. the r^p, this applies to the end of the machine sequence#
  71 | unit_reward_soldbackenergy=0.2/10
  72 | #the unit reward from sold back energy (10^4$/MegaWattHour=10 $/kWHour), r^sb#
  73 | number_machines=5
  74 | #the total number of machines in the manufacturing system, total number of buffers=number_machines-1#
  75 | machine_lifetime_scale_parameter=[111.39/60, 51.1/60, 110.9/60, 239.1/60, 112.1/60]
  76 | #the set of machine lifetime scale parameters (hour), size=number_machines#
  77 | machine_lifetime_shape_parameter=[1.5766, 1.6532, 1.7174, 1.421, 1.591]
  78 | #the set of machine lifetime shape parameters, size=number_machines#
  79 | machine_repairtime_mean=[4.95/60, 11.7/60, 15.97/60, 27.28/60, 18.37/60]
  80 | #the set of machine repairtime mean parameters (hour), size=number_machines#
  81 | machine_power_consumption_Opr=[115.5/1000, 115.5/1000, 115.5/1000, 170.5/1000, 132/1000]
  82 | #the set of amount of power drawn (MegaWatt) by the machine if the machine state is Opr (Operating), size=number_machines#
  83 | machine_power_consumption_Idl=[105/1000, 105/1000, 105/1000, 155/1000, 120/1000]
  84 | #the set of amount of power drawn (MegaWatt) by the machine if the machine state is Sta (Starvation) or Blo (Blockage), both are Idl (Idle) states, size=number_machines#
  85 | list_buffer_max=[1000, 1000, 1000, 1000]
  86 | list_buffer_min=[0, 0, 0, 0]
  87 | #the maximum and minumum of buffers, size=number_machine-1#
  88 | 
  89 | 
  90 | 
  91 | import pandas as pd
  92 | #read the solar irradiance and wind speed data from file#
  93 | #read the rate of consumption charge date from file#
  94 | file_SolarIrradiance = "SolarIrradiance.csv"
  95 | file_WindSpeed = "WindSpeed.csv"
  96 | file_rateConsumptionCharge = "rate_consumption_charge.csv"
  97 | #read the solar irradiace
  98 | data_solar = pd.read_csv(file_SolarIrradiance)
  99 | solarirradiance = np.array(data_solar.iloc[:,3])
 100 | #solar irradiance measured by MegaWatt/km^2
 101 | #read the windspeed 
 102 | data_wind = pd.read_csv(file_WindSpeed)
 103 | windspeed = 3.6*np.array(data_wind.iloc[:,3])
 104 | #windspeed measured by km/h=1/3.6 m/s
 105 | #read the rate of consumption charge
 106 | data_rate_consumption_charge = pd.read_csv(file_rateConsumptionCharge)
 107 | rate_consumption_charge = np.array(data_rate_consumption_charge.iloc[:,4])/10
 108 | #rate of consumption charge measured by 10^4$/MegaWatt=10 $/kWh
 109 | 
 110 | """
 111 | Define 3 major classes in the system: Machine, Buffer, Microgrid
 112 | """
 113 | """
 114 | the Machine class defines the variables and functions of one machine
 115 | """
 116 | class Machine(object):
 117 |     def __init__(self,
 118 |                  name=1,
 119 |                  #the label of this machine#
 120 |                  lifetime_shape_parameter=0, 
 121 |                  #random lifetime of machine follows Weibull distribution with shape parameter lifetime_shape_parameter
 122 |                  lifetime_scale_parameter=0,
 123 |                  #random lifetime of machine follows Weibull distribution with scale parameter lifetime_scale_parameter
 124 |                  repairtime_mean=0,
 125 |                  #random repair time of machine follows exponential distribution with mean repairtime_mean
 126 |                  power_consumption_Opr=0,
 127 |                  #amount of power drawn by the machine if the machine state is Opr (Operating)
 128 |                  power_consumption_Idl=0,
 129 |                  #amount of power drawn by the machine if the machine state is Sta (Starvation) or Blo (Blockage), both are Idl (Idle) states
 130 |                  state="OFF",
 131 |                  #machine state can be "Opr" (Operating), "Blo" (Blockage), "Sta" (Starvation), "Off", "Brk" (Break)
 132 |                  control_action="K",
 133 |                  #control actions of machine, actions can be "K"-action (keep the original operational), "H"-action (to turn off the machine) or "W"-action (to turn on the machine)#
 134 |                  is_last_machine=False
 135 |                  #check whether or not the machine is the last machine in the queue, if it is last machine, then it contributes to the throughput#
 136 |                  ):
 137 |         self.name=name
 138 |         self.lifetime_shape_parameter=lifetime_shape_parameter
 139 |         self.lifetime_scale_parameter=lifetime_scale_parameter
 140 |         self.repairtime_mean=repairtime_mean
 141 |         self.power_consumption_Opr=power_consumption_Opr
 142 |         self.power_consumption_Idl=power_consumption_Idl
 143 |         self.unit_reward_production=unit_reward_production
 144 |         self.state=state
 145 |         self.control_action=control_action
 146 |         self.is_last_machine=is_last_machine
 147 |     
 148 |     def EnergyConsumption(self):
 149 |         #Calculate the energy consumption of one machine in a time unit#
 150 |         PC=0 
 151 |         #PC is the amount drawn by a machine in a time unit#
 152 |         if self.state=="Brk" or self.state=="Off":
 153 |             PC=0
 154 |         elif self.state=="Opr":
 155 |             PC=self.power_consumption_Opr*Delta_t
 156 |         elif self.state=="Sta" or self.state=="Blo":
 157 |             PC=self.power_consumption_Idl*Delta_t
 158 |         return PC
 159 | 
 160 |     def LastMachineProduction(self):
 161 |         #only the last machine will produce that contributes to the throughput, when the state is Opr and the control action is K#
 162 |         if self.is_last_machine:
 163 |             if self.state!="Opr" or self.control_action=="H":
 164 |                 throughput=0
 165 |             elif self.state=="Opr" and self.control_action=="K":
 166 |                 throughput=1
 167 |             else:
 168 |                 throughput=0
 169 |         else:
 170 |             throughput=0
 171 |         return throughput
 172 |     
 173 |     def NextState_IsOff(self):
 174 |         #Based on the current state of the machine, determine if the state of the machine at next decision epoch is "Off"#
 175 |         #If is "Off" return True otherwise return False#
 176 |         #When return False, the next state lies in the set {"Brk", "Opr", "Sta", "Blo"}#
 177 |         if self.state=="Off":
 178 |             if self.control_action!="W":
 179 |                 IsOff=True
 180 |             else:
 181 |                 IsOff=False
 182 |         else:
 183 |             if self.control_action=="H":
 184 |                 IsOff=True
 185 |             else:
 186 |                 IsOff=False
 187 |         return IsOff
 188 |             
 189 |     def NextState_IsBrk(self):
 190 |         #Based on the current state of the machine, determine if the state of the machine at next decision epoch is "Brk"#
 191 |         #If is "Brk" return True otherwise return False#
 192 |         #When return False, the next state lies in the set {"Opr", "Sta", "Blo", "Off"}#
 193 |         L=self.lifetime_scale_parameter*np.random.weibull(self.lifetime_shape_parameter, 1)
 194 |         #the random variable L is the lifetime#
 195 |         D=np.random.exponential(self.repairtime_mean)
 196 |         #the random variable D is the repair time# 
 197 |         if self.state=="Brk":
 198 |             if D>=Delta_t:
 199 |                 IsBrk=True
 200 |             else:
 201 |                 IsBrk=False
 202 |         else:
 203 |             if self.state!="Off":
 204 |                 if L<Delta_t:
 205 |                     IsBrk=True
 206 |                 else:
 207 |                     IsBrk=False
 208 |             else:
 209 |                 IsBrk=False
 210 |         return IsBrk
 211 |     
 212 |     def PrintMachine(self, file):
 213 |         #print the status of the current machine: state, control_action taken, Energy Consumption, throughput, decide whether the next machine state is Brk#
 214 |         print("Machine", self.name, "=", self.state, ",", "action=", self.control_action, file=file)
 215 |         print(" Energy Consumption=", self.EnergyConsumption(), file=file)
 216 |         if self.is_last_machine:
 217 |             print(" ", file=file)
 218 |             print(" throughput=", self.LastMachineProduction(), file=file)
 219 |             print("\n", file=file)
 220 |         return None
 221 |         
 222 |         
 223 |         
 224 | """
 225 | the Buffer class defines variables and functions of one buffer
 226 | """
 227 | class Buffer(object):
 228 |     def __init__(self, 
 229 |                  name=1,
 230 |                  #the label of this buffer#
 231 |                  state=0,
 232 |                  #the buffer state is an integer from buffer_min (=0) to buffer_max 
 233 |                  buffer_max=0, 
 234 |                  #the maximal capacity of the buffer#
 235 |                  buffer_min=0,
 236 |                  #the minimal capacity of the buffer is zero#
 237 |                  previous_machine_state="Opr",
 238 |                  #the state of the machine that is previous to the current buffer#
 239 |                  next_machine_state="Off",
 240 |                  #the state of the machine that is next to the current buffer#
 241 |                  previous_machine_control_action="K",
 242 |                  #the control action applied to the machine that is previous to the current buffer#
 243 |                  next_machine_control_action="K"
 244 |                  #the control action applied to the machine that is next to the current buffer#
 245 |                  ):
 246 |         self.name=name
 247 |         self.state=state
 248 |         self.buffer_max=buffer_max
 249 |         self.buffer_min=buffer_min
 250 |         self.previous_machine_state=previous_machine_state
 251 |         self.next_machine_state=next_machine_state
 252 |         self.previous_machine_control_action=previous_machine_control_action
 253 |         self.next_machine_control_action=next_machine_control_action
 254 |         
 255 |     def NextState(self):
 256 |         #calculate the state of the buffer at next decision epoch, return this state#
 257 |         nextstate=self.state
 258 |         if self.previous_machine_state!="Opr" or self.previous_machine_control_action=="H":
 259 |             I_previous=0
 260 |         elif self.previous_machine_state=="Opr" and self.previous_machine_control_action=="K":
 261 |             I_previous=1
 262 |         else:
 263 |             I_previous=0
 264 |         if self.next_machine_state!="Opr" or self.next_machine_control_action=="H":
 265 |             I_next=0
 266 |         elif self.next_machine_state=="Opr" and self.next_machine_control_action=="K":
 267 |             I_next=1
 268 |         else:
 269 |             I_next=0
 270 |         nextstate=nextstate+I_previous-I_next
 271 |         if nextstate>self.buffer_max:
 272 |             nextstate=self.buffer_max
 273 |         if nextstate<self.buffer_min:
 274 |             nextstate=self.buffer_min
 275 |         return nextstate
 276 | 
 277 |     def PrintBuffer(self, file):
 278 |         #print the status of the current buffer: buffer state, next buffer state#
 279 |         print("Buffer", self.name, "=", self.state, file=file)
 280 |         print("\n", file=file)
 281 |         return None
 282 | 
 283 | 
 284 |         
 285 | """
 286 | the Microgrid class defines variables and functions of the microgrid
 287 | """
 288 | class Microgrid(object):
 289 |     def __init__(self,
 290 |                  workingstatus=[0,0,0],
 291 |                  #the working status of [solar PV, wind turbine, generator]#
 292 |                  SOC=0,
 293 |                  #the state of charge of the battery system#
 294 |                  actions_adjustingstatus=[0,0,0],
 295 |                  #the actions of adjusting the working status (connected =1 or not =0 to the load) of the [solar, wind, generator]#
 296 |                  actions_solar=[0,0,0],
 297 |                  #the solar energy used for supporting [manufaturing, charging battery, sold back]#
 298 |                  actions_wind=[0,0,0],
 299 |                  #the wind energy used for supporting [manufacturing, charging battery, sold back]#
 300 |                  actions_generator=[0,0,0],
 301 |                  #the use of the energy generated by the generator for supporting [manufacturing, charging battery, sold back]#
 302 |                  actions_purchased=[0,0],
 303 |                  #the use of the energy purchased from the grid for supporting [manufacturing, charging battery]#
 304 |                  actions_discharged=0,
 305 |                  #the energy discharged by the battery for supporting manufacturing#
 306 |                  solarirradiance=0,
 307 |                  #the environment feature: solar irradiance at current decision epoch#
 308 |                  windspeed=0
 309 |                  #the environment feature: wind speed at current decision epoch#
 310 |                  ):
 311 |         self.workingstatus=workingstatus
 312 |         self.SOC=SOC
 313 |         self.actions_adjustingstatus=actions_adjustingstatus
 314 |         self.actions_solar=actions_solar
 315 |         self.actions_wind=actions_wind
 316 |         self.actions_generator=actions_generator
 317 |         self.actions_purchased=actions_purchased
 318 |         self.actions_discharged=actions_discharged
 319 |         self.solarirradiance=solarirradiance
 320 |         self.windspeed=windspeed
 321 |         
 322 |     def transition(self):
 323 |         workingstatus=self.workingstatus
 324 |         SOC=self.SOC
 325 |         if self.actions_adjustingstatus[1-1]==1:
 326 |             workingstatus[1-1]=1
 327 |         else:
 328 |             workingstatus[1-1]=0
 329 |         #determining the next decision epoch working status of solar PV, 1=working, 0=not working#
 330 |         if self.actions_adjustingstatus[2-1]==0 or self.windspeed>cutoff_windspeed or self.windspeed<cutin_windspeed:
 331 |             workingstatus[2-1]=0
 332 |         else: 
 333 |             if self.actions_adjustingstatus[2-1]==1 and self.windspeed<=cutoff_windspeed and self.windspeed>=cutin_windspeed:
 334 |                 workingstatus[2-1]=1
 335 |         #determining the next decision epoch working status of wind turbine, 1=working, 0=not working#        
 336 |         if self.actions_adjustingstatus[3-1]==1:
 337 |             workingstatus[3-1]=1
 338 |         else:
 339 |             workingstatus[3-1]=0
 340 |         #determining the next decision epoch working status of generator, 1=working, 0=not working#
 341 |         SOC=self.SOC+(self.actions_solar[2-1]+self.actions_wind[2-1]+self.actions_generator[2-1]+self.actions_purchased[2-1])*charging_discharging_efficiency-self.actions_discharged/charging_discharging_efficiency
 342 |         if SOC>SOC_max:
 343 |             SOC=SOC_max
 344 |         if SOC<SOC_min:
 345 |             SOC=SOC_min
 346 |         #determining the next desicion epoch SOC, state of charge of the battery system#
 347 |         return workingstatus, SOC
 348 |     
 349 |     def EnergyConsumption(self):
 350 |         #returns the energy consumption from the grid#
 351 |         return -(self.actions_solar[1-1]+self.actions_wind[1-1]+self.actions_generator[1-1]+self.actions_discharged)
 352 | 
 353 |     def energy_generated_solar(self):
 354 |         #calculate the energy generated by the solar PV, e_t^s#
 355 |         if self.workingstatus[1-1]==1:
 356 |             energy_generated_solar=self.solarirradiance*area_solarPV*efficiency_solarPV/1000
 357 |         else:
 358 |             energy_generated_solar=0
 359 |         return energy_generated_solar
 360 |     
 361 |     def energy_generated_wind(self):
 362 |         #calculate the energy generated by the wind turbine, e_t^w#
 363 |         if self.workingstatus[2-1]==1 and self.windspeed<rated_windspeed and self.windspeed>=cutin_windspeed:
 364 |             energy_generated_wind=number_windturbine*rated_power_wind_turbine*(self.windspeed-cutin_windspeed)/(rated_windspeed-cutin_windspeed)
 365 |         else:
 366 |             if self.workingstatus[2-1]==1 and self.windspeed<cutoff_windspeed and self.windspeed>=rated_windspeed:
 367 |                 energy_generated_wind=number_windturbine*rated_power_wind_turbine*Delta_t
 368 |             else:
 369 |                 energy_generated_wind=0
 370 |         return energy_generated_wind
 371 |     
 372 |     def energy_generated_generator(self):
 373 |         #calculate the energy generated bv the generator, e_t^g#
 374 |         if self.workingstatus[3-1]==1:
 375 |             energy_generated_generator=number_generators*rated_output_power_generator*Delta_t
 376 |         else:
 377 |             energy_generated_generator=0
 378 |         return energy_generated_generator
 379 |         
 380 |     def OperationalCost(self):
 381 |         #returns the operational cost for the onsite generation system#
 382 |         if self.workingstatus[1-1]==1:
 383 |             energy_generated_solar=self.solarirradiance*area_solarPV*efficiency_solarPV/1000
 384 |         else:
 385 |             energy_generated_solar=0
 386 |         #calculate the energy generated by the solar PV, e_t^s#
 387 |         if self.workingstatus[2-1]==1 and self.windspeed<rated_windspeed and self.windspeed>=cutin_windspeed:
 388 |             energy_generated_wind=number_windturbine*rated_power_wind_turbine*(self.windspeed-cutin_windspeed)/(rated_windspeed-cutin_windspeed)
 389 |         else:
 390 |             if self.workingstatus[2-1]==1 and self.windspeed<cutoff_windspeed and self.windspeed>=rated_windspeed:
 391 |                 energy_generated_wind=number_windturbine*rated_power_wind_turbine*Delta_t
 392 |             else:
 393 |                 energy_generated_wind=0
 394 |         #calculate the energy generated by the wind turbine, e_t^w#
 395 |         if self.workingstatus[3-1]==1:
 396 |             energy_generated_generator=number_generators*rated_output_power_generator*Delta_t
 397 |         else:
 398 |             energy_generated_generator=0
 399 |         #calculate the energy generated bv the generator, e_t^g#
 400 |         operational_cost=energy_generated_solar*unit_operational_cost_solar+energy_generated_wind*unit_operational_cost_wind+energy_generated_generator*unit_operational_cost_generator
 401 |         operational_cost+=(self.actions_discharged+self.actions_solar[2-1]+self.actions_wind[2-1]+self.actions_generator[2-1])*Delta_t*unit_operational_cost_battery/(2*capacity_battery_storage*(SOC_max-SOC_min))
 402 |         #calculate the operational cost for the onsite generation system#
 403 |         return operational_cost
 404 |     
 405 |     def SoldBackReward(self):
 406 |         #calculate the sold back reward (benefit)#
 407 |         return (self.actions_solar[3-1]+self.actions_wind[3-1]+self.actions_generator[3-1])*unit_reward_soldbackenergy
 408 |     
 409 |     def PrintMicrogrid(self, file):
 410 |         #print the current and the next states of the microgrid#
 411 |         print("Microgrid working status [solar PV, wind turbine, generator]=", self.workingstatus, ", SOC=", self.SOC, file=file)
 412 |         print(" microgrid actions [solar PV, wind turbine, generator]=", self.actions_adjustingstatus, file=file)
 413 |         print(" solar energy supporting [manufaturing, charging battery, sold back]=", self.actions_solar, file=file)
 414 |         print(" wind energy supporting [manufacturing, charging battery, sold back]=", self.actions_wind, file=file)
 415 |         print(" generator energy supporting [manufacturing, charging battery, sold back]=", self.actions_generator, file=file)
 416 |         print(" energy purchased from grid supporting [manufacturing, charging battery]=", self.actions_purchased, file=file)
 417 |         print(" energy discharged by the battery supporting manufacturing=", self.actions_discharged, file=file)
 418 |         print(" solar irradiance=", self.solarirradiance, file=file)
 419 |         print(" wind speed=", self.windspeed, file=file)
 420 |         print(" Microgrid Energy Consumption=", self.EnergyConsumption(), file=file)
 421 |         print(" Microgrid Operational Cost=", self.OperationalCost(), file=file)
 422 |         print(" Microgrid SoldBackReward=", self.SoldBackReward(), file=file)
 423 |         print("\n", file=file)
 424 |         return None
 425 | 
 426 | 
 427 | """    
 428 | Combining the above three classes, define the variables and functions for the whole manufacturing system
 429 | """
 430 | class ManufacturingSystem(object):
 431 |     def __init__(self,
 432 |                  machine_states,
 433 |                  #set the machine states for all machines in the manufacturing system#
 434 |                  machine_control_actions,
 435 |                  #set the control actions for all machines in the manufacturing system#
 436 |                  buffer_states,
 437 |                  #set the buffer states for all buffers in the manufacturing system#
 438 |                  grid=Microgrid(workingstatus=[0,0,0],
 439 |                                 SOC=0,
 440 |                                 actions_adjustingstatus=[0,0,0],
 441 |                                 actions_solar=[0,0,0],
 442 |                                 actions_wind=[0,0,0],
 443 |                                 actions_generator=[0,0,0],
 444 |                                 actions_purchased=[0,0],
 445 |                                 actions_discharged=0,
 446 |                                 solarirradiance=0,
 447 |                                 windspeed=0
 448 |                                 )
 449 |                  #set the microgrid states and control actions#
 450 |                  ):
 451 |         self.machine_states=machine_states
 452 |         self.machine_control_actions=machine_control_actions
 453 |         self.buffer_states=buffer_states
 454 |         #initialize all machines, ManufacturingSystem.machine=[Machine1, Machine2, ..., Machine_{number_machines}]#
 455 |         self.machine=[]
 456 |         for i in range(number_machines):
 457 |             if i!=number_machines-1:
 458 |                 self.machine.append(Machine(name=i+1, 
 459 |                                             state=self.machine_states[i], 
 460 |                                             lifetime_shape_parameter=machine_lifetime_shape_parameter[i],
 461 |                                             lifetime_scale_parameter=machine_lifetime_scale_parameter[i],
 462 |                                             repairtime_mean=machine_repairtime_mean[i],
 463 |                                             power_consumption_Opr=machine_power_consumption_Opr[i],
 464 |                                             power_consumption_Idl=machine_power_consumption_Idl[i],                                            
 465 |                                             control_action=self.machine_control_actions[i], 
 466 |                                             is_last_machine=False))
 467 |             else:
 468 |                 self.machine.append(Machine(name=i+1, 
 469 |                                             state=self.machine_states[i], 
 470 |                                             lifetime_shape_parameter=machine_lifetime_shape_parameter[i],
 471 |                                             lifetime_scale_parameter=machine_lifetime_scale_parameter[i],
 472 |                                             repairtime_mean=machine_repairtime_mean[i],
 473 |                                             power_consumption_Opr=machine_power_consumption_Opr[i],
 474 |                                             power_consumption_Idl=machine_power_consumption_Idl[i],                                            
 475 |                                             control_action=self.machine_control_actions[i], 
 476 |                                             is_last_machine=True))
 477 |         #initialize all buffers, ManufacturingSystem.buffer=[Buffer1, Buffer2, ..., Buffer_{numbers_machines-1}]
 478 |         self.buffer=[]
 479 |         for j in range(number_machines-1):
 480 |             self.buffer.append(Buffer(name=j+1, 
 481 |                                       state=self.buffer_states[j], 
 482 |                                       buffer_max=list_buffer_max[j],
 483 |                                       buffer_min=list_buffer_min[j],
 484 |                                       previous_machine_state=self.machine[j].state, 
 485 |                                       next_machine_state=self.machine[j+1].state,
 486 |                                       previous_machine_control_action=self.machine[j].control_action,
 487 |                                       next_machine_control_action=self.machine[j+1].control_action
 488 |                                       ))
 489 |         self.grid=grid
 490 |         
 491 |     def transition_manufacturing(self):
 492 |         #based on current states and current control actions of the whole manufacturing system, calculate states at the the next decision epoch#
 493 |         #states include machine states, buffer states and microgrid states#
 494 |         buffer_states=[]
 495 |         for j in range(number_machines-1):
 496 |             buffer_states.append(self.buffer[j].NextState())
 497 |         #based on current machine states and control actions taken, calculate the next states of all buffers#    
 498 |         Off=[]
 499 |         Brk=[]
 500 |         Sta=[]
 501 |         Blo=[]
 502 |         #Set up four 0/1 sequence that test the next states being "Off", "Brk", "Sta" or "Blo". If none of these, then "Opr"#
 503 |         for i in range(number_machines):
 504 |             Off.append(0)
 505 |             Brk.append(0)
 506 |             Sta.append(0)
 507 |             Blo.append(0)
 508 |         for i in range(number_machines):
 509 |         #Check the possibilities of "Off" or "Brk" states#    
 510 |             if self.machine[i].NextState_IsOff():
 511 |                 Off[i]=1
 512 |             if self.machine[i].NextState_IsBrk():
 513 |                 Brk[i]=1
 514 |         for i in range(number_machines):
 515 |         #Check the possibilities of "Sta" states#
 516 |             if i==0:
 517 |                 Sta[i]=0
 518 |             else:
 519 |                 if Brk[i]==1 or Off[i]==1:
 520 |                     Sta[i]=0
 521 |                 else:
 522 |                     if buffer_states[i-1]==self.buffer[i-1].buffer_min:
 523 |                         if Brk[i-1]==1 or Sta[i-1]==1 or Off[i-1]==1:
 524 |                             Sta[i]=1
 525 |                         else:
 526 |                             Sta[i]=0
 527 |                     else:
 528 |                         Sta[i]=0
 529 |         for i in reversed(range(number_machines)):
 530 |         #Check the possibilities of "Blo" states#
 531 |             if i==number_machines-1:
 532 |                 Blo[i]=0
 533 |             else:
 534 |                 if Brk[i]==1 or Off[i]==1:
 535 |                     Blo[i]=0
 536 |                 else:
 537 |                     if buffer_states[i]==self.buffer[i].buffer_max:
 538 |                         if Brk[i+1]==1 or Blo[i+1]==1 or Off[i+1]==1:
 539 |                             Blo[i]=1
 540 |                         else:
 541 |                             Blo[i]=0
 542 |                     else:
 543 |                         Blo[i]=0
 544 |         #based on current machine states and control actions taken, calculate the next states of all machines#    
 545 |         machine_states=[]                
 546 |         for i in range(number_machines):
 547 |             if Off[i]==1:
 548 |                 machine_states.append("Off")
 549 |             elif Brk[i]==1:
 550 |                 machine_states.append("Brk")
 551 |             elif Sta[i]==1:
 552 |                 machine_states.append("Sta")
 553 |             elif Blo[i]==1:
 554 |                 machine_states.append("Blo")
 555 |             else: 
 556 |                 machine_states.append("Opr")
 557 |         #return the new states#
 558 |         return machine_states, buffer_states
 559 | 
 560 |     def average_total_cost(self, current_rate_consumption_charge):
 561 |         #calculate the average total cost of the manufacturing system, E(S,A), based on the current machine, buffer, microgrid states and actions#
 562 |         E_mfg=0
 563 |         #total energy consumed by the manufacturing system, summing over all machines#
 564 |         for i in range(number_machines):
 565 |             E_mfg=E_mfg+self.machine[i].EnergyConsumption()
 566 |         #the energy consumption cost#            
 567 |         TF=(E_mfg+self.grid.EnergyConsumption())*current_rate_consumption_charge
 568 |         #the operational cost for the microgrid system#
 569 |         MC=self.grid.OperationalCost()
 570 |         #the prduction throughput of the manufacturing system#
 571 |         TP=self.machine[number_machines-1].LastMachineProduction()*unit_reward_production
 572 |         #the sold back reward#
 573 |         SB=self.grid.SoldBackReward()
 574 |         return TF+MC-TP-SB
 575 |     
 576 |     def energydemand(self, current_rate_consumption_charge):
 577 |         #calculate the total energy demand TF of the system, based on the current machine, buffer, microgrid states and actions#
 578 |         E_mfg=0
 579 |         #total energy consumed by the manufacturing system, summing over all machines#
 580 |         for i in range(number_machines):
 581 |             E_mfg=E_mfg+self.machine[i].EnergyConsumption()
 582 |         #the energy consumption cost#            
 583 |         TF=(E_mfg+self.grid.EnergyConsumption())*current_rate_consumption_charge
 584 |         return TF
 585 |     
 586 |     def throughput(self):
 587 |         #calculate total throughput TP of the manufacturing system, based on the current machine, buffer, microgrid states and actions#
 588 |         #the prduction throughput of the manufacturing system#
 589 |         TP=self.machine[number_machines-1].LastMachineProduction()*unit_reward_production
 590 |         return TP 
 591 | 
 592 |     def PrintSystem(self, file, timepoint):
 593 |         for i in range(number_machines):
 594 |             self.machine[i].PrintMachine(file)
 595 |             if i!=number_machines-1:
 596 |                 self.buffer[i].PrintBuffer(file)
 597 |         self.grid.PrintMicrogrid(file)
 598 |         print("Average Total Cost=", self.average_total_cost(rate_consumption_charge[timepoint//8640]), file=file)
 599 |         print("\n", file=file)
 600 |         return None
 601 |        
 602 | 
 603 | 
 604 | 
 605 | """
 606 | Simulate admissible actions based on the current state S_{t+1} of the manufacturing system, 
 607 | the admissible actions are A_{t+1}=(A^d, A^c, A^r)
 608 | """
 609 | class ActionSimulation(object):
 610 |     def __init__(self,
 611 |                  System=ManufacturingSystem(machine_states=["Off" for _ in range(number_machines)],
 612 |                                             machine_control_actions=["K" for _ in range(number_machines)],
 613 |                                             buffer_states=[0 for _ in range(number_machines-1)],
 614 |                                             grid=Microgrid(workingstatus=[0,0,0],
 615 |                                                            SOC=0,
 616 |                                                            actions_adjustingstatus=[0,0,0],
 617 |                                                            actions_solar=[0,0,0],
 618 |                                                            actions_wind=[0,0,0],
 619 |                                                            actions_generator=[0,0,0],
 620 |                                                            actions_purchased=[0,0],
 621 |                                                            actions_discharged=0,
 622 |                                                            solarirradiance=0,
 623 |                                                            windspeed=0
 624 |                                                            ))
 625 |                  ):
 626 |         #the ManufacturingSystem is with new states S_{t+1} but old actions A_{t}, we obtain the admissible A_{t+1} in this class#
 627 |         self.System=System
 628 |     
 629 |     def MachineActions(self):
 630 |         #Based on current machine states in the system, randomly uniformly simulate an admissible action for all machines#
 631 |         machine_actions=[]
 632 |         for i in range(number_machines):
 633 |             if self.System.machine_states[i]=="Opr":
 634 |                 machine_actions.append(choice(["K", "H"]))
 635 |             elif self.System.machine_states[i]=="Blo":
 636 |                 machine_actions.append(choice(["K", "H"]))
 637 |             elif self.System.machine_states[i]=="Sta":
 638 |                 machine_actions.append(choice(["K", "H"]))
 639 |             elif self.System.machine_states[i]=="Off":
 640 |                 machine_actions.append(choice(["K", "W"]))
 641 |             else:
 642 |                 machine_actions.append("K")
 643 |         return machine_actions
 644 |     
 645 |     def MicroGridActions_adjustingstatus(self):
 646 |         #randomly uniformly simulate an action that adjusts the status (connected=1) of the microgrid [solar, wind, generator]#
 647 |         actions_adjustingstatus=[]
 648 |         for i in range(3):
 649 |             actions_adjustingstatus.append(choice([0,1]))
 650 |         return actions_adjustingstatus
 651 |     
 652 |     def MicroGridActions_SolarWindGenerator(self, theta):
 653 |         #from the updated proportionality parameter theta return the corresponding actions on solar, wind and generator#
 654 |         #theta is the proportionality parameters theta=[lambda_s^m, lambda_s^b, lambda_w^m, lambda_w^b, lambda_g^m, lambda_g^]#
 655 |         #calculate the energy generated by the solar PV, e_t^s#
 656 |         energy_generated_solar=self.System.grid.energy_generated_solar()
 657 |         #calculate the energy generated by the wind turbine, e_t^w#
 658 |         energy_generated_wind=self.System.grid.energy_generated_wind()
 659 |         #calculate the energy generated bv the generator, e_t^g#
 660 |         energy_generated_generator=self.System.grid.energy_generated_generator()
 661 |         #given the new theta, calculated the actions_solar, actions_wind, actions_generator#
 662 |         actions_solar=[energy_generated_solar*theta[1-1], energy_generated_solar*theta[2-1], energy_generated_solar*(1-theta[1-1]-theta[2-1])]
 663 |         actions_wind=[energy_generated_wind*theta[3-1], energy_generated_wind*theta[4-1], energy_generated_wind*(1-theta[3-1]-theta[4-1])]
 664 |         actions_generator=[energy_generated_generator*theta[5-1], energy_generated_generator*theta[6-1], energy_generated_generator*(1-theta[5-1]-theta[6-1])]
 665 |         return actions_solar, actions_wind, actions_generator
 666 |     
 667 |     def MicroGridActions_PurchasedDischarged(self, 
 668 |                                              actions_solar=[0,0,0],
 669 |                                              actions_wind=[0,0,0],
 670 |                                              actions_generator=[0,0,0]):
 671 |         #randomly simulate an action that determines the use of the purchased energy and the energy discharge#
 672 |         #actions_solar, actions_wind, actions_generator are the actions to be taken at current system states#
 673 |         TotalSoldBack=actions_solar[3-1]+actions_wind[3-1]+actions_generator[3-1]
 674 |         #Total amount of sold back energy#
 675 |         TotalBattery=actions_solar[2-1]+actions_wind[2-1]+actions_generator[2-1]
 676 |         #Total amount if energy charged to the battery#
 677 |         SOC_Condition=self.System.grid.SOC-rate_battery_discharge*Delta_t/charging_discharging_efficiency-SOC_min
 678 |         #The condition for SOC at the current system state#
 679 |         E_mfg=0
 680 |         for i in range(number_machines):
 681 |             E_mfg=E_mfg+self.System.machine[i].EnergyConsumption()
 682 |         #total energy consumed by the manufacturing system, summing over all machines#
 683 |         p_hat=E_mfg-(actions_solar[1-1]+actions_wind[1-1]+actions_generator[1-1])
 684 |         if p_hat<0:
 685 |             p_hat=0
 686 |         #Set the p_hat#
 687 |         p_tilde=E_mfg-(actions_solar[1-1]+actions_wind[1-1]+actions_generator[1-1]+rate_battery_discharge*Delta_t)
 688 |         if p_tilde<0:
 689 |             p_tilde=0
 690 |         #Set the p_tilde#
 691 |         ####Calculate actions_purchased and actions_discharged according to the table in the paper####
 692 |         actions_purchased=[0,0]
 693 |         actions_discharged=0
 694 |         if TotalSoldBack>0 and TotalBattery>0 and SOC_Condition>0:
 695 |             actions_purchased=[0,0]
 696 |             actions_discharged=0
 697 |         elif TotalSoldBack>0 and TotalBattery>0 and SOC_Condition<=0:
 698 |             actions_purchased=[0,0]
 699 |             actions_discharged=0
 700 |         elif TotalSoldBack>0 and TotalBattery<=0 and SOC_Condition>0:
 701 |             actions_purchased=[0,0]
 702 |             actions_discharged=choice([0, rate_battery_discharge*Delta_t])
 703 |         elif TotalSoldBack>0 and TotalBattery<=0 and SOC_Condition<=0:
 704 |             actions_purchased=[0,0]
 705 |             actions_discharged=0
 706 |         elif TotalSoldBack<=0 and TotalBattery>0 and SOC_Condition>0:
 707 |             actions_purchased[2-1]=choice([0, p_hat])
 708 |             actions_purchased[1-1]=p_hat-actions_purchased[2-1]
 709 |             actions_discharged=0
 710 |         elif TotalSoldBack<=0 and TotalBattery>0 and SOC_Condition<=0:
 711 |             actions_purchased[2-1]=choice([0, p_hat])
 712 |             actions_purchased[1-1]=p_hat-actions_purchased[2-1]
 713 |             actions_discharged=0
 714 |         elif TotalSoldBack<=0 and TotalBattery<=0 and SOC_Condition>0:
 715 |             actions_discharged=choice([0, rate_battery_discharge*Delta_t])
 716 |             if actions_discharged==0:
 717 |                 actions_purchased[2-1]=choice([0, p_hat])
 718 |                 actions_purchased[1-1]=p_hat-actions_purchased[2-1]
 719 |             else:
 720 |                 actions_purchased[2-1]=0
 721 |                 actions_purchased[1-1]=p_tilde
 722 |         else:
 723 |             actions_purchased[2-1]=choice([0, p_hat])
 724 |             actions_purchased[1-1]=p_hat-actions_purchased[2-1]
 725 |             actions_discharged=0
 726 |         #return actions_purchased and actions_discharged#
 727 |         return actions_purchased, actions_discharged
 728 |             
 729 | 
 730 | 
 731 | """
 732 | Generate the set of all admissible microgrid actions for adjusting the microgrid status
 733 | Generate the set of all admissible microgrid actions for energy purchased/discharged , i.e. the remainder action A^r, 
 734 |  based on the current state S_{t+1} of the manufacturing system and the current discrete actions A^d 
 735 | Return all admissible microgrid actions for adjusting the microgrid status and all microgrid actions 
 736 |  for energy purchase/discharge as a list
 737 | """
 738 | class MicrogridActionSet_Discrete_Remainder(object):
 739 |     def __init__(self,
 740 |                  System=ManufacturingSystem(machine_states=["Off" for _ in range(number_machines)],
 741 |                                             machine_control_actions=["K" for _ in range(number_machines)],
 742 |                                             buffer_states=[0 for _ in range(number_machines-1)],
 743 |                                             grid=Microgrid(workingstatus=[0,0,0],
 744 |                                                            SOC=0,
 745 |                                                            actions_adjustingstatus=[0,0,0],
 746 |                                                            actions_solar=[0,0,0],
 747 |                                                            actions_wind=[0,0,0],
 748 |                                                            actions_generator=[0,0,0],
 749 |                                                            actions_purchased=[0,0],
 750 |                                                            actions_discharged=0,
 751 |                                                            solarirradiance=0,
 752 |                                                            windspeed=0
 753 |                                                            ))
 754 |                  ):
 755 |         #the ManufacturingSystem is with updated machine and microgrid states S_{t+1}
 756 |         #from these we obtain the set of all admissible microgrid actions for adjusting the status of [solar, wind, generator], 
 757 |         #and the set of all admissible microgrid actions for energy purchased/discharged
 758 |         self.System=System
 759 |     
 760 |     def List_AdjustingStatus(self):
 761 |         #return all possible microgrid actions for adjusting the status [solar, wind, generator]#
 762 |         microgrid_action_set_list_adjustingstatus=[]
 763 |         for adjust_solar in range(2):
 764 |             for adjust_wind in range(2):
 765 |                 for adjust_generator in range(2):
 766 |                     microgrid_action_set_list_adjustingstatus.append([adjust_solar, adjust_wind, adjust_generator])
 767 |         return microgrid_action_set_list_adjustingstatus
 768 | 
 769 |     def List_PurchasedDischarged(self, 
 770 |                                  actions_solar=[0,0,0],
 771 |                                  actions_wind=[0,0,0],
 772 |                                  actions_generator=[0,0,0]):
 773 |         #return all possible microgrid actions for the use of the purchased energy and the energy discharge#
 774 |         #actions_solar, actions_wind, actions_generator are the actions to be taken at current system states#
 775 |         TotalSoldBack=actions_solar[3-1]+actions_wind[3-1]+actions_generator[3-1]
 776 |         #Total amount of sold back energy#
 777 |         TotalBattery=actions_solar[2-1]+actions_wind[2-1]+actions_generator[2-1]
 778 |         #Total amount if energy charged to the battery#
 779 |         SOC_Condition=self.System.grid.SOC-rate_battery_discharge*Delta_t/charging_discharging_efficiency-SOC_min
 780 |         #The condition for SOC at the current system state#
 781 |         E_mfg=0
 782 |         for i in range(number_machines):
 783 |             E_mfg=E_mfg+self.System.machine[i].EnergyConsumption()
 784 |         #total energy consumed by the manufacturing system, summing over all machines#
 785 |         p_hat=E_mfg-(actions_solar[1-1]+actions_wind[1-1]+actions_generator[1-1])
 786 |         if p_hat<0:
 787 |             p_hat=0
 788 |         #Set the p_hat#
 789 |         p_tilde=E_mfg-(actions_solar[1-1]+actions_wind[1-1]+actions_generator[1-1]+rate_battery_discharge*Delta_t)
 790 |         if p_tilde<0:
 791 |             p_tilde=0
 792 |         #Set the p_tilde#
 793 |         ####Generate the list of the set of all admissible actions_purchased and actions_discharged according to the table in the paper####
 794 |         #microgrid_action_set_list_purchased_discharged=[[action_purchased[0], action_purchased[1]], action_discharged]
 795 |         microgrid_action_set_list_purchased_discharged=[]
 796 |         if TotalSoldBack>0 and TotalBattery>0 and SOC_Condition>0:
 797 |             microgrid_action_set_list_purchased_discharged=[ [[0,0], 0] ]
 798 |         elif TotalSoldBack>0 and TotalBattery>0 and SOC_Condition<=0:
 799 |             microgrid_action_set_list_purchased_discharged=[ [[0,0], 0] ]
 800 |         elif TotalSoldBack>0 and TotalBattery<=0 and SOC_Condition>0:
 801 |             microgrid_action_set_list_purchased_discharged=[ [[0,0], 0] , [[0,0], rate_battery_discharge*Delta_t] ]
 802 |         elif TotalSoldBack>0 and TotalBattery<=0 and SOC_Condition<=0:
 803 |             microgrid_action_set_list_purchased_discharged=[ [[0,0], 0] ]
 804 |         elif TotalSoldBack<=0 and TotalBattery>0 and SOC_Condition>0:
 805 |             microgrid_action_set_list_purchased_discharged=[ [[p_hat, 0], 0] , [[0, p_hat], 0] ]
 806 |         elif TotalSoldBack<=0 and TotalBattery>0 and SOC_Condition<=0:
 807 |             microgrid_action_set_list_purchased_discharged=[ [[p_hat, 0], 0] , [[0, p_hat], 0] ]
 808 |         elif TotalSoldBack<=0 and TotalBattery<=0 and SOC_Condition>0:
 809 |             microgrid_action_set_list_purchased_discharged=[ [[p_hat, 0], 0] , [[0, p_hat], 0] , [[p_tilde, 0], rate_battery_discharge*Delta_t] ]
 810 |         else:
 811 |             microgrid_action_set_list_purchased_discharged=[ [[p_hat, 0], 0] , [[0, p_hat], 0] ]
 812 |         #return the list of the set of all admissible actions_purchased and actions_discharged#
 813 |         return microgrid_action_set_list_purchased_discharged
 814 |     
 815 | 
 816 | 
 817 | 
 818 | """
 819 | Generate the set of all admissible machine actions based on the current state S_{t+1} of the manufacturing system.
 820 | The set of all machine actions will be stored in a tree with branches 1 or 2, the depth of the tree = num_machines.
 821 | Search the tree and return all possible admissible machine actions as a list
 822 | """
 823 | class MachineActionTree(object):
 824 |     
 825 |     def __init__(self, 
 826 |                  machine_action):
 827 |         self.root=machine_action
 828 |         self.left_child=None
 829 |         self.right_child=None
 830 |         self.machine_action_set_list=[]
 831 |     
 832 |     def InsertLeft(self, machine_action):
 833 |         #insert the left child of the tree from the root#
 834 |         if self.left_child == None:
 835 |             self.left_child = MachineActionTree(machine_action)
 836 |         else:
 837 |             new_node = MachineActionTree(machine_action)
 838 |             new_node.left_child = self.left_child
 839 |             self.left_child = new_node
 840 |             
 841 |     def InsertRight(self, machine_action):
 842 |         #insert the right child of the tree from the root#
 843 |         if self.right_child == None:
 844 |             self.right_child = MachineActionTree(machine_action)
 845 |         else:
 846 |             new_node = MachineActionTree(machine_action)
 847 |             new_node.right_child = self.right_child
 848 |             self.right_child = new_node
 849 |         
 850 |     def BuildTree(self, System, level, tree):
 851 |         #build the tree with root "ROOT", each level corresponding to admissible machine actions for the machine at that level#
 852 |         if level < number_machines:
 853 |             if System.machine_states[level]=="Opr":
 854 |                 tree.InsertLeft("K")
 855 |                 self.BuildTree(System, level+1, tree.left_child)
 856 |                 tree.InsertRight("H")
 857 |                 self.BuildTree(System, level+1, tree.right_child)
 858 |             elif System.machine_states[level]=="Blo":
 859 |                 tree.InsertLeft("K")
 860 |                 self.BuildTree(System, level+1, tree.left_child)
 861 |                 tree.InsertRight("H")
 862 |                 self.BuildTree(System, level+1, tree.right_child)
 863 |             elif System.machine_states[level]=="Sta":
 864 |                 tree.InsertLeft("K")
 865 |                 self.BuildTree(System, level+1, tree.left_child)
 866 |                 tree.InsertRight("H")
 867 |                 self.BuildTree(System, level+1, tree.right_child)
 868 |             elif System.machine_states[level]=="Off":
 869 |                 tree.InsertLeft("K")
 870 |                 self.BuildTree(System, level+1, tree.left_child)
 871 |                 tree.InsertRight("W")
 872 |                 self.BuildTree(System, level+1, tree.right_child)
 873 |             else:
 874 |                 tree.InsertLeft("K")
 875 |                 self.BuildTree(System, level+1, tree.left_child)
 876 |         else:
 877 |             return None
 878 | 
 879 |     def TraverseTree(self, level, tree, machine_action_list):
 880 |         #traverse the tree and output the set of all admissible machine actions as a list#
 881 |         if level < number_machines:
 882 |             machine_action_list.append(tree.left_child.root)
 883 |             self.TraverseTree(level+1, tree.left_child, machine_action_list)
 884 |             machine_action_list.pop()
 885 |             if tree.right_child == None:
 886 |                 return None
 887 |             else:
 888 |                 machine_action_list.append(tree.right_child.root)
 889 |                 self.TraverseTree(level+1, tree.right_child, machine_action_list)
 890 |                 machine_action_list.pop()
 891 |         else:
 892 |             machine_action_list_copy=machine_action_list.copy()
 893 |             self.machine_action_set_list.append(machine_action_list_copy)
 894 |             return None
 895 | 
 896 | #initialize the microgrid and manufacturing system
 897 | def SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states):
 898 |     #the System is initialized with initial machine and buffer states, all other parameters are set to be 0
 899 |     grid=Microgrid(workingstatus=[0,0,0],
 900 |                    SOC=0,
 901 |                    actions_adjustingstatus=[0,0,0],
 902 |                    actions_solar=[0,0,0],
 903 |                    actions_wind=[0,0,0],
 904 |                    actions_generator=[0,0,0],
 905 |                    actions_purchased=[0,0],
 906 |                    actions_discharged=0,
 907 |                    solarirradiance=0,
 908 |                    windspeed=0
 909 |                    )
 910 |     System=ManufacturingSystem(machine_states=initial_machine_states,
 911 |                                machine_control_actions=initial_machine_actions,
 912 |                                buffer_states=initial_buffer_states,
 913 |                                grid=grid
 914 |                                )
 915 |     return System
 916 |     
 917 |     
 918 | """
 919 | ################################ MAIN TESTING FILE #####################################
 920 | ################################ FOR DEBUGGING ONLY #####################################
 921 | 
 922 | testing on random admissible actions
 923 | testing on the generation of admissible actions
 924 | """
 925 | if __name__ == "__main__":
 926 |     
 927 |     #set the initial machine states, machine control actions and buffer states
 928 |     initial_machine_states=["Opr" for _ in range(number_machines)]
 929 |     initial_machine_actions=["K" for _ in range(number_machines)]
 930 |     initial_buffer_states=[2 for _ in range(number_machines-1)]
 931 |     
 932 |     #initialize the system
 933 |     System=SystemInitialize(initial_machine_states, initial_machine_actions, initial_buffer_states)
 934 |     
 935 |     #initialize the theta
 936 |     theta=[0,0,0,0,0,0]
 937 |     
 938 |     targetoutput=0
 939 |     number_iteration=100
 940 |     file=open('microgrid_manufacturing_system.txt', 'w')
 941 |     print("\n*********************** RUN THE MICROGRID-MANUFACTURING SYSTEM AT "+str(number_iteration)+" STEPS ***********************", file=file)
 942 |     for t in range(number_iteration):
 943 |         #current states and actions S_t and A_t are stored in class System#
 944 |         print("*********************Time Step", t, "*********************", file=file)
 945 |         System.PrintSystem(file, t)
 946 |         targetoutput+=int(System.throughput()/unit_reward_production)
 947 |         #update the theta#
 948 |         theta=projection(np.random.uniform(-1,1,size=6))
 949 |         #calculate the next states and actions, S_{t+1}, A_{t+1}#        
 950 |         next_machine_states, next_buffer_states=System.transition_manufacturing()
 951 |         next_workingstatus, next_SOC=System.grid.transition()
 952 |         next_action=ActionSimulation(System=ManufacturingSystem(machine_states=next_machine_states,
 953 |                                                                 machine_control_actions=["K" for _ in range(number_machines)],
 954 |                                                                 buffer_states=next_buffer_states,
 955 |                                                                 grid=Microgrid(workingstatus=next_workingstatus,
 956 |                                                                                SOC=next_SOC,
 957 |                                                                                actions_adjustingstatus=[0,0,0],
 958 |                                                                                actions_solar=[0,0,0],
 959 |                                                                                actions_wind=[0,0,0],
 960 |                                                                                actions_generator=[0,0,0],
 961 |                                                                                actions_purchased=[0,0],
 962 |                                                                                actions_discharged=0,
 963 |                                                                                solarirradiance=solarirradiance[t//8640],
 964 |                                                                                windspeed=windspeed[t//8640],
 965 |                                                                                )
 966 |                                                                 )
 967 |                                     )
 968 |         next_actions_adjustingstatus=next_action.MicroGridActions_adjustingstatus()
 969 |         next_actions_solar, next_actions_wind, next_actions_generator=next_action.MicroGridActions_SolarWindGenerator(theta)
 970 |         next_actions_purchased, next_actions_discharged=next_action.MicroGridActions_PurchasedDischarged(next_actions_solar,
 971 |                                                                                                          next_actions_wind,
 972 |                                                                                                          next_actions_generator)
 973 |         next_machine_control_actions=next_action.MachineActions()
 974 |         grid=Microgrid(workingstatus=next_workingstatus,
 975 |                        SOC=next_SOC,
 976 |                        actions_adjustingstatus=next_actions_adjustingstatus,
 977 |                        actions_solar=next_actions_solar,
 978 |                        actions_wind=next_actions_wind,
 979 |                        actions_generator=next_actions_generator,
 980 |                        actions_purchased=next_actions_purchased,
 981 |                        actions_discharged=next_actions_discharged,
 982 |                        solarirradiance=solarirradiance[t//8640],
 983 |                        windspeed=windspeed[t//8640]
 984 |                        )
 985 |         System=ManufacturingSystem(machine_states=next_machine_states, 
 986 |                                    machine_control_actions=next_machine_control_actions, 
 987 |                                    buffer_states=next_buffer_states,
 988 |                                    grid=grid
 989 |                                    )  
 990 |     print("Target Output = ", targetoutput, file=file)
 991 |     
 992 |     #test the tree structure in the generation of all admissible machine actions#
 993 |     #test the generation of all admissible microgrid adjusting actions and actions for energy purchased/discharged#
 994 |     print("\n*********************** Test the Machine and Microgrid Action Generation ***********************", file=file)
 995 |     #first print the current system parameters#
 996 |     System.PrintSystem(file, t)
 997 |     #generate the admissible machine actions from the tree structure#
 998 |     machine_action_tree=MachineActionTree(machine_action="ROOT")
 999 |     machine_action_tree.BuildTree(System, level=0, tree=machine_action_tree)
1000 |     machine_action_list=[]
1001 |     machine_action_tree.TraverseTree(level=0, tree=machine_action_tree, machine_action_list=[])
1002 |     machine_action_set_list=machine_action_tree.machine_action_set_list
1003 |     i=1
1004 |     for machine_action_list in machine_action_set_list:
1005 |         print("admissible machine action", i, "=", machine_action_list, file=file)
1006 |         i=i+1
1007 |     #generate the admissible microgrid actions for adjusting status and purchased/discharged
1008 |     microgrid_action_set_DR=MicrogridActionSet_Discrete_Remainder(System)
1009 |     microgrid_action_set_list_adjustingstatus=microgrid_action_set_DR.List_AdjustingStatus()
1010 |     i=1
1011 |     print("\n", file=file)
1012 |     for microgrid_action_list_adjustingstatus in microgrid_action_set_list_adjustingstatus:
1013 |         print("admissible microgrid action", i," for adjusting status=", microgrid_action_list_adjustingstatus, file=file)
1014 |         i=i+1
1015 | 
1016 |     microgrid_action_set_list_purchased_discharged=microgrid_action_set_DR.List_PurchasedDischarged(actions_solar=[0,0,0],
1017 |                                                                                                     actions_wind=[0,0,0],
1018 |                                                                                                     actions_generator=[0,0,0])
1019 |     i=1
1020 |     print("\n",file=file)
1021 |     for microgrid_action_list_purchased_discharged in microgrid_action_set_list_purchased_discharged:
1022 |         print("admissible microgrid action", i," for purchase=", microgrid_action_list_purchased_discharged[0],
1023 |               ", admissible microgrid action", i," for discharge=", microgrid_action_list_purchased_discharged[1], file=file)
1024 |         i=i+1
1025 |         
1026 |     file.close()
1027 | 


--------------------------------------------------------------------------------