├── .idea
│   ├── .gitignore
│   ├── Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── MIP_DQN.py
├── Parameters.py
├── README.md
├── data
│   ├── H4.csv
│   ├── PV.csv
│   └── Prices.csv
├── random_generator_battery.py
└── random_generator_more_battery.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml,
/.idea/deployment.xml, /.idea/inspectionProfiles/profiles_settings.xml,
/.idea/misc.xml, /.idea/modules.xml, /.idea/vcs.xml:
--------------------------------------------------------------------------------
(PyCharm project-configuration files; their XML content was not captured in this dump)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Hou Shengren
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MIP_DQN.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning
3 | # MIP-DQN algorithm developed by
4 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl
5 | # Pedro P. Vergara, TU Delft, p.p.vergara.barrios@tudelft.nl
6 | # ------------------------------------------------------------------------
7 | import pickle
8 | import torch
9 | import os
10 | import numpy as np
11 | import numpy.random as rd
12 | import pandas as pd
13 | import pyomo.environ as pyo
14 | import pyomo.kernel as pmo
15 | from omlt import OmltBlock
16 |
17 | from gurobipy import *
18 | from omlt.neuralnet import NetworkDefinition, FullSpaceNNFormulation,ReluBigMFormulation
19 | from omlt.io.onnx import write_onnx_model_with_bounds,load_onnx_neural_network_with_bounds
20 | import tempfile
21 | import torch.onnx
22 | import torch.nn as nn
23 | from copy import deepcopy
24 | import wandb
25 | from random_generator_battery import ESSEnv
26 | ## define net
27 | class ReplayBuffer:
28 | def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
29 | self.now_len = 0
30 | self.next_idx = 0
31 | self.if_full = False
32 | self.max_len = max_len
33 | self.data_type = torch.float32
34 | self.action_dim = action_dim
35 | self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
36 |
37 | other_dim = 1 + 1 + self.action_dim
38 | self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
39 |
40 | if isinstance(state_dim, int): # state is a flat feature vector
41 | self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
42 | elif isinstance(state_dim, tuple): # state is an image (pixel) observation
43 | self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
44 | else:
45 | raise ValueError('state_dim')
46 |
47 | def extend_buffer(self, state, other): # CPU array to CPU array
48 | size = len(other)
49 | next_idx = self.next_idx + size
50 |
51 | if next_idx > self.max_len:
52 | self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
53 | self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
54 | self.if_full = True
55 |
56 | next_idx = next_idx - self.max_len
57 | self.buf_state[0:next_idx] = state[-next_idx:]
58 | self.buf_other[0:next_idx] = other[-next_idx:]
59 | else:
60 | self.buf_state[self.next_idx:next_idx] = state
61 | self.buf_other[self.next_idx:next_idx] = other
62 | self.next_idx = next_idx
63 |
64 | def sample_batch(self, batch_size) -> tuple:
65 | indices = rd.randint(self.now_len - 1, size=batch_size)
66 | r_m_a = self.buf_other[indices]
67 | return (r_m_a[:, 0:1],
68 | r_m_a[:, 1:2],
69 | r_m_a[:, 2:],
70 | self.buf_state[indices],
71 | self.buf_state[indices + 1])
72 |
73 | def update_now_len(self):
74 | self.now_len = self.max_len if self.if_full else self.next_idx
75 | class Arguments:
76 | def __init__(self, agent=None, env=None):
77 |
78 | self.agent = agent # Deep Reinforcement Learning algorithm
79 | self.env = env # the environment for training
80 | self.cwd = None # current work directory. None means set automatically
81 | self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
82 | self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
83 | self.worker_num = 2 # number of rollout workers per GPU (adjust it to get high GPU usage)
84 | self.num_threads = 8 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
85 |
86 | '''Arguments for training'''
87 | self.num_episode=3000
88 | self.gamma = 0.995 # discount factor of future rewards
89 | self.learning_rate = 1e-4 # 2 ** -14 ~= 6e-5
90 | self.soft_update_tau = 1e-2 # 2 ** -8 ~= 5e-3
91 |
92 | self.net_dim = 64 # the network width 256
93 | self.batch_size = 256 # num of transitions sampled from replay buffer.
94 | self.repeat_times = 2 ** 3 # repeatedly update network to keep critic's loss small
95 | self.target_step = 1000 # collect target_step experiences , then update network, 1024
96 | self.max_memo = 50000 # capacity of replay buffer
97 | ## arguments for controlling exploration
98 | self.explorate_decay=0.99
99 | self.explorate_min=0.3
100 | '''Arguments for evaluate'''
101 | self.random_seed_list=[1234,2234,3234,4234,5234]
102 | # self.random_seed_list=[2234]
103 | self.run_name='MIP_DQN_experiments'
104 | '''Arguments for save'''
105 | self.train=True
106 | self.save_network=True
107 | self.update_training_data=True # save the loss/reward records after training (referenced in __main__)
108 | self.if_per_or_gae=False # forwarded to agent.init() in __main__; PER/GAE are not used here
107 |
108 | def init_before_training(self, if_main):
109 | if self.cwd is None:
110 | agent_name = self.agent.__class__.__name__
111 | self.cwd = f'./{agent_name}/{self.run_name}'
112 |
113 | if if_main:
114 | import shutil # remove history according to bool(if_remove)
115 | if self.if_remove is None:
116 | self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
117 | elif self.if_remove:
118 | shutil.rmtree(self.cwd, ignore_errors=True)
119 | print(f"| Remove cwd: {self.cwd}")
120 | os.makedirs(self.cwd, exist_ok=True)
121 |
122 | np.random.seed(self.random_seed)
123 | torch.manual_seed(self.random_seed)
124 | torch.set_num_threads(self.num_threads)
125 | torch.set_default_dtype(torch.float32)
126 |
127 | os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)# control how many GPU is used
128 | class Actor(nn.Module):
129 | def __init__(self,mid_dim,state_dim,action_dim):
130 | super().__init__()
131 | self.net=nn.Sequential(nn.Linear(state_dim,mid_dim),nn.ReLU(),
132 | nn.Linear(mid_dim,mid_dim),nn.ReLU(),
133 | nn.Linear(mid_dim,mid_dim),nn.ReLU(),
134 | nn.Linear(mid_dim,action_dim))
135 | def forward(self,state):
136 | return self.net(state).tanh()# make the data from -1 to 1
137 | def get_action(self,state,action_std):#
138 | action=self.net(state).tanh()
139 | noise=(torch.randn_like(action)*action_std).clamp(-0.5,0.5)#
140 | return (action+noise).clamp(-1.0,1.0)
141 | class CriticQ(nn.Module):
142 | def __init__(self,mid_dim,state_dim,action_dim):
143 | super().__init__()
144 | self.net_head=nn.Sequential(nn.Linear(state_dim+action_dim,mid_dim),nn.ReLU(),
145 | nn.Linear(mid_dim,mid_dim),nn.ReLU())
146 | self.net_q1=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(),
147 | nn.Linear(mid_dim,1))# we get q1 value
148 | self.net_q2=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(),
149 | nn.Linear(mid_dim,1))# we get q2 value
150 | def forward(self,value):
151 | mid=self.net_head(value)
152 | return self.net_q1(mid)
153 | def get_q1_q2(self,value):
154 | mid=self.net_head(value)
155 | return self.net_q1(mid),self.net_q2(mid)
156 | class AgentBase:
157 | def __init__(self):
158 | self.state = None
159 | self.device = None
160 | self.action_dim = None
161 | self.if_off_policy = None
162 | self.explore_noise = None
163 | self.trajectory_list = None
164 | self.explore_rate = 1.0
165 |
166 | self.criterion = torch.nn.SmoothL1Loss()
167 |
168 | def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
169 | self.device = torch.device(
170 | f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
171 | self.action_dim = action_dim
172 |
173 | self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
174 | self.act = self.ClassAct(net_dim, state_dim, action_dim).to(
175 | self.device) if self.ClassAct else self.cri
176 | self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
177 | self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
178 |
179 | self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
180 | self.act_optim = torch.optim.Adam(self.act.parameters(),
181 | learning_rate) if self.ClassAct else self.cri
182 | del self.ClassCri, self.ClassAct
183 |
184 | def select_action(self, state) -> np.ndarray:
185 | states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
186 | action = self.act(states)[0]
187 | if rd.rand() < self.explore_rate: # exploration branch (reconstructed; this line was truncated in the dump)
188 | action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1.0, 1.0)
189 | return action.detach().cpu().numpy()
# NOTE: explore_env(), optim_update(), soft_update(), _update_exploration_rate() and the
# AgentMIPDQN class header are not captured in this dump; a hedged sketch follows this file.
246 | def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
247 | buffer.update_now_len()
248 | obj_critic = obj_actor = None
249 | for update_c in range(int(buffer.now_len / batch_size * repeat_times)):# number of gradient updates per call
250 | obj_critic, state = self.get_obj_critic(buffer, batch_size)
251 | self.optim_update(self.cri_optim, obj_critic)
252 |
253 | action_pg = self.act(state) # policy gradient
254 | obj_actor = -self.cri_target(torch.cat((state, action_pg),dim=-1)).mean() # use cri_target instead of cri for stable training
255 | self.optim_update(self.act_optim, obj_actor)
256 | if update_c % self.update_freq == 0: # delay update
257 | self.soft_update(self.cri_target, self.cri, soft_update_tau)
258 | self.soft_update(self.act_target, self.act, soft_update_tau)
259 | return obj_critic.item() / 2, obj_actor.item()
260 |
261 | def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
262 | with torch.no_grad():
263 | reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
264 | next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise,
265 | next_q = torch.min(*self.cri_target.get_q1_q2(torch.cat((next_s, next_a),dim=-1))) # twin critics
266 | q_label = reward + mask * next_q
267 |
268 | q1, q2 = self.cri.get_q1_q2(torch.cat((state, action),dim=-1))
269 | obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
270 | return obj_critic, state
271 |
272 |
273 |
274 | def update_buffer(_trajectory):
275 | ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
276 | ary_other = torch.as_tensor([item[1] for item in _trajectory])
277 | ary_other[:, 0] = ary_other[:, 0] # ten_reward
278 | ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
279 |
280 | buffer.extend_buffer(ten_state, ary_other)
281 |
282 | _steps = ten_state.shape[0]
283 | _r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
284 | return _steps, _r_exp
285 |
286 |
287 | def get_episode_return(env, act, device):
288 | '''get information of one episode during the training'''
289 | episode_return = 0.0 # sum of rewards in an episode
290 | episode_unbalance=0.0
291 | episode_operation_cost=0.0
292 | state = env.reset()
293 | for i in range(24):
294 | s_tensor = torch.as_tensor((state,), device=device)
295 | a_tensor = act(s_tensor)
296 | action = a_tensor.detach().cpu().numpy()[0] # detach() is not strictly needed because torch.no_grad() is used by the caller
297 | state, next_state, reward, done = env.step(action)
298 | state=next_state
299 | episode_return += reward
300 | episode_unbalance+=env.real_unbalance
301 | episode_operation_cost+=env.operation_cost
302 | if done:
303 | break
304 | return episode_return,episode_unbalance,episode_operation_cost
305 | class Actor_MIP:
306 | '''This actor is used to get the best action and its Q value from the current network. The input is a batch
307 | tensor of states; the output is the corresponding batch tensor of maximizing actions and their Q values.'''
308 | def __init__(self,scaled_parameters,batch_size,net,state_dim,action_dim,env,constrain_on=False):
309 | self.batch_size = batch_size
310 | self.net = net
311 | self.state_dim = state_dim
312 | self.action_dim =action_dim
313 | self.env = env
314 | self.constrain_on=constrain_on
315 | self.scaled_parameters=scaled_parameters
316 |
317 | def get_input_bounds(self,input_batch_state):
318 | batch_size = self.batch_size
319 | batch_input_bounds = []
320 | lbs_states = input_batch_state.detach().numpy()
321 | ubs_states = lbs_states
322 |
323 | for i in range(batch_size):
324 | input_bounds = {}
325 | for j in range(self.action_dim + self.state_dim):
326 | if j < self.state_dim:
327 | input_bounds[j] = (float(lbs_states[i][j]), float(ubs_states[i][j]))
328 | else:
329 | input_bounds[j] = (float(-1), float(1))
330 | batch_input_bounds.append(input_bounds)
331 | return batch_input_bounds
332 |
333 | def predict_best_action(self, state):
334 | state=state.detach().cpu().numpy()
335 | v1 = torch.zeros((1, self.state_dim+self.action_dim), dtype=torch.float32)
336 | '''this function is used to get the best action based on current net'''
337 | model = self.net.to('cpu')
338 | input_bounds = {}
339 | lb_state = state
340 | ub_state = state
341 | for i in range(self.action_dim + self.state_dim):
342 | if i < self.state_dim:
343 | input_bounds[i] = (float(lb_state[0][i]), float(ub_state[0][i]))
344 | else:
345 | input_bounds[i] = (float(-1), float(1))
346 |
347 | with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:
348 | # export neural network to ONNX
349 | torch.onnx.export(
350 | model,
351 | v1,
352 | f,
353 | input_names=['state_action'],
354 | output_names=['Q_value'],
355 | dynamic_axes={
356 | 'state_action': {0: 'batch_size'},
357 | 'Q_value': {0: 'batch_size'}
358 | }
359 | )
360 | # write ONNX model and its bounds using OMLT
361 | write_onnx_model_with_bounds(f.name, None, input_bounds)
362 | # load the network definition from the ONNX model
363 | network_definition = load_onnx_neural_network_with_bounds(f.name)
364 | # global optimality
365 | formulation = ReluBigMFormulation(network_definition)
366 | m = pyo.ConcreteModel()
367 | m.nn = OmltBlock()
368 | m.nn.build_formulation(formulation)
369 | '''# we are now building the surrogate model between action and state'''
370 | # power-balance constraints: |production - net load| must not exceed the grid exchange ability
371 | if self.constrain_on:
372 | m.power_balance_con1 = pyo.Constraint(expr=(
373 | (-m.nn.inputs[7] * self.scaled_parameters[0])+\
374 | ((m.nn.inputs[8] * self.scaled_parameters[1])+m.nn.inputs[4]*self.scaled_parameters[5]) +\
375 | ((m.nn.inputs[9] * self.scaled_parameters[2])+m.nn.inputs[5]*self.scaled_parameters[6]) +\
376 | ((m.nn.inputs[10] * self.scaled_parameters[3])+m.nn.inputs[6]*self.scaled_parameters[7])>=\
377 | m.nn.inputs[3] *self.scaled_parameters[4]-self.env.grid.exchange_ability))
378 | m.power_balance_con2 = pyo.Constraint(expr=(
379 | (-m.nn.inputs[7] * self.scaled_parameters[0])+\
380 | (m.nn.inputs[8] * self.scaled_parameters[1]+m.nn.inputs[4]*self.scaled_parameters[5]) +\
381 | (m.nn.inputs[9] * self.scaled_parameters[2]+m.nn.inputs[5]*self.scaled_parameters[6]) +\
382 | (m.nn.inputs[10] * self.scaled_parameters[3]+m.nn.inputs[6]*self.scaled_parameters[7])<=\
383 | m.nn.inputs[3] *self.scaled_parameters[4]+self.env.grid.exchange_ability))
384 | m.obj = pyo.Objective(expr=(m.nn.outputs[0]), sense=pyo.maximize)
385 |
386 | pyo.SolverFactory('gurobi').solve(m, tee=False)
387 |
388 | best_input = pyo.value(m.nn.inputs[:])
389 |
390 | best_action = (best_input[self.state_dim::])
391 | return best_action
392 | # main training / experiment script
393 | if __name__ == '__main__':
394 | args = Arguments()
395 | '''here record real unbalance'''
396 | reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': [],
397 | 'episode_operation_cost': []}
398 | loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
399 | args.visible_gpu = '2'
400 | for seed in args.random_seed_list:
401 | args.random_seed = seed
402 | # set different seed
403 | args.agent = AgentMIPDQN()
404 | agent_name = f'{args.agent.__class__.__name__}'
405 | args.agent.cri_target = True
406 | args.env = ESSEnv()
407 | args.init_before_training(if_main=True)
408 | '''init agent and environment'''
409 | agent = args.agent
410 | env = args.env
411 | agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
412 | args.if_per_or_gae)
413 | '''init replay buffer'''
414 | buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
415 | action_dim=env.action_space.shape[0])
416 | '''start training'''
417 | cwd = args.cwd
418 | gamma = args.gamma
419 | batch_size = args.batch_size # how many transitions are used per network update
420 | target_step = args.target_step # how many environment steps to collect per exploration round
421 | repeat_times = args.repeat_times # how many update rounds per batch of collected data
422 | soft_update_tau = args.soft_update_tau
423 | agent.state = env.reset()
424 | '''collect data and train and update network'''
425 | num_episode = args.num_episode
426 | args.train=True
427 | args.save_network=True
428 | wandb.init(project='MIP_DQN_experiments',name=args.run_name,settings=wandb.Settings(start_method="fork"))
429 | wandb.config = {
430 | "epochs": num_episode,
431 | "batch_size": batch_size}
432 | wandb.define_metric('custom_step')
433 | if args.train:
434 | collect_data = True
435 | while collect_data:
436 | print(f'buffer:{buffer.now_len}')
437 | with torch.no_grad():
438 | trajectory = agent.explore_env(env, target_step)
439 |
440 | steps, r_exp = update_buffer(trajectory)
441 | buffer.update_now_len()
442 | if buffer.now_len >= 10000:
443 | collect_data = False
444 | for i_episode in range(num_episode):
445 | critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
446 | wandb.log({'critic loss':critic_loss,'custom_step':i_episode})
447 | wandb.log({'actor loss': actor_loss,'custom_step':i_episode})
448 | loss_record['critic_loss'].append(critic_loss)
449 | loss_record['actor_loss'].append(actor_loss)
450 | with torch.no_grad():
451 | episode_reward, episode_unbalance, episode_operation_cost = get_episode_return(env, agent.act,
452 | agent.device)
453 | wandb.log({'mean_episode_reward': episode_reward,'custom_step':i_episode})
454 | wandb.log({'unbalance':episode_unbalance,'custom_step':i_episode})
455 | wandb.log({'episode_operation_cost':episode_operation_cost,'custom_step':i_episode})
456 | reward_record['mean_episode_reward'].append(episode_reward)
457 | reward_record['unbalance'].append(episode_unbalance)
458 | reward_record['episode_operation_cost'].append(episode_operation_cost)
459 |
460 | print(
461 | f'current episode is {i_episode}, reward:{episode_reward}, unbalance:{episode_unbalance}, buffer_length: {buffer.now_len}')
462 | if i_episode % 10 == 0:
463 | # target_step
464 | with torch.no_grad():
465 | agent._update_exploration_rate(args.explorate_decay,args.explorate_min)
466 | trajectory = agent.explore_env(env, target_step)
467 | steps, r_exp = update_buffer(trajectory)
468 | wandb.finish()
469 | if args.update_training_data:
470 | loss_record_path = f'{args.cwd}/loss_data.pkl'
471 | reward_record_path = f'{args.cwd}/reward_data.pkl'
472 | with open(loss_record_path, 'wb') as tf:
473 | pickle.dump(loss_record, tf)
474 | with open(reward_record_path, 'wb') as tf:
475 | pickle.dump(reward_record, tf)
476 | act_save_path = f'{args.cwd}/actor.pth'
477 | cri_save_path = f'{args.cwd}/critic.pth'
478 |
479 | print('training data have been saved')
480 | if args.save_network:
481 | torch.save(agent.act.state_dict(), act_save_path)
482 | torch.save(agent.cri.state_dict(), cri_save_path)
483 | print('training finished and actor and critic parameters have been saved')
484 |
485 |
486 |
--------------------------------------------------------------------------------
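
Note on the file above: the dump skips the code between `select_action` and `update_net`, so `explore_env`, `optim_update`, `soft_update`, `_update_exploration_rate` and the `AgentMIPDQN` class header are not shown even though the `__main__` block uses them. The sketch below is a hedged reconstruction of that glue in the usual ElegantRL/TD3 style; the method and attribute names are taken from how they are called in the file, but the bodies and the default values (`explore_noise`, `policy_noise`, `update_freq`) are assumptions, not the authors' exact code.

```python
# Hedged sketch of the elided pieces of MIP_DQN.py (assumed, not verbatim).
class AgentBase:  # continuation of the AgentBase shown above
    def explore_env(self, env, target_step) -> list:
        """Collect target_step transitions as (state, (reward, done, *action)) tuples."""
        state = self.state
        trajectory = []
        for _ in range(target_step):
            action = self.select_action(state)
            current_obs, next_obs, reward, done = env.step(action)
            trajectory.append((current_obs, (reward, done, *action)))
            state = env.reset() if done else next_obs
        self.state = state
        return trajectory

    @staticmethod
    def optim_update(optimizer, objective):
        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

    @staticmethod
    def soft_update(target_net, current_net, tau):
        # Polyak averaging of the target network towards the online network
        for tar, cur in zip(target_net.parameters(), current_net.parameters()):
            tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))

    def _update_exploration_rate(self, explorate_decay, explorate_min):
        self.explore_rate = max(self.explore_rate * explorate_decay, explorate_min)


class AgentMIPDQN(AgentBase):
    def __init__(self):
        super().__init__()
        self.ClassCri = CriticQ        # twin-Q critic defined in the file
        self.ClassAct = Actor
        self.if_use_cri_target = True  # update_net() relies on target networks
        self.if_use_act_target = True
        self.explore_noise = 0.1       # assumed std of exploration noise
        self.policy_noise = 0.2        # assumed std of target-policy smoothing noise
        self.update_freq = 2           # assumed delayed target-update frequency
    # update_net() and get_obj_critic() continue as shown in the dump above
```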
/Parameters.py:
--------------------------------------------------------------------------------
1 | battery_parameters={
2 | 'capacity':500,# kWh
3 | 'max_charge':100, # kw
4 | 'max_discharge':100, #kw
5 | 'efficiency':0.9,
6 | 'degradation':0, #euro/kw
7 | 'max_soc':0.8,
8 | 'min_soc':0.2,
9 | 'initial_capacity':0.2}
10 |
11 |
12 | dg_parameters={
13 | 'gen_1':{'a':0.0034
14 | ,'b': 3
15 | ,'c':30
16 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':150,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\
17 | 'ramping_up':100,'ramping_down':100,'min_up':2,'min_down':1},
18 |
19 | 'gen_2':{'a':0.001
20 | ,'b': 10
21 | ,'c': 40
22 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':375,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\
23 | 'ramping_up':100,'ramping_down':100,'min_up':2,'min_down':1},
24 |
25 | 'gen_3':{'a':0.001
26 | ,'b': 15
27 | ,'c': 70
28 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':500,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\
29 | 'ramping_up':200,'ramping_down':200,'min_up':2,'min_down':1}}
30 |
31 |
--------------------------------------------------------------------------------
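
For context, the `a`, `b`, `c` entries above are the quadratic fuel-cost coefficients consumed by `DG._get_cost` in the environment scripts (cost = a·P² + b·P + c for P > 0, otherwise 0). A small standalone check using the `gen_1` values; the helper function here is just for illustration, not part of the repository:

```python
from Parameters import dg_parameters

def dg_cost(gen: dict, output_kw: float) -> float:
    """Mirror of DG._get_cost: quadratic fuel cost a*P^2 + b*P + c for P > 0, else 0."""
    if output_kw <= 0:
        return 0.0
    return gen['a'] * output_kw ** 2 + gen['b'] * output_kw + gen['c']

g1 = dg_parameters['gen_1']
print(dg_cost(g1, 100.0))  # 0.0034*100^2 + 3*100 + 30 = 364.0
print(dg_cost(g1, 0.0))    # 0.0, unit is off
```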
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Optimal Energy System Scheduling Using A Constraint-Aware Reinforcement Learning Algorithm
3 |
4 | * This code accompanies the paper "Optimal Energy System Scheduling Using A Constraint-Aware Reinforcement Learning Algorithm", to appear in the International Journal of Electrical Power & Energy Systems.
5 | # Abstract
6 | * The massive integration of renewable-based distributed energy resources (DERs) inherently increases the energy system's complexity, especially when it comes to defining its operational schedule. Deep reinforcement learning (DRL) algorithms arise as a promising solution due to their data-driven and model-free features. However, current DRL algorithms fail to enforce rigorous operational constraints (e.g., power balance, ramping-up or -down constraints), limiting their implementation in real systems. To overcome this, in this paper a DRL algorithm (namely MIP-DQN) is proposed that is capable of *strictly* enforcing all operational constraints in the action space, ensuring the feasibility of the defined schedule in real-time operation. This is done by leveraging recent optimization advances for deep neural networks (DNNs) that allow their representation as a MIP formulation, enabling the further consideration of any action-space constraints. Comprehensive numerical simulations show that the proposed algorithm outperforms existing state-of-the-art DRL algorithms, obtaining a lower error with respect to the globally optimal solution (upper bound) obtained by solving a mathematical programming formulation with perfect forecast information, while strictly enforcing all operational constraints (even on unseen test days).
7 | # Organization
8 | * Folder "data" -- Historical and processed data.
9 | * Script "Parameters" -- General parameters for the battery and other energy units.
10 | * Script "MIP_DQN" -- The implementation of the proposed MIP-DQN algorithm.
11 | * Script "random_generator_battery" -- The energy system environment.
12 | * Script "random_generator_more_battery" -- The energy system environment with multiple batteries. This was added at a reviewer's request, even though I don't consider it essential.
13 | * Run the scripts after installing all packages. Please have a look at the code structure first.
14 | # Dependencies
15 | This code requires the following libraries: ```Pyomo```, ```pandas 1.1.4```, ```numpy 1.20.1```, ```matplotlib 3.3.4```, ```pytorch 1.11.0```, ```OMLT```, ```wandb```. I used wandb to monitor training progress; you can find more information [at this page](https://arxiv.org/abs/2305.05484). A minimal environment smoke test is sketched right after this README.
16 | # Recommended citation
17 | A preprint is available; you can check it for more details: [Link to the paper](https://arxiv.org/abs/2305.05484).
18 | * Paper authors: Hou Shengren, Pedro P. Vergara, Edgar Mauricio Salazar, Peter Palensky
19 | * Accepted for publication at International Journal of Electrical Power & Energy Systems
20 | * If you use (parts of) this code, please cite the preprint or published paper
21 | ## Additional Information
22 | * The cleaned code and data have now been uploaded.
23 |
--------------------------------------------------------------------------------
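
A minimal smoke test of the environment before launching full training (a sketch, assuming the three CSV files are present under `data/`; full training is started with `python MIP_DQN.py` and additionally needs a working Gurobi licence for the OMLT/MIP step and a wandb login):

```python
# Quick environment check (assumes data/PV.csv, data/Prices.csv and data/H4.csv exist).
from random_generator_battery import ESSEnv

env = ESSEnv()
obs = env.reset()
print(env.state_space.shape, env.action_space.shape)  # (7,) state, (4,) action
action = env.action_space.sample()                    # [battery, dg1, dg2, dg3] in [-1, 1]
current_obs, next_obs, reward, finish = env.step(action)
print(reward, finish)
```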
/random_generator_battery.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Energy management environment for reinforcement learning agents developed by
3 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl
4 | # ------------------------------------------------------------------------
5 | import random
6 | import numpy as np
7 |
8 | import pandas as pd
9 | import gym
10 | from gym import spaces
11 | import math
12 | import os
13 | import sys
14 | from Parameters import battery_parameters,dg_parameters
15 |
16 | class Constant:
17 | MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
18 | MAX_STEP_HOURS = 24 * 30
19 | class DataManager():
20 | def __init__(self) -> None:
21 | self.PV_Generation=[]
22 | self.Prices=[]
23 | self.Electricity_Consumption=[]
24 |
25 | def add_pv_element(self,element):self.PV_Generation.append(element)
26 | def add_price_element(self,element):self.Prices.append(element)
27 | def add_electricity_element(self,element):self.Electricity_Consumption.append(element)
28 |
29 | def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
30 | def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
31 | def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
32 | def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
33 | def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
34 | def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
35 |
36 | class DG():
37 | def __init__(self,parameters):
38 | self.name=parameters.keys()
39 | self.a_factor=parameters['a']
40 | self.b_factor=parameters['b']
41 | self.c_factor=parameters['c']
42 | self.power_output_max=parameters['power_output_max']
43 | self.power_output_min=parameters['power_output_min']
44 | self.ramping_up=parameters['ramping_up']
45 | self.ramping_down=parameters['ramping_down']
46 | self.last_step_output=None
47 | def step(self,action_gen):
48 | output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary
49 | output=self.current_output+output_change
50 | if output>0:
51 | output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain
52 | else:
53 | output=0
54 | self.current_output=output
55 | def _get_cost(self,output):
56 | if output<=0:
57 | cost=0
58 | else:
59 | cost=(self.a_factor*pow(output,2)+self.b_factor*output+self.c_factor)
60 | # print(cost)
61 | return cost
62 | def reset(self):
63 | self.current_output=0
64 |
65 | class Battery():
66 | '''simulate a simple battery here'''
67 | def __init__(self,parameters):
68 | self.capacity=parameters['capacity']# capacity (kWh)
69 | self.max_soc=parameters['max_soc']# max soc 0.8
70 | self.initial_capacity=parameters['initial_capacity']# initial soc 0.2
71 | self.min_soc=parameters['min_soc']# min soc 0.2
72 | self.degradation=parameters['degradation']# degradation cost 0,
73 | self.max_charge=parameters['max_charge']# max charge ability
74 | self.max_discharge=parameters['max_discharge']# max discharge ability
75 | self.efficiency=parameters['efficiency']# charge and discharge efficiency
76 | def step(self,action_battery):
77 | energy=action_battery*self.max_charge
78 | updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
79 | self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
80 | self.current_capacity=updated_capacity# update capacity to the current condition
81 | def _get_cost(self,energy):# calculate the cost depends on the energy change
82 | cost=energy**2*self.degradation
83 | return cost
84 | def SOC(self):
85 | return self.current_capacity
86 | def reset(self):
87 | self.current_capacity=np.random.uniform(0.2,0.8)
88 | class Grid():
89 | def __init__(self):
90 |
91 | self.on=True
92 | if self.on:
93 | self.exchange_ability=30
94 | else:
95 | self.exchange_ability=0
96 | def _get_cost(self,current_price,energy_exchange):##energy if charge, will be positive, if discharge will be negative
97 | return current_price*energy_exchange
98 | def retrive_past_price(self):
99 | result=[]
100 | if self.day<1:
101 | past_price=self.past_price# self.past price is fixed as the last days price
102 | else:
103 | past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day
104 | for item in past_price[(self.time-24)::]:# if the current time step is 10, the 10th entry of the past price is extracted as the first value of the result
105 | result.append(item)
106 | for item in self.price[24*self.day:(24*self.day+self.time)]:# continue retrieving data from the current day and append it to the result, since the past-price window changes every step
107 | result.append(item)
108 | return result
109 | class ESSEnv(gym.Env):
110 | '''ENV description:
111 | the agent learns to charge the battery at low prices and discharge at high prices; in this way it obtains benefits'''
112 | def __init__(self,**kwargs):
113 | super(ESSEnv,self).__init__()
114 | #parameters
115 | self.data_manager=DataManager()
116 | self._load_year_data()
117 | self.episode_length=kwargs.get('episode_length',24)
118 | self.month=None
119 | self.day=None
120 | self.TRAIN=True
121 | self.current_time=None
122 | self.battery_parameters=kwargs.get('battery_parameters',battery_parameters)
123 | self.dg_parameters=kwargs.get('dg_parameters',dg_parameters)
124 | self.penalty_coefficient=20# controls the soft penalty for constraint violations
125 | self.sell_coefficient=0.5# controls the benefit of selling to the grid
126 | # instantiate the components of the environment
127 | self.grid=Grid()
128 | self.battery=Battery(self.battery_parameters)
129 | self.dg1=DG(self.dg_parameters['gen_1'])
130 | self.dg2=DG(self.dg_parameters['gen_2'])
131 | self.dg3=DG(self.dg_parameters['gen_3'])
132 |
133 | # define normalized action space
134 | # action space here is [charge/discharge of battery, output change of gen1, gen2, gen3]
135 | self.action_space=spaces.Box(low=-1,high=1,shape=(4,),dtype=np.float32)# normalized action space (the Box bounds themselves are not used elsewhere)
136 | # state is [time_step, price, soc, net_load, dg1_output, dg2_output, dg3_output]
137 | self.state_space=spaces.Box(low=0,high=1,shape=(7,),dtype=np.float32)
138 | # set state related normalization reference
139 | self.Length_max=24
140 | self.Price_max=max(self.data_manager.Prices)
141 | # self.Netload_max=max(self.data_manager.Electricity_Consumption)-max(self.data_manager.PV_Generation)
142 | self.Netload_max = max(self.data_manager.Electricity_Consumption)
143 | self.SOC_max=self.battery.max_soc
144 | self.DG1_max=self.dg1.power_output_max
145 | self.DG2_max=self.dg2.power_output_max
146 | self.DG3_max=self.dg3.power_output_max
147 |
148 | def reset(self):
149 | '''reset initializes the environment and randomly chooses the month and day of the episode.'''
150 | self.month=np.random.randint(1,13)# sample a month from 1 to 12
151 |
152 | if self.TRAIN:
153 | self.day=np.random.randint(1,21)
154 | else:
155 | self.day=np.random.randint(21,Constant.MONTHS_LEN[self.month-1])
156 | self.current_time=0
157 | self.battery.reset()
158 | self.dg1.reset()
159 | self.dg2.reset()
160 | self.dg3.reset()
161 | return self._build_state()
162 | def _build_state(self):
163 | #we put all original information into state and then transfer it into normalized state
164 | soc=self.battery.SOC()/self.SOC_max
165 | dg1_output=self.dg1.current_output/self.DG1_max
166 | dg2_output=self.dg2.current_output/self.DG2_max
167 | dg3_output=self.dg3.current_output/self.DG3_max
168 | time_step=self.current_time/(self.Length_max-1)
169 | electricity_demand=self.data_manager.get_electricity_cons_data(self.month,self.day,self.current_time)
170 | pv_generation=self.data_manager.get_pv_data(self.month,self.day,self.current_time)
171 | price=self.data_manager.get_price_data(self.month,self.day,self.current_time)/self.Price_max
172 | net_load=(electricity_demand-pv_generation)/self.Netload_max
173 | obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None)
174 | return obs
175 |
176 | def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs
177 | ## here we want to put take action into each components
178 | current_obs=self._build_state()
179 | self.battery.step(action[0])# here execute the state-transition part, battery.current_capacity also changed
180 | self.dg1.step(action[1])
181 | self.dg2.step(action[2])
182 | self.dg3.step(action[3])
183 | current_output=np.array((self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,-self.battery.energy_change))# the production actually delivered this step
184 | self.current_output=current_output
185 | actual_production=sum(current_output)
186 | # transfer to normal_state
187 | netload=current_obs[3]*self.Netload_max
188 | price=current_obs[1]*self.Price_max
189 |
190 | unbalance=actual_production-netload
191 |
192 | reward=0
193 | excess_penalty=0
194 | deficient_penalty=0
195 | sell_benefit=0
196 | buy_cost=0
197 | self.excess=0
198 | self.shedding=0
199 | # logic: if unbalance > 0 there is excess production, and the excess output is sold to the power grid for a benefit
200 | if unbalance>=0:# excess condition
201 | if unbalance<=self.grid.exchange_ability:
202 | sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1]
203 | else:
204 | sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient
205 | #real unbalance that even grid could not meet
206 | self.excess=unbalance-self.grid.exchange_ability
207 | excess_penalty=self.excess*self.penalty_coefficient
208 | else:# unbalance < 0: load-shedding mode, so a deficiency penalty is applied
209 | if abs(unbalance)<=self.grid.exchange_ability:
210 | buy_cost=self.grid._get_cost(price,abs(unbalance))
211 | else:
212 | buy_cost=self.grid._get_cost(price,self.grid.exchange_ability)
213 | self.shedding=abs(unbalance)-self.grid.exchange_ability
214 | deficient_penalty=self.shedding*self.penalty_coefficient
215 | battery_cost=self.battery._get_cost(self.battery.energy_change)# we set it as 0 this time
216 | dg1_cost=self.dg1._get_cost(self.dg1.current_output)
217 | dg2_cost=self.dg2._get_cost(self.dg2.current_output)
218 | dg3_cost=self.dg3._get_cost(self.dg3.current_output)
219 |
220 | reward=-(battery_cost+dg1_cost+dg2_cost+dg3_cost+excess_penalty+
221 | deficient_penalty-sell_benefit+buy_cost)/2e3
222 |
223 | self.operation_cost=battery_cost+dg1_cost+dg2_cost+dg3_cost+buy_cost-sell_benefit+(self.shedding+self.excess)*self.penalty_coefficient
224 |
225 | self.unbalance=unbalance
226 | self.real_unbalance=self.shedding+self.excess
227 | '''here we also need to store the final step outputs for the final steps including, soc, output of units for seeing the final states'''
228 | final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery.current_capacity]
229 | self.current_time+=1
230 | finish=(self.current_time==self.episode_length)
231 | if finish:
232 | self.final_step_outputs=final_step_outputs
233 | self.current_time=0
234 | next_obs=self.reset()
235 |
236 | else:
237 | next_obs=self._build_state()
238 | return current_obs,next_obs,float(reward),finish
239 | def render(self, current_obs, next_obs, reward, finish):
240 | print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish))
241 | def _load_year_data(self):
242 | '''This private function loads the electricity consumption, PV generation and price data for one year at
243 | one-hour resolution; all series are stored in the DataManager instance.'''
244 | pv_df=pd.read_csv('data/PV.csv',sep=';')
245 | #hourly price data for a year
246 | price_df=pd.read_csv('data/Prices.csv',sep=';')
247 | # minute-resolution electricity consumption data for a year (aggregated to hours below)
248 | electricity_df=pd.read_csv('data/H4.csv',sep=';')
249 | pv_data=pv_df['P_PV_'].apply(lambda x: x.replace(',','.')).to_numpy(dtype=float)
250 | price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
251 | electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
252 | # netload=electricity-pv_data
253 | '''we carefully redesign the magnitude for price and amount of generation as well as demand'''
254 | for element in pv_data:
255 | self.data_manager.add_pv_element(element*100)
256 | for element in price:
257 | element/=10
258 | if element<=0.5:
259 | element=0.5
260 | self.data_manager.add_price_element(element)
261 | for i in range(0,electricity.shape[0],60):
262 | element=electricity[i:i+60]
263 | self.data_manager.add_electricity_element(sum(element)*300)
264 | ## test environment
265 | if __name__ == '__main__':
266 | env=ESSEnv()
267 | env.TRAIN=False
268 | rewards=[]
269 | env.reset()
270 | env.day=27
271 | tem_action=[0.1,0.1,0.1,0.1]
272 | for _ in range (240):
273 | print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
274 | current_obs,next_obs,reward,finish=env.step(tem_action)
275 | env.render(current_obs,next_obs,reward,finish)
276 | current_obs=next_obs
277 | rewards.append(reward)
278 |
279 | # print(f'total reward{sum(rewards)}')
280 |
281 | ## after debug, it could work now.
--------------------------------------------------------------------------------
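
Cross-reference for the MIP formulation in `MIP_DQN.py`: `Actor_MIP` feeds the critic an 11-dimensional input, namely the 7-dimensional normalized state built by `_build_state` above followed by the 4-dimensional action. The layout below is inferred from `_build_state` and `step`; the meaning of `scaled_parameters` is an assumption deduced from how the power-balance constraints multiply the inputs, not code from the repository.

```python
# Inferred index layout of m.nn.inputs[...] in Actor_MIP (state first, then action).
MIP_INPUT_LAYOUT = {
    0: 'time_step / 23',
    1: 'price / Price_max',
    2: 'battery SOC / max_soc',
    3: 'net_load / Netload_max',
    4: 'dg1 output / DG1_max',
    5: 'dg2 output / DG2_max',
    6: 'dg3 output / DG3_max',
    7: 'action: battery charge/discharge in [-1, 1]',
    8: 'action: dg1 ramping in [-1, 1]',
    9: 'action: dg2 ramping in [-1, 1]',
    10: 'action: dg3 ramping in [-1, 1]',
}

# Assumed contents of scaled_parameters, matching the multipliers in the constraints:
# [battery max_charge, dg1 ramping_up, dg2 ramping_up, dg3 ramping_up,
#  Netload_max, DG1_max, DG2_max, DG3_max]
```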
/random_generator_more_battery.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Energy management environment for reinforcement learning agents developed by
3 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl
4 | import numpy as np
5 | import pandas as pd
6 | import gym
7 | from gym import spaces
8 |
9 | from Parameters import battery_parameters,dg_parameters
10 |
11 | class Constant:
12 | MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
13 | MAX_STEP_HOURS = 24 * 30
14 | class DataManager():
15 | def __init__(self) -> None:
16 | self.PV_Generation=[]
17 | self.Prices=[]
18 | self.Electricity_Consumption=[]
19 |
20 | def add_pv_element(self,element):self.PV_Generation.append(element)
21 | def add_price_element(self,element):self.Prices.append(element)
22 | def add_electricity_element(self,element):self.Electricity_Consumption.append(element)
23 |
24 | # get current time data based on given month day, and day_time
25 | def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
26 | def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
27 | def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
28 | # get series data for one episode
29 | def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
30 | def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
31 | def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
32 | class DG():
33 | def __init__(self,parameters):
34 | self.name=parameters.keys()
35 | self.a_factor=parameters['a']
36 | self.b_factor=parameters['b']
37 | self.c_factor=parameters['c']
38 | self.power_output_max=parameters['power_output_max']
39 | self.power_output_min=parameters['power_output_min']
40 | self.ramping_up=parameters['ramping_up']
41 | self.ramping_down=parameters['ramping_down']
42 | self.last_step_output=None
43 | def step(self,action_gen):
44 | output_change=action_gen*self.ramping_up#
45 | output=self.current_output+output_change
46 | if output>0:
47 | output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain
48 | else:
49 | output=0
50 | self.current_output=output
51 | def _get_cost(self,output):
52 | if output<=0:
53 | cost=0
54 | else:
55 | cost=(self.a_factor*pow(output,2)+self.b_factor*output+self.c_factor)
56 | # print(cost)
57 | return cost
58 | def reset(self):
59 | self.current_output=0
60 | class Battery():
61 | def __init__(self,parameters):
62 | self.capacity=parameters['capacity']# capacity (kWh)
63 | self.max_soc=parameters['max_soc']# max soc 0.8
64 | self.initial_capacity=parameters['initial_capacity']# initial soc 0.2
65 | self.min_soc=parameters['min_soc']# min soc 0.2
66 | self.degradation=parameters['degradation']# degradation cost 0,
67 | self.max_charge=parameters['max_charge']# max charge ability
68 | self.max_discharge=parameters['max_discharge']# max discharge ability
69 | self.efficiency=parameters['efficiency']# charge and discharge efficiency
70 | def step(self,action_battery):
71 |
72 | energy=action_battery*self.max_charge
73 | updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
74 | self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
75 | self.current_capacity=updated_capacity# update capacity to the current condition
76 | def _get_cost(self,energy):# calculate the cost depends on the energy change
77 | cost=energy**2*self.degradation
78 | return cost
79 | def SOC(self):
80 | return self.current_capacity
81 | def reset(self):
82 | self.current_capacity=np.random.uniform(0.2,0.8)
83 | class Grid():
84 | def __init__(self):
85 |
86 | self.on=True
87 | if self.on:
88 | self.exchange_ability=30
89 | else:
90 | self.exchange_ability=0
91 | def _get_cost(self,current_price,energy_exchange):##energy if charge, will be positive, if discharge will be negative
92 | return current_price*energy_exchange
93 | def retrive_past_price(self):
94 | result=[]
95 | if self.day<1:
96 | past_price=self.past_price# self.past price is fixed as the last days price
97 | else:
98 | past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day
99 | # print(past_price)
100 | for item in past_price[(self.time-24)::]:# if the current time step is 10, the 10th entry of the past price is extracted as the first value of the result
101 | result.append(item)
102 | for item in self.price[24*self.day:(24*self.day+self.time)]:# continue retrieving data from the current day and append it to the result, since the past-price window changes every step
103 | result.append(item)
104 | return result
105 | class ESSEnv(gym.Env):
106 | def __init__(self,**kwargs):
107 | super(ESSEnv,self).__init__()
108 | #parameters
109 | self.data_manager=DataManager()
110 | self._load_year_data()
111 | self.episode_length=kwargs.get('episode_length',24)
112 | self.month=None
113 | self.day=None
114 | # Control training set and validation set with reset function
115 | self.TRAIN=True
116 | self.current_time=None
117 | self.battery_parameters=kwargs.get('battery_parameters',battery_parameters)
118 | self.dg_parameters=kwargs.get('dg_parameters',dg_parameters)
119 | self.penalty_coefficient=20# controls the soft penalty for constraint violations
120 | self.sell_coefficient=0.5# controls the benefit of selling to the grid
121 | # instantiate the components of the environment
122 | self.grid=Grid()
123 | self.battery1=Battery(self.battery_parameters)
124 | self.battery2=Battery(self.battery_parameters)
125 | self.battery3=Battery(self.battery_parameters)
126 | self.dg1=DG(self.dg_parameters['gen_1'])
127 | self.dg2=DG(self.dg_parameters['gen_2'])
128 | self.dg3=DG(self.dg_parameters['gen_3'])
129 |
130 | # define normalized action space
131 | self.action_space=spaces.Box(low=-1,high=1,shape=(6,),dtype=np.float32)# normalized action space: [battery1, battery2, battery3, gen1, gen2, gen3]
132 | self.state_space=spaces.Box(low=0,high=1,shape=(9,),dtype=np.float32)
133 | self.Length_max=24
134 | self.Price_max=max(self.data_manager.Prices)
135 | self.Netload_max = max(self.data_manager.Electricity_Consumption)
136 | self.SOC_max=self.battery1.max_soc
137 | self.DG1_max=self.dg1.power_output_max
138 | self.DG2_max=self.dg2.power_output_max
139 | self.DG3_max=self.dg3.power_output_max
140 |
141 |
142 | def reset(self):
143 | self.month=np.random.randint(1,13)# here we choose 12 month
144 |
145 | if self.TRAIN:
146 | self.day=np.random.randint(1,21)
147 | else:
148 | self.day=np.random.randint(21,Constant.MONTHS_LEN[self.month-1])
149 | self.current_time=0
150 | self.battery1.reset()
151 | self.battery2.reset()
152 | self.battery3.reset()
153 | self.dg1.reset()
154 | self.dg2.reset()
155 | self.dg3.reset()
156 | return self._build_state()
157 | def _build_state(self):
158 | soc1=self.battery1.SOC()/self.SOC_max
159 | soc2=self.battery2.SOC()/self.SOC_max
160 | soc3 = self.battery3.SOC() / self.SOC_max
161 | dg1_output=self.dg1.current_output/self.DG1_max
162 | dg2_output=self.dg2.current_output/self.DG2_max
163 | dg3_output=self.dg3.current_output/self.DG3_max
164 | time_step=self.current_time/(self.Length_max-1)
165 | electricity_demand=self.data_manager.get_electricity_cons_data(self.month,self.day,self.current_time)
166 | pv_generation=self.data_manager.get_pv_data(self.month,self.day,self.current_time)
167 | price=self.data_manager.get_price_data(self.month,self.day,self.current_time)/self.Price_max
168 | net_load=(electricity_demand-pv_generation)/self.Netload_max
169 | obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc1),np.float32(soc2),np.float32(soc3),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None)
170 | return obs
171 |
172 | def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs
173 | ## here we want to put take action into each components
174 | current_obs=self._build_state()
175 | self.battery1.step(action[0])# here execute the state-transition part, battery.current_capacity also changed
176 | self.battery2.step(action[1])
177 | self.battery3.step(action[2])
178 | self.dg1.step(action[3])
179 | self.dg2.step(action[4])
180 | self.dg3.step(action[5])
181 | current_output=np.array((self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,-self.battery1.energy_change,-self.battery2.energy_change,-self.battery3.energy_change))# the production actually delivered this step
182 | self.current_output=current_output
183 | actual_production=sum(current_output)
184 | netload=current_obs[5]*self.Netload_max
185 | price=current_obs[1]*self.Price_max
186 |
187 | unbalance=actual_production-netload
188 | reward=0
189 | excess_penalty=0
190 | deficient_penalty=0
191 | sell_benefit=0
192 | buy_cost=0
193 | self.excess=0
194 | self.shedding=0
195 | # logic: if unbalance > 0 there is excess production, and the excess output is sold to the power grid for a benefit
196 | if unbalance>=0:# excess condition
197 | if unbalance<=self.grid.exchange_ability:
198 | sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1]
199 | else:
200 | sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient
201 | self.excess=unbalance-self.grid.exchange_ability
202 | excess_penalty=self.excess*self.penalty_coefficient
203 | else:# unbalance < 0: load-shedding mode, so a deficiency penalty is applied
204 | if abs(unbalance)<=self.grid.exchange_ability:
205 | buy_cost=self.grid._get_cost(price,abs(unbalance))
206 | else:
207 | buy_cost=self.grid._get_cost(price,self.grid.exchange_ability)
208 | self.shedding=abs(unbalance)-self.grid.exchange_ability
209 | deficient_penalty=self.shedding*self.penalty_coefficient
210 | battery1_cost=self.battery1._get_cost(self.battery1.energy_change)# we set it as 0 this time
211 | battery2_cost=self.battery2._get_cost(self.battery2.energy_change)
212 | battery3_cost = self.battery3._get_cost(self.battery3.energy_change)
213 | dg1_cost=self.dg1._get_cost(self.dg1.current_output)
214 | dg2_cost=self.dg2._get_cost(self.dg2.current_output)
215 | dg3_cost=self.dg3._get_cost(self.dg3.current_output)
216 |
217 | reward=-(battery1_cost+battery2_cost+battery3_cost+dg1_cost+dg2_cost+dg3_cost+excess_penalty+
218 | deficient_penalty-sell_benefit+buy_cost)/2e3
219 |
220 | self.operation_cost=battery1_cost+battery2_cost+battery3_cost+dg1_cost+dg2_cost+dg3_cost+buy_cost-sell_benefit+(self.shedding+self.excess)*self.penalty_coefficient
221 |
222 | self.unbalance=unbalance
223 | self.real_unbalance=self.shedding+self.excess
224 | final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery1.current_capacity,self.battery2.current_capacity,self.battery3.current_capacity]
225 | self.current_time+=1
226 | finish=(self.current_time==self.episode_length)
227 | if finish:
228 | self.final_step_outputs=final_step_outputs
229 | self.current_time=0
230 | next_obs=self.reset()
231 |
232 | else:
233 | next_obs=self._build_state()
234 | return current_obs,next_obs,float(reward),finish
235 | def render(self, current_obs, next_obs, reward, finish):
236 | print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish))
237 | def _load_year_data(self):
238 | '''This private function loads the electricity consumption, PV generation and price data for one year at
239 | one-hour resolution; all series are stored in the DataManager instance.'''
240 | pv_df=pd.read_csv('data/PV.csv',sep=';')
241 | #hourly price data for a year
242 | price_df=pd.read_csv('data/Prices.csv',sep=';')
243 | # minute-resolution electricity consumption data for a year (aggregated to hours below)
244 | electricity_df=pd.read_csv('data/H4.csv',sep=';')
245 | pv_data=pv_df['P_PV_'].apply(lambda x: x.replace(',','.')).to_numpy(dtype=float)
246 | price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
247 | electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
248 | # netload=electricity-pv_data
249 | for element in pv_data:
250 | self.data_manager.add_pv_element(element*100)
251 | for element in price:
252 | element/=10
253 | if element<=0.5:
254 | element=0.5
255 | self.data_manager.add_price_element(element)
256 | for i in range(0,electricity.shape[0],60):
257 | element=electricity[i:i+60]
258 | self.data_manager.add_electricity_element(sum(element)*300)
259 | if __name__ == '__main__':
260 | env=ESSEnv()
261 | env.TRAIN=False
262 | rewards=[]
263 | env.reset()
264 | tem_action=[0.1,0.1,0.1,0.1,0.1,0.1]
265 | for _ in range (240):
266 | print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
267 | current_obs,next_obs,reward,finish=env.step(tem_action)
268 | env.render(current_obs,next_obs,reward,finish)
269 | current_obs=next_obs
270 | rewards.append(reward)
271 |
272 | # print(f'total reward{sum(rewards)}')
273 |
274 | ## after debug, it could work now.
--------------------------------------------------------------------------------
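
The multi-battery variant only changes the observation and action layout (a 9-dimensional state and a 6-dimensional action). If `MIP_DQN.py` were pointed at this environment, the hard-coded `m.nn.inputs[...]` indices in `Actor_MIP` would have to be re-mapped; a sketch of the new layout, inferred from `_build_state` and `step` above (an assumption, not repository code):

```python
# Inferred input layout for ESSEnv in random_generator_more_battery.py.
MULTI_BATTERY_LAYOUT = {
    'state':  ['time_step', 'price', 'soc1', 'soc2', 'soc3', 'net_load',
               'dg1_output', 'dg2_output', 'dg3_output'],                 # indices 0-8
    'action': ['battery1', 'battery2', 'battery3', 'dg1', 'dg2', 'dg3'],  # indices 9-14
}
```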