├── .idea
│   ├── .gitignore
│   ├── Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── MIP_DQN.py
├── Parameters.py
├── README.md
├── data
│   ├── H4.csv
│   ├── PV.csv
│   └── Prices.csv
├── random_generator_battery.py
└── random_generator_more_battery.py

/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml:
--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Hou Shengren
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /MIP_DQN.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning 3 | # MIP-DQN algorithm developed by 4 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl 5 | # Pedro, TU Delft, p.p.vergara.barrios@tudeflt.nl 6 | # ------------------------------------------------------------------------ 7 | import pickle 8 | import torch 9 | import os 10 | import numpy as np 11 | import numpy.random as rd 12 | import pandas as pd 13 | import pyomo.environ as pyo 14 | import pyomo.kernel as pmo 15 | from omlt import OmltBlock 16 | 17 | from gurobipy import * 18 | from omlt.neuralnet import NetworkDefinition, FullSpaceNNFormulation,ReluBigMFormulation 19 | from omlt.io.onnx import write_onnx_model_with_bounds,load_onnx_neural_network_with_bounds 20 | import tempfile 21 | import torch.onnx 22 | import torch.nn as nn 23 | from copy import deepcopy 24 | import wandb 25 | from random_generator_battery import ESSEnv 26 | ## define net 27 | class ReplayBuffer: 28 | def __init__(self, max_len, state_dim, action_dim, gpu_id=0): 29 | self.now_len = 0 30 | self.next_idx = 0 31 | self.if_full = False 32 | self.max_len = max_len 33 | self.data_type = torch.float32 34 | self.action_dim = action_dim 35 | self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu") 36 | 37 | other_dim = 1 + 1 + self.action_dim 38 | self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device) 39 | 40 | if isinstance(state_dim, int): # state is pixel 41 | self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device) 42 | elif isinstance(state_dim, tuple): 43 | self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device) 44 | else: 45 | raise ValueError('state_dim') 46 | 47 | def extend_buffer(self, state, other): # CPU array to CPU array 48 | size = len(other) 49 | next_idx = self.next_idx + size 50 | 51 | if next_idx > self.max_len: 52 | self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx] 53 | self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx] 54 | self.if_full = True 55 | 56 | next_idx = next_idx - self.max_len 57 | self.buf_state[0:next_idx] = state[-next_idx:] 58 | self.buf_other[0:next_idx] = other[-next_idx:] 59 | else: 60 | self.buf_state[self.next_idx:next_idx] = state 61 | self.buf_other[self.next_idx:next_idx] = other 62 | self.next_idx = next_idx 63 | 64 | def sample_batch(self, batch_size) -> tuple: 65 | indices = rd.randint(self.now_len - 1, size=batch_size) 66 | r_m_a = self.buf_other[indices] 67 | return (r_m_a[:, 0:1], 68 | r_m_a[:, 1:2], 69 | r_m_a[:, 2:], 70 | self.buf_state[indices], 71 | self.buf_state[indices + 1]) 72 | 73 | def update_now_len(self): 74 | self.now_len = self.max_len if self.if_full else self.next_idx 75 | class Arguments: 76 | def __init__(self, agent=None, env=None): 77 | 78 | self.agent = agent # Deep Reinforcement Learning algorithm 79 | self.env = env # the environment for training 80 | self.cwd = None # current work directory. None means set automatically 81 | self.if_remove = False # remove the cwd folder? 
(True, False, None:ask me) 82 | self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,' 83 | self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage) 84 | self.num_threads = 8 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads) 85 | 86 | '''Arguments for training''' 87 | self.num_episode=3000 88 | self.gamma = 0.995 # discount factor of future rewards 89 | self.learning_rate = 1e-4 # 2 ** -14 ~= 6e-5 90 | self.soft_update_tau = 1e-2 # 2 ** -8 ~= 5e-3 91 | 92 | self.net_dim = 64 # the network width 256 93 | self.batch_size = 256 # num of transitions sampled from replay buffer. 94 | self.repeat_times = 2 ** 3 # repeatedly update network to keep critic's loss small 95 | self.target_step = 1000 # collect target_step experiences , then update network, 1024 96 | self.max_memo = 50000 # capacity of replay buffer 97 | ## arguments for controlling exploration 98 | self.explorate_decay=0.99 99 | self.explorate_min=0.3 100 | '''Arguments for evaluate''' 101 | self.random_seed_list=[1234,2234,3234,4234,5234] 102 | # self.random_seed_list=[2234] 103 | self.run_name='MIP_DQN_experiments' 104 | '''Arguments for save''' 105 | self.train=True 106 | self.save_network=True 107 | 108 | def init_before_training(self, if_main): 109 | if self.cwd is None: 110 | agent_name = self.agent.__class__.__name__ 111 | self.cwd = f'./{agent_name}/{self.run_name}' 112 | 113 | if if_main: 114 | import shutil # remove history according to bool(if_remove) 115 | if self.if_remove is None: 116 | self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y') 117 | elif self.if_remove: 118 | shutil.rmtree(self.cwd, ignore_errors=True) 119 | print(f"| Remove cwd: {self.cwd}") 120 | os.makedirs(self.cwd, exist_ok=True) 121 | 122 | np.random.seed(self.random_seed) 123 | torch.manual_seed(self.random_seed) 124 | torch.set_num_threads(self.num_threads) 125 | torch.set_default_dtype(torch.float32) 126 | 127 | os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)# control how many GPU is used   128 | class Actor(nn.Module): 129 | def __init__(self,mid_dim,state_dim,action_dim): 130 | super().__init__() 131 | self.net=nn.Sequential(nn.Linear(state_dim,mid_dim),nn.ReLU(), 132 | nn.Linear(mid_dim,mid_dim),nn.ReLU(), 133 | nn.Linear(mid_dim,mid_dim),nn.ReLU(), 134 | nn.Linear(mid_dim,action_dim)) 135 | def forward(self,state): 136 | return self.net(state).tanh()# make the data from -1 to 1 137 | def get_action(self,state,action_std):# 138 | action=self.net(state).tanh() 139 | noise=(torch.randn_like(action)*action_std).clamp(-0.5,0.5)# 140 | return (action+noise).clamp(-1.0,1.0) 141 | class CriticQ(nn.Module): 142 | def __init__(self,mid_dim,state_dim,action_dim): 143 | super().__init__() 144 | self.net_head=nn.Sequential(nn.Linear(state_dim+action_dim,mid_dim),nn.ReLU(), 145 | nn.Linear(mid_dim,mid_dim),nn.ReLU()) 146 | self.net_q1=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(), 147 | nn.Linear(mid_dim,1))# we get q1 value 148 | self.net_q2=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(), 149 | nn.Linear(mid_dim,1))# we get q2 value 150 | def forward(self,value): 151 | mid=self.net_head(value) 152 | return self.net_q1(mid) 153 | def get_q1_q2(self,value): 154 | mid=self.net_head(value) 155 | return self.net_q1(mid),self.net_q2(mid) 156 | class AgentBase: 157 | def __init__(self): 158 | self.state = None 159 | self.device = None 160 | self.action_dim = None 161 | self.if_off_policy = None 162 | self.explore_noise = None 163 | 
self.trajectory_list = None 164 | self.explore_rate = 1.0 165 | 166 | self.criterion = torch.nn.SmoothL1Loss() 167 | 168 | def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0): 169 | self.device = torch.device( 170 | f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu") 171 | self.action_dim = action_dim 172 | 173 | self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device) 174 | self.act = self.ClassAct(net_dim, state_dim, action_dim).to( 175 | self.device) if self.ClassAct else self.cri 176 | self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri 177 | self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act 178 | 179 | self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate) 180 | self.act_optim = torch.optim.Adam(self.act.parameters(), 181 | learning_rate) if self.ClassAct else self.cri 182 | del self.ClassCri, self.ClassAct 183 | 184 | def select_action(self, state) -> np.ndarray: 185 | states = torch.as_tensor((state,), dtype=torch.float32, device=self.device) 186 | action = self.act(states)[0] 187 | if rd.rand() tuple: 247 | buffer.update_now_len() 248 | obj_critic = obj_actor = None 249 | for update_c in range(int(buffer.now_len / batch_size * repeat_times)):# we update too much time? 250 | obj_critic, state = self.get_obj_critic(buffer, batch_size) 251 | self.optim_update(self.cri_optim, obj_critic) 252 | 253 | action_pg = self.act(state) # policy gradient 254 | obj_actor = -self.cri_target(torch.cat((state, action_pg),dim=-1)).mean() # use cri_target instead of cri for stable training 255 | self.optim_update(self.act_optim, obj_actor) 256 | if update_c % self.update_freq == 0: # delay update 257 | self.soft_update(self.cri_target, self.cri, soft_update_tau) 258 | self.soft_update(self.act_target, self.act, soft_update_tau) 259 | return obj_critic.item() / 2, obj_actor.item() 260 | 261 | def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor): 262 | with torch.no_grad(): 263 | reward, mask, action, state, next_s = buffer.sample_batch(batch_size) 264 | next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise, 265 | next_q = torch.min(*self.cri_target.get_q1_q2(torch.cat((next_s, next_a),dim=-1))) # twin critics 266 | q_label = reward + mask * next_q 267 | 268 | q1, q2 = self.cri.get_q1_q2(torch.cat((state, action),dim=-1)) 269 | obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics 270 | return obj_critic, state 271 | 272 | 273 | 274 | def update_buffer(_trajectory): 275 | ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32) 276 | ary_other = torch.as_tensor([item[1] for item in _trajectory]) 277 | ary_other[:, 0] = ary_other[:, 0] # ten_reward 278 | ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma 279 | 280 | buffer.extend_buffer(ten_state, ary_other) 281 | 282 | _steps = ten_state.shape[0] 283 | _r_exp = ary_other[:, 0].mean() # other = (reward, mask, action) 284 | return _steps, _r_exp 285 | 286 | 287 | def get_episode_return(env, act, device): 288 | '''get information of one episode during the training''' 289 | episode_return = 0.0 # sum of rewards in an episode 290 | episode_unbalance=0.0 291 | episode_operation_cost=0.0 292 | state = env.reset() 293 | for i in range(24): 294 | s_tensor = torch.as_tensor((state,), device=device) 295 | a_tensor = act(s_tensor) 296 | action = 
a_tensor.detach().cpu().numpy()[0] # not need detach(), because with torch.no_grad() outside 297 | state, next_state, reward, done,= env.step(action) 298 | state=next_state 299 | episode_return += reward 300 | episode_unbalance+=env.real_unbalance 301 | episode_operation_cost+=env.operation_cost 302 | if done: 303 | break 304 | return episode_return,episode_unbalance,episode_operation_cost 305 | class Actor_MIP: 306 | '''this actor is used to get the best action and Q function, the only input should be batch tensor state, action, and network, while the output should be 307 | batch tensor max_action, batch tensor max_Q''' 308 | def __init__(self,scaled_parameters,batch_size,net,state_dim,action_dim,env,constrain_on=False): 309 | self.batch_size = batch_size 310 | self.net = net 311 | self.state_dim = state_dim 312 | self.action_dim =action_dim 313 | self.env = env 314 | self.constrain_on=constrain_on 315 | self.scaled_parameters=scaled_parameters 316 | 317 | def get_input_bounds(self,input_batch_state): 318 | batch_size = self.batch_size 319 | batch_input_bounds = [] 320 | lbs_states = input_batch_state.detach().numpy() 321 | ubs_states = lbs_states 322 | 323 | for i in range(batch_size): 324 | input_bounds = {} 325 | for j in range(self.action_dim + self.state_dim): 326 | if j < self.state_dim: 327 | input_bounds[j] = (float(lbs_states[i][j]), float(ubs_states[i][j])) 328 | else: 329 | input_bounds[j] = (float(-1), float(1)) 330 | batch_input_bounds.append(input_bounds) 331 | return batch_input_bounds 332 | 333 | def predict_best_action(self, state): 334 | state=state.detach().cpu().numpy() 335 | v1 = torch.zeros((1, self.state_dim+self.action_dim), dtype=torch.float32) 336 | '''this function is used to get the best action based on current net''' 337 | model = self.net.to('cpu') 338 | input_bounds = {} 339 | lb_state = state 340 | ub_state = state 341 | for i in range(self.action_dim + self.state_dim): 342 | if i < self.state_dim: 343 | input_bounds[i] = (float(lb_state[0][i]), float(ub_state[0][i])) 344 | else: 345 | input_bounds[i] = (float(-1), float(1)) 346 | 347 | with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f: 348 | # export neural network to ONNX 349 | torch.onnx.export( 350 | model, 351 | v1, 352 | f, 353 | input_names=['state_action'], 354 | output_names=['Q_value'], 355 | dynamic_axes={ 356 | 'state_action': {0: 'batch_size'}, 357 | 'Q_value': {0: 'batch_size'} 358 | } 359 | ) 360 | # write ONNX model and its bounds using OMLT 361 | write_onnx_model_with_bounds(f.name, None, input_bounds) 362 | # load the network definition from the ONNX model 363 | network_definition = load_onnx_neural_network_with_bounds(f.name) 364 | # global optimality 365 | formulation = ReluBigMFormulation(network_definition) 366 | m = pyo.ConcreteModel() 367 | m.nn = OmltBlock() 368 | m.nn.build_formulation(formulation) 369 | '''# we are now building the surrogate model between action and state''' 370 | # constrain for battery, 371 | if self.constrain_on: 372 | m.power_balance_con1 = pyo.Constraint(expr=( 373 | (-m.nn.inputs[7] * self.scaled_parameters[0])+\ 374 | ((m.nn.inputs[8] * self.scaled_parameters[1])+m.nn.inputs[4]*self.scaled_parameters[5]) +\ 375 | ((m.nn.inputs[9] * self.scaled_parameters[2])+m.nn.inputs[5]*self.scaled_parameters[6]) +\ 376 | ((m.nn.inputs[10] * self.scaled_parameters[3])+m.nn.inputs[6]*self.scaled_parameters[7])>=\ 377 | m.nn.inputs[3] *self.scaled_parameters[4]-self.env.grid.exchange_ability)) 378 | m.power_balance_con2 = pyo.Constraint(expr=( 379 | 
(-m.nn.inputs[7] * self.scaled_parameters[0])+\ 380 | (m.nn.inputs[8] * self.scaled_parameters[1]+m.nn.inputs[4]*self.scaled_parameters[5]) +\ 381 | (m.nn.inputs[9] * self.scaled_parameters[2]+m.nn.inputs[5]*self.scaled_parameters[6]) +\ 382 | (m.nn.inputs[10] * self.scaled_parameters[3]+m.nn.inputs[6]*self.scaled_parameters[7])<=\ 383 | m.nn.inputs[3] *self.scaled_parameters[4]+self.env.grid.exchange_ability)) 384 | m.obj = pyo.Objective(expr=(m.nn.outputs[0]), sense=pyo.maximize) 385 | 386 | pyo.SolverFactory('gurobi').solve(m, tee=False) 387 | 388 | best_input = pyo.value(m.nn.inputs[:]) 389 | 390 | best_action = (best_input[self.state_dim::]) 391 | return best_action 392 | # define test function 393 | if __name__ == '__main__': 394 | args = Arguments() 395 | '''here record real unbalance''' 396 | reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': [], 397 | 'episode_operation_cost': []} 398 | loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []} 399 | args.visible_gpu = '2' 400 | for seed in args.random_seed_list: 401 | args.random_seed = seed 402 | # set different seed 403 | args.agent = AgentMIPDQN() 404 | agent_name = f'{args.agent.__class__.__name__}' 405 | args.agent.cri_target = True 406 | args.env = ESSEnv() 407 | args.init_before_training(if_main=True) 408 | '''init agent and environment''' 409 | agent = args.agent 410 | env = args.env 411 | agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate, 412 | args.if_per_or_gae) 413 | '''init replay buffer''' 414 | buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0], 415 | action_dim=env.action_space.shape[0]) 416 | '''start training''' 417 | cwd = args.cwd 418 | gamma = args.gamma 419 | batch_size = args.batch_size # how much data should be used to update net 420 | target_step = args.target_step # how manysteps of one episode should stop 421 | repeat_times = args.repeat_times # how many times should update for one batch size data 422 | soft_update_tau = args.soft_update_tau 423 | agent.state = env.reset() 424 | '''collect data and train and update network''' 425 | num_episode = args.num_episode 426 | args.train=True 427 | args.save_network=True 428 | wandb.init(project='MIP_DQN_experiments',name=args.run_name,settings=wandb.Settings(start_method="fork")) 429 | wandb.config = { 430 | "epochs": num_episode, 431 | "batch_size": batch_size} 432 | wandb.define_metric('custom_step') 433 | if args.train: 434 | collect_data = True 435 | while collect_data: 436 | print(f'buffer:{buffer.now_len}') 437 | with torch.no_grad(): 438 | trajectory = agent.explore_env(env, target_step) 439 | 440 | steps, r_exp = update_buffer(trajectory) 441 | buffer.update_now_len() 442 | if buffer.now_len >= 10000: 443 | collect_data = False 444 | for i_episode in range(num_episode): 445 | critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau) 446 | wandb.log({'critic loss':critic_loss,'custom_step':i_episode}) 447 | wandb.log({'actor loss': actor_loss,'custom_step':i_episode}) 448 | loss_record['critic_loss'].append(critic_loss) 449 | loss_record['actor_loss'].append(actor_loss) 450 | with torch.no_grad(): 451 | episode_reward, episode_unbalance, episode_operation_cost = get_episode_return(env, agent.act, 452 | agent.device) 453 | wandb.log({'mean_episode_reward': episode_reward,'custom_step':i_episode}) 454 | wandb.log({'unbalance':episode_unbalance,'custom_step':i_episode}) 455 
| wandb.log({'episode_operation_cost':episode_operation_cost,'custom_step':i_episode}) 456 | reward_record['mean_episode_reward'].append(episode_reward) 457 | reward_record['unbalance'].append(episode_unbalance) 458 | reward_record['episode_operation_cost'].append(episode_operation_cost) 459 | 460 | print( 461 | f'curren epsiode is {i_episode}, reward:{episode_reward},unbalance:{episode_unbalance},buffer_length: {buffer.now_len}') 462 | if i_episode % 10 == 0: 463 | # target_step 464 | with torch.no_grad(): 465 | agent._update_exploration_rate(args.explorate_decay,args.explorate_min) 466 | trajectory = agent.explore_env(env, target_step) 467 | steps, r_exp = update_buffer(trajectory) 468 | wandb.finish() 469 | if args.update_training_data: 470 | loss_record_path = f'{args.cwd}/loss_data.pkl' 471 | reward_record_path = f'{args.cwd}/reward_data.pkl' 472 | with open(loss_record_path, 'wb') as tf: 473 | pickle.dump(loss_record, tf) 474 | with open(reward_record_path, 'wb') as tf: 475 | pickle.dump(reward_record, tf) 476 | act_save_path = f'{args.cwd}/actor.pth' 477 | cri_save_path = f'{args.cwd}/critic.pth' 478 | 479 | print('training data have been saved') 480 | if args.save_network: 481 | torch.save(agent.act.state_dict(), act_save_path) 482 | torch.save(agent.cri.state_dict(), cri_save_path) 483 | print('training finished and actor and critic parameters have been saved') 484 | 485 | 486 | -------------------------------------------------------------------------------- /Parameters.py: -------------------------------------------------------------------------------- 1 | battery_parameters={ 2 | 'capacity':500,# kw 3 | 'max_charge':100, # kw 4 | 'max_discharge':100, #kw 5 | 'efficiency':0.9, 6 | 'degradation':0, #euro/kw 7 | 'max_soc':0.8, 8 | 'min_soc':0.2, 9 | 'initial_capacity':0.2} 10 | 11 | 12 | dg_parameters={ 13 | 'gen_1':{'a':0.0034 14 | ,'b': 3 15 | ,'c':30 16 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':150,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\ 17 | 'ramping_up':100,'ramping_down':100,'min_up':2,'min_down':1}, 18 | 19 | 'gen_2':{'a':0.001 20 | ,'b': 10 21 | ,'c': 40 22 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':375,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\ 23 | 'ramping_up':100,'ramping_down':100,'min_up':2,'min_down':1}, 24 | 25 | 'gen_3':{'a':0.001 26 | ,'b': 15 27 | ,'c': 70 28 | ,'d': 0.03,'e':4.2,'f': 0.031,'power_output_max':500,'power_output_min':0,'heat_output_max':None,'heat_output_min':None,\ 29 | 'ramping_up':200,'ramping_down':200,'min_up':2,'min_down':1}} 30 | 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Optimal Energy System Scheduling Using A Constraint-Aware Reinforcement Learning Algorithm 3 | 4 | * This code accompanies the paper Optimal Energy System Scheduling Using A Constraint-Aware Reinforcement Learning Algorithm, to appear in International Journal of Electrical Power & Energy Systems. 5 | # Abstract 6 | * The massive integration of renewable-based distributed energy resources (DERs) inherently increases the energy system's complexity, especially when it comes to defining its operational schedule. Deep reinforcement learning (DRL) algorithms arise as a promising solution due to their data-driven and model-free features. 
However, current DRL algorithms fail to enforce rigorous operational constraints (e.g., power balance and ramping-up/down constraints), which limits their implementation in real systems. To overcome this, a DRL algorithm (namely MIP-DQN) is proposed in this paper, capable of *strictly* enforcing all operational constraints in the action space, ensuring the feasibility of the defined schedule in real-time operation. This is done by leveraging recent optimization advances for deep neural networks (DNNs) that allow them to be represented as a mixed-integer programming (MIP) formulation, on top of which any action-space constraint can be enforced. Comprehensive numerical simulations show that the proposed algorithm outperforms existing state-of-the-art DRL algorithms, obtaining a lower error with respect to the globally optimal solution (upper boundary) computed by solving a mathematical programming formulation with perfect forecast information, while strictly enforcing all operational constraints (even on unseen test days).
7 | # Organization
8 | * Folder "data" -- Historical and processed data.
9 | * Script "Parameters" -- General parameters for the batteries and the other energy units.
10 | * Script "MIP_DQN" -- Implementation of the proposed MIP-DQN algorithm (see the sketch at the end of this README for how the trained critic is embedded into a MIP).
11 | * Script "random_generator_battery" -- The energy system environment.
12 | * Script "random_generator_more_battery" -- A variant of the environment with multiple batteries, added in response to a reviewer request (not essential to the main results).
13 | * Run the scripts after installing all packages, and please have a look at the code structure first; a minimal usage example is given at the end of this README.
14 | # Dependencies
15 | This code requires installation of the following libraries: ```PYOMO```, ```pandas 1.1.4```, ```numpy 1.20.1```, ```matplotlib 3.3.4```, ```pytorch 1.11.0```, ```OMLT```, ```wandb```. Weights & Biases (wandb) is used to monitor training; you can find more information [at this page](https://arxiv.org/abs/2305.05484).
16 | # Recommended citation
17 | A preprint is available; please check the paper for more details: [link to the paper](https://arxiv.org/abs/2305.05484).
18 | * Paper authors: Hou Shengren, Pedro P. Vergara, Edgar Mauricio Salazar, Peter Palensky
19 | * Accepted for publication in International Journal of Electrical Power & Energy Systems
20 | * If you use (parts of) this code, please cite the preprint or the published paper.
21 | ## Additional Information
22 | * The clean code and data are now uploaded.
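## Getting Started
For reference, a minimal usage sketch of the environment is given below. It assumes the dependencies listed above are installed (the environment additionally relies on `gym`, and `MIP_DQN.py` itself also needs `gurobipy` plus a Gurobi licence for the MIP-based action selection), and that the scripts are run from the repository root so that the files in `data/` are found.

```python
# Minimal sketch: instantiate the environment from random_generator_battery.py
# and roll it forward one hour with a random (normalized) action.
import numpy as np
from random_generator_battery import ESSEnv

env = ESSEnv()                        # loads data/PV.csv, data/Prices.csv, data/H4.csv
obs = env.reset()                     # samples a random training day and returns the normalized state
action = np.random.uniform(-1.0, 1.0, env.action_space.shape[0])  # [battery, dg1, dg2, dg3]
current_obs, next_obs, reward, done = env.step(action)
print(reward, env.real_unbalance, env.operation_cost)
```

Training the MIP-DQN agent is started by running `python MIP_DQN.py`; note that it logs to Weights & Biases, so a `wandb` account (or wandb offline mode) is needed.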
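## How the MIP-based action selection works
At decision time, MIP-DQN does not simply evaluate the actor: it exports the trained Q-network (critic) to ONNX, translates it into a mixed-integer formulation with OMLT, fixes the state inputs to the observed values, and lets Gurobi maximize the predicted Q-value over the action inputs. The snippet below is a condensed, illustrative sketch of what `Actor_MIP.predict_best_action` in `MIP_DQN.py` does; the function name `q_maximizing_action`, the flat `state` array, and the omission of the power-balance constraints are simplifications for readability, not the exact implementation.

```python
import tempfile
import torch
import pyomo.environ as pyo
from omlt import OmltBlock
from omlt.neuralnet import ReluBigMFormulation
from omlt.io.onnx import write_onnx_model_with_bounds, load_onnx_neural_network_with_bounds

def q_maximizing_action(critic, state, state_dim, action_dim):
    """Return the action that maximizes Q(state, action) for a ReLU critic."""
    # Input bounds: state entries are fixed to their observed values,
    # action entries are free within the normalized range [-1, 1].
    input_bounds = {i: (float(state[i]), float(state[i])) for i in range(state_dim)}
    input_bounds.update({state_dim + j: (-1.0, 1.0) for j in range(action_dim)})

    dummy = torch.zeros((1, state_dim + action_dim), dtype=torch.float32)
    with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:
        torch.onnx.export(critic, dummy, f,
                          input_names=['state_action'], output_names=['Q_value'])
        write_onnx_model_with_bounds(f.name, None, input_bounds)
        network_definition = load_onnx_neural_network_with_bounds(f.name)

    m = pyo.ConcreteModel()
    m.nn = OmltBlock()
    m.nn.build_formulation(ReluBigMFormulation(network_definition))  # exact big-M MIP of the critic
    # MIP_DQN.py additionally adds its power-balance constraints on m.nn.inputs here.
    m.obj = pyo.Objective(expr=m.nn.outputs[0], sense=pyo.maximize)
    pyo.SolverFactory('gurobi').solve(m, tee=False)
    return [pyo.value(m.nn.inputs[state_dim + j]) for j in range(action_dim)]
```

Because the critic becomes an exact MIP, any linear operational constraint on the action (for example, the grid exchange limit used in `MIP_DQN.py`) can be added to the same model, which is what makes the resulting schedule feasible by construction.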
23 | -------------------------------------------------------------------------------- /random_generator_battery.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Energy management environment for reinforcement learning agents developed by 3 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl 4 | # ------------------------------------------------------------------------ 5 | import random 6 | import numpy as np 7 | 8 | import pandas as pd 9 | import gym 10 | from gym import spaces 11 | import math 12 | import os 13 | import sys 14 | from Parameters import battery_parameters,dg_parameters 15 | 16 | class Constant: 17 | MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 18 | MAX_STEP_HOURS = 24 * 30 19 | class DataManager(): 20 | def __init__(self) -> None: 21 | self.PV_Generation=[] 22 | self.Prices=[] 23 | self.Electricity_Consumption=[] 24 | 25 | def add_pv_element(self,element):self.PV_Generation.append(element) 26 | def add_price_element(self,element):self.Prices.append(element) 27 | def add_electricity_element(self,element):self.Electricity_Consumption.append(element) 28 | 29 | def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 30 | def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 31 | def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 32 | def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 33 | def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 34 | def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 35 | 36 | class DG(): 37 | def __init__(self,parameters): 38 | self.name=parameters.keys() 39 | self.a_factor=parameters['a'] 40 | self.b_factor=parameters['b'] 41 | self.c_factor=parameters['c'] 42 | self.power_output_max=parameters['power_output_max'] 43 | self.power_output_min=parameters['power_output_min'] 44 | self.ramping_up=parameters['ramping_up'] 45 | self.ramping_down=parameters['ramping_down'] 46 | self.last_step_output=None 47 | def step(self,action_gen): 48 | output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary 49 | output=self.current_output+output_change 50 | if output>0: 51 | output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain 52 | else: 53 | output=0 54 | self.current_output=output 55 | def _get_cost(self,output): 56 | if output<=0: 57 | cost=0 58 | else: 59 | cost=(self.a_factor*pow(output,2)+self.b_factor*output+self.c_factor) 60 | # print(cost) 61 | return cost 62 | def reset(self): 63 | self.current_output=0 64 | 65 | class Battery(): 66 | '''simulate a simple battery here''' 67 | def __init__(self,parameters): 68 | self.capacity=parameters['capacity']# 容量 69 | self.max_soc=parameters['max_soc']# max soc 0.8 70 | self.initial_capacity=parameters['initial_capacity']# initial soc 0.4 71 | self.min_soc=parameters['min_soc']# 0.2 72 | self.degradation=parameters['degradation']# degradation cost 
0, 73 | self.max_charge=parameters['max_charge']# max charge ability 74 | self.max_discharge=parameters['max_discharge']# max discharge ability 75 | self.efficiency=parameters['efficiency']# charge and discharge efficiency 76 | def step(self,action_battery): 77 | energy=action_battery*self.max_charge 78 | updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity)) 79 | self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative 80 | self.current_capacity=updated_capacity# update capacity to current codition 81 | def _get_cost(self,energy):# calculate the cost depends on the energy change 82 | cost=energy**2*self.degradation 83 | return cost 84 | def SOC(self): 85 | return self.current_capacity 86 | def reset(self): 87 | self.current_capacity=np.random.uniform(0.2,0.8) 88 | class Grid(): 89 | def __init__(self): 90 | 91 | self.on=True 92 | if self.on: 93 | self.exchange_ability=30 94 | else: 95 | self.exchange_ability=0 96 | def _get_cost(self,current_price,energy_exchange):##energy if charge, will be positive, if discharge will be negative 97 | return current_price*energy_exchange 98 | def retrive_past_price(self): 99 | result=[] 100 | if self.day<1: 101 | past_price=self.past_price# self.past price is fixed as the last days price 102 | else: 103 | past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day 104 | for item in past_price[(self.time-24)::]:# here if current time_step is 10, then the 10th data of past price is extrated to the result as the first value 105 | result.append(item) 106 | for item in self.price[24*self.day:(24*self.day+self.time)]:# continue to retrive data from the past and attend it to the result. as past price is change everytime. 
107 | result.append(item) 108 | return result 109 | class ESSEnv(gym.Env): 110 | '''ENV descirption: 111 | the agent learn to charge with low price and then discharge at high price, in this way, it could get benefits''' 112 | def __init__(self,**kwargs): 113 | super(ESSEnv,self).__init__() 114 | #parameters 115 | self.data_manager=DataManager() 116 | self._load_year_data() 117 | self.episode_length=kwargs.get('episode_length',24) 118 | self.month=None 119 | self.day=None 120 | self.TRAIN=True 121 | self.current_time=None 122 | self.battery_parameters=kwargs.get('battery_parameters',battery_parameters) 123 | self.dg_parameters=kwargs.get('dg_parameters',dg_parameters) 124 | self.penalty_coefficient=20#control soft penalty constrain 125 | self.sell_coefficient=0.5# control sell benefits 126 | # instant the components of the environment 127 | self.grid=Grid() 128 | self.battery=Battery(self.battery_parameters) 129 | self.dg1=DG(self.dg_parameters['gen_1']) 130 | self.dg2=DG(self.dg_parameters['gen_2']) 131 | self.dg3=DG(self.dg_parameters['gen_3']) 132 | 133 | # define normalized action space 134 | #action space here is [output of gen1,outputof gen2, output of gen3, charge/discharge of battery] 135 | self.action_space=spaces.Box(low=-1,high=1,shape=(4,),dtype=np.float32)# seems here doesn't used 136 | # state is [time_step,netload,dg_output_last_step]# this time no prive 137 | self.state_space=spaces.Box(low=0,high=1,shape=(7,),dtype=np.float32) 138 | # set state related normalization reference 139 | self.Length_max=24 140 | self.Price_max=max(self.data_manager.Prices) 141 | # self.Netload_max=max(self.data_manager.Electricity_Consumption)-max(self.data_manager.PV_Generation) 142 | self.Netload_max = max(self.data_manager.Electricity_Consumption) 143 | self.SOC_max=self.battery.max_soc 144 | self.DG1_max=self.dg1.power_output_max 145 | self.DG2_max=self.dg2.power_output_max 146 | self.DG3_max=self.dg3.power_output_max 147 | 148 | def reset(self): 149 | '''reset is used for initialize the environment, decide the day of month.''' 150 | self.month=np.random.randint(1,13)# here we choose 12 month 151 | 152 | if self.TRAIN: 153 | self.day=np.random.randint(1,21) 154 | else: 155 | self.day=np.random.randint(21,Constant.MONTHS_LEN[self.month-1]) 156 | self.current_time=0 157 | self.battery.reset() 158 | self.dg1.reset() 159 | self.dg2.reset() 160 | self.dg3.reset() 161 | return self._build_state() 162 | def _build_state(self): 163 | #we put all original information into state and then transfer it into normalized state 164 | soc=self.battery.SOC()/self.SOC_max 165 | dg1_output=self.dg1.current_output/self.DG1_max 166 | dg2_output=self.dg2.current_output/self.DG2_max 167 | dg3_output=self.dg3.current_output/self.DG3_max 168 | time_step=self.current_time/(self.Length_max-1) 169 | electricity_demand=self.data_manager.get_electricity_cons_data(self.month,self.day,self.current_time) 170 | pv_generation=self.data_manager.get_pv_data(self.month,self.day,self.current_time) 171 | price=self.data_manager.get_price_data(self.month,self.day,self.current_time)/self.Price_max 172 | net_load=(electricity_demand-pv_generation)/self.Netload_max 173 | obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None) 174 | return obs 175 | 176 | def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs 177 | ## here we want to put take action into each 
components 178 | current_obs=self._build_state() 179 | self.battery.step(action[0])# here execute the state-transition part, battery.current_capacity also changed 180 | self.dg1.step(action[1]) 181 | self.dg2.step(action[2]) 182 | self.dg3.step(action[3]) 183 | current_output=np.array((self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,-self.battery.energy_change))#truely corresonding to the result 184 | self.current_output=current_output 185 | actual_production=sum(current_output) 186 | # transfer to normal_state 187 | netload=current_obs[3]*self.Netload_max 188 | price=current_obs[1]*self.Price_max 189 | 190 | unbalance=actual_production-netload 191 | 192 | reward=0 193 | excess_penalty=0 194 | deficient_penalty=0 195 | sell_benefit=0 196 | buy_cost=0 197 | self.excess=0 198 | self.shedding=0 199 | # logic here is: if unbalance >0 then it is production excess, so the excessed output should sold to power grid to get benefits 200 | if unbalance>=0:# it is now in excess condition 201 | if unbalance<=self.grid.exchange_ability: 202 | sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1] 203 | else: 204 | sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient 205 | #real unbalance that even grid could not meet 206 | self.excess=unbalance-self.grid.exchange_ability 207 | excess_penalty=self.excess*self.penalty_coefficient 208 | else:# unbalance <0, its load shedding model, in this case, deficient penalty is used 209 | if abs(unbalance)<=self.grid.exchange_ability: 210 | buy_cost=self.grid._get_cost(price,abs(unbalance)) 211 | else: 212 | buy_cost=self.grid._get_cost(price,self.grid.exchange_ability) 213 | self.shedding=abs(unbalance)-self.grid.exchange_ability 214 | deficient_penalty=self.shedding*self.penalty_coefficient 215 | battery_cost=self.battery._get_cost(self.battery.energy_change)# we set it as 0 this time 216 | dg1_cost=self.dg1._get_cost(self.dg1.current_output) 217 | dg2_cost=self.dg2._get_cost(self.dg2.current_output) 218 | dg3_cost=self.dg3._get_cost(self.dg3.current_output) 219 | 220 | reward=-(battery_cost+dg1_cost+dg2_cost+dg3_cost+excess_penalty+ 221 | deficient_penalty-sell_benefit+buy_cost)/2e3 222 | 223 | self.operation_cost=battery_cost+dg1_cost+dg2_cost+dg3_cost+buy_cost-sell_benefit+(self.shedding+self.excess)*self.penalty_coefficient 224 | 225 | self.unbalance=unbalance 226 | self.real_unbalance=self.shedding+self.excess 227 | '''here we also need to store the final step outputs for the final steps including, soc, output of units for seeing the final states''' 228 | final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery.current_capacity] 229 | self.current_time+=1 230 | finish=(self.current_time==self.episode_length) 231 | if finish: 232 | self.final_step_outputs=final_step_outputs 233 | self.current_time=0 234 | next_obs=self.reset() 235 | 236 | else: 237 | next_obs=self._build_state() 238 | return current_obs,next_obs,float(reward),finish 239 | def render(self, current_obs, next_obs, reward, finish): 240 | print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish)) 241 | def _load_year_data(self): 242 | '''this private function is used to load the electricity consumption, pv generation and related prices in a year as 243 | a one hour resolution, with the cooperation of class DataProcesser and then all these 
data are stored in data processor''' 244 | pv_df=pd.read_csv('data/PV.csv',sep=';') 245 | #hourly price data for a year 246 | price_df=pd.read_csv('data/Prices.csv',sep=';') 247 | # mins electricity consumption data for a year 248 | electricity_df=pd.read_csv('data/H4.csv',sep=';') 249 | pv_data=pv_df['P_PV_'].apply(lambda x: x.replace(',','.')).to_numpy(dtype=float) 250 | price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float) 251 | electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float) 252 | # netload=electricity-pv_data 253 | '''we carefully redesign the magnitude for price and amount of generation as well as demand''' 254 | for element in pv_data: 255 | self.data_manager.add_pv_element(element*100) 256 | for element in price: 257 | element/=10 258 | if element<=0.5: 259 | element=0.5 260 | self.data_manager.add_price_element(element) 261 | for i in range(0,electricity.shape[0],60): 262 | element=electricity[i:i+60] 263 | self.data_manager.add_electricity_element(sum(element)*300) 264 | ## test environment 265 | if __name__ == '__main__': 266 | env=ESSEnv() 267 | env.TRAIN=False 268 | rewards=[] 269 | env.reset() 270 | env.day=27 271 | tem_action=[0.1,0.1,0.1,0.1] 272 | for _ in range (240): 273 | print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}') 274 | current_obs,next_obs,reward,finish=env.step(tem_action) 275 | env.render(current_obs,next_obs,reward,finish) 276 | current_obs=next_obs 277 | rewards.append(reward) 278 | 279 | # print(f'total reward{sum(rewards)}') 280 | 281 | ## after debug, it could work now. -------------------------------------------------------------------------------- /random_generator_more_battery.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Energy management environment for reinforcement learning agents developed by 3 | # Hou Shengren, TU Delft, h.shengren@tudelft.nl 4 | import numpy as np 5 | import pandas as pd 6 | import gym 7 | from gym import spaces 8 | 9 | from Parameters import battery_parameters,dg_parameters 10 | 11 | class Constant: 12 | MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 13 | MAX_STEP_HOURS = 24 * 30 14 | class DataManager(): 15 | def __init__(self) -> None: 16 | self.PV_Generation=[] 17 | self.Prices=[] 18 | self.Electricity_Consumption=[] 19 | 20 | def add_pv_element(self,element):self.PV_Generation.append(element) 21 | def add_price_element(self,element):self.Prices.append(element) 22 | def add_electricity_element(self,element):self.Electricity_Consumption.append(element) 23 | 24 | # get current time data based on given month day, and day_time 25 | def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 26 | def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 27 | def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time] 28 | # get series data for one episode 29 | def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 30 | def get_series_price_data(self,month,day):return 
self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 31 | def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24] 32 | class DG(): 33 | def __init__(self,parameters): 34 | self.name=parameters.keys() 35 | self.a_factor=parameters['a'] 36 | self.b_factor=parameters['b'] 37 | self.c_factor=parameters['c'] 38 | self.power_output_max=parameters['power_output_max'] 39 | self.power_output_min=parameters['power_output_min'] 40 | self.ramping_up=parameters['ramping_up'] 41 | self.ramping_down=parameters['ramping_down'] 42 | self.last_step_output=None 43 | def step(self,action_gen): 44 | output_change=action_gen*self.ramping_up# 45 | output=self.current_output+output_change 46 | if output>0: 47 | output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain 48 | else: 49 | output=0 50 | self.current_output=output 51 | def _get_cost(self,output): 52 | if output<=0: 53 | cost=0 54 | else: 55 | cost=(self.a_factor*pow(output,2)+self.b_factor*output+self.c_factor) 56 | # print(cost) 57 | return cost 58 | def reset(self): 59 | self.current_output=0 60 | class Battery(): 61 | def __init__(self,parameters): 62 | self.capacity=parameters['capacity']# 容量 63 | self.max_soc=parameters['max_soc']# max soc 0.8 64 | self.initial_capacity=parameters['initial_capacity']# initial soc 0.4 65 | self.min_soc=parameters['min_soc']# 0.2 66 | self.degradation=parameters['degradation']# degradation cost 0, 67 | self.max_charge=parameters['max_charge']# max charge ability 68 | self.max_discharge=parameters['max_discharge']# max discharge ability 69 | self.efficiency=parameters['efficiency']# charge and discharge efficiency 70 | def step(self,action_battery): 71 | 72 | energy=action_battery*self.max_charge 73 | updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity)) 74 | self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative 75 | self.current_capacity=updated_capacity# update capacity to current codition 76 | def _get_cost(self,energy):# calculate the cost depends on the energy change 77 | cost=energy**2*self.degradation 78 | return cost 79 | def SOC(self): 80 | return self.current_capacity 81 | def reset(self): 82 | self.current_capacity=np.random.uniform(0.2,0.8) 83 | class Grid(): 84 | def __init__(self): 85 | 86 | self.on=True 87 | if self.on: 88 | self.exchange_ability=30 89 | else: 90 | self.exchange_ability=0 91 | def _get_cost(self,current_price,energy_exchange):##energy if charge, will be positive, if discharge will be negative 92 | return current_price*energy_exchange 93 | def retrive_past_price(self): 94 | result=[] 95 | if self.day<1: 96 | past_price=self.past_price# self.past price is fixed as the last days price 97 | else: 98 | past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day 99 | # print(past_price) 100 | for item in past_price[(self.time-24)::]:# here if current time_step is 10, then the 10th data of past price is extrated to the result as the first value 101 | result.append(item) 102 | for item in self.price[24*self.day:(24*self.day+self.time)]:# continue to retrive data from the past and attend it to the result. as past price is change everytime. 
103 | result.append(item) 104 | return result 105 | class ESSEnv(gym.Env): 106 | def __init__(self,**kwargs): 107 | super(ESSEnv,self).__init__() 108 | #parameters 109 | self.data_manager=DataManager() 110 | self._load_year_data() 111 | self.episode_length=kwargs.get('episode_length',24) 112 | self.month=None 113 | self.day=None 114 | # Control training set and validation set with reset function 115 | self.TRAIN=True 116 | self.current_time=None 117 | self.battery_parameters=kwargs.get('battery_parameters',battery_parameters) 118 | self.dg_parameters=kwargs.get('dg_parameters',dg_parameters) 119 | self.penalty_coefficient=20#control soft penalty constrain 120 | self.sell_coefficient=0.5# control sell benefits 121 | # instant the components of the environment 122 | self.grid=Grid() 123 | self.battery1=Battery(self.battery_parameters) 124 | self.battery2=Battery(self.battery_parameters) 125 | self.battery3=Battery(self.battery_parameters) 126 | self.dg1=DG(self.dg_parameters['gen_1']) 127 | self.dg2=DG(self.dg_parameters['gen_2']) 128 | self.dg3=DG(self.dg_parameters['gen_3']) 129 | 130 | # define normalized action space 131 | self.action_space=spaces.Box(low=-1,high=1,shape=(6,),dtype=np.float32)# seems here doesn't used 132 | self.state_space=spaces.Box(low=0,high=1,shape=(9,),dtype=np.float32) 133 | self.Length_max=24 134 | self.Price_max=max(self.data_manager.Prices) 135 | self.Netload_max = max(self.data_manager.Electricity_Consumption) 136 | self.SOC_max=self.battery1.max_soc 137 | self.DG1_max=self.dg1.power_output_max 138 | self.DG2_max=self.dg2.power_output_max 139 | self.DG3_max=self.dg3.power_output_max 140 | 141 | 142 | def reset(self): 143 | self.month=np.random.randint(1,13)# here we choose 12 month 144 | 145 | if self.TRAIN: 146 | self.day=np.random.randint(1,21) 147 | else: 148 | self.day=np.random.randint(21,Constant.MONTHS_LEN[self.month-1]) 149 | self.current_time=0 150 | self.battery1.reset() 151 | self.battery2.reset() 152 | self.battery3.reset() 153 | self.dg1.reset() 154 | self.dg2.reset() 155 | self.dg3.reset() 156 | return self._build_state() 157 | def _build_state(self): 158 | soc1=self.battery1.SOC()/self.SOC_max 159 | soc2=self.battery2.SOC()/self.SOC_max 160 | soc3 = self.battery3.SOC() / self.SOC_max 161 | dg1_output=self.dg1.current_output/self.DG1_max 162 | dg2_output=self.dg2.current_output/self.DG2_max 163 | dg3_output=self.dg3.current_output/self.DG3_max 164 | time_step=self.current_time/(self.Length_max-1) 165 | electricity_demand=self.data_manager.get_electricity_cons_data(self.month,self.day,self.current_time) 166 | pv_generation=self.data_manager.get_pv_data(self.month,self.day,self.current_time) 167 | price=self.data_manager.get_price_data(self.month,self.day,self.current_time)/self.Price_max 168 | net_load=(electricity_demand-pv_generation)/self.Netload_max 169 | obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc1),np.float32(soc2),np.float32(soc3),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None) 170 | return obs 171 | 172 | def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs 173 | ## here we want to put take action into each components 174 | current_obs=self._build_state() 175 | self.battery1.step(action[0])# here execute the state-transition part, battery.current_capacity also changed 176 | self.battery2.step(action[1]) 177 | self.battery3.step(action[2]) 178 | self.dg1.step(action[3]) 179 | self.dg2.step(action[4]) 180 
| self.dg3.step(action[5]) 181 | current_output=np.array((self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,-self.battery1.energy_change,-self.battery2.energy_change,-self.battery3.energy_change))#truely corresonding to the result 182 | self.current_output=current_output 183 | actual_production=sum(current_output) 184 | netload=current_obs[5]*self.Netload_max 185 | price=current_obs[1]*self.Price_max 186 | 187 | unbalance=actual_production-netload 188 | reward=0 189 | excess_penalty=0 190 | deficient_penalty=0 191 | sell_benefit=0 192 | buy_cost=0 193 | self.excess=0 194 | self.shedding=0 195 | # logic here is: if unbalance >0 then it is production excess, so the excessed output should sold to power grid to get benefits 196 | if unbalance>=0:# it is now in excess condition 197 | if unbalance<=self.grid.exchange_ability: 198 | sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1] 199 | else: 200 | sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient 201 | self.excess=unbalance-self.grid.exchange_ability 202 | excess_penalty=self.excess*self.penalty_coefficient 203 | else:# unbalance <0, its load shedding model, in this case, deficient penalty is used 204 | if abs(unbalance)<=self.grid.exchange_ability: 205 | buy_cost=self.grid._get_cost(price,abs(unbalance)) 206 | else: 207 | buy_cost=self.grid._get_cost(price,self.grid.exchange_ability) 208 | self.shedding=abs(unbalance)-self.grid.exchange_ability 209 | deficient_penalty=self.shedding*self.penalty_coefficient 210 | battery1_cost=self.battery1._get_cost(self.battery1.energy_change)# we set it as 0 this time 211 | battery2_cost=self.battery2._get_cost(self.battery2.energy_change) 212 | battery3_cost = self.battery3._get_cost(self.battery3.energy_change) 213 | dg1_cost=self.dg1._get_cost(self.dg1.current_output) 214 | dg2_cost=self.dg2._get_cost(self.dg2.current_output) 215 | dg3_cost=self.dg3._get_cost(self.dg3.current_output) 216 | 217 | reward=-(battery1_cost+battery2_cost+battery3_cost+dg1_cost+dg2_cost+dg3_cost+excess_penalty+ 218 | deficient_penalty-sell_benefit+buy_cost)/2e3 219 | 220 | self.operation_cost=battery1_cost+battery2_cost+battery3_cost+dg1_cost+dg2_cost+dg3_cost+buy_cost-sell_benefit+(self.shedding+self.excess)*self.penalty_coefficient 221 | 222 | self.unbalance=unbalance 223 | self.real_unbalance=self.shedding+self.excess 224 | final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery1.current_capacity,self.battery2.current_capacity,self.battery3.current_capacity] 225 | self.current_time+=1 226 | finish=(self.current_time==self.episode_length) 227 | if finish: 228 | self.final_step_outputs=final_step_outputs 229 | self.current_time=0 230 | next_obs=self.reset() 231 | 232 | else: 233 | next_obs=self._build_state() 234 | return current_obs,next_obs,float(reward),finish 235 | def render(self, current_obs, next_obs, reward, finish): 236 | print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish)) 237 | def _load_year_data(self): 238 | '''this private function is used to load the electricity consumption, pv generation and related prices in a year as 239 | a one hour resolution, with the cooperation of class DataProcesser and then all these data are stored in data processor''' 240 | pv_df=pd.read_csv('data/PV.csv',sep=';') 241 | #hourly price data for a year 242 | 
price_df=pd.read_csv('data/Prices.csv',sep=';') 243 | # mins electricity consumption data for a year 244 | electricity_df=pd.read_csv('data/H4.csv',sep=';') 245 | pv_data=pv_df['P_PV_'].apply(lambda x: x.replace(',','.')).to_numpy(dtype=float) 246 | price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float) 247 | electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float) 248 | # netload=electricity-pv_data 249 | for element in pv_data: 250 | self.data_manager.add_pv_element(element*100) 251 | for element in price: 252 | element/=10 253 | if element<=0.5: 254 | element=0.5 255 | self.data_manager.add_price_element(element) 256 | for i in range(0,electricity.shape[0],60): 257 | element=electricity[i:i+60] 258 | self.data_manager.add_electricity_element(sum(element)*300) 259 | if __name__ == '__main__': 260 | env=ESSEnv() 261 | env.TRAIN=False 262 | rewards=[] 263 | env.reset() 264 | tem_action=[0.1,0.1,0.1,0.1,0.1,0.1] 265 | for _ in range (240): 266 | print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}') 267 | current_obs,next_obs,reward,finish=env.step(tem_action) 268 | env.render(current_obs,next_obs,reward,finish) 269 | current_obs=next_obs 270 | rewards.append(reward) 271 | 272 | # print(f'total reward{sum(rewards)}') 273 | 274 | ## after debug, it could work now. --------------------------------------------------------------------------------