├── controllers
│   ├── __init__.py
│   ├── buffer.py
│   ├── models.py
│   ├── baseline_controller.py
│   ├── ppo_controller.py
│   └── td3_controller.py
├── README.md
├── data.py
├── setting.py
├── cigre_mv_microgrid.py
├── main.py
└── utils.py
/controllers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Microgrid Energy Management using Deep Reinforcement Learning
2 | This repository contains the experiment code for my master thesis "Time Series Observation and Action Handling for Battery Management in Applying Deep Reinforcement Learning for
3 | Microgrid Energy Management".
4 |
5 | # Abstract
6 | Time Series Observation and Action Handling for Battery Management in Applying Deep Reinforcement Learning for Microgrid Energy Management /
7 | The transformation from traditional grids to microgrids introduces challenges due to multiple distributed energy resources and the intermittency of renewable energy sources and loads. Much effort has been committed to the design of microgrid energy management systems to attain optimal operation, and reinforcement learning is considered one of the most promising methods because of its competitive properties. Reinforcement learning algorithms generally do not assume precise models and can learn the underlying dynamics of the system under uncertainty by interacting with the environment. However, directly applying reinforcement learning to microgrid energy management is not an easy task. In this paper, we study two design aspects in reinforcement learning algorithms for microgrid energy management, which are related to time series observation and battery management in microgrids. In order to process time series data and handle varying battery charging/discharging bounds in our deep reinforcement learning algorithm, recurrent neural networks and valid action space mapping are used in our implementation. Experimental results confirm that the two design aspects are crucial for applying reinforcement learning in microgrid energy management.
8 |
9 | # Code Explanation
10 | | File | Description |
11 | |-----------------------|---------------------|
12 | | cigre_mv_microgrid.py | Creates the CIGRE MV test grid |
13 | | data.py | Converts PJM data into profiles for our environment |
14 | | main.py | Entry point of our experiments |
15 | | setting.py | Environment settings |
16 | | utils.py | Shared helper functions |
17 |
18 | | Directory | Description |
19 | |---|---|
20 | | controllers | Controllers for microgrid energy management using various algorithms |
21 | | data | Processed data for our environment |
22 | | history | Training history |
23 | | model_weights | Trained model weights |
24 | | pf_res | Results of power flow analysis |
25 | | plot | Plots of experimental results |
26 | | rms | Stored running mean/std values for input normalization |
27 |
28 | ## main.py
29 | - train_ppo(): train a PPO agent.
30 | - train_td3(): train a TD3 agent.
31 | - test(): test with a trained agent.
32 | - baseline(): test the baseline controller.
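
## Valid action space mapping (sketch)
The "valid action space mapping" named in the abstract combines two pieces that appear later in this repository: SOC-dependent charging/discharging bounds (computed in the controllers from `SOC_MAX`, `SOC_MIN` and the battery energy capacity) and the affine rescaling of the network output in `utils.scale_to_mg`. Below is a minimal, illustrative sketch for battery 5 only, with constants copied from `setting.py`; `map_to_valid_action` is a hypothetical helper written for this explanation, not a function defined in the code.

```python
import numpy as np

# Constants from setting.py (battery 5)
E_B5_MAX, P_B5_MAX, P_B5_MIN = 3.0, 0.6, -0.6
SOC_MAX, SOC_MIN = 0.9, 0.1
HOUR_PER_TIME_STEP = 1

def map_to_valid_action(nn_action, soc):
    """Map a raw actor output in [-1, 1] to a feasible charge/discharge power (MW)."""
    # Bounds shrink as the battery approaches SOC_MAX (cannot keep charging)
    # or SOC_MIN (cannot keep discharging).
    p_max = min((SOC_MAX - soc) * E_B5_MAX / HOUR_PER_TIME_STEP, P_B5_MAX)
    p_min = max((SOC_MIN - soc) * E_B5_MAX / HOUR_PER_TIME_STEP, P_B5_MIN)
    # Affine rescaling of the clipped output onto [p_min, p_max],
    # in the same spirit as utils.scale_to_mg.
    nn_action = np.clip(nn_action, -1.0, 1.0)
    return (nn_action + 1.0) * (p_max - p_min) / 2.0 + p_min

# A nearly full battery can barely charge but can still discharge at rated power:
print(map_to_valid_action(1.0, soc=0.85))   # 0.15 (limited by SOC_MAX)
print(map_to_valid_action(-1.0, soc=0.85))  # -0.6 (rated discharge)
```

In this sketch, the bounds themselves depend on the current state of charge, so the actor's output always lands on a feasible setpoint instead of being clipped after the power flow step.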
33 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import utils 3 | from setting import * 4 | 5 | def create_unit_profile(pv_df, wt_df, load_df, price_df): 6 | # reshape tables 7 | pv_df = pd.pivot_table(pv_df, values='solar_generation_mw', index=['datetime_beginning_ept'], columns=['area'], sort=False) 8 | wt_df = pd.pivot_table(wt_df, values='wind_generation_mw', index=['datetime_beginning_ept'], columns=['area'], sort=False) 9 | load_df = pd.pivot_table(load_df, values='mw', index=['datetime_beginning_ept'], columns=['load_area'], sort=False) 10 | price_df = pd.pivot_table(price_df, values='hrly_da_demand_bid', index=['datetime_beginning_ept'], columns=['area'], sort=False) 11 | 12 | # scale values 13 | for df in [pv_df, wt_df, load_df, price_df]: 14 | df /= df.max() 15 | print(f'Unit profile: {pv_df.max()}, {wt_df.max()}, {load_df.max()}, {price_df.max()}') 16 | 17 | return pv_df, wt_df, load_df, price_df 18 | 19 | def create_save_profile(pv_df, wt_df, load_df, price_df): 20 | pv_df, wt_df, load_df, price_df = create_unit_profile(pv_df, wt_df, load_df, price_df) 21 | 22 | pv_profile = pd.DataFrame({ 23 | 'pv3': pv_df['MIDATL'] * P_PV3_MAX, 24 | 'pv4': pv_df['MIDATL'] * P_PV4_MAX, 25 | 'pv5': pv_df['MIDATL'] * P_PV5_MAX, 26 | 'pv6': pv_df['RFC'] * P_PV6_MAX, 27 | 'pv8': pv_df['RFC'] * P_PV8_MAX, 28 | 'pv9': pv_df['RFC'] * P_PV9_MAX, 29 | 'pv10': pv_df['RTO'] * P_PV10_MAX, 30 | 'pv11': pv_df['RTO'] * P_PV11_MAX 31 | }) 32 | wt_profile = pd.DataFrame({ 33 | 'wt7': wt_df['MIDATL'] * P_WT7_MAX 34 | }) 35 | load_profile = pd.DataFrame({ 36 | 'load_r1': load_df['AECO'] * P_LOADR1_MAX, 37 | 'load_r3': load_df['BC'] * P_LOADR3_MAX, 38 | 'load_r4': load_df['DPLCO'] * P_LOADR4_MAX, 39 | 'load_r5': load_df['EASTON'] * P_LOADR5_MAX, 40 | 'load_r6': load_df['JC'] * P_LOADR6_MAX, 41 | 'load_r8': load_df['ME'] * P_LOADR8_MAX, 42 | 'load_r10': load_df['PE'] * P_LOADR10_MAX, 43 | 'load_r11': load_df['PEPCO'] * P_LOADR11_MAX, 44 | }) 45 | price_profile = pd.DataFrame({ 46 | 'price': price_df['PJM_RTO'] * C_PRICE_MAX 47 | }) 48 | 49 | # create csv files 50 | pv_profile.to_csv('./data/profile/pv_profile.csv') 51 | wt_profile.to_csv('./data/profile/wt_profile.csv') 52 | load_profile.to_csv('./data/profile/load_profile.csv') 53 | price_profile.to_csv('./data/profile/price_profile.csv') 54 | 55 | if __name__ == '__main__': 56 | pv_df = pd.read_csv('./data/solar_gen.csv') 57 | wt_df = pd.read_csv('./data/wind_gen.csv') 58 | load_df = pd.read_csv('./data/hrl_load_metered.csv') 59 | price_df = pd.read_csv('./data/hrl_dmd_bids.csv') 60 | create_save_profile(pv_df, wt_df, load_df, price_df) 61 | 62 | pv_profile = pd.read_csv('./data/profile/pv_profile.csv') 63 | wt_profile = pd.read_csv('./data/profile/wt_profile.csv') 64 | load_profile = pd.read_csv('./data/profile/load_profile.csv') 65 | price_profile = pd.read_csv('./data/profile/price_profile.csv') 66 | # excess = pv_profile.sum(axis=1) + wt_profile.sum(axis=1) - load_profile.sum(axis=1) 67 | # surplus = excess[excess > 0] 68 | # print(surplus) 69 | # print(surplus.shape[0] / pv_profile.shape[0]) 70 | utils.view_profile(pv_profile, wt_profile, load_profile, price_profile) -------------------------------------------------------------------------------- /setting.py: -------------------------------------------------------------------------------- 1 | ''' 2 | - hyperparameters 3 | - environment 4 | - power 
ratings 5 | - cost parameters 6 | ''' 7 | 8 | import numpy as np 9 | 10 | # --- Hyperparameters --- 11 | BATCH_SIZE = 100 12 | GAMMA = 0.99 13 | LR_ACTOR = 5e-4 14 | LR_CRITIC = 5e-4 15 | NN_BOUND = 1. 16 | SEQ_LENGTH= 1 17 | 18 | # TD3 only 19 | ACTION_NOISE_SCALE = 0.3 20 | BUFFER_SIZE = 500000 21 | NOISE_TYPE = 'param' # ['action', 'param'] 22 | PARAM_NOISE_ADAPT_RATE = 1.01 23 | PARAM_NOISE_BOUND = 0.1 24 | PARAM_NOISE_SCALE = 0.1 25 | UPDATE_FREQ = 50 26 | UPDATE_TIMES = 4 27 | WARMUP = 1000 28 | 29 | # PPO only 30 | POLICY_CLIP = 0.2 31 | TARGET_KL = 0.01 32 | PPO_BATCH_SIZE = 60 33 | PPO_TRAIN_FREQ = 720 34 | PPO_TRAIN_ITERS = 80 35 | 36 | # others 37 | PREDICT_LENGTH = 24 38 | DENSE_DIM_A = 16 39 | DENSE_DIM_FNN = 16 40 | DENSE_DIM_SEQ = 32 41 | 42 | # Environment 43 | HOUR_PER_TIME_STEP = 1 44 | 45 | # --- Power Ratings --- 46 | # PV 47 | P_PV3_MAX = 0.3 48 | P_PV4_MAX = 0.3 49 | P_PV5_MAX = 0.4 50 | P_PV6_MAX = 0.4 51 | P_PV8_MAX = 0.4 52 | P_PV9_MAX = 0.5 53 | P_PV10_MAX = 0.5 54 | P_PV11_MAX = 0.3 55 | P_PV_MAX_LIST = [P_PV3_MAX, P_PV4_MAX, P_PV5_MAX, P_PV6_MAX, P_PV8_MAX, P_PV9_MAX, P_PV10_MAX, P_PV11_MAX] 56 | 57 | # WT 58 | P_WT7_MAX = 2.5 59 | P_WT_MAX_LIST = [P_WT7_MAX] 60 | 61 | # MGT 62 | # P_MGT5_MAX = 0.033 63 | # P_MGT9_MAX = 0.212 64 | # P_MGT10_MAX = 0.033 65 | # P_MGT5_MIN = 0. 66 | # P_MGT9_MIN = 0. 67 | # P_MGT10_MIN = 0. 68 | # P_MGT_MAX_LIST = [P_MGT5_MAX, P_MGT9_MAX, P_MGT10_MAX] 69 | 70 | # Battery 71 | E_B5_MAX = 3. 72 | P_B5_MAX = 0.6 73 | P_B5_MIN = -0.6 74 | 75 | E_B10_MAX = 1. 76 | P_B10_MAX = 0.2 77 | P_B10_MIN = -0.2 78 | 79 | SOC_MAX = 0.9 80 | SOC_MIN = 0.1 81 | SOC_TOLERANCE = 0.01 82 | 83 | # Load 84 | P_LOADR1_MAX = 0.85 85 | P_LOADR3_MAX = 0.285 86 | P_LOADR4_MAX = 0.245 87 | P_LOADR5_MAX = 0.65 88 | P_LOADR6_MAX = 0.565 89 | P_LOADR8_MAX = 0.605 90 | P_LOADR10_MAX = 0.49 91 | P_LOADR11_MAX = 0.34 92 | P_LOAD_MAX_LIST = [P_LOADR1_MAX, P_LOADR3_MAX, P_LOADR4_MAX, P_LOADR5_MAX, P_LOADR6_MAX, P_LOADR8_MAX, P_LOADR10_MAX, P_LOADR11_MAX] 93 | P_LOAD_MAX = P_LOADR1_MAX + P_LOADR3_MAX + P_LOADR4_MAX + P_LOADR5_MAX + P_LOADR6_MAX + P_LOADR8_MAX + P_LOADR10_MAX + P_LOADR11_MAX 94 | 95 | # PCC 96 | P_EXCESS_MAX = sum([*P_PV_MAX_LIST, *P_WT_MAX_LIST]) 97 | 98 | # State 99 | # N_INTERMITTENT_STATES = len([P_EXCESS_MAX,'price']) 100 | N_INTERMITTENT_STATES = len([*P_PV_MAX_LIST, *P_WT_MAX_LIST, *P_LOAD_MAX_LIST,'price']) 101 | # N_INTERMITTENT_STATES = len([*P_PV_MAX_LIST, *P_WT_MAX_LIST, *P_LOAD_MAX_LIST, P_EXCESS_MAX,'price']) 102 | N_CONTROLLABLE_STATES = len([P_B5_MAX, P_B10_MAX]) 103 | STATE_SEQ_SHAPE = (SEQ_LENGTH, N_INTERMITTENT_STATES) 104 | STATE_FNN_SHAPE = (N_CONTROLLABLE_STATES,) 105 | 106 | # Action 107 | ACTION_IDX = {'p_b5': 0, 'p_b10': 1} 108 | MAX_ACTION = np.array([P_B5_MAX, P_B10_MAX]) 109 | MIN_ACTION = np.array([P_B5_MIN, P_B10_MIN]) 110 | N_ACTION = len(MAX_ACTION) 111 | 112 | # --- Cost Parameters --- 113 | C_PRICE_MAX = 3. 114 | # C_MGT5 = [100, 1.5] 115 | # C_MGT9 = [15.8, 2.] 
116 | # C_MGT10 = [100, 1.5] 117 | C_BAT5_DoD = 0.43 118 | C_BAT10_DoD = 0.16 119 | C_SOC_LIMIT = 100 120 | MAX_COST = C_PRICE_MAX * (P_B5_MAX + P_B10_MAX + P_LOAD_MAX) + \ 121 | (C_BAT5_DoD + C_BAT10_DoD) * pow(SOC_MAX-SOC_MIN, 2) + \ 122 | C_SOC_LIMIT 123 | 124 | REWARD_INVALID_ACTION = -5e-3 125 | 126 | if __name__ == '__main__': 127 | print(f'Number of actions: {N_ACTION}') 128 | print(f'Number of intermittent states: {N_INTERMITTENT_STATES}') 129 | print(f'Number of controllable states: {N_CONTROLLABLE_STATES}') 130 | print(f'Load max: {P_LOAD_MAX}') -------------------------------------------------------------------------------- /cigre_mv_microgrid.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Modified CIGRE Task Force C6.04.02 network 3 | 4 | elements: 5 | - 8 PVs 6 | - 1 WT 7 | - 2 Batteries 8 | - 8 Loads 9 | ''' 10 | 11 | import pandapower as pp 12 | from pandapower.control import ConstControl 13 | from setting import * 14 | 15 | def create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds): 16 | net = pp.create_empty_network(name='CIGRE MV Microgrid') 17 | 18 | # --- Buses --- 19 | bus0 = pp.create_bus(net, vn_kv=110, name='Buse 0', type='b', zone='CIGRE_MV') 20 | buses = pp.create_buses(net, 11, vn_kv=20, name=[f'Bus {i}' for i in range(1, 12)], type='b', zone='CIGRE_MV') 21 | 22 | # --- Lines --- 23 | line_data = {'c_nf_per_km': 151.1749, 'r_ohm_per_km': 0.501, 24 | 'x_ohm_per_km': 0.716, 'max_i_ka': 0.145, 25 | 'type': 'cs'} 26 | pp.create_std_type(net, line_data, name='CABLE_CIGRE_MV', element='line') 27 | 28 | pp.create_line(net, buses[0], buses[1], length_km=2.82, 29 | std_type='CABLE_CIGRE_MV', name='Line 1-2') 30 | pp.create_line(net, buses[1], buses[2], length_km=4.42, 31 | std_type='CABLE_CIGRE_MV', name='Line 2-3') 32 | pp.create_line(net, buses[2], buses[3], length_km=0.61, 33 | std_type='CABLE_CIGRE_MV', name='Line 3-4') 34 | pp.create_line(net, buses[3], buses[4], length_km=0.56, 35 | std_type='CABLE_CIGRE_MV', name='Line 4-5') 36 | pp.create_line(net, buses[4], buses[5], length_km=1.54, 37 | std_type='CABLE_CIGRE_MV', name='Line 5-6') 38 | pp.create_line(net, buses[6], buses[7], length_km=1.67, 39 | std_type='CABLE_CIGRE_MV', name='Line 7-8') 40 | pp.create_line(net, buses[7], buses[8], length_km=0.32, 41 | std_type='CABLE_CIGRE_MV', name='Line 8-9') 42 | pp.create_line(net, buses[8], buses[9], length_km=0.77, 43 | std_type='CABLE_CIGRE_MV', name='Line 9-10') 44 | pp.create_line(net, buses[9], buses[10], length_km=0.33, 45 | std_type='CABLE_CIGRE_MV', name='Line 10-11') 46 | pp.create_line(net, buses[2], buses[7], length_km=1.3, 47 | std_type='CABLE_CIGRE_MV', name='Line 3-8') 48 | 49 | # --- External Grid --- 50 | pp.create_ext_grid(net, bus0, vm_pu=1.03, va_degree=0., s_sc_max_mva=5000, s_sc_min_mva=5000, rx_max=0.1, rx_min=0.1) 51 | 52 | # --- Trafos --- 53 | trafo0 = pp.create_transformer_from_parameters(net, bus0, buses[0], sn_mva=25, 54 | vn_hv_kv=110, vn_lv_kv=20, vkr_percent=0.16, 55 | vk_percent=12.00107, pfe_kw=0, i0_percent=0, 56 | shift_degree=30.0, name='Trafo 0-1') 57 | pp.create_switch(net, bus0, trafo0, et='t', closed=True, type='CB') 58 | 59 | # --- RESs --- 60 | # PV 61 | pv3 = pp.create_sgen(net, buses[2], 0.0, q_mvar=0, name='PV 3', type='PV') 62 | pv4 = pp.create_sgen(net, buses[3], 0.0, q_mvar=0, name='PV 4', type='PV') 63 | pv5 = pp.create_sgen(net, buses[4], 0.0, q_mvar=0, name='PV 5', type='PV') 64 | pv6 = pp.create_sgen(net, buses[5], 0.0, q_mvar=0, name='PV 6', type='PV') 65 | pv8 = 
pp.create_sgen(net, buses[7], 0.0, q_mvar=0, name='PV 8', type='PV') 66 | pv9 = pp.create_sgen(net, buses[8], 0.0, q_mvar=0, name='PV 9', type='PV') 67 | pv10 = pp.create_sgen(net, buses[9], 0.0, q_mvar=0, name='PV 10', type='PV') 68 | pv11 = pp.create_sgen(net, buses[10], 0.0, q_mvar=0, name='PV 11', type='PV') 69 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv3, profile_name='pv3', data_source=pv_ds) 70 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv4, profile_name='pv4', data_source=pv_ds) 71 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv5, profile_name='pv5', data_source=pv_ds) 72 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv6, profile_name='pv6', data_source=pv_ds) 73 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv8, profile_name='pv8', data_source=pv_ds) 74 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv9, profile_name='pv9', data_source=pv_ds) 75 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv10, profile_name='pv10', data_source=pv_ds) 76 | ConstControl(net, element='sgen', variable='p_mw', element_index=pv11, profile_name='pv11', data_source=pv_ds) 77 | 78 | # WT 79 | wt7 = pp.create_sgen(net, buses[6], 0.0, q_mvar=0, name='WKA 7',type='WP') 80 | ConstControl(net, element='sgen', variable='p_mw', element_index=wt7, profile_name='wt7', data_source=wt_ds) 81 | 82 | # --- Generators --- 83 | # mgt5 = pp.create_sgen(net, bus=buses[4], p_mw=0.0, name='MGT 5') 84 | # mgt9 = pp.create_sgen(net, bus=buses[8], p_mw=0.0, name='MGT 9') 85 | # mgt10 = pp.create_sgen(net, bus=buses[9], p_mw=0.0, name='MGT 10') 86 | 87 | # --- Batteries --- 88 | bat5 = pp.create_storage(net, bus=buses[4], p_mw=0.0, max_e_mwh=E_B5_MAX, name='Battery 5', type='Battery', max_p_mw=P_B5_MAX, min_p_mw=P_B5_MIN) 89 | bat10 = pp.create_storage(net, bus=buses[9], p_mw=0.0, max_e_mwh=E_B10_MAX, name='Battery 10', type='Battery', max_p_mw=P_B10_MAX, min_p_mw=P_B10_MIN) 90 | 91 | # --- Loads --- 92 | load_r1 = pp.create_load_from_cosphi(net, buses[0], 0.0, 0.98, "underexcited", name='Load R1') 93 | load_r3 = pp.create_load_from_cosphi(net, buses[2], 0.0, 0.97, "underexcited", name='Load R3') 94 | load_r4 = pp.create_load_from_cosphi(net, buses[3], 0.0, 0.97, "underexcited", name='Load R4') 95 | load_r5 = pp.create_load_from_cosphi(net, buses[4], 0.0, 0.97, "underexcited", name='Load R5') 96 | load_r6 = pp.create_load_from_cosphi(net, buses[5], 0.0, 0.97, "underexcited", name='Load R6') 97 | load_r8 = pp.create_load_from_cosphi(net, buses[7], 0.0, 0.97, "underexcited", name='Load R8') 98 | load_r10 = pp.create_load_from_cosphi(net, buses[9], 0.0, 0.97, "underexcited", name='Load R10') 99 | load_r11 = pp.create_load_from_cosphi(net, buses[10], 0.0, 0.97, "underexcited", name='Load R11') 100 | ConstControl(net, element='load', variable='p_mw', element_index=load_r1, profile_name='load_r1', data_source=load_ds) 101 | ConstControl(net, element='load', variable='p_mw', element_index=load_r3, profile_name='load_r3', data_source=load_ds) 102 | ConstControl(net, element='load', variable='p_mw', element_index=load_r4, profile_name='load_r4', data_source=load_ds) 103 | ConstControl(net, element='load', variable='p_mw', element_index=load_r5, profile_name='load_r5', data_source=load_ds) 104 | ConstControl(net, element='load', variable='p_mw', element_index=load_r6, profile_name='load_r6', data_source=load_ds) 105 | ConstControl(net, element='load', variable='p_mw', 
element_index=load_r8, profile_name='load_r8', data_source=load_ds) 106 | ConstControl(net, element='load', variable='p_mw', element_index=load_r10, profile_name='load_r10', data_source=load_ds) 107 | ConstControl(net, element='load', variable='p_mw', element_index=load_r11, profile_name='load_r11', data_source=load_ds) 108 | 109 | ids = { 110 | 'trafo0': trafo0, 111 | 'pv3': pv3, 'pv4': pv4, 'pv5': pv5, 'pv6': pv6, 'pv8': pv8, 'pv9': pv9, 'pv10': pv10, 'pv11': pv11, 112 | 'wt7': wt7, 113 | # 'mgt5': mgt5, 'mgt9': mgt9, 'mgt10': mgt10, 114 | 'bat5': bat5, 'bat10': bat10, 115 | 'load_r1': load_r1, 'load_r3': load_r3, 'load_r4': load_r4, 'load_r5': load_r5, 'load_r6': load_r6, 'load_r8': load_r8, 'load_r10': load_r10, 'load_r11': load_r11 116 | } 117 | 118 | return net, ids -------------------------------------------------------------------------------- /controllers/buffer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | class 3 | - Buffer 4 | - PrioritizedReplayBuffer 5 | - ReplayBuffer 6 | ''' 7 | import numpy as np 8 | import scipy.signal 9 | from typing import Dict 10 | 11 | class Buffer: 12 | def __init__(self, buffer_size, state_seq_shape, state_fnn_shape, n_actions, gamma=0.99, lam=0.97): 13 | self.trajectory_start_idx = 0 14 | self.buffer_counter = 0 15 | self.buffer_size = buffer_size 16 | self.gamma = gamma 17 | self.lam = lam 18 | 19 | # transition 20 | self.state_seq_buffer = np.zeros((self.buffer_size, *state_seq_shape)) 21 | self.state_fnn_buffer = np.zeros((self.buffer_size, *state_fnn_shape)) 22 | self.action_buffer = np.zeros((self.buffer_size, n_actions)) 23 | self.reward_buffer = np.zeros((self.buffer_size, 1)) 24 | 25 | self.state_value_buffer = np.zeros((buffer_size, 1)) 26 | self.action_logprob_buffer = np.zeros((buffer_size, 1)) 27 | self.return_buffer = np.zeros((buffer_size, 1)) 28 | self.advantage_buffer = np.zeros((buffer_size, 1)) 29 | 30 | def clear(self): 31 | self.trajectory_start_idx = 0 32 | self.buffer_counter = 0 33 | self.state_seq_buffer = np.zeros_like(self.state_seq_buffer) 34 | self.state_fnn_buffer = np.zeros_like(self.state_fnn_buffer) 35 | self.action_buffer = np.zeros_like(self.action_buffer) 36 | self.reward_buffer = np.zeros_like(self.reward_buffer) 37 | self.state_value_buffer = np.zeros_like(self.state_value_buffer) 38 | self.action_logprob_buffer = np.zeros_like(self.action_logprob_buffer) 39 | self.return_buffer = np.zeros_like(self.return_buffer) 40 | self.advantage_buffer = np.zeros_like(self.advantage_buffer) 41 | 42 | def discounted_cumulative_sums(self, x_arr, discount): 43 | gae = scipy.signal.lfilter([1], [1, float(-discount)], x_arr[::-1], axis=0)[::-1] 44 | return gae.reshape((len(gae), 1)) 45 | 46 | def finish_trajectory(self, last_value): 47 | trajectory_end_idx = self.buffer_counter 48 | path_slice = slice(self.trajectory_start_idx, trajectory_end_idx) 49 | rewards = np.append(self.reward_buffer[path_slice], last_value) 50 | state_values = np.append(self.state_value_buffer[path_slice], last_value) 51 | 52 | deltas = rewards[:-1] + self.gamma * state_values[1:] - state_values[:-1] 53 | self.advantage_buffer[path_slice] = self.discounted_cumulative_sums(deltas, self.gamma*self.lam) 54 | self.return_buffer[path_slice] = self.advantage_buffer[path_slice] + self.state_value_buffer[path_slice] 55 | 56 | def sample(self, batch_size=32): 57 | batch_starts = np.arange(0, self.buffer_size, batch_size) 58 | batch_indices = np.arange(self.buffer_size) 59 | np.random.shuffle(batch_indices) 60 | 
batches = [batch_indices[batch_start: batch_start+batch_size] for batch_start in batch_starts] 61 | 62 | return self.state_seq_buffer, \ 63 | self.state_fnn_buffer, \ 64 | self.action_buffer, \ 65 | self.action_logprob_buffer, \ 66 | self.return_buffer, \ 67 | self.advantage_buffer, \ 68 | batches 69 | 70 | def store_transition(self, state_seq, state_fnn, action, reward, state_value, action_logprob): 71 | idx = self.buffer_counter 72 | self.state_seq_buffer[idx] = state_seq 73 | self.state_fnn_buffer[idx] = state_fnn 74 | self.action_buffer[idx] = action 75 | self.reward_buffer[idx] = reward 76 | self.state_value_buffer[idx] = state_value 77 | self.action_logprob_buffer[idx] = action_logprob 78 | 79 | self.buffer_counter += 1 80 | 81 | class PrioritizedReplayBuffer: 82 | def __init__(self, buffer_size, state_seq_shape, state_fnn_shape, n_actions, alpha=0.6, beta=0.4): 83 | # params 84 | self.buffer_size = buffer_size 85 | self.buffer_counter = 0 86 | self.alpha = alpha 87 | self.beta = beta 88 | 89 | # transition 90 | self.state_seq_buffer = np.zeros((self.buffer_size, *state_seq_shape)) 91 | self.state_fnn_buffer = np.zeros((self.buffer_size, *state_fnn_shape)) 92 | self.action_buffer = np.zeros((self.buffer_size, n_actions)) 93 | self.reward_buffer = np.zeros((self.buffer_size, 1)) 94 | self.next_state_seq_buffer = np.zeros((self.buffer_size, *state_seq_shape)) 95 | self.next_state_fnn_buffer = np.zeros((self.buffer_size, *state_fnn_shape)) 96 | 97 | # sum tree 98 | n_node = buffer_size * 2 - 1 99 | self.sum_tree = np.zeros(n_node) 100 | 101 | def get_leaf(self, cdf): 102 | idx = self._retrieve(0, cdf) 103 | return idx 104 | 105 | def get_max_priority(self): 106 | max_p = np.max(self.sum_tree[-self.buffer_size:]) 107 | if max_p == 0: 108 | max_p = 1. 109 | return max_p 110 | 111 | def sample(self, batch_size=32): 112 | idxs = np.zeros(batch_size, dtype=np.int32) 113 | trans_idxs = np.zeros(batch_size, dtype=np.int32) 114 | weights = np.zeros((batch_size, 1)) 115 | 116 | trans_idx_start = self.buffer_size - 1 117 | trans_idx_end = trans_idx_start + min(self.buffer_counter, self.buffer_size) 118 | min_prob = np.min(self.sum_tree[trans_idx_start: trans_idx_end]) / self.sum_tree[0] 119 | max_weight = np.power(self.buffer_size * min_prob, -self.beta) 120 | 121 | total_p = self.sum_tree[0] 122 | segment_size = total_p / batch_size 123 | for i in range(batch_size): 124 | segment_low = i * segment_size 125 | segment_high = (i + 1) * segment_size 126 | cdf = np.random.uniform(low=segment_low, high=segment_high) 127 | 128 | idx = self.get_leaf(cdf) 129 | idxs[i] = idx 130 | trans_idxs[i] = idx - self.buffer_size + 1 131 | 132 | prob = self.sum_tree[idx] / self.sum_tree[0] 133 | weights[i] = np.power(self.buffer_size * prob, -self.beta) / max_weight 134 | 135 | return self.state_seq_buffer[trans_idxs], \ 136 | self.state_fnn_buffer[trans_idxs], \ 137 | self.action_buffer[trans_idxs], \ 138 | self.reward_buffer[trans_idxs], \ 139 | self.next_state_seq_buffer[trans_idxs], \ 140 | self.next_state_fnn_buffer[trans_idxs], \ 141 | idxs, \ 142 | weights 143 | 144 | def schedule_beta(self, beta_inc): 145 | self.beta = min(self.beta + beta_inc, 1.) 
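    # Added note (not in the original source): the sum tree keeps priorities in
    # its leaves. With `buffer_size` leaves, leaf i sits at tree index
    # buffer_size - 1 + i, which is why store_transition() below computes
    # tree_idx = transition_idx + self.buffer_size - 1. Every internal node
    # stores the sum of its two children, so sum_tree[0] is the total priority
    # mass that sample() splits into equal segments for proportional sampling.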
146 | 147 | def store_transition(self, state_seq, state_fnn, action, reward, next_state_seq, next_state_fnn): 148 | # transition 149 | transition_idx = self.buffer_counter % self.buffer_size 150 | self.state_seq_buffer[transition_idx] = state_seq 151 | self.state_fnn_buffer[transition_idx] = state_fnn 152 | self.action_buffer[transition_idx] = action 153 | self.reward_buffer[transition_idx] = reward 154 | self.next_state_seq_buffer[transition_idx] = next_state_seq 155 | self.next_state_fnn_buffer[transition_idx] = next_state_fnn 156 | 157 | # priority 158 | tree_idx = transition_idx + self.buffer_size - 1 159 | priority = self.get_max_priority() 160 | self.update_tree(tree_idx, priority) 161 | 162 | self.buffer_counter += 1 163 | 164 | def update_tree(self, idx, priority): 165 | new_p = np.power(priority, self.alpha) 166 | change = new_p - self.sum_tree[idx] 167 | self.sum_tree[idx] = new_p 168 | self._propogate(idx, change) 169 | 170 | def _propogate(self, idx, change): 171 | parent_idx = (idx - 1) // 2 172 | self.sum_tree[parent_idx] += change 173 | if parent_idx != 0: 174 | self._propogate(parent_idx, change) 175 | 176 | def _retrieve(self, idx, cdf): 177 | l_child_idx = 2 * idx + 1 178 | r_child_idx = l_child_idx + 1 179 | 180 | if l_child_idx >= len(self.sum_tree): 181 | return idx 182 | elif cdf <= self.sum_tree[l_child_idx]: 183 | return self._retrieve(l_child_idx, cdf) 184 | else: 185 | return self._retrieve(r_child_idx, cdf - self.sum_tree[l_child_idx]) 186 | 187 | class ReplayBuffer: 188 | def __init__(self, buffer_size, state_seq_shape, state_fnn_shape, n_actions): 189 | self.buffer_size = buffer_size 190 | self.buffer_counter = 0 191 | 192 | self.state_seq_buffer = np.zeros((self.buffer_size, *state_seq_shape)) 193 | self.state_fnn_buffer = np.zeros((self.buffer_size, *state_fnn_shape)) 194 | self.action_buffer = np.zeros((self.buffer_size, n_actions)) 195 | self.reward_buffer = np.zeros((self.buffer_size, 1)) 196 | self.next_state_seq_buffer = np.zeros((self.buffer_size, *state_seq_shape)) 197 | self.next_state_fnn_buffer = np.zeros((self.buffer_size, *state_fnn_shape)) 198 | 199 | def store_transition(self, state_seq, state_fnn, action, reward, next_state_seq, next_state_fnn): 200 | index = self.buffer_counter % self.buffer_size 201 | 202 | self.state_seq_buffer[index] = state_seq 203 | self.state_fnn_buffer[index] = state_fnn 204 | self.action_buffer[index] = action 205 | self.reward_buffer[index] = reward 206 | self.next_state_seq_buffer[index] = next_state_seq 207 | self.next_state_fnn_buffer[index] = next_state_fnn 208 | 209 | self.buffer_counter += 1 210 | 211 | def sample(self, batch_size) -> Dict: 212 | sample_range = min(self.buffer_counter, self.buffer_size) 213 | batch_indices = np.random.choice(sample_range, size=batch_size) 214 | 215 | state_seq_batch = self.state_seq_buffer[batch_indices] 216 | state_fnn_batch = self.state_fnn_buffer[batch_indices] 217 | action_batch = self.action_buffer[batch_indices] 218 | reward_batch = self.reward_buffer[batch_indices] 219 | next_state_seq_batch = self.next_state_seq_buffer[batch_indices] 220 | next_state_fnn_batch = self.next_state_fnn_buffer[batch_indices] 221 | 222 | return state_seq_batch, state_fnn_batch, action_batch, reward_batch, next_state_seq_batch, next_state_fnn_batch -------------------------------------------------------------------------------- /controllers/models.py: -------------------------------------------------------------------------------- 1 | ''' 2 | class: 3 | - TransformerEncoder 4 | - ActorModel 5 | 
- CriticModel 6 | ''' 7 | 8 | import tensorflow as tf 9 | import tensorflow_addons as tfa 10 | from tensorflow_addons.layers import SpectralNormalization 11 | import tensorflow.keras as keras 12 | import tensorflow.keras.layers as layers 13 | 14 | from setting import * 15 | 16 | class TransformerEncoder(layers.Layer): 17 | def __init__(self, key_dim=64, num_heads=2, dense_dim=32, sequence_length=SEQ_LENGTH, **kwargs): 18 | super().__init__(**kwargs) 19 | self.key_dim = key_dim 20 | self.num_heads = num_heads 21 | self.dense_dim = dense_dim 22 | self.sequence_length = sequence_length 23 | 24 | self.dense_inputs = layers.Dense(key_dim) 25 | self.position_embeddings = layers.Embedding(sequence_length, key_dim) 26 | self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim) 27 | self.dense_proj = keras.Sequential([ 28 | layers.Dense(dense_dim, activation='relu'), 29 | layers.Dense(key_dim) 30 | ]) 31 | self.layernorm1 = layers.LayerNormalization() 32 | self.layernorm2 = layers.LayerNormalization() 33 | 34 | def call(self, inputs): 35 | positions = tf.range(start=0, limit=self.sequence_length, delta=1) 36 | attention_input = self.dense_inputs(inputs) + self.position_embeddings(positions) 37 | attention_output = self.attention(attention_input, attention_input) 38 | proj_input = self.layernorm1(attention_input + attention_output) 39 | proj_output = self.dense_proj(proj_input) 40 | outputs = self.layernorm2(proj_input + proj_output) 41 | return outputs 42 | 43 | def get_config(self): 44 | config = super().get_config() 45 | config.update({ 46 | 'key_dim': self.key_dim, 47 | 'num_heads': self.num_heads, 48 | 'dense_dim': self.dense_dim, 49 | 'sequence_length': self.sequence_length 50 | }) 51 | return config 52 | 53 | class SequenceModel(keras.Model): 54 | def __init__(self, sequence_model_type='rnn', activation='relu', **kwargs): 55 | super().__init__() 56 | self.dense_proj = get_dense_proj_seq(activation=activation) 57 | if sequence_model_type == 'conv1d': 58 | self.seq = get_conv1d_model() 59 | elif sequence_model_type == 'rnn': 60 | self.seq = get_rnn_model() 61 | elif sequence_model_type == 'transformer': 62 | self.seq = get_transformer_model() 63 | else: 64 | self.seq = layers.Flatten() 65 | 66 | def call(self, inputs): 67 | state_seq = self.dense_proj(inputs) 68 | state_seq = self.seq(state_seq) 69 | # state_seq = self.seq(inputs) 70 | 71 | return state_seq 72 | 73 | class ActorMuModel(keras.Model): 74 | def __init__(self, n_action, **kwargs): 75 | super().__init__() 76 | # self.dense_proj = get_dense_proj_fnn() 77 | self.concat = layers.Concatenate() 78 | self.fc = keras.Sequential([ 79 | layers.LayerNormalization(), 80 | layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)), 81 | layers.LayerNormalization(), 82 | layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)), 83 | layers.LayerNormalization(), 84 | ]) 85 | 86 | self.action = layers.Dense(n_action, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)) 87 | 88 | def call(self, state_seq, state_fnn): 89 | # state_fnn = self.dense_proj(state_fnn) 90 | state = self.concat([state_seq, state_fnn]) 91 | state = self.fc(state) 92 | action = self.action(state) 93 | 94 | return action 95 | 96 | class ActorPiModel(keras.Model): 97 | def __init__(self, n_action, logstd_init=0., **kwargs): 98 | super().__init__() 99 | # self.dense_proj = 
get_dense_proj_fnn(activation='tanh') 100 | self.concat = layers.Concatenate() 101 | self.fc = keras.Sequential([ 102 | layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)), 103 | layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)), 104 | ]) 105 | self.action_mean = layers.Dense(n_action, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)) 106 | 107 | self.action_logstd = tf.Variable(logstd_init * tf.ones(n_action), trainable=True) 108 | 109 | def call(self, state_seq, state_fnn): 110 | # state_fnn = self.dense_proj(state_fnn) 111 | state = self.concat([state_seq, state_fnn]) 112 | state = self.fc(state) 113 | action_mean = self.action_mean(state) 114 | 115 | action_std = tf.math.exp(self.action_logstd) 116 | return action_mean, action_std 117 | 118 | class CriticQModel(keras.Model): 119 | def __init__(self, **kwargs): 120 | super().__init__() 121 | # self.dense_proj_fnn = get_dense_proj_fnn() 122 | # self.dense_proj_a = get_dense_proj_a() 123 | self.concat = layers.Concatenate() 124 | self.dense = keras.Sequential([ 125 | layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001)), 126 | SpectralNormalization(layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001))), 127 | ]) 128 | self.q = layers.Dense(1) 129 | 130 | def call(self, state_seq, state_fnn, action): 131 | # state_fnn = self.dense_proj_fnn(state_fnn) 132 | # action = self.dense_proj_a(action) 133 | state_action = self.concat([state_seq, state_fnn, action]) 134 | state_action = self.dense(state_action) 135 | q_value = self.q(state_action) 136 | 137 | return q_value 138 | 139 | class CriticVModel(keras.Model): 140 | def __init__(self, name='critic', **kwargs): 141 | super().__init__() 142 | # self.dense_proj_fnn = get_dense_proj_fnn(activation='tanh') 143 | self.concat = layers.Concatenate() 144 | self.dense = keras.Sequential([ 145 | SpectralNormalization(layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001))), 146 | SpectralNormalization(layers.Dense(64, activation='tanh', kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001))), 147 | ]) 148 | self.v = layers.Dense(1) 149 | 150 | def call(self, state_seq, state_fnn): 151 | # state_fnn = self.dense_proj_fnn(state_fnn) 152 | state = self.concat([state_seq, state_fnn]) 153 | state = self.dense(state) 154 | state_value = self.v(state) 155 | 156 | return state_value 157 | 158 | def get_dense_proj_a(proj_dim=DENSE_DIM_A): 159 | inputs = keras.Input(shape=(N_ACTION,)) 160 | outputs = keras.Sequential([ 161 | layers.Dense(proj_dim, activation='relu') 162 | ])(inputs) 163 | 164 | model = keras.Model(inputs, outputs, name='dense_proj_a') 165 | return model 166 | 167 | def get_dense_proj_fnn(proj_dim=DENSE_DIM_FNN, activation='relu'): 168 | inputs = keras.Input(shape=STATE_FNN_SHAPE) 169 | outputs = layers.Dense(proj_dim, activation=activation, kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001))(inputs) 170 | 171 | model = keras.Model(inputs, outputs, name='dense_proj_fnn') 172 | return model 173 | 174 | def get_dense_proj_seq(proj_dim=DENSE_DIM_SEQ, activation='tanh'): 175 | inputs = keras.Input(shape=STATE_SEQ_SHAPE) 176 | outputs = layers.Dense(proj_dim, activation=activation, 
kernel_initializer=tf.random_uniform_initializer(minval=-0.001, maxval=0.001))(inputs) 177 | 178 | model = keras.Model(inputs, outputs, name='dense_proj_seq') 179 | return model 180 | 181 | def get_conv1d_model(): 182 | inputs = keras.Input(shape=(SEQ_LENGTH, DENSE_DIM_SEQ)) 183 | outputs = keras.Sequential([ 184 | SpectralNormalization(layers.Conv1D(8, 5, activation='tanh')), 185 | layers.MaxPooling1D(2), 186 | layers.GRU(16) 187 | # layers.Conv1D(8, 3, activation='tanh'), 188 | # layers.GlobalMaxPooling1D(), 189 | ])(inputs) 190 | 191 | model = keras.Model(inputs, outputs, name='sequence_model') 192 | return model 193 | 194 | def get_rnn_model(): 195 | # inputs = keras.Input(shape=(SEQ_LENGTH, DENSE_DIM_SEQ)) 196 | # lstm_in = layers.LayerNormalization()(inputs) 197 | # lstm_out = layers.LSTM(DENSE_DIM_SEQ, return_sequences=True)(lstm_in) 198 | # outputs = layers.Add()([inputs, lstm_out]) # residual connection 199 | # outputs = layers.LayerNormalization()(outputs) 200 | # outputs = layers.LSTM(16)(outputs) 201 | inputs = keras.Input(shape=(SEQ_LENGTH, DENSE_DIM_SEQ)) 202 | outputs = layers.GRU(32)(inputs) 203 | 204 | model = keras.Model(inputs, outputs, name='sequence_model') 205 | return model 206 | 207 | # TODO 208 | def get_transformer_model(): 209 | pass 210 | 211 | # deterministic actor 212 | def get_mu_actor(sequence_model, actor_model): 213 | input_seq = keras.Input(shape=STATE_SEQ_SHAPE) 214 | state_seq = sequence_model(input_seq) 215 | input_fnn = keras.Input(shape=STATE_FNN_SHAPE) 216 | state_fnn = get_dense_proj_fnn()(input_fnn) 217 | action = actor_model(state_seq, state_fnn) 218 | 219 | actor = keras.Model([input_seq, input_fnn], action) 220 | return actor 221 | 222 | # stochastic actor 223 | def get_pi_actor(sequence_model, actor_model): 224 | input_seq = keras.Input(shape=STATE_SEQ_SHAPE) 225 | state_seq = sequence_model(input_seq) 226 | state_fnn = keras.Input(shape=STATE_FNN_SHAPE) 227 | action_mean, action_std = actor_model(state_seq, state_fnn) 228 | 229 | actor = keras.Model([input_seq, state_fnn], [action_mean, action_std]) 230 | return actor 231 | 232 | # q value critic 233 | def get_q_critic(sequence_model, critic_model): 234 | input_seq = keras.Input(shape=STATE_SEQ_SHAPE) 235 | state_seq = sequence_model(input_seq) 236 | state_fnn = keras.Input(shape=STATE_FNN_SHAPE) 237 | action = keras.Input(shape=(N_ACTION,)) 238 | q_value = critic_model(state_seq, state_fnn, action) 239 | 240 | critic = keras.Model([input_seq, state_fnn, action], q_value) 241 | return critic 242 | 243 | # state value critic 244 | def get_v_critic(sequence_model, critic_model): 245 | input_seq = keras.Input(shape=STATE_SEQ_SHAPE) 246 | state_seq = sequence_model(input_seq) 247 | state_fnn = keras.Input(shape=STATE_FNN_SHAPE) 248 | state_value = critic_model(state_seq, state_fnn) 249 | 250 | critic = keras.Model([input_seq, state_fnn], state_value) 251 | return critic -------------------------------------------------------------------------------- /controllers/baseline_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pandapower.control.basic_controller import Controller 3 | 4 | import utils 5 | from setting import * 6 | 7 | class SimpleControl(Controller): 8 | def __init__(self, net, ids, **kwargs): 9 | super().__init__(net, **kwargs) 10 | self.rewards = [] 11 | self.costs = [] 12 | self.bat5_soc_prev = 0. 13 | self.bat10_soc_prev = 0. 14 | self.bat5_soc = 0. 15 | self.bat10_soc = 0. 
16 | self.soc_history = {'bat5_soc': [self.bat5_soc], 'bat10_soc': [self.bat10_soc]} 17 | self.last_time_step = None 18 | self.applied = False 19 | 20 | self.price_profile = kwargs['price_profile'] 21 | 22 | self.ids = ids 23 | self.trafo0_id = ids.get('trafo0') 24 | # self.mgt5_id = ids.get('mgt5') 25 | # self.mgt5_p_mw = net.sgen.at[self.mgt5_id, 'p_mw'] 26 | # self.mgt9_id = ids.get('mgt9') 27 | # self.mgt9_p_mw = net.sgen.at[self.mgt9_id, 'p_mw'] 28 | # self.mgt10_id = ids.get('mgt10') 29 | # self.mgt10_p_mw = net.sgen.at[self.mgt10_id, 'p_mw'] 30 | 31 | self.bat5_id = ids.get('bat5') 32 | self.bat5_p_mw = net.storage.at[self.bat5_id, 'p_mw'] 33 | self.bat5_max_e_mwh = net.storage.at[self.bat5_id, 'max_e_mwh'] 34 | self.bat10_id = ids.get('bat10') 35 | self.bat10_p_mw = net.storage.at[self.bat10_id, 'p_mw'] 36 | self.bat10_max_e_mwh = net.storage.at[self.bat10_id, 'max_e_mwh'] 37 | 38 | def is_converged(self, net) -> bool: 39 | return self.applied 40 | 41 | def calculate_reward(self, net, t): 42 | price = self.price_profile['price'][t - 1] 43 | cost, normalized_cost = utils.cal_cost( 44 | price=price, 45 | pcc_p_mw=-net.res_trafo.at[self.trafo0_id, 'p_lv_mw'], 46 | # mgt5_p_mw=self.mgt5_p_mw, 47 | # mgt9_p_mw=self.mgt9_p_mw, 48 | # mgt10_p_mw=self.mgt10_p_mw, 49 | bat5_soc_now=self.bat5_soc, 50 | bat5_soc_prev=self.bat5_soc_prev, 51 | bat10_soc_now=self.bat10_soc, 52 | bat10_soc_prev=self.bat10_soc_prev, 53 | ) 54 | reward = -normalized_cost 55 | 56 | return cost, reward 57 | 58 | def control_step(self, net): 59 | # net.sgen.at[self.mgt5_id, 'p_mw'] = self.mgt5_p_mw 60 | # net.sgen.at[self.mgt9_id, 'p_mw'] = self.mgt9_p_mw 61 | # net.sgen.at[self.mgt10_id, 'p_mw'] = self.mgt10_p_mw 62 | net.storage.at[self.bat5_id, 'p_mw'] = self.bat5_p_mw 63 | net.storage.at[self.bat10_id, 'p_mw'] = self.bat10_p_mw 64 | self.applied = True 65 | 66 | def finalize_step(self, net, t): 67 | super().finalize_step(net, t) 68 | 69 | # update soc 70 | self.bat5_soc_prev = self.bat5_soc 71 | self.bat10_soc_prev = self.bat10_soc 72 | self.bat5_soc += (self.bat5_p_mw * HOUR_PER_TIME_STEP) / self.bat5_max_e_mwh 73 | self.bat10_soc += (self.bat10_p_mw * HOUR_PER_TIME_STEP) / self.bat10_max_e_mwh 74 | 75 | # calculate reward 76 | t += 1 77 | cost, reward = self.calculate_reward(net, t) 78 | self.costs.append(cost) 79 | self.rewards.append(reward) 80 | 81 | def time_step(self, net, t): 82 | # select action 83 | self.bat5_p_mw, self.bat10_p_mw = self.policy(net) 84 | 85 | self.soc_history['bat5_soc'].append(self.bat5_soc) 86 | self.soc_history['bat10_soc'].append(self.bat10_soc) 87 | 88 | self.applied = False 89 | self.last_time_step = t 90 | 91 | def policy(self, net): 92 | p_pv = net.sgen.at[self.ids.get('pv3'), 'p_mw'] +\ 93 | net.sgen.at[self.ids.get('pv4'), 'p_mw'] +\ 94 | net.sgen.at[self.ids.get('pv5'), 'p_mw'] +\ 95 | net.sgen.at[self.ids.get('pv6'), 'p_mw'] +\ 96 | net.sgen.at[self.ids.get('pv8'), 'p_mw'] +\ 97 | net.sgen.at[self.ids.get('pv9'), 'p_mw'] +\ 98 | net.sgen.at[self.ids.get('pv10'), 'p_mw'] +\ 99 | net.sgen.at[self.ids.get('pv11'), 'p_mw'] 100 | p_wt = net.sgen.at[self.ids.get('wt7'), 'p_mw'] 101 | p_load = net.load.at[self.ids.get('load_r1'), 'p_mw'] +\ 102 | net.load.at[self.ids.get('load_r3'), 'p_mw'] +\ 103 | net.load.at[self.ids.get('load_r4'), 'p_mw'] +\ 104 | net.load.at[self.ids.get('load_r5'), 'p_mw'] +\ 105 | net.load.at[self.ids.get('load_r6'), 'p_mw'] +\ 106 | net.load.at[self.ids.get('load_r8'), 'p_mw'] +\ 107 | net.load.at[self.ids.get('load_r10'), 'p_mw'] +\ 108 | 
net.load.at[self.ids.get('load_r11'), 'p_mw'] 109 | 110 | p_b5_max = min((SOC_MAX - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MAX) 111 | p_b5_min = max((SOC_MIN - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MIN) 112 | p_b10_max = min((SOC_MAX - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MAX) 113 | p_b10_min = max((SOC_MIN - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MIN) 114 | 115 | excess = p_pv + p_wt - p_load 116 | # print(f'Excess = {excess}, pv: {p_pv}, wt: {p_wt}, load: {p_load}') 117 | if excess > 0: 118 | # charge 119 | b5_ratio = p_b5_max / (p_b5_max + p_b10_max) if (p_b5_max + p_b10_max) != 0. else 0. 120 | b10_ratio = p_b10_max / (p_b5_max + p_b10_max) if (p_b5_max + p_b10_max) != 0. else 0. 121 | p_b5 = min(excess * b5_ratio, p_b5_max) 122 | p_b10 = min(excess * b10_ratio, p_b10_max) 123 | # p_mgt5 = 0. 124 | # p_mgt9 = 0. 125 | # p_mgt10 = 0. 126 | else: 127 | # discharge 128 | b5_ratio = p_b5_min / (p_b5_min + p_b10_min) if (p_b5_min + p_b10_min) != 0. else 0. 129 | b10_ratio = p_b10_min / (p_b5_min + p_b10_min) if (p_b5_min + p_b10_min) != 0. else 0. 130 | p_b5 = max(excess * b5_ratio, p_b5_min) 131 | p_b10 = max(excess * b10_ratio, p_b10_min) 132 | p_b = p_b5 + p_b10 133 | 134 | # mgt5_ratio = P_MGT5_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 135 | # mgt9_ratio = P_MGT9_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 136 | # mgt10_ratio = P_MGT10_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 137 | # mgt5_op_point = (C_BUY - C_MGT5[1]) / C_MGT5[0] 138 | # mgt9_op_point = (C_BUY - C_MGT9[1]) / C_MGT9[0] 139 | # mgt10_op_point = (C_BUY - C_MGT10[1]) / C_MGT10[0] 140 | # p_mgt5 = 0. if excess > p_b else min((p_b - excess) * mgt5_ratio, mgt5_op_point) 141 | # p_mgt9 = 0. if excess > p_b else min((p_b - excess) * mgt9_ratio, mgt9_op_point) 142 | # p_mgt10 = 0. if excess > p_b else min((p_b - excess) * mgt10_ratio, mgt10_op_point) 143 | 144 | return p_b5, p_b10 145 | 146 | def reset(self): 147 | # self.mgt5_p_mw = 0. 148 | # self.mgt9_p_mw = 0. 149 | # self.mgt10_p_mw = 0. 150 | self.bat5_p_mw = 0. 151 | self.bat10_p_mw = 0. 152 | self.rewards = [] 153 | self.costs = [] 154 | self.bat5_soc = 0. 155 | self.bat10_soc = 0. 
156 | self.soc_history = {'bat5_soc': [self.bat5_soc], 'bat10_soc': [self.bat10_soc]} 157 | self.last_time_step = None 158 | self.applied = False 159 | 160 | class RandomControl(Controller): 161 | def __init__(self, net, ids, **kwargs): 162 | super().__init__(net, **kwargs) 163 | self.rewards = [] 164 | self.costs = [] 165 | self.bat5_soc = np.random.uniform(low=SOC_MIN, high=SOC_MAX) 166 | self.bat10_soc = np.random.uniform(low=SOC_MIN, high=SOC_MAX) 167 | self.last_time_step = None 168 | self.applied = False 169 | 170 | self.price_profile = kwargs['price_profile'] 171 | 172 | self.trafo0_id = ids.get('trafo0') 173 | # self.mgt5_id = ids.get('mgt5') 174 | # self.mgt5_p_mw = net.sgen.at[self.mgt5_id, 'p_mw'] 175 | # self.mgt9_id = ids.get('mgt9') 176 | # self.mgt9_p_mw = net.sgen.at[self.mgt9_id, 'p_mw'] 177 | # self.mgt10_id = ids.get('mgt10') 178 | # self.mgt10_p_mw = net.sgen.at[self.mgt10_id, 'p_mw'] 179 | 180 | self.bat5_id = ids.get('bat5') 181 | self.bat5_p_mw = net.storage.at[self.bat5_id, 'p_mw'] 182 | self.bat5_max_e_mwh = net.storage.at[self.bat5_id, 'max_e_mwh'] 183 | self.bat10_id = ids.get('bat10') 184 | self.bat10_p_mw = net.storage.at[self.bat10_id, 'p_mw'] 185 | self.bat10_max_e_mwh = net.storage.at[self.bat10_id, 'max_e_mwh'] 186 | 187 | def is_converged(self, net): 188 | return self.applied 189 | 190 | def control_step(self, net): 191 | # net.sgen.at[self.mgt5_id, 'p_mw'] = self.mgt5_p_mw 192 | # net.sgen.at[self.mgt9_id, 'p_mw'] = self.mgt9_p_mw 193 | # net.sgen.at[self.mgt10_id, 'p_mw'] = self.mgt10_p_mw 194 | net.storage.at[self.bat5_id, 'p_mw'] = self.bat5_p_mw 195 | net.storage.at[self.bat10_id, 'p_mw'] = self.bat10_p_mw 196 | self.applied = True 197 | 198 | def time_step(self, net, t): 199 | if self.last_time_step is not None: 200 | # update soc 201 | bat5_soc_prev = self.bat5_soc 202 | bat10_soc_prev = self.bat10_soc 203 | self.bat5_soc += (self.bat5_p_mw * HOUR_PER_TIME_STEP) / self.bat5_max_e_mwh 204 | self.bat10_soc += (self.bat10_p_mw * HOUR_PER_TIME_STEP) / self.bat10_max_e_mwh 205 | 206 | # calculate reward 207 | price = self.price_profile['price'][t] 208 | cost, normalized_cost = utils.cal_cost( 209 | price=price, 210 | pcc_p_mw=-net.res_trafo.at[self.trafo0_id, 'p_lv_mw'], 211 | # mgt5_p_mw=self.mgt5_p_mw, 212 | # mgt9_p_mw=self.mgt9_p_mw, 213 | # mgt10_p_mw=self.mgt10_p_mw, 214 | bat5_soc_now=self.bat5_soc, 215 | bat5_soc_prev=bat5_soc_prev, 216 | bat10_soc_now=self.bat10_soc, 217 | bat10_soc_prev=bat10_soc_prev 218 | ) 219 | reward = -normalized_cost 220 | self.rewards.append(reward) 221 | self.costs.append(cost) 222 | 223 | # select action 224 | self.bat5_p_mw, self.bat10_p_mw = np.random.uniform(low=MIN_ACTION, high=MAX_ACTION, size=(N_ACTIONS,)) 225 | p_b5_max = min((SOC_MAX - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MAX) 226 | p_b5_min = max((SOC_MIN - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MIN) 227 | p_b10_max = min((SOC_MAX - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MAX) 228 | p_b10_min = max((SOC_MIN - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MIN) 229 | self.bat5_p_mw = np.clip(self.bat5_p_mw, p_b5_min, p_b5_max) 230 | self.bat10_p_mw = np.clip(self.bat10_p_mw, p_b10_min, p_b10_max) 231 | self.applied = False 232 | self.last_time_step = t 233 | 234 | def reset(self): 235 | # self.mgt5_p_mw = 0. 236 | # self.mgt9_p_mw = 0. 237 | # self.mgt10_p_mw = 0. 238 | self.bat5_p_mw = 0. 239 | self.bat10_p_mw = 0. 
240 | self.rewards = [] 241 | self.costs = [] 242 | self.bat5_soc = np.random.uniform(low=SOC_MIN, high=SOC_MAX) 243 | self.bat10_soc = np.random.uniform(low=SOC_MIN, high=SOC_MAX) 244 | self.last_time_step = None 245 | self.applied = False -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main program file. 3 | 4 | func: 5 | - train_ppo 6 | - train_td3 7 | - test 8 | - baseline 9 | ''' 10 | 11 | import os, shutil 12 | import logging 13 | import numpy as np 14 | import pandas as pd 15 | import matplotlib.pyplot as plt 16 | 17 | import pandapower as pp 18 | import pandapower.timeseries as ts 19 | from pandapower.timeseries.data_sources.frame_data import DFData 20 | from pandapower.timeseries.output_writer import OutputWriter 21 | 22 | import utils 23 | from cigre_mv_microgrid import create_cigre_mv_microgrid 24 | from controllers.baseline_controller import RandomControl, SimpleControl 25 | from controllers.td3_controller import TD3Agent 26 | from controllers.ppo_controller import PPOAgent 27 | from setting import * 28 | 29 | def train_ppo(n_runs, n_epochs, start, train_length, pv_profile, wt_profile, load_profile, price_profile, 30 | sequence_model_type='none', noise_type='action'): 31 | # env 32 | assert(start >= 0 and start < pv_profile.shape[0]) 33 | assert(train_length >= 0 and train_length <= pv_profile.shape[0] - start) 34 | time_steps = range(start, start + train_length) 35 | pv_ds = DFData(pv_profile.iloc[start: start+train_length]) 36 | wt_ds = DFData(wt_profile.iloc[start: start+train_length]) 37 | load_ds = DFData(load_profile.iloc[start: start+train_length]) 38 | 39 | # history 40 | history_dir = os.path.join('.', 'history', 'train', 'PPO') 41 | if os.path.isdir(history_dir): 42 | shutil.rmtree(history_dir) 43 | 44 | # run 45 | ep_return_list = np.zeros((n_runs, n_epochs)) 46 | ep_cost_list = np.zeros((n_runs, n_epochs)) 47 | for run in range(n_runs): 48 | net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds) 49 | agent = PPOAgent(net, ids, pv_profile, wt_profile, load_profile, price_profile, sequence_model_type) 50 | 51 | best_cost = train_length * MAX_COST 52 | for epoch in range(n_epochs): 53 | agent.training = True 54 | ts.run_timeseries(net, time_steps=time_steps, continue_on_divergence=False) 55 | ep_return_list[run, epoch] = np.sum(agent.rewards) 56 | np.save(os.path.join('.', 'plot', 'ep_return_list.npy'), ep_return_list) 57 | ep_cost_list[run, epoch] = np.sum(agent.costs) 58 | np.save(os.path.join('.', 'plot', 'ep_cost_list.npy'), ep_cost_list) 59 | print(f'Run: {run + 1}, epoch: {epoch + 1}, return = {ep_return_list[run, epoch]:.3f}, cost = {ep_cost_list[run, epoch]:.3f}') 60 | 61 | # history & best models 62 | if epoch >= 20: 63 | cost = np.sum(agent.costs) 64 | if cost < best_cost or (epoch % 20 == 0): 65 | # log history 66 | dir = os.path.join(history_dir,str(run+1), 'best_avg_cost') 67 | if not os.path.isdir(dir): 68 | os.makedirs(dir) 69 | pd.DataFrame(agent.history).to_csv(os.path.join(dir, f'[{epoch}]_cost{cost:.3f}.csv')) 70 | 71 | # save best cost model 72 | if cost < best_cost: 73 | agent.save(run) 74 | best_cost = cost 75 | agent.reset() 76 | 77 | # plot 78 | print(f'Epoch return: \n {np.mean(ep_return_list, axis=0)}') 79 | print(f'Epoch cost: \n {np.mean(ep_cost_list, axis=0)}') 80 | utils.plot_ep_values(ep_return_list, train_length, n_epochs, ylabel='Return') 81 | utils.plot_ep_values(ep_cost_list, train_length, 
n_epochs, ylabel='Cost') 82 | 83 | 84 | def train_td3(n_runs, n_epochs, start, train_length, pv_profile, wt_profile, load_profile, price_profile, 85 | verbose=True, sequence_model_type='rnn', use_pretrained_sequence_model=False, 86 | noise_type='action', retrain=False, run=1): 87 | # env 88 | assert(start >= 0 and start < pv_profile.shape[0]) 89 | assert(train_length >= 0 and train_length <= pv_profile.shape[0] - start) 90 | time_steps = range(start, start + train_length) 91 | pv_ds = DFData(pv_profile.iloc[start: start+train_length]) 92 | wt_ds = DFData(wt_profile.iloc[start: start+train_length]) 93 | load_ds = DFData(load_profile.iloc[start: start+train_length]) 94 | 95 | # history 96 | history_dir = os.path.join('.', 'history', 'train', 'TD3') 97 | if os.path.isdir(history_dir): 98 | shutil.rmtree(history_dir) 99 | 100 | # run 101 | ep_return_list = np.zeros((n_runs, n_epochs)) 102 | ep_cost_list = np.zeros((n_runs, n_epochs)) 103 | for run in range(n_runs): 104 | net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds) 105 | agent = TD3Agent(net, ids, pv_profile, wt_profile, load_profile, price_profile, 106 | training=True, n_epochs=n_epochs, 107 | sequence_model_type=sequence_model_type, use_pretrained_sequence_model=use_pretrained_sequence_model, 108 | buffer_size=BUFFER_SIZE, noise_type=noise_type, batch_size=BATCH_SIZE) 109 | if retrain: 110 | agent.load_models(run=run) 111 | 112 | # run 113 | best_cost = train_length * MAX_COST 114 | for epoch in range(n_epochs): 115 | # train 116 | agent.training = True 117 | ts.run_timeseries(net, time_steps=time_steps, verbose=verbose, continue_on_divergence=False) 118 | ep_return_list[run, epoch] = np.sum(agent.rewards) 119 | ep_cost_list[run, epoch] = np.sum(agent.costs) 120 | print(f'Run: {run + 1}, episode: {epoch + 1}, return = {ep_return_list[run, epoch]:.3f}, cost = {ep_cost_list[run, epoch]:.3f}') 121 | # agent.reset() 122 | 123 | # test 124 | # agent.training = False 125 | # ts.run_timeseries(net, time_steps=time_steps, verbose=verbose, continue_on_divergence=False) 126 | 127 | test_cost = np.sum(agent.costs) 128 | if (epoch >= 20) and ((epoch % 20 == 0) or test_cost < best_cost): 129 | # log history 130 | dir = os.path.join(history_dir,str(run+1), 'best_avg_cost') 131 | if not os.path.isdir(dir): 132 | os.makedirs(dir) 133 | pd.DataFrame(agent.history).to_csv(os.path.join(dir, f'[{epoch}]_cost{ep_cost_list[run, epoch]:.3f}.csv')) 134 | 135 | # save best cost model 136 | # if test_cost < best_cost: 137 | # agent.save_models(run=run+1) 138 | # best_cost = test_cost 139 | agent.reset() 140 | 141 | # plot 142 | print(f'Episode return: \n {np.mean(ep_return_list, axis=0)}') 143 | print(f'Episode cost: \n {np.mean(ep_cost_list, axis=0)}') 144 | utils.plot_ep_values(ep_return_list, train_length, n_epochs, ylabel='Return') 145 | utils.plot_ep_values(ep_cost_list, train_length, n_epochs, ylabel='Cost') 146 | 147 | def test(n_runs, start, test_length, pv_profile, wt_profile, load_profile, price_profile, run, sequence_model_type='rnn', log=False, log_path=None): 148 | assert(start >= 0 and start < pv_profile.shape[0]) 149 | assert(test_length >= 0 and test_length <= pv_profile.shape[0] - start) 150 | time_steps=range(start, start+test_length) 151 | 152 | # env 153 | pv_ds = DFData(pv_profile.iloc[start: start+test_length]) 154 | wt_ds = DFData(wt_profile.iloc[start: start+test_length]) 155 | load_ds = DFData(load_profile.iloc[start: start+test_length]) 156 | net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds) 157 | 158 | # log pf 
results 159 | if log: 160 | n_runs = 1 161 | ow = OutputWriter(net, time_steps, output_path=log_path, output_file_type='.csv', csv_separator=',') 162 | ow.log_variable('res_sgen', 'p_mw') 163 | ow.log_variable('res_load', 'p_mw') 164 | ow.log_variable('res_storage', 'p_mw') 165 | ow.log_variable('res_trafo', 'p_lv_mw', index=[ids['trafo0']]) 166 | 167 | # agent 168 | agent = PPOAgent(net, ids, pv_profile, wt_profile, load_profile, price_profile, sequence_model_type, training=False) 169 | agent.load(run) 170 | # agent = TD3Agent(net, ids, pv_profile, wt_profile, load_profile, price_profile, 171 | # training=False, sequence_model_type=sequence_model_type) 172 | # agent.load_models(run=run) 173 | 174 | # run 175 | ep_cost_list = [] 176 | for _ in range(n_runs): 177 | ts.run_timeseries(net, time_steps=time_steps, verbose=False, continue_on_divergence=False) 178 | ep_cost_list.append(np.sum(agent.costs)) 179 | agent.reset() 180 | print(f'Avg cost = {np.mean(ep_cost_list)}') 181 | 182 | def baseline(n_runs, start, test_length, pv_profile, wt_profile, load_profile, price_profile, Control, log=False, log_path=None): 183 | assert(start >= 0 and start < pv_profile.shape[0]) 184 | assert(test_length >= 0 and test_length <= pv_profile.shape[0] - start) 185 | time_steps=range(start, start+test_length) 186 | 187 | # env 188 | pv_ds = DFData(pv_profile.iloc[start: start+test_length]) 189 | wt_ds = DFData(wt_profile.iloc[start: start+test_length]) 190 | load_ds = DFData(load_profile.iloc[start: start+test_length]) 191 | net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds) 192 | 193 | # log pf results 194 | if log: 195 | n_runs = 1 196 | ow = OutputWriter(net, time_steps, output_path=log_path, output_file_type='.csv', csv_separator=',') 197 | ow.log_variable('res_sgen', 'p_mw') 198 | ow.log_variable('res_load', 'p_mw') 199 | ow.log_variable('res_storage', 'p_mw') 200 | ow.log_variable('res_trafo', 'p_lv_mw', index=[ids['trafo0']]) 201 | 202 | # controller 203 | controller = Control(net, ids, price_profile=price_profile) 204 | 205 | # run 206 | ep_cost_list = [] 207 | for _ in range(n_runs): 208 | ts.run_timeseries(net, time_steps=time_steps, continue_on_divergence=False, verbose=True) 209 | ep_cost_list.append(np.sum(controller.costs)) 210 | controller.reset() 211 | print(f'Avg cost = {np.mean(ep_cost_list)}') 212 | 213 | if __name__ == '__main__': 214 | # --- configurations --- 215 | logging.basicConfig(level=logging.INFO) 216 | algo = 'ppo' 217 | sequence_model_type = 'rnn' # ['none', 'conv1d', 'rnn', 'transformer'] 218 | sequence_length = 1 if (sequence_model_type == 'none') else SEQ_LENGTH 219 | 220 | # train configs 221 | n_train_runs = 10 222 | n_epochs = 500 223 | train_start = 0 224 | train_length = 30 * 24 225 | noise_type = 'action' # ['action', 'param'] 226 | use_pretrained_sequence_model = False 227 | 228 | # test configs 229 | n_test_runs = 1 230 | # test_start = train_start + train_length 231 | # test_length = 7 * 24 232 | test_start = train_start 233 | test_length = train_length 234 | log = True 235 | log_path = os.path.join('.', 'pf_res', algo, sequence_model_type) 236 | log_path_baseline = os.path.join('.', 'pf_res', 'baseline', 'simple') 237 | 238 | # --- profile --- 239 | pv_profile = pd.read_csv('./data/profile/pv_profile.csv') 240 | wt_profile = pd.read_csv('./data/profile/wt_profile.csv') 241 | load_profile = pd.read_csv('./data/profile/load_profile.csv') 242 | price_profile = pd.read_csv('./data/profile/price_profile.csv') 243 | 244 | # --- train, test --- 245 | 
train_ppo(n_train_runs, n_epochs, train_start, train_length, 246 | pv_profile, wt_profile, load_profile, price_profile, sequence_model_type) 247 | 248 | # train_td3(n_runs=n_train_runs, n_epochs=n_epochs, start=train_start, train_length=train_length, 249 | # pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile, 250 | # sequence_model_type=sequence_model_type, use_pretrained_sequence_model=use_pretrained_sequence_model, noise_type=noise_type) 251 | 252 | # test(n_runs=n_test_runs, start=test_start, test_length=test_length, 253 | # pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile, 254 | # run=0, sequence_model_type=sequence_model_type, log=log, log_path=log_path) 255 | 256 | # baseline(n_runs=n_test_runs, start=test_start, test_length=test_length, 257 | # pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile, 258 | # Control=SimpleControl, 259 | # log=log, log_path=log_path_baseline) 260 | 261 | # --- plot pf results --- 262 | # utils.plot_pf_results(log_path, test_start, test_length) 263 | # utils.plot_pf_results(dir=log_path_baseline) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | func: 3 | - scale_to_mg 4 | - normalize_state 5 | - cal_cost 6 | - extra_reward 7 | - plot_return 8 | - plot_pf_results 9 | - view_profile 10 | ''' 11 | 12 | import os 13 | import logging 14 | import pickle 15 | from pathlib import Path 16 | import numpy as np 17 | from typing import Dict 18 | import matplotlib.pyplot as plt 19 | import pandas as pd 20 | import tensorflow as tf 21 | 22 | from setting import * 23 | 24 | # --- Action Scaling --- 25 | def scale_to_mg(nn_action, min_action, max_action): 26 | nn_action = np.clip(nn_action, -1., 1.) 
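    # Affine map from the clipped network output in [-1, 1] to the element-wise
    # microgrid bounds, p = (a + 1) * (max - min) / 2 + min; e.g. a = 0.5 with
    # bounds [-1, 2] MW gives 1.25 MW. Because the controllers update min_action /
    # max_action with the SoC-dependent charging limits at every step, this realizes
    # the valid action space mapping described in the abstract.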
27 | return (nn_action + 1) * (max_action - min_action) / 2 + min_action 28 | 29 | # --- Normalization --- 30 | def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, batch_count): 31 | delta = batch_mean - mean 32 | tot_count = count + batch_count 33 | 34 | new_mean = mean + delta * batch_count / tot_count 35 | m_a = var * count 36 | m_b = batch_var * batch_count 37 | M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count 38 | new_var = M2 / tot_count 39 | new_count = tot_count 40 | 41 | return new_mean, new_var, new_count 42 | 43 | class NormalizeAction: 44 | def __init__(self, epsilon=1e-8): 45 | self.a_rms = RunningMeanStd(shape=(N_ACTION,)) 46 | self.epsilon = epsilon 47 | 48 | def normalize(self, a): 49 | self.a_rms.update(a) 50 | a = (a -self.a_rms.mean) / np.sqrt(self.a_rms.var + self.epsilon) 51 | a = np.clip(a, -5, 5) 52 | return a 53 | 54 | def tf_normalize(self, a): 55 | mean = tf.convert_to_tensor(self.a_rms.mean, dtype=tf.float32) 56 | var = tf.convert_to_tensor(self.a_rms.var, dtype=tf.float32) 57 | a = (a - mean) / tf.math.sqrt(var + self.epsilon) 58 | a = tf.clip_by_value(a, -5, 5) 59 | return a 60 | 61 | class NormalizeObservation: 62 | def __init__(self, epsilon=1e-8): 63 | self.obs_seq_rms = RunningMeanStd(shape=STATE_SEQ_SHAPE) 64 | self.obs_fnn_rms = RunningMeanStd(shape=STATE_FNN_SHAPE) 65 | self.epsilon = epsilon 66 | 67 | def normalize(self, obs, update=True): 68 | obs_seq, obs_fnn = obs 69 | if update: 70 | self.obs_seq_rms.update(obs_seq) 71 | self.obs_fnn_rms.update(obs_fnn) 72 | obs_seq = (obs_seq - self.obs_seq_rms.mean) / np.sqrt(self.obs_seq_rms.var + self.epsilon) 73 | obs_seq = np.clip(obs_seq, -5, 5) 74 | obs_fnn = (obs_fnn - self.obs_fnn_rms.mean) / np.sqrt(self.obs_fnn_rms.var + self.epsilon) 75 | obs_fnn = np.clip(obs_fnn, -5, 5) 76 | return obs_seq, obs_fnn 77 | 78 | def save(self, dir): 79 | fpath = Path(os.path.join(dir, 'obs.pkl')) 80 | fpath.parent.mkdir(parents=True, exist_ok=True) 81 | with open(fpath, 'wb') as f: 82 | pickle.dump({ 83 | 'obs_seq_mean': self.obs_seq_rms.mean, 84 | 'obs_seq_var': self.obs_seq_rms.var, 85 | 'obs_fnn_mean': self.obs_fnn_rms.mean, 86 | 'obs_fnn_var': self.obs_fnn_rms.var, 87 | }, f) 88 | 89 | def load(self, dir): 90 | with open(os.path.join(dir, 'obs.pkl'), 'rb') as f: 91 | data = pickle.load(f) 92 | self.obs_seq_rms.mean = data['obs_seq_mean'] 93 | self.obs_seq_rms.var = data['obs_seq_var'] 94 | self.obs_fnn_rms.mean = data['obs_fnn_mean'] 95 | self.obs_fnn_rms.var = data['obs_fnn_var'] 96 | 97 | class NormalizeReward: 98 | def __init__(self, gamma=GAMMA, epsilon=1e-8): 99 | self.return_rms = RunningMeanStd() 100 | self.return_ = np.zeros(1) 101 | self.gamma = gamma 102 | self.epsilon = epsilon 103 | 104 | def normalize(self, r): 105 | self.return_ = r + self.gamma * self.return_ 106 | self.return_rms.update(self.return_) 107 | r /= np.sqrt(self.return_rms.var + self.epsilon) 108 | r = np.clip(r, -5, 5) 109 | return r 110 | 111 | class RunningMeanStd(object): 112 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 113 | def __init__(self, epsilon=1e-4, shape=()): 114 | self.mean = np.zeros(shape, 'float64') 115 | self.var = np.ones(shape, 'float64') 116 | self.count = epsilon 117 | 118 | def update(self, x): 119 | batch_mean = np.mean(x, axis=0) 120 | batch_var = np.var(x, axis=0) 121 | batch_count = x.shape[0] 122 | self.update_from_moments(batch_mean, batch_var, batch_count) 123 | 124 | def update_from_moments(self, batch_mean, batch_var, 
batch_count): 125 | self.mean, self.var, self.count = update_mean_var_count_from_moments( 126 | self.mean, self.var, self.count, batch_mean, batch_var, batch_count) 127 | 128 | 129 | def normalize_state(state) -> Dict: 130 | normalized_state_rnn = state[0] / np.array([P_EXCESS_MAX, C_PRICE_MAX]) 131 | # normalized_state_rnn = state[0] / np.array([*P_PV_MAX_LIST, *P_WT_MAX_LIST, *P_LOAD_MAX_LIST, C_PRICE_MAX]) 132 | # normalized_state_rnn = state[0] / np.array([*P_PV_MAX_LIST, *P_WT_MAX_LIST, *P_LOAD_MAX_LIST, P_EXCESS_MAX, C_PRICE_MAX]) 133 | normalized_state_fnn = state[1] / SOC_MAX 134 | 135 | return normalized_state_rnn, normalized_state_fnn 136 | 137 | # --- Reward --- 138 | def cal_cost(price, pcc_p_mw, bat5_soc_now, bat5_soc_prev, bat10_soc_now, bat10_soc_prev, **kwargs): 139 | transaction_cost = price * pcc_p_mw 140 | # mgt_cost = C_MGT5[0] * pow(mgt5_p_mw, 2) + C_MGT5[1] * mgt5_p_mw + \ 141 | # C_MGT9[0] * pow(mgt9_p_mw, 2) + C_MGT9[1] * mgt9_p_mw + \ 142 | # C_MGT10[0] * pow(mgt10_p_mw, 2) + C_MGT10[1] * mgt10_p_mw 143 | battery_cost = C_BAT5_DoD * pow((bat5_soc_now - bat5_soc_prev), 2) + \ 144 | C_BAT10_DoD * pow((bat10_soc_now - bat10_soc_prev), 2) 145 | soc_penalty = C_SOC_LIMIT if ((bat5_soc_now > (1+SOC_TOLERANCE)*SOC_MAX or bat5_soc_now < (1-SOC_TOLERANCE)*SOC_MIN) or 146 | (bat10_soc_now > (1+SOC_TOLERANCE)*SOC_MAX or bat10_soc_now < (1-SOC_TOLERANCE)*SOC_MIN)) else 0 147 | 148 | if len(kwargs): 149 | ids = kwargs['ids'] 150 | t = kwargs['t'] 151 | net = kwargs['net'] 152 | log_cost_info(transaction_cost, battery_cost, soc_penalty, t, net=net, ids=ids, pcc_p_mw=pcc_p_mw) 153 | 154 | cost = (transaction_cost + battery_cost) * HOUR_PER_TIME_STEP + soc_penalty 155 | normalized_cost = cost / MAX_COST 156 | 157 | return cost, normalized_cost 158 | 159 | def extra_reward(nn_bat_p_mw, valid_bat_p_mw): 160 | # penalty for invalid action 161 | dif = np.sum(np.abs(nn_bat_p_mw - valid_bat_p_mw)) 162 | dif /= (P_B10_MAX + P_B5_MAX) 163 | reward = 0. 
if (dif < 1e-3) else (REWARD_INVALID_ACTION + dif * REWARD_INVALID_ACTION) 164 | return reward 165 | 166 | # --- Plot --- 167 | def plot_ep_values(ep_values, train_length, epochs, ylabel): 168 | runs = ep_values.shape[0] 169 | fig_path = os.path.join('plot', f'{int(train_length/24)}days_{runs}runs_{epochs}eps_{str.lower(ylabel)}.png') 170 | arr_path = os.path.join('plot', f'{int(train_length/24)}days_{runs}runs_{epochs}eps_{str.lower(ylabel)}.npy') 171 | np.save(arr_path, ep_values) 172 | 173 | ep_return = np.median(ep_values, axis=0) 174 | epochs = range(1, len(ep_return) + 1) 175 | plt.plot(epochs, ep_return) 176 | plt.title(f'Training') 177 | plt.xlabel('Epoch') 178 | plt.ylabel(ylabel) 179 | plt.savefig(fig_path) 180 | plt.show() 181 | 182 | def plot_pf_results(dir, start, length): 183 | # pv, wt, mgt, load, bat, util, excess 184 | res_sgen_file = os.path.join(dir, 'res_sgen', 'p_mw.csv') 185 | res_load_file = os.path.join(dir, 'res_load', 'p_mw.csv') 186 | res_storage_file = os.path.join(dir, 'res_storage', 'p_mw.csv') 187 | res_trafo_file = os.path.join(dir, 'res_trafo', 'p_lv_mw.csv') 188 | 189 | # pv, wt, mgt 190 | sgen_p_mw = pd.read_csv(res_sgen_file) 191 | pv_p_mw = sgen_p_mw.iloc[:, 1:9] 192 | pv_p_mw.columns = ['pv3', 'pv4', 'pv5', 'pv6', 'pv8', 'pv9', 'pv10', 'pv11'] 193 | wt_p_mw = sgen_p_mw.iloc[:, [9]] 194 | wt_p_mw.columns = ['wt7'] 195 | # mgt_p_mw = sgen_p_mw.iloc[:, 10:] 196 | # mgt_p_mw.columns = ['mgt5', 'mgt9', 'mgt10'] 197 | 198 | # load 199 | load_p_mw = pd.read_csv(res_load_file) 200 | load_p_mw = load_p_mw.iloc[:, 1:] 201 | load_p_mw.columns = ['load_r1', 'load_r3', 'load_r4', 'load_r5', 'load_r6', 'load_r8', 'load_r10', 'load_r11'] 202 | 203 | # bat 204 | bat_p_mw = pd.read_csv(res_storage_file) 205 | bat_p_mw = bat_p_mw.iloc[:, 1:] 206 | bat_p_mw.columns = ['bat5', 'bat10'] 207 | 208 | # utility 209 | trafo_p_mw = pd.read_csv(res_trafo_file) 210 | util_p_mw = -trafo_p_mw.iloc[:, [1]] 211 | util_p_mw.columns = ['utility'] 212 | 213 | # price 214 | price = pd.read_csv(os.path.join('.', 'data', 'profile', 'price_profile.csv')) 215 | 216 | excess_p_mw = pv_p_mw.sum(axis=1) + wt_p_mw.sum(axis=1) - load_p_mw.sum(axis=1) 217 | excess_p_mw = pd.DataFrame({'excess': excess_p_mw}) 218 | 219 | ax = excess_p_mw.iloc[start: start+length].plot(drawstyle='steps-post') 220 | bat_p_mw.iloc[start: start+length].plot(ax=ax, drawstyle='steps-post') 221 | price.iloc[start: start+length].plot(ax=ax, drawstyle='steps-post') 222 | plt.title('Power Flow') 223 | plt.xlabel('hour') 224 | plt.ylabel('MW') 225 | plt.show() 226 | 227 | def view_profile(pv_profile, wt_profile, load_profile, price_profile, start=None, length=None): 228 | start = 0 if start is None else start 229 | length = (len(pv_profile.index)-start) if length is None else length 230 | pv_p_mw = pv_profile.iloc[start: start+length, :] 231 | wt_p_mw = wt_profile.iloc[start: start+length, :] 232 | load_p_mw = load_profile.iloc[start: start+length, :] 233 | price_profile = price_profile.iloc[start: start+length, :] 234 | 235 | # MW and excess profile 236 | profile_p_mw = pd.concat([pv_p_mw, wt_p_mw, load_p_mw]).iloc[start: start+length, :] 237 | excess_profile = pv_p_mw.sum(axis=1) + wt_p_mw.sum(axis=1) - load_p_mw.sum(axis=1) 238 | excess_profile = pd.DataFrame({'Excess': excess_profile}) 239 | 240 | # info 241 | print('--- Profile ---') 242 | print(f'PV:\n max = {pv_profile.max(numeric_only=True)}, \nmin = {pv_profile.min(numeric_only=True)}') 243 | print(f'WT:\n max = {wt_profile.max(numeric_only=True)}, \nmin = 
{wt_profile.min(numeric_only=True)}') 244 | print(f'Load:\n max = {load_profile.max(numeric_only=True)}, \nmin = {load_profile.min(numeric_only=True)}') 245 | print(f'Excess:\n max = {excess_profile.max(numeric_only=True)}, \nmin = {excess_profile.min(numeric_only=True)}') 246 | print(f'Price:\n max = {price_profile.max(numeric_only=True)}, \nmin = {price_profile.min(numeric_only=True)}') 247 | 248 | # plot 249 | pv_p_mw.plot(xlabel='hour', ylabel='p_mw', title='PV') 250 | wt_p_mw.plot(xlabel='hour', ylabel='p_mw', title='WT') 251 | load_p_mw.plot(xlabel='hour', ylabel='p_mw', title='Load') 252 | price_profile.plot(xlabel='hour', ylabel='price', title='Price') 253 | profile_p_mw.plot(xlabel='hour', ylabel='p_mw', title='Microgrid') 254 | ax = excess_profile.plot(xlabel='hour', ylabel='p_mw', title='excess') 255 | ax.plot(range(start, start+length), np.zeros((length),)) 256 | plt.show() 257 | 258 | # --- Logging --- 259 | def log_actor_critic_info(actor_loss, critic_loss, t=None, freq=20, **kwargs): 260 | if t is None: 261 | logging.info('--- Learn ---') 262 | logging.info(f'actor loss = {actor_loss}') 263 | logging.info(f'critic loss = {critic_loss}') 264 | return 265 | 266 | if t % freq == 0: 267 | logging.info('--- Learn ---') 268 | logging.info(f'actor loss = {actor_loss}') 269 | logging.info(f'critic loss = {critic_loss}') 270 | 271 | def log_cost_info(transaction_cost, battery_cost, soc_penalty, t, freq=100, **kwargs): 272 | if t % freq == 0: 273 | net = kwargs['net'] 274 | ids = kwargs['ids'] 275 | pcc_p_mw = kwargs['pcc_p_mw'] 276 | p_wt = net.res_sgen['p_mw'].iloc[ids['wt7']].sum() 277 | p_pv = net.res_sgen['p_mw'].sum() - p_wt 278 | p_bat = net.res_storage['p_mw'].sum() 279 | p_load = net.res_load['p_mw'].sum() 280 | excess = p_pv + p_wt - p_bat - p_load 281 | 282 | logging.info('--- Cost ---') 283 | logging.info(f'trans: {transaction_cost:.3f}, bat: {battery_cost:.3f}, soc: {soc_penalty:.3f}') 284 | logging.info('--- Power flow ---') 285 | logging.info(f'pcc = {pcc_p_mw:.3f}, excess = {excess:.3f}, pv = {p_pv:.3f}, wt = {p_wt:.3f}, bat = {p_bat:.3f}, load = {p_load:.3f}') 286 | 287 | def log_trans_info(s, a, t, freq=100, **kwargs): 288 | if t % freq == 0: 289 | s_seq = s[0] 290 | s_fnn = s[1] 291 | 292 | logging.info('--- State ---') 293 | logging.info(f'shape: ({s_seq.shape}, {s_fnn.shape})') 294 | logging.info(f'content: {s_seq[0]}, {s_fnn}') 295 | logging.info('--- Action ---') 296 | logging.info(f'shape: {a.shape}') 297 | logging.info(f'content: {a}') 298 | 299 | # --- Others --- 300 | def get_excess(pv_profile, wt_profile, load_profile, t): 301 | excess = pv_profile['pv3'][t] +\ 302 | pv_profile['pv4'][t] +\ 303 | pv_profile['pv5'][t] +\ 304 | pv_profile['pv6'][t] +\ 305 | pv_profile['pv8'][t] +\ 306 | pv_profile['pv9'][t] +\ 307 | pv_profile['pv10'][t] +\ 308 | pv_profile['pv11'][t] +\ 309 | wt_profile['wt7'][t] -\ 310 | load_profile['load_r1'][t] -\ 311 | load_profile['load_r3'][t] -\ 312 | load_profile['load_r4'][t] -\ 313 | load_profile['load_r5'][t] -\ 314 | load_profile['load_r6'][t] -\ 315 | load_profile['load_r8'][t] -\ 316 | load_profile['load_r10'][t] -\ 317 | load_profile['load_r11'][t] 318 | 319 | return excess 320 | 321 | def policy_simple(net, ids, bat5_soc, bat10_soc, bat5_max_e_mwh, bat10_max_e_mwh): 322 | p_pv = net.sgen.at[ids.get('pv3'), 'p_mw'] +\ 323 | net.sgen.at[ids.get('pv4'), 'p_mw'] +\ 324 | net.sgen.at[ids.get('pv5'), 'p_mw'] +\ 325 | net.sgen.at[ids.get('pv6'), 'p_mw'] +\ 326 | net.sgen.at[ids.get('pv8'), 'p_mw'] +\ 327 | 
net.sgen.at[ids.get('pv9'), 'p_mw'] +\ 328 | net.sgen.at[ids.get('pv10'), 'p_mw'] +\ 329 | net.sgen.at[ids.get('pv11'), 'p_mw'] 330 | p_wt = net.sgen.at[ids.get('wt7'), 'p_mw'] 331 | p_load = net.load.at[ids.get('load_r1'), 'p_mw'] +\ 332 | net.load.at[ids.get('load_r3'), 'p_mw'] +\ 333 | net.load.at[ids.get('load_r4'), 'p_mw'] +\ 334 | net.load.at[ids.get('load_r5'), 'p_mw'] +\ 335 | net.load.at[ids.get('load_r6'), 'p_mw'] +\ 336 | net.load.at[ids.get('load_r8'), 'p_mw'] +\ 337 | net.load.at[ids.get('load_r10'), 'p_mw'] +\ 338 | net.load.at[ids.get('load_r11'), 'p_mw'] 339 | 340 | p_b5_max = min((SOC_MAX - bat5_soc) * bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MAX) 341 | p_b5_min = max((SOC_MIN - bat5_soc) * bat5_max_e_mwh / HOUR_PER_TIME_STEP, P_B5_MIN) 342 | p_b10_max = min((SOC_MAX - bat10_soc) * bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MAX) 343 | p_b10_min = max((SOC_MIN - bat10_soc) * bat10_max_e_mwh / HOUR_PER_TIME_STEP, P_B10_MIN) 344 | 345 | excess = p_pv + p_wt - p_load 346 | # print(f'Excess = {excess}, pv: {p_pv}, wt: {p_wt}, load: {p_load}') 347 | if excess > 0: 348 | # charge 349 | b5_ratio = p_b5_max / (p_b5_max + p_b10_max) if (p_b5_max + p_b10_max) != 0. else 0. 350 | b10_ratio = p_b10_max / (p_b5_max + p_b10_max) if (p_b5_max + p_b10_max) != 0. else 0. 351 | p_b5 = min(excess * b5_ratio, p_b5_max) 352 | p_b10 = min(excess * b10_ratio, p_b10_max) 353 | # p_mgt5 = 0. 354 | # p_mgt9 = 0. 355 | # p_mgt10 = 0. 356 | else: 357 | # discharge 358 | b5_ratio = p_b5_min / (p_b5_min + p_b10_min) if (p_b5_min + p_b10_min) != 0. else 0. 359 | b10_ratio = p_b10_min / (p_b5_min + p_b10_min) if (p_b5_min + p_b10_min) != 0. else 0. 360 | p_b5 = max(excess * b5_ratio, p_b5_min) 361 | p_b10 = max(excess * b10_ratio, p_b10_min) 362 | p_b = p_b5 + p_b10 363 | 364 | # mgt5_ratio = P_MGT5_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 365 | # mgt9_ratio = P_MGT9_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 366 | # mgt10_ratio = P_MGT10_MAX / (P_MGT5_MAX + P_MGT9_MAX + P_MGT10_MAX) 367 | # mgt5_op_point = (C_BUY - C_MGT5[1]) / C_MGT5[0] 368 | # mgt9_op_point = (C_BUY - C_MGT9[1]) / C_MGT9[0] 369 | # mgt10_op_point = (C_BUY - C_MGT10[1]) / C_MGT10[0] 370 | # p_mgt5 = 0. if excess > p_b else min((p_b - excess) * mgt5_ratio, mgt5_op_point) 371 | # p_mgt9 = 0. if excess > p_b else min((p_b - excess) * mgt9_ratio, mgt9_op_point) 372 | # p_mgt10 = 0. 
if excess > p_b else min((p_b - excess) * mgt10_ratio, mgt10_op_point) 373 | 374 | return np.array([p_b5, p_b10]) -------------------------------------------------------------------------------- /controllers/ppo_controller.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensorflow.keras as keras 5 | import tensorflow_probability as tfp 6 | from tensorflow.keras.optimizers import Adam 7 | from pandapower.control.basic_controller import Controller 8 | 9 | from controllers.buffer import Buffer 10 | from controllers.models import ActorPiModel, CriticVModel, SequenceModel, get_pi_actor, get_v_critic 11 | from setting import * 12 | import utils 13 | 14 | class PPOAgent(Controller): 15 | def __init__(self, net, ids, pv_profile, wt_profile, load_profile, price_profile, 16 | sequence_model_type, training=False, 17 | lr_actor=LR_ACTOR, lr_critic=LR_CRITIC, 18 | **kwargs): 19 | super().__init__(net, **kwargs) 20 | 21 | self.ids = ids 22 | self.pv_profile = pv_profile 23 | self.wt_profile = wt_profile 24 | self.load_profile = load_profile 25 | self.price_profile = price_profile 26 | self.state_seq_shape = STATE_SEQ_SHAPE 27 | self.state_fnn_shape = STATE_FNN_SHAPE 28 | self.n_action = N_ACTION 29 | self.training = training 30 | self.time_step_counter = 0 31 | 32 | # normalization 33 | self.obs_norm = utils.NormalizeObservation() 34 | self.r_norm = utils.NormalizeReward() 35 | 36 | # action bounds 37 | self.max_action = MAX_ACTION 38 | self.min_action = MIN_ACTION 39 | 40 | # hyper parameters 41 | self.batch_size = PPO_BATCH_SIZE 42 | self.policy_clip = POLICY_CLIP 43 | self.target_kl = TARGET_KL 44 | self.train_freq = PPO_TRAIN_FREQ 45 | self.train_iters = PPO_TRAIN_ITERS 46 | 47 | # battery 48 | self.bat5_id = ids.get('bat5') 49 | self.bat5_p_mw = net.storage.at[self.bat5_id, 'p_mw'] 50 | self.bat5_max_e_mwh = net.storage.at[self.bat5_id, 'max_e_mwh'] 51 | self.bat10_id = ids.get('bat10') 52 | self.bat10_p_mw = net.storage.at[self.bat10_id, 'p_mw'] 53 | self.bat10_max_e_mwh = net.storage.at[self.bat10_id, 'max_e_mwh'] 54 | 55 | # other elements 56 | self.pv3_id = ids.get('pv3') 57 | self.pv4_id = ids.get('pv4') 58 | self.pv5_id = ids.get('pv5') 59 | self.pv6_id = ids.get('pv6') 60 | self.pv8_id = ids.get('pv8') 61 | self.pv9_id = ids.get('pv9') 62 | self.loadr1_id = ids.get('load_r1') 63 | self.loadr3_id = ids.get('Load_r3') 64 | self.loadr4_id = ids.get('Load_r4') 65 | self.loadr5_id = ids.get('Load_r5') 66 | self.loadr6_id = ids.get('Load_r6') 67 | self.loadr8_id = ids.get('Load_r8') 68 | self.loadr10_id = ids.get('Load_r10') 69 | self.loadr11_id = ids.get('Load_r11') 70 | self.trafo0_id = ids.get('trafo0') # PCC trafo 71 | 72 | # internal states 73 | self.state = None 74 | self.state_value = None 75 | self.action_logprob = None 76 | self.bat5_soc = 0. 77 | self.bat10_soc = 0. 
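        # Episode bookkeeping: rewards and costs accumulate over one time-series run,
        # while state, state_value and action_logprob hold the pending transition
        # until finalize_step() writes it into the on-policy rollout buffer.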
78 | self.rewards = [] 79 | self.costs = [] 80 | self.last_time_step = None 81 | self.applied = False 82 | 83 | self.history = { 84 | 'price': [], 85 | 'excess': [], 86 | 'nn_bat5_p_mw': [], 87 | 'nn_bat10_p_mw': [], 88 | 'bat5_p_mw': [], 89 | 'bat10_p_mw': [], 90 | 'bat5_soc': [], 91 | 'bat10_soc': []} 92 | 93 | # buffer 94 | self.buffer = Buffer(self.train_freq, STATE_SEQ_SHAPE, STATE_FNN_SHAPE, N_ACTION) 95 | 96 | # networks 97 | self.sequence_model_type = sequence_model_type 98 | self.actor = get_pi_actor(SequenceModel(sequence_model_type, activation='tanh'), ActorPiModel(self.n_action)) 99 | self.actor_dist = tfp.distributions.Normal 100 | self.actor.compile(optimizer=Adam(lr_actor)) 101 | self.critic = get_v_critic(SequenceModel(sequence_model_type, activation='tanh'), CriticVModel()) 102 | self.critic.compile(optimizer=Adam(lr_critic)) 103 | 104 | @tf.function 105 | def cal_kl(self, state_seq_buffer, state_fnn_buffer, action_buffer, action_logprob_buffer): 106 | action_means, action_stds = self.actor([state_seq_buffer, state_fnn_buffer]) 107 | action_dists = self.actor_dist(action_means, action_stds) 108 | action_logprobs = self.cal_logprob(action_dists, action_buffer) 109 | kl = tf.math.reduce_mean(action_logprob_buffer - action_logprobs) 110 | return kl 111 | 112 | @tf.function 113 | def cal_logprob(self, dist, a): 114 | a_logprob = dist.log_prob(a) 115 | a_logprob = tf.math.reduce_sum(a_logprob, axis=-1, keepdims=True) 116 | return a_logprob 117 | 118 | def cal_reward(self, net, t, bat5_soc_prev, bat10_soc_prev) -> float: 119 | price = self.price_profile['price'][t] 120 | cost, normalized_cost = utils.cal_cost( 121 | price=price, 122 | pcc_p_mw=-net.res_trafo.at[self.trafo0_id, 'p_lv_mw'], 123 | bat5_soc_now=self.bat5_soc, 124 | bat5_soc_prev=bat5_soc_prev, 125 | bat10_soc_now=self.bat10_soc, 126 | bat10_soc_prev=bat10_soc_prev 127 | ) 128 | reward = -normalized_cost 129 | 130 | return cost, reward 131 | 132 | def control_step(self, net): 133 | net.storage.at[self.bat5_id, 'p_mw'] = self.bat5_p_mw 134 | net.storage.at[self.bat10_id, 'p_mw'] = self.bat10_p_mw 135 | self.applied = True 136 | 137 | def finalize_step(self, net, t): 138 | super().finalize_step(net, t) 139 | # update soc 140 | bat5_soc_prev = self.bat5_soc 141 | bat10_soc_prev = self.bat10_soc 142 | self.bat5_soc += self.bat5_p_mw * HOUR_PER_TIME_STEP / self.bat5_max_e_mwh 143 | self.bat10_soc += self.bat10_p_mw * HOUR_PER_TIME_STEP / self.bat10_max_e_mwh 144 | 145 | # reward 146 | cost, reward = self.cal_reward(net, t, bat5_soc_prev, bat10_soc_prev) 147 | self.rewards.append(reward) 148 | self.costs.append(cost) 149 | 150 | if not self.training: 151 | return 152 | 153 | # store transition 154 | state_seq, state_fnn = self.obs_norm.normalize(self.state) 155 | reward = self.r_norm.normalize(reward) 156 | self.buffer.store_transition(state_seq, state_fnn, self.action, reward, self.state_value, self.action_logprob) 157 | self.time_step_counter += 1 158 | 159 | if self.time_step_counter % self.train_freq != 0: 160 | return 161 | 162 | # finish trajectory 163 | state = self.get_state(net, t+1) 164 | state_seq, state_fnn = self.obs_norm.normalize(state) 165 | state_seq = tf.expand_dims(tf.convert_to_tensor(state_seq, dtype=tf.float32), axis=0) 166 | state_fnn = tf.expand_dims(tf.convert_to_tensor(state_fnn, dtype=tf.float32), axis=0) 167 | last_value = self.critic([state_seq, state_fnn]) 168 | last_value = tf.squeeze(last_value).numpy() 169 | self.buffer.finish_trajectory(last_value) 170 | 171 | self.learn() 172 | 173 | def 
get_state(self, net, t): 174 | state_seq = np.zeros(self.state_seq_shape) 175 | state_fnn = np.zeros(self.state_fnn_shape) 176 | 177 | for i in range(SEQ_LENGTH): 178 | state_seq[i, 0] = self.pv_profile['pv3'][t + i] 179 | state_seq[i, 1] = self.pv_profile['pv4'][t + i] 180 | state_seq[i, 2] = self.pv_profile['pv5'][t + i] 181 | state_seq[i, 3] = self.pv_profile['pv6'][t + i] 182 | state_seq[i, 4] = self.pv_profile['pv8'][t + i] 183 | state_seq[i, 5] = self.pv_profile['pv9'][t + i] 184 | state_seq[i, 6] = self.pv_profile['pv10'][t + i] 185 | state_seq[i, 7] = self.pv_profile['pv11'][t + i] 186 | state_seq[i, 8] = self.wt_profile['wt7'][t + i] 187 | state_seq[i, 9] = self.load_profile['load_r1'][t + i] 188 | state_seq[i, 10] = self.load_profile['load_r3'][t + i] 189 | state_seq[i, 11] = self.load_profile['load_r4'][t + i] 190 | state_seq[i, 12] = self.load_profile['load_r5'][t + i] 191 | state_seq[i, 13] = self.load_profile['load_r6'][t + i] 192 | state_seq[i, 14] = self.load_profile['load_r8'][t + i] 193 | state_seq[i, 15] = self.load_profile['load_r10'][t + i] 194 | state_seq[i, 16] = self.load_profile['load_r11'][t + i] 195 | state_seq[i, 17] = self.price_profile['price'][t + i] 196 | # state_seq[i, 0] = utils.get_excess(self.pv_profile, self.wt_profile, self.load_profile, t+i) 197 | # state_seq[i, 1] = self.price_profile['price'][t + i] 198 | 199 | state_fnn[0] = self.bat5_soc 200 | state_fnn[1] = self.bat10_soc 201 | 202 | return state_seq, state_fnn 203 | 204 | def is_converged(self, net) -> bool: 205 | return self.applied 206 | 207 | def learn(self): 208 | kl = 0. 209 | for _ in range(self.train_iters): 210 | state_seq_buffer, \ 211 | state_fnn_buffer, \ 212 | action_buffer, \ 213 | action_logprob_buffer, \ 214 | return_buffer, \ 215 | advantage_buffer, \ 216 | batches = self.buffer.sample(self.batch_size) 217 | 218 | # batch update 219 | for batch in batches: 220 | state_seq_batch = tf.convert_to_tensor(state_seq_buffer[batch], dtype=tf.float32) 221 | state_fnn_batch = tf.convert_to_tensor(state_fnn_buffer[batch], dtype=tf.float32) 222 | action_batch = tf.convert_to_tensor(action_buffer[batch], dtype=tf.float32) 223 | action_logprob_batch = tf.convert_to_tensor(action_logprob_buffer[batch], dtype=tf.float32) 224 | return_batch = tf.convert_to_tensor(return_buffer[batch], dtype=tf.float32) 225 | advantage_batch = tf.convert_to_tensor(advantage_buffer[batch], dtype=tf.float32) 226 | 227 | if kl < 1.5 * self.target_kl: 228 | actor_loss = self.update_actor(state_seq_batch, state_fnn_batch, action_batch, action_logprob_batch, advantage_batch) 229 | critic_loss = self.update_critic(state_seq_batch, state_fnn_batch, return_batch) 230 | 231 | # kl divergence 232 | if kl < 1.5 * self.target_kl: 233 | state_seq_buffer = tf.convert_to_tensor(state_seq_buffer, dtype=tf.float32) 234 | state_fnn_buffer = tf.convert_to_tensor(state_fnn_buffer, dtype=tf.float32) 235 | action_buffer = tf.convert_to_tensor(action_buffer, dtype=tf.float32) 236 | action_logprob_buffer = tf.convert_to_tensor(action_logprob_buffer, dtype=tf.float32) 237 | kl = self.cal_kl(state_seq_buffer, state_fnn_buffer, action_buffer, action_logprob_buffer) 238 | 239 | utils.log_actor_critic_info(actor_loss, critic_loss) 240 | self.buffer.clear() 241 | 242 | def load(self, run=1): 243 | self.obs_norm.load(dir=os.path.join('.', 'rms', 'PPO', self.sequence_model_type, str(run))) 244 | self.load_models(dir=os.path.join('.', 'model_weights', 'PPO', self.sequence_model_type, str(run))) 245 | 246 | def load_models(self, dir): 247 | 
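        # Weights are stored per run under ./model_weights/PPO/<sequence_model_type>/<run>/
        # (see load() above), with separate actor and critic weight files.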
print('... Loading Models ...') 248 | self.actor.load_weights(os.path.join(dir, 'actor_weights')) 249 | self.critic.load_weights(os.path.join(dir, 'critic_weights')) 250 | 251 | def model_info(self): 252 | self.actor.summary() 253 | self.critic.summary() 254 | 255 | def policy(self, net, state): 256 | state_seq, state_fnn = self.obs_norm.normalize(state, update=False) 257 | state_seq = tf.expand_dims(tf.convert_to_tensor(state_seq, dtype=tf.float32), axis=0) 258 | state_fnn = tf.expand_dims(tf.convert_to_tensor(state_fnn, dtype=tf.float32), axis=0) 259 | 260 | action_mean, action_std = self.actor([state_seq, state_fnn]) 261 | action_std = action_std if self.training else 0. 262 | action_dist = self.actor_dist(action_mean, action_std) 263 | nn_action = action_dist.sample() 264 | nn_action = np.clip(nn_action, -NN_BOUND, NN_BOUND) 265 | action_logprob = self.cal_logprob(action_dist, nn_action) 266 | state_value = self.critic([state_seq, state_fnn]) 267 | 268 | # remove batch dim 269 | nn_action = tf.squeeze(nn_action, axis=0).numpy() 270 | # reduce to single value 271 | action_logprob = tf.squeeze(action_logprob).numpy() 272 | state_value = tf.squeeze(state_value).numpy() 273 | 274 | # mg action 275 | p_b5_min = max(P_B5_MIN, (SOC_MIN - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP) 276 | p_b5_max = min(P_B5_MAX, (SOC_MAX - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP) 277 | p_b10_min = max(P_B10_MIN, (SOC_MIN - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP) 278 | p_b10_max = min(P_B10_MAX, (SOC_MAX - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP) 279 | 280 | # invalid action masking 281 | self.min_action[ACTION_IDX.get('p_b5')] = p_b5_min 282 | self.min_action[ACTION_IDX.get('p_b10')] = p_b10_min 283 | self.max_action[ACTION_IDX.get('p_b5')] = p_b5_max 284 | self.max_action[ACTION_IDX.get('p_b10')] = p_b10_max 285 | mg_action = utils.scale_to_mg(nn_action, self.min_action, self.max_action) 286 | 287 | return mg_action, nn_action, action_logprob, state_value 288 | 289 | def reset(self): 290 | # reset internal states 291 | self.state = None 292 | self.state_value = None 293 | self.action_logprob = None 294 | self.bat5_soc = 0. 295 | self.bat10_soc = 0. 296 | self.rewards = [] 297 | self.costs = [] 298 | self.last_time_step = None 299 | self.applied = False 300 | self.history = { 301 | 'price': [], 302 | 'excess': [], 303 | 'nn_bat5_p_mw': [], 304 | 'nn_bat10_p_mw': [], 305 | 'bat5_p_mw': [], 306 | 'bat10_p_mw': [], 307 | 'bat5_soc': [], 308 | 'bat10_soc': []} 309 | 310 | def save(self, run=1): 311 | self.obs_norm.save(dir=os.path.join('.', 'rms', 'PPO', self.sequence_model_type, str(run))) 312 | self.save_models(dir=os.path.join('.', 'model_weights', 'PPO', self.sequence_model_type, str(run))) 313 | 314 | def save_models(self, dir): 315 | print('... 
Saving Models ...') 316 | self.actor.save_weights(os.path.join(dir, 'actor_weights')) 317 | self.critic.save_weights(os.path.join(dir, 'critic_weights')) 318 | 319 | @tf.function 320 | def update_actor(self, state_seq_batch, state_fnn_batch, action_batch, action_logprob_batch, advantage_batch): 321 | with tf.GradientTape() as tape: 322 | action_means, action_stds = self.actor([state_seq_batch, state_fnn_batch]) 323 | action_dists = self.actor_dist(action_means, action_stds) 324 | action_logprobs = self.cal_logprob(action_dists, action_batch) 325 | prob_ratio = tf.math.exp(action_logprobs - action_logprob_batch) 326 | 327 | surrogate_obj = prob_ratio * advantage_batch 328 | clipped_surrogate_obj = tf.clip_by_value(prob_ratio, 1-self.policy_clip, 1+self.policy_clip) * advantage_batch 329 | actor_loss = -tf.math.reduce_mean( 330 | tf.math.minimum(surrogate_obj, clipped_surrogate_obj) 331 | ) 332 | actor_grads = tape.gradient(actor_loss, self.actor.trainable_variables) 333 | self.actor.optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables)) 334 | 335 | return actor_loss 336 | 337 | @tf.function 338 | def update_critic(self, state_seq_batch, state_fnn_batch, return_batch): 339 | huber_loss = keras.losses.Huber() 340 | with tf.GradientTape() as tape: 341 | critic_values = self.critic([state_seq_batch, state_fnn_batch]) 342 | critic_loss = huber_loss(return_batch, critic_values) 343 | critic_grads = tape.gradient(critic_loss, self.critic.trainable_variables) 344 | self.critic.optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables)) 345 | 346 | return critic_loss 347 | 348 | @tf.function 349 | def update_sequence_model(self, state_seq_batch, state_fnn_batch, return_batch): 350 | huber_loss = keras.losses.Huber() 351 | with tf.GradientTape() as tape: 352 | critic_values = self.critic([state_seq_batch, state_fnn_batch]) 353 | critic_loss = huber_loss(return_batch, critic_values) 354 | seq_grads = tape.gradient(critic_loss, self.sequence_model.trainable_variables) 355 | seq_grads = [tf.clip_by_norm(g, 1.0) for g in seq_grads] 356 | self.sequence_model.optimizer.apply_gradients(zip(seq_grads, self.sequence_model.trainable_variables)) 357 | 358 | def time_step(self, net, t): 359 | # action selection 360 | self.state = self.get_state(net, t) 361 | mg_action, nn_action, self.action_logprob, self.state_value = self.policy(net, self.state) 362 | self.bat5_p_mw, self.bat10_p_mw = mg_action 363 | self.action = nn_action 364 | # utils.log_trans_info(self.state, nn_action, t) 365 | 366 | # history 367 | self.history['price'].append(round(self.price_profile['price'][t], 3)) 368 | excess = net.res_sgen['p_mw'].sum() - net.res_load['p_mw'].sum() 369 | self.history['excess'].append(round(excess, 3)) 370 | self.history['nn_bat5_p_mw'].append(round(self.action[ACTION_IDX['p_b5']], 3)) 371 | self.history['nn_bat10_p_mw'].append(round(self.action[ACTION_IDX['p_b10']], 3)) 372 | self.history['bat5_p_mw'].append(round(self.bat5_p_mw, 3)) 373 | self.history['bat10_p_mw'].append(round(self.bat10_p_mw, 3)) 374 | self.history['bat5_soc'].append(round(self.bat5_soc, 3)) 375 | self.history['bat10_soc'].append(round(self.bat10_soc, 3)) 376 | 377 | self.applied = False 378 | self.last_time_step = t -------------------------------------------------------------------------------- /controllers/td3_controller.py: -------------------------------------------------------------------------------- 1 | ''' 2 | class: 3 | - TD3Agent 4 | - adapt_param_noise() 5 | - adjust_action_noise() 6 | - 
calculate_distance() 7 | - calculate_reward() 8 | - control_step() 9 | - get_state() 10 | - is_converged() 11 | - learn() 12 | - load_models() 13 | - model_info() 14 | - perturb_policy() 15 | - policy() 16 | - reset() 17 | - save_models() 18 | - update_actor() 19 | - update_critics() 20 | - update_sequence_model() 21 | - update_target_networks() 22 | - time_step() 23 | ''' 24 | 25 | import logging 26 | import os 27 | from typing import Dict 28 | import numpy as np 29 | 30 | import tensorflow as tf 31 | import tensorflow.keras as keras 32 | from tensorflow.keras.optimizers import Adam 33 | 34 | from pandapower.control.basic_controller import Controller 35 | from controllers.models import ActorMuModel, CriticQModel, SequenceModel, get_mu_actor, get_q_critic 36 | from controllers.buffer import ReplayBuffer, PrioritizedReplayBuffer 37 | from setting import * 38 | import utils 39 | 40 | class TD3Agent(Controller): 41 | def __init__(self, net, ids, pv_profile, wt_profile, load_profile, price_profile, 42 | noise_type = 'action', sequence_model_type='none', use_pretrained_sequence_model=False, 43 | n_epochs=None, training=False, 44 | delay=2, gamma=GAMMA, lr_actor=LR_ACTOR, lr_critic=LR_CRITIC, 45 | buffer_size=50000, batch_size=128, epsilon_p=0.001, **kwargs): 46 | super().__init__(net, **kwargs) 47 | self.ids = ids 48 | self.pv_profile = pv_profile 49 | self.wt_profile = wt_profile 50 | self.load_profile = load_profile 51 | self.price_profile = price_profile 52 | self.state_seq_shape = STATE_SEQ_SHAPE 53 | self.state_fnn_shape = STATE_FNN_SHAPE 54 | self.n_action = N_ACTION 55 | self.use_pretrained_sequence_model = use_pretrained_sequence_model 56 | self.training = training 57 | self.noise_type = noise_type 58 | self.action_noise_scale = ACTION_NOISE_SCALE 59 | self.action_noise_scale_ = ACTION_NOISE_SCALE 60 | self.param_noise_adapt_rate = PARAM_NOISE_ADAPT_RATE 61 | self.param_noise_bound = PARAM_NOISE_BOUND 62 | self.param_noise_scale = PARAM_NOISE_SCALE 63 | self.n_epochs = n_epochs 64 | self.update_freq = UPDATE_FREQ 65 | self.update_times = UPDATE_TIMES 66 | self.warmup = WARMUP 67 | self.delay = delay 68 | self.gamma = gamma 69 | self.batch_size = batch_size 70 | self.epsilon_p = epsilon_p 71 | 72 | # counter 73 | self.time_step_counter = 0 74 | self.learn_step_counter = 0 75 | 76 | # normalization 77 | self.obs_norm = utils.NormalizeObservation() 78 | self.a_norm = utils.NormalizeAction() 79 | self.r_norm = utils.NormalizeReward() 80 | 81 | # action bounds 82 | self.max_action = MAX_ACTION 83 | self.min_action = MIN_ACTION 84 | 85 | # generator 86 | # self.mgt5_id = ids.get('mgt5') 87 | # self.mgt5_p_mw = net.sgen.at[self.mgt5_id, 'p_mw'] 88 | # self.mgt9_id = ids.get('mgt9') 89 | # self.mgt9_p_mw = net.sgen.at[self.mgt9_id, 'p_mw'] 90 | # self.mgt10_id = ids.get('mgt10') 91 | # self.mgt10_p_mw = net.sgen.at[self.mgt10_id, 'p_mw'] 92 | 93 | # battery 94 | self.bat5_id = ids.get('bat5') 95 | self.bat5_p_mw = net.storage.at[self.bat5_id, 'p_mw'] 96 | self.bat5_max_e_mwh = net.storage.at[self.bat5_id, 'max_e_mwh'] 97 | self.bat10_id = ids.get('bat10') 98 | self.bat10_p_mw = net.storage.at[self.bat10_id, 'p_mw'] 99 | self.bat10_max_e_mwh = net.storage.at[self.bat10_id, 'max_e_mwh'] 100 | 101 | # internal states 102 | self.prev_state = None 103 | self.bat5_soc = 0. 104 | self.bat10_soc = 0. 
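        # Transition bookkeeping for off-policy learning: prev_state and action hold
        # the (s, a) pair chosen in time_step(); finalize_step() then observes the
        # next state and reward and stores the normalized transition in the
        # prioritized replay buffer.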
105 | self.action = None 106 | self.rewards = [] 107 | self.costs = [] 108 | self.history = { 109 | 'price': [], 110 | # 'mgt5_p_mw': [round(self.mgt5_p_mw, 3)], 111 | # 'mgt9_p_mw': [round(self.mgt9_p_mw, 3)], 112 | # 'mgt10_p_mw': [round(self.mgt10_p_mw, 3)], 113 | 'excess': [], 114 | 'nn_bat5_p_mw': [], 115 | 'nn_bat10_p_mw': [], 116 | 'bat5_p_mw': [], 117 | 'bat10_p_mw': [], 118 | 'bat5_soc': [], 119 | 'bat10_soc': []} 120 | self.last_time_step = None 121 | self.applied = False 122 | 123 | # other elements 124 | self.pv3_id = ids.get('pv3') 125 | self.pv4_id = ids.get('pv4') 126 | self.pv5_id = ids.get('pv5') 127 | self.pv6_id = ids.get('pv6') 128 | self.pv8_id = ids.get('pv8') 129 | self.pv9_id = ids.get('pv9') 130 | self.loadr1_id = ids.get('load_r1') 131 | self.loadr3_id = ids.get('Load_r3') 132 | self.loadr4_id = ids.get('Load_r4') 133 | self.loadr5_id = ids.get('Load_r5') 134 | self.loadr6_id = ids.get('Load_r6') 135 | self.loadr8_id = ids.get('Load_r8') 136 | self.loadr10_id = ids.get('Load_r10') 137 | self.loadr11_id = ids.get('Load_r11') 138 | self.trafo0_id = ids.get('trafo0') # PCC trafo 139 | 140 | # buffer 141 | # self.buffer = ReplayBuffer(buffer_size, self.state_seq_shape, self.state_fnn_shape, self.n_action) 142 | self.buffer = PrioritizedReplayBuffer(buffer_size, self.state_seq_shape, self.state_fnn_shape, self.n_action) 143 | 144 | # models 145 | self.sequence_model_type = sequence_model_type 146 | if self.sequence_model_type == 'none': 147 | # actor critic 148 | self.actor = get_mu_actor(SequenceModel(sequence_model_type, name='sequence_model'), ActorMuModel(self.n_action)) 149 | self.perturbed_actor = get_mu_actor(SequenceModel(sequence_model_type, name='sequence_model'), ActorMuModel(self.n_action)) 150 | self.target_actor = get_mu_actor(SequenceModel(sequence_model_type, name='sequence_model'), ActorMuModel(self.n_action)) 151 | self.critic1 = get_q_critic(SequenceModel(sequence_model_type, name='sequence_model'), CriticQModel()) 152 | self.critic2 = get_q_critic(SequenceModel(sequence_model_type, name='sequence_model'), CriticQModel()) 153 | self.target_critic1 = get_q_critic(SequenceModel(sequence_model_type, name='sequence_model'), CriticQModel()) 154 | self.target_critic2 = get_q_critic(SequenceModel(sequence_model_type, name='sequence_model'), CriticQModel()) 155 | else: 156 | # sequence model 157 | self.sequence_model = SequenceModel(sequence_model_type, name='sequence_model') 158 | self.sequence_model.compile(optimizer=Adam(learning_rate=lr_critic, epsilon=1e-5)) 159 | 160 | # actor critic 161 | self.actor = get_mu_actor(self.sequence_model, ActorMuModel(self.n_action)) 162 | self.perturbed_actor = get_mu_actor(self.sequence_model, ActorMuModel(self.n_action)) 163 | self.target_actor = get_mu_actor(self.sequence_model, ActorMuModel(self.n_action)) 164 | self.critic1 = get_q_critic(self.sequence_model, CriticQModel()) 165 | self.critic2 = get_q_critic(self.sequence_model, CriticQModel()) 166 | self.target_critic1 = get_q_critic(self.sequence_model, CriticQModel()) 167 | self.target_critic2 = get_q_critic(self.sequence_model, CriticQModel()) 168 | 169 | self.actor.compile(optimizer=Adam(learning_rate=lr_actor, epsilon=1e-5)) 170 | self.critic1.compile(optimizer=Adam(learning_rate=lr_critic, epsilon=1e-5)) 171 | self.critic2.compile(optimizer=Adam(learning_rate=lr_critic, epsilon=1e-5)) 172 | 173 | # initialization 174 | if self.training: 175 | if self.use_pretrained_sequence_model: 176 | file_path = os.path.join('.', 'pretrained_sequence_model', 
self.sequence_model_type, 'pretrained_sequence_model_weights.hdf5') 177 | self.sequence_model.load_weights(file_path, by_name=True) 178 | self.perturbed_actor.set_weights(self.actor.get_weights()) 179 | self.update_target_networks(tau=1) 180 | 181 | def adapt_param_noise(self, d): 182 | if d <= self.param_noise_bound: 183 | self.param_noise_scale *= self.param_noise_adapt_rate 184 | else: 185 | self.param_noise_scale /= self.param_noise_adapt_rate 186 | 187 | def adjust_action_noise(self, adjust=True): 188 | if not adjust: 189 | return 190 | else: 191 | self.action_noise_scale -= self.action_noise_scale_ / self.n_epochs 192 | self.action_noise_scale = max(self.action_noise_scale, 0.1) 193 | 194 | # distance for parameter noise 195 | @tf.function 196 | def calculate_distance(self, state_seq_batch, state_fnn_batch): 197 | actions = self.actor([state_seq_batch, state_fnn_batch]) 198 | perturbed_actions = self.perturbed_actor([state_seq_batch, state_fnn_batch]) 199 | d = tf.math.square(actions - perturbed_actions) 200 | d = tf.math.reduce_mean(d) 201 | d = tf.math.sqrt(d) 202 | return d 203 | 204 | def calculate_reward(self, net, t): 205 | price = self.price_profile['price'][t - 1] 206 | cost, normalized_cost = utils.cal_cost( 207 | price=price, 208 | pcc_p_mw=-net.res_trafo.at[self.trafo0_id, 'p_lv_mw'], 209 | # mgt5_p_mw=self.mgt5_p_mw, 210 | # mgt9_p_mw=self.mgt9_p_mw, 211 | # mgt10_p_mw=self.mgt10_p_mw, 212 | bat5_soc_now=self.bat5_soc, 213 | bat5_soc_prev=self.prev_state[1][0], 214 | bat10_soc_now=self.bat10_soc, 215 | bat10_soc_prev=self.prev_state[1][1], 216 | # ids=self.ids, 217 | # t=t, 218 | # net=net 219 | ) 220 | reward = -normalized_cost 221 | 222 | # invalid action penalty 223 | # nn_bat_p_mw = self.action * np.array([P_B5_MAX, P_B10_MAX]) 224 | # valid_bat_p_mw = np.array([self.bat5_p_mw, self.bat10_p_mw]) 225 | # extra_reward = utils.extra_reward(nn_bat_p_mw, valid_bat_p_mw) 226 | # reward += extra_reward 227 | 228 | return cost, reward 229 | 230 | def control_step(self, net): 231 | # net.sgen.at[self.mgt5_id, 'p_mw'] = self.mgt5_p_mw 232 | # net.sgen.at[self.mgt9_id, 'p_mw'] = self.mgt9_p_mw 233 | # net.sgen.at[self.mgt10_id, 'p_mw'] = self.mgt10_p_mw 234 | net.storage.at[self.bat5_id, 'p_mw'] = self.bat5_p_mw 235 | net.storage.at[self.bat10_id, 'p_mw'] = self.bat10_p_mw 236 | self.applied = True 237 | 238 | def finalize_step(self, net, t): 239 | super().finalize_step(net, t) 240 | 241 | # next time step 242 | t += 1 243 | 244 | # update soc 245 | self.bat5_soc += self.bat5_p_mw * HOUR_PER_TIME_STEP / self.bat5_max_e_mwh 246 | self.bat10_soc += self.bat10_p_mw * HOUR_PER_TIME_STEP / self.bat10_max_e_mwh 247 | 248 | # observe transition 249 | state = self.get_state(net, t) 250 | cost, reward = self.calculate_reward(net, t) 251 | self.rewards.append(reward) 252 | self.costs.append(cost) 253 | 254 | if self.training: 255 | # store transition 256 | normalized_prev_state = self.obs_norm.normalize(self.prev_state) 257 | normalized_state = self.obs_norm.normalize(state) 258 | normalized_action = self.a_norm.normalize(self.action) 259 | normalized_reward = self.r_norm.normalize(reward) 260 | self.buffer.store_transition(normalized_prev_state[0], normalized_prev_state[1], normalized_action, normalized_reward, normalized_state[0], normalized_state[1]) 261 | 262 | # update networks 263 | self.learn() 264 | 265 | def get_state(self, net, t): 266 | state_seq = np.zeros(self.state_seq_shape) 267 | state_fnn = np.zeros(self.state_fnn_shape) 268 | 269 | for i in range(SEQ_LENGTH): 270 | 
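            # Sequence part of the observation: a SEQ_LENGTH-step window of PV, wind,
            # load and price profile values for time steps t .. t+SEQ_LENGTH-1, which
            # feeds the sequence model (conv1d/rnn/transformer) shared by the actor
            # and critics.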
state_seq[i, 0] = self.pv_profile['pv3'][t + i] 271 | state_seq[i, 1] = self.pv_profile['pv4'][t + i] 272 | state_seq[i, 2] = self.pv_profile['pv5'][t + i] 273 | state_seq[i, 3] = self.pv_profile['pv6'][t + i] 274 | state_seq[i, 4] = self.pv_profile['pv8'][t + i] 275 | state_seq[i, 5] = self.pv_profile['pv9'][t + i] 276 | state_seq[i, 6] = self.pv_profile['pv10'][t + i] 277 | state_seq[i, 7] = self.pv_profile['pv11'][t + i] 278 | state_seq[i, 8] = self.wt_profile['wt7'][t + i] 279 | state_seq[i, 9] = self.load_profile['load_r1'][t + i] 280 | state_seq[i, 10] = self.load_profile['load_r3'][t + i] 281 | state_seq[i, 11] = self.load_profile['load_r4'][t + i] 282 | state_seq[i, 12] = self.load_profile['load_r5'][t + i] 283 | state_seq[i, 13] = self.load_profile['load_r6'][t + i] 284 | state_seq[i, 14] = self.load_profile['load_r8'][t + i] 285 | state_seq[i, 15] = self.load_profile['load_r10'][t + i] 286 | state_seq[i, 16] = self.load_profile['load_r11'][t + i] 287 | # state_seq[i, 17] = utils.get_excess(self.pv_profile, self.wt_profile, self.load_profile, t+i) 288 | state_seq[i, 17] = self.price_profile['price'][t + i] 289 | 290 | # state_seq[i, 0] = utils.get_excess(self.pv_profile, self.wt_profile, self.load_profile, t+i) 291 | # state_seq[i, 1] = self.price_profile['price'][t + i] 292 | 293 | state_fnn[0] = self.bat5_soc 294 | state_fnn[1] = self.bat10_soc 295 | 296 | return state_seq, state_fnn 297 | 298 | def is_converged(self, net) -> bool: 299 | return self.applied 300 | 301 | def learn(self): 302 | if self.buffer.buffer_counter < self.batch_size: 303 | return 304 | 305 | if self.buffer.buffer_counter < self.warmup: 306 | return 307 | 308 | if self.time_step_counter % self.update_freq != 0: 309 | return 310 | 311 | for _ in range(self.update_times): 312 | self.update() 313 | 314 | def load_models(self, dir='model_weights', run=1): 315 | print('... 
Loading Models ...') 316 | self.actor.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'actor_weights')) 317 | self.critic1.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'critic1_weights')) 318 | self.critic2.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'critic2_weights')) 319 | self.target_actor.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_actor_weights')) 320 | self.target_critic1.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_critic1_weights')) 321 | self.target_critic2.load_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_critic2_weights')) 322 | 323 | def model_info(self): 324 | self.actor.summary() 325 | self.critic1.summary() 326 | 327 | @tf.function 328 | def perturb_policy(self): 329 | # if self.sequence_model_type == 'none': 330 | # perturbed_actor_weights = self.perturbed_actor.trainable_weights 331 | # actor_weights = self.actor.trainable_weights 332 | # else: 333 | # perturbed_actor_weights = self.perturbed_actor.get_layer('actor_mu_model_1').trainable_weights 334 | # actor_weights = self.actor.get_layer('actor_mu_model').trainable_weights 335 | perturbed_actor_weights = self.perturbed_actor.get_layer('actor_mu_model_1').trainable_weights 336 | actor_weights = self.actor.get_layer('actor_mu_model').trainable_weights 337 | 338 | for (perturbed_weights, weights) in zip(perturbed_actor_weights, actor_weights): 339 | perturbed_weights.assign(weights + tf.random.normal(shape=tf.shape(weights), mean=0., stddev=self.param_noise_scale)) 340 | 341 | def policy(self, net, t, state): 342 | # network outputs 343 | if self.time_step_counter < self.warmup and self.training: 344 | # warmup 345 | nn_action = np.random.uniform(low=-NN_BOUND, high=NN_BOUND, size=(self.n_action,)) 346 | else: 347 | state_seq, state_fnn = self.obs_norm.normalize(state, update=False) 348 | # add batch index 349 | tf_state_seq = tf.expand_dims(tf.convert_to_tensor(state_seq, dtype=tf.float32), axis=0) 350 | tf_state_fnn = tf.expand_dims(tf.convert_to_tensor(state_fnn, dtype=tf.float32), axis=0) 351 | 352 | if self.training: 353 | # param noise 354 | if self.noise_type == 'param': 355 | tf_action = self.perturbed_actor([tf_state_seq, tf_state_fnn], training=self.training) 356 | tf_action = tf.squeeze(tf_action, axis=0) # remove batch index 357 | nn_action = tf_action.numpy() 358 | # action noise 359 | else: 360 | tf_action = self.actor([tf_state_seq, tf_state_fnn], training=self.training) 361 | tf_action = tf.squeeze(tf_action, axis=0) # remove batch index 362 | nn_action = tf_action.numpy() 363 | if t % 100 == 0: 364 | print(f'nn outputs = {nn_action}') 365 | nn_action += np.random.normal(loc=0., scale=self.action_noise_scale, size=(self.n_action,)) 366 | # testing 367 | else: 368 | tf_action = self.actor([tf_state_seq, tf_state_fnn], training=self.training) 369 | tf_action = tf.squeeze(tf_action, axis=0) # remove batch index 370 | nn_action = tf_action.numpy() 371 | nn_action = np.clip(nn_action, -NN_BOUND, NN_BOUND) 372 | 373 | # mg action 374 | p_b5_min = max(P_B5_MIN, (SOC_MIN - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP) 375 | p_b5_max = min(P_B5_MAX, (SOC_MAX - self.bat5_soc) * self.bat5_max_e_mwh / HOUR_PER_TIME_STEP) 376 | p_b10_min = max(P_B10_MIN, (SOC_MIN - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP) 377 | p_b10_max = min(P_B10_MAX, (SOC_MAX - self.bat10_soc) * self.bat10_max_e_mwh / HOUR_PER_TIME_STEP) 378 | 379 | # invalid action 
clipping 380 | # mgt5_p_mw, mgt9_p_mw, mgt10_p_mw, bat5_p_mw, bat10_p_mw = utils.scale_to_mg(nn_action, self.max_action, self.min_action) 381 | # bat5_p_mw = np.clip(bat5_p_mw, p_b5_min, p_b5_max) 382 | # bat10_p_mw = np.clip(bat10_p_mw, p_b10_min, p_b10_max) 383 | 384 | # invalid action masking 385 | self.min_action[ACTION_IDX.get('p_b5')] = p_b5_min 386 | self.min_action[ACTION_IDX.get('p_b10')] = p_b10_min 387 | self.max_action[ACTION_IDX.get('p_b5')] = p_b5_max 388 | self.max_action[ACTION_IDX.get('p_b10')] = p_b10_max 389 | bat5_p_mw, bat10_p_mw = utils.scale_to_mg(nn_action, self.min_action, self.max_action) 390 | 391 | mg_action = np.array([bat5_p_mw, bat10_p_mw]) 392 | self.time_step_counter += 1 393 | return mg_action, nn_action 394 | 395 | def reset(self): 396 | # init states 397 | # self.mgt5_p_mw = 0. 398 | # self.mgt9_p_mw = 0. 399 | # self.mgt10_p_mw = 0. 400 | self.bat5_p_mw = 0. 401 | self.bat10_p_mw = 0. 402 | self.prev_state = None 403 | self.bat5_soc = 0. 404 | self.bat10_soc = 0. 405 | self.action = None 406 | self.rewards = [] 407 | self.costs = [] 408 | self.history = { 409 | 'price': [], 410 | # 'mgt5_p_mw': [round(self.mgt5_p_mw, 3)], 411 | # 'mgt9_p_mw': [round(self.mgt9_p_mw, 3)], 412 | # 'mgt10_p_mw': [round(self.mgt10_p_mw, 3)], 413 | 'excess': [], 414 | 'nn_bat5_p_mw': [], 415 | 'nn_bat10_p_mw': [], 416 | 'bat5_p_mw': [], 417 | 'bat10_p_mw': [], 418 | 'bat5_soc': [], 419 | 'bat10_soc': []} 420 | self.last_time_step = None 421 | self.applied = False 422 | 423 | if not self.training: 424 | return 425 | 426 | # parameter scheduling 427 | if self.noise_type == 'action': 428 | self.adjust_action_noise() 429 | self.buffer.schedule_beta(beta_inc=0.01) 430 | 431 | def save_models(self, dir='model_weights', run=1): 432 | print('... 
Saving Models ...') 433 | self.actor.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'actor_weights')) 434 | self.critic1.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'critic1_weights')) 435 | self.critic2.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'critic2_weights')) 436 | self.target_actor.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_actor_weights')) 437 | self.target_critic1.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_critic1_weights')) 438 | self.target_critic2.save_weights(os.path.join(dir, self.sequence_model_type, str(run), 'target_critic2_weights')) 439 | 440 | def update(self): 441 | # sample 442 | state_seq_batch, state_fnn_batch, action_batch, reward_batch, next_state_seq_batch, next_state_fnn_batch, idxs, weights = self.buffer.sample(self.batch_size) 443 | state_seq_batch = tf.convert_to_tensor(state_seq_batch, dtype=tf.float32) 444 | state_fnn_batch = tf.convert_to_tensor(state_fnn_batch, dtype=tf.float32) 445 | action_batch = tf.convert_to_tensor(action_batch, dtype=tf.float32) 446 | reward_batch = tf.convert_to_tensor(reward_batch, dtype=tf.float32) 447 | next_state_seq_batch = tf.convert_to_tensor(next_state_seq_batch, dtype=tf.float32) 448 | next_state_fnn_batch = tf.convert_to_tensor(next_state_fnn_batch, dtype=tf.float32) 449 | weights = tf.convert_to_tensor(weights, dtype=tf.float32) 450 | 451 | # update critics 452 | critic_loss, target_values, td_errs = self.update_critics(state_seq_batch, state_fnn_batch, action_batch, reward_batch, next_state_seq_batch, next_state_fnn_batch, weights) 453 | self.update_priority(td_errs, idxs) 454 | self.learn_step_counter += 1 455 | 456 | # update sequence model 457 | if (self.sequence_model_type != 'none') and (not self.use_pretrained_sequence_model): 458 | self.update_sequence_model(state_seq_batch, state_fnn_batch, action_batch, target_values) 459 | 460 | if self.learn_step_counter % self.delay != 0: 461 | return 462 | 463 | # update actor 464 | actor_loss = self.update_actor(state_seq_batch, state_fnn_batch) 465 | 466 | # parameter noise 467 | if self.noise_type == 'param': 468 | self.perturb_policy() 469 | d = self.calculate_distance(state_seq_batch, state_fnn_batch) 470 | self.adapt_param_noise(d) 471 | 472 | # update targets 473 | self.update_target_networks() 474 | 475 | @tf.function 476 | def update_actor(self, state_seq_batch, state_fnn_batch): 477 | # trainable variables 478 | if self.sequence_model_type == 'none': 479 | actor_vars = self.actor.trainable_variables 480 | else: 481 | actor_vars = self.actor.get_layer('actor_mu_model').trainable_variables 482 | 483 | # gradient descent 484 | with tf.GradientTape() as tape: 485 | actions = self.actor([state_seq_batch, state_fnn_batch], training=True) 486 | actions = self.a_norm.tf_normalize(actions) 487 | q_values = self.critic1([state_seq_batch, state_fnn_batch, actions], training=True) 488 | actor_loss = -tf.math.reduce_mean(q_values) 489 | actor_grads = tape.gradient(actor_loss, actor_vars) 490 | self.actor.optimizer.apply_gradients(zip(actor_grads, actor_vars)) 491 | 492 | return actor_loss 493 | 494 | @tf.function 495 | def update_critics(self, state_seq_batch, state_fnn_batch, action_batch, reward_batch, next_state_seq_batch, next_state_fnn_batch, weights): 496 | # Issue: https://github.com/tensorflow/tensorflow/issues/35928 497 | # with tf.GradientTape(persistent=True) as tape: 498 | 499 | # target actions 500 | target_actions = 
self.target_actor([next_state_seq_batch, next_state_fnn_batch], training=True) 501 | target_actions += tf.clip_by_value(tf.random.normal(shape=(self.batch_size, self.n_action), stddev=0.2), -0.5, 0.5) 502 | target_actions = tf.clip_by_value(target_actions, -NN_BOUND, NN_BOUND) 503 | target_actions = self.a_norm.tf_normalize(target_actions) 504 | 505 | # target values 506 | target_q_value1 = self.target_critic1([next_state_seq_batch, next_state_fnn_batch, target_actions], training=True) 507 | target_q_value2 = self.target_critic2([next_state_seq_batch, next_state_fnn_batch, target_actions], training=True) 508 | target_values = reward_batch + self.gamma * tf.math.minimum(target_q_value1, target_q_value2) 509 | 510 | # td errors 511 | td_errs = target_values - self.critic1([state_seq_batch, state_fnn_batch, action_batch]) 512 | 513 | # trainable variables 514 | if self.sequence_model_type == 'none': 515 | critic1_vars = self.critic1.trainable_variables 516 | critic2_vars = self.critic2.trainable_variables 517 | else: 518 | critic1_vars = self.critic1.get_layer('critic_q_model').trainable_variables 519 | critic2_vars = self.critic2.get_layer('critic_q_model_1').trainable_variables 520 | 521 | huber_loss = keras.losses.Huber() 522 | # update critic model 1 523 | with tf.GradientTape() as tape1: 524 | critic_loss1 = huber_loss(weights*target_values, weights*self.critic1([state_seq_batch, state_fnn_batch, action_batch], training=True)) 525 | critic_grads1 = tape1.gradient(critic_loss1, critic1_vars) 526 | self.critic1.optimizer.apply_gradients(zip(critic_grads1, critic1_vars)) 527 | 528 | # update critic model 2 529 | with tf.GradientTape() as tape2: 530 | critic_loss2 = huber_loss(weights*target_values, weights*self.critic2([state_seq_batch, state_fnn_batch, action_batch], training=True)) 531 | critic_grads2 = tape2.gradient(critic_loss2, critic2_vars) 532 | self.critic2.optimizer.apply_gradients(zip(critic_grads2, critic2_vars)) 533 | 534 | return critic_loss1, target_values, td_errs 535 | 536 | def update_priority(self, td_errs, idxs): 537 | priorities = np.abs(td_errs.numpy().flatten()) + self.epsilon_p 538 | for idx, p in zip(idxs, priorities): 539 | self.buffer.update_tree(idx, p) 540 | 541 | @tf.function 542 | def update_sequence_model(self, state_seq_batch, state_fnn_batch, action_batch, target_values): 543 | huber_loss = keras.losses.Huber() 544 | with tf.GradientTape() as tape: 545 | critic_loss = huber_loss(target_values, self.critic1([state_seq_batch, state_fnn_batch, action_batch], training=True)) 546 | critic_loss += huber_loss(target_values, self.critic2([state_seq_batch, state_fnn_batch, action_batch], training=True)) 547 | critic_loss /= (2 * SEQ_LENGTH) 548 | seq_grads = tape.gradient(critic_loss, self.sequence_model.trainable_variables) 549 | seq_grads = [tf.clip_by_norm(g, 1.0) for g in seq_grads] 550 | self.sequence_model.optimizer.apply_gradients(zip(seq_grads, self.sequence_model.trainable_variables)) 551 | 552 | @tf.function 553 | def update_target_networks(self, tau=0.005): 554 | if self.sequence_model_type == 'none': 555 | target_actor_weights = self.target_actor.trainable_weights 556 | actor_weights = self.actor.trainable_weights 557 | target_critic1_weights = self.target_critic1.trainable_weights 558 | critic1_weights = self.critic1.trainable_weights 559 | target_critic2_weights = self.target_critic2.trainable_weights 560 | critic2_weights = self.critic2.trainable_weights 561 | else: 562 | target_actor_weights = 
self.target_actor.get_layer('actor_mu_model_2').trainable_weights 563 | actor_weights = self.actor.get_layer('actor_mu_model').trainable_weights 564 | target_critic1_weights = self.target_critic1.get_layer('critic_q_model_2').trainable_weights 565 | critic1_weights = self.critic1.get_layer('critic_q_model').trainable_weights 566 | target_critic2_weights = self.target_critic2.get_layer('critic_q_model_3').trainable_weights 567 | critic2_weights = self.critic2.get_layer('critic_q_model_1').trainable_weights 568 | 569 | # update target actor 570 | for target_weight, weight in zip(target_actor_weights, actor_weights): 571 | target_weight.assign(tau * weight + (1 - tau) * target_weight) 572 | 573 | # update target critic1 574 | for target_weight, weight in zip(target_critic1_weights, critic1_weights): 575 | target_weight.assign(tau * weight + (1 - tau) * target_weight) 576 | 577 | # update target critic2 578 | for target_weight, weight in zip(target_critic2_weights, critic2_weights): 579 | target_weight.assign(tau * weight + (1 - tau) * target_weight) 580 | 581 | def time_step(self, net, t): 582 | # action 583 | state = self.get_state(net, t) 584 | mg_action, nn_action = self.policy(net, t, state) 585 | self.bat5_p_mw, self.bat10_p_mw = mg_action 586 | self.action = nn_action 587 | self.prev_state = state 588 | if self.training: 589 | utils.log_trans_info(state, mg_action, t+5, freq=50) 590 | 591 | # history 592 | self.history['price'].append(round(self.price_profile['price'][t], 3)) 593 | excess = net.res_sgen['p_mw'].sum() - net.res_load['p_mw'].sum() 594 | self.history['excess'].append(round(excess, 3)) 595 | # self.history['mgt5_p_mw'].append(round(self.mgt5_p_mw, 5)) 596 | # self.history['mgt9_p_mw'].append(round(self.mgt9_p_mw, 5)) 597 | # self.history['mgt10_p_mw'].append(round(self.mgt10_p_mw, 5)) 598 | self.history['nn_bat5_p_mw'].append(round(self.action[ACTION_IDX['p_b5']], 3)) 599 | self.history['nn_bat10_p_mw'].append(round(self.action[ACTION_IDX['p_b10']], 3)) 600 | self.history['bat5_p_mw'].append(round(self.bat5_p_mw, 3)) 601 | self.history['bat10_p_mw'].append(round(self.bat10_p_mw, 3)) 602 | self.history['bat5_soc'].append(round(self.bat5_soc, 3)) 603 | self.history['bat10_soc'].append(round(self.bat10_soc, 3)) 604 | 605 | self.applied = False 606 | self.last_time_step = t --------------------------------------------------------------------------------