├── .gitignore ├── IW_baselines └── IW_ablation.py ├── LICENSE ├── README.md ├── agents ├── base.py ├── inverter_policy.py └── nn_policy.py ├── algo └── ppo.py ├── data ├── ARX-0 ├── data_2017_baseline.pkl ├── data_TMY3_baseline.pkl └── param_IW-nn-1800 ├── docs ├── figs │ ├── framework.pdf │ └── framework.png └── slides.pdf ├── env └── inverter.py ├── environment.yml ├── inverter_baselines ├── inverter_QP.py ├── inverter_acopf.py ├── inverter_no-control.py └── inverter_volt-var.py ├── main_IW.py ├── main_inverter.py ├── mypypower └── newtonpf.py ├── network ├── IEEE-37 │ └── Ybus.mat ├── IEEE-37_linearized │ ├── B.mat │ └── R.mat ├── bracket.m ├── extract_phase_37feeder.m └── ieee37.m ├── run_exp1.sh └── utils ├── inverter_utils.py ├── network.py └── ppo_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/Irradiance_1sec.mat 2 | data/Loads_1sec.mat 3 | runs/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | venv/ 112 | #ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /IW_baselines/IW_ablation.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | import gym 4 | import eplus_env 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore", category=UserWarning) 8 | 9 | import argparse 10 | import numpy as np 11 | import pandas as pd 12 | import copy 13 | import pickle 14 | import pdb 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.utils.data as data 20 | import torch.optim as optim 21 | from torch.distributions import MultivariateNormal, Normal 22 | from torch.utils.tensorboard import SummaryWriter 23 | 24 | main_path = os.path.abspath(os.path.join(__file__, '..')) 25 | sys.path.insert(0, main_path) 26 | 27 | from algo.ppo import PPO 28 | from agents.nn_policy import NeuralController 29 | from utils.network import LSTM 30 | from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory 31 | 32 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 33 | DEVICE 34 | 35 | parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning') 36 | parser.add_argument('--gamma', type=float, default=0.9, metavar='G', 37 | help='discount factor (default: 0.9)') 38 | parser.add_argument('--seed', type=int, default=42, metavar='N', 39 | help='random seed (default: 42)') 40 | parser.add_argument('--lr', type=float, default=5e-4, metavar='G', 41 | help='Learning Rate') 42 | parser.add_argument('--lam', type=int, default=10, metavar='N', 43 | help='random seed (default: 42)') 44 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 45 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 46 | help='PPO update episode (default: 1); If -1, do not update weights') 47 | parser.add_argument('--T', type=int, default=12, metavar='N', 48 | help='Planning Horizon (default: 12)') 49 | parser.add_argument('--step', type=int, default=300*3, metavar='N', 50 | help='Time Step in Simulation, Unit in Seconds (default: 900)') # 15 Minutes Now! 51 | parser.add_argument('--exp_name', type=str, default='vanilla_update', 52 | help='save name') 53 | parser.add_argument('--eta', type=int, default=3, 54 | help='Hyper Parameter for Balancing Comfort and Energy') 55 | parser.add_argument('--model_no', type = int, default = 1800, help = '') 56 | args = parser.parse_args() 57 | 58 | def update_parameters(agent, sigma=0.1, K = 4): 59 | loader = agent._get_training_samples() 60 | for i in range(K): 61 | for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader: 62 | n_batch = states.shape[0] 63 | # pdb.set_trace() 64 | mu, _ = agent.policy.nn(state, disturbance, disturbance.transpose(0, 1))#, x_lowers = x_lowers, x_uppers = x_uppers)# T x n x n_action 65 | #mus, sigma_sqs, proj_loss = self.policy.forward(states, ) # x, u: T x N x Dim. 66 | sigma_sqs = torch.ones_like(mus) * sigma**2 67 | 68 | log_probs, entropies = agent.evaluate_action(mus[0], actions, sigma_sqs) 69 | 70 | ratio = torch.exp(log_probs.squeeze()-old_logprobs) 71 | surr1 = ratio * advantages 72 | surr2 = torch.clamp(ratio, 1-agent.clip_param, 1+agent.clip_param) * advantages 73 | loss = -torch.min(surr1, surr2).mean() 74 | agent.optimizer.zero_grad() 75 | ## Auxiliary losses 76 | loss -= torch.mean(entropies) * 0.01 77 | loss += agent.lam * proj_loss 78 | 79 | loss.backward() 80 | nn.utils.clip_grad_norm_(agent.policy.nn.parameters(), 100) 81 | self.optimizer.step() 82 | self.policy_old.nn.load_state_dict(self.policy.nn.state_dict()) 83 | 84 | def main(): 85 | torch.manual_seed(args.seed) 86 | writer = SummaryWriter(comment = args.exp_name) 87 | 88 | # Create Simulation Environment 89 | env = gym.make('Eplus-IW-test-v0') 90 | 91 | # Specify variable names for control problem 92 | obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"] 93 | state_name = ["Indoor Air Temp."] 94 | dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"] 95 | ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"] 96 | target_name = ["Indoor Temp. Setpoint"] 97 | dist_name = dist_name + target_name 98 | 99 | n_state = len(state_name) 100 | n_ctrl = 1 #len(ctrl_name) 101 | n_dist = len(dist_name) 102 | eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode 103 | step = args.step # step: Timestep; Unit in seconds 104 | T = args.T # T: Number of timesteps in the planning horizon 105 | tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day 106 | 107 | # Read Information on Weather, Occupancy, and Target Setpoint 108 | obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl") 109 | disturbance = obs_2017[dist_name] 110 | # Min-Max Normalization 111 | obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only 112 | dist_min = obs_TMY3[dist_name].min() 113 | dist_max = obs_TMY3[dist_name].max() 114 | disturbance = (disturbance - dist_min)/(dist_max - dist_min) 115 | state_min = obs_TMY3[state_name].min().values 116 | state_max = obs_TMY3[state_name].max().values 117 | memory = Replay_Memory() 118 | 119 | ## Load pretrained LSTM policy weights 120 | ''' 121 | Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data) 122 | The LSTM also expects "setpoint" as part of the disturbance term. 123 | ''' 124 | network = LSTM(n_state, n_ctrl, n_dist) 125 | network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no))) 126 | 127 | ## Load thermodynamics model to construct the polytope 128 | ''' 129 | New model also expects states, actions, and disturbances to be MinMaxNormalized 130 | ''' 131 | model_dict ={'a': np.array([0.934899]), 132 | 'bu': np.array([0.024423]), 133 | 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02, 134 | 2.38717578e-03, -3.52816030e-03, 3.32528746e-03, 7.19267820e-03]), 135 | 'Pm': 1 # Upper bound of u; 136 | } 137 | policy = NeuralController(T, step, network, RC_flag = False, **model_dict) 138 | agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam) 139 | 140 | dir = 'results' 141 | if not os.path.exists(dir): 142 | os.mkdir(dir) 143 | 144 | multiplier = 1 # Normalize the reward for better training performance 145 | n_step = 96 #timesteps per day 146 | 147 | sigma = 0.1 148 | sigma_min = 0.01 149 | sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps 150 | 151 | timeStep, obs, isTerminal = env.reset() 152 | start_time = pd.datetime(year = env.start_year, month = env.start_mon, day = env.start_day) 153 | cur_time = start_time 154 | obs_dict = make_dict(obs_name, obs) 155 | 156 | # Save for record 157 | timeStamp = [start_time] 158 | observations = [obs] 159 | actions_taken = [] 160 | 161 | for i_episode in range(tol_eps): 162 | ## Save for Parameter Updates 163 | rewards = [] 164 | real_rewards = [] 165 | 166 | for t in range(n_step): 167 | state = np.array([obs_dict[name] for name in state_name]) 168 | state = (state-state_min)/(state_max-state_min) 169 | 170 | x_upper = obs_2017['x_upper'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values 171 | x_lower = obs_2017['x_lower'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values 172 | ## Margin 173 | #x_lower+=0.025 174 | #x_upper-=0.025 175 | 176 | x_upper = (x_upper-state_min)/(state_max-state_min) 177 | x_lower = (x_lower-state_min)/(state_max-state_min) 178 | 179 | dt = disturbance[cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values # T x n_dist 180 | 181 | ## Update the model in the controller 182 | # CVXPY expects np.array for parameters 183 | agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1]) 184 | agent.memory.x_lowers.append(torch.tensor(x_lower).float()) 185 | agent.memory.x_uppers.append(torch.tensor(x_upper).float()) 186 | 187 | state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state 188 | dt = torch.tensor(dt).float() 189 | agent.memory.states.append(state) 190 | agent.memory.disturbance.append(dt) 191 | 192 | ## Use policy_old to select action 193 | #mu, sigma_sq, _ = agent.forward(state, dt.unsqueeze(1), current = False) # mu, sigma_sq: T x 1 x Dim. 194 | mu, _ = agent.policy_old.nn(state, dt.unsqueeze(1))# T x n x n_action 195 | sigma_sq = torch.ones_like(mu) * sigma**2 196 | ''' 197 | ## Myopic Limit: A hack to make sure the projected actions do not result in tiny violations 198 | margin = 0.1/(state_max-state_min) 199 | u_limits = np.array([x_lower[0]+margin.item(), x_upper[0]-margin.item()]) - model_dict['a'] * state.item() - model_dict['bd'].dot(dt[0, :-1].numpy()) 200 | u_limits /= model_dict['bu'] 201 | u_limits = np.clip(u_limits, 0, 1) 202 | #pdb.set_trace() 203 | ''' 204 | action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = None) 205 | agent.memory.actions.append(action.detach().clone()) 206 | agent.memory.old_logprobs.append(old_logprob.detach().clone()) 207 | 208 | ## Project without Gradient 209 | mu[0] = action 210 | mu = mu.squeeze().detach() 211 | #pdb.set_trace() 212 | try: 213 | action_feasible = agent.policy_old.proj_layer(state[0], dt[:, :-1], 214 | mu, torch.zeros_like(mu), torch.zeros_like(mu), 215 | torch.tensor(x_upper).float(), 216 | torch.tensor(x_lower).float(), 217 | torch.tensor(agent.policy_old.u_upper.value).float(), 218 | torch.tensor(agent.policy_old.u_lower.value).float() 219 | ) 220 | action = action_feasible[0][0] 221 | except: 222 | ## The feasible set is empty; Use some heuristics 223 | sp = np.mean((x_lower+x_upper)/2) 224 | if state.item() < sp: 225 | action = torch.tensor([1]) 226 | else: 227 | action = torch.tensor([0]) 228 | #pdb.set_trace() 229 | 230 | 231 | SWT = 20 + 45 * action.item() 232 | if (SWT<30): 233 | HWOEN = -30 # De Facto Off 234 | action = torch.zeros_like(action) 235 | SWT = 20 236 | else: 237 | HWOEN = 30 # De Facto On 238 | if np.isnan(SWT): 239 | SWT = 20 240 | action4env = (HWOEN, SWT) 241 | 242 | # Before step 243 | print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}') 244 | for _ in range(3): 245 | timeStep, obs, isTerminal = env.step(action4env) 246 | 247 | obs_dict = make_dict(obs_name, obs) 248 | reward = R_func(obs_dict, SWT-20, eta) 249 | 250 | # Per step 251 | real_rewards.append(reward) 252 | ''' 253 | bl = obs_2017['rewards'][cur_time] 254 | rewards.append((reward-bl) / multiplier) 255 | ''' 256 | bl = 0#obs_2017['rewards'][cur_time] 257 | rewards.append((reward-bl) / 15) # multiplier 258 | # print(f'Reward={reward}, BL={bl}') 259 | # Save for record 260 | cur_time = start_time + pd.Timedelta(seconds = timeStep) 261 | timeStamp.append(cur_time) 262 | observations.append(obs) 263 | actions_taken.append(action4env) 264 | 265 | writer.add_scalar('Reward', np.mean(real_rewards), i_episode) 266 | writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode) 267 | print("{}, reward: {}".format(cur_time, np.mean(real_rewards))) 268 | 269 | advantages = Advantage_func(rewards, args.gamma) 270 | agent.memory.advantages.append(advantages) 271 | 272 | # if -1, do not update parameters 273 | if args.update_episode == -1: 274 | agent.memory.clear_memory() # Prevent memory overflow 275 | elif (i_episode >0) & (i_episode % args.update_episode ==0): 276 | agent.update_parameters(sigma = sigma, K = 8) 277 | sigma = max(sigma_min, sigma-sigma_step) 278 | 279 | obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name) 280 | obs_df = obs_df.drop(columns=ctrl_name) 281 | action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name) 282 | obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True) 283 | obs_df.to_pickle("results/obs_"+args.exp_name+".pkl") 284 | 285 | if __name__ == '__main__': 286 | main() 287 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Bingqing Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PROF: Projected Feasibility 2 | 3 | This is the official repository that implements the following paper: 4 | 5 | > *Chen, Bingqing, Priya Donti, Kyri Baker, J. Zico Kolter, and Mario Berges. "Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization." In Proceedings of the Twelfth ACM International Conference on Future Energy Systems (e-energy '21). pp. 199–210. 2021.* 6 | 7 | [[slides]](docs/slides.pdf)[[paper]](https://dl.acm.org/doi/10.1145/3447555.3464874) 8 | 9 | # Overview 10 | 11 | PROjected Feasibility (PROF) is a method to enforce convex operational constraints within neural policies, by incorporating a differentiable projection layer within a neural network-based policy to enforce that all learned actions are feasible. We then update the policy end-to-end by propagating gradients through this differentiable projection layer, making the policy cognizant of the operational constraints. The result is a powerful neural policy that can flexibly optimize performance on the true underlying dynamics, while still satisfying the specified constraints. 12 | 13 | We demonstrate our method on two applications: energy-efficient building operation and inverter control. 14 | - In the building control case, PROF outperforms other RL agents, while maintaining temperature within the deadband except when the control is saturated. 15 | - In the inverter control setting, PROF satisfies the constraints 100% of the time and minimizes curtailment as well as possible within its conservative safe set after learning safely for half a day. 16 | 17 | **Framework.** 18 | 19 | 20 | 21 | 22 | 23 | # Code Usage 24 | ### Clone repository 25 | ``` 26 | git clone https://github.com/INFERLab/PROF.git 27 | cd PROF 28 | ``` 29 | 30 | ### Set up the environment 31 | Set up the virtual environment with your preferred environment/package manager. 32 | 33 | The instruction here is based on **conda**. ([Install conda](https://docs.anaconda.com/anaconda/install/)) 34 | ``` 35 | conda env create --file environment.yml 36 | condo activate nn-w-proj 37 | ``` 38 | 39 | ### File Structure 40 | ``` 41 | . 42 | ├── agents 43 | │ ├── base.py # Implement a controller that instantiate the projection problem given building parameters 44 | │ └── nn_policy.py # Inherit the controller from base.py; Forward pass: NN + Differentiable projection 45 | │ └── inverter_policy.py # Policy for inverter: NN + Differentiable projection 46 | ├── algo 47 | │ └── ppo.py # A PPO trainer 48 | ├── env 49 | │ └── inverter.py # Implements the IEEE 37-bus case 50 | ├── utils 51 | │ ├── network.py # Implements neural network modules, e.g. MLP and LSTM 52 | │ └── ppo_utils.py # Helper function for PPO trainer, e.g. Replay_Memory, Advantage_func 53 | ├── network # Matlab code for linearizing grid model; Data to construct IEEE 37-bus case; 54 | └── mypypower # Include some small changes from PyPower source code to allow customization 55 | 56 | ``` 57 | 58 | ### Running 59 | You can replicate our experiments for *Experiment 1: Energy-efficient Building Operation* with `main_IW.py` and *Experiment 2: Inverter Control* with `main_inverter.py` 60 | 61 | 62 | ### Feedback 63 | 64 | Feel free to send any questions/feedback to: [Bingqing Chen](mailto:bingqinc@andrew.cmu.edu) 65 | 66 | ### Citation 67 | 68 | If you use PROF, please cite us as follows: 69 | 70 | ``` 71 | @inproceedings{chen2021enforcing, 72 | author = {Chen, Bingqing and Donti, Priya L. and Baker, Kyri and Kolter, J. Zico and Berg\'{e}s, Mario}, 73 | title = {Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization}, 74 | year = {2021}, 75 | isbn = {9781450383332}, 76 | publisher = {Association for Computing Machinery}, 77 | address = {New York, NY, USA}, 78 | url = {https://doi.org/10.1145/3447555.3464874}, 79 | doi = {10.1145/3447555.3464874}, 80 | booktitle = {Proceedings of the Twelfth ACM International Conference on Future Energy Systems}, 81 | pages = {199–210}, 82 | numpages = {12}, 83 | keywords = {inverter control, safe reinforcement learning, implicit layers, differentiable optimization, smart building}, 84 | location = {Virtual Event, Italy}, 85 | series = {e-Energy '21} 86 | } 87 | ``` 88 | -------------------------------------------------------------------------------- /agents/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy as cp 3 | import pdb 4 | 5 | class Controller(): 6 | def __init__(self, T, dt, RC_flag = True, **kwargs): 7 | # dt: planning timestep 8 | # T: planning horizon 9 | # RC_flag: Whether using RC model 10 | # **kwargs: Model Parameters 11 | self.T = T 12 | self.RC_flag = RC_flag 13 | self.err_count = 0 14 | 15 | if RC_flag: 16 | ## RC model: Simulation Study 17 | self.R = kwargs["R"] 18 | self.C = kwargs["C"] 19 | self.Pm = kwargs["Pm"] 20 | self.eta = kwargs["eta"] 21 | self.T_sp = kwargs["theta"] 22 | self.Delta = kwargs["Delta"] 23 | self.sign = kwargs["sign"] #(+) for heating and (-) for cooling 24 | else: 25 | ## ARX model: Hardware-in-the-loop Simulation 26 | self.ap = kwargs["a"] 27 | self.bu = kwargs["bu"] 28 | self.bd = kwargs["bd"] 29 | self.p = len(self.ap) 30 | self.m = len(self.bu) # how many u_prev to consider 31 | self.n_dist = len(self.bd) 32 | self.Pm = kwargs["Pm"] 33 | self.T_sp = 75 34 | self.Delta = 1.8 35 | 36 | # Variable 37 | self.u = cp.Variable(T) 38 | 39 | # Save u_i-u_bar from previous time step 40 | self.u_diff = cp.Parameter(T) 41 | self.v_bar = cp.Parameter(T) 42 | self.w_bar = cp.Parameter(T) 43 | self.objective = cp.sum_squares(self.u-self.u_diff-self.v_bar+self.w_bar) 44 | 45 | ## Info needed for constraints 46 | if RC_flag: 47 | self.x0 = cp.Parameter() 48 | self.d = cp.Parameter(T) 49 | else: 50 | ## Expects [x_{t-p}, ..., x_t] 51 | self.x0 = cp.Parameter(self.p) 52 | self.d = cp.Parameter((T, self.n_dist)) 53 | 54 | # Set default value for constraints 55 | self.u_lower = cp.Parameter(T) 56 | self.u_lower.value = np.tile(0, T) 57 | self.u_upper = cp.Parameter(T) 58 | self.u_upper.value = np.tile(self.Pm, T) 59 | self.x_lower = cp.Parameter(T) 60 | self.x_lower.value = np.tile(self.T_sp-self.Delta, T) 61 | self.x_upper = cp.Parameter(T) 62 | self.x_upper.value = np.tile(self.T_sp+self.Delta, T) 63 | 64 | 65 | if RC_flag: 66 | a = np.exp(-dt/(self.R*self.C)) 67 | b = self.eta * self.R 68 | 69 | lam = np.logspace(1, T, num = T, base = a) 70 | Lam = np.zeros((T, T)) 71 | for i in range(T): 72 | for j in range(i+1): 73 | Lam[i, j] = a**(i-j) 74 | B = np.eye(T)*b*(1-a)*self.Pm 75 | self.d.value = (1-a)*np.tile(32, T) 76 | else: 77 | A = np.eye(self.T) 78 | for i in range(self.T-1): 79 | A[i+1, max(0, i+1-self.p):i+1] = -np.flip(self.ap)[-(i+1):] 80 | Lam = np.linalg.inv(A) 81 | 82 | lam = np.zeros((self.T, self.p)) 83 | for i in range(self.p): 84 | lam[i, i:] = np.flip(self.ap)[:self.p-i] 85 | 86 | ## note: missing the term on u_{t-1} 87 | B = np.zeros((self.T, self.T)) 88 | 89 | for i in range(self.m): 90 | B += np.diag(np.ones(T-i), -i)*self.bu[i]/self.Pm 91 | 92 | self.d.value = np.zeros((T, self.n_dist)) 93 | 94 | # Constraints 95 | self.constraints = [-self.u <= -self.u_lower, 96 | self.u <= self.u_upper] 97 | if RC_flag: 98 | self.constraints += [-Lam@(self.sign*(1-a)*b*self.u+self.d) <= -self.x_lower + lam*self.x0, 99 | Lam@(self.sign*(1-a)*b*self.u+self.d) <= self.x_upper - lam*self.x0] 100 | else: 101 | self.constraints += [-Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= -self.x_lower, 102 | Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= self.x_upper] 103 | 104 | self.Problem = cp.Problem(cp.Minimize(self.objective), 105 | self.constraints) 106 | 107 | def u_update(self, v_bar, w_bar): 108 | self.v_bar.value = v_bar 109 | self.w_bar.value = w_bar 110 | try: 111 | self.Problem.solve() 112 | except: 113 | print("Solver failed") 114 | self.u.value = None 115 | 116 | ## Check solution valid 117 | if self.u.value is not None: 118 | return self.u.value, self.Problem.status 119 | else: 120 | u = (self.x0.value-self.T_sp)/self.Delta 121 | self.err_count += 1 122 | return np.ones(self.T)*np.clip(u, 0, 1)*self.Pm, self.Problem.status 123 | 124 | def updateState(self, x, u_lower = None, u_upper = None, 125 | x_lower = None, x_upper = None, 126 | d = None): # 127 | self.x0.value = x 128 | 129 | # Update constraints if necessary 130 | if u_lower is not None: 131 | if isinstance(u_lower, int) | isinstance(u_lower, float): 132 | self.u_lower.value = np.tile(u_lower, self.T) 133 | else: 134 | assert len(u_lower) == self.T 135 | self.u_lower.value = u_lower 136 | if u_upper is not None: 137 | if isinstance(u_upper, int) | isinstance(u_upper, float): 138 | self.u_upper.value = np.tile(u_upper, self.T) 139 | else: 140 | assert len(u_upper) == self.T 141 | self.u_upper.value = u_upper 142 | if x_lower is not None: 143 | assert len(x_lower) == self.T 144 | self.x_lower.value = x_lower 145 | if x_upper is not None: 146 | assert len(x_upper) == self.T 147 | self.x_upper.value = x_upper 148 | self.T_sp = (x_upper[0]+x_lower[0])/2 149 | self.Delta = (x_upper[0]-x_lower[0])/2 150 | 151 | ## Exog Variables 152 | if d is not None: 153 | assert len(d) == self.T 154 | self.d.value = d 155 | 156 | 157 | class ControllerGroup(): 158 | def __init__(self, T, dt, parameters, RC_flag = True): 159 | self.n_agent = len(parameters) 160 | self.T = T 161 | self.dt = dt 162 | self.RC_flag = RC_flag 163 | self.controller_list = self._init_agents(parameters) 164 | 165 | def _init_agents(self, parameters): 166 | controller_list = [] 167 | for param in parameters: 168 | controller_list.append(Controller(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **param)) 169 | return controller_list 170 | 171 | def updateState(self, x_list, u_list = None, d_list = None, x_lower_list = None, x_upper_list = None): 172 | for idx, controller in enumerate(self.controller_list): 173 | controller.updateState(x_list[idx], d = d_list[idx] if d_list is not None else None, x_lower = x_lower_list[idx] if x_lower_list is not None else None, x_upper = x_upper_list[idx] if x_upper_list is not None else None) 174 | 175 | ## Initialize the controller with action from prev timestep 176 | if u_list is not None: 177 | u_bar = np.mean(u_list, axis = 0) 178 | controller.u_diff.value = u_list[idx] - u_bar 179 | else: 180 | controller.u_diff.value = np.zeros(self.T) 181 | 182 | def u_update(self, v_bar, w_bar): 183 | u_list = [] 184 | #print("v_bar", v_bar) 185 | #print("w_bar", w_bar.shape) 186 | for idx, controller in enumerate(self.controller_list): 187 | #print(idx) 188 | u_i, status = controller.u_update(v_bar, w_bar) 189 | if status in ["infeasible", "unbounded"]: 190 | print(idx, status) 191 | u_list.append(u_i) 192 | 193 | u_bar = np.mean(u_list, axis = 0) 194 | for idx, controller in enumerate(self.controller_list): 195 | controller.u_diff.value = u_list[idx] - u_bar 196 | return u_bar, np.array(u_list) 197 | 198 | -------------------------------------------------------------------------------- /agents/inverter_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy as cp 3 | from cvxpylayers.torch import CvxpyLayer 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.distributions import MultivariateNormal, Normal 9 | from copy import deepcopy 10 | import operator 11 | from functools import reduce 12 | 13 | import pdb 14 | 15 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 16 | 17 | 18 | ### Can move to utils.network if appropriate 19 | class Net(nn.Module): 20 | def __init__(self, n_bus, n_inverters, shared_hidden_layer_sizes, indiv_hidden_layer_sizes, n_input = 3): 21 | super(Net, self).__init__() 22 | #### Multi-headed architecture 23 | # "Shared" model 24 | # Set up non-linear network of Linear -> BatchNorm -> ReLU 25 | layer_sizes = [n_input * n_bus] + shared_hidden_layer_sizes[:-1] 26 | layers = reduce(operator.add, 27 | [[nn.Linear(a,b), nn.ReLU(), ] # nn.BatchNorm1d(b), nn.Dropout(p=0.2)] 28 | for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])]) 29 | layers += [nn.Linear(layer_sizes[-1], shared_hidden_layer_sizes[-1])] 30 | self.base_net = nn.Sequential(*layers) 31 | 32 | # Individual inverter model 33 | layer_sizes = [shared_hidden_layer_sizes[-1]] + indiv_hidden_layer_sizes 34 | layers = reduce(operator.add, 35 | [[nn.Linear(a,b), nn.ReLU(), ] # nn.BatchNorm1d(b), nn.Dropout(p=0.2)] 36 | for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])]) 37 | layers += [nn.Linear(layer_sizes[-1], 2)] # output p and q 38 | indiv_model = nn.Sequential(*layers) 39 | self.inverter_nets = nn.ModuleList( 40 | [deepcopy(indiv_model) for _ in range(n_inverters)] 41 | ) 42 | 43 | # ## Simple fully connected architecture 44 | 45 | # # Set up non-linear network of Linear -> BatchNorm -> ReLU -> Dropout layers 46 | # self.n_inverters = n_inverters 47 | # layer_sizes = [4 * n_inverters] + shared_hidden_layer_sizes 48 | # layers = reduce(operator.add, 49 | # [[nn.Linear(a,b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.2)] 50 | # for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])]) 51 | # layers += [nn.Linear(layer_sizes[-1], 2 * n_inverters)] 52 | # self.nn = nn.Sequential(*layers) 53 | 54 | 55 | def forward(self, state): 56 | ''' 57 | Input: Vector of voltage magnitudes and angles, real and reactive power demand 58 | Output: Vector of inverter P setpoints, vector of inverter Q setpoints 59 | ''' 60 | 61 | # Multi-headed architecture 62 | z = self.base_net(state) 63 | res = [inverter(z) for inverter in self.inverter_nets] 64 | Ps = torch.cat([x[:, [0]] for x in res], dim=1) 65 | Qs = torch.cat([x[:, [1]] for x in res], dim=1) 66 | return Ps, Qs 67 | 68 | # ## Simple fully connected architecture 69 | # z = self.nn(state) 70 | # return z[:, :self.n_inverters], z[:, self.n_inverters:] 71 | 72 | class NeuralController(nn.Module): 73 | def __init__(self, network, memory, lr, lam = 10, scaler = 1000, **env_params): 74 | super(NeuralController, self).__init__() 75 | self.nn = network 76 | self.optimizer = optim.RMSprop(self.nn.parameters(), lr=lr) 77 | self.lam = lam 78 | self.memory = memory 79 | self.mse = nn.MSELoss() 80 | self.ReLU = nn.ReLU() 81 | 82 | self.n_bus = env_params['n_bus'] 83 | self.gen_idx = env_params['gen_idx'] 84 | self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx] 85 | 86 | H = env_params['H'] 87 | R = H[:, :self.n_bus] 88 | B = H[:, self.n_bus:] 89 | R_new = np.vstack([np.hstack([R[self.gen_idx][:, self.gen_idx], 90 | R[self.gen_idx][:, self.other_idx]]), 91 | np.hstack([R[self.other_idx][:, self.gen_idx], 92 | R[self.other_idx][:, self.other_idx]]) 93 | ]) 94 | B_new = np.vstack([np.hstack([B[self.gen_idx][:, self.gen_idx], 95 | B[self.gen_idx][:, self.other_idx]]), 96 | np.hstack([B[self.other_idx][:, self.gen_idx], 97 | B[self.other_idx][:, self.other_idx]]) 98 | ]) 99 | H_new = np.hstack([R_new, B_new]) 100 | 101 | self.scaler = scaler 102 | self.V0 = env_params['V0'] 103 | self.P0 = env_params['P0'] 104 | self.Q0 = env_params['Q0'] 105 | self.V_upper = env_params['V_upper'] 106 | self.V_lower = env_params['V_lower'] 107 | self.S_rating = env_params['S_rating'] 108 | 109 | # Need to set as nn.Parameter such that to(DEVICE) move these to GPU as well 110 | self.V0_torch = nn.Parameter(torch.tensor(self.V0).float()) 111 | self.V_upper_torch = nn.Parameter(torch.tensor(self.V_upper).float()) 112 | self.V_lower_torch = nn.Parameter(torch.tensor(self.V_lower).float()) 113 | self.H_torch = nn.Parameter(torch.tensor(H_new).float()) 114 | self.P0_torch = nn.Parameter(torch.tensor(self.P0).float()) 115 | self.Q0_torch = nn.Parameter(torch.tensor(self.Q0).float()) 116 | self.S_rating_torch = nn.Parameter(torch.tensor(self.S_rating).float()) 117 | 118 | # Set up projection onto inverter setpoint constraints and linearized voltage constraints 119 | P = cp.Variable(len(self.gen_idx)) 120 | Q = cp.Variable(len(self.gen_idx)) 121 | 122 | # P_tilde and Q_tilde are the pre-projection actions 123 | P_tilde = cp.Parameter(len(self.gen_idx)) 124 | Q_tilde = cp.Parameter(len(self.gen_idx)) 125 | 126 | # No inverter buses 127 | P_nc = cp.Parameter(len(self.other_idx)) 128 | Q_nc = cp.Parameter(len(self.other_idx)) 129 | 130 | P_av = cp.Parameter(len(self.gen_idx)) 131 | 132 | # Voltage: Apply to All Buses 133 | z = cp.hstack([P, P_nc, Q, Q_nc]) # z: (70, ) 134 | constraints = [self.V_lower - self.V0 <= H_new@z, 135 | H_new@z <= self.V_upper - self.V0] 136 | 137 | ## Power: Only applies to Inverters 138 | PQ = cp.vstack([self.P0[self.gen_idx] + P, 139 | self.Q0[self.gen_idx] + Q]) # (2, n) 140 | constraints += [0 <= self.P0[self.gen_idx] + P, 141 | self.P0[self.gen_idx] + P <= P_av, 142 | cp.norm(PQ, axis = 0) <= self.S_rating] 143 | 144 | objective = cp.Minimize(cp.sum_squares(P - P_tilde) + cp.sum_squares(Q - Q_tilde)) 145 | problem = cp.Problem(objective, constraints) 146 | 147 | self.proj_layer = CvxpyLayer(problem, variables=[P, Q], 148 | parameters=[P_tilde, Q_tilde, 149 | P_nc, Q_nc, P_av]) 150 | 151 | self.proj_count = 0 152 | 153 | def forward(self, state, Sbus, P_av, inference_flag = True): 154 | ''' 155 | Input: 156 | state: [dV(k-1), P_nc, Q_nc] 157 | where, 158 | Z_nc = Z - Z0 159 | May get (n, dim) or (dim); 160 | Output: 161 | P, Q (with repsect to the reference point) 162 | ''' 163 | ## Get information for non-controllable loads 164 | P_all = Sbus.real /self.scaler 165 | Q_all = Sbus.imag /self.scaler 166 | if len(Sbus.shape)==1: 167 | P_nc = Sbus.real[self.other_idx] / self.scaler 168 | Q_nc = Sbus.imag[self.other_idx] / self.scaler 169 | elif len(Sbus.shape)==2: 170 | P_nc = Sbus.real[:, self.other_idx] / self.scaler 171 | Q_nc = Sbus.imag[:, self.other_idx] / self.scaler 172 | else: 173 | print("Well, not expected to happen") 174 | 175 | P_tilde, Q_tilde = self.nn(state.to(DEVICE)) # n x n_inverter 176 | 177 | ## During inference if the action is already feasible, not need to project 178 | if inference_flag: 179 | P_tilde = P_tilde.squeeze() 180 | Q_tilde = Q_tilde.squeeze() 181 | if self.is_feasible(P_tilde.detach().clone()/self.scaler, 182 | Q_tilde.detach().clone()/self.scaler, 183 | P_nc, Q_nc, P_av): 184 | P_all[self.gen_idx] = P_tilde.detach().cpu().numpy() / self.scaler 185 | Q_all[self.gen_idx] = Q_tilde.detach().cpu().numpy() / self.scaler 186 | return P_all, Q_all 187 | else: 188 | try: 189 | P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler, 190 | torch.tensor(P_nc).float().to(DEVICE), 191 | torch.tensor(Q_nc).float().to(DEVICE), 192 | torch.tensor(P_av).float().to(DEVICE)) 193 | self.proj_count += 1 194 | P_all[self.gen_idx] = P.detach().cpu().numpy() 195 | Q_all[self.gen_idx] = Q.detach().cpu().numpy() 196 | except: # The solver dies for some reason 197 | P_all[self.gen_idx] = 0 198 | Q_all[self.gen_idx] = 0 199 | return P_all, Q_all 200 | else: 201 | #pdb.set_trace() 202 | P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler, 203 | torch.tensor(P_nc).float().to(DEVICE), 204 | torch.tensor(Q_nc).float().to(DEVICE), 205 | torch.tensor(P_av).float().to(DEVICE)) 206 | proj_loss = self.mse(P.detach(), P_tilde/self.scaler) \ 207 | + self.mse(Q.detach(), Q_tilde/self.scaler) 208 | return P, Q, proj_loss 209 | 210 | def update(self, batch_size = 64, n_batch = 16): 211 | for _ in range(n_batch): 212 | state, Sbus, P_av = self.memory.sample_batch(batch_size = batch_size) 213 | P, Q, proj_loss = self.forward(state, Sbus, P_av, inference_flag = False) 214 | #pdb.set_trace() 215 | curtail = self.ReLU(torch.tensor(P_av).to(DEVICE) - P) 216 | loss = curtail.mean() + self.lam * proj_loss 217 | print(f'curtail = {curtail.mean().item()}, proj_loss = {proj_loss.item()}') 218 | 219 | self.optimizer.zero_grad() 220 | loss.backward() 221 | self.optimizer.step() 222 | 223 | def is_feasible(self, P, Q, P_nc, Q_nc, P_av): 224 | ''' 225 | Input: P, Q (n_bus) 226 | ''' 227 | eps = 1e-6 228 | assert P.ndimension() == 1 229 | 230 | z = torch.cat([P, torch.tensor(P_nc).float().to(DEVICE), 231 | Q, torch.tensor(Q_nc).float().to(DEVICE)], dim = -1) # (70) 232 | v = self.H_torch.matmul(z) # (35) 233 | 234 | if torch.any(v < self.V_lower_torch -self.V0_torch - eps) | torch.any(v > self.V_upper_torch-self.V0_torch+eps): 235 | return False 236 | 237 | P = P + self.P0_torch[self.gen_idx] 238 | Q = Q + self.Q0_torch[self.gen_idx] 239 | PQ = torch.stack([P, Q]) # (2, 21) 240 | if torch.any(torch.norm(PQ, dim = 0) > self.S_rating_torch + eps): 241 | return False 242 | 243 | if torch.any(P < 0-eps) | torch.any(P > torch.tensor(P_av).to(DEVICE)+eps): 244 | return False 245 | else: 246 | return True 247 | -------------------------------------------------------------------------------- /agents/nn_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy as cp 3 | from cvxpylayers.torch import CvxpyLayer 4 | import pdb 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.distributions import MultivariateNormal, Normal 9 | 10 | from utils.network import MLP, LSTM 11 | from agents.base import Controller, ControllerGroup 12 | 13 | class NeuralController(Controller): 14 | def __init__(self, T, dt, network, RC_flag = True, 15 | **parameters): 16 | super().__init__(T, dt, RC_flag = RC_flag, **parameters) 17 | ## Inherited Properties: 18 | # cp.Variable: self.u 19 | # cp.Parameter: self.u_diff; self.v_bar; self.w_bar; 20 | # self.x0; self.d; 21 | # self.x_lower; self.x_upper; 22 | # self.u_lower; self.u_upper; 23 | # self.objective 24 | # self.Problem 25 | # self.constraints = [...] 26 | 27 | ## Inherited Methods: 28 | # updateState() 29 | 30 | ## Use ADMM update rule for the time being 31 | # u_update(self, v_bar, w_bar): 32 | 33 | ''' 34 | LSTM Usage: 35 | lstm = LSTM(n_state, n_action, n_dist) 36 | mu, sigma_sq = lstm.forward(state, disturbance) 37 | Input: 38 | state: n x dim 39 | disturbance: T x n x dist 40 | Output: 41 | mu, sigma_sq: T x n x n_action 42 | ''' 43 | self.nn = network 44 | self.proj_layer = CvxpyLayer(self.Problem, variables = [self.u], 45 | parameters = [self.x0, self.d, 46 | self.u_diff, self.v_bar, self.w_bar, 47 | self.x_upper, self.x_lower, 48 | self.u_upper, self.u_lower]) 49 | self.criterion = nn.MSELoss() # reduction = 'sum' 50 | 51 | def forward(self, state, disturbance, x_lowers = None, x_uppers = None, detach = False): 52 | ''' 53 | Input: 54 | state: (n, n_state) 55 | disturbance: (T, n, n_dist) 56 | x_lowers, x_uppers: (n, T) 57 | Output: 58 | actions, sigma_sq: (T, n, n_action) 59 | #proj_loss: scalar 60 | ''' 61 | T, n_batch, n_dist = disturbance.shape 62 | mus, sigma_sqs = self.nn(state, disturbance)# T x n x n_action 63 | 64 | actions = [] 65 | #TODO: Implement multi-threading 66 | for i in range(n_batch): 67 | mu = mus[:, i] # T x n_action 68 | 69 | if n_batch==1: 70 | if x_lowers is None: 71 | x_lower = torch.tensor(self.x_lower.value).float() 72 | if x_uppers is None: 73 | x_upper = torch.tensor(self.x_upper.value).float() 74 | 75 | else: 76 | x_lower = x_lowers[i] 77 | x_upper = x_uppers[i] 78 | 79 | # The last value is setpoint; Do not use for projection 80 | dt = disturbance[:, i, :-1] # T x n_dist 81 | x0 = state[i] 82 | mu = mu.squeeze(1) # T x 1 ->T 83 | 84 | try: 85 | u_pred = self.proj_layer(x0, dt, 86 | mu, torch.zeros_like(mu), torch.zeros_like(mu), 87 | x_upper, x_lower, 88 | torch.tensor(self.u_upper.value).float(), 89 | torch.tensor(self.u_lower.value).float()) 90 | actions.append(u_pred[0]) 91 | except: 92 | ## The feasible set is empty; Use some heuristics 93 | sp = torch.mean((x_lower+x_upper)/2) 94 | if x0.item() < sp: 95 | actions.append(torch.ones_like(mu)) 96 | else: 97 | actions.append(torch.zeros_like(mu)) 98 | 99 | actions = torch.stack(actions).transpose(0, 1) # T x n 100 | proj_loss = self.criterion(mus.squeeze(-1), actions) 101 | return actions.unsqueeze(-1), sigma_sqs, proj_loss 102 | 103 | class NeuralControllerGroup(ControllerGroup): 104 | def __init__(self, T, dt, parameters, RC_flag = True): 105 | super().__init__(T, dt, parameters, RC_flag = RC_flag) 106 | 107 | ## Inherited Methods: 108 | # updateState() 109 | # u_update() 110 | 111 | def _init_agents(self, parameters): 112 | controller_list = [] 113 | for param in parameters: 114 | controller_list.append(NeuralController(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **param)) 115 | return controller_list 116 | 117 | def u_warmstart(self, x_list): 118 | u_inits = [] 119 | for idx, controller in enumerate(self.controller_list): 120 | u_pred = controller.forward(x_list[idx].reshape(1, -1)) # 1 x n_input 121 | u_inits.append(u_pred.detach().numpy()) 122 | return np.stack(u_inits) 123 | 124 | def append(self, states, u_stars): 125 | for idx, controller in enumerate(self.controller_list): 126 | controller.memory.append((states[idx], u_stars[idx])) 127 | 128 | def update_policy(self, batch_size = 32): 129 | losses = [] 130 | for idx, controller in enumerate(self.controller_list): 131 | loss = controller.update_policy(batch_size) 132 | losses.append(loss) 133 | return np.array(losses) 134 | 135 | -------------------------------------------------------------------------------- /algo/ppo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.data as data 5 | import torch.optim as optim 6 | from torch.distributions import MultivariateNormal, Normal 7 | 8 | import pdb 9 | from copy import deepcopy 10 | 11 | from utils.ppo_utils import Dataset 12 | 13 | class PPO(): 14 | def __init__(self, policy, memory, clip_param = 0.2, lam = 10, lr = 5e-4, n_ctrl = 1): 15 | self.memory = memory 16 | 17 | self.policy = policy 18 | self.policy_old = deepcopy(policy) 19 | 20 | self.clip_param = clip_param 21 | self.optimizer = optim.RMSprop(self.policy.nn.parameters(), lr=lr) 22 | self.lam = lam 23 | 24 | self.n_ctrl = n_ctrl 25 | 26 | # Use the "current" flag to indicate which set of parameters to use 27 | def forward(self, state, disturbance, x_lowers = None, x_uppers = None, current = True): 28 | T, n_batch, n_dist = disturbance.shape 29 | if current == True: 30 | mu, sigma_sq, proj_loss = self.policy.forward(state, disturbance, x_lowers = x_lowers, x_uppers = x_uppers) 31 | else: 32 | mu, sigma_sq, proj_loss = self.policy_old.forward(state, disturbance) 33 | return mu, sigma_sq, proj_loss 34 | 35 | def select_action(self, mu, sigma_sq, u_limits = None): 36 | if self.n_ctrl > 1: 37 | m = MultivariateNormal(mu, torch.diag(sigma_sq.squeeze()).unsqueeze(0)) 38 | else: 39 | m = Normal(mu, sigma_sq**0.5) 40 | action = m.sample() 41 | if u_limits is not None: 42 | action = torch.clamp(action, min = u_limits[0], max = u_limits[1]) 43 | log_prob = m.log_prob(action) 44 | return action, log_prob 45 | 46 | def evaluate_action(self, mu, actions, sigma_sq): 47 | n_batch = len(mu) 48 | if self.n_ctrl > 1: 49 | cov = torch.diag_embed(sigma_sq) 50 | m = MultivariateNormal(mu, cov) 51 | else: 52 | m = Normal(mu, sigma_sq**0.5) 53 | log_prob = m.log_prob(actions) 54 | entropy = m.entropy() 55 | return log_prob, entropy 56 | 57 | def _get_training_samples(self): 58 | states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers = self.memory.sample() 59 | batch_set = Dataset(states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers) 60 | batch_loader = data.DataLoader(batch_set, batch_size=32, shuffle=True, num_workers=2) 61 | return batch_loader 62 | 63 | def update_parameters(self, sigma=0.1, K = 4): 64 | loader = self._get_training_samples() 65 | for i in range(K): 66 | for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader: 67 | n_batch = states.shape[0] 68 | # pdb.set_trace() 69 | mus, sigma_sqs, proj_loss = self.policy.forward(states, disturbance.transpose(0, 1), x_lowers = x_lowers, x_uppers = x_uppers) # x, u: T x N x Dim. 70 | sigma_sqs = torch.ones_like(mus) * sigma**2 71 | log_probs, entropies = self.evaluate_action(mus[0], actions, sigma_sqs) 72 | 73 | ratio = torch.exp(log_probs.squeeze()-old_logprobs) 74 | surr1 = ratio * advantages 75 | surr2 = torch.clamp(ratio, 1-self.clip_param, 1+self.clip_param) * advantages 76 | loss = -torch.min(surr1, surr2).mean() 77 | self.optimizer.zero_grad() 78 | ## Auxiliary losses 79 | loss -= torch.mean(entropies) * 0.01 80 | loss += self.lam * proj_loss 81 | 82 | loss.backward() 83 | nn.utils.clip_grad_norm_(self.policy.nn.parameters(), 100) 84 | self.optimizer.step() 85 | print("Post Step") 86 | self.policy_old.nn.load_state_dict(self.policy.nn.state_dict()) 87 | 88 | ##TODO: Move the update_policy to a Trainer class 89 | def behavior_cloning(self, batch_size): 90 | u_hat, u_star, u_nns = self._get_training_samples(batch_size) 91 | 92 | loss = self.criterion(u_hat, u_star) 93 | loss += self.lam * self.criterion(u_nns, u_hat) # Auxiliary loss 94 | 95 | self.optimizer.zero_grad() 96 | loss.backward() 97 | self.optimizer.step() 98 | 99 | self.predictions = [] 100 | self.targets = [] 101 | return loss.detach() 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /data/ARX-0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/ARX-0 -------------------------------------------------------------------------------- /data/data_2017_baseline.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_2017_baseline.pkl -------------------------------------------------------------------------------- /data/data_TMY3_baseline.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_TMY3_baseline.pkl -------------------------------------------------------------------------------- /data/param_IW-nn-1800: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/param_IW-nn-1800 -------------------------------------------------------------------------------- /docs/figs/framework.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.pdf -------------------------------------------------------------------------------- /docs/figs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.png -------------------------------------------------------------------------------- /docs/slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/slides.pdf -------------------------------------------------------------------------------- /env/inverter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.io 4 | import torch 5 | import pdb 6 | 7 | from mypypower.newtonpf import newtonpf 8 | from pypower.ppoption import ppoption 9 | 10 | Zbase = 1; 11 | Vbase = 4800; 12 | Sbase = Vbase **2 / Zbase 13 | ''' 14 | def getSbus(P, Q, fac = Sbase/1000): 15 | ## This expects P and Q in kW 16 | P = P/fac; 17 | Q = Q/fac; 18 | return P + 1j*Q 19 | ''' 20 | class IEEE37(): 21 | def __init__(self, filePath = './network/IEEE-37', 22 | dataPath = './data'): 23 | Ybus = scipy.io.loadmat(f'{filePath}/Ybus.mat') 24 | self.Ybus = Ybus['Ybus'] 25 | self.n = self.Ybus.shape[0] 26 | self.v_lower = 0.95 27 | self.v_upper = 1.05 28 | 29 | # Load linearized model 30 | R = scipy.io.loadmat(f'{filePath}_linearized/R.mat') 31 | B = scipy.io.loadmat(f'{filePath}_linearized/B.mat') 32 | self.R = R['R'] 33 | self.B = B['B'] 34 | 35 | ## Bus index lists of each type of bus 36 | self.ref = np.array([0]) 37 | self.pv = np.array([], dtype = np.int32) #np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1 38 | self.pq = np.array([i for i in range(self.n) if (i not in self.ref) & (i not in self.pv)], dtype = np.int32) 39 | self.n_pq = len(self.pq) 40 | self.ppopt = ppoption() 41 | 42 | self._get_reference() 43 | self._get_load_and_gen(dataPath = dataPath) 44 | 45 | def getSbus(self, t, wrt_reference = True, w_slack = False): 46 | ''' 47 | Returns the vector of complex bus power injections, that is, generation 48 | minus load. Power is expressed in per unit. 49 | ''' 50 | P = self.P_gen[t] - self.P_l[t] 51 | Q = - self.Q_l[t] 52 | S = P + 1j*Q 53 | P_av = self.P_gen[t] 54 | 55 | if wrt_reference: 56 | S = S - self.S0 57 | 58 | if w_slack: 59 | return S, P_av[self.gen_idx] 60 | else: 61 | return S[-self.n_pq:], P_av[self.gen_idx] 62 | 63 | def step(self, Sbus, wrt_reference = True): 64 | ''' 65 | returns: 66 | voltage magitude, solver flag 67 | ''' 68 | if wrt_reference: 69 | S = self.S0.copy() 70 | S[-len(Sbus):] += Sbus 71 | else: 72 | S = Sbus 73 | V, success, _ = newtonpf(scipy.sparse.csr_matrix(self.Ybus), S, self.V0, self.ref, self.pv, self.pq, self.ppopt) 74 | return np.abs(V), success 75 | 76 | def linear_estimate(self, P, Q, wrt_reference = True): 77 | if wrt_reference: 78 | if torch.is_tensor(P): 79 | return torch.tensor(self.R).float().matmul(P) + torch.tensor(self.B).float().matmul(Q) 80 | else: 81 | return self.R.dot(P) + self.B.dot(Q) 82 | else: 83 | V = self.V0.copy() 84 | delta_p = P-self.P0 85 | delta_q = Q-self.Q0 86 | V[-self.n_pq:] += self.R.dot(delta_p[-self.n_pq:]) + self.B.dot(delta_q[-self.n_pq:]) 87 | return V 88 | 89 | ## Reference Point for Linearization 90 | def _get_reference(self): 91 | # Flat voltage point 92 | self.V0 = np.ones(self.n); 93 | A0 = np.zeros(self.n); 94 | # Corresponding to current injection 95 | J0 = self.Ybus.dot(self.V0*np.exp(1j*A0)); 96 | # Corresponding to power injection 97 | S0 = self.V0*np.exp(1j*A0)*np.conj(J0); 98 | self.P0 = np.real(S0); 99 | self.Q0 = np.imag(S0); 100 | self.S0 = self.P0 + 1j*self.Q0 101 | 102 | ## Load Demand and Generation 103 | def _get_load_and_gen(self, dataPath = './data'): 104 | # Load 105 | self.load_idx = np.array([2, 5, 6, 7, 9, 10, 11, 13, 14, 16, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36]) -1 106 | load = scipy.io.loadmat(f'{dataPath}/Loads_1sec.mat') #(Unit in W) 107 | load = load['Loads'].transpose() # 604800 x 8 108 | self.P_l = np.zeros((load.shape[0], self.n)) 109 | for i, idx in enumerate(self.load_idx): 110 | self.P_l[:, idx] = load[:, i % load.shape[1]] 111 | self.Q_l = 0.5 * self.P_l 112 | # Convert loads to p.u. 113 | self.P_l /= Sbase; 114 | self.Q_l /= Sbase; 115 | 116 | # Generation 117 | solar_rad = scipy.io.loadmat(f'{dataPath}/Irradiance_1sec.mat') 118 | solar_rad = solar_rad['Irr24_seq'].transpose() # # 604800 x 1 119 | 120 | self.gen_idx = np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1 121 | 122 | #% PV capacity [kVA] 123 | self.max_S = np.array([200, 200, 100, 200, 200, 200, 200, 200, 200, 200, 200, 124 | 200, 200, 200, 200, 200, 200, 350, 350, 300, 300]); 125 | self.max_S = self.max_S * 1000 / Sbase # Convert to p.u. 126 | #% Area of the PV array 127 | Area_PV = np.array([100, 100, 100, 200, 200, 200, 200, 200, 200, 100, 128 | 200, 200, 200, 100, 200, 200, 200, 350, 350, 300, 300]); 129 | #% PV efficiency; 130 | PV_Irradiance_to_Power_Efficiency = 1; 131 | 132 | self.P_gen = np.zeros((load.shape[0], self.n)) 133 | gen = solar_rad * Area_PV * PV_Irradiance_to_Power_Efficiency 134 | gen /= Sbase # Convert to p.u. 135 | self.P_gen[:, self.gen_idx] = gen.clip(max = self.max_S.reshape(1, -1)) 136 | 137 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: nn-w-proj 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=conda_forge 7 | - _openmp_mutex=4.5=1_gnu 8 | - ampl-mp=3.1.0=h616b090_1004 9 | - backcall=0.2.0=pyhd3eb1b0_0 10 | - ca-certificates=2021.4.13=h06a4308_1 11 | - certifi=2020.12.5=py39h06a4308_0 12 | - cvxpy=1.1.12=py39hf3d152e_0 13 | - cvxpy-base=1.1.12=py39hde0f152_0 14 | - cyipopt=1.0.3=py39h3c5bb4f_0 15 | - decorator=5.0.7=pyhd3eb1b0_0 16 | - ecos=2.0.8=py39hce5d2b2_0 17 | - future=0.18.2=py39hf3d152e_3 18 | - ipopt=3.13.4=h7ede334_0 19 | - ipykernel=5.3.4=py39hb070fc8_0 20 | - ipython=7.22.0=py39hb070fc8_0 21 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 22 | - jedi=0.17.2=py39h06a4308_1 23 | - jupyter_client=6.1.12=pyhd3eb1b0_0 24 | - jupyter_core=4.7.1=py39h06a4308_0 25 | - ld_impl_linux-64=2.35.1=hea4e1c9_2 26 | - libblas=3.9.0=9_openblas 27 | - libcblas=3.9.0=9_openblas 28 | - libffi=3.3=h58526e2_2 29 | - libgcc-ng=9.3.0=h2828fa1_19 30 | - libgfortran-ng=9.3.0=hff62375_19 31 | - libgfortran5=9.3.0=hff62375_19 32 | - libgomp=9.3.0=h2828fa1_19 33 | - liblapack=3.9.0=9_openblas 34 | - libopenblas=0.3.15=pthreads_h8fe5266_0 35 | - libsodium=1.0.18=h7b6447c_0 36 | - libstdcxx-ng=9.3.0=h6de172a_19 37 | - metis=5.1.0=h58526e2_1006 38 | - mumps-include=5.2.1=ha770c72_10 39 | - mumps-seq=5.2.1=h47a8eb5_10 40 | - ncurses=6.2=h58526e2_4 41 | - numpy=1.20.2=py39hdbf815f_0 42 | - openssl=1.1.1k=h27cfd23_0 43 | - osqp=0.6.2=py39hde0f152_1 44 | - parso=0.7.0=py_0 45 | - pexpect=4.8.0=pyhd3eb1b0_3 46 | - pickleshare=0.7.5=pyhd3eb1b0_1003 47 | - pip=21.0.1=py39h06a4308_0 48 | - prompt-toolkit=3.0.17=pyh06a4308_0 49 | - ptyprocess=0.7.0=pyhd3eb1b0_2 50 | - pygments=2.8.1=pyhd3eb1b0_0 51 | - python=3.9.4=hffdb5ce_0_cpython 52 | - python-dateutil=2.8.1=pyhd3eb1b0_0 53 | - python_abi=3.9=1_cp39 54 | - pyzmq=20.0.0=py39h2531618_1 55 | - qdldl-python=0.1.5=py39hde0f152_0 56 | - readline=8.1=h46c0cb4_0 57 | - scipy=1.6.3=py39hee8e79c_0 58 | - scotch=6.0.9=h0eec0ba_1 59 | - scs=2.1.3=py39h3c5bb4f_0 60 | - setuptools=49.6.0=py39hf3d152e_3 61 | - six=1.16.0=pyh6c4a22f_0 62 | - sqlite=3.35.5=h74cdb3f_0 63 | - tk=8.6.10=h21135ba_1 64 | - tornado=6.1=py39h27cfd23_0 65 | - traitlets=5.0.5=pyhd3eb1b0_0 66 | - tzdata=2021a=he74cb21_0 67 | - wcwidth=0.2.5=py_0 68 | - wheel=0.36.2=pyhd3deb0d_0 69 | - xz=5.2.5=h516909a_1 70 | - zeromq=4.3.4=h2531618_0 71 | - zlib=1.2.11=h516909a_1010 72 | - pip: 73 | - absl-py==0.13.0 74 | - cachetools==4.2.2 75 | - charset-normalizer==2.0.5 76 | - cvxpylayers==0.1.5 77 | - cycler==0.10.0 78 | - diffcp==1.0.16 79 | - google-auth==1.35.0 80 | - google-auth-oauthlib==0.4.6 81 | - grpcio==1.40.0 82 | - idna==3.2 83 | - kiwisolver==1.3.1 84 | - markdown==3.3.4 85 | - matplotlib==3.4.2 86 | - oauthlib==3.1.1 87 | - pillow==8.2.0 88 | - protobuf==3.17.3 89 | - pyasn1==0.4.8 90 | - pyasn1-modules==0.2.8 91 | - pybind11==2.6.2 92 | - pyparsing==2.4.7 93 | - pypower==5.1.15 94 | - requests==2.26.0 95 | - requests-oauthlib==1.3.0 96 | - rsa==4.7.2 97 | - tensorboard==2.6.0 98 | - tensorboard-data-server==0.6.1 99 | - tensorboard-plugin-wit==1.8.0 100 | - threadpoolctl==2.1.0 101 | - torch==1.8.1 102 | - typing-extensions==3.10.0.0 103 | - urllib3==1.26.6 104 | - werkzeug==2.0.1 105 | prefix: /home/bingqinc/anaconda3/envs/nn-w-proj 106 | -------------------------------------------------------------------------------- /inverter_baselines/inverter_QP.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | 3 | import numpy as np 4 | import cvxpy as cp 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.utils.tensorboard import SummaryWriter 9 | 10 | main_path = os.path.abspath(os.path.join(__file__, '..')) 11 | sys.path.insert(0, main_path) 12 | 13 | from env.inverter import IEEE37 14 | 15 | from algo.ppo import PPO 16 | from agents.inverter_policy import Net, NeuralController 17 | from utils.inverter_utils import Replay_Memory 18 | 19 | 20 | import pdb 21 | 22 | import torch 23 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 24 | DEVICE 25 | 26 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning') 27 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G', 28 | help='discount factor (default: 0.98)') 29 | parser.add_argument('--seed', type=int, default=42, metavar='N', 30 | help='random seed (default: 42)') 31 | parser.add_argument('--lam', type=int, default=10, metavar='N', 32 | help='random seed (default: 42)') 33 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G', 34 | help='Learning Rate') 35 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 36 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 37 | help='PPO update episode (default: 1); If -1, do not update weights') 38 | parser.add_argument('--exp_name', type=str, default='inverter_QP', 39 | help='save name') 40 | parser.add_argument('--network_name', type=str, default='ieee37', 41 | help='') 42 | args = parser.parse_args() 43 | 44 | class QP_solver(): 45 | def __init__(self, **env_params): 46 | self.n_bus = env_params['n_bus'] 47 | H = env_params['H'] 48 | self.V0 = env_params['V0'] 49 | self.P0 = env_params['P0'] 50 | self.Q0 = env_params['Q0'] 51 | self.V_upper = env_params['V_upper'] 52 | self.V_lower = env_params['V_lower'] 53 | self.S_rating = env_params['S_rating'] 54 | self.gen_idx = env_params['gen_idx'] 55 | 56 | self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx ] 57 | 58 | R = H[:, :self.n_bus] 59 | B = H[:, self.n_bus:] 60 | R_new = np.vstack([np.hstack([R[self.gen_idx][:, self.gen_idx], 61 | R[self.gen_idx][:, self.other_idx]]), 62 | np.hstack([R[self.other_idx][:, self.gen_idx], 63 | R[self.other_idx][:, self.other_idx]]) 64 | ]) 65 | B_new = np.vstack([np.hstack([B[self.gen_idx][:, self.gen_idx], 66 | B[self.gen_idx][:, self.other_idx]]), 67 | np.hstack([B[self.other_idx][:, self.gen_idx], 68 | B[self.other_idx][:, self.other_idx]]) 69 | ]) 70 | #pdb.set_trace() 71 | H_new = np.hstack([R_new, B_new]) 72 | 73 | # Set up projection onto inverter setpoint constraints and linearized voltage constraints 74 | self.P = cp.Variable(len(self.gen_idx)) 75 | self.Q = cp.Variable(len(self.gen_idx)) 76 | 77 | 78 | self.P_nc = cp.Parameter(len(self.other_idx)) 79 | self.Q_nc = cp.Parameter(len(self.other_idx)) 80 | self.P_av = cp.Parameter(len(self.gen_idx)) 81 | 82 | # Voltage: Apply to All Buses 83 | z = cp.hstack([self.P, self.P_nc, self.Q, self.Q_nc]) # z: (70, ) 84 | constraints = [self.V_lower - self.V0 <= H_new@z, 85 | H_new@z <= self.V_upper - self.V0] 86 | 87 | ## Power: Only applies to Inverters 88 | PQ = cp.vstack([self.P0[self.gen_idx] + self.P, 89 | self.Q0[self.gen_idx] + self.Q]) # (2, n) 90 | constraints += [0 <= self.P0[self.gen_idx] + self.P, 91 | self.P0[self.gen_idx] + self.P <= self.P_av, 92 | cp.norm(PQ, axis = 0) <= self.S_rating] 93 | 94 | #objective = cp.Minimize(cp.sum_squares(P - P_tilde) + cp.sum_squares(Q - Q_tilde)) 95 | objective = cp.Minimize(cp.sum(cp.maximum(self.P_av - self.P, 96 | np.zeros(len(self.gen_idx))))) 97 | self.problem = cp.Problem(objective, constraints) 98 | 99 | def solve(self, Sbus, P_av): 100 | self.P_nc.value = Sbus.real[self.other_idx] 101 | self.Q_nc.value = Sbus.imag[self.other_idx] 102 | self.P_av.value = P_av 103 | 104 | #try: 105 | self.problem.solve() 106 | #except: 107 | # print("Solver failed") 108 | # self.P.value = None 109 | 110 | ## Check solution valid 111 | #if self.P.value is not None: 112 | #print(self.problem.status) 113 | #print(self.P.value, self.Q.value) 114 | return self.P.value, self.Q.value#, self.Problem.status 115 | 116 | #else: 117 | # return Sbus.real, Sbus.imag 118 | 119 | def main(): 120 | torch.manual_seed(args.seed) 121 | writer = SummaryWriter(comment = args.exp_name) 122 | 123 | # Create Simulation Environment 124 | if args.network_name == 'ieee37': 125 | env = IEEE37() 126 | else: 127 | print("Not implemented") 128 | 129 | n_bus = env.n - 1 130 | n_inverters = len(env.gen_idx) # inverters at PV panels 131 | 132 | env_params = {'V0': env.V0[-env.n_pq:], 133 | 'P0': env.P0[-env.n_pq:], 134 | 'Q0': env.Q0[-env.n_pq:], 135 | 'H': np.hstack([env.R, env.B]), # 35 x 70 136 | 'n_bus':n_bus, # Slack bus is not controllable 137 | 'gen_idx': env.gen_idx - 1, # Excluded the slack bus 138 | 'V_upper': env.v_upper, 'V_lower': env.v_lower, 139 | 'S_rating': env.max_S, 140 | } 141 | 142 | controller = QP_solver(**env_params) 143 | 144 | # 1-week data 145 | num_steps = 900 # 15 minutes 146 | n_episodes = 7*86400//num_steps 147 | 148 | V_prev = np.zeros(n_bus) 149 | 150 | V_record = [] 151 | V_est_record = [] 152 | P_record = [] 153 | Q_record = [] 154 | 155 | for i in range(n_episodes): 156 | loss = 0 157 | violation_count = 0 158 | 159 | for k in range(num_steps): 160 | t = i*num_steps + k 161 | Sbus, P_av = env.getSbus(t) 162 | 163 | P_gen, Q_gen = controller.solve(Sbus, P_av) 164 | print(f"P_av = {P_av}, P = {P_gen}") 165 | 166 | P = Sbus.real 167 | Q = Sbus.imag 168 | P[controller.gen_idx] = P_gen 169 | Q[controller.gen_idx] = Q_gen 170 | 171 | V, success = env.step(P + 1j*Q) 172 | V_prev = V[1:] 173 | 174 | if np.any(V>env.v_upper) | np.any(V0): 190 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 191 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record)) 192 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record)) 193 | 194 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 195 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record)) 196 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record)) 197 | 198 | if __name__ == '__main__': 199 | main() 200 | 201 | ''' 202 | # Example Usage of the environment 203 | t = 10 204 | Sbus = env.getSbus(t) 205 | 206 | # Solve power flow equations 207 | V, success = env.step(Sbus) 208 | print(np.abs(V)) 209 | if success == 0: 210 | print("Something is wrong") 211 | 212 | # Estimation using the linearized model 213 | V_est = env.linear_estimate(Sbus) 214 | print(V_est) 215 | ''' 216 | -------------------------------------------------------------------------------- /inverter_baselines/inverter_acopf.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | 3 | import numpy as np 4 | import cvxpy as cp 5 | import ipopt 6 | import torch 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from torch.utils.tensorboard import SummaryWriter 10 | 11 | main_path = os.path.abspath(os.path.join(__file__, '..')) 12 | sys.path.insert(0, main_path) 13 | 14 | from env.inverter import IEEE37 15 | 16 | from algo.ppo import PPO 17 | from agents.inverter_policy import Net, NeuralController 18 | from utils.inverter_utils import Replay_Memory 19 | 20 | import ipdb 21 | 22 | import torch 23 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 24 | DEVICE 25 | 26 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning') 27 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G', 28 | help='discount factor (default: 0.98)') 29 | parser.add_argument('--seed', type=int, default=42, metavar='N', 30 | help='random seed (default: 42)') 31 | parser.add_argument('--lam', type=int, default=10, metavar='N', 32 | help='random seed (default: 42)') 33 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G', 34 | help='Learning Rate') 35 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 36 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 37 | help='PPO update episode (default: 1); If -1, do not update weights') 38 | parser.add_argument('--exp_name', type=str, default='inverter_ACOPF', 39 | help='save name') 40 | parser.add_argument('--network_name', type=str, default='ieee37', 41 | help='') 42 | args = parser.parse_args() 43 | 44 | class ACOPFController(): 45 | def __init__(self, **env_params): 46 | self.n_bus = env_params['n_bus'] 47 | self.V0 = env_params['V0'] 48 | self.P0 = env_params['P0'] 49 | self.Q0 = env_params['Q0'] 50 | self.V_upper = env_params['V_upper'] 51 | self.V_lower = env_params['V_lower'] 52 | self.S_rating = env_params['S_rating'] 53 | self.gen_idx = env_params['gen_idx'] 54 | self.slack_idx = env_params['slack_idx'] 55 | self.Ybus = env_params['Ybus'] 56 | 57 | self.A0 = np.zeros(self.V0.shape) # initial voltage angle 58 | self.n_gen = len(self.gen_idx) 59 | self.n_slack = len(self.slack_idx) 60 | self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx and i not in self.slack_idx] 61 | 62 | 63 | def solve(self, Sbus, P_av): 64 | P_nc = Sbus.real[self.other_idx] 65 | Q_nc = Sbus.imag[self.other_idx] 66 | 67 | # Decision variables: P and Q at controllable buses, 68 | # Pslack and Qslack at slack bus, V and theta at all buses 69 | 70 | # initial guess for decision variables 71 | x0 = np.hstack([P_av, self.Q0[self.gen_idx], 72 | self.P0[self.slack_idx], self.Q0[self.slack_idx], 73 | self.V0, self.A0]) 74 | 75 | # upper and lower bounds on decision variables 76 | # 0 \leq P \leq P_av 77 | # no explicit bounds on Q 78 | # no explicit bounds on Pslack or Qslack 79 | # V and A known at ref bus 80 | # Vmin \leq V \leq Vmax at non-ref buses 81 | # no explicit bounds on A at non-ref buses 82 | def get_bound_with_slack(bound, slack_bound): 83 | values = bound * np.ones(self.n_bus) 84 | values[self.slack_idx] = slack_bound 85 | return values 86 | 87 | lb = np.hstack([ 88 | np.zeros(self.n_gen), 89 | -np.infty * np.ones(self.n_gen), 90 | -np.infty * np.ones(2 * self.n_slack), 91 | get_bound_with_slack(self.V_lower, self.V0[self.slack_idx]), 92 | get_bound_with_slack(-np.infty, self.A0[self.slack_idx])]) 93 | ub = np.hstack([ 94 | P_av, 95 | np.infty * np.ones(self.n_gen), 96 | np.infty * np.ones(2 * self.n_slack), 97 | get_bound_with_slack(self.V_upper, self.V0[self.slack_idx]), 98 | get_bound_with_slack(np.infty, self.A0[self.slack_idx])]) 99 | 100 | # upper and lower bounds on other constraints 101 | # power flow constraint: diag(v)conj(Ybus)conj(v) - S = 0 102 | # where v = diag(V*exp(1j*A)) and S is net demand at all nodes 103 | # (separate out real and imaginary parts) 104 | # P^2 + Q^2 \leq S_rating^2 105 | cl = np.hstack( 106 | [np.zeros(2*self.n_bus), np.zeros(self.n_gen)]) 107 | cu = np.hstack( 108 | [np.zeros(2*self.n_bus), self.S_rating**2]) 109 | 110 | problem_obj = ACOPFSolver(P_av, P_nc, Q_nc, self.Ybus, 111 | self.n_bus, self.n_gen, self.n_slack, self.gen_idx, self.slack_idx, self.other_idx) 112 | nlp = ipopt.problem( 113 | n=len(x0), # num decision vars 114 | m=len(cl), # num constraints 115 | problem_obj=problem_obj, 116 | lb=lb, # lower bounds on decision vars 117 | ub=ub, # upper bounds on decision vars 118 | cl=cl, # lower bounds on constraints 119 | cu=cu # upper bounds on constraints 120 | ) 121 | 122 | nlp.addOption('tol', 1e-4) 123 | nlp.addOption('print_level', 0) # 3) 124 | 125 | x, info = nlp.solve(x0) 126 | P = x[:self.n_gen] 127 | Q = x[self.n_gen:2*self.n_gen] 128 | 129 | return P, Q 130 | 131 | 132 | class ACOPFSolver(object): 133 | def __init__(self, P_av, P_nc, Q_nc, Ybus, n_bus, n_gen, n_slack, gen_idx, slack_idx, other_idx): 134 | self.P_av = P_av 135 | self.P_nc = P_nc 136 | self.Q_nc = Q_nc 137 | self.Ybus = Ybus 138 | self.n_bus = n_bus 139 | self.n_gen = n_gen 140 | self.n_slack = n_slack 141 | self.gen_idx = gen_idx 142 | self.slack_idx = slack_idx 143 | self.other_idx = other_idx 144 | self.split_inds = np.cumsum( 145 | [self.n_gen, self.n_gen, self.n_slack, self.n_slack, self.n_bus, self.n_bus])[:-1] 146 | 147 | # Curtailment objective (will be minimized) 148 | def objective(self, x): 149 | return np.maximum(self.P_av - x[:self.n_gen], 0).sum() 150 | 151 | # Gradient of objective 152 | def gradient(self, x): 153 | p_grad = -1 * ((self.P_av - x[:self.n_gen]) > 0).astype(int) 154 | return np.hstack([p_grad, np.zeros(self.n_gen + 2*self.n_slack + 2*self.n_bus)]) 155 | 156 | # Constraints (excluding box constraints on decision variables) 157 | def constraints(self, y): 158 | P, Q, Pslack, Qslack, V, A = np.split(y, self.split_inds) 159 | 160 | # power flow constraint [diag(v)conj(Ybus)conj(v) - S = 0] 161 | # separate out real and imaginary parts 162 | voltage = V * np.exp(1j * A) 163 | net_power = np.zeros(self.n_bus, dtype=np.complex128) 164 | net_power[self.gen_idx] = P + 1j*Q 165 | net_power[self.slack_idx] = Pslack + 1j*Qslack 166 | net_power[self.other_idx] = self.P_nc + 1j*self.Q_nc 167 | power_mismatch = np.diag(voltage)@np.conj(self.Ybus)@np.conj(voltage) - net_power 168 | 169 | # apparent power at inverters [P^2 + Q^2 \leq S_rating^2; compute left side here] 170 | apparent_power = P**2 + Q**2 171 | 172 | return np.hstack([np.real(power_mismatch), np.imag(power_mismatch), apparent_power]) 173 | 174 | # Jacobian of constraints (excluding box constraints on decision variables) 175 | def jacobian(self, y): 176 | P, Q, _, _, V, A = np.split(y, self.split_inds) 177 | 178 | # Jacobian of power flow constraint 179 | # See: http://www.cs.cmu.edu/~zkolter/course/15-884/eps_power_flow.pdf 180 | vol = V * np.exp(1j * A) 181 | Y = self.Ybus 182 | J1 = 1j * np.diag(vol) @ (np.diag(np.conj(Y)@np.conj(vol)) - np.conj(Y)@np.diag(np.conj(vol))) 183 | J2 = np.diag(vol)@np.conj(Y)@np.diag(np.exp(-1j * A)) + \ 184 | np.diag(np.exp(1j * A))@np.diag(np.conj(Y)@np.conj(vol)) 185 | power_flow_jac = np.vstack([ 186 | np.hstack([-np.eye(self.n_bus)[:, self.gen_idx], np.zeros((self.n_bus, self.n_gen)), 187 | -np.eye(self.n_bus)[:, self.slack_idx], np.zeros((self.n_bus, self.n_slack)), 188 | np.real(J2), np.real(J1)]), 189 | np.hstack([np.zeros((self.n_bus, self.n_gen)), -np.eye(self.n_bus)[:, self.gen_idx], 190 | np.zeros((self.n_bus, self.n_slack)), -np.eye(self.n_bus)[:, self.slack_idx], 191 | np.imag(J2), np.imag(J1)]) 192 | ]) 193 | 194 | # Jacobian of apparent power constraint 195 | apparent_power_jac = np.hstack([ 196 | np.diag(2*P), np.diag(2*Q), 197 | np.zeros( (self.n_gen, 2*self.n_slack + 2*self.n_bus))]) 198 | 199 | return np.concatenate([power_flow_jac.flatten(), apparent_power_jac.flatten()]) 200 | 201 | 202 | def main(): 203 | torch.manual_seed(args.seed) 204 | writer = SummaryWriter(comment = args.exp_name) 205 | 206 | # Create Simulation Environment 207 | if args.network_name == 'ieee37': 208 | env = IEEE37() 209 | else: 210 | print("Not implemented") 211 | 212 | n_bus = env.n 213 | n_inverters = len(env.gen_idx) # inverters at PV panels 214 | 215 | env_params = {'V0': env.V0, 216 | 'P0': env.P0, 217 | 'Q0': env.Q0, 218 | 'n_bus': n_bus, 219 | 'gen_idx': env.gen_idx, 220 | 'slack_idx': env.ref, 221 | 'V_upper': env.v_upper, 'V_lower': env.v_lower, 222 | 'S_rating': env.max_S, 223 | 'Ybus': env.Ybus 224 | } 225 | 226 | controller = ACOPFController(**env_params) 227 | 228 | # 1-week data 229 | num_steps = 900 # 15 minutes 230 | n_episodes = 7*86400//num_steps 231 | 232 | V_prev = np.zeros(n_bus) 233 | 234 | V_record = [] 235 | V_est_record = [] 236 | P_record = [] 237 | Q_record = [] 238 | 239 | start_ep = 600 240 | for i in range(start_ep, min(n_episodes, start_ep + 100)): 241 | loss = 0 242 | violation_count = 0 243 | 244 | for k in range(num_steps): 245 | t = i*num_steps + k 246 | Sbus, P_av = env.getSbus(t, wrt_reference=False, w_slack=True) 247 | 248 | P_gen, Q_gen = controller.solve(Sbus, P_av) 249 | print(f"P_av = {P_av}, P = {P_gen}") 250 | 251 | P = Sbus.real 252 | Q = Sbus.imag 253 | P[controller.gen_idx] = P_gen 254 | Q[controller.gen_idx] = Q_gen 255 | 256 | V, success = env.step(P + 1j*Q, wrt_reference=False) 257 | V_prev = V[1:] 258 | 259 | if np.any(V>env.v_upper) | np.any(V0): 275 | np.save(f"results/V_{args.exp_name}_{start_ep}.npy", np.array(V_record)) 276 | np.save(f"results/P_{args.exp_name}_{start_ep}.npy", np.array(P_record)) 277 | np.save(f"results/Q_{args.exp_name}_{start_ep}.npy", np.array(Q_record)) 278 | 279 | np.save(f"results/V_{args.exp_name}_{start_ep}.npy", np.array(V_record)) 280 | np.save(f"results/P_{args.exp_name}_{start_ep}.npy", np.array(P_record)) 281 | np.save(f"results/Q_{args.exp_name}_{start_ep}.npy", np.array(Q_record)) 282 | 283 | 284 | if __name__ == '__main__': 285 | main() 286 | 287 | ''' 288 | # Example Usage of the environment 289 | t = 10 290 | Sbus = env.getSbus(t) 291 | 292 | # Solve power flow equations 293 | V, success = env.step(Sbus) 294 | print(np.abs(V)) 295 | if success == 0: 296 | print("Something is wrong") 297 | 298 | # Estimation using the linearized model 299 | V_est = env.linear_estimate(Sbus) 300 | print(V_est) 301 | ''' 302 | -------------------------------------------------------------------------------- /inverter_baselines/inverter_no-control.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | 3 | import numpy as np 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | main_path = os.path.abspath(os.path.join(__file__, '..')) 7 | sys.path.insert(0, main_path) 8 | 9 | from env.inverter import IEEE37 10 | 11 | import pdb 12 | 13 | #import torch 14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 15 | #DEVICE 16 | 17 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning') 18 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G', 19 | help='discount factor (default: 0.98)') 20 | parser.add_argument('--seed', type=int, default=42, metavar='N', 21 | help='random seed (default: 42)') 22 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G', 23 | help='Learning Rate') 24 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 25 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 26 | help='PPO update episode (default: 1); If -1, do not update weights') 27 | parser.add_argument('--exp_name', type=str, default='no-control', 28 | help='save name') 29 | parser.add_argument('--network_name', type=str, default='ieee37', 30 | help='') 31 | args = parser.parse_args() 32 | 33 | 34 | def main(): 35 | writer = SummaryWriter(comment = args.exp_name) 36 | 37 | # Create Simulation Environment 38 | if args.network_name == 'ieee37': 39 | env = IEEE37() 40 | else: 41 | print("Not implemented") 42 | n_bus = env.n 43 | env_params = {'V0': env.V0[-env.n_pq:], 44 | 'P0': env.P0[-env.n_pq:], 45 | 'Q0': env.Q0[-env.n_pq:], 46 | 'gen_idx': env.gen_idx, # Including the slack bus 47 | 'V_upper': env.v_upper, 'V_lower': env.v_lower, 48 | 'S_rating': env.max_S, 49 | } 50 | 51 | ## Note: Volt-Var controller considers deviation from 1 52 | #controller = VoltVarController(0.04, **env_params) 53 | 54 | # 1-week data 55 | num_steps = 600 # 10 minutes 56 | n_episodes = 7*86400//num_steps 57 | 58 | V_prev = np.ones(n_bus) 59 | V_record = [] 60 | 61 | for i in range(n_episodes): 62 | violation_count = 0 63 | for k in range(num_steps): 64 | t = i*num_steps + k 65 | Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True) 66 | 67 | #Q = controller.forward(V_prev, P_av = P_av) # at Generation buses 68 | #Sbus.imag[env.gen_idx] += Q 69 | 70 | V, success = env.step(Sbus) 71 | V_prev = V 72 | 73 | if np.any(V>env.v_upper) | np.any(V0): 83 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 84 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 85 | 86 | if __name__ == '__main__': 87 | main() 88 | 89 | ''' 90 | # Example Usage of the environment 91 | t = 10 92 | Sbus = env.getSbus(t) 93 | 94 | # Solve power flow equations 95 | V, success = env.step(Sbus) 96 | print(np.abs(V)) 97 | if success == 0: 98 | print("Something is wrong") 99 | 100 | # Estimation using the linearized model 101 | V_est = env.linear_estimate(Sbus) 102 | print(V_est) 103 | ''' 104 | -------------------------------------------------------------------------------- /inverter_baselines/inverter_volt-var.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | 3 | import numpy as np 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | main_path = os.path.abspath(os.path.join(__file__, '..')) 7 | sys.path.insert(0, main_path) 8 | 9 | from env.inverter import IEEE37 10 | 11 | import pdb 12 | 13 | #import torch 14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 15 | #DEVICE 16 | 17 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning') 18 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G', 19 | help='discount factor (default: 0.98)') 20 | parser.add_argument('--seed', type=int, default=42, metavar='N', 21 | help='random seed (default: 42)') 22 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G', 23 | help='Learning Rate') 24 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 25 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 26 | help='PPO update episode (default: 1); If -1, do not update weights') 27 | parser.add_argument('--exp_name', type=str, default='volt-var', 28 | help='save name') 29 | parser.add_argument('--network_name', type=str, default='ieee37', 30 | help='') 31 | args = parser.parse_args() 32 | 33 | 34 | class VoltVarController(): 35 | def __init__(self, delta, **env_params): 36 | super(VoltVarController, self).__init__() 37 | self.V_upper = env_params['V_upper'] 38 | self.V_lower = env_params['V_lower'] 39 | self.delta = delta 40 | self.gen_idx = env_params['gen_idx'] 41 | self.S_rating = env_params['S_rating'] 42 | self.a = 1/(self.V_upper-1-self.delta/2) 43 | 44 | def forward(self, voltage, P_av): 45 | Q = (self.S_rating**2-P_av**2)**0.5 46 | 47 | # Piece-wise Linear Curve 48 | voltage = voltage[self.gen_idx] 49 | out = np.zeros_like(voltage) 50 | 51 | out[voltage <= self.V_lower] = 1 52 | 53 | idx = (voltage > self.V_lower) & (voltage < 1 - self.delta/2) 54 | out[idx] = 1 - self.a*(voltage[idx]-self.V_lower) 55 | 56 | idx = (voltage > 1 + self.delta/2) & (voltage < self.V_upper) 57 | out[idx] = -self.a*(voltage[idx]-1-self.delta/2) 58 | 59 | out[voltage >= self.V_upper] = -1 60 | return out * Q 61 | 62 | def main(): 63 | writer = SummaryWriter(comment = args.exp_name) 64 | 65 | # Create Simulation Environment 66 | if args.network_name == 'ieee37': 67 | env = IEEE37() 68 | else: 69 | print("Not implemented") 70 | n_bus = env.n 71 | env_params = {'V0': env.V0[-env.n_pq:], 72 | 'P0': env.P0[-env.n_pq:], 73 | 'Q0': env.Q0[-env.n_pq:], 74 | 'gen_idx': env.gen_idx, # Including the slack bus 75 | 'V_upper': env.v_upper, 'V_lower': env.v_lower, 76 | 'S_rating': env.max_S, 77 | } 78 | 79 | ## Note: Volt-Var controller considers deviation from 1 80 | controller = VoltVarController(0.04, **env_params) 81 | 82 | # 1-week data 83 | num_steps = 600 # 10 minutes 84 | n_episodes = 7*86400//num_steps 85 | 86 | V_prev = np.ones(n_bus) 87 | V_record = [] 88 | 89 | for i in range(n_episodes): 90 | violation_count = 0 91 | for k in range(num_steps): 92 | t = i*num_steps + k 93 | Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True) 94 | 95 | Q = controller.forward(V_prev, P_av = P_av) # at Generation buses 96 | 97 | Sbus.imag[env.gen_idx] += Q 98 | 99 | V, success = env.step(Sbus) 100 | V_prev = V 101 | 102 | if np.any(V>env.v_upper) | np.any(V0): 112 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 113 | <<<<<<< HEAD 114 | ======= 115 | 116 | >>>>>>> 5d88b0ccebcea057216087804a12ef2c880e3345 117 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 118 | 119 | if __name__ == '__main__': 120 | main() 121 | 122 | ''' 123 | # Example Usage of the environment 124 | t = 10 125 | Sbus = env.getSbus(t) 126 | 127 | # Solve power flow equations 128 | V, success = env.step(Sbus) 129 | print(np.abs(V)) 130 | if success == 0: 131 | print("Something is wrong") 132 | 133 | # Estimation using the linearized model 134 | V_est = env.linear_estimate(Sbus) 135 | print(V_est) 136 | ''' 137 | -------------------------------------------------------------------------------- /main_IW.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import gym 5 | import eplus_env 6 | 7 | import warnings 8 | warnings.filterwarnings("ignore", category=UserWarning) 9 | 10 | import argparse 11 | import numpy as np 12 | import pandas as pd 13 | import copy 14 | import pickle 15 | import pdb 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | import torch.utils.data as data 21 | import torch.optim as optim 22 | from torch.distributions import MultivariateNormal, Normal 23 | from torch.utils.tensorboard import SummaryWriter 24 | 25 | from algo.ppo import PPO 26 | from agents.nn_policy import NeuralController 27 | from utils.network import LSTM 28 | from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory 29 | 30 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 31 | DEVICE 32 | 33 | parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning') 34 | parser.add_argument('--gamma', type=float, default=0.9, metavar='G', 35 | help='discount factor (default: 0.9)') 36 | parser.add_argument('--seed', type=int, default=42, metavar='N', 37 | help='random seed (default: 42)') 38 | parser.add_argument('--lr', type=float, default=5e-4, metavar='G', 39 | help='Learning Rate') 40 | parser.add_argument('--lam', type=int, default=10, metavar='N', 41 | help='random seed (default: 42)') 42 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 43 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 44 | help='PPO update episode (default: 1); If -1, do not update weights') 45 | parser.add_argument('--T', type=int, default=12, metavar='N', 46 | help='Planning Horizon (default: 12)') 47 | parser.add_argument('--step', type=int, default=300*3, metavar='N', 48 | help='Time Step in Simulation, Unit in Seconds (default: 900)') # 15 Minutes Now! 49 | parser.add_argument('--exp_name', type=str, default='nn_w_proj', 50 | help='save name') 51 | parser.add_argument('--eta', type=int, default=3, 52 | help='Hyper Parameter for Balancing Comfort and Energy') 53 | parser.add_argument('--model_no', type = int, default = 1800, help = '') 54 | args = parser.parse_args() 55 | 56 | 57 | def main(): 58 | torch.manual_seed(args.seed) 59 | writer = SummaryWriter(comment = args.exp_name) 60 | 61 | # Create Simulation Environment 62 | env = gym.make('Eplus-IW-test-v0') 63 | 64 | # Specify variable names for control problem 65 | obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"] 66 | state_name = ["Indoor Air Temp."] 67 | dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"] 68 | ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"] 69 | target_name = ["Indoor Temp. Setpoint"] 70 | dist_name = dist_name + target_name 71 | 72 | n_state = len(state_name) 73 | n_ctrl = 1 #len(ctrl_name) 74 | n_dist = len(dist_name) 75 | eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode 76 | step = args.step # step: Timestep; Unit in seconds 77 | T = args.T # T: Number of timesteps in the planning horizon 78 | tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day 79 | 80 | # Read Information on Weather, Occupancy, and Target Setpoint 81 | obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl") 82 | disturbance = obs_2017[dist_name] 83 | # Min-Max Normalization 84 | obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only 85 | dist_min = obs_TMY3[dist_name].min() 86 | dist_max = obs_TMY3[dist_name].max() 87 | disturbance = (disturbance - dist_min)/(dist_max - dist_min) 88 | state_min = obs_TMY3[state_name].min().values 89 | state_max = obs_TMY3[state_name].max().values 90 | memory = Replay_Memory() 91 | 92 | ## Load pretrained LSTM policy weights 93 | ''' 94 | Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data) 95 | The LSTM also expects "setpoint" as part of the disturbance term. 96 | ''' 97 | network = LSTM(n_state, n_ctrl, n_dist) 98 | network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no))) 99 | 100 | ## Load thermodynamics model to construct the polytope 101 | ''' 102 | New model also expects states, actions, and disturbances to be MinMaxNormalized 103 | ''' 104 | model_dict ={'a': np.array([0.934899]), 105 | 'bu': np.array([0.024423]), 106 | 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02, 107 | 2.38717578e-03, -3.52816030e-03, 3.32528746e-03, 7.19267820e-03]), 108 | 'Pm': 1 # Upper bound of u; 109 | } 110 | policy = NeuralController(T, step, network, RC_flag = False, **model_dict) 111 | agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam) 112 | 113 | dir = 'results' 114 | if not os.path.exists(dir): 115 | os.mkdir(dir) 116 | 117 | multiplier = 1 # Normalize the reward for better training performance 118 | n_step = 96 #timesteps per day 119 | 120 | sigma = 0.1 121 | sigma_min = 0.01 122 | sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps 123 | 124 | timeStep, obs, isTerminal = env.reset() 125 | start_time = pd.datetime(year = env.start_year, month = env.start_mon, day = env.start_day) 126 | cur_time = start_time 127 | obs_dict = make_dict(obs_name, obs) 128 | 129 | # Save for record 130 | timeStamp = [start_time] 131 | observations = [obs] 132 | actions_taken = [] 133 | 134 | for i_episode in range(tol_eps): 135 | ## Save for Parameter Updates 136 | rewards = [] 137 | real_rewards = [] 138 | 139 | for t in range(n_step): 140 | state = np.array([obs_dict[name] for name in state_name]) 141 | state = (state-state_min)/(state_max-state_min) 142 | 143 | x_upper = obs_2017['x_upper'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values 144 | x_lower = obs_2017['x_lower'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values 145 | ## Margin 146 | #x_lower+=0.025 147 | #x_upper-=0.025 148 | 149 | x_upper = (x_upper-state_min)/(state_max-state_min) 150 | x_lower = (x_lower-state_min)/(state_max-state_min) 151 | 152 | dt = disturbance[cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values # T x n_dist 153 | 154 | ## Update the model in the controller 155 | # CVXPY expects np.array for parameters 156 | agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1]) 157 | agent.memory.x_lowers.append(torch.tensor(x_lower).float()) 158 | agent.memory.x_uppers.append(torch.tensor(x_upper).float()) 159 | 160 | state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state 161 | dt = torch.tensor(dt).float() 162 | agent.memory.states.append(state) 163 | agent.memory.disturbance.append(dt) 164 | 165 | ## Use policy_old to select action 166 | mu, sigma_sq, _ = agent.forward(state, dt.unsqueeze(1), current = False) # mu, sigma_sq: T x 1 x Dim. 167 | sigma_sq = torch.ones_like(mu) * sigma**2 168 | 169 | ## Myopic Limit: A hack to make sure the projected actions do not result in tiny violations 170 | margin = 0.1/(state_max-state_min) 171 | u_limits = np.array([x_lower[0]+margin.item(), x_upper[0]-margin.item()]) - model_dict['a'] * state.item() - model_dict['bd'].dot(dt[0, :-1].numpy()) 172 | u_limits /= model_dict['bu'] 173 | u_limits = np.clip(u_limits, 0, 1) 174 | #pdb.set_trace() 175 | action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = u_limits) 176 | agent.memory.actions.append(action.detach().clone()) 177 | agent.memory.old_logprobs.append(old_logprob.detach().clone()) 178 | 179 | SWT = 20 + 45 * action.item() 180 | if (SWT<30): 181 | HWOEN = -30 # De Facto Off 182 | action = torch.zeros_like(action) 183 | SWT = 20 184 | else: 185 | HWOEN = 30 # De Facto On 186 | if np.isnan(SWT): 187 | SWT = 20 188 | action4env = (HWOEN, SWT) 189 | 190 | # Before step 191 | print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}') 192 | for _ in range(3): 193 | timeStep, obs, isTerminal = env.step(action4env) 194 | 195 | obs_dict = make_dict(obs_name, obs) 196 | reward = R_func(obs_dict, SWT-20, eta) 197 | 198 | # Per step 199 | real_rewards.append(reward) 200 | bl = 0#obs_2017['rewards'][cur_time] 201 | rewards.append((reward-bl) / 15) # multiplier 202 | # print(f'Reward={reward}, BL={bl}') 203 | # Save for record 204 | cur_time = start_time + pd.Timedelta(seconds = timeStep) 205 | timeStamp.append(cur_time) 206 | observations.append(obs) 207 | actions_taken.append(action4env) 208 | 209 | writer.add_scalar('Reward', np.mean(real_rewards), i_episode) 210 | writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode) 211 | print("{}, reward: {}".format(cur_time, np.mean(real_rewards))) 212 | 213 | advantages = Advantage_func(rewards, args.gamma) 214 | agent.memory.advantages.append(advantages) 215 | # if -1, do not update parameters 216 | if args.update_episode == -1: 217 | agent.memory.clear_memory() 218 | elif (i_episode >0) & (i_episode % args.update_episode ==0): 219 | agent.update_parameters(sigma = sigma, K = 8) 220 | sigma = max(sigma_min, sigma-sigma_step) 221 | 222 | obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name) 223 | obs_df = obs_df.drop(columns=ctrl_name) 224 | action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name) 225 | obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True) 226 | obs_df.to_pickle("results/obs_"+args.exp_name+".pkl") 227 | 228 | if __name__ == '__main__': 229 | main() 230 | -------------------------------------------------------------------------------- /main_inverter.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | from env.inverter import IEEE37 10 | 11 | from algo.ppo import PPO 12 | from agents.inverter_policy import Net, NeuralController 13 | from utils.inverter_utils import Replay_Memory 14 | 15 | 16 | import pdb 17 | 18 | import torch 19 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 20 | DEVICE 21 | 22 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning') 23 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G', 24 | help='discount factor (default: 0.98)') 25 | parser.add_argument('--seed', type=int, default=42, metavar='N', 26 | help='random seed (default: 42)') 27 | parser.add_argument('--lam', type=int, default=10, metavar='N', 28 | help='random seed (default: 42)') 29 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G', 30 | help='Learning Rate') 31 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter') 32 | parser.add_argument('--update_episode', type=int, default=4, metavar='N', 33 | help='PPO update episode (default: 1); If -1, do not update weights') 34 | parser.add_argument('--exp_name', type=str, default='inverter', 35 | help='save name') 36 | parser.add_argument('--network_name', type=str, default='ieee37', 37 | help='') 38 | args = parser.parse_args() 39 | 40 | 41 | def main(): 42 | torch.manual_seed(args.seed) 43 | writer = SummaryWriter(comment = args.exp_name) 44 | 45 | # Create Simulation Environment 46 | if args.network_name == 'ieee37': 47 | env = IEEE37() 48 | else: 49 | print("Not implemented") 50 | 51 | n_bus = env.n - 1 52 | n_inverters = len(env.gen_idx) # inverters at PV panels 53 | 54 | env_params = {'V0': env.V0[-env.n_pq:], 55 | 'P0': env.P0[-env.n_pq:], 56 | 'Q0': env.Q0[-env.n_pq:], 57 | 'H': np.hstack([env.R, env.B]), # 35 x 70 58 | 'n_bus':n_bus, # Slack bus is not controllable 59 | 'gen_idx': env.gen_idx - 1, # Excluded the slack bus 60 | 'V_upper': env.v_upper, 'V_lower': env.v_lower, 61 | 'S_rating': env.max_S, 62 | } 63 | scaler = 1000 # Note: The value for Sbus is really small; Scale up for better learning 64 | 65 | mbp_nn = Net(n_bus, n_inverters, [256, 128, 64], [16, 4]) 66 | memory = Replay_Memory() 67 | mbp_policy = NeuralController(mbp_nn, memory, args.lr, lam = args.lam, scaler = scaler, **env_params) 68 | mbp_policy = mbp_policy.to(DEVICE) 69 | 70 | # 1-week data 71 | num_steps = 900 # 15 minutes 72 | n_episodes = 7*86400//num_steps 73 | 74 | V_prev = np.zeros(n_bus) 75 | 76 | V_record = [] 77 | V_est_record = [] 78 | P_record = [] 79 | Q_record = [] 80 | 81 | for i in range(n_episodes): 82 | loss = 0 83 | violation_count = 0 84 | 85 | for k in range(num_steps): 86 | t = i*num_steps + k 87 | Sbus, P_av = env.getSbus(t) 88 | Sbus *= scaler 89 | state = np.concatenate([V_prev, np.real(Sbus), np.imag(Sbus)]) 90 | mbp_policy.memory.append((state, Sbus, P_av)) ## Everything is np.array! 91 | 92 | state = torch.tensor(state).float().unsqueeze(0) 93 | 94 | P, Q = mbp_policy(state, Sbus, P_av = P_av) 95 | #pdb.set_trace() 96 | 97 | V, success = env.step(P + 1j*Q) 98 | V_prev = V[1:] 99 | 100 | if np.any(V>env.v_upper) | np.any(V0): 113 | mbp_policy.update() 114 | 115 | writer.add_scalar("Loss", loss.mean().item(), i) 116 | writer.add_scalar("violations", violation_count, i) 117 | ## Number of Projection operation during inference time 118 | writer.add_scalar("proj_count", mbp_policy.proj_count, i) 119 | mbp_policy.proj_count = 0 120 | 121 | if (i % 20 ==0) & (i>0): 122 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 123 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record)) 124 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record)) 125 | 126 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record)) 127 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record)) 128 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record)) 129 | 130 | if __name__ == '__main__': 131 | main() 132 | 133 | ''' 134 | # Example Usage of the environment 135 | t = 10 136 | Sbus = env.getSbus(t) 137 | 138 | # Solve power flow equations 139 | V, success = env.step(Sbus) 140 | print(np.abs(V)) 141 | if success == 0: 142 | print("Something is wrong") 143 | 144 | # Estimation using the linearized model 145 | V_est = env.linear_estimate(Sbus) 146 | print(V_est) 147 | ''' 148 | -------------------------------------------------------------------------------- /mypypower/newtonpf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 1996-2015 PSERC. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | """Solves the power flow using a full Newton's method. 6 | """ 7 | 8 | import sys 9 | 10 | from numpy import array, angle, exp, linalg, conj, r_, Inf 11 | 12 | #from numpy import hstack, vstack 13 | from scipy.sparse import hstack, vstack 14 | from scipy.sparse.linalg import spsolve 15 | 16 | from pypower.dSbus_dV import dSbus_dV 17 | from pypower.ppoption import ppoption 18 | 19 | import pdb 20 | 21 | def newtonpf(Ybus, Sbus, V0, ref, pv, pq, ppopt=None): 22 | """Solves the power flow using a full Newton's method. 23 | 24 | Solves for bus voltages given the full system admittance matrix (for 25 | all buses), the complex bus power injection vector (for all buses), 26 | the initial vector of complex bus voltages, and column vectors with 27 | the lists of bus indices for the swing bus, PV buses, and PQ buses, 28 | respectively. The bus voltage vector contains the set point for 29 | generator (including ref bus) buses, and the reference angle of the 30 | swing bus, as well as an initial guess for remaining magnitudes and 31 | angles. C{ppopt} is a PYPOWER options vector which can be used to 32 | set the termination tolerance, maximum number of iterations, and 33 | output options (see L{ppoption} for details). Uses default options if 34 | this parameter is not given. Returns the final complex voltages, a 35 | flag which indicates whether it converged or not, and the number of 36 | iterations performed. 37 | 38 | @see: L{runpf} 39 | 40 | @author: Ray Zimmerman (PSERC Cornell) 41 | """ 42 | ## default arguments 43 | if ppopt is None: 44 | ppopt = ppoption() 45 | 46 | ## options 47 | tol = ppopt['PF_TOL'] 48 | max_it = ppopt['PF_MAX_IT'] 49 | verbose = ppopt['VERBOSE'] 50 | 51 | ## initialize 52 | converged = 0 53 | i = 0 54 | V = V0 55 | Va = angle(V) 56 | Vm = abs(V) 57 | 58 | ## set up indexing for updating V 59 | pvpq = r_[pv, pq] 60 | npv = len(pv) 61 | npq = len(pq) 62 | j1 = 0; j2 = npv ## j1:j2 - V angle of pv buses 63 | j3 = j2; j4 = j2 + npq ## j3:j4 - V angle of pq buses 64 | j5 = j4; j6 = j4 + npq ## j5:j6 - V mag of pq buses 65 | 66 | ## evaluate F(x0) 67 | mis = V * conj(Ybus * V) - Sbus 68 | if npv: 69 | F = r_[ mis[pv].real, 70 | mis[pq].real, 71 | mis[pq].imag ] 72 | else: 73 | F = r_[ 74 | mis[pq].real, 75 | mis[pq].imag ] 76 | 77 | 78 | ## check tolerance 79 | normF = linalg.norm(F, Inf) 80 | if verbose > 1: 81 | sys.stdout.write('\n it max P & Q mismatch (p.u.)') 82 | sys.stdout.write('\n---- ---------------------------') 83 | sys.stdout.write('\n%3d %10.3e' % (i, normF)) 84 | if normF < tol: 85 | converged = 1 86 | if verbose > 1: 87 | sys.stdout.write('\nConverged!\n') 88 | 89 | ## do Newton iterations 90 | while (not converged and i < max_it): 91 | ## update iteration counter 92 | i = i + 1 93 | 94 | ## evaluate Jacobian 95 | dS_dVm, dS_dVa = dSbus_dV(Ybus, V) 96 | # pdb.set_trace() 97 | J11 = dS_dVa[array([pvpq]).T, pvpq].real 98 | J12 = dS_dVm[array([pvpq]).T, pq].real 99 | J21 = dS_dVa[array([pq]).T, pvpq].imag 100 | J22 = dS_dVm[array([pq]).T, pq].imag 101 | #pdb.set_trace() 102 | J = vstack([hstack([J11, J12]),hstack([J21, J22])], format="csr") 103 | 104 | ## compute update step 105 | dx = -1 * spsolve(J, F) 106 | # pdb.set_trace() 107 | ## update voltage 108 | if npv: 109 | Va[pv] = Va[pv] + dx[j1:j2] 110 | if npq: 111 | Va[pq] = Va[pq] + dx[j3:j4] 112 | Vm[pq] = Vm[pq] + dx[j5:j6] 113 | V = Vm * exp(1j * Va) 114 | Vm = abs(V) ## update Vm and Va again in case 115 | Va = angle(V) ## we wrapped around with a negative Vm 116 | 117 | ## evalute F(x) 118 | mis = V * conj(Ybus * V) - Sbus 119 | if npv: 120 | F = r_[ mis[pv].real, 121 | mis[pq].real, 122 | mis[pq].imag ] 123 | else: 124 | F = r_[ 125 | mis[pq].real, 126 | mis[pq].imag ] 127 | 128 | ## check for convergence 129 | normF = linalg.norm(F, Inf) 130 | if verbose > 1: 131 | sys.stdout.write('\n%3d %10.3e' % (i, normF)) 132 | if normF < tol: 133 | converged = 1 134 | if verbose: 135 | sys.stdout.write("\nNewton's method power flow converged in " 136 | "%d iterations.\n" % i) 137 | 138 | if verbose: 139 | if not converged: 140 | sys.stdout.write("\nNewton's method power did not converge in %d " 141 | "iterations.\n" % i) 142 | 143 | return V, converged, i 144 | -------------------------------------------------------------------------------- /network/IEEE-37/Ybus.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37/Ybus.mat -------------------------------------------------------------------------------- /network/IEEE-37_linearized/B.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/B.mat -------------------------------------------------------------------------------- /network/IEEE-37_linearized/R.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/R.mat -------------------------------------------------------------------------------- /network/bracket.m: -------------------------------------------------------------------------------- 1 | function BRX = bracket(X) 2 | 3 | BRX = [real(X), -imag(X); imag(X), real(X)]; 4 | 5 | -------------------------------------------------------------------------------- /network/extract_phase_37feeder.m: -------------------------------------------------------------------------------- 1 | function [P,Q,Y] = extract_phase_37feeder(phase,Zbase,Sbase) 2 | 3 | Nnode = 36; 4 | Ybase = 1/Zbase; 5 | %25 loads 6 | P_l = [0 0 0; 7 | 140 140 350; 8 | 0 0 0; 9 | 0 0 0; 10 | 0 0 85; 11 | 8 85 0; 12 | 0 0 85; 13 | 0 0 0; 14 | 17 21 0; 15 | 85 0 0; 16 | 0 0 85; 17 | 0 0 0; 18 | 0 42 0; 19 | 0 140 21; 20 | 0 0 0; 21 | 0 42 0; 22 | 0 0 0; 23 | 0 0 42; 24 | 42 0 0; 25 | 42 0 0; 26 | 42 42 42; 27 | 0 0 85; 28 | 0 0 0; 29 | 0 85 0; 30 | 0 0 0; 31 | 0 0 42; 32 | 85 0 0; 33 | 0 0 42; 34 | 140 0 0; 35 | 126 0 0; 36 | 0 0 0; 37 | 0 0 42; 38 | 0 0 85; 39 | 0 0 0; 40 | 0 42 0; 41 | 0 0 85].'; 42 | 43 | Q_l = [0 0 0; 44 | 70 70 175; 45 | 0 0 0; 46 | 0 0 0; 47 | 0 0 40; 48 | 4 40 0; 49 | 0 0 40; 50 | 0 0 0; 51 | 8 10 0; 52 | 40 0 0; 53 | 0 0 40; 54 | 0 0 0; 55 | 0 21 0; 56 | 0 70 10; 57 | 0 0 0; 58 | 0 21 0; 59 | 0 0 0; 60 | 0 0 21; 61 | 21 0 0; 62 | 21 0 0; 63 | 21 21 21 ; 64 | 0 0 40; 65 | 0 0 0; 66 | 0 40 0; 67 | 0 0 0; 68 | 0 0 21; 69 | 40 0 0; 70 | 0 0 21; 71 | 70 0 0; 72 | 62 0 0; 73 | 0 0 0; 74 | 0 0 21; 75 | 0 0 40; 76 | 0 0 0; 77 | 0 21 0; 78 | 0 0 40].'; 79 | 80 | 81 | 82 | %------------------------------------------------------------------------ 83 | % impedance matrix 84 | %------------------------------------------------------------------------ 85 | 86 | % Configuration 721 87 | Zs1 = [0.2926+0.1973i 0.0673-0.0368i 0.0337-0.0417i; 88 | 0.0673-0.0368i 0.2646+0.1900i 0.0673-0.0368i; 89 | 0.0337-0.0417i 0.0673-0.0368i 0.2926+0.1973i]./Zbase; 90 | Ys1 = sqrt(-1)*159.7919*(10^-6).*eye(3)./Ybase; 91 | 92 | % Configuration 722 93 | Zs2 = [0.4751+0.2973i 0.1629-0.0326i 0.1234-0.0607i; 94 | 0.1629-0.0326i 0.4488+0.2678i 0.1629-0.0326i; 95 | 0.1234-0.0607i 0.1629-0.0326i 0.4751+0.2973i]./Zbase; 96 | Ys2 = sqrt(-1)*127.8306*(10^-6).*eye(3)./Ybase; 97 | 98 | % Configuration 723 99 | Zs3 = [1.2936+0.6713i 0.4871+0.2111i 0.4585+0.1521i; 100 | 0.4871+0.2111i 1.3022+0.6326i 0.4871+0.2111i; 101 | 0.4585+0.1521i 1.2936+0.6713i 1.2936+0.6713i]./Zbase; 102 | Ys3 = sqrt(-1)*74.8405*(10^-6).*eye(3)./Ybase; 103 | 104 | % Configuration 724 105 | Zs4 = [2.0952+0.7758i 0.5204+0.2738i 0.4926+0.2123i; 106 | 0.5204+0.2738i 2.1068+0.7398i 0.5204+0.2738i; 107 | 0.4926+0.2123i 0.5204+0.2738i 2.0952+0.7758i]./Zbase; 108 | Ys4 = sqrt(-1)*60.2483*(10^-6).*eye(3)./Ybase; 109 | 110 | 111 | %-------------------- 112 | % line matrices 113 | %-------------------- 114 | 115 | % mile = 5280 feet 116 | convfm = (1/5280); 117 | 118 | Z12 = Zs1*(1850)*convfm; 119 | Z12i = pinv(Z12); 120 | Y12 = .5.*Ys1*(1850)*convfm; 121 | 122 | Z23 = Zs2*(960)*convfm; 123 | Z23i = pinv(Z23); 124 | Y23 = .5.*Ys2*(960)*convfm; 125 | 126 | Z34 = Zs4*(400)*convfm; 127 | Z34i = pinv(Z34); 128 | Y34 = .5.*Ys4*(400)*convfm; 129 | 130 | Z45 = Zs4*(240)*convfm; 131 | Z45i = pinv(Z45); 132 | Y45 = .5.*Ys4*(240)*convfm; 133 | 134 | Z46 = Zs4*(320)*convfm; 135 | Z46i = pinv(Z46); 136 | Y46 = .5.*Ys4*(320)*convfm; 137 | 138 | Z37 = Zs3*(360)*convfm; 139 | Z37i = pinv(Z37); 140 | Y37 = .5.*Ys3*(360)*convfm; 141 | 142 | Z78 = Zs3*(520)*convfm; 143 | Z78i = pinv(Z78); 144 | Y78 = .5.*Ys3*(520)*convfm; 145 | 146 | Z89 = Zs4*(80)*convfm; 147 | Z89i = pinv(Z89); 148 | Y89 = .5.*Ys4*(80)*convfm; 149 | 150 | Z910 = Zs4*(520)*convfm; 151 | Z910i = pinv(Z910); 152 | Y910 = .5.*Ys4*(520)*convfm; 153 | 154 | Z811 = Zs3*(800)*convfm; 155 | Z811i = pinv(Z811); 156 | Y811 = .5.*Ys3*(800)*convfm; 157 | 158 | Z1112 = Zs4*(920)*convfm; 159 | Z1112i = pinv(Z1112); 160 | Y1112 = .5.*Ys4*(920)*convfm; 161 | 162 | Z1213 = Zs4*(760)*convfm; 163 | Z1213i = pinv(Z1213); 164 | Y1213 = .5.*Ys4*(760)*convfm; 165 | 166 | Z1214 = Zs4*(120)*convfm; 167 | Z1214i = pinv(Z1214); 168 | Y1214 = .5.*Ys4*(120)*convfm; 169 | 170 | Z1115 = Zs3*(600)*convfm; 171 | Z1115i = pinv(Z1115); 172 | Y1115 = .5.*Ys3*(600)*convfm; 173 | 174 | Z1516 = Zs4*(280)*convfm; 175 | Z1516i = pinv(Z1516); 176 | Y1516 = .5.*Ys4*(280)*convfm; 177 | 178 | Z317 = Zs2*(1320)*convfm; 179 | Z317i = pinv(Z317); 180 | Y317 = .5.*Ys2*(1320)*convfm; 181 | 182 | Z1722 = Zs3*(600)*convfm; 183 | Z1722i = pinv(Z1722); 184 | Y1722 = .5.*Ys3*(600)*convfm; 185 | 186 | Z2223 = Zs3*(200)*convfm; 187 | Z2223i = pinv(Z2223); 188 | Y2223 = .5.*Ys3*(200)*convfm; 189 | 190 | Z1718 = Zs4*(240)*convfm; 191 | Z1718i = pinv(Z1718); 192 | Y1718 = .5.*Ys4*(240)*convfm; 193 | 194 | Z1819 = Zs3*(280)*convfm; 195 | Z1819i = pinv(Z1819); 196 | Y1819 = .5.*Ys3*(280)*convfm; 197 | 198 | Z1920 = Zs4*(280)*convfm; 199 | Z1920i = pinv(Z1920); 200 | Y1920 = .5.*Ys4*(280)*convfm; 201 | 202 | Z1921 = Zs4*(200)*convfm; 203 | Z1921i = pinv(Z1921); 204 | Y1921 = .5.*Ys4*(200)*convfm; 205 | 206 | Z2324 = Zs3*(600)*convfm; 207 | Z2324i = pinv(Z2324); 208 | Y2324 = .5.*Ys3*(600)*convfm; 209 | 210 | Z2325 = Zs3*(320)*convfm; 211 | Z2325i = pinv(Z2325); 212 | Y2325 = .5.*Ys3*(320)*convfm; 213 | 214 | Z2526 = Zs4*(320)*convfm; 215 | Z2526i = pinv(Z2526); 216 | Y2526 = .5.*Ys4*(320)*convfm; 217 | 218 | Z2527 = Zs3*(320)*convfm; 219 | Z2527i = pinv(Z2527); 220 | Y2527 = .5.*Ys3*(320)*convfm; 221 | 222 | Z2728 = Zs3*(560)*convfm; 223 | Z2728i = pinv(Z2728); 224 | Y2728 = .5.*Ys3*(560)*convfm; 225 | 226 | Z2829 = Zs3*(640)*convfm; 227 | Z2829i = pinv(Z2829); 228 | Y2829 = .5.*Ys3*(640)*convfm; 229 | 230 | Z2930 = Zs3*(400)*convfm; 231 | Z2930i = pinv(Z2930); 232 | Y2930 = .5.*Ys3*(400)*convfm; 233 | 234 | Z3031 = Zs3*(400)*convfm; 235 | Z3031i = pinv(Z3031); 236 | Y3031 = .5.*Ys3*(400)*convfm; 237 | 238 | Z3132 = Zs3*(400)*convfm; 239 | Z3132i = pinv(Z3132); 240 | Y3132 = .5.*Ys3*(400)*convfm; 241 | 242 | Z3133 = Zs4*(200)*convfm; 243 | Z3133i = pinv(Z3133); 244 | Y3133 = .5.*Ys4*(200)*convfm; 245 | 246 | Z2834 = Zs4*(520)*convfm; 247 | Z2834i = pinv(Z2834); 248 | Y2834 = .5.*Ys4*(520)*convfm; 249 | 250 | Z3435 = Zs4*(1280)*convfm; 251 | Z3435i = pinv(Z3435); 252 | Y3435 = .5.*Ys4*(1280)*convfm; 253 | 254 | Z3436 = Zs4*(200)*convfm; 255 | Z3436i = pinv(Z3436); 256 | Y3436 = .5.*Ys4*(200)*convfm; 257 | 258 | 259 | % network admittance matrix 260 | oo = zeros(3); 261 | 262 | Y_net = [Z12i+Y12 -Z12i zeros(3,3*(Nnode-2)); 263 | -Z12i Z12i+Z23i+Y12+Y23 -Z23i zeros(3,3*(Nnode-3)); 264 | oo -Z23i Z23i+Z34i+Z37i+Z317i+Y23+Y34+Y37+Y317 -Z34i oo oo -Z37i oo oo oo oo oo oo oo oo oo -Z317i zeros(3,3*(Nnode-17)); 265 | oo oo -Z34i Z34i+Z45i+Z46i+Y34+Y45+Y46 -Z45i -Z46i zeros(3,3*(Nnode-6)); 266 | oo oo oo -Z45i Z45i+Y45 zeros(3,3*(Nnode-5)); 267 | oo oo oo -Z46i oo Z46i+Y46 zeros(3,3*(Nnode-6)); 268 | oo oo -Z37i oo oo oo Z37i+Z78i+Y37+Y78 -Z78i zeros(3,3*(Nnode-8)); 269 | oo oo oo oo oo oo -Z78i Z78i+Y78+Z89i+Y89+Z811i+Y811 -Z89i oo -Z811i zeros(3,3*(Nnode-11)); 270 | zeros(3,3*7) -Z89i Z89i+Y89+Z910i+Y910 -Z910i zeros(3,3*(Nnode-10)); 271 | zeros(3,3*8) -Z910i Z910i+Y910 zeros(3,3*(Nnode-10)); 272 | zeros(3,3*7) -Z811i oo oo Z811i+Y811+Z1112i+Y1112+Z1115i+Y1115 -Z1112i oo oo -Z1115i zeros(3,3*(Nnode-15)); 273 | zeros(3,3*10) -Z1112i Z1112i+Z1213i+Z1214i+Y1112+Y1213+Y1214 -Z1213i -Z1214i zeros(3,3*(Nnode-14)); 274 | zeros(3,3*11) -Z1213i Z1213i+Y1213 zeros(3,3*(Nnode-13)); 275 | zeros(3,3*11) -Z1214i oo Z1214i+Y1214 zeros(3,3*(Nnode-14)) 276 | zeros(3,3*10) -Z1115i oo oo oo Z1115i+Z1516i+Y1115+Y1516 -Z1516i zeros(3,3*(Nnode-16)); 277 | zeros(3,3*14) -Z1516i Z1516i+Y1516 zeros(3,3*(Nnode-16)) 278 | oo oo -Z317i zeros(3,3*13) Z317i+Y317+Z1718i+Y1718+Z1722i+Y1722 -Z1718i oo oo oo -Z1722i zeros(3,3*(Nnode-22)); 279 | zeros(3,3*16) -Z1718i Z1718i+Y1718+Z1819i+Y1819 -Z1819i zeros(3,3*(Nnode-19)); 280 | zeros(3,3*17) -Z1819i Z1819i+Y1819+Z1920i+Y1920+Z1921i+Y1921 -Z1920i -Z1921i zeros(3,3*(Nnode-21)); 281 | zeros(3,3*18) -Z1920i Z1920i+Y1920 zeros(3,3*(Nnode-20)); 282 | zeros(3,3*18) -Z1921i oo Z1921i+Y1921 zeros(3,3*(Nnode-21)); 283 | zeros(3,3*16) -Z1722i oo oo oo oo Z1722i+Y1722+Z2223i+Y2223 -Z2223i zeros(3,3*(Nnode-23)); 284 | zeros(3,3*21) -Z2223i Z2223i+Y2223+Z2324i+Y2324+Z2325i+Y2325 -Z2324i -Z2325i zeros(3,3*(Nnode-25)); 285 | zeros(3,3*22) -Z2324i Z2324i+Y2324 zeros(3,3*(Nnode-24)); 286 | zeros(3,3*22) -Z2325i oo Z2325i+Y2325+Z2526i+Y2526+Z2527i+Y2527 -Z2526i -Z2527i zeros(3,3*(Nnode-27)); 287 | zeros(3,3*24) -Z2526i Z2526i+Y2526 zeros(3,3*(Nnode-26)); 288 | zeros(3,3*24) -Z2527i oo Z2527i+Y2527+Z2728i+Y2728 -Z2728i zeros(3,3*(Nnode-28)); 289 | zeros(3,3*26) -Z2728i Z2728i+Y2728+Z2829i+Y2829+Z2834i+Y2834 -Z2829i oo oo oo oo -Z2834i zeros(3,3*(Nnode-34)); 290 | zeros(3,3*27) -Z2829i Z2829i+Y2829+Z2930i+Y2930 -Z2930i zeros(3,3*(Nnode-30)); 291 | zeros(3,3*28) -Z2930i Z2930i+Y2930+Z3031i+Y3031 -Z3031i zeros(3,3*(Nnode-31)); 292 | zeros(3,3*29) -Z3031i Z3031i+Y3031+Z3132i+Y3132+Z3133i+Y3133 -Z3132i -Z3133i zeros(3,3*(Nnode-33)) 293 | zeros(3,3*30) -Z3132i Z3132i+Y3132 zeros(3,3*(Nnode-32)); 294 | zeros(3,3*30) -Z3133i oo Z3133i+Y3133 zeros(3,3*(Nnode-33)); 295 | zeros(3,3*27) -Z2834i zeros(3,3*5) Z2834i+Y2834+Z3435i+Y3435+Z3436i+Y3436 -Z3435i -Z3436i; 296 | zeros(3,3*33) -Z3435i Z3435i+Y3435 oo; 297 | zeros(3,3*33) -Z3436i oo Z3436i+Y3436]; 298 | 299 | 300 | P = P_l(phase,:); 301 | Q = Q_l(phase,:); 302 | Y = Y_net(phase:3:end,phase:3:end); 303 | 304 | fac = Sbase/1000; 305 | 306 | P = P./fac; 307 | Q = Q./fac; 308 | 309 | 310 | end 311 | -------------------------------------------------------------------------------- /network/ieee37.m: -------------------------------------------------------------------------------- 1 | % This code is modified from to linearize IEEE 37-bus feeder system. 2 | % 3 | % S. Bolognani, F. Dörfler (2015) 4 | % "Fast power system analysis via implicit linearization of the power flow manifold." 5 | % In Proc. 53rd Annual Allerton Conference on Communication, Control, and Computing. 6 | % Preprint available at http://control.ee.ethz.ch/~bsaverio/papers/BolognaniDorfler_Allerton2015.pdf 7 | % 8 | % This source code is distributed in the hope that it will be useful, but without any warranty. 9 | % 10 | % MatLab OR GNU Octave, version 3.8.1 available at http://www.gnu.org/software/octave/ 11 | % MATPOWER 5.1 available at http://www.pserc.cornell.edu/matpower/ 12 | 13 | clear all 14 | close all 15 | clc 16 | 17 | % Load grid model 18 | %Vbase = 4160/sqrt(3); 19 | %Sbase = 5e6; 20 | %Zbase = Vbase^2/Sbase; 21 | Zbase = 1; 22 | Vbase = 4800; 23 | Sbase = (Vbase^2)/Zbase; 24 | 25 | phase = 1; 26 | 27 | [Pbus, Qbus, Ybus] = extract_phase_37feeder(phase, Zbase, Sbase); 28 | Sbus = complex(Pbus, Qbus); 29 | n = size(Ybus, 1); 30 | %% 31 | % Compute exact solution via MatPower 32 | ref_idx = [1]; 33 | pv_idx = [];%[4, 7, 9. 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36]; 34 | pq_idx =[2:36];%[2, 3, 5, 6, 8, 12, 14, 15, 16, 18, 19, 21, 24, 25, 27]; 35 | V0 = ones(n,1); 36 | %[results, success, i] = gausspf(Ybus, Sbus, V0, ref_idx, pv_idx, pq_idx, mpoption('VERBOSE', 1, 'OUT_ALL',0)); 37 | 38 | %% 39 | %%%%% LINEARIZED MODEL %%%%% 40 | 41 | %%%%% Linearization point (given voltage magnitude and angle) 42 | %Vbus = NaN(n,1); 43 | %Vbus(mpc.gen(:,GEN_BUS)) = mpc.gen(:,VG); 44 | Vbus = ones(n,1); 45 | 46 | % Flat voltage profile 47 | V0 = ones(n,1); 48 | A0 = zeros(n,1); 49 | 50 | % Corresponding current injection 51 | J0 = Ybus*(V0.*exp(1j*A0)); 52 | 53 | % Corresponding power injection 54 | S0 = V0.*exp(1j*A0).*conj(J0); 55 | P0 = real(S0); 56 | Q0 = imag(S0); 57 | 58 | %%%%% Linear system of equations for the grid model 59 | 60 | UU = bracket(diag(V0.*exp(1j*A0))); 61 | JJ = bracket(diag(conj(J0))); 62 | NN = Nmatrix(2*n); 63 | YY = bracket(Ybus); 64 | PP = Rmatrix(ones(n,1), zeros(n,1)); 65 | 66 | AA = zeros(2*n,4*n); 67 | BB = zeros(2*n,1); 68 | 69 | V_OFFSET = 0; 70 | A_OFFSET = 1*n; 71 | P_OFFSET = 2*n; 72 | Q_OFFSET = 3*n; 73 | 74 | % bus models 75 | 76 | for bus = 1:n 77 | row = 2*(bus-1)+1; 78 | if (any(bus == pq_idx(:))) 79 | AA(row,P_OFFSET+bus) = 1; 80 | AA(row+1,Q_OFFSET+bus) = 1; 81 | BB(row) = Pbus(bus) - P0(bus); 82 | BB(row+1) = Qbus(bus) - Q0(bus); 83 | elseif (any(bus == pv_idx(:))) 84 | AA(row,P_OFFSET+bus) = 1; 85 | AA(row+1,V_OFFSET+bus) = 1; 86 | BB(row) = Pbus(bus) - P0(bus); 87 | BB(row+1) = Vbus(bus) - V0(bus); 88 | elseif (any(bus == ref_idx(:))) 89 | AA(row,V_OFFSET+bus) = 1; 90 | AA(row+1,A_OFFSET+bus) = 1; 91 | BB(row) = Vbus(bus) - V0(bus); 92 | BB(row+1) = 0 - A0(bus); 93 | end 94 | end 95 | 96 | Agrid = [(JJ + UU*NN*YY)*PP -eye(2*n)]; 97 | Amat = [Agrid; AA]; 98 | Bmat = [zeros(2*n,1); BB]; 99 | 100 | x = Amat\Bmat; 101 | 102 | approxVM = V0 + x(1:n); 103 | approxVA = (A0 + x(n+1:2*n))/pi*180; 104 | 105 | 106 | %% 107 | % Check my implementation is correct 108 | A11 = (JJ + UU*NN*YY)*PP; 109 | A21 = AA(:, 1:2*n); 110 | A22 = AA(:, 2*n+1:4*n); 111 | 112 | n_new = n-1; 113 | 114 | delta_P = reshape(Pbus(2:end), n_new, 1)-P0(2:end); 115 | delta_Q = reshape(Qbus(2:end), n_new, 1)-Q0(2:end); 116 | 117 | % remove the first bus; 118 | A11(n+1, :) = []; 119 | A11(:, n+1) = []; 120 | A11(1, :) = []; 121 | A11(:, 1) = []; 122 | 123 | x_hat = inv(A11) * [delta_P; delta_Q]; 124 | %x_hat = pinv([A11; A21]) * ([eye(2*n); -A22] * [Pbus.'; Qbus.'] + Bmat); 125 | 126 | myVM = V0(2:end) + x_hat(1:n_new); 127 | myVA = (A0(2:end) + x_hat(n_new+1:2*n_new))/pi*180; 128 | 129 | subplot(211) 130 | %plot(1:n, approxVM, 'k*') 131 | plot(2:n, myVM(1:end), 'ko', 1:n, approxVM, 'k*') 132 | %plot(1:n, results.bus(:,VM), 'ko', 1:n, approxVM, 'k*') 133 | ylabel('magnitudes [p.u.]') 134 | xlim([0 n]) 135 | 136 | subplot(212) 137 | %plot(1:n, approxVA, 'k*') 138 | plot(2:n, myVA, 'ko', 1:n, approxVA, 'k*') 139 | %plot(1:n, results.bus(:,VA), 'ko', 1:n, approxVA, 'k*') 140 | %ylabel('angles [deg]') 141 | xlim([0 n]) 142 | %% 143 | H = inv(A11); 144 | R = H(1:n_new, 1:n_new); 145 | B = H(1:n_new, n_new+1:2*n_new); 146 | 147 | 148 | -------------------------------------------------------------------------------- /run_exp1.sh: -------------------------------------------------------------------------------- 1 | 2 | python main_IW.py --exp_name test_w-aux_0 --lam 10 --seed 42 3 | python main_IW.py --exp_name test_w-aux_1 --lam 10 --seed 0 4 | python main_IW.py --exp_name test_w-aux_2 --lam 10 --seed 105 5 | python main_IW.py --exp_name test_w-aux_3 --lam 10 --seed 7 6 | python main_IW.py --exp_name test_w-aux_4 --lam 100 --seed 59 7 | 8 | python IW_ablation.py --exp_name vanilla-update_w-aux_0 --lam 10 --seed 42 9 | python IW_ablation.py --exp_name vanilla-update_w-aux_1 --lam 10 --seed 0 10 | python IW_ablation.py --exp_name vanilla-update_w-aux_2 --lam 10 --seed 105 11 | python IW_ablation.py --exp_name vanilla-update_w-aux_3 --lam 10 --seed 37 12 | python IW_ablation.py --exp_name vanilla-update_w-aux_4 --lam 10 --seed 49 13 | 14 | #python main_IW.py --exp_name clip_no-update --lam 0 --seed 42 --update_episode -1 15 | 16 | 17 | #python main_IW.py --exp_name nn-w-proj_0 --lam 0 --seed 42 18 | #python main_IW.py --exp_name nn-w-proj_1 --lam 0 --seed 0 19 | #python main_IW.py --exp_name nn-w-proj_2 --lam 0 --seed 15 20 | #python main_IW.py --exp_name nn-w-proj_3 --lam 0 --seed 37 21 | #python main_IW.py --exp_name nn-w-proj_4 --lam 0 --seed 49 22 | 23 | #python IW_ablation.py --exp_name _0 --lam 0 --seed 42 24 | #python IW_ablation.py --exp_name vannilla-update_2 --lam 0 --seed 0 25 | #python IW_ablation.py --exp_name vannilla-update_3 --lam 0 --seed 15 26 | #python IW_ablation.py --exp_name vannilla-update_4 --lam 0 --seed 37 27 | #python IW_ablation.py --exp_name vannilla-update_5 --lam 0 --seed 49 28 | -------------------------------------------------------------------------------- /utils/inverter_utils.py: -------------------------------------------------------------------------------- 1 | # Helper Functions 2 | import numpy as np 3 | import torch 4 | import torch.utils.data as data 5 | import pdb 6 | 7 | class Replay_Memory(): 8 | def __init__(self, memory_size=86400): 9 | self.memory_size = memory_size 10 | self.storage = [] 11 | 12 | def sample_batch(self, batch_size=32): 13 | # This function returns a batch of randomly sampled transitions - i.e. state, action, reward, next state, terminal flag tuples. 14 | # You will feed this to your model to train. 15 | rand_idx = np.random.choice(len(self.storage), batch_size) 16 | batch = [self.storage[i] for i in rand_idx] 17 | 18 | state = [transition[0] for transition in batch] 19 | Sbus = [transition[1] for transition in batch] 20 | P_av = [transition[2] for transition in batch] 21 | return torch.tensor(np.stack(state)).float(), np.stack(Sbus), np.stack(P_av) 22 | 23 | def append(self, transition): 24 | # appends transition to the memory. 25 | self.storage.append(transition) 26 | # only keeps the latest memory_size transitions 27 | if len(self.storage) > self.memory_size: 28 | self.storage = self.storage[-self.memory_size:] 29 | 30 | 31 | -------------------------------------------------------------------------------- /utils/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.data as data 4 | import numpy as np 5 | 6 | # Implement a vanilla MLP here 7 | class MLP(nn.Module): 8 | def __init__(self, input_size, hiddens, output_size): 9 | super(MLP, self).__init__() 10 | self.n_layers = len(hiddens) 11 | self.layers = [] 12 | tmp = [input_size] + hiddens 13 | 14 | for i in range(self.n_layers): 15 | self.layers.append(nn.Linear(tmp[i], tmp[i+1])) 16 | self.layers.append(nn.ReLU()) 17 | # self.layers.append(nn.BatchNorm1d(tmp[i+1])) 18 | self.layers.append(nn.Linear(tmp[-1], output_size)) 19 | self.layers = nn.ModuleList(self.layers) 20 | 21 | def forward(self,x): 22 | out = x 23 | for i, l in enumerate(self.layers): 24 | out = l(out) 25 | return out 26 | 27 | 28 | class LSTM(nn.Module): 29 | def __init__(self, n_state, n_action, n_dist, lstm_hidden = 8, hiddens = [4], lstm_layer = 2, bi = False): 30 | super(LSTM, self).__init__() 31 | 32 | self.rnn = nn.LSTM(n_dist, lstm_hidden, lstm_layer, dropout = 0, bidirectional = bi) 33 | if bi: 34 | self.n_direction = 2 35 | else: 36 | self.n_direction = 1 37 | 38 | self.lstm_hidden = lstm_hidden 39 | self.lstm_layer = lstm_layer 40 | 41 | self.encoder1 = nn.Sequential( 42 | nn.Linear(n_state, 4), 43 | nn.ReLU(), 44 | #nn.BatchNorm1d(32), 45 | nn.Linear(4, lstm_hidden*self.n_direction*self.lstm_layer), 46 | nn.ReLU()) 47 | 48 | self.encoder2 = nn.Sequential( 49 | nn.Linear(n_state, 4), 50 | nn.ReLU(), 51 | #nn.BatchNorm1d(32), 52 | nn.Linear(4, lstm_hidden * self.n_direction*self.lstm_layer), 53 | nn.ReLU()) 54 | 55 | n_layers = len(hiddens) + 1 56 | tmp = [self.n_direction * lstm_hidden] + hiddens #+ [n_action] 57 | 58 | self.decoder = [] 59 | for i in range(n_layers-1): 60 | self.decoder.append(nn.Linear(tmp[i], tmp[i+1])) 61 | self.decoder.append(nn.ReLU()) 62 | self.decoder = nn.ModuleList(self.decoder) 63 | 64 | # mu and sigma2 are learned separately 65 | self.final_layer = nn.Linear(tmp[-1], n_action) 66 | self.final_layer_ = nn.Linear(tmp[-1], n_action) 67 | 68 | def forward(self, state, disturbance): 69 | # state: n x dim 70 | # disturbance: T x n x dist 71 | n = state.shape[0] 72 | T = disturbance.shape[0] 73 | 74 | h0 = self.encoder1(state).reshape(n, self.n_direction*self.lstm_layer, self.lstm_hidden).transpose(0, 1) # (layer x direction) x n x Dim. 75 | c0 = self.encoder2(state).reshape(n, self.n_direction*self.lstm_layer, self.lstm_hidden).transpose(0, 1) 76 | 77 | out, (hn, cn) = self.rnn(disturbance, (h0, c0)) # out: T x n x (lstm_hidden x n_direction) 78 | #print("line 176") 79 | out = out.reshape(T * n, self.lstm_hidden * self.n_direction) 80 | for layer in self.decoder: 81 | out = layer(out) 82 | mu = self.final_layer(out).reshape(T, n, -1) 83 | sigma_sq = self.final_layer_(out).reshape(T, n, -1) 84 | # out: (T x n) x n_action 85 | return mu, sigma_sq 86 | 87 | ''' 88 | class Replay_Memory(): 89 | def __init__(self, memory_size=288, burn_in=32): 90 | self.memory_size = memory_size 91 | self.burn_in = burn_in 92 | # the memory is as a list of transitions (S,A,R,S,D). 93 | self.storage = [] 94 | 95 | def sample_batch(self, batch_size=32): 96 | # This function returns a batch of randomly sampled transitions - i.e. state, action, reward, next state, terminal flag tuples. 97 | # You will feed this to your model to train. 98 | rand_idx = np.random.choice(len(self.storage), batch_size) 99 | return [self.storage[i] for i in rand_idx] 100 | 101 | def append(self, transition): 102 | # appends transition to the memory. 103 | self.storage.append(transition) 104 | # only keeps the latest memory_size transitions 105 | if len(self.storage) > self.memory_size: 106 | self.storage = self.storage[-self.memory_size:] 107 | ''' 108 | -------------------------------------------------------------------------------- /utils/ppo_utils.py: -------------------------------------------------------------------------------- 1 | # Helper Functions 2 | import numpy as np 3 | import torch 4 | import torch.utils.data as data 5 | import pdb 6 | 7 | def make_dict(obs_name, obs): 8 | zipbObj = zip(obs_name, obs) 9 | return dict(zipbObj) 10 | 11 | def R_func(obs_dict, action, eta): 12 | reward = - action#- 0.5 * eta[int(obs_dict["Occupancy Flag"])] * (obs_dict["Indoor Air Temp."] - obs_dict["Indoor Temp. Setpoint"] - 1)**2 13 | return reward#.item() 14 | 15 | # Calculate the advantage estimate 16 | def Advantage_func(rewards, gamma): 17 | R = torch.zeros(1, 1).double() 18 | T = len(rewards) 19 | advantage = torch.zeros((T,1)).double() 20 | 21 | for i in reversed(range(len(rewards))): 22 | R = gamma * R + rewards[i] 23 | advantage[i] = R 24 | return advantage 25 | 26 | class Dataset(data.Dataset): 27 | def __init__(self, states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers): 28 | self.states = states 29 | self.actions = actions 30 | self.disturbance = disturbance 31 | self.advantages = advantages 32 | self.old_logprobs = old_logprobs 33 | self.x_uppers = x_uppers 34 | self.x_lowers = x_lowers 35 | 36 | def __len__(self): 37 | return len(self.states) 38 | 39 | def __getitem__(self, index): 40 | return self.states[index], self.actions[index], self.disturbance[index], self.advantages[index], self.old_logprobs[index], self.x_uppers[index], self.x_lowers[index] 41 | 42 | class Replay_Memory(): 43 | def __init__(self, ): 44 | self.advantages = [] 45 | self.states = [] 46 | self.old_logprobs = [] 47 | self.actions = [] 48 | self.disturbance = [] # T x n_dist 49 | self.x_uppers = [] 50 | self.x_lowers = [] 51 | 52 | def clear_memory(self, ): 53 | self.advantages = [] 54 | self.states = [] 55 | self.old_logprobs = [] 56 | self.actions = [] 57 | self.disturbance = [] 58 | self.x_uppers = [] 59 | self.x_lowers = [] 60 | 61 | def sample(self): 62 | states = torch.vstack(self.states) 63 | actions = torch.vstack(self.actions) 64 | advantages = torch.vstack(self.advantages).reshape(-1) 65 | old_logprobs = torch.vstack(self.old_logprobs).reshape(-1) 66 | disturbance = torch.stack(self.disturbance) # n x T x dist 67 | x_uppers = torch.vstack(self.x_uppers) 68 | x_lowers = torch.vstack(self.x_lowers) 69 | self.clear_memory() 70 | 71 | return states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers 72 | 73 | 74 | --------------------------------------------------------------------------------