├── .gitignore
├── IW_baselines
    └── IW_ablation.py
├── LICENSE
├── README.md
├── agents
    ├── base.py
    ├── inverter_policy.py
    └── nn_policy.py
├── algo
    └── ppo.py
├── data
    ├── ARX-0
    ├── data_2017_baseline.pkl
    ├── data_TMY3_baseline.pkl
    └── param_IW-nn-1800
├── docs
    ├── figs
    │   ├── framework.pdf
    │   └── framework.png
    └── slides.pdf
├── env
    └── inverter.py
├── environment.yml
├── inverter_baselines
    ├── inverter_QP.py
    ├── inverter_acopf.py
    ├── inverter_no-control.py
    └── inverter_volt-var.py
├── main_IW.py
├── main_inverter.py
├── mypypower
    └── newtonpf.py
├── network
    ├── IEEE-37
    │   └── Ybus.mat
    ├── IEEE-37_linearized
    │   ├── B.mat
    │   └── R.mat
    ├── bracket.m
    ├── extract_phase_37feeder.m
    └── ieee37.m
├── run_exp1.sh
└── utils
    ├── inverter_utils.py
    ├── network.py
    └── ppo_utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | data/Irradiance_1sec.mat
  2 | data/Loads_1sec.mat
  3 | runs/
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | venv/
112 | #ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 


--------------------------------------------------------------------------------
/IW_baselines/IW_ablation.py:
--------------------------------------------------------------------------------
  1 | import os, sys
  2 | 
  3 | import gym
  4 | import eplus_env
  5 | 
  6 | import warnings
  7 | warnings.filterwarnings("ignore", category=UserWarning)
  8 | 
  9 | import argparse
 10 | import numpy as np
 11 | import pandas as pd
 12 | import copy
 13 | import pickle
 14 | import pdb
 15 | 
 16 | import torch
 17 | import torch.nn as nn
 18 | import torch.nn.functional as F
 19 | import torch.utils.data as data
 20 | import torch.optim as optim
 21 | from torch.distributions import MultivariateNormal, Normal
 22 | from torch.utils.tensorboard import SummaryWriter
 23 | 
 24 | main_path = os.path.abspath(os.path.join(__file__, '..'))
 25 | sys.path.insert(0, main_path)
 26 | 
 27 | from algo.ppo import PPO
 28 | from agents.nn_policy import NeuralController
 29 | from utils.network import LSTM
 30 | from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory
 31 | 
 32 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 33 | DEVICE
 34 | 
 35 | parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning')
 36 | parser.add_argument('--gamma', type=float, default=0.9, metavar='G',
 37 |                     help='discount factor (default: 0.9)')
 38 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 39 |                     help='random seed (default: 42)')
 40 | parser.add_argument('--lr', type=float, default=5e-4, metavar='G',
 41 |                     help='Learning Rate')
 42 | parser.add_argument('--lam', type=int, default=10, metavar='N',
 43 |                     help='random seed (default: 42)')
 44 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 45 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 46 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 47 | parser.add_argument('--T', type=int, default=12, metavar='N',
 48 |                     help='Planning Horizon (default: 12)')
 49 | parser.add_argument('--step', type=int, default=300*3, metavar='N',
 50 |                     help='Time Step in Simulation, Unit in Seconds (default: 900)') # 15 Minutes Now!
 51 | parser.add_argument('--exp_name', type=str, default='vanilla_update',
 52 |                     help='save name')
 53 | parser.add_argument('--eta', type=int, default=3,
 54 |                     help='Hyper Parameter for Balancing Comfort and Energy')
 55 | parser.add_argument('--model_no', type = int, default = 1800, help = '')
 56 | args = parser.parse_args()
 57 | 
 58 | def update_parameters(agent, sigma=0.1, K = 4):
 59 |     loader = agent._get_training_samples()
 60 |     for i in range(K):
 61 |         for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader:
 62 |             n_batch = states.shape[0]
 63 |             # pdb.set_trace()
 64 |             mu, _ = agent.policy.nn(state, disturbance, disturbance.transpose(0, 1))#,  x_lowers = x_lowers, x_uppers = x_uppers)# T x n x n_action
 65 |             #mus, sigma_sqs, proj_loss = self.policy.forward(states, ) # x, u: T x N x Dim.
 66 |             sigma_sqs = torch.ones_like(mus) * sigma**2
 67 |             
 68 |             log_probs, entropies = agent.evaluate_action(mus[0], actions, sigma_sqs)
 69 | 
 70 |             ratio = torch.exp(log_probs.squeeze()-old_logprobs)
 71 |             surr1 = ratio * advantages
 72 |             surr2 = torch.clamp(ratio, 1-agent.clip_param, 1+agent.clip_param) * advantages
 73 |             loss  = -torch.min(surr1, surr2).mean()
 74 |             agent.optimizer.zero_grad()
 75 |             ## Auxiliary losses
 76 |             loss -= torch.mean(entropies) * 0.01
 77 |             loss += agent.lam * proj_loss
 78 | 
 79 |             loss.backward()
 80 |             nn.utils.clip_grad_norm_(agent.policy.nn.parameters(), 100)
 81 |             self.optimizer.step()
 82 |     self.policy_old.nn.load_state_dict(self.policy.nn.state_dict())
 83 | 
 84 | def main():
 85 |     torch.manual_seed(args.seed)
 86 |     writer = SummaryWriter(comment = args.exp_name)
 87 |     
 88 |     # Create Simulation Environment
 89 |     env = gym.make('Eplus-IW-test-v0')
 90 |     
 91 |     # Specify variable names for control problem
 92 |     obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"]
 93 |     state_name = ["Indoor Air Temp."]
 94 |     dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"]
 95 |     ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"]
 96 |     target_name = ["Indoor Temp. Setpoint"]
 97 |     dist_name = dist_name + target_name
 98 |     
 99 |     n_state = len(state_name)
100 |     n_ctrl = 1 #len(ctrl_name)
101 |     n_dist = len(dist_name)
102 |     eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode
103 |     step = args.step # step: Timestep; Unit in seconds
104 |     T = args.T # T: Number of timesteps in the planning horizon
105 |     tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day
106 | 
107 |     # Read Information on Weather, Occupancy, and Target Setpoint
108 |     obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl")
109 |     disturbance = obs_2017[dist_name]
110 |     # Min-Max Normalization
111 |     obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only
112 |     dist_min = obs_TMY3[dist_name].min()
113 |     dist_max = obs_TMY3[dist_name].max()
114 |     disturbance = (disturbance - dist_min)/(dist_max - dist_min)
115 |     state_min = obs_TMY3[state_name].min().values
116 |     state_max = obs_TMY3[state_name].max().values
117 |     memory = Replay_Memory()
118 |     
119 |     ## Load pretrained LSTM policy weights
120 |     '''
121 |         Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data)
122 |         The LSTM also expects "setpoint" as part of the disturbance term.
123 |     '''
124 |     network = LSTM(n_state, n_ctrl, n_dist)
125 |     network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no)))
126 |     
127 |     ## Load thermodynamics model to construct the polytope
128 |     '''
129 |         New model also expects states, actions, and disturbances to be MinMaxNormalized
130 |     '''
131 |     model_dict ={'a': np.array([0.934899]),
132 |                 'bu': np.array([0.024423]),
133 |                 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02,
134 |                 2.38717578e-03, -3.52816030e-03,  3.32528746e-03,  7.19267820e-03]),
135 |                 'Pm': 1  # Upper bound of u;
136 |                 }
137 |     policy = NeuralController(T, step, network, RC_flag = False, **model_dict)
138 |     agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam)
139 |     
140 |     dir = 'results'
141 |     if not os.path.exists(dir):
142 |         os.mkdir(dir)
143 |     
144 |     multiplier = 1 # Normalize the reward for better training performance
145 |     n_step = 96 #timesteps per day
146 |     
147 |     sigma = 0.1
148 |     sigma_min = 0.01
149 |     sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps
150 |     
151 |     timeStep, obs, isTerminal = env.reset()
152 |     start_time = pd.datetime(year = env.start_year, month = env.start_mon, day = env.start_day)
153 |     cur_time = start_time
154 |     obs_dict = make_dict(obs_name, obs)
155 |     
156 |     # Save for record
157 |     timeStamp = [start_time]
158 |     observations = [obs]
159 |     actions_taken = []
160 | 
161 |     for i_episode in range(tol_eps):
162 |         ## Save for Parameter Updates
163 |         rewards = []
164 |         real_rewards = []
165 | 
166 |         for t in range(n_step):
167 |             state = np.array([obs_dict[name] for name in state_name])
168 |             state = (state-state_min)/(state_max-state_min)
169 |             
170 |             x_upper = obs_2017['x_upper'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values
171 |             x_lower = obs_2017['x_lower'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values
172 |             ## Margin
173 |             #x_lower+=0.025
174 |             #x_upper-=0.025
175 |             
176 |             x_upper = (x_upper-state_min)/(state_max-state_min)
177 |             x_lower = (x_lower-state_min)/(state_max-state_min)
178 |             
179 |             dt = disturbance[cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values  # T x n_dist
180 |             
181 |             ## Update the model in the controller
182 |             # CVXPY expects np.array for parameters
183 |             agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1])
184 |             agent.memory.x_lowers.append(torch.tensor(x_lower).float())
185 |             agent.memory.x_uppers.append(torch.tensor(x_upper).float())
186 |             
187 |             state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state
188 |             dt = torch.tensor(dt).float()
189 |             agent.memory.states.append(state)
190 |             agent.memory.disturbance.append(dt)
191 |             
192 |             ## Use policy_old to select action
193 |             #mu, sigma_sq, _ = agent.forward(state, dt.unsqueeze(1), current = False) # mu, sigma_sq: T x 1 x Dim.
194 |             mu, _ = agent.policy_old.nn(state, dt.unsqueeze(1))# T x n x n_action
195 |             sigma_sq = torch.ones_like(mu) * sigma**2
196 |             '''
197 |             ## Myopic Limit: A hack to make sure the projected actions do not result in tiny violations
198 |             margin = 0.1/(state_max-state_min)
199 |             u_limits = np.array([x_lower[0]+margin.item(), x_upper[0]-margin.item()]) - model_dict['a'] * state.item() -  model_dict['bd'].dot(dt[0, :-1].numpy())
200 |             u_limits /= model_dict['bu']
201 |             u_limits = np.clip(u_limits, 0, 1)
202 |             #pdb.set_trace()
203 |             '''
204 |             action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = None)
205 |             agent.memory.actions.append(action.detach().clone())
206 |             agent.memory.old_logprobs.append(old_logprob.detach().clone())
207 |             
208 |             ## Project without Gradient
209 |             mu[0] = action
210 |             mu = mu.squeeze().detach()
211 |             #pdb.set_trace()
212 |             try:
213 |                 action_feasible = agent.policy_old.proj_layer(state[0], dt[:, :-1],
214 |                                   mu, torch.zeros_like(mu), torch.zeros_like(mu),
215 |                                   torch.tensor(x_upper).float(),
216 |                                   torch.tensor(x_lower).float(),
217 |                                   torch.tensor(agent.policy_old.u_upper.value).float(),
218 |                                   torch.tensor(agent.policy_old.u_lower.value).float()
219 |                                   )
220 |                 action = action_feasible[0][0]
221 |             except:
222 |                 ## The feasible set is empty; Use some heuristics
223 |                 sp = np.mean((x_lower+x_upper)/2)
224 |                 if state.item() < sp:
225 |                     action = torch.tensor([1])
226 |                 else:
227 |                     action = torch.tensor([0])
228 |             #pdb.set_trace()
229 |             
230 | 
231 |             SWT = 20 + 45 * action.item()
232 |             if (SWT<30):
233 |                 HWOEN = -30 # De Facto Off
234 |                 action = torch.zeros_like(action)
235 |                 SWT = 20
236 |             else:
237 |                 HWOEN = 30 # De Facto On
238 |             if np.isnan(SWT):
239 |                 SWT = 20
240 |             action4env = (HWOEN, SWT)
241 |             
242 |             # Before step
243 |             print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}')
244 |             for _ in range(3):
245 |                 timeStep, obs, isTerminal = env.step(action4env)
246 | 
247 |             obs_dict = make_dict(obs_name, obs)
248 |             reward = R_func(obs_dict, SWT-20, eta)
249 |             
250 |             # Per step
251 |             real_rewards.append(reward)
252 |             '''
253 |             bl = obs_2017['rewards'][cur_time]
254 |             rewards.append((reward-bl) / multiplier)
255 |             '''
256 |             bl = 0#obs_2017['rewards'][cur_time]
257 |             rewards.append((reward-bl) / 15) # multiplier
258 |             # print(f'Reward={reward}, BL={bl}')
259 |             # Save for record
260 |             cur_time = start_time + pd.Timedelta(seconds = timeStep)
261 |             timeStamp.append(cur_time)
262 |             observations.append(obs)
263 |             actions_taken.append(action4env)
264 |         
265 |         writer.add_scalar('Reward', np.mean(real_rewards), i_episode)
266 |         writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode)
267 |         print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))
268 |         
269 |         advantages = Advantage_func(rewards, args.gamma)
270 |         agent.memory.advantages.append(advantages)
271 |         
272 |         # if -1, do not update parameters
273 |         if args.update_episode == -1:
274 |             agent.memory.clear_memory() # Prevent memory overflow
275 |         elif (i_episode >0) & (i_episode % args.update_episode ==0):
276 |             agent.update_parameters(sigma = sigma, K = 8)
277 |             sigma = max(sigma_min, sigma-sigma_step)
278 |             
279 |         obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name)
280 |         obs_df = obs_df.drop(columns=ctrl_name)
281 |         action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name)
282 |         obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True)
283 |         obs_df.to_pickle("results/obs_"+args.exp_name+".pkl")
284 | 
285 | if __name__ == '__main__':
286 |     main()
287 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Bingqing Chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PROF: Projected Feasibility
 2 | 
 3 | This is the official repository that implements the following paper:
 4 | 
 5 | > *Chen, Bingqing, Priya Donti, Kyri Baker, J. Zico Kolter, and Mario Berges. "Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization." In Proceedings of the Twelfth ACM International Conference on Future Energy Systems (e-energy '21). pp. 199–210. 2021.*
 6 | 
 7 | [[slides]](docs/slides.pdf)[[paper]](https://dl.acm.org/doi/10.1145/3447555.3464874) 
 8 | <!--- [[video]](https://www.youtube.com/watch?v=rH64WyPHCVE) ---> 
 9 | # Overview
10 | 
11 | PROjected Feasibility (PROF) is a method to enforce convex operational constraints within neural policies, by incorporating a differentiable projection layer within a neural network-based policy to enforce that all learned actions are feasible. We then update the policy end-to-end by propagating gradients through this differentiable projection layer, making the policy cognizant of the operational constraints. The result is a powerful neural policy that can flexibly optimize performance on the true underlying dynamics, while still satisfying the specified constraints.
12 | 
13 | We demonstrate our method on two applications: energy-efficient building operation and inverter control. 
14 | - In the building control case, PROF outperforms other RL agents, while maintaining temperature within the deadband except when the control is saturated.
15 | - In the inverter control setting, PROF satisfies the constraints 100% of the time and minimizes curtailment as well as possible within its conservative safe set after learning safely for half a day.
16 | 
17 | **Framework.** 
18 | 
19 | <img src="docs/figs/framework.png" data-canonical-src="docs/figs/framework.png" width="1000" />
20 | 
21 | 
22 | 
23 | # Code Usage
24 | ### Clone repository
25 | ```
26 | git clone https://github.com/INFERLab/PROF.git
27 | cd PROF
28 | ```
29 | 
30 | ### Set up the environment 
31 | Set up the virtual environment with your preferred environment/package manager.
32 | 
33 | The instruction here is based on **conda**. ([Install conda](https://docs.anaconda.com/anaconda/install/))
34 | ```
35 | conda env create --file environment.yml 
 36 | conda activate nn-w-proj
37 | ```
38 | 
39 | ### File Structure
40 | ```
41 | .
42 | ├── agents
 43 | │   ├── base.py             # Implement a controller that instantiates the projection problem given building parameters
 44 | │   ├── nn_policy.py        # Inherit the controller from base.py; Forward pass: NN + Differentiable projection
45 | │   └── inverter_policy.py  # Policy for inverter: NN + Differentiable projection
46 | ├── algo                    
47 | │   └── ppo.py	 	    # A PPO trainer 
48 | ├── env
49 | │   └── inverter.py         # Implements the IEEE 37-bus case
50 | ├── utils
51 | │   ├── network.py          # Implements neural network modules, e.g. MLP and LSTM
52 | │   └── ppo_utils.py        # Helper function for PPO trainer, e.g. Replay_Memory, Advantage_func
53 | ├── network		    # Matlab code for linearizing grid model; Data to construct IEEE 37-bus case;
54 | └── mypypower		    # Include some small changes from PyPower source code to allow customization
55 | 
56 | ```
57 | 
58 | ### Running
59 | You can replicate our experiments for *Experiment 1: Energy-efficient Building Operation* with `main_IW.py` and *Experiment 2: Inverter Control* with `main_inverter.py`
60 | 
61 | 
62 | ### Feedback
63 | 
64 | Feel free to send any questions/feedback to: [Bingqing Chen](mailto:bingqinc@andrew.cmu.edu)
65 | 
66 | ### Citation
67 | 
68 | If you use PROF, please cite us as follows:
69 | 
70 | ```
71 | @inproceedings{chen2021enforcing,
72 | author = {Chen, Bingqing and Donti, Priya L. and Baker, Kyri and Kolter, J. Zico and Berg\'{e}s, Mario},
73 | title = {Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization},
74 | year = {2021},
75 | isbn = {9781450383332},
76 | publisher = {Association for Computing Machinery},
77 | address = {New York, NY, USA},
78 | url = {https://doi.org/10.1145/3447555.3464874},
79 | doi = {10.1145/3447555.3464874},
80 | booktitle = {Proceedings of the Twelfth ACM International Conference on Future Energy Systems},
81 | pages = {199–210},
82 | numpages = {12},
83 | keywords = {inverter control, safe reinforcement learning, implicit layers, differentiable optimization, smart building},
84 | location = {Virtual Event, Italy},
85 | series = {e-Energy '21}
86 | }
87 | ```
88 | 


--------------------------------------------------------------------------------
/agents/base.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cvxpy as cp
  3 | import pdb
  4 | 
  5 | class Controller():
  6 |     def __init__(self, T, dt, RC_flag = True, **kwargs):
  7 |         # dt: planning timestep
  8 |         # T: planning horizon
  9 |         # RC_flag: Whether using RC model
 10 |         # **kwargs: Model Parameters
 11 |         self.T = T
 12 |         self.RC_flag = RC_flag
 13 |         self.err_count = 0
 14 |         
 15 |         if RC_flag:
 16 |         ## RC model: Simulation Study
 17 |             self.R = kwargs["R"]
 18 |             self.C = kwargs["C"]
 19 |             self.Pm = kwargs["Pm"]
 20 |             self.eta = kwargs["eta"]
 21 |             self.T_sp = kwargs["theta"]
 22 |             self.Delta = kwargs["Delta"]
 23 |             self.sign = kwargs["sign"] #(+) for heating and (-) for cooling
 24 |         else:
 25 |         ## ARX model: Hardware-in-the-loop Simulation
 26 |             self.ap = kwargs["a"]
 27 |             self.bu = kwargs["bu"]
 28 |             self.bd = kwargs["bd"]
 29 |             self.p = len(self.ap)
 30 |             self.m = len(self.bu) # how many u_prev to consider
 31 |             self.n_dist = len(self.bd)
 32 |             self.Pm = kwargs["Pm"]
 33 |             self.T_sp = 75
 34 |             self.Delta = 1.8
 35 |             
 36 |         # Variable
 37 |         self.u = cp.Variable(T)
 38 |         
 39 |         # Save u_i-u_bar from previous time step
 40 |         self.u_diff = cp.Parameter(T)
 41 |         self.v_bar = cp.Parameter(T)
 42 |         self.w_bar = cp.Parameter(T)
 43 |         self.objective = cp.sum_squares(self.u-self.u_diff-self.v_bar+self.w_bar)
 44 | 
 45 |         ## Info needed for constraints
 46 |         if RC_flag:
 47 |             self.x0 = cp.Parameter()
 48 |             self.d = cp.Parameter(T)
 49 |         else:
 50 |             ## Expects [x_{t-p}, ..., x_t]
 51 |             self.x0 = cp.Parameter(self.p)
 52 |             self.d = cp.Parameter((T, self.n_dist))
 53 |             
 54 |         # Set default value for constraints
 55 |         self.u_lower = cp.Parameter(T)
 56 |         self.u_lower.value = np.tile(0, T)
 57 |         self.u_upper = cp.Parameter(T)
 58 |         self.u_upper.value = np.tile(self.Pm, T)
 59 |         self.x_lower = cp.Parameter(T)
 60 |         self.x_lower.value = np.tile(self.T_sp-self.Delta, T)
 61 |         self.x_upper = cp.Parameter(T)
 62 |         self.x_upper.value = np.tile(self.T_sp+self.Delta, T)
 63 |         
 64 | 
 65 |         if RC_flag:
 66 |             a = np.exp(-dt/(self.R*self.C))
 67 |             b = self.eta * self.R
 68 | 
 69 |             lam = np.logspace(1, T, num = T, base = a)
 70 |             Lam = np.zeros((T, T))
 71 |             for i in range(T):
 72 |                 for j in range(i+1):
 73 |                     Lam[i, j] = a**(i-j)
 74 |             B = np.eye(T)*b*(1-a)*self.Pm
 75 |             self.d.value = (1-a)*np.tile(32, T)
 76 |         else:
 77 |             A = np.eye(self.T)
 78 |             for i in range(self.T-1):
 79 |                 A[i+1, max(0, i+1-self.p):i+1] = -np.flip(self.ap)[-(i+1):]
 80 |             Lam = np.linalg.inv(A)
 81 |     
 82 |             lam = np.zeros((self.T, self.p))
 83 |             for i in range(self.p):
 84 |                 lam[i, i:] = np.flip(self.ap)[:self.p-i]
 85 |         
 86 |             ## note: missing the term on u_{t-1}
 87 |             B = np.zeros((self.T, self.T))
 88 |             
 89 |             for i in range(self.m):
 90 |                 B += np.diag(np.ones(T-i), -i)*self.bu[i]/self.Pm
 91 |             
 92 |             self.d.value = np.zeros((T, self.n_dist))
 93 |             
 94 |         # Constraints
 95 |         self.constraints = [-self.u <= -self.u_lower,
 96 |                             self.u <= self.u_upper]
 97 |         if RC_flag:
 98 |             self.constraints += [-Lam@(self.sign*(1-a)*b*self.u+self.d) <= -self.x_lower + lam*self.x0,
 99 |                             Lam@(self.sign*(1-a)*b*self.u+self.d) <= self.x_upper - lam*self.x0]
100 |         else:
101 |             self.constraints += [-Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= -self.x_lower,
102 |             Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= self.x_upper]
103 | 
104 |         self.Problem = cp.Problem(cp.Minimize(self.objective),
105 |                                   self.constraints)
106 |         
107 |     def u_update(self, v_bar, w_bar):
108 |         self.v_bar.value = v_bar
109 |         self.w_bar.value = w_bar
110 |         try:
111 |             self.Problem.solve()
112 |         except:
113 |             print("Solver failed")
114 |             self.u.value = None
115 |             
116 |         ## Check solution valid
117 |         if self.u.value is not None:
118 |             return self.u.value, self.Problem.status
119 |         else:
120 |             u  = (self.x0.value-self.T_sp)/self.Delta
121 |             self.err_count += 1
122 |             return np.ones(self.T)*np.clip(u, 0, 1)*self.Pm, self.Problem.status
123 |     
124 |     def updateState(self, x, u_lower = None, u_upper = None,
125 |                     x_lower = None, x_upper = None,
126 |                     d = None): #
127 |         self.x0.value = x
128 |         
129 |         # Update constraints if necessary
130 |         if u_lower is not None:
131 |             if isinstance(u_lower, int) | isinstance(u_lower, float):
132 |                 self.u_lower.value = np.tile(u_lower, self.T)
133 |             else:
134 |                 assert len(u_lower) == self.T
135 |                 self.u_lower.value = u_lower
136 |         if u_upper is not None:
137 |             if isinstance(u_upper, int) | isinstance(u_upper, float):
138 |                 self.u_upper.value = np.tile(u_upper, self.T)
139 |             else:
140 |                 assert len(u_upper) == self.T
141 |                 self.u_upper.value = u_upper
142 |         if x_lower is not None:
143 |             assert len(x_lower) == self.T
144 |             self.x_lower.value = x_lower
145 |         if x_upper is not None:
146 |             assert len(x_upper) == self.T
147 |             self.x_upper.value = x_upper
148 |             self.T_sp = (x_upper[0]+x_lower[0])/2
149 |             self.Delta = (x_upper[0]-x_lower[0])/2
150 |             
151 |         ## Exog Variables
152 |         if d is not None:
153 |             assert len(d) == self.T
154 |             self.d.value = d
155 | 
156 | 
class ControllerGroup():
    """A collection of Controller instances that are updated and solved together."""

    def __init__(self, T, dt, parameters, RC_flag = True):
        """Create one Controller per entry of ``parameters``.

        Args:
            T: planning horizon shared by all controllers.
            dt: planning timestep shared by all controllers.
            parameters: sequence of keyword-argument dicts, one per controller.
            RC_flag: forwarded to every Controller.
        """
        self.n_agent = len(parameters)
        self.T = T
        self.dt = dt
        self.RC_flag = RC_flag
        self.controller_list = self._init_agents(parameters)

    def _init_agents(self, parameters):
        """Return the list of controllers built from ``parameters``."""
        return [Controller(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **param)
                for param in parameters]

    def updateState(self, x_list, u_list = None, d_list = None, x_lower_list = None, x_upper_list = None):
        """Push per-controller states, disturbances and state bounds.

        Every ``*_list`` argument is indexed by controller position; a list
        left as ``None`` passes ``None`` for that argument to each controller.
        ``u_list`` holds the actions from the previous timestep and is used to
        initialise each controller's ``u_diff``; without it ``u_diff`` is zero.
        """
        # The mean action is the same for every controller: compute it once.
        u_bar = np.mean(u_list, axis = 0) if u_list is not None else None

        for idx, controller in enumerate(self.controller_list):
            controller.updateState(
                x_list[idx],
                d = d_list[idx] if d_list is not None else None,
                x_lower = x_lower_list[idx] if x_lower_list is not None else None,
                x_upper = x_upper_list[idx] if x_upper_list is not None else None,
            )

            ## Initialize the controller with action from prev timestep
            if u_list is not None:
                controller.u_diff.value = u_list[idx] - u_bar
            else:
                controller.u_diff.value = np.zeros(self.T)

    def u_update(self, v_bar, w_bar):
        """Solve every controller's problem and refresh the ``u_diff`` terms.

        Returns:
            ``(u_bar, u_all)``: the mean action sequence over controllers and
            the array of the individual action sequences.
        """
        u_list = []
        for idx, controller in enumerate(self.controller_list):
            u_i, status = controller.u_update(v_bar, w_bar)
            if status in ["infeasible", "unbounded"]:
                print(idx, status)
            u_list.append(u_i)

        u_bar = np.mean(u_list, axis = 0)
        for idx, controller in enumerate(self.controller_list):
            controller.u_diff.value = u_list[idx] - u_bar
        return u_bar, np.array(u_list)
197 |         
198 | 


--------------------------------------------------------------------------------
/agents/inverter_policy.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cvxpy as cp
  3 | from cvxpylayers.torch import CvxpyLayer
  4 | 
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.optim as optim
  8 | from torch.distributions import MultivariateNormal, Normal
  9 | from copy import deepcopy
 10 | import operator
 11 | from functools import reduce
 12 | 
 13 | import pdb
 14 | 
 15 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 16 | 
 17 | 
 18 | ### Can move to utils.network if appropriate
 19 | class Net(nn.Module):
 20 |     def __init__(self, n_bus, n_inverters, shared_hidden_layer_sizes, indiv_hidden_layer_sizes, n_input = 3):
 21 |         super(Net, self).__init__()
 22 |         #### Multi-headed architecture
 23 |         # "Shared" model
 24 |         # Set up non-linear network of Linear -> BatchNorm -> ReLU
 25 |         layer_sizes = [n_input * n_bus] + shared_hidden_layer_sizes[:-1]
 26 |         layers = reduce(operator.add, 
 27 |             [[nn.Linear(a,b), nn.ReLU(), ] # nn.BatchNorm1d(b), nn.Dropout(p=0.2)]
 28 |                 for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])])
 29 |         layers += [nn.Linear(layer_sizes[-1], shared_hidden_layer_sizes[-1])]
 30 |         self.base_net = nn.Sequential(*layers)
 31 |         
 32 |         # Individual inverter model
 33 |         layer_sizes = [shared_hidden_layer_sizes[-1]] + indiv_hidden_layer_sizes
 34 |         layers = reduce(operator.add, 
 35 |             [[nn.Linear(a,b),  nn.ReLU(), ] # nn.BatchNorm1d(b), nn.Dropout(p=0.2)]
 36 |                 for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])])
 37 |         layers += [nn.Linear(layer_sizes[-1], 2)]  # output p and q
 38 |         indiv_model = nn.Sequential(*layers)
 39 |         self.inverter_nets = nn.ModuleList(
 40 |                 [deepcopy(indiv_model) for _ in range(n_inverters)]
 41 |                 )
 42 | 
 43 |         # ## Simple fully connected architecture
 44 | 
 45 |         # # Set up non-linear network of Linear -> BatchNorm -> ReLU -> Dropout layers
 46 |         # self.n_inverters = n_inverters
 47 |         # layer_sizes = [4 * n_inverters] + shared_hidden_layer_sizes
 48 |         # layers = reduce(operator.add, 
 49 |         #     [[nn.Linear(a,b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.2)] 
 50 |         #         for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])])
 51 |         # layers += [nn.Linear(layer_sizes[-1], 2 * n_inverters)]
 52 |         # self.nn = nn.Sequential(*layers)
 53 | 
 54 | 
 55 |     def forward(self, state):
 56 |         '''
 57 |             Input: Vector of voltage magnitudes and angles, real and reactive power demand
 58 |             Output: Vector of inverter P setpoints, vector of inverter Q setpoints
 59 |         '''
 60 | 
 61 |         # Multi-headed architecture
 62 |         z = self.base_net(state)
 63 |         res = [inverter(z) for inverter in self.inverter_nets]
 64 |         Ps = torch.cat([x[:, [0]] for x in res], dim=1)
 65 |         Qs = torch.cat([x[:, [1]] for x in res], dim=1)
 66 |         return Ps, Qs
 67 | 
 68 |         # ## Simple fully connected architecture
 69 |         # z = self.nn(state)
 70 |         # return z[:, :self.n_inverters], z[:, self.n_inverters:]
 71 | 
 72 | class NeuralController(nn.Module):
 73 |     def __init__(self, network, memory, lr, lam = 10, scaler = 1000, **env_params):
 74 |         super(NeuralController, self).__init__()
 75 |         self.nn = network
 76 |         self.optimizer = optim.RMSprop(self.nn.parameters(), lr=lr)
 77 |         self.lam = lam
 78 |         self.memory = memory
 79 |         self.mse = nn.MSELoss()
 80 |         self.ReLU = nn.ReLU()
 81 |         
 82 |         self.n_bus = env_params['n_bus']
 83 |         self.gen_idx = env_params['gen_idx']
 84 |         self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx]
 85 |         
 86 |         H = env_params['H']
 87 |         R = H[:, :self.n_bus]
 88 |         B = H[:, self.n_bus:]
 89 |         R_new = np.vstack([np.hstack([R[self.gen_idx][:, self.gen_idx],
 90 |                                       R[self.gen_idx][:, self.other_idx]]),
 91 |                             np.hstack([R[self.other_idx][:, self.gen_idx],
 92 |                                        R[self.other_idx][:, self.other_idx]])
 93 |                             ])
 94 |         B_new = np.vstack([np.hstack([B[self.gen_idx][:, self.gen_idx],
 95 |                                       B[self.gen_idx][:, self.other_idx]]),
 96 |                             np.hstack([B[self.other_idx][:, self.gen_idx],
 97 |                                        B[self.other_idx][:, self.other_idx]])
 98 |                             ])
 99 |         H_new = np.hstack([R_new, B_new])
100 |         
101 |         self.scaler = scaler
102 |         self.V0 = env_params['V0']
103 |         self.P0 = env_params['P0']
104 |         self.Q0 = env_params['Q0']
105 |         self.V_upper = env_params['V_upper']
106 |         self.V_lower = env_params['V_lower']
107 |         self.S_rating = env_params['S_rating']
108 |         
109 |         # Need to set as nn.Parameter such that to(DEVICE) move these to GPU as well
110 |         self.V0_torch = nn.Parameter(torch.tensor(self.V0).float())
111 |         self.V_upper_torch = nn.Parameter(torch.tensor(self.V_upper).float())
112 |         self.V_lower_torch = nn.Parameter(torch.tensor(self.V_lower).float())
113 |         self.H_torch = nn.Parameter(torch.tensor(H_new).float())
114 |         self.P0_torch = nn.Parameter(torch.tensor(self.P0).float())
115 |         self.Q0_torch = nn.Parameter(torch.tensor(self.Q0).float())
116 |         self.S_rating_torch = nn.Parameter(torch.tensor(self.S_rating).float())
117 | 
118 |         # Set up projection onto inverter setpoint constraints and linearized voltage constraints
119 |         P = cp.Variable(len(self.gen_idx))
120 |         Q = cp.Variable(len(self.gen_idx))
121 |         
122 |         # P_tilde and Q_tilde are the pre-projection actions
123 |         P_tilde = cp.Parameter(len(self.gen_idx))
124 |         Q_tilde = cp.Parameter(len(self.gen_idx))
125 |         
126 |         # No inverter buses
127 |         P_nc = cp.Parameter(len(self.other_idx))
128 |         Q_nc = cp.Parameter(len(self.other_idx))
129 |         
130 |         P_av = cp.Parameter(len(self.gen_idx))
131 |         
132 |         # Voltage: Apply to All Buses
133 |         z = cp.hstack([P, P_nc, Q, Q_nc]) # z: (70, )
134 |         constraints = [self.V_lower - self.V0 <= H_new@z,
135 |                        H_new@z <= self.V_upper - self.V0]
136 |         
137 |         ## Power: Only applies to Inverters
138 |         PQ = cp.vstack([self.P0[self.gen_idx] + P,
139 |                         self.Q0[self.gen_idx] + Q]) # (2, n)
140 |         constraints += [0 <= self.P0[self.gen_idx] + P,
141 |                        self.P0[self.gen_idx] + P <= P_av,
142 |                        cp.norm(PQ, axis = 0) <= self.S_rating]
143 |         
144 |         objective = cp.Minimize(cp.sum_squares(P - P_tilde) + cp.sum_squares(Q - Q_tilde))
145 |         problem = cp.Problem(objective, constraints)
146 | 
147 |         self.proj_layer = CvxpyLayer(problem, variables=[P, Q],
148 |                 parameters=[P_tilde, Q_tilde,
149 |                            P_nc, Q_nc, P_av])
150 |         
151 |         self.proj_count = 0
152 |         
153 |     def forward(self, state, Sbus, P_av, inference_flag = True):
154 |         '''
155 |         Input:
156 |             state: [dV(k-1), P_nc, Q_nc] 
157 |           where,
158 |                 Z_nc = Z - Z0
159 |             May get (n, dim) or (dim);
160 |         Output:
161 |             P, Q (with repsect to the reference point)
162 |         '''
163 |         ## Get information for non-controllable loads
164 |         P_all = Sbus.real /self.scaler
165 |         Q_all = Sbus.imag /self.scaler
166 |         if len(Sbus.shape)==1:
167 |             P_nc = Sbus.real[self.other_idx] / self.scaler
168 |             Q_nc = Sbus.imag[self.other_idx] / self.scaler
169 |         elif len(Sbus.shape)==2:
170 |             P_nc = Sbus.real[:, self.other_idx] / self.scaler
171 |             Q_nc = Sbus.imag[:, self.other_idx] / self.scaler
172 |         else: 
173 |             print("Well, not expected to happen")
174 | 
175 |         P_tilde, Q_tilde = self.nn(state.to(DEVICE)) # n x n_inverter
176 |         
177 |         ## During inference if the action is already feasible, not need to project
178 |         if inference_flag:
179 |             P_tilde = P_tilde.squeeze()
180 |             Q_tilde = Q_tilde.squeeze()
181 |             if self.is_feasible(P_tilde.detach().clone()/self.scaler, 
182 |                                Q_tilde.detach().clone()/self.scaler,
183 |                                P_nc, Q_nc, P_av):
184 |                 P_all[self.gen_idx] = P_tilde.detach().cpu().numpy() / self.scaler
185 |                 Q_all[self.gen_idx] = Q_tilde.detach().cpu().numpy() / self.scaler
186 |                 return P_all, Q_all
187 |             else:
188 |                 try: 
189 |                     P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler,
190 |                         torch.tensor(P_nc).float().to(DEVICE),
191 |                         torch.tensor(Q_nc).float().to(DEVICE),
192 |                         torch.tensor(P_av).float().to(DEVICE))
193 |                     self.proj_count += 1
194 |                     P_all[self.gen_idx] = P.detach().cpu().numpy() 
195 |                     Q_all[self.gen_idx] = Q.detach().cpu().numpy() 
196 |                 except: # The solver dies for some reason
197 |                     P_all[self.gen_idx] = 0 
198 |                     Q_all[self.gen_idx] = 0
199 |                 return P_all, Q_all
200 |         else:
201 |             #pdb.set_trace()
202 |             P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler,
203 |                         torch.tensor(P_nc).float().to(DEVICE),
204 |                         torch.tensor(Q_nc).float().to(DEVICE),
205 |                         torch.tensor(P_av).float().to(DEVICE))
206 |             proj_loss = self.mse(P.detach(), P_tilde/self.scaler)  \
207 |                         + self.mse(Q.detach(), Q_tilde/self.scaler)
208 |             return P, Q, proj_loss
209 |     
210 |     def update(self, batch_size = 64, n_batch = 16):
211 |         for _ in range(n_batch):
212 |             state, Sbus, P_av = self.memory.sample_batch(batch_size = batch_size)
213 |             P, Q, proj_loss = self.forward(state, Sbus, P_av, inference_flag = False)
214 |             #pdb.set_trace()
215 |             curtail = self.ReLU(torch.tensor(P_av).to(DEVICE) - P)
216 |             loss = curtail.mean() + self.lam * proj_loss
217 |             print(f'curtail = {curtail.mean().item()}, proj_loss = {proj_loss.item()}')
218 |             
219 |             self.optimizer.zero_grad()
220 |             loss.backward()
221 |             self.optimizer.step()
222 |                
223 |     def is_feasible(self, P, Q, P_nc, Q_nc, P_av):
224 |         '''
225 |          Input: P, Q (n_bus)
226 |         '''
227 |         eps = 1e-6
228 |         assert P.ndimension() == 1
229 | 
230 |         z = torch.cat([P, torch.tensor(P_nc).float().to(DEVICE),
231 |                        Q, torch.tensor(Q_nc).float().to(DEVICE)], dim = -1) # (70)
232 |         v = self.H_torch.matmul(z) # (35)
233 |         
234 |         if torch.any(v < self.V_lower_torch -self.V0_torch - eps) | torch.any(v > self.V_upper_torch-self.V0_torch+eps):
235 |             return False
236 | 
237 |         P = P + self.P0_torch[self.gen_idx]
238 |         Q = Q + self.Q0_torch[self.gen_idx]
239 |         PQ = torch.stack([P, Q]) # (2, 21)
240 |         if torch.any(torch.norm(PQ, dim = 0) > self.S_rating_torch + eps):
241 |             return False
242 | 
243 |         if torch.any(P < 0-eps) | torch.any(P > torch.tensor(P_av).to(DEVICE)+eps):
244 |             return False
245 |         else:
246 |             return True
247 | 


--------------------------------------------------------------------------------
/agents/nn_policy.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cvxpy as cp
  3 | from cvxpylayers.torch import CvxpyLayer
  4 | import pdb
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.optim as optim
  8 | from torch.distributions import MultivariateNormal, Normal
  9 | 
 10 | from utils.network import MLP, LSTM
 11 | from agents.base import Controller, ControllerGroup
 12 |     
 13 | class NeuralController(Controller):
 14 |     def __init__(self, T, dt, network, RC_flag = True,
 15 |                  **parameters):
 16 |         super().__init__(T, dt, RC_flag = RC_flag, **parameters)
 17 |         ## Inherited Properties:
 18 |         # cp.Variable: self.u
 19 |         # cp.Parameter: self.u_diff; self.v_bar; self.w_bar;
 20 |         #               self.x0; self.d;
 21 |         #               self.x_lower; self.x_upper;
 22 |         #               self.u_lower; self.u_upper;
 23 |         # self.objective
 24 |         # self.Problem
 25 |         # self.constraints = [...]
 26 | 
 27 |         ## Inherited Methods:
 28 |         # updateState()
 29 |         
 30 |         ## Use ADMM update rule for the time being
 31 |         # u_update(self, v_bar, w_bar):
 32 |         
 33 |         '''
 34 |         LSTM Usage:
 35 |             lstm = LSTM(n_state, n_action, n_dist)
 36 |             mu, sigma_sq = lstm.forward(state, disturbance)
 37 |         Input:
 38 |             state: n x dim
 39 |             disturbance: T x n x dist
 40 |         Output:
 41 |             mu, sigma_sq: T x n x n_action
 42 |         '''
 43 |         self.nn = network
 44 |         self.proj_layer = CvxpyLayer(self.Problem, variables = [self.u],
 45 |                                   parameters = [self.x0, self.d,
 46 |                                                 self.u_diff, self.v_bar, self.w_bar,
 47 |                                                 self.x_upper, self.x_lower,
 48 |                                                 self.u_upper, self.u_lower])
 49 |         self.criterion = nn.MSELoss() # reduction = 'sum'
 50 | 
 51 |     def forward(self, state, disturbance, x_lowers = None, x_uppers = None, detach = False):
 52 |         '''
 53 |         Input:
 54 |             state: (n, n_state)
 55 |             disturbance: (T, n, n_dist)
 56 |             x_lowers, x_uppers: (n, T)
 57 |         Output:
 58 |             actions, sigma_sq: (T, n, n_action)
 59 |             #proj_loss: scalar
 60 |         '''
 61 |         T, n_batch, n_dist = disturbance.shape
 62 |         mus, sigma_sqs = self.nn(state, disturbance)# T x n x n_action
 63 |                         
 64 |         actions = []
 65 |         #TODO: Implement multi-threading
 66 |         for i in range(n_batch):
 67 |             mu = mus[:, i] # T x n_action
 68 |             
 69 |             if n_batch==1:
 70 |                 if x_lowers is None:
 71 |                     x_lower = torch.tensor(self.x_lower.value).float()
 72 |                 if x_uppers is None:
 73 |                     x_upper = torch.tensor(self.x_upper.value).float()
 74 |                 
 75 |             else:
 76 |                 x_lower = x_lowers[i]
 77 |                 x_upper = x_uppers[i]
 78 |             
 79 |             # The last value is setpoint; Do not use for projection
 80 |             dt = disturbance[:, i, :-1] # T x n_dist
 81 |             x0 = state[i]
 82 |             mu = mu.squeeze(1) # T x 1 ->T
 83 |             
 84 |             try:
 85 |                 u_pred = self.proj_layer(x0, dt,
 86 |                    mu, torch.zeros_like(mu), torch.zeros_like(mu),
 87 |                    x_upper, x_lower,
 88 |                    torch.tensor(self.u_upper.value).float(),
 89 |                    torch.tensor(self.u_lower.value).float())
 90 |                 actions.append(u_pred[0])
 91 |             except:
 92 |                 ## The feasible set is empty; Use some heuristics
 93 |                 sp = torch.mean((x_lower+x_upper)/2)
 94 |                 if x0.item() < sp:
 95 |                     actions.append(torch.ones_like(mu))
 96 |                 else:
 97 |                     actions.append(torch.zeros_like(mu))
 98 | 
 99 |         actions = torch.stack(actions).transpose(0, 1) # T x n
100 |         proj_loss = self.criterion(mus.squeeze(-1), actions)
101 |         return actions.unsqueeze(-1), sigma_sqs, proj_loss
102 | 
class NeuralControllerGroup(ControllerGroup):
    """``ControllerGroup`` whose agents are ``NeuralController`` instances.

    updateState() and u_update() are inherited unchanged.
    """
    def __init__(self, T, dt, parameters, RC_flag = True):
        super().__init__(T, dt, parameters, RC_flag = RC_flag)

    def _init_agents(self, parameters):
        """Build one NeuralController per parameter dict (overrides the base factory)."""
        return [
            NeuralController(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **kwargs)
            for kwargs in parameters
        ]

    def u_warmstart(self, x_list):
        """Stack, per agent, the detached output of ``forward`` on its state as a (1, -1) row.

        NOTE(review): forward is called with a single argument and its result
        is used as one tensor -- confirm this matches the current
        NeuralController.forward signature.
        """
        predictions = []
        for agent_idx, agent in enumerate(self.controller_list):
            row = x_list[agent_idx].reshape(1, -1) # 1 x n_input
            predictions.append(agent.forward(row).detach().numpy())
        return np.stack(predictions)

    def append(self, states, u_stars):
        """Append the pair (states[i], u_stars[i]) to the memory of agent i.

        NOTE(review): presumes every controller exposes a ``memory`` list --
        verify where it is created.
        """
        for agent_idx, agent in enumerate(self.controller_list):
            sample = (states[agent_idx], u_stars[agent_idx])
            agent.memory.append(sample)

    def update_policy(self, batch_size = 32):
        """Call ``update_policy(batch_size)`` on every agent; return the losses as an array.

        NOTE(review): presumes every controller defines ``update_policy`` --
        verify against the controller class.
        """
        return np.array([agent.update_policy(batch_size) for agent in self.controller_list])
134 |         
135 | 


--------------------------------------------------------------------------------
/algo/ppo.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import torch.utils.data as data
  5 | import torch.optim as optim
  6 | from torch.distributions import MultivariateNormal, Normal
  7 | 
  8 | import pdb
  9 | from copy import deepcopy
 10 | 
 11 | from utils.ppo_utils import Dataset
 12 | 
 13 | class PPO():
 14 |     def __init__(self, policy, memory, clip_param = 0.2, lam = 10, lr = 5e-4, n_ctrl = 1):
 15 |         self.memory = memory
 16 | 
 17 |         self.policy = policy
 18 |         self.policy_old = deepcopy(policy)
 19 | 
 20 |         self.clip_param = clip_param
 21 |         self.optimizer = optim.RMSprop(self.policy.nn.parameters(), lr=lr)
 22 |         self.lam = lam
 23 |         
 24 |         self.n_ctrl = n_ctrl
 25 |         
 26 |     # Use the "current" flag to indicate which set of parameters to use
 27 |     def forward(self, state, disturbance, x_lowers = None, x_uppers = None, current = True):
 28 |         T, n_batch, n_dist = disturbance.shape
 29 |         if current == True:
 30 |             mu, sigma_sq, proj_loss = self.policy.forward(state, disturbance, x_lowers = x_lowers, x_uppers = x_uppers)
 31 |         else:
 32 |             mu, sigma_sq, proj_loss = self.policy_old.forward(state, disturbance)
 33 |         return mu, sigma_sq, proj_loss
 34 | 
 35 |     def select_action(self, mu, sigma_sq, u_limits = None):
 36 |         if self.n_ctrl > 1:
 37 |             m = MultivariateNormal(mu, torch.diag(sigma_sq.squeeze()).unsqueeze(0))
 38 |         else:
 39 |             m = Normal(mu, sigma_sq**0.5)
 40 |         action = m.sample()
 41 |         if u_limits is not None:
 42 |             action = torch.clamp(action, min = u_limits[0], max = u_limits[1])
 43 |         log_prob = m.log_prob(action)
 44 |         return action, log_prob
 45 | 
 46 |     def evaluate_action(self, mu, actions, sigma_sq):
 47 |         n_batch = len(mu)
 48 |         if self.n_ctrl > 1:
 49 |             cov = torch.diag_embed(sigma_sq)
 50 |             m = MultivariateNormal(mu, cov)
 51 |         else:
 52 |             m = Normal(mu, sigma_sq**0.5)
 53 |         log_prob = m.log_prob(actions)
 54 |         entropy = m.entropy()
 55 |         return log_prob, entropy
 56 |     
 57 |     def _get_training_samples(self):
 58 |         states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers = self.memory.sample()
 59 |         batch_set = Dataset(states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers)
 60 |         batch_loader = data.DataLoader(batch_set, batch_size=32, shuffle=True, num_workers=2)
 61 |         return batch_loader
 62 |     
 63 |     def update_parameters(self, sigma=0.1, K = 4):
 64 |         loader = self._get_training_samples()
 65 |         for i in range(K):
 66 |             for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader:
 67 |                 n_batch = states.shape[0]
 68 |                 # pdb.set_trace()
 69 |                 mus, sigma_sqs, proj_loss = self.policy.forward(states, disturbance.transpose(0, 1),  x_lowers = x_lowers, x_uppers = x_uppers) # x, u: T x N x Dim.
 70 |                 sigma_sqs = torch.ones_like(mus) * sigma**2
 71 |                 log_probs, entropies = self.evaluate_action(mus[0], actions, sigma_sqs)
 72 |         
 73 |                 ratio = torch.exp(log_probs.squeeze()-old_logprobs)
 74 |                 surr1 = ratio * advantages
 75 |                 surr2 = torch.clamp(ratio, 1-self.clip_param, 1+self.clip_param) * advantages
 76 |                 loss  = -torch.min(surr1, surr2).mean()
 77 |                 self.optimizer.zero_grad()
 78 |                 ## Auxiliary losses
 79 |                 loss -= torch.mean(entropies) * 0.01
 80 |                 loss += self.lam * proj_loss
 81 |                 
 82 |                 loss.backward()
 83 |                 nn.utils.clip_grad_norm_(self.policy.nn.parameters(), 100)
 84 |                 self.optimizer.step()
 85 |                 print("Post Step")
 86 |         self.policy_old.nn.load_state_dict(self.policy.nn.state_dict())
 87 |     
 88 |     ##TODO: Move the update_policy to a Trainer class
 89 |     def behavior_cloning(self, batch_size):
 90 |         u_hat, u_star, u_nns = self._get_training_samples(batch_size)
 91 | 
 92 |         loss = self.criterion(u_hat, u_star)
 93 |         loss += self.lam * self.criterion(u_nns, u_hat) # Auxiliary loss
 94 |         
 95 |         self.optimizer.zero_grad()
 96 |         loss.backward()
 97 |         self.optimizer.step()
 98 |         
 99 |         self.predictions = []
100 |         self.targets = []
101 |         return loss.detach()
102 |             
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/data/ARX-0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/ARX-0


--------------------------------------------------------------------------------
/data/data_2017_baseline.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_2017_baseline.pkl


--------------------------------------------------------------------------------
/data/data_TMY3_baseline.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_TMY3_baseline.pkl


--------------------------------------------------------------------------------
/data/param_IW-nn-1800:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/param_IW-nn-1800


--------------------------------------------------------------------------------
/docs/figs/framework.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.pdf


--------------------------------------------------------------------------------
/docs/figs/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.png


--------------------------------------------------------------------------------
/docs/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/slides.pdf


--------------------------------------------------------------------------------
/env/inverter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy
  3 | import scipy.io
  4 | import torch
  5 | import pdb
  6 | 
  7 | from mypypower.newtonpf import newtonpf
  8 | from pypower.ppoption import ppoption
  9 | 
 10 | Zbase = 1;
 11 | Vbase = 4800;
 12 | Sbase = Vbase **2 / Zbase
 13 | '''
 14 | def getSbus(P, Q, fac = Sbase/1000):
 15 |     ## This expects P and Q in kW
 16 |     P = P/fac;
 17 |     Q = Q/fac;
 18 |     return P + 1j*Q
 19 | '''
 20 | class IEEE37():
 21 |     def __init__(self, filePath = './network/IEEE-37',
 22 |                        dataPath = './data'):
 23 |         Ybus = scipy.io.loadmat(f'{filePath}/Ybus.mat')
 24 |         self.Ybus = Ybus['Ybus']
 25 |         self.n = self.Ybus.shape[0]
 26 |         self.v_lower = 0.95
 27 |         self.v_upper = 1.05
 28 |         
 29 |         # Load linearized model
 30 |         R = scipy.io.loadmat(f'{filePath}_linearized/R.mat')
 31 |         B = scipy.io.loadmat(f'{filePath}_linearized/B.mat')
 32 |         self.R = R['R']
 33 |         self.B = B['B']
 34 |         
 35 |         ## Bus index lists of each type of bus
 36 |         self.ref = np.array([0])
 37 |         self.pv = np.array([], dtype = np.int32) #np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1
 38 |         self.pq = np.array([i  for i in range(self.n) if (i not in self.ref) & (i not in self.pv)], dtype = np.int32)
 39 |         self.n_pq = len(self.pq)
 40 |         self.ppopt = ppoption()
 41 |         
 42 |         self._get_reference()
 43 |         self._get_load_and_gen(dataPath = dataPath)
 44 |         
 45 |     def getSbus(self, t, wrt_reference = True, w_slack = False):
 46 |         '''
 47 |             Returns the vector of complex bus power injections, that is, generation
 48 |             minus load. Power is expressed in per unit.
 49 |         '''
 50 |         P = self.P_gen[t] - self.P_l[t]
 51 |         Q = - self.Q_l[t]
 52 |         S = P + 1j*Q
 53 |         P_av = self.P_gen[t]
 54 |         
 55 |         if wrt_reference:
 56 |             S = S - self.S0
 57 |         
 58 |         if w_slack:
 59 |             return S, P_av[self.gen_idx]
 60 |         else:
 61 |             return S[-self.n_pq:], P_av[self.gen_idx]
 62 | 
 63 |     def step(self, Sbus, wrt_reference = True):
 64 |         '''
 65 |         returns:
 66 |             voltage magitude, solver flag
 67 |         '''
 68 |         if wrt_reference:
 69 |             S = self.S0.copy()
 70 |             S[-len(Sbus):] += Sbus
 71 |         else:
 72 |             S = Sbus
 73 |         V, success, _  = newtonpf(scipy.sparse.csr_matrix(self.Ybus), S, self.V0, self.ref, self.pv, self.pq, self.ppopt)
 74 |         return np.abs(V), success
 75 |     
 76 |     def linear_estimate(self, P, Q, wrt_reference = True):
 77 |         if wrt_reference:
 78 |             if torch.is_tensor(P):
 79 |                 return torch.tensor(self.R).float().matmul(P) + torch.tensor(self.B).float().matmul(Q)
 80 |             else:
 81 |                 return self.R.dot(P) + self.B.dot(Q)
 82 |         else:
 83 |             V = self.V0.copy()
 84 |             delta_p = P-self.P0
 85 |             delta_q = Q-self.Q0
 86 |             V[-self.n_pq:] += self.R.dot(delta_p[-self.n_pq:]) + self.B.dot(delta_q[-self.n_pq:])
 87 |             return V
 88 |     
 89 |     ## Reference Point for Linearization
 90 |     def _get_reference(self):
 91 |         # Flat voltage point
 92 |         self.V0 = np.ones(self.n);
 93 |         A0 = np.zeros(self.n);
 94 |         # Corresponding to current injection
 95 |         J0 = self.Ybus.dot(self.V0*np.exp(1j*A0));
 96 |         # Corresponding to power injection
 97 |         S0 = self.V0*np.exp(1j*A0)*np.conj(J0);
 98 |         self.P0 = np.real(S0);
 99 |         self.Q0 = np.imag(S0);
100 |         self.S0 = self.P0 + 1j*self.Q0
101 |         
102 |     ## Load Demand and Generation
103 |     def _get_load_and_gen(self, dataPath = './data'):
104 |         # Load
105 |         self.load_idx = np.array([2, 5, 6, 7, 9, 10, 11, 13, 14, 16, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36]) -1
106 |         load = scipy.io.loadmat(f'{dataPath}/Loads_1sec.mat') #(Unit in W)
107 |         load = load['Loads'].transpose() # 604800 x 8
108 |         self.P_l = np.zeros((load.shape[0], self.n))
109 |         for i, idx in enumerate(self.load_idx):
110 |             self.P_l[:, idx] = load[:, i % load.shape[1]]
111 |         self.Q_l = 0.5 * self.P_l
112 |         # Convert loads to p.u.
113 |         self.P_l /= Sbase;
114 |         self.Q_l /= Sbase;
115 | 
116 |         # Generation
117 |         solar_rad = scipy.io.loadmat(f'{dataPath}/Irradiance_1sec.mat')
118 |         solar_rad = solar_rad['Irr24_seq'].transpose() # # 604800 x 1
119 | 
120 |         self.gen_idx = np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1
121 | 
122 |         #% PV capacity [kVA]
123 |         self.max_S = np.array([200, 200, 100, 200, 200, 200, 200, 200, 200, 200, 200,
124 |                     200, 200, 200, 200, 200, 200, 350, 350, 300, 300]);
125 |         self.max_S = self.max_S * 1000 / Sbase # Convert to p.u.
126 |         #% Area of the PV array
127 |         Area_PV = np.array([100, 100, 100, 200, 200, 200, 200, 200, 200, 100,
128 |            200, 200, 200, 100, 200, 200, 200, 350, 350, 300, 300]);
129 |         #% PV efficiency;
130 |         PV_Irradiance_to_Power_Efficiency = 1;
131 | 
132 |         self.P_gen = np.zeros((load.shape[0], self.n))
133 |         gen = solar_rad * Area_PV * PV_Irradiance_to_Power_Efficiency
134 |         gen /= Sbase # Convert to p.u.
135 |         self.P_gen[:, self.gen_idx] = gen.clip(max = self.max_S.reshape(1, -1))
136 |     
137 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: nn-w-proj
  2 | channels:
  3 |   - conda-forge
  4 |   - defaults
  5 | dependencies:
  6 |   - _libgcc_mutex=0.1=conda_forge
  7 |   - _openmp_mutex=4.5=1_gnu
  8 |   - ampl-mp=3.1.0=h616b090_1004
  9 |   - backcall=0.2.0=pyhd3eb1b0_0
 10 |   - ca-certificates=2021.4.13=h06a4308_1
 11 |   - certifi=2020.12.5=py39h06a4308_0
 12 |   - cvxpy=1.1.12=py39hf3d152e_0
 13 |   - cvxpy-base=1.1.12=py39hde0f152_0
 14 |   - cyipopt=1.0.3=py39h3c5bb4f_0
 15 |   - decorator=5.0.7=pyhd3eb1b0_0
 16 |   - ecos=2.0.8=py39hce5d2b2_0
 17 |   - future=0.18.2=py39hf3d152e_3
 18 |   - ipopt=3.13.4=h7ede334_0
 19 |   - ipykernel=5.3.4=py39hb070fc8_0
 20 |   - ipython=7.22.0=py39hb070fc8_0
 21 |   - ipython_genutils=0.2.0=pyhd3eb1b0_1
 22 |   - jedi=0.17.2=py39h06a4308_1
 23 |   - jupyter_client=6.1.12=pyhd3eb1b0_0
 24 |   - jupyter_core=4.7.1=py39h06a4308_0
 25 |   - ld_impl_linux-64=2.35.1=hea4e1c9_2
 26 |   - libblas=3.9.0=9_openblas
 27 |   - libcblas=3.9.0=9_openblas
 28 |   - libffi=3.3=h58526e2_2
 29 |   - libgcc-ng=9.3.0=h2828fa1_19
 30 |   - libgfortran-ng=9.3.0=hff62375_19
 31 |   - libgfortran5=9.3.0=hff62375_19
 32 |   - libgomp=9.3.0=h2828fa1_19
 33 |   - liblapack=3.9.0=9_openblas
 34 |   - libopenblas=0.3.15=pthreads_h8fe5266_0
 35 |   - libsodium=1.0.18=h7b6447c_0
 36 |   - libstdcxx-ng=9.3.0=h6de172a_19
 37 |   - metis=5.1.0=h58526e2_1006
 38 |   - mumps-include=5.2.1=ha770c72_10
 39 |   - mumps-seq=5.2.1=h47a8eb5_10
 40 |   - ncurses=6.2=h58526e2_4
 41 |   - numpy=1.20.2=py39hdbf815f_0
 42 |   - openssl=1.1.1k=h27cfd23_0
 43 |   - osqp=0.6.2=py39hde0f152_1
 44 |   - parso=0.7.0=py_0
 45 |   - pexpect=4.8.0=pyhd3eb1b0_3
 46 |   - pickleshare=0.7.5=pyhd3eb1b0_1003
 47 |   - pip=21.0.1=py39h06a4308_0
 48 |   - prompt-toolkit=3.0.17=pyh06a4308_0
 49 |   - ptyprocess=0.7.0=pyhd3eb1b0_2
 50 |   - pygments=2.8.1=pyhd3eb1b0_0
 51 |   - python=3.9.4=hffdb5ce_0_cpython
 52 |   - python-dateutil=2.8.1=pyhd3eb1b0_0
 53 |   - python_abi=3.9=1_cp39
 54 |   - pyzmq=20.0.0=py39h2531618_1
 55 |   - qdldl-python=0.1.5=py39hde0f152_0
 56 |   - readline=8.1=h46c0cb4_0
 57 |   - scipy=1.6.3=py39hee8e79c_0
 58 |   - scotch=6.0.9=h0eec0ba_1
 59 |   - scs=2.1.3=py39h3c5bb4f_0
 60 |   - setuptools=49.6.0=py39hf3d152e_3
 61 |   - six=1.16.0=pyh6c4a22f_0
 62 |   - sqlite=3.35.5=h74cdb3f_0
 63 |   - tk=8.6.10=h21135ba_1
 64 |   - tornado=6.1=py39h27cfd23_0
 65 |   - traitlets=5.0.5=pyhd3eb1b0_0
 66 |   - tzdata=2021a=he74cb21_0
 67 |   - wcwidth=0.2.5=py_0
 68 |   - wheel=0.36.2=pyhd3deb0d_0
 69 |   - xz=5.2.5=h516909a_1
 70 |   - zeromq=4.3.4=h2531618_0
 71 |   - zlib=1.2.11=h516909a_1010
 72 |   - pip:
 73 |     - absl-py==0.13.0
 74 |     - cachetools==4.2.2
 75 |     - charset-normalizer==2.0.5
 76 |     - cvxpylayers==0.1.5
 77 |     - cycler==0.10.0
 78 |     - diffcp==1.0.16
 79 |     - google-auth==1.35.0
 80 |     - google-auth-oauthlib==0.4.6
 81 |     - grpcio==1.40.0
 82 |     - idna==3.2
 83 |     - kiwisolver==1.3.1
 84 |     - markdown==3.3.4
 85 |     - matplotlib==3.4.2
 86 |     - oauthlib==3.1.1
 87 |     - pillow==8.2.0
 88 |     - protobuf==3.17.3
 89 |     - pyasn1==0.4.8
 90 |     - pyasn1-modules==0.2.8
 91 |     - pybind11==2.6.2
 92 |     - pyparsing==2.4.7
 93 |     - pypower==5.1.15
 94 |     - requests==2.26.0
 95 |     - requests-oauthlib==1.3.0
 96 |     - rsa==4.7.2
 97 |     - tensorboard==2.6.0
 98 |     - tensorboard-data-server==0.6.1
 99 |     - tensorboard-plugin-wit==1.8.0
100 |     - threadpoolctl==2.1.0
101 |     - torch==1.8.1
102 |     - typing-extensions==3.10.0.0
103 |     - urllib3==1.26.6
104 |     - werkzeug==2.0.1
105 | prefix: /home/bingqinc/anaconda3/envs/nn-w-proj
106 | 


--------------------------------------------------------------------------------
/inverter_baselines/inverter_QP.py:
--------------------------------------------------------------------------------
  1 | import os, sys, argparse
  2 | 
  3 | import numpy as np
  4 | import cvxpy as cp
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.optim as optim
  8 | from torch.utils.tensorboard import SummaryWriter
  9 | 
 10 | main_path = os.path.abspath(os.path.join(__file__, '..'))
 11 | sys.path.insert(0, main_path)
 12 | 
 13 | from env.inverter import IEEE37
 14 | 
 15 | from algo.ppo import PPO
 16 | from agents.inverter_policy import Net, NeuralController
 17 | from utils.inverter_utils import Replay_Memory
 18 | 
 19 | 
 20 | import pdb
 21 | 
 22 | import torch
 23 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 24 | DEVICE
 25 | 
 26 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
 27 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
 28 |                     help='discount factor (default: 0.98)')
 29 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 30 |                     help='random seed (default: 42)')
 31 | parser.add_argument('--lam', type=int, default=10, metavar='N',
 32 |                     help='random seed (default: 42)')
 33 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
 34 |                     help='Learning Rate')
 35 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 36 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 37 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 38 | parser.add_argument('--exp_name', type=str, default='inverter_QP',
 39 |                     help='save name')
 40 | parser.add_argument('--network_name', type=str, default='ieee37',
 41 |                     help='')
 42 | args = parser.parse_args()
 43 | 
 44 | class QP_solver():
 45 |     def __init__(self,  **env_params):
 46 |         self.n_bus = env_params['n_bus']
 47 |         H = env_params['H']
 48 |         self.V0 = env_params['V0']
 49 |         self.P0 = env_params['P0']
 50 |         self.Q0 = env_params['Q0']
 51 |         self.V_upper = env_params['V_upper']
 52 |         self.V_lower = env_params['V_lower']
 53 |         self.S_rating = env_params['S_rating']
 54 |         self.gen_idx = env_params['gen_idx']
 55 | 
 56 |         self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx ]
 57 |         
 58 |         R = H[:, :self.n_bus]
 59 |         B = H[:, self.n_bus:]
 60 |         R_new = np.vstack([np.hstack([R[self.gen_idx][:, self.gen_idx], 
 61 |                                       R[self.gen_idx][:, self.other_idx]]), 
 62 |                             np.hstack([R[self.other_idx][:, self.gen_idx], 
 63 |                                        R[self.other_idx][:, self.other_idx]])
 64 |                             ])
 65 |         B_new = np.vstack([np.hstack([B[self.gen_idx][:, self.gen_idx], 
 66 |                                       B[self.gen_idx][:, self.other_idx]]), 
 67 |                             np.hstack([B[self.other_idx][:, self.gen_idx], 
 68 |                                        B[self.other_idx][:, self.other_idx]])
 69 |                             ])
 70 |         #pdb.set_trace()
 71 |         H_new = np.hstack([R_new, B_new])
 72 |                           
 73 |         # Set up projection onto inverter setpoint constraints and linearized voltage constraints
 74 |         self.P = cp.Variable(len(self.gen_idx))
 75 |         self.Q = cp.Variable(len(self.gen_idx))
 76 |         
 77 |         
 78 |         self.P_nc = cp.Parameter(len(self.other_idx))
 79 |         self.Q_nc = cp.Parameter(len(self.other_idx))
 80 |         self.P_av = cp.Parameter(len(self.gen_idx))
 81 |         
 82 |         # Voltage: Apply to All Buses
 83 |         z = cp.hstack([self.P, self.P_nc, self.Q, self.Q_nc]) # z: (70, )
 84 |         constraints = [self.V_lower - self.V0 <= H_new@z,
 85 |                        H_new@z <= self.V_upper - self.V0]
 86 |         
 87 |         ## Power: Only applies to Inverters
 88 |         PQ = cp.vstack([self.P0[self.gen_idx] + self.P, 
 89 |                        self.Q0[self.gen_idx] + self.Q]) # (2, n)
 90 |         constraints += [0 <= self.P0[self.gen_idx] + self.P,
 91 |                        self.P0[self.gen_idx] + self.P <= self.P_av,
 92 |                        cp.norm(PQ, axis = 0) <= self.S_rating]
 93 |         
 94 |         #objective = cp.Minimize(cp.sum_squares(P - P_tilde) + cp.sum_squares(Q - Q_tilde))
 95 |         objective = cp.Minimize(cp.sum(cp.maximum(self.P_av - self.P, 
 96 |                                                   np.zeros(len(self.gen_idx)))))
 97 |         self.problem = cp.Problem(objective, constraints)
 98 | 
 99 |     def solve(self, Sbus, P_av):
100 |         self.P_nc.value = Sbus.real[self.other_idx]
101 |         self.Q_nc.value = Sbus.imag[self.other_idx]
102 |         self.P_av.value = P_av
103 |         
104 |         #try:
105 |         self.problem.solve()
106 |         #except:
107 |         #    print("Solver failed")
108 |         #    self.P.value = None
109 | 
110 |         ## Check solution valid
111 |         #if self.P.value is not None:
112 |         #print(self.problem.status)
113 |         #print(self.P.value, self.Q.value)
114 |         return self.P.value, self.Q.value#, self.Problem.status
115 |         
116 |         #else:
117 |         #    return Sbus.real, Sbus.imag
118 | 
def main():
    """Run the QP baseline over one week of data, logging and saving results.

    For every time step the QP controller picks the inverter setpoints, the
    environment is stepped with the resulting injections, and voltage extrema,
    voltage violations and curtailment are logged to TensorBoard. Voltage and
    power traces are written to ``results/`` every 20 episodes and at the end.

    Raises:
        NotImplementedError: if ``args.network_name`` is not ``'ieee37'``.
        RuntimeError: if the QP returns no solution for a time step.
    """
    torch.manual_seed(args.seed)

    # Create Simulation Environment
    if args.network_name != 'ieee37':
        # Fail fast: printing and continuing would only crash later on an
        # unbound `env`.
        raise NotImplementedError(f"Network '{args.network_name}' is not implemented")
    env = IEEE37()
    writer = SummaryWriter(comment = args.exp_name)

    n_bus = env.n - 1

    env_params = {'V0': env.V0[-env.n_pq:],
                  'P0': env.P0[-env.n_pq:],
                  'Q0': env.Q0[-env.n_pq:],
                  'H': np.hstack([env.R, env.B]), # 35 x 70
                  'n_bus': n_bus, # Slack bus is not controllable
                  'gen_idx': env.gen_idx - 1, # Excluded the slack bus
                  'V_upper': env.v_upper, 'V_lower': env.v_lower,
                  'S_rating': env.max_S,
                  }

    controller = QP_solver(**env_params)

    # 1-week data
    num_steps = 900 # 15 minutes
    n_episodes = 7*86400//num_steps

    V_record = []
    P_record = []
    Q_record = []

    # np.save does not create missing directories; make sure the target exists
    # before any long computation is thrown away.
    os.makedirs("results", exist_ok=True)

    def save_records():
        np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
        np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
        np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))

    for i in range(n_episodes):
        loss = 0
        violation_count = 0

        for k in range(num_steps):
            t = i*num_steps + k
            Sbus, P_av = env.getSbus(t)

            P_gen, Q_gen = controller.solve(Sbus, P_av)
            if P_gen is None or Q_gen is None:
                # cvxpy leaves .value as None when no solution is available.
                raise RuntimeError(
                    f"QP returned no solution at t={t} "
                    f"(status: {controller.problem.status})")
            print(f"P_av = {P_av}, P = {P_gen}")

            # Overwrite the inverter entries with the controller's setpoints
            P = Sbus.real
            Q = Sbus.imag
            P[controller.gen_idx] = P_gen
            Q[controller.gen_idx] = Q_gen

            V, success = env.step(P + 1j*Q)

            if np.any(V > env.v_upper) or np.any(V < env.v_lower):
                violation_count += 1
            writer.add_scalar("V/max", max(V[1:]), t)
            writer.add_scalar("V/min", min(V[1:]), t)

            # Curtailment: available power that was not used
            cost = np.clip(P_av - P_gen, 0, None)
            loss += cost

            V_record.append(V[1:])
            P_record.append(P)
            Q_record.append(Q)

        writer.add_scalar("Loss", loss.mean().item(), i)
        writer.add_scalar("violations", violation_count, i)

        # Periodic checkpoint of the traces
        if i % 20 == 0 and i > 0:
            save_records()

    save_records()
    writer.close()
197 |             
198 | if __name__ == '__main__':
199 |     main()
200 | 
201 | '''
202 |     # Example Usage of the environment
203 |     t = 10
204 |     Sbus = env.getSbus(t)
205 |     
206 |     # Solve power flow equations
207 |     V, success = env.step(Sbus)
208 |     print(np.abs(V))
209 |     if success == 0:
210 |         print("Something is wrong")
211 |     
212 |     # Estimation using the linearized model
213 |     V_est = env.linear_estimate(Sbus)
214 |     print(V_est)
215 | '''
216 | 


--------------------------------------------------------------------------------
/inverter_baselines/inverter_acopf.py:
--------------------------------------------------------------------------------
  1 | import os, sys, argparse
  2 | 
  3 | import numpy as np
  4 | import cvxpy as cp
  5 | import ipopt
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.optim as optim
  9 | from torch.utils.tensorboard import SummaryWriter
 10 | 
 11 | main_path = os.path.abspath(os.path.join(__file__, '..'))
 12 | sys.path.insert(0, main_path)
 13 | 
 14 | from env.inverter import IEEE37
 15 | 
 16 | from algo.ppo import PPO
 17 | from agents.inverter_policy import Net, NeuralController
 18 | from utils.inverter_utils import Replay_Memory
 19 | 
 20 | import ipdb
 21 | 
 22 | import torch
 23 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 24 | DEVICE
 25 | 
 26 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
 27 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
 28 |                     help='discount factor (default: 0.98)')
 29 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 30 |                     help='random seed (default: 42)')
 31 | parser.add_argument('--lam', type=int, default=10, metavar='N',
 32 |                     help='random seed (default: 42)')
 33 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
 34 |                     help='Learning Rate')
 35 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 36 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 37 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 38 | parser.add_argument('--exp_name', type=str, default='inverter_ACOPF',
 39 |                     help='save name')
 40 | parser.add_argument('--network_name', type=str, default='ieee37',
 41 |                     help='')
 42 | args = parser.parse_args()
 43 | 
 44 | class ACOPFController():
 45 |     def __init__(self,  **env_params):
 46 |         self.n_bus = env_params['n_bus']
 47 |         self.V0 = env_params['V0']
 48 |         self.P0 = env_params['P0']
 49 |         self.Q0 = env_params['Q0']
 50 |         self.V_upper = env_params['V_upper']
 51 |         self.V_lower = env_params['V_lower']
 52 |         self.S_rating = env_params['S_rating']
 53 |         self.gen_idx = env_params['gen_idx']
 54 |         self.slack_idx = env_params['slack_idx']
 55 |         self.Ybus = env_params['Ybus']
 56 | 
 57 |         self.A0 = np.zeros(self.V0.shape)   # initial voltage angle
 58 |         self.n_gen = len(self.gen_idx)
 59 |         self.n_slack = len(self.slack_idx)
 60 |         self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx and i not in self.slack_idx]
 61 | 
 62 | 
 63 |     def solve(self, Sbus, P_av):
 64 |         P_nc = Sbus.real[self.other_idx]
 65 |         Q_nc = Sbus.imag[self.other_idx]
 66 | 
 67 |         # Decision variables: P and Q at controllable buses, 
 68 |         #   Pslack and Qslack at slack bus, V and theta at all buses
 69 | 
 70 |         # initial guess for decision variables
 71 |         x0 = np.hstack([P_av, self.Q0[self.gen_idx], 
 72 |             self.P0[self.slack_idx], self.Q0[self.slack_idx], 
 73 |             self.V0, self.A0])
 74 | 
 75 |         # upper and lower bounds on decision variables
 76 |         #   0 \leq P \leq P_av
 77 |         #   no explicit bounds on Q
 78 |         #   no explicit bounds on Pslack or Qslack
 79 |         #   V and A known at ref bus
 80 |         #   Vmin \leq V \leq Vmax at non-ref buses
 81 |         #   no explicit bounds on A at non-ref buses
 82 |         def get_bound_with_slack(bound, slack_bound):
 83 |             values = bound * np.ones(self.n_bus)
 84 |             values[self.slack_idx] = slack_bound
 85 |             return values
 86 | 
 87 |         lb = np.hstack([
 88 |             np.zeros(self.n_gen), 
 89 |             -np.infty * np.ones(self.n_gen),
 90 |             -np.infty * np.ones(2 * self.n_slack),
 91 |             get_bound_with_slack(self.V_lower, self.V0[self.slack_idx]), 
 92 |             get_bound_with_slack(-np.infty, self.A0[self.slack_idx])])
 93 |         ub = np.hstack([
 94 |             P_av, 
 95 |             np.infty * np.ones(self.n_gen),
 96 |             np.infty * np.ones(2 * self.n_slack),
 97 |             get_bound_with_slack(self.V_upper, self.V0[self.slack_idx]),
 98 |             get_bound_with_slack(np.infty, self.A0[self.slack_idx])])
 99 | 
100 |         # upper and lower bounds on other constraints
101 |         #   power flow constraint: diag(v)conj(Ybus)conj(v) - S = 0      
102 |         #      where v = diag(V*exp(1j*A)) and S is net demand at all nodes
103 |         #      (separate out real and imaginary parts)
104 |         #   P^2 + Q^2 \leq S_rating^2
105 |         cl = np.hstack(
106 |             [np.zeros(2*self.n_bus), np.zeros(self.n_gen)])
107 |         cu = np.hstack(
108 |             [np.zeros(2*self.n_bus), self.S_rating**2])
109 | 
110 |         problem_obj = ACOPFSolver(P_av, P_nc, Q_nc, self.Ybus, 
111 |             self.n_bus, self.n_gen, self.n_slack, self.gen_idx, self.slack_idx, self.other_idx)
112 |         nlp = ipopt.problem(
113 |                     n=len(x0),    # num decision vars
114 |                     m=len(cl),    # num constraints
115 |                     problem_obj=problem_obj,
116 |                     lb=lb,        # lower bounds on decision vars
117 |                     ub=ub,        # upper bounds on decision vars
118 |                     cl=cl,        # lower bounds on constraints
119 |                     cu=cu         # upper bounds on constraints
120 |                     )
121 | 
122 |         nlp.addOption('tol', 1e-4)
123 |         nlp.addOption('print_level', 0) # 3)
124 | 
125 |         x, info = nlp.solve(x0)
126 |         P = x[:self.n_gen]
127 |         Q = x[self.n_gen:2*self.n_gen]
128 |         
129 |         return P, Q
130 | 
131 | 
class ACOPFSolver(object):
    """Objective, gradient, constraints and Jacobian callbacks for the ACOPF.

    The decision vector is laid out as
    ``[P (n_gen), Q (n_gen), Pslack (n_slack), Qslack (n_slack), V (n_bus), A (n_bus)]``
    where V and A are voltage magnitudes and angles.
    """

    def __init__(self, P_av, P_nc, Q_nc, Ybus, n_bus, n_gen, n_slack, gen_idx, slack_idx, other_idx):
        self.P_av = P_av
        self.P_nc = P_nc
        self.Q_nc = Q_nc
        self.Ybus = Ybus
        self.n_bus = n_bus
        self.n_gen = n_gen
        self.n_slack = n_slack
        self.gen_idx = gen_idx
        self.slack_idx = slack_idx
        self.other_idx = other_idx
        # Boundaries between the six segments of the decision vector
        segment_sizes = [self.n_gen, self.n_gen, self.n_slack, self.n_slack, self.n_bus, self.n_bus]
        self.split_inds = np.cumsum(segment_sizes)[:-1]

    def objective(self, x):
        """Total curtailment (to be minimized): sum of max(P_av - P, 0)."""
        unused = self.P_av - x[:self.n_gen]
        return np.maximum(unused, 0).sum()

    def gradient(self, x):
        """Gradient of the objective: -1 for every inverter with P < P_av, else 0."""
        is_curtailed = (self.P_av - x[:self.n_gen]) > 0
        p_grad = -1 * is_curtailed.astype(int)
        # The objective does not depend on any of the remaining variables.
        remainder = np.zeros(self.n_gen + 2*self.n_slack + 2*self.n_bus)
        return np.hstack([p_grad, remainder])

    def constraints(self, y):
        """Constraint values (box constraints on the variables are excluded).

        Returns the real parts, then the imaginary parts, of the power-flow
        mismatch ``diag(v) conj(Ybus) conj(v) - S``, followed by ``P^2 + Q^2``
        for every inverter.
        """
        P, Q, Pslack, Qslack, V, A = np.split(y, self.split_inds)

        # Net complex injection at every bus
        injection = np.zeros(self.n_bus, dtype=np.complex128)
        injection[self.gen_idx] = P + 1j*Q
        injection[self.slack_idx] = Pslack + 1j*Qslack
        injection[self.other_idx] = self.P_nc + 1j*self.Q_nc

        # Power flow equations, evaluated with complex bus voltages
        phasor = V * np.exp(1j * A)
        conj_Y = np.conj(self.Ybus)
        conj_phasor = np.conj(phasor)
        mismatch = np.diag(phasor) @ conj_Y @ conj_phasor - injection

        # Left-hand side of P^2 + Q^2 <= S_rating^2
        squared_apparent = P**2 + Q**2

        return np.hstack([np.real(mismatch), np.imag(mismatch), squared_apparent])

    def jacobian(self, y):
        """Dense Jacobian of ``constraints``, flattened row by row.

        Power-flow derivatives follow
        http://www.cs.cmu.edu/~zkolter/course/15-884/eps_power_flow.pdf
        """
        P, Q, _, _, V, A = np.split(y, self.split_inds)

        phasor = V * np.exp(1j * A)
        conj_Y = np.conj(self.Ybus)
        conj_phasor = np.conj(phasor)
        conj_current = conj_Y @ conj_phasor

        # Derivative of the complex power w.r.t. angles ...
        d_angle = 1j * np.diag(phasor) @ (np.diag(conj_current) - conj_Y @ np.diag(conj_phasor))
        # ... and w.r.t. voltage magnitudes
        d_magnitude = np.diag(phasor) @ conj_Y @ np.diag(np.exp(-1j * A)) + \
            np.diag(np.exp(1j * A)) @ np.diag(conj_current)

        # The injections enter the mismatch with a minus sign at their own bus.
        identity = np.eye(self.n_bus)
        gen_cols = -identity[:, self.gen_idx]
        slack_cols = -identity[:, self.slack_idx]
        no_gen = np.zeros((self.n_bus, self.n_gen))
        no_slack = np.zeros((self.n_bus, self.n_slack))

        real_rows = np.hstack([gen_cols, no_gen, slack_cols, no_slack,
                               np.real(d_magnitude), np.real(d_angle)])
        imag_rows = np.hstack([no_gen, gen_cols, no_slack, slack_cols,
                               np.imag(d_magnitude), np.imag(d_angle)])
        power_flow_jac = np.vstack([real_rows, imag_rows])

        # Jacobian of the apparent power constraint
        apparent_power_jac = np.hstack([
            np.diag(2*P), np.diag(2*Q),
            np.zeros((self.n_gen, 2*self.n_slack + 2*self.n_bus))])

        return np.concatenate([power_flow_jac.flatten(), apparent_power_jac.flatten()])
200 | 
201 | 
def main():
    """Run the ACOPF baseline on a window of episodes and log/save the results.

    Episodes ``start_ep`` to ``min(n_episodes, start_ep + 100) - 1`` are
    simulated. For every time step the ACOPF controller picks the inverter
    setpoints, the environment is stepped with the resulting injections, and
    voltage extrema, voltage violations and curtailment are logged to
    TensorBoard. Traces are written to ``results/`` whenever the episode index
    is a multiple of 20 and once more at the end.

    Raises:
        NotImplementedError: if ``args.network_name`` is not ``'ieee37'``.
    """
    torch.manual_seed(args.seed)

    # Create Simulation Environment
    if args.network_name != 'ieee37':
        # Fail fast: printing and continuing would only crash later on an
        # unbound `env`.
        raise NotImplementedError(f"Network '{args.network_name}' is not implemented")
    env = IEEE37()
    writer = SummaryWriter(comment = args.exp_name)

    n_bus = env.n

    env_params = {'V0': env.V0,
                  'P0': env.P0,
                  'Q0': env.Q0,
                  'n_bus': n_bus,
                  'gen_idx': env.gen_idx,
                  'slack_idx': env.ref,
                  'V_upper': env.v_upper, 'V_lower': env.v_lower,
                  'S_rating': env.max_S,
                  'Ybus': env.Ybus
                 }

    controller = ACOPFController(**env_params)

    # 1-week data
    num_steps = 900 # 15 minutes
    n_episodes = 7*86400//num_steps

    V_record = []
    P_record = []
    Q_record = []

    # np.save does not create missing directories; make sure the target exists
    # before any long computation is thrown away.
    os.makedirs("results", exist_ok=True)

    start_ep = 600

    def save_records():
        np.save(f"results/V_{args.exp_name}_{start_ep}.npy", np.array(V_record))
        np.save(f"results/P_{args.exp_name}_{start_ep}.npy", np.array(P_record))
        np.save(f"results/Q_{args.exp_name}_{start_ep}.npy", np.array(Q_record))

    for i in range(start_ep, min(n_episodes, start_ep + 100)):
        loss = 0
        violation_count = 0

        for k in range(num_steps):
            t = i*num_steps + k
            Sbus, P_av = env.getSbus(t, wrt_reference=False, w_slack=True)

            P_gen, Q_gen = controller.solve(Sbus, P_av)
            print(f"P_av = {P_av}, P = {P_gen}")

            # Overwrite the inverter entries with the controller's setpoints
            P = Sbus.real
            Q = Sbus.imag
            P[controller.gen_idx] = P_gen
            Q[controller.gen_idx] = Q_gen

            V, success = env.step(P + 1j*Q, wrt_reference=False)

            if np.any(V > env.v_upper) or np.any(V < env.v_lower):
                violation_count += 1
            writer.add_scalar("V/max", max(V[1:]), t)
            writer.add_scalar("V/min", min(V[1:]), t)

            # Curtailment: available power that was not used
            cost = np.clip(P_av - P_gen, 0, None)
            loss += cost

            V_record.append(V[1:])
            P_record.append(P)
            Q_record.append(Q)

        writer.add_scalar("Loss", loss.mean().item(), i)
        writer.add_scalar("violations", violation_count, i)

        # Periodic checkpoint of the traces
        if i % 20 == 0 and i > 0:
            save_records()

    save_records()
    writer.close()
282 | 
283 |             
284 | if __name__ == '__main__':
285 |     main()
286 | 
287 | '''
288 |     # Example Usage of the environment
289 |     t = 10
290 |     Sbus = env.getSbus(t)
291 |     
292 |     # Solve power flow equations
293 |     V, success = env.step(Sbus)
294 |     print(np.abs(V))
295 |     if success == 0:
296 |         print("Something is wrong")
297 |     
298 |     # Estimation using the linearized model
299 |     V_est = env.linear_estimate(Sbus)
300 |     print(V_est)
301 | '''
302 | 


--------------------------------------------------------------------------------
/inverter_baselines/inverter_no-control.py:
--------------------------------------------------------------------------------
  1 | import os, sys, argparse
  2 | 
  3 | import numpy as np
  4 | from torch.utils.tensorboard import SummaryWriter
  5 | 
  6 | main_path = os.path.abspath(os.path.join(__file__, '..'))
  7 | sys.path.insert(0, main_path)
  8 | 
  9 | from env.inverter import IEEE37
 10 | 
 11 | import pdb
 12 | 
 13 | #import torch
 14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 15 | #DEVICE
 16 | 
 17 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
 18 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
 19 |                     help='discount factor (default: 0.98)')
 20 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 21 |                     help='random seed (default: 42)')
 22 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
 23 |                     help='Learning Rate')
 24 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 25 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 26 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 27 | parser.add_argument('--exp_name', type=str, default='no-control',
 28 |                     help='save name')
 29 | parser.add_argument('--network_name', type=str, default='ieee37',
 30 |                     help='')
 31 | args = parser.parse_args()
 32 | 
 33 | 
 34 | def main():
 35 |     writer = SummaryWriter(comment = args.exp_name)
 36 |     
 37 |     # Create Simulation Environment
 38 |     if args.network_name == 'ieee37':
 39 |         env = IEEE37()
 40 |     else:
 41 |         print("Not implemented")
 42 |     n_bus = env.n
 43 |     env_params = {'V0': env.V0[-env.n_pq:],
 44 |                   'P0': env.P0[-env.n_pq:],
 45 |                   'Q0': env.Q0[-env.n_pq:],
 46 |                   'gen_idx': env.gen_idx, # Including the slack bus
 47 |                   'V_upper': env.v_upper, 'V_lower': env.v_lower,
 48 |                  'S_rating': env.max_S,
 49 |                  }
 50 | 
 51 |     ## Note: Volt-Var controller considers deviation from 1
 52 |     #controller = VoltVarController(0.04, **env_params)
 53 |     
 54 |     # 1-week data
 55 |     num_steps = 600 # 10 minutes
 56 |     n_episodes = 7*86400//num_steps
 57 | 
 58 |     V_prev = np.ones(n_bus)    
 59 |     V_record = []
 60 |     
 61 |     for i in range(n_episodes):
 62 |         violation_count = 0
 63 |         for k in range(num_steps):
 64 |             t = i*num_steps + k
 65 |             Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True)
 66 |             
 67 |             #Q = controller.forward(V_prev, P_av = P_av) # at Generation buses
 68 |             #Sbus.imag[env.gen_idx] += Q
 69 |             
 70 |             V, success = env.step(Sbus)
 71 |             V_prev = V
 72 |             
 73 |             if np.any(V>env.v_upper) | np.any(V<env.v_lower):
 74 |                 violation_count += 1
 75 |             writer.add_scalar("V/max", max(V[1:]), t)
 76 |             writer.add_scalar("V/min", min(V[1:]), t)
 77 |             
 78 |             V_record.append(V[1:])
 79 |         
 80 |         writer.add_scalar("violations", violation_count, i)
 81 |         
 82 |         if (i % 20 == 0) & (i>0):
 83 |             np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
 84 |     np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
 85 |         
 86 | if __name__ == '__main__':
 87 |     main()
 88 | 
 89 | '''
 90 |     # Example Usage of the environment
 91 |     t = 10
 92 |     Sbus = env.getSbus(t)
 93 |     
 94 |     # Solve power flow equations
 95 |     V, success = env.step(Sbus)
 96 |     print(np.abs(V))
 97 |     if success == 0:
 98 |         print("Something is wrong")
 99 |     
100 |     # Estimation using the linearized model
101 |     V_est = env.linear_estimate(Sbus)
102 |     print(V_est)
103 | '''
104 | 


--------------------------------------------------------------------------------
/inverter_baselines/inverter_volt-var.py:
--------------------------------------------------------------------------------
  1 | import os, sys, argparse
  2 | 
  3 | import numpy as np
  4 | from torch.utils.tensorboard import SummaryWriter
  5 | 
  6 | main_path = os.path.abspath(os.path.join(__file__, '..'))
  7 | sys.path.insert(0, main_path)
  8 | 
  9 | from env.inverter import IEEE37
 10 | 
 11 | import pdb
 12 | 
 13 | #import torch
 14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 15 | #DEVICE
 16 | 
 17 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
 18 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
 19 |                     help='discount factor (default: 0.98)')
 20 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 21 |                     help='random seed (default: 42)')
 22 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
 23 |                     help='Learning Rate')
 24 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 25 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 26 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 27 | parser.add_argument('--exp_name', type=str, default='volt-var',
 28 |                     help='save name')
 29 | parser.add_argument('--network_name', type=str, default='ieee37',
 30 |                     help='')
 31 | args = parser.parse_args()
 32 | 
 33 | 
 34 | class VoltVarController():
 35 |     def __init__(self, delta, **env_params):
 36 |         super(VoltVarController, self).__init__()
 37 |         self.V_upper = env_params['V_upper']
 38 |         self.V_lower = env_params['V_lower']
 39 |         self.delta = delta
 40 |         self.gen_idx = env_params['gen_idx']
 41 |         self.S_rating = env_params['S_rating']
 42 |         self.a = 1/(self.V_upper-1-self.delta/2)
 43 | 
 44 |     def forward(self, voltage, P_av):
 45 |         Q = (self.S_rating**2-P_av**2)**0.5
 46 |         
 47 |         # Piece-wise Linear Curve
 48 |         voltage = voltage[self.gen_idx]
 49 |         out = np.zeros_like(voltage)
 50 |         
 51 |         out[voltage <= self.V_lower] = 1
 52 |         
 53 |         idx = (voltage > self.V_lower) & (voltage < 1 - self.delta/2)
 54 |         out[idx] = 1 - self.a*(voltage[idx]-self.V_lower)
 55 | 
 56 |         idx = (voltage > 1 + self.delta/2) & (voltage < self.V_upper)
 57 |         out[idx] = -self.a*(voltage[idx]-1-self.delta/2)
 58 |         
 59 |         out[voltage >= self.V_upper] = -1
 60 |         return out * Q 
 61 | 
 62 | def main():
 63 |     writer = SummaryWriter(comment = args.exp_name)
 64 |     
 65 |     # Create Simulation Environment
 66 |     if args.network_name == 'ieee37':
 67 |         env = IEEE37()
 68 |     else:
 69 |         print("Not implemented")
 70 |     n_bus = env.n
 71 |     env_params = {'V0': env.V0[-env.n_pq:],
 72 |                   'P0': env.P0[-env.n_pq:],
 73 |                   'Q0': env.Q0[-env.n_pq:],
 74 |                   'gen_idx': env.gen_idx, # Including the slack bus
 75 |                   'V_upper': env.v_upper, 'V_lower': env.v_lower,
 76 |                  'S_rating': env.max_S,
 77 |                  }
 78 | 
 79 |     ## Note: Volt-Var controller considers deviation from 1
 80 |     controller = VoltVarController(0.04, **env_params)
 81 |     
 82 |     # 1-week data
 83 |     num_steps = 600 # 10 minutes
 84 |     n_episodes = 7*86400//num_steps
 85 | 
 86 |     V_prev = np.ones(n_bus)    
 87 |     V_record = []
 88 |     
 89 |     for i in range(n_episodes):
 90 |         violation_count = 0
 91 |         for k in range(num_steps):
 92 |             t = i*num_steps + k
 93 |             Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True)
 94 |             
 95 |             Q = controller.forward(V_prev, P_av = P_av) # at Generation buses
 96 |         
 97 |             Sbus.imag[env.gen_idx] += Q
 98 |             
 99 |             V, success = env.step(Sbus)
100 |             V_prev = V
101 |             
102 |             if np.any(V>env.v_upper) | np.any(V<env.v_lower):
103 |                 violation_count += 1
104 |             writer.add_scalar("V/max", max(V[1:]), t)
105 |             writer.add_scalar("V/min", min(V[1:]), t)
106 |             
107 |             V_record.append(V[1:])
108 |         
109 |         writer.add_scalar("violations", violation_count, i)
110 |         
111 |         if (i % 20 == 0) & (i>0):
112 |             np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
113 | <<<<<<< HEAD
114 | =======
115 |             
116 | >>>>>>> 5d88b0ccebcea057216087804a12ef2c880e3345
117 |     np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
118 |         
119 | if __name__ == '__main__':
120 |     main()
121 | 
122 | '''
123 |     # Example Usage of the environment
124 |     t = 10
125 |     Sbus = env.getSbus(t)
126 |     
127 |     # Solve power flow equations
128 |     V, success = env.step(Sbus)
129 |     print(np.abs(V))
130 |     if success == 0:
131 |         print("Something is wrong")
132 |     
133 |     # Estimation using the linearized model
134 |     V_est = env.linear_estimate(Sbus)
135 |     print(V_est)
136 | '''
137 | 


--------------------------------------------------------------------------------
/main_IW.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | import gym
  5 | import eplus_env
  6 | 
  7 | import warnings
  8 | warnings.filterwarnings("ignore", category=UserWarning)
  9 | 
 10 | import argparse
 11 | import numpy as np
 12 | import pandas as pd
 13 | import copy
 14 | import pickle
 15 | import pdb
 16 | 
 17 | import torch
 18 | import torch.nn as nn
 19 | import torch.nn.functional as F
 20 | import torch.utils.data as data
 21 | import torch.optim as optim
 22 | from torch.distributions import MultivariateNormal, Normal
 23 | from torch.utils.tensorboard import SummaryWriter
 24 | 
 25 | from algo.ppo import PPO
 26 | from agents.nn_policy import NeuralController
 27 | from utils.network import LSTM
 28 | from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory
 29 | 
 30 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 31 | DEVICE
 32 | 
 33 | parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning')
 34 | parser.add_argument('--gamma', type=float, default=0.9, metavar='G',
 35 |                     help='discount factor (default: 0.9)')
 36 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 37 |                     help='random seed (default: 42)')
 38 | parser.add_argument('--lr', type=float, default=5e-4, metavar='G',
 39 |                     help='Learning Rate')
 40 | parser.add_argument('--lam', type=int, default=10, metavar='N',
 41 |                    help='random seed (default: 42)')
 42 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 43 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 44 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 45 | parser.add_argument('--T', type=int, default=12, metavar='N',
 46 |                     help='Planning Horizon (default: 12)')
 47 | parser.add_argument('--step', type=int, default=300*3, metavar='N',
 48 |                     help='Time Step in Simulation, Unit in Seconds (default: 900)') # 15 Minutes Now!
 49 | parser.add_argument('--exp_name', type=str, default='nn_w_proj',
 50 |                     help='save name')
 51 | parser.add_argument('--eta', type=int, default=3,
 52 |                     help='Hyper Parameter for Balancing Comfort and Energy')
 53 | parser.add_argument('--model_no', type = int, default = 1800, help = '')
 54 | args = parser.parse_args()
 55 | 
 56 | 
 57 | def main():
 58 |     torch.manual_seed(args.seed)
 59 |     writer = SummaryWriter(comment = args.exp_name)
 60 |     
 61 |     # Create Simulation Environment
 62 |     env = gym.make('Eplus-IW-test-v0')
 63 |     
 64 |     # Specify variable names for control problem
 65 |     obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"]
 66 |     state_name = ["Indoor Air Temp."]
 67 |     dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"]
 68 |     ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"]
 69 |     target_name = ["Indoor Temp. Setpoint"]
 70 |     dist_name = dist_name + target_name
 71 |     
 72 |     n_state = len(state_name)
 73 |     n_ctrl = 1 #len(ctrl_name)
 74 |     n_dist = len(dist_name)
 75 |     eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode
 76 |     step = args.step # step: Timestep; Unit in seconds
 77 |     T = args.T # T: Number of timesteps in the planning horizon
 78 |     tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day
 79 | 
 80 |     # Read Information on Weather, Occupancy, and Target Setpoint
 81 |     obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl")
 82 |     disturbance = obs_2017[dist_name]
 83 |     # Min-Max Normalization
 84 |     obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only
 85 |     dist_min = obs_TMY3[dist_name].min()
 86 |     dist_max = obs_TMY3[dist_name].max()
 87 |     disturbance = (disturbance - dist_min)/(dist_max - dist_min)
 88 |     state_min = obs_TMY3[state_name].min().values
 89 |     state_max = obs_TMY3[state_name].max().values
 90 |     memory = Replay_Memory()
 91 |     
 92 |     ## Load pretrained LSTM policy weights
 93 |     '''
 94 |         Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data)
 95 |         The LSTM also expects "setpoint" as part of the disturbance term.
 96 |     '''
 97 |     network = LSTM(n_state, n_ctrl, n_dist)
 98 |     network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no)))
 99 |     
100 |     ## Load thermodynamics model to construct the polytope
101 |     '''
102 |         New model also expects states, actions, and disturbances to be MinMaxNormalized
103 |     '''
104 |     model_dict ={'a': np.array([0.934899]),
105 |                 'bu': np.array([0.024423]),
106 |                 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02,
107 |                 2.38717578e-03, -3.52816030e-03,  3.32528746e-03,  7.19267820e-03]),
108 |                 'Pm': 1  # Upper bound of u;
109 |                 }
110 |     policy = NeuralController(T, step, network, RC_flag = False, **model_dict)
111 |     agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam)
112 |     
113 |     dir = 'results'
114 |     if not os.path.exists(dir):
115 |         os.mkdir(dir)
116 |     
117 |     multiplier = 1 # Normalize the reward for better training performance
118 |     n_step = 96 #timesteps per day
119 |     
120 |     sigma = 0.1
121 |     sigma_min = 0.01
122 |     sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps
123 |     
124 |     timeStep, obs, isTerminal = env.reset()
125 |     start_time = pd.datetime(year = env.start_year, month = env.start_mon, day = env.start_day)
126 |     cur_time = start_time
127 |     obs_dict = make_dict(obs_name, obs)
128 |     
129 |     # Save for record
130 |     timeStamp = [start_time]
131 |     observations = [obs]
132 |     actions_taken = []
133 | 
134 |     for i_episode in range(tol_eps):
135 |         ## Save for Parameter Updates
136 |         rewards = []
137 |         real_rewards = []
138 | 
139 |         for t in range(n_step):
140 |             state = np.array([obs_dict[name] for name in state_name])
141 |             state = (state-state_min)/(state_max-state_min)
142 |             
143 |             x_upper = obs_2017['x_upper'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values
144 |             x_lower = obs_2017['x_lower'][cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values
145 |             ## Margin
146 |             #x_lower+=0.025
147 |             #x_upper-=0.025
148 |             
149 |             x_upper = (x_upper-state_min)/(state_max-state_min)
150 |             x_lower = (x_lower-state_min)/(state_max-state_min)
151 |             
152 |             dt = disturbance[cur_time : cur_time + pd.Timedelta(seconds = (T-1) * step)].values  # T x n_dist
153 |             
154 |             ## Update the model in the controller
155 |             # CVXPY expects np.array for parameters
156 |             agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1])
157 |             agent.memory.x_lowers.append(torch.tensor(x_lower).float())
158 |             agent.memory.x_uppers.append(torch.tensor(x_upper).float())
159 |             
160 |             state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state
161 |             dt = torch.tensor(dt).float()
162 |             agent.memory.states.append(state)
163 |             agent.memory.disturbance.append(dt)
164 |             
165 |             ## Use policy_old to select action
166 |             mu, sigma_sq, _ = agent.forward(state, dt.unsqueeze(1), current = False) # mu, sigma_sq: T x 1 x Dim.
167 |             sigma_sq = torch.ones_like(mu) * sigma**2
168 |             
169 |             ## Myopic Limit: A hack to make sure the projected actions do not result in tiny violations
170 |             margin = 0.1/(state_max-state_min)
171 |             u_limits = np.array([x_lower[0]+margin.item(), x_upper[0]-margin.item()]) - model_dict['a'] * state.item() -  model_dict['bd'].dot(dt[0, :-1].numpy())
172 |             u_limits /= model_dict['bu']
173 |             u_limits = np.clip(u_limits, 0, 1)
174 |             #pdb.set_trace()
175 |             action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = u_limits)
176 |             agent.memory.actions.append(action.detach().clone())
177 |             agent.memory.old_logprobs.append(old_logprob.detach().clone())
178 |             
179 |             SWT = 20 + 45 * action.item()
180 |             if (SWT<30):
181 |                 HWOEN = -30 # De Facto Off
182 |                 action = torch.zeros_like(action)
183 |                 SWT = 20
184 |             else:
185 |                 HWOEN = 30 # De Facto On
186 |             if np.isnan(SWT):
187 |                 SWT = 20
188 |             action4env = (HWOEN, SWT)
189 |             
190 |             # Before step
191 |             print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}')
192 |             for _ in range(3):
193 |                 timeStep, obs, isTerminal = env.step(action4env)
194 | 
195 |             obs_dict = make_dict(obs_name, obs)
196 |             reward = R_func(obs_dict, SWT-20, eta)
197 |             
198 |             # Per step
199 |             real_rewards.append(reward)
200 |             bl = 0#obs_2017['rewards'][cur_time]
201 |             rewards.append((reward-bl) / 15) # multiplier
202 |             # print(f'Reward={reward}, BL={bl}')
203 |             # Save for record
204 |             cur_time = start_time + pd.Timedelta(seconds = timeStep)
205 |             timeStamp.append(cur_time)
206 |             observations.append(obs)
207 |             actions_taken.append(action4env)
208 |         
209 |         writer.add_scalar('Reward', np.mean(real_rewards), i_episode)
210 |         writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode)
211 |         print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))
212 |         
213 |         advantages = Advantage_func(rewards, args.gamma)
214 |         agent.memory.advantages.append(advantages)
215 |         # if -1, do not update parameters
216 |         if args.update_episode == -1:
217 |             agent.memory.clear_memory()
218 |         elif (i_episode >0) & (i_episode % args.update_episode ==0):
219 |             agent.update_parameters(sigma = sigma, K = 8)
220 |             sigma = max(sigma_min, sigma-sigma_step)
221 |             
222 |         obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name)
223 |         obs_df = obs_df.drop(columns=ctrl_name)
224 |         action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name)
225 |         obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True)
226 |         obs_df.to_pickle("results/obs_"+args.exp_name+".pkl")
227 | 
228 | if __name__ == '__main__':
229 |     main()
230 | 


--------------------------------------------------------------------------------
/main_inverter.py:
--------------------------------------------------------------------------------
  1 | import os, sys, argparse
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.optim as optim
  7 | from torch.utils.tensorboard import SummaryWriter
  8 | 
  9 | from env.inverter import IEEE37
 10 | 
 11 | from algo.ppo import PPO
 12 | from agents.inverter_policy import Net, NeuralController
 13 | from utils.inverter_utils import Replay_Memory
 14 | 
 15 | 
 16 | import pdb
 17 | 
 18 | import torch
 19 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 20 | DEVICE
 21 | 
 22 | parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
 23 | parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
 24 |                     help='discount factor (default: 0.98)')
 25 | parser.add_argument('--seed', type=int, default=42, metavar='N',
 26 |                     help='random seed (default: 42)')
 27 | parser.add_argument('--lam', type=int, default=10, metavar='N',
 28 |                     help='random seed (default: 42)')
 29 | parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
 30 |                     help='Learning Rate')
 31 | parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
 32 | parser.add_argument('--update_episode', type=int, default=4, metavar='N',
 33 |                     help='PPO update episode (default: 1); If -1, do not update weights')
 34 | parser.add_argument('--exp_name', type=str, default='inverter',
 35 |                     help='save name')
 36 | parser.add_argument('--network_name', type=str, default='ieee37',
 37 |                     help='')
 38 | args = parser.parse_args()
 39 | 
 40 | 
 41 | def main():
 42 |     torch.manual_seed(args.seed)
 43 |     writer = SummaryWriter(comment = args.exp_name)
 44 |     
 45 |     # Create Simulation Environment
 46 |     if args.network_name == 'ieee37':
 47 |         env = IEEE37()
 48 |     else:
 49 |         print("Not implemented")
 50 |     
 51 |     n_bus = env.n - 1
 52 |     n_inverters = len(env.gen_idx) # inverters at PV panels
 53 |     
 54 |     env_params = {'V0': env.V0[-env.n_pq:],
 55 |                   'P0': env.P0[-env.n_pq:],
 56 |                   'Q0': env.Q0[-env.n_pq:],
 57 |                   'H': np.hstack([env.R, env.B]), # 35 x 70
 58 |                   'n_bus':n_bus, # Slack bus is not controllable
 59 |                   'gen_idx': env.gen_idx - 1, # Excluded the slack bus
 60 |                   'V_upper': env.v_upper, 'V_lower': env.v_lower,
 61 |                  'S_rating': env.max_S,
 62 |                  }
 63 |     scaler = 1000 # Note: The value for Sbus is really small; Scale up for better learning
 64 |     
 65 |     mbp_nn = Net(n_bus, n_inverters, [256, 128, 64], [16, 4])
 66 |     memory = Replay_Memory()
 67 |     mbp_policy = NeuralController(mbp_nn, memory, args.lr, lam = args.lam, scaler = scaler, **env_params)
 68 |     mbp_policy = mbp_policy.to(DEVICE)
 69 |     
 70 |     # 1-week data
 71 |     num_steps = 900 # 15 minutes
 72 |     n_episodes = 7*86400//num_steps
 73 | 
 74 |     V_prev = np.zeros(n_bus)
 75 |     
 76 |     V_record = []
 77 |     V_est_record = []
 78 |     P_record = []
 79 |     Q_record = []
 80 |     
 81 |     for i in range(n_episodes):
 82 |         loss = 0
 83 |         violation_count = 0
 84 |         
 85 |         for k in range(num_steps):
 86 |             t = i*num_steps + k
 87 |             Sbus, P_av = env.getSbus(t)
 88 |             Sbus *= scaler
 89 |             state = np.concatenate([V_prev, np.real(Sbus), np.imag(Sbus)])
 90 |             mbp_policy.memory.append((state, Sbus, P_av)) ## Everything is np.array!
 91 |             
 92 |             state = torch.tensor(state).float().unsqueeze(0)
 93 |             
 94 |             P, Q = mbp_policy(state, Sbus, P_av = P_av)
 95 |             #pdb.set_trace()
 96 |             
 97 |             V, success = env.step(P + 1j*Q)
 98 |             V_prev = V[1:]
 99 |             
100 |             if np.any(V>env.v_upper) | np.any(V<env.v_lower):
101 |                 violation_count += 1
102 |             writer.add_scalar("V/max", max(V[1:]), t)
103 |             writer.add_scalar("V/min", min(V[1:]), t)
104 |             
105 |             cost = np.clip(P_av - P[mbp_policy.gen_idx], 0, None)
106 |             loss += cost
107 |             
108 |             V_record.append(V[1:])
109 |             P_record.append(P)
110 |             Q_record.append(Q)
111 |             
112 |             if (k % 900 == 0) & (t>0):
113 |                 mbp_policy.update()
114 |              
115 |         writer.add_scalar("Loss", loss.mean().item(), i)
116 |         writer.add_scalar("violations", violation_count, i)
117 |         ## Number of Projection operation during inference time
118 |         writer.add_scalar("proj_count", mbp_policy.proj_count, i)
119 |         mbp_policy.proj_count = 0
120 |         
121 |         if (i % 20 ==0) & (i>0):
122 |             np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
123 |             np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
124 |             np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
125 |             
126 |     np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
127 |     np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
128 |     np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
129 |             
130 | if __name__ == '__main__':
131 |     main()
132 | 
133 | '''
134 |     # Example Usage of the environment
135 |     t = 10
136 |     Sbus = env.getSbus(t)
137 |     
138 |     # Solve power flow equations
139 |     V, success = env.step(Sbus)
140 |     print(np.abs(V))
141 |     if success == 0:
142 |         print("Something is wrong")
143 |     
144 |     # Estimation using the linearized model
145 |     V_est = env.linear_estimate(Sbus)
146 |     print(V_est)
147 | '''
148 | 


--------------------------------------------------------------------------------
/mypypower/newtonpf.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 1996-2015 PSERC. All rights reserved.
  2 | # Use of this source code is governed by a BSD-style
  3 | # license that can be found in the LICENSE file.
  4 | 
  5 | """Solves the power flow using a full Newton's method.
  6 | """
  7 | 
  8 | import sys
  9 | 
 10 | from numpy import array, angle, exp, linalg, conj, r_, Inf
 11 | 
 12 | #from numpy import hstack, vstack
 13 | from scipy.sparse import hstack, vstack
 14 | from scipy.sparse.linalg import spsolve
 15 | 
 16 | from pypower.dSbus_dV import dSbus_dV
 17 | from pypower.ppoption import ppoption
 18 | 
 19 | import pdb
 20 | 
 21 | def newtonpf(Ybus, Sbus, V0, ref, pv, pq, ppopt=None):
 22 |     """Solves the power flow using a full Newton's method.
 23 | 
 24 |     Solves for bus voltages given the full system admittance matrix (for
 25 |     all buses), the complex bus power injection vector (for all buses),
 26 |     the initial vector of complex bus voltages, and column vectors with
 27 |     the lists of bus indices for the swing bus, PV buses, and PQ buses,
 28 |     respectively. The bus voltage vector contains the set point for
 29 |     generator (including ref bus) buses, and the reference angle of the
 30 |     swing bus, as well as an initial guess for remaining magnitudes and
 31 |     angles. C{ppopt} is a PYPOWER options vector which can be used to
 32 |     set the termination tolerance, maximum number of iterations, and
 33 |     output options (see L{ppoption} for details). Uses default options if
 34 |     this parameter is not given. Returns the final complex voltages, a
 35 |     flag which indicates whether it converged or not, and the number of
 36 |     iterations performed.
 37 | 
 38 |     @see: L{runpf}
 39 | 
 40 |     @author: Ray Zimmerman (PSERC Cornell)
 41 |     """
 42 |     ## default arguments
 43 |     if ppopt is None:
 44 |         ppopt = ppoption()
 45 | 
 46 |     ## options
 47 |     tol     = ppopt['PF_TOL']
 48 |     max_it  = ppopt['PF_MAX_IT']
 49 |     verbose = ppopt['VERBOSE']
 50 | 
 51 |     ## initialize
 52 |     converged = 0
 53 |     i = 0
 54 |     V = V0
 55 |     Va = angle(V)
 56 |     Vm = abs(V)
 57 | 
 58 |     ## set up indexing for updating V
 59 |     pvpq = r_[pv, pq]
 60 |     npv = len(pv)
 61 |     npq = len(pq)
 62 |     j1 = 0;         j2 = npv           ## j1:j2 - V angle of pv buses
 63 |     j3 = j2;        j4 = j2 + npq      ## j3:j4 - V angle of pq buses
 64 |     j5 = j4;        j6 = j4 + npq      ## j5:j6 - V mag of pq buses
 65 | 
 66 |     ## evaluate F(x0)
 67 |     mis = V * conj(Ybus * V) - Sbus
 68 |     if npv:
 69 |         F = r_[  mis[pv].real,
 70 |              mis[pq].real,
 71 |              mis[pq].imag  ]
 72 |     else:
 73 |         F = r_[
 74 |         mis[pq].real,
 75 |         mis[pq].imag  ]
 76 |         
 77 | 
 78 |     ## check tolerance
 79 |     normF = linalg.norm(F, Inf)
 80 |     if verbose > 1:
 81 |         sys.stdout.write('\n it    max P & Q mismatch (p.u.)')
 82 |         sys.stdout.write('\n----  ---------------------------')
 83 |         sys.stdout.write('\n%3d        %10.3e' % (i, normF))
 84 |     if normF < tol:
 85 |         converged = 1
 86 |         if verbose > 1:
 87 |             sys.stdout.write('\nConverged!\n')
 88 | 
 89 |     ## do Newton iterations
 90 |     while (not converged and i < max_it):
 91 |         ## update iteration counter
 92 |         i = i + 1
 93 | 
 94 |         ## evaluate Jacobian
 95 |         dS_dVm, dS_dVa = dSbus_dV(Ybus, V)
 96 |         # pdb.set_trace()
 97 |         J11 = dS_dVa[array([pvpq]).T, pvpq].real
 98 |         J12 = dS_dVm[array([pvpq]).T, pq].real
 99 |         J21 = dS_dVa[array([pq]).T, pvpq].imag
100 |         J22 = dS_dVm[array([pq]).T, pq].imag
101 |         #pdb.set_trace()
102 |         J = vstack([hstack([J11, J12]),hstack([J21, J22])], format="csr")
103 | 
104 |         ## compute update step
105 |         dx = -1 * spsolve(J, F)
106 |         # pdb.set_trace()
107 |         ## update voltage
108 |         if npv:
109 |             Va[pv] = Va[pv] + dx[j1:j2]
110 |         if npq:
111 |             Va[pq] = Va[pq] + dx[j3:j4]
112 |             Vm[pq] = Vm[pq] + dx[j5:j6]
113 |         V = Vm * exp(1j * Va)
114 |         Vm = abs(V)            ## update Vm and Va again in case
115 |         Va = angle(V)          ## we wrapped around with a negative Vm
116 | 
117 |         ## evalute F(x)
118 |         mis = V * conj(Ybus * V) - Sbus
119 |         if npv:
120 |             F = r_[  mis[pv].real,
121 |                  mis[pq].real,
122 |                  mis[pq].imag  ]
123 |         else:
124 |             F = r_[
125 |             mis[pq].real,
126 |             mis[pq].imag  ]
127 | 
128 |         ## check for convergence
129 |         normF = linalg.norm(F, Inf)
130 |         if verbose > 1:
131 |             sys.stdout.write('\n%3d        %10.3e' % (i, normF))
132 |         if normF < tol:
133 |             converged = 1
134 |             if verbose:
135 |                 sys.stdout.write("\nNewton's method power flow converged in "
136 |                                  "%d iterations.\n" % i)
137 | 
138 |     if verbose:
139 |         if not converged:
140 |             sys.stdout.write("\nNewton's method power did not converge in %d "
141 |                              "iterations.\n" % i)
142 | 
143 |     return V, converged, i
144 | 


--------------------------------------------------------------------------------
/network/IEEE-37/Ybus.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37/Ybus.mat


--------------------------------------------------------------------------------
/network/IEEE-37_linearized/B.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/B.mat


--------------------------------------------------------------------------------
/network/IEEE-37_linearized/R.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/R.mat


--------------------------------------------------------------------------------
/network/bracket.m:
--------------------------------------------------------------------------------
function BRX = bracket(X)
% BRACKET  Real block form of a complex scalar or matrix.
%   BRX = bracket(X) returns [real(X), -imag(X); imag(X), real(X)].

	re = real(X);
	im = imag(X);
	BRX = [re, -im; im, re];
4 | 
5 | 


--------------------------------------------------------------------------------
/network/extract_phase_37feeder.m:
--------------------------------------------------------------------------------
  1 | function [P,Q,Y] = extract_phase_37feeder(phase,Zbase,Sbase)
  2 | 
  3 | Nnode = 36;
  4 | Ybase = 1/Zbase;
  5 | %25 loads
  6 | P_l = [0 0 0;
  7 |            140 140 350; 
  8 |            0 0 0; 
  9 |            0 0 0; 
 10 |            0 0 85; 
 11 |            8 85 0; 
 12 |            0 0 85; 
 13 |            0 0 0;
 14 |            17 21 0;
 15 |            85 0 0; 
 16 |            0 0 85; 
 17 |            0 0 0; 
 18 |            0 42 0; 
 19 |            0 140 21; 
 20 |            0 0 0; 
 21 |            0 42 0;
 22 |            0 0 0;
 23 |            0 0 42;
 24 |            42 0 0; 
 25 |            42 0 0; 
 26 |            42 42 42; 
 27 |            0 0 85; 
 28 |            0 0 0; 
 29 |            0 85 0;
 30 |            0 0 0;
 31 |            0 0 42; 
 32 |            85 0 0;
 33 |            0 0 42; 
 34 |            140 0 0;
 35 |            126 0 0;
 36 |            0 0 0;
 37 |            0 0 42;
 38 |            0 0 85; 
 39 |            0 0 0;
 40 |            0 42 0;
 41 |            0 0 85].';
 42 | 
 43 | Q_l = [0 0 0; 
 44 |        70 70 175; 
 45 |        0 0 0; 
 46 |        0 0 0;
 47 |        0 0 40; 
 48 |        4 40 0; 
 49 |        0 0 40;
 50 |        0 0 0;
 51 |        8 10 0;
 52 |        40 0 0;
 53 |        0 0 40;
 54 |        0 0 0;
 55 |        0 21 0;
 56 |        0 70 10;
 57 |        0 0 0;
 58 |        0 21 0;
 59 |        0 0 0;  
 60 |        0 0 21;
 61 |        21 0 0;
 62 |        21 0 0;
 63 |        21 21 21 ;
 64 |        0 0 40;
 65 |        0 0 0;
 66 |        0 40 0;
 67 |        0 0 0;
 68 |        0 0 21;
 69 |        40 0 0;
 70 |        0 0 21;
 71 |        70 0 0;
 72 |        62 0 0;
 73 |        0 0 0;
 74 |        0 0 21; 
 75 |        0 0 40;
 76 |        0 0 0;
 77 |        0 21 0;
 78 |        0 0 40].';
 79 |    
 80 |    
 81 |    
 82 | %------------------------------------------------------------------------
 83 | % impedance matrix
 84 | %------------------------------------------------------------------------
 85 | 
 86 | % Configuration 721
 87 | Zs1 = [0.2926+0.1973i 0.0673-0.0368i 0.0337-0.0417i;
 88 |        0.0673-0.0368i 0.2646+0.1900i 0.0673-0.0368i;
 89 |        0.0337-0.0417i 0.0673-0.0368i 0.2926+0.1973i]./Zbase;
 90 | Ys1 = sqrt(-1)*159.7919*(10^-6).*eye(3)./Ybase;
 91 | 
 92 | % Configuration 722
 93 | Zs2 = [0.4751+0.2973i 0.1629-0.0326i 0.1234-0.0607i;
 94 |        0.1629-0.0326i 0.4488+0.2678i 0.1629-0.0326i;
 95 |        0.1234-0.0607i 0.1629-0.0326i 0.4751+0.2973i]./Zbase;
 96 | Ys2 = sqrt(-1)*127.8306*(10^-6).*eye(3)./Ybase;
 97 | 
 98 | % Configuration 723
 99 | Zs3 = [1.2936+0.6713i 0.4871+0.2111i 0.4585+0.1521i;
100 |        0.4871+0.2111i 1.3022+0.6326i 0.4871+0.2111i;
101 |        0.4585+0.1521i 1.2936+0.6713i 1.2936+0.6713i]./Zbase;
102 | Ys3 = sqrt(-1)*74.8405*(10^-6).*eye(3)./Ybase;
103 | 
104 | % Configuration 724
105 | Zs4 = [2.0952+0.7758i 0.5204+0.2738i 0.4926+0.2123i;
106 |        0.5204+0.2738i 2.1068+0.7398i 0.5204+0.2738i;
107 |        0.4926+0.2123i 0.5204+0.2738i 2.0952+0.7758i]./Zbase;
108 | Ys4 = sqrt(-1)*60.2483*(10^-6).*eye(3)./Ybase;
109 | 
110 | 
111 | %--------------------
112 | % line matrices 
113 | %--------------------
114 | 
115 | % mile = 5280 feet 
116 | convfm = (1/5280);
117 | 
118 | Z12 = Zs1*(1850)*convfm;
119 | Z12i = pinv(Z12);
120 | Y12 = .5.*Ys1*(1850)*convfm;
121 | 
122 | Z23 = Zs2*(960)*convfm;
123 | Z23i = pinv(Z23);
124 | Y23 = .5.*Ys2*(960)*convfm;
125 | 
126 | Z34 = Zs4*(400)*convfm;
127 | Z34i = pinv(Z34);
128 | Y34 = .5.*Ys4*(400)*convfm;
129 | 
130 | Z45 = Zs4*(240)*convfm;
131 | Z45i = pinv(Z45);
132 | Y45 = .5.*Ys4*(240)*convfm;
133 | 
134 | Z46 = Zs4*(320)*convfm;
135 | Z46i = pinv(Z46);
136 | Y46 = .5.*Ys4*(320)*convfm;
137 | 
138 | Z37 = Zs3*(360)*convfm;
139 | Z37i = pinv(Z37);
140 | Y37 = .5.*Ys3*(360)*convfm;
141 | 
142 | Z78 = Zs3*(520)*convfm;
143 | Z78i = pinv(Z78);
144 | Y78 = .5.*Ys3*(520)*convfm;
145 | 
146 | Z89 = Zs4*(80)*convfm;
147 | Z89i = pinv(Z89);
148 | Y89 = .5.*Ys4*(80)*convfm;
149 | 
150 | Z910 = Zs4*(520)*convfm;
151 | Z910i = pinv(Z910);
152 | Y910 = .5.*Ys4*(520)*convfm;
153 | 
154 | Z811 = Zs3*(800)*convfm;
155 | Z811i = pinv(Z811);
156 | Y811 = .5.*Ys3*(800)*convfm;
157 | 
158 | Z1112 = Zs4*(920)*convfm;
159 | Z1112i = pinv(Z1112);
160 | Y1112 = .5.*Ys4*(920)*convfm;
161 | 
162 | Z1213 = Zs4*(760)*convfm;
163 | Z1213i = pinv(Z1213);
164 | Y1213 = .5.*Ys4*(760)*convfm;
165 | 
166 | Z1214 = Zs4*(120)*convfm;
167 | Z1214i = pinv(Z1214);
168 | Y1214 = .5.*Ys4*(120)*convfm;
169 | 
170 | Z1115 = Zs3*(600)*convfm;
171 | Z1115i = pinv(Z1115);
172 | Y1115 = .5.*Ys3*(600)*convfm;
173 | 
174 | Z1516 = Zs4*(280)*convfm;
175 | Z1516i = pinv(Z1516);
176 | Y1516 = .5.*Ys4*(280)*convfm;
177 | 
178 | Z317 = Zs2*(1320)*convfm;
179 | Z317i = pinv(Z317);
180 | Y317 = .5.*Ys2*(1320)*convfm;
181 | 
182 | Z1722 = Zs3*(600)*convfm;
183 | Z1722i = pinv(Z1722);
184 | Y1722 = .5.*Ys3*(600)*convfm;
185 | 
186 | Z2223 = Zs3*(200)*convfm;
187 | Z2223i = pinv(Z2223);
188 | Y2223 = .5.*Ys3*(200)*convfm;
189 | 
190 | Z1718 = Zs4*(240)*convfm;
191 | Z1718i = pinv(Z1718);
192 | Y1718 = .5.*Ys4*(240)*convfm;
193 | 
194 | Z1819 = Zs3*(280)*convfm;
195 | Z1819i = pinv(Z1819);
196 | Y1819 = .5.*Ys3*(280)*convfm;
197 | 
198 | Z1920 = Zs4*(280)*convfm;
199 | Z1920i = pinv(Z1920);
200 | Y1920 = .5.*Ys4*(280)*convfm;
201 | 
202 | Z1921 = Zs4*(200)*convfm;
203 | Z1921i = pinv(Z1921);
204 | Y1921 = .5.*Ys4*(200)*convfm;
205 | 
206 | Z2324 = Zs3*(600)*convfm;
207 | Z2324i = pinv(Z2324);
208 | Y2324 = .5.*Ys3*(600)*convfm;
209 | 
210 | Z2325 = Zs3*(320)*convfm;
211 | Z2325i = pinv(Z2325);
212 | Y2325 = .5.*Ys3*(320)*convfm;
213 | 
214 | Z2526 = Zs4*(320)*convfm;
215 | Z2526i = pinv(Z2526);
216 | Y2526 = .5.*Ys4*(320)*convfm;
217 | 
218 | Z2527 = Zs3*(320)*convfm;
219 | Z2527i = pinv(Z2527);
220 | Y2527 = .5.*Ys3*(320)*convfm;
221 | 
222 | Z2728 = Zs3*(560)*convfm;
223 | Z2728i = pinv(Z2728);
224 | Y2728 = .5.*Ys3*(560)*convfm;
225 | 
226 | Z2829 = Zs3*(640)*convfm;
227 | Z2829i = pinv(Z2829);
228 | Y2829 = .5.*Ys3*(640)*convfm;
229 | 
230 | Z2930 = Zs3*(400)*convfm;
231 | Z2930i = pinv(Z2930);
232 | Y2930 = .5.*Ys3*(400)*convfm;
233 | 
234 | Z3031 = Zs3*(400)*convfm;
235 | Z3031i = pinv(Z3031);
236 | Y3031 = .5.*Ys3*(400)*convfm;
237 | 
238 | Z3132 = Zs3*(400)*convfm;
239 | Z3132i = pinv(Z3132);
240 | Y3132 = .5.*Ys3*(400)*convfm;
241 | 
242 | Z3133 = Zs4*(200)*convfm;
243 | Z3133i = pinv(Z3133);
244 | Y3133 = .5.*Ys4*(200)*convfm;
245 | 
246 | Z2834 = Zs4*(520)*convfm;
247 | Z2834i = pinv(Z2834);
248 | Y2834 = .5.*Ys4*(520)*convfm;
249 | 
250 | Z3435 = Zs4*(1280)*convfm;
251 | Z3435i = pinv(Z3435);
252 | Y3435 = .5.*Ys4*(1280)*convfm;
253 | 
254 | Z3436 = Zs4*(200)*convfm;
255 | Z3436i = pinv(Z3436);
256 | Y3436 = .5.*Ys4*(200)*convfm;
257 | 
258 | 
259 | % network admittance matrix
260 | oo = zeros(3);
261 | 
262 | Y_net = [Z12i+Y12 -Z12i zeros(3,3*(Nnode-2));
263 |          -Z12i Z12i+Z23i+Y12+Y23 -Z23i zeros(3,3*(Nnode-3));
264 |          oo -Z23i Z23i+Z34i+Z37i+Z317i+Y23+Y34+Y37+Y317 -Z34i oo oo -Z37i oo oo oo oo oo oo oo oo oo -Z317i zeros(3,3*(Nnode-17));
265 |          oo oo -Z34i Z34i+Z45i+Z46i+Y34+Y45+Y46 -Z45i -Z46i zeros(3,3*(Nnode-6));
266 |          oo oo oo -Z45i Z45i+Y45 zeros(3,3*(Nnode-5));
267 |          oo oo oo -Z46i oo Z46i+Y46 zeros(3,3*(Nnode-6));
268 |          oo oo -Z37i oo oo oo Z37i+Z78i+Y37+Y78 -Z78i zeros(3,3*(Nnode-8));
269 |          oo oo oo oo oo oo -Z78i Z78i+Y78+Z89i+Y89+Z811i+Y811 -Z89i oo -Z811i zeros(3,3*(Nnode-11));
270 |          zeros(3,3*7) -Z89i Z89i+Y89+Z910i+Y910 -Z910i zeros(3,3*(Nnode-10));
271 |          zeros(3,3*8) -Z910i Z910i+Y910 zeros(3,3*(Nnode-10));
272 |          zeros(3,3*7) -Z811i oo oo Z811i+Y811+Z1112i+Y1112+Z1115i+Y1115 -Z1112i oo oo -Z1115i zeros(3,3*(Nnode-15));
273 |          zeros(3,3*10) -Z1112i Z1112i+Z1213i+Z1214i+Y1112+Y1213+Y1214 -Z1213i -Z1214i zeros(3,3*(Nnode-14));
274 |          zeros(3,3*11) -Z1213i Z1213i+Y1213 zeros(3,3*(Nnode-13));
275 |          zeros(3,3*11) -Z1214i oo Z1214i+Y1214 zeros(3,3*(Nnode-14))
276 |          zeros(3,3*10) -Z1115i oo oo oo Z1115i+Z1516i+Y1115+Y1516 -Z1516i zeros(3,3*(Nnode-16));
277 |          zeros(3,3*14) -Z1516i Z1516i+Y1516 zeros(3,3*(Nnode-16))
278 |          oo oo -Z317i zeros(3,3*13) Z317i+Y317+Z1718i+Y1718+Z1722i+Y1722 -Z1718i oo oo oo -Z1722i zeros(3,3*(Nnode-22));
279 |          zeros(3,3*16) -Z1718i Z1718i+Y1718+Z1819i+Y1819 -Z1819i zeros(3,3*(Nnode-19));
280 |          zeros(3,3*17) -Z1819i Z1819i+Y1819+Z1920i+Y1920+Z1921i+Y1921 -Z1920i -Z1921i zeros(3,3*(Nnode-21));
281 |          zeros(3,3*18) -Z1920i Z1920i+Y1920 zeros(3,3*(Nnode-20)); 
282 |          zeros(3,3*18) -Z1921i oo Z1921i+Y1921 zeros(3,3*(Nnode-21));
283 |          zeros(3,3*16) -Z1722i oo oo oo oo Z1722i+Y1722+Z2223i+Y2223 -Z2223i zeros(3,3*(Nnode-23));
284 |          zeros(3,3*21) -Z2223i Z2223i+Y2223+Z2324i+Y2324+Z2325i+Y2325 -Z2324i -Z2325i zeros(3,3*(Nnode-25));
285 |          zeros(3,3*22) -Z2324i Z2324i+Y2324 zeros(3,3*(Nnode-24));
286 |          zeros(3,3*22) -Z2325i oo Z2325i+Y2325+Z2526i+Y2526+Z2527i+Y2527 -Z2526i -Z2527i zeros(3,3*(Nnode-27));
287 |          zeros(3,3*24) -Z2526i Z2526i+Y2526 zeros(3,3*(Nnode-26));
288 |          zeros(3,3*24) -Z2527i oo Z2527i+Y2527+Z2728i+Y2728 -Z2728i zeros(3,3*(Nnode-28));
289 |          zeros(3,3*26) -Z2728i Z2728i+Y2728+Z2829i+Y2829+Z2834i+Y2834 -Z2829i oo oo oo oo -Z2834i zeros(3,3*(Nnode-34));
290 |          zeros(3,3*27) -Z2829i Z2829i+Y2829+Z2930i+Y2930 -Z2930i zeros(3,3*(Nnode-30));
291 |          zeros(3,3*28) -Z2930i Z2930i+Y2930+Z3031i+Y3031 -Z3031i zeros(3,3*(Nnode-31));
292 |          zeros(3,3*29) -Z3031i Z3031i+Y3031+Z3132i+Y3132+Z3133i+Y3133 -Z3132i -Z3133i zeros(3,3*(Nnode-33))
293 |          zeros(3,3*30) -Z3132i Z3132i+Y3132 zeros(3,3*(Nnode-32));
294 |          zeros(3,3*30) -Z3133i oo Z3133i+Y3133 zeros(3,3*(Nnode-33));
295 |          zeros(3,3*27) -Z2834i zeros(3,3*5) Z2834i+Y2834+Z3435i+Y3435+Z3436i+Y3436 -Z3435i -Z3436i;
296 |          zeros(3,3*33) -Z3435i Z3435i+Y3435 oo;
297 |          zeros(3,3*33) -Z3436i oo Z3436i+Y3436];
298 | 
299 | 
300 | P = P_l(phase,:);
301 | Q = Q_l(phase,:);
302 | Y = Y_net(phase:3:end,phase:3:end);
303 | 
304 | fac = Sbase/1000;
305 | 
306 | P = P./fac;
307 | Q = Q./fac;
308 | 
309 | 
310 | end
311 |    


--------------------------------------------------------------------------------
/network/ieee37.m:
--------------------------------------------------------------------------------
  1 | %	This code is modified from the reference below to linearize the IEEE 37-bus feeder system.
  2 | %
  3 | %	S. Bolognani, F. Dörfler (2015)
  4 | %	"Fast power system analysis via implicit linearization of the power flow manifold."
  5 | %	In Proc. 53rd Annual Allerton Conference on Communication, Control, and Computing.
  6 | %	Preprint available at http://control.ee.ethz.ch/~bsaverio/papers/BolognaniDorfler_Allerton2015.pdf
  7 | %
  8 | %	This source code is distributed in the hope that it will be useful, but without any warranty.
  9 | %
 10 | %	MatLab OR GNU Octave, version 3.8.1 available at http://www.gnu.org/software/octave/
 11 | %	MATPOWER 5.1 available at http://www.pserc.cornell.edu/matpower/
 12 | 
 13 | clear all
 14 | close all
 15 | clc
 16 | 
 17 | % Load grid model
 18 | %Vbase = 4160/sqrt(3);
 19 | %Sbase = 5e6;
 20 | %Zbase = Vbase^2/Sbase;
 21 | Zbase = 1;
 22 | Vbase = 4800;
 23 | Sbase = (Vbase^2)/Zbase;
 24 | 
 25 | phase = 1;
 26 | 
 27 | [Pbus, Qbus, Ybus] = extract_phase_37feeder(phase, Zbase, Sbase);
 28 | Sbus = complex(Pbus, Qbus);
 29 | n = size(Ybus, 1); 
 30 | %%
 31 | % Compute exact solution via MatPower
 32 | ref_idx = [1];
 33 | pv_idx = [];%[4, 7, 9. 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36];
 34 | pq_idx =[2:36];%[2, 3, 5, 6, 8, 12, 14, 15, 16, 18, 19, 21, 24, 25, 27];
 35 | V0 = ones(n,1);
 36 | %[results, success, i] = gausspf(Ybus, Sbus, V0, ref_idx, pv_idx, pq_idx, mpoption('VERBOSE', 1, 'OUT_ALL',0));
 37 | 
 38 | %%
 39 | %%%%% LINEARIZED MODEL %%%%%
 40 | 
 41 | %%%%% Linearization point (given voltage magnitude and angle)
 42 | %Vbus = NaN(n,1);
 43 | %Vbus(mpc.gen(:,GEN_BUS)) = mpc.gen(:,VG);
 44 | Vbus = ones(n,1);
 45 | 
 46 | % Flat voltage profile
 47 | V0 = ones(n,1);
 48 | A0 = zeros(n,1);
 49 | 
 50 | % Corresponding current injection
 51 | J0 = Ybus*(V0.*exp(1j*A0));
 52 | 
 53 | % Corresponding power injection
 54 | S0 = V0.*exp(1j*A0).*conj(J0);
 55 | P0 = real(S0);
 56 | Q0 = imag(S0);
 57 | 
 58 | %%%%% Linear system of equations for the grid model
 59 | 
 60 | UU = bracket(diag(V0.*exp(1j*A0)));
 61 | JJ = bracket(diag(conj(J0)));
 62 | NN = Nmatrix(2*n);
 63 | YY = bracket(Ybus);
 64 | PP = Rmatrix(ones(n,1), zeros(n,1));
 65 | 
 66 | AA = zeros(2*n,4*n);
 67 | BB = zeros(2*n,1);
 68 | 
 69 | V_OFFSET = 0;
 70 | A_OFFSET = 1*n;
 71 | P_OFFSET = 2*n;
 72 | Q_OFFSET = 3*n;
 73 | 
 74 | % bus models
 75 | 
 76 | for bus = 1:n
 77 | 	row = 2*(bus-1)+1;
 78 | 	if (any(bus == pq_idx(:)))
 79 | 		AA(row,P_OFFSET+bus) = 1;
 80 | 		AA(row+1,Q_OFFSET+bus) = 1;
 81 |         BB(row) = Pbus(bus) - P0(bus);
 82 | 		BB(row+1) = Qbus(bus) - Q0(bus);
 83 | 	elseif (any(bus == pv_idx(:)))
 84 | 		AA(row,P_OFFSET+bus) = 1;
 85 | 		AA(row+1,V_OFFSET+bus) = 1;
 86 |         BB(row) = Pbus(bus) - P0(bus);
 87 | 		BB(row+1) = Vbus(bus) - V0(bus);
 88 | 	elseif (any(bus == ref_idx(:)))
 89 | 		AA(row,V_OFFSET+bus) = 1;
 90 | 		AA(row+1,A_OFFSET+bus) = 1;
 91 | 		BB(row) = Vbus(bus) - V0(bus);
 92 |         BB(row+1) = 0 - A0(bus);
 93 | 	end
 94 | end
 95 | 
 96 | Agrid = [(JJ + UU*NN*YY)*PP -eye(2*n)];
 97 | Amat = [Agrid; AA];
 98 | Bmat = [zeros(2*n,1); BB]; 
 99 | 
100 | x = Amat\Bmat;
101 | 
102 | approxVM = V0 + x(1:n);
103 | approxVA = (A0 + x(n+1:2*n))/pi*180;
104 | 
105 | 
106 | %%
107 | % Check my implementation is correct
108 | A11 = (JJ + UU*NN*YY)*PP;
109 | A21 = AA(:, 1:2*n);
110 | A22 = AA(:, 2*n+1:4*n);
111 | 
112 | n_new = n-1;
113 | 
114 | delta_P = reshape(Pbus(2:end), n_new, 1)-P0(2:end);
115 | delta_Q = reshape(Qbus(2:end), n_new, 1)-Q0(2:end);
116 | 
117 | % remove the first bus;
118 | A11(n+1, :) = [];
119 | A11(:, n+1) = [];
120 | A11(1, :) = [];
121 | A11(:, 1) = [];
122 | 
123 | x_hat = inv(A11) * [delta_P; delta_Q];
124 | %x_hat = pinv([A11; A21]) * ([eye(2*n); -A22] * [Pbus.'; Qbus.'] + Bmat);
125 | 
126 | myVM = V0(2:end) + x_hat(1:n_new);
127 | myVA = (A0(2:end) + x_hat(n_new+1:2*n_new))/pi*180;
128 | 
129 | subplot(211)
130 | %plot(1:n, approxVM, 'k*')
131 | plot(2:n, myVM(1:end), 'ko', 1:n, approxVM, 'k*')
132 | %plot(1:n, results.bus(:,VM), 'ko', 1:n, approxVM, 'k*')
133 | ylabel('magnitudes [p.u.]')
134 | xlim([0 n])
135 | 
136 | subplot(212)
137 | %plot(1:n, approxVA, 'k*')
138 | plot(2:n, myVA, 'ko', 1:n, approxVA, 'k*')
139 | %plot(1:n, results.bus(:,VA), 'ko', 1:n, approxVA, 'k*')
140 | %ylabel('angles [deg]')
141 | xlim([0 n])
142 | %%
143 | H = inv(A11);
144 | R = H(1:n_new, 1:n_new);
145 | B = H(1:n_new, n_new+1:2*n_new);
146 | 
147 | 
148 | 


--------------------------------------------------------------------------------
/run_exp1.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | python main_IW.py --exp_name test_w-aux_0 --lam 10 --seed 42
 3 | python main_IW.py --exp_name test_w-aux_1 --lam 10 --seed 0
 4 | python main_IW.py --exp_name test_w-aux_2 --lam 10 --seed 105
 5 | python main_IW.py --exp_name test_w-aux_3 --lam 10 --seed 7
 6 | python main_IW.py --exp_name test_w-aux_4 --lam 100 --seed 59
 7 | 
 8 | python IW_ablation.py --exp_name vanilla-update_w-aux_0 --lam 10 --seed 42
 9 | python IW_ablation.py --exp_name vanilla-update_w-aux_1 --lam 10 --seed 0
10 | python IW_ablation.py --exp_name vanilla-update_w-aux_2 --lam 10 --seed 105
11 | python IW_ablation.py --exp_name vanilla-update_w-aux_3 --lam 10 --seed 37
12 | python IW_ablation.py --exp_name vanilla-update_w-aux_4 --lam 10 --seed 49
13 | 
14 | #python main_IW.py --exp_name clip_no-update --lam 0 --seed 42 --update_episode -1
15 | 
16 | 
17 | #python main_IW.py --exp_name nn-w-proj_0 --lam 0 --seed 42
18 | #python main_IW.py --exp_name nn-w-proj_1 --lam 0 --seed 0
19 | #python main_IW.py --exp_name nn-w-proj_2 --lam 0 --seed 15
20 | #python main_IW.py --exp_name nn-w-proj_3 --lam 0 --seed 37
21 | #python main_IW.py --exp_name nn-w-proj_4 --lam 0 --seed 49
22 | 
23 | #python IW_ablation.py --exp_name _0 --lam 0 --seed 42
24 | #python IW_ablation.py --exp_name vannilla-update_2 --lam 0 --seed 0
25 | #python IW_ablation.py --exp_name vannilla-update_3 --lam 0 --seed 15
26 | #python IW_ablation.py --exp_name vannilla-update_4 --lam 0 --seed 37
27 | #python IW_ablation.py --exp_name vannilla-update_5 --lam 0 --seed 49
28 | 


--------------------------------------------------------------------------------
/utils/inverter_utils.py:
--------------------------------------------------------------------------------
 1 | # Helper Functions
 2 | import numpy as np
 3 | import torch
 4 | import torch.utils.data as data
 5 | import pdb
 6 | 
 7 | class Replay_Memory():
 8 |     def __init__(self, memory_size=86400):
 9 |         self.memory_size = memory_size
10 |         self.storage = []
11 | 
12 |     def sample_batch(self, batch_size=32):
13 |         # This function returns a batch of randomly sampled transitions - i.e. state, action, reward, next state, terminal flag tuples.
14 |         # You will feed this to your model to train.
15 |         rand_idx = np.random.choice(len(self.storage), batch_size)
16 |         batch = [self.storage[i] for i in rand_idx]
17 |         
18 |         state = [transition[0] for transition in batch]
19 |         Sbus = [transition[1] for transition in batch]
20 |         P_av = [transition[2] for transition in batch]
21 |         return torch.tensor(np.stack(state)).float(), np.stack(Sbus), np.stack(P_av)
22 |          
23 |     def append(self, transition):
24 |         # appends transition to the memory.
25 |         self.storage.append(transition)
26 |         # only keeps the latest memory_size transitions
27 |         if len(self.storage) > self.memory_size:
28 |             self.storage = self.storage[-self.memory_size:]
29 | 
30 |     
31 | 


--------------------------------------------------------------------------------
/utils/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.utils.data as data
  4 | import numpy as np
  5 |     
  6 | # Implement a vanilla MLP here
  7 | class MLP(nn.Module):
  8 |     def __init__(self, input_size, hiddens, output_size):
  9 |         super(MLP, self).__init__()
 10 |         self.n_layers = len(hiddens)
 11 |         self.layers = []
 12 |         tmp = [input_size] + hiddens
 13 |         
 14 |         for i in range(self.n_layers):
 15 |             self.layers.append(nn.Linear(tmp[i], tmp[i+1]))
 16 |             self.layers.append(nn.ReLU())
 17 |             # self.layers.append(nn.BatchNorm1d(tmp[i+1]))
 18 |         self.layers.append(nn.Linear(tmp[-1], output_size))
 19 |         self.layers = nn.ModuleList(self.layers)
 20 |     
 21 |     def forward(self,x):
 22 |         out = x
 23 |         for i, l in enumerate(self.layers):
 24 |             out = l(out)
 25 |         return out
 26 | 
 27 | 
 28 | class LSTM(nn.Module):
 29 |     def __init__(self, n_state, n_action, n_dist, lstm_hidden = 8, hiddens = [4], lstm_layer = 2, bi = False):
 30 |         super(LSTM, self).__init__()
 31 |         
 32 |         self.rnn = nn.LSTM(n_dist, lstm_hidden, lstm_layer, dropout = 0, bidirectional = bi)
 33 |         if bi:
 34 |             self.n_direction = 2
 35 |         else:
 36 |             self.n_direction = 1
 37 |             
 38 |         self.lstm_hidden = lstm_hidden
 39 |         self.lstm_layer = lstm_layer
 40 |         
 41 |         self.encoder1 = nn.Sequential(
 42 |             nn.Linear(n_state, 4),
 43 |             nn.ReLU(),
 44 |             #nn.BatchNorm1d(32),
 45 |             nn.Linear(4, lstm_hidden*self.n_direction*self.lstm_layer),
 46 |             nn.ReLU())
 47 |         
 48 |         self.encoder2 = nn.Sequential(
 49 |             nn.Linear(n_state, 4),
 50 |             nn.ReLU(),
 51 |             #nn.BatchNorm1d(32),
 52 |             nn.Linear(4, lstm_hidden * self.n_direction*self.lstm_layer),
 53 |             nn.ReLU())
 54 |             
 55 |         n_layers = len(hiddens) + 1
 56 |         tmp = [self.n_direction * lstm_hidden] + hiddens #+ [n_action]
 57 |         
 58 |         self.decoder = []
 59 |         for i in range(n_layers-1):
 60 |             self.decoder.append(nn.Linear(tmp[i], tmp[i+1]))
 61 |             self.decoder.append(nn.ReLU())
 62 |         self.decoder = nn.ModuleList(self.decoder)
 63 |         
 64 |         # mu and sigma2 are learned separately
 65 |         self.final_layer = nn.Linear(tmp[-1], n_action)
 66 |         self.final_layer_ = nn.Linear(tmp[-1], n_action)
 67 |     
 68 |     def forward(self, state, disturbance):
 69 |         # state: n x dim
 70 |         # disturbance: T x n x dist
 71 |         n = state.shape[0]
 72 |         T = disturbance.shape[0]
 73 |         
 74 |         h0 = self.encoder1(state).reshape(n, self.n_direction*self.lstm_layer, self.lstm_hidden).transpose(0, 1) # (layer x direction) x n x Dim.
 75 |         c0 = self.encoder2(state).reshape(n, self.n_direction*self.lstm_layer, self.lstm_hidden).transpose(0, 1)
 76 | 
 77 |         out, (hn, cn) = self.rnn(disturbance, (h0, c0)) # out:  T x n x (lstm_hidden x n_direction)
 78 |         #print("line 176")
 79 |         out = out.reshape(T * n, self.lstm_hidden * self.n_direction)
 80 |         for layer in self.decoder:
 81 |             out = layer(out)
 82 |         mu = self.final_layer(out).reshape(T, n, -1)
 83 |         sigma_sq = self.final_layer_(out).reshape(T, n, -1)
 84 |         # out: (T x n) x n_action
 85 |         return mu, sigma_sq
 86 | 
 87 | '''
 88 | class Replay_Memory():
 89 |     def __init__(self, memory_size=288, burn_in=32):
 90 |         self.memory_size = memory_size
 91 |         self.burn_in = burn_in
 92 |         # the memory is as a list of transitions (S,A,R,S,D).
 93 |         self.storage = []
 94 | 
 95 |     def sample_batch(self, batch_size=32):
 96 |         # This function returns a batch of randomly sampled transitions - i.e. state, action, reward, next state, terminal flag tuples.
 97 |         # You will feed this to your model to train.
 98 |         rand_idx = np.random.choice(len(self.storage), batch_size)
 99 |         return [self.storage[i] for i in rand_idx]
100 | 
101 |     def append(self, transition):
102 |         # appends transition to the memory.
103 |         self.storage.append(transition)
104 |         # only keeps the latest memory_size transitions
105 |         if len(self.storage) > self.memory_size:
106 |             self.storage = self.storage[-self.memory_size:]
107 | '''
108 | 


--------------------------------------------------------------------------------
/utils/ppo_utils.py:
--------------------------------------------------------------------------------
 1 | # Helper Functions
 2 | import numpy as np
 3 | import torch
 4 | import torch.utils.data as data
 5 | import pdb
 6 | 
 7 | def make_dict(obs_name, obs):
 8 |     zipbObj = zip(obs_name, obs)
 9 |     return dict(zipbObj)
10 | 
def R_func(obs_dict, action, eta):
    """Return the reward, which is the negated ``action``.

    ``obs_dict`` and ``eta`` are accepted but not used by the current reward.
    """
    return -action
14 |     
15 | # Calculate the advantage estimate
def Advantage_func(rewards, gamma):
    """Return the discounted reward-to-go for every step as a ``(T, 1)`` double tensor.

    Entry ``t`` equals ``rewards[t] + gamma * rewards[t+1] + gamma**2 * rewards[t+2] + ...``.
    """
    horizon = len(rewards)
    advantage = torch.zeros((horizon, 1)).double()

    # Walk backwards so each step reuses the running return of its successor.
    running = torch.zeros(1, 1).double()
    for step in range(horizon - 1, -1, -1):
        running = gamma * running + rewards[step]
        advantage[step] = running
    return advantage
25 | 
class Dataset(data.Dataset):
    """Index-aligned view over seven parallel sequences of rollout data.

    Item ``i`` is the tuple ``(state, action, disturbance, advantage,
    old_logprob, x_upper, x_lower)`` taken at position ``i`` of each sequence.
    """

    def __init__(self, states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers):
        self.states = states
        self.actions = actions
        self.disturbance = disturbance
        self.advantages = advantages
        self.old_logprobs = old_logprobs
        self.x_uppers = x_uppers
        self.x_lowers = x_lowers

    def __len__(self):
        # The length is taken from ``states`` alone.
        return len(self.states)

    def __getitem__(self, index):
        columns = (
            self.states,
            self.actions,
            self.disturbance,
            self.advantages,
            self.old_logprobs,
            self.x_uppers,
            self.x_lowers,
        )
        return tuple(column[index] for column in columns)
41 |     
class Replay_Memory():
    """Accumulates per-step rollout tensors and hands them out as stacked batches.

    Callers append to the public lists directly; ``sample`` stacks everything
    collected so far and then empties the buffers.
    """

    def __init__(self, ):
        self.clear_memory()

    def clear_memory(self, ):
        """Reset every buffer to a fresh empty list."""
        self.advantages = []
        self.states = []
        self.old_logprobs = []
        self.actions = []
        self.disturbance = []  # each entry: T x n_dist
        self.x_uppers = []
        self.x_lowers = []

    def sample(self):
        """Stack all buffered entries, clear the buffers, and return the stacks.

        Returns:
            ``(states, actions, disturbance, advantages, old_logprobs,
            x_uppers, x_lowers)``. ``advantages`` and ``old_logprobs`` are
            flattened to 1-D; ``disturbance`` is stacked along a new leading
            axis (n x T x dist); the rest are row-stacked with ``torch.vstack``.
        """
        def flat(entries):
            return torch.vstack(entries).reshape(-1)

        batch = (
            torch.vstack(self.states),
            torch.vstack(self.actions),
            torch.stack(self.disturbance),
            flat(self.advantages),
            flat(self.old_logprobs),
            torch.vstack(self.x_uppers),
            torch.vstack(self.x_lowers),
        )
        self.clear_memory()
        return batch
72 | 
73 |     
74 | 


--------------------------------------------------------------------------------