├── .gitignore
├── IW_baselines
└── IW_ablation.py
├── LICENSE
├── README.md
├── agents
├── base.py
├── inverter_policy.py
└── nn_policy.py
├── algo
└── ppo.py
├── data
├── ARX-0
├── data_2017_baseline.pkl
├── data_TMY3_baseline.pkl
└── param_IW-nn-1800
├── docs
├── figs
│ ├── framework.pdf
│ └── framework.png
└── slides.pdf
├── env
└── inverter.py
├── environment.yml
├── inverter_baselines
├── inverter_QP.py
├── inverter_acopf.py
├── inverter_no-control.py
└── inverter_volt-var.py
├── main_IW.py
├── main_inverter.py
├── mypypower
└── newtonpf.py
├── network
├── IEEE-37
│ └── Ybus.mat
├── IEEE-37_linearized
│ ├── B.mat
│ └── R.mat
├── bracket.m
├── extract_phase_37feeder.m
└── ieee37.m
├── run_exp1.sh
└── utils
├── inverter_utils.py
├── network.py
└── ppo_utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/Irradiance_1sec.mat
2 | data/Loads_1sec.mat
3 | runs/
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | venv/
112 | #ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
--------------------------------------------------------------------------------
/IW_baselines/IW_ablation.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 |
3 | import gym
4 | import eplus_env
5 |
6 | import warnings
7 | warnings.filterwarnings("ignore", category=UserWarning)
8 |
9 | import argparse
10 | import numpy as np
11 | import pandas as pd
12 | import copy
13 | import pickle
14 | import pdb
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.utils.data as data
20 | import torch.optim as optim
21 | from torch.distributions import MultivariateNormal, Normal
22 | from torch.utils.tensorboard import SummaryWriter
23 |
# This script lives in IW_baselines/, while the project packages it imports
# (algo, agents, utils) sit one level up at the repository root.  Put the
# repository root on sys.path so those imports resolve.
main_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, main_path)

from algo.ppo import PPO
from agents.nn_policy import NeuralController
from utils.network import LSTM
from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Command-line options; parsed once at import time and read by main().
parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning')
parser.add_argument('--gamma', type=float, default=0.9, metavar='G',
                    help='discount factor (default: 0.9)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
parser.add_argument('--lr', type=float, default=5e-4, metavar='G',
                    help='Learning Rate')
parser.add_argument('--lam', type=int, default=10, metavar='N',
                    help='Weight on the projection loss (default: 10)')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--T', type=int, default=12, metavar='N',
                    help='Planning Horizon (default: 12)')
parser.add_argument('--step', type=int, default=300*3, metavar='N',
                    help='Time Step in Simulation, Unit in Seconds (default: 900)') # 15 Minutes Now!
parser.add_argument('--exp_name', type=str, default='vanilla_update',
                    help='save name')
parser.add_argument('--eta', type=int, default=3,
                    help='Hyper Parameter for Balancing Comfort and Energy')
parser.add_argument('--model_no', type = int, default = 1800,
                    help = 'Suffix of the pretrained weight file data/param_IW-nn-<model_no> (default: 1800)')
args = parser.parse_args()
57 |
def update_parameters(agent, sigma=0.1, K = 4):
    """Run K epochs of clipped-surrogate PPO updates on ``agent.policy.nn``.

    This is the ablation variant: the policy mean comes straight from the
    network, so no projection loss is available and none is added.

    Args:
        agent: object exposing ``_get_training_samples()``, ``policy.nn``,
            ``policy_old.nn``, ``evaluate_action``, ``clip_param`` and
            ``optimizer``.
        sigma: standard deviation of the Gaussian exploration noise; the
            variance passed to ``evaluate_action`` is ``sigma**2``.
        K: number of passes over the training samples.

    After the last epoch the weights of ``agent.policy.nn`` are copied into
    ``agent.policy_old.nn``.
    """
    loader = agent._get_training_samples()
    for _ in range(K):
        for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader:
            # NOTE(review): mirrors the two-argument call used in main()
            # (state, time-major disturbance).  The previous code passed an
            # undefined name `state` plus a third positional argument --
            # confirm against utils.network.LSTM.forward.
            mus, _ = agent.policy.nn(states, disturbance.transpose(0, 1))  # T x n x n_action
            sigma_sqs = torch.ones_like(mus) * sigma**2

            # Only the first step of the planned sequence was executed.
            log_probs, entropies = agent.evaluate_action(mus[0], actions, sigma_sqs)

            ratio = torch.exp(log_probs.squeeze()-old_logprobs)
            surr1 = ratio * advantages
            surr2 = torch.clamp(ratio, 1-agent.clip_param, 1+agent.clip_param) * advantages
            loss = -torch.min(surr1, surr2).mean()
            ## Auxiliary loss: entropy bonus.  The projection-loss term
            ## (agent.lam * proj_loss) is omitted because no projection is
            ## performed in this forward pass, so proj_loss does not exist.
            loss = loss - torch.mean(entropies) * 0.01

            agent.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(agent.policy.nn.parameters(), 100)
            agent.optimizer.step()
    # Sync the behaviour policy with the freshly updated one.
    agent.policy_old.nn.load_state_dict(agent.policy.nn.state_dict())
83 |
def main():
    """Run the online-learning experiment on the 'Eplus-IW-test-v0' environment.

    For each of ``tol_eps`` episodes (96 control steps each) the script:
      1. normalises the current state and the look-ahead disturbances,
      2. samples an action from the old policy network with Gaussian noise,
      3. projects the action through ``agent.policy_old.proj_layer`` without
         gradients (falling back to a bang-bang heuristic if that fails),
      4. applies the action to the simulator for three environment steps,
      5. records the reward, and periodically calls ``agent.update_parameters``.

    Observations and actions are written to ``results/obs_<exp_name>.pkl``.
    Reads its configuration from the module-level ``args``.
    """
    torch.manual_seed(args.seed)
    writer = SummaryWriter(comment = args.exp_name)

    # Create Simulation Environment
    env = gym.make('Eplus-IW-test-v0')

    # Specify variable names for control problem
    obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"]
    state_name = ["Indoor Air Temp."]
    dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"]
    ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"]
    target_name = ["Indoor Temp. Setpoint"]
    # The setpoint is appended as the last disturbance column; it is fed to
    # the network but sliced off (``[:, :-1]``) for the dynamics model.
    dist_name = dist_name + target_name

    n_state = len(state_name)
    n_ctrl = 1 #len(ctrl_name)
    n_dist = len(dist_name)
    eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode
    step = args.step # step: Timestep; Unit in seconds
    T = args.T # T: Number of timesteps in the planning horizon
    tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day

    # Read Information on Weather, Occupancy, and Target Setpoint
    obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl")
    disturbance = obs_2017[dist_name]
    # Min-Max Normalization
    obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only
    dist_min = obs_TMY3[dist_name].min()
    dist_max = obs_TMY3[dist_name].max()
    disturbance = (disturbance - dist_min)/(dist_max - dist_min)
    state_min = obs_TMY3[state_name].min().values
    state_max = obs_TMY3[state_name].max().values
    memory = Replay_Memory()

    ## Load pretrained LSTM policy weights
    # Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data)
    # The LSTM also expects "setpoint" as part of the disturbance term.
    network = LSTM(n_state, n_ctrl, n_dist)
    network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no)))

    ## Load thermodynamics model to construct the polytope
    # New model also expects states, actions, and disturbances to be MinMaxNormalized
    model_dict ={'a': np.array([0.934899]),
                 'bu': np.array([0.024423]),
                 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02,
                                 2.38717578e-03, -3.52816030e-03, 3.32528746e-03, 7.19267820e-03]),
                 'Pm': 1 # Upper bound of u;
                 }
    policy = NeuralController(T, step, network, RC_flag = False, **model_dict)
    agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam)

    results_dir = 'results'
    os.makedirs(results_dir, exist_ok=True)

    multiplier = 1 # Normalize the reward for better training performance
    n_step = 96 #timesteps per day

    # Exploration noise, annealed linearly from sigma to sigma_min
    sigma = 0.1
    sigma_min = 0.01
    sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps

    timeStep, obs, isTerminal = env.reset()
    # pd.Timestamp replaces the removed pd.datetime alias.
    start_time = pd.Timestamp(year = env.start_year, month = env.start_mon, day = env.start_day)
    cur_time = start_time
    obs_dict = make_dict(obs_name, obs)

    # Save for record
    timeStamp = [start_time]
    observations = [obs]
    actions_taken = []

    for i_episode in range(tol_eps):
        ## Save for Parameter Updates
        rewards = []
        real_rewards = []

        for t in range(n_step):
            state = np.array([obs_dict[name] for name in state_name])
            state = (state-state_min)/(state_max-state_min)

            # Comfort bounds over the planning horizon [cur_time, cur_time + (T-1)*step]
            horizon_end = cur_time + pd.Timedelta(seconds = (T-1) * step)
            x_upper = obs_2017['x_upper'][cur_time : horizon_end].values
            x_lower = obs_2017['x_lower'][cur_time : horizon_end].values
            ## Margin
            #x_lower+=0.025
            #x_upper-=0.025

            x_upper = (x_upper-state_min)/(state_max-state_min)
            x_lower = (x_lower-state_min)/(state_max-state_min)

            dt = disturbance[cur_time : horizon_end].values # T x n_dist

            ## Update the model in the controller
            # CVXPY expects np.array for parameters
            agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1])
            agent.memory.x_lowers.append(torch.tensor(x_lower).float())
            agent.memory.x_uppers.append(torch.tensor(x_upper).float())

            state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state
            dt = torch.tensor(dt).float()
            agent.memory.states.append(state)
            agent.memory.disturbance.append(dt)

            ## Use policy_old to select action
            mu, _ = agent.policy_old.nn(state, dt.unsqueeze(1)) # T x n x n_action
            sigma_sq = torch.ones_like(mu) * sigma**2
            action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = None)
            agent.memory.actions.append(action.detach().clone())
            agent.memory.old_logprobs.append(old_logprob.detach().clone())

            ## Project without Gradient
            # The sampled (noisy) action replaces the first planned step
            # before the whole sequence is projected.
            mu[0] = action
            mu = mu.squeeze().detach()
            try:
                action_feasible = agent.policy_old.proj_layer(state[0], dt[:, :-1],
                                                              mu, torch.zeros_like(mu), torch.zeros_like(mu),
                                                              torch.tensor(x_upper).float(),
                                                              torch.tensor(x_lower).float(),
                                                              torch.tensor(agent.policy_old.u_upper.value).float(),
                                                              torch.tensor(agent.policy_old.u_lower.value).float()
                                                              )
                action = action_feasible[0][0]
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the run.
                ## The feasible set is empty; Use some heuristics
                sp = np.mean((x_lower+x_upper)/2)
                if state.item() < sp:
                    action = torch.tensor([1])
                else:
                    action = torch.tensor([0])

            # Map the normalised action in [0, 1] to a value in [20, 65];
            # anything below 30 is treated as "heating off".
            SWT = 20 + 45 * action.item()
            if (SWT<30):
                HWOEN = -30 # De Facto Off
                action = torch.zeros_like(action)
                SWT = 20
            else:
                HWOEN = 30 # De Facto On
            if np.isnan(SWT):
                SWT = 20
            action4env = (HWOEN, SWT)

            # Before step
            print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}')
            # One control step spans three environment steps.
            for _ in range(3):
                timeStep, obs, isTerminal = env.step(action4env)

            obs_dict = make_dict(obs_name, obs)
            reward = R_func(obs_dict, SWT-20, eta)

            # Per step
            real_rewards.append(reward)
            bl = 0 #obs_2017['rewards'][cur_time]
            rewards.append((reward-bl) / 15) # multiplier
            # Save for record
            cur_time = start_time + pd.Timedelta(seconds = timeStep)
            timeStamp.append(cur_time)
            observations.append(obs)
            actions_taken.append(action4env)

        writer.add_scalar('Reward', np.mean(real_rewards), i_episode)
        writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode)
        print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))

        advantages = Advantage_func(rewards, args.gamma)
        agent.memory.advantages.append(advantages)

        # if -1, do not update parameters
        if args.update_episode == -1:
            agent.memory.clear_memory() # Prevent memory overflow
        elif i_episode > 0 and i_episode % args.update_episode == 0:
            agent.update_parameters(sigma = sigma, K = 8)
            sigma = max(sigma_min, sigma-sigma_step)

        # Rewritten after every episode, so the file on disk always holds
        # the trajectory collected so far.
        obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name)
        obs_df = obs_df.drop(columns=ctrl_name)
        action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name)
        obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True)
        obs_df.to_pickle("results/obs_"+args.exp_name+".pkl")
284 |
285 | if __name__ == '__main__':
286 | main()
287 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Bingqing Chen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PROF: Projected Feasibility
2 |
3 | This is the official repository that implements the following paper:
4 |
5 | > *Chen, Bingqing, Priya Donti, Kyri Baker, J. Zico Kolter, and Mario Berges. "Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization." In Proceedings of the Twelfth ACM International Conference on Future Energy Systems (e-energy '21). pp. 199–210. 2021.*
6 |
7 | [[slides]](docs/slides.pdf)[[paper]](https://dl.acm.org/doi/10.1145/3447555.3464874)
8 |
9 | # Overview
10 |
11 | PROjected Feasibility (PROF) is a method to enforce convex operational constraints within neural policies, by incorporating a differentiable projection layer within a neural network-based policy to enforce that all learned actions are feasible. We then update the policy end-to-end by propagating gradients through this differentiable projection layer, making the policy cognizant of the operational constraints. The result is a powerful neural policy that can flexibly optimize performance on the true underlying dynamics, while still satisfying the specified constraints.
12 |
13 | We demonstrate our method on two applications: energy-efficient building operation and inverter control.
14 | - In the building control case, PROF outperforms other RL agents, while maintaining temperature within the deadband except when the control is saturated.
15 | - In the inverter control setting, PROF satisfies the constraints 100% of the time and minimizes curtailment as well as possible within its conservative safe set after learning safely for half a day.
16 |
17 | **Framework.**
18 |
19 |
20 |
21 |
22 |
23 | # Code Usage
24 | ### Clone repository
25 | ```
26 | git clone https://github.com/INFERLab/PROF.git
27 | cd PROF
28 | ```
29 |
30 | ### Set up the environment
31 | Set up the virtual environment with your preferred environment/package manager.
32 |
33 | The instruction here is based on **conda**. ([Install conda](https://docs.anaconda.com/anaconda/install/))
34 | ```
35 | conda env create --file environment.yml
36 | conda activate nn-w-proj
37 | ```
38 |
39 | ### File Structure
40 | ```
41 | .
42 | ├── agents
43 | │ ├── base.py # Implement a controller that instantiate the projection problem given building parameters
44 | │ ├── nn_policy.py # Inherit the controller from base.py; Forward pass: NN + Differentiable projection
45 | │ └── inverter_policy.py # Policy for inverter: NN + Differentiable projection
46 | ├── algo
47 | │ └── ppo.py # A PPO trainer
48 | ├── env
49 | │ └── inverter.py # Implements the IEEE 37-bus case
50 | ├── utils
51 | │ ├── network.py # Implements neural network modules, e.g. MLP and LSTM
52 | │ └── ppo_utils.py # Helper function for PPO trainer, e.g. Replay_Memory, Advantage_func
53 | ├── network # Matlab code for linearizing grid model; Data to construct IEEE 37-bus case;
54 | └── mypypower # Include some small changes from PyPower source code to allow customization
55 |
56 | ```
57 |
58 | ### Running
59 | You can replicate our experiments for *Experiment 1: Energy-efficient Building Operation* with `main_IW.py` and *Experiment 2: Inverter Control* with `main_inverter.py`
60 |
61 |
62 | ### Feedback
63 |
64 | Feel free to send any questions/feedback to: [Bingqing Chen](mailto:bingqinc@andrew.cmu.edu)
65 |
66 | ### Citation
67 |
68 | If you use PROF, please cite us as follows:
69 |
70 | ```
71 | @inproceedings{chen2021enforcing,
72 | author = {Chen, Bingqing and Donti, Priya L. and Baker, Kyri and Kolter, J. Zico and Berg\'{e}s, Mario},
73 | title = {Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization},
74 | year = {2021},
75 | isbn = {9781450383332},
76 | publisher = {Association for Computing Machinery},
77 | address = {New York, NY, USA},
78 | url = {https://doi.org/10.1145/3447555.3464874},
79 | doi = {10.1145/3447555.3464874},
80 | booktitle = {Proceedings of the Twelfth ACM International Conference on Future Energy Systems},
81 | pages = {199–210},
82 | numpages = {12},
83 | keywords = {inverter control, safe reinforcement learning, implicit layers, differentiable optimization, smart building},
84 | location = {Virtual Event, Italy},
85 | series = {e-Energy '21}
86 | }
87 | ```
88 |
--------------------------------------------------------------------------------
/agents/base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cvxpy as cp
3 | import pdb
4 |
class Controller():
    """Convex program that finds a control sequence ``u`` of length ``T``.

    The objective is ``sum_squares(u - u_diff - v_bar + w_bar)``; the
    constraints are box bounds on ``u`` plus lower/upper bounds on the state
    trajectory predicted by either an RC model (``RC_flag=True``) or an ARX
    model (``RC_flag=False``).  All time-varying quantities (initial state,
    disturbances, bounds, ``u_diff``, ``v_bar``, ``w_bar``) are CVXPY
    parameters so the problem is built once and re-solved with new values.
    """

    def __init__(self, T, dt, RC_flag = True, **kwargs):
        """Build the CVXPY problem.

        Args:
            T: planning horizon (number of steps).
            dt: planning timestep; only used by the RC model.
            RC_flag: whether to use the RC model (otherwise ARX).
            **kwargs: model parameters.  RC model: ``R``, ``C``, ``Pm``,
                ``eta``, ``theta``, ``Delta``, ``sign``.  ARX model: ``a``,
                ``bu``, ``bd``, ``Pm``.
        """
        self.T = T
        self.RC_flag = RC_flag
        self.err_count = 0  # number of solves that produced no solution

        if RC_flag:
            ## RC model: Simulation Study
            self.R = kwargs["R"]
            self.C = kwargs["C"]
            self.Pm = kwargs["Pm"]
            self.eta = kwargs["eta"]
            self.T_sp = kwargs["theta"]
            self.Delta = kwargs["Delta"]
            self.sign = kwargs["sign"] #(+) for heating and (-) for cooling
        else:
            ## ARX model: Hardware-in-the-loop Simulation
            self.ap = kwargs["a"]
            self.bu = kwargs["bu"]
            self.bd = kwargs["bd"]
            self.p = len(self.ap)
            self.m = len(self.bu) # how many u_prev to consider
            self.n_dist = len(self.bd)
            self.Pm = kwargs["Pm"]
            # Default setpoint / half-width of the comfort band; overwritten
            # by updateState() whenever state bounds are supplied.
            self.T_sp = 75
            self.Delta = 1.8

        # Variable
        self.u = cp.Variable(T)

        # Save u_i-u_bar from previous time step
        self.u_diff = cp.Parameter(T)
        self.v_bar = cp.Parameter(T)
        self.w_bar = cp.Parameter(T)
        self.objective = cp.sum_squares(self.u-self.u_diff-self.v_bar+self.w_bar)

        ## Info needed for constraints
        if RC_flag:
            self.x0 = cp.Parameter()
            self.d = cp.Parameter(T)
        else:
            ## Expects [x_{t-p}, ..., x_t]
            self.x0 = cp.Parameter(self.p)
            self.d = cp.Parameter((T, self.n_dist))

        # Set default value for constraints
        self.u_lower = cp.Parameter(T)
        self.u_lower.value = np.tile(0, T)
        self.u_upper = cp.Parameter(T)
        self.u_upper.value = np.tile(self.Pm, T)
        self.x_lower = cp.Parameter(T)
        self.x_lower.value = np.tile(self.T_sp-self.Delta, T)
        self.x_upper = cp.Parameter(T)
        self.x_upper.value = np.tile(self.T_sp+self.Delta, T)

        if RC_flag:
            a = np.exp(-dt/(self.R*self.C))
            b = self.eta * self.R

            # lam[i] = a**(i+1): effect of the initial state after i+1 steps
            lam = np.logspace(1, T, num = T, base = a)
            # Lam is lower-triangular with Lam[i, j] = a**(i-j)
            Lam = np.zeros((T, T))
            for i in range(T):
                for j in range(i+1):
                    Lam[i, j] = a**(i-j)
            self.d.value = (1-a)*np.tile(32, T)
        else:
            # A encodes the autoregressive part; Lam = A^{-1} maps per-step
            # inputs to the predicted state trajectory.
            A = np.eye(self.T)
            for i in range(self.T-1):
                A[i+1, max(0, i+1-self.p):i+1] = -np.flip(self.ap)[-(i+1):]
            Lam = np.linalg.inv(A)

            # Contribution of the known past states x0 to the first p steps
            lam = np.zeros((self.T, self.p))
            for i in range(self.p):
                lam[i, i:] = np.flip(self.ap)[:self.p-i]

            ## note: missing the term on u_{t-1}
            B = np.zeros((self.T, self.T))

            for i in range(self.m):
                B += np.diag(np.ones(T-i), -i)*self.bu[i]/self.Pm

            self.d.value = np.zeros((T, self.n_dist))

        # Constraints
        self.constraints = [-self.u <= -self.u_lower,
                            self.u <= self.u_upper]
        if RC_flag:
            self.constraints += [-Lam@(self.sign*(1-a)*b*self.u+self.d) <= -self.x_lower + lam*self.x0,
                                 Lam@(self.sign*(1-a)*b*self.u+self.d) <= self.x_upper - lam*self.x0]
        else:
            self.constraints += [-Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= -self.x_lower,
                                 Lam@(B@self.u + self.d@self.bd + lam@self.x0) <= self.x_upper]

        self.Problem = cp.Problem(cp.Minimize(self.objective),
                                  self.constraints)

    def u_update(self, v_bar, w_bar):
        """Solve the problem for the given ``v_bar`` / ``w_bar``.

        Returns:
            ``(u, status)`` where ``u`` is the solution (length ``T``) and
            ``status`` is ``self.Problem.status``.  If the solver raises or
            yields no solution, ``err_count`` is incremented and a constant
            fallback sequence derived from the current state, ``T_sp`` and
            ``Delta`` is returned instead.
        """
        self.v_bar.value = v_bar
        self.w_bar.value = w_bar
        try:
            self.Problem.solve()
        except Exception:
            # Best-effort: a solver failure falls through to the heuristic
            # below.  Exception (not bare except) keeps Ctrl-C working.
            print("Solver failed")
            self.u.value = None

        ## Check solution valid
        if self.u.value is not None:
            return self.u.value, self.Problem.status
        else:
            u = (self.x0.value-self.T_sp)/self.Delta
            self.err_count += 1
            return np.ones(self.T)*np.clip(u, 0, 1)*self.Pm, self.Problem.status

    def updateState(self, x, u_lower = None, u_upper = None,
                    x_lower = None, x_upper = None,
                    d = None):
        """Load new parameter values into the problem.

        Args:
            x: current state (value for ``x0``).
            u_lower, u_upper: optional control bounds; a scalar is tiled to
                length ``T``, a sequence must have length ``T``.
            x_lower, x_upper: optional state bounds of length ``T``.  When
                either is given, ``T_sp`` and ``Delta`` are recomputed from
                the first element of the current lower/upper bounds.
            d: optional disturbance forecast with ``len(d) == T``.
        """
        self.x0.value = x

        # Update constraints if necessary
        if u_lower is not None:
            if isinstance(u_lower, (int, float)):
                self.u_lower.value = np.tile(u_lower, self.T)
            else:
                assert len(u_lower) == self.T
                self.u_lower.value = u_lower
        if u_upper is not None:
            if isinstance(u_upper, (int, float)):
                self.u_upper.value = np.tile(u_upper, self.T)
            else:
                assert len(u_upper) == self.T
                self.u_upper.value = u_upper

        bounds_changed = False
        if x_lower is not None:
            assert len(x_lower) == self.T
            self.x_lower.value = x_lower
            bounds_changed = True
        if x_upper is not None:
            assert len(x_upper) == self.T
            self.x_upper.value = x_upper
            bounds_changed = True
        if bounds_changed:
            # Read back from the parameters so this also works when only one
            # of the two bounds was supplied in this call.
            lower0 = self.x_lower.value[0]
            upper0 = self.x_upper.value[0]
            self.T_sp = (upper0+lower0)/2
            self.Delta = (upper0-lower0)/2

        ## Exog Variables
        if d is not None:
            assert len(d) == self.T
            self.d.value = d
155 |
156 |
class ControllerGroup():
    """A list of ``Controller`` instances that are updated and solved together."""

    def __init__(self, T, dt, parameters, RC_flag = True):
        """Create one controller per entry of ``parameters``.

        Args:
            T: planning horizon shared by all controllers.
            dt: planning timestep shared by all controllers.
            parameters: sequence of keyword dicts, one per controller.
            RC_flag: forwarded to every ``Controller``.
        """
        self.n_agent = len(parameters)
        self.T = T
        self.dt = dt
        self.RC_flag = RC_flag
        self.controller_list = self._init_agents(parameters)

    def _init_agents(self, parameters):
        """Instantiate a ``Controller`` for each parameter dict."""
        return [Controller(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **param)
                for param in parameters]

    def updateState(self, x_list, u_list = None, d_list = None, x_lower_list = None, x_upper_list = None):
        """Push new states (and optional forecasts/bounds) to every controller.

        Each controller's ``u_diff`` is set to its previous action minus the
        group mean when ``u_list`` is given, and to zeros otherwise.
        """
        # The group mean does not depend on the controller; compute it once.
        u_bar = np.mean(u_list, axis = 0) if u_list is not None else None

        for idx, controller in enumerate(self.controller_list):
            controller.updateState(
                x_list[idx],
                d = d_list[idx] if d_list is not None else None,
                x_lower = x_lower_list[idx] if x_lower_list is not None else None,
                x_upper = x_upper_list[idx] if x_upper_list is not None else None,
            )

            ## Initialize the controller with action from prev timestep
            if u_list is not None:
                controller.u_diff.value = u_list[idx] - u_bar
            else:
                controller.u_diff.value = np.zeros(self.T)

    def u_update(self, v_bar, w_bar):
        """Solve every controller and refresh each ``u_diff``.

        Returns:
            ``(u_bar, u_all)``: the mean solution across controllers and the
            stacked individual solutions as an array.
        """
        u_list = []
        for idx, controller in enumerate(self.controller_list):
            u_i, status = controller.u_update(v_bar, w_bar)
            if status in ["infeasible", "unbounded"]:
                print(idx, status)
            u_list.append(u_i)

        u_bar = np.mean(u_list, axis = 0)
        for idx, controller in enumerate(self.controller_list):
            controller.u_diff.value = u_list[idx] - u_bar
        return u_bar, np.array(u_list)
197 |
198 |
--------------------------------------------------------------------------------
/agents/inverter_policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cvxpy as cp
3 | from cvxpylayers.torch import CvxpyLayer
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from torch.distributions import MultivariateNormal, Normal
9 | from copy import deepcopy
10 | import operator
11 | from functools import reduce
12 |
13 | import pdb
14 |
15 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16 |
17 |
18 | ### Can move to utils.network if appropriate
class Net(nn.Module):
    """Multi-headed MLP: one shared trunk followed by a small head per inverter.

    The trunk maps an input of width ``n_input * n_bus`` through the shared
    hidden sizes (ReLU between layers, no activation after the last one).
    Each head maps the trunk output through the individual hidden sizes to
    two outputs, ``p`` and ``q``.
    """

    def __init__(self, n_bus, n_inverters, shared_hidden_layer_sizes, indiv_hidden_layer_sizes, n_input = 3):
        """
        Args:
            n_bus: number of buses; the input width is ``n_input * n_bus``.
            n_inverters: number of per-inverter heads.
            shared_hidden_layer_sizes: non-empty sequence of trunk layer
                widths; the last entry is the trunk output width.
            indiv_hidden_layer_sizes: hidden widths of each head (may be
                empty, giving a single linear layer per head).
            n_input: number of input features per bus.
        """
        super(Net, self).__init__()

        def _mlp(sizes, out_dim):
            # Linear -> ReLU for every consecutive pair in `sizes`, then a
            # final Linear to `out_dim`.  Built with a plain loop so a single
            # entry in `sizes` (no hidden pairs) is valid.
            layers = []
            for a, b in zip(sizes[:-1], sizes[1:]):
                layers += [nn.Linear(a, b), nn.ReLU()]
            layers.append(nn.Linear(sizes[-1], out_dim))
            return nn.Sequential(*layers)

        shared = list(shared_hidden_layer_sizes)
        indiv = list(indiv_hidden_layer_sizes)

        # "Shared" model
        self.base_net = _mlp([n_input * n_bus] + shared[:-1], shared[-1])

        # Individual inverter model; every head is a deep copy of one
        # template, so all heads start from identical weights.
        indiv_model = _mlp([shared[-1]] + indiv, 2)  # output p and q
        self.inverter_nets = nn.ModuleList(
            [deepcopy(indiv_model) for _ in range(n_inverters)]
        )

    def forward(self, state):
        '''
        Input: batch of state vectors, shape (batch, n_input * n_bus)
        Output: (Ps, Qs), each of shape (batch, n_inverters) -- the first and
            second output of every inverter head, concatenated column-wise
        '''
        z = self.base_net(state)
        res = [inverter(z) for inverter in self.inverter_nets]
        Ps = torch.cat([x[:, [0]] for x in res], dim=1)
        Qs = torch.cat([x[:, [1]] for x in res], dim=1)
        return Ps, Qs
71 |
class NeuralController(nn.Module):
    '''
    Inverter controller: a neural network proposes (P, Q) setpoints, which are
    projected onto the inverter limits and the linearized voltage limits by a
    differentiable CvxpyLayer.

    Input:
        network: module returning (P_tilde, Q_tilde) for a batch of states
        memory: replay buffer; update() draws (state, Sbus, P_av) batches from it
        lr: learning rate of the RMSprop optimizer over the network weights
        lam: weight of the projection loss in update()
        scaler: Sbus and the network outputs are divided by this factor before
                they enter the constraints
        env_params: must provide 'n_bus', 'gen_idx', 'H', 'V0', 'P0', 'Q0',
                    'V_upper', 'V_lower' and 'S_rating'
    '''
    def __init__(self, network, memory, lr, lam = 10, scaler = 1000, **env_params):
        super(NeuralController, self).__init__()
        self.nn = network
        self.optimizer = optim.RMSprop(self.nn.parameters(), lr=lr)
        self.lam = lam
        self.memory = memory
        self.mse = nn.MSELoss()
        self.ReLU = nn.ReLU()

        self.n_bus = env_params['n_bus']
        self.gen_idx = env_params['gen_idx']
        self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx]

        # Split H = [R, B] and reorder rows/columns so that inverter buses come
        # first; this matches the stacking z = [P, P_nc, Q, Q_nc] used below.
        H = env_params['H']
        R = H[:, :self.n_bus]
        B = H[:, self.n_bus:]
        R_new = np.vstack([np.hstack([R[self.gen_idx][:, self.gen_idx],
                                      R[self.gen_idx][:, self.other_idx]]),
                           np.hstack([R[self.other_idx][:, self.gen_idx],
                                      R[self.other_idx][:, self.other_idx]])
                           ])
        B_new = np.vstack([np.hstack([B[self.gen_idx][:, self.gen_idx],
                                      B[self.gen_idx][:, self.other_idx]]),
                           np.hstack([B[self.other_idx][:, self.gen_idx],
                                      B[self.other_idx][:, self.other_idx]])
                           ])
        H_new = np.hstack([R_new, B_new])

        self.scaler = scaler
        self.V0 = env_params['V0']
        self.P0 = env_params['P0']
        self.Q0 = env_params['Q0']
        self.V_upper = env_params['V_upper']
        self.V_lower = env_params['V_lower']
        self.S_rating = env_params['S_rating']

        # Need to set as nn.Parameter such that to(DEVICE) move these to GPU as well
        self.V0_torch = nn.Parameter(torch.tensor(self.V0).float())
        self.V_upper_torch = nn.Parameter(torch.tensor(self.V_upper).float())
        self.V_lower_torch = nn.Parameter(torch.tensor(self.V_lower).float())
        self.H_torch = nn.Parameter(torch.tensor(H_new).float())
        self.P0_torch = nn.Parameter(torch.tensor(self.P0).float())
        self.Q0_torch = nn.Parameter(torch.tensor(self.Q0).float())
        self.S_rating_torch = nn.Parameter(torch.tensor(self.S_rating).float())

        # Set up projection onto inverter setpoint constraints and linearized voltage constraints
        P = cp.Variable(len(self.gen_idx))
        Q = cp.Variable(len(self.gen_idx))

        # P_tilde and Q_tilde are the pre-projection actions
        P_tilde = cp.Parameter(len(self.gen_idx))
        Q_tilde = cp.Parameter(len(self.gen_idx))

        # No inverter buses
        P_nc = cp.Parameter(len(self.other_idx))
        Q_nc = cp.Parameter(len(self.other_idx))

        P_av = cp.Parameter(len(self.gen_idx))

        # Voltage: Apply to All Buses
        z = cp.hstack([P, P_nc, Q, Q_nc]) # z: (70, )
        constraints = [self.V_lower - self.V0 <= H_new@z,
                       H_new@z <= self.V_upper - self.V0]

        ## Power: Only applies to Inverters
        PQ = cp.vstack([self.P0[self.gen_idx] + P,
                        self.Q0[self.gen_idx] + Q]) # (2, n)
        constraints += [0 <= self.P0[self.gen_idx] + P,
                        self.P0[self.gen_idx] + P <= P_av,
                        cp.norm(PQ, axis = 0) <= self.S_rating]

        # Euclidean projection of the network output onto the constraint set
        objective = cp.Minimize(cp.sum_squares(P - P_tilde) + cp.sum_squares(Q - Q_tilde))
        problem = cp.Problem(objective, constraints)

        self.proj_layer = CvxpyLayer(problem, variables=[P, Q],
                                     parameters=[P_tilde, Q_tilde,
                                                 P_nc, Q_nc, P_av])

        # Number of inference calls that needed a projection
        self.proj_count = 0

    def forward(self, state, Sbus, P_av, inference_flag = True):
        '''
        Input:
            state: [dV(k-1), P_nc, Q_nc]
                where,
                    Z_nc = Z - Z0
                May get (n, dim) or (dim);
            Sbus: complex bus injections, 1-D (single sample) or 2-D (batch),
                  indexed by bus along the last axis
            P_av: available active power, passed to the projection as its P_av parameter
            inference_flag: True  -> return numpy setpoints for all buses;
                            False -> return projected tensors and the projection loss
        Output:
            inference: P_all, Q_all (with respect to the reference point)
            training:  P, Q, proj_loss
        Raises:
            ValueError: if Sbus is neither 1-D nor 2-D
        '''
        ## Get information for non-controllable loads
        P_all = Sbus.real /self.scaler
        Q_all = Sbus.imag /self.scaler
        if len(Sbus.shape)==1:
            P_nc = Sbus.real[self.other_idx] / self.scaler
            Q_nc = Sbus.imag[self.other_idx] / self.scaler
        elif len(Sbus.shape)==2:
            P_nc = Sbus.real[:, self.other_idx] / self.scaler
            Q_nc = Sbus.imag[:, self.other_idx] / self.scaler
        else:
            # Fail here with a clear message; previously this only printed and
            # then crashed further down on the undefined P_nc / Q_nc.
            raise ValueError(f"Sbus must be 1-D or 2-D, got shape {tuple(Sbus.shape)}")

        P_tilde, Q_tilde = self.nn(state.to(DEVICE)) # n x n_inverter

        ## During inference if the action is already feasible, not need to project
        if inference_flag:
            P_tilde = P_tilde.squeeze()
            Q_tilde = Q_tilde.squeeze()
            if self.is_feasible(P_tilde.detach().clone()/self.scaler,
                                Q_tilde.detach().clone()/self.scaler,
                                P_nc, Q_nc, P_av):
                P_all[self.gen_idx] = P_tilde.detach().cpu().numpy() / self.scaler
                Q_all[self.gen_idx] = Q_tilde.detach().cpu().numpy() / self.scaler
                return P_all, Q_all
            else:
                try:
                    P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler,
                                           torch.tensor(P_nc).float().to(DEVICE),
                                           torch.tensor(Q_nc).float().to(DEVICE),
                                           torch.tensor(P_av).float().to(DEVICE))
                    self.proj_count += 1
                    P_all[self.gen_idx] = P.detach().cpu().numpy()
                    Q_all[self.gen_idx] = Q.detach().cpu().numpy()
                except Exception: # The solver dies for some reason
                    # Best-effort fallback: zero setpoints at the inverter buses.
                    # Only Exception is caught so Ctrl-C / SystemExit still propagate.
                    P_all[self.gen_idx] = 0
                    Q_all[self.gen_idx] = 0
                return P_all, Q_all
        else:
            P, Q = self.proj_layer(P_tilde/self.scaler, Q_tilde/self.scaler,
                                   torch.tensor(P_nc).float().to(DEVICE),
                                   torch.tensor(Q_nc).float().to(DEVICE),
                                   torch.tensor(P_av).float().to(DEVICE))
            # The projected action is detached: the loss pulls the raw network
            # output towards the projection, not the other way round.
            proj_loss = self.mse(P.detach(), P_tilde/self.scaler) \
                        + self.mse(Q.detach(), Q_tilde/self.scaler)
            return P, Q, proj_loss

    def update(self, batch_size = 64, n_batch = 16):
        '''
        Run n_batch gradient steps, each on a batch of batch_size samples drawn
        from the replay memory. The loss is the mean positive part of
        (P_av - P) plus lam times the projection loss.
        '''
        for _ in range(n_batch):
            state, Sbus, P_av = self.memory.sample_batch(batch_size = batch_size)
            P, Q, proj_loss = self.forward(state, Sbus, P_av, inference_flag = False)
            curtail = self.ReLU(torch.tensor(P_av).to(DEVICE) - P)
            loss = curtail.mean() + self.lam * proj_loss
            print(f'curtail = {curtail.mean().item()}, proj_loss = {proj_loss.item()}')

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def is_feasible(self, P, Q, P_nc, Q_nc, P_av):
        '''
        Check a single (un-batched) action against all projection constraints.

        Input:
            P, Q: 1-D tensors of inverter setpoints (the caller divides by the scaler)
            P_nc, Q_nc: array-likes for the non-inverter buses
            P_av: available active power, upper bound for P0 + P
        Output:
            True if the voltage, apparent-power and active-power limits all
            hold within a tolerance of 1e-6, False otherwise
        '''
        eps = 1e-6
        assert P.ndimension() == 1

        z = torch.cat([P, torch.tensor(P_nc).float().to(DEVICE),
                       Q, torch.tensor(Q_nc).float().to(DEVICE)], dim = -1) # (70)
        v = self.H_torch.matmul(z) # (35)

        # Linearized voltage deviation must stay inside [V_lower - V0, V_upper - V0]
        if torch.any(v < self.V_lower_torch -self.V0_torch - eps) | torch.any(v > self.V_upper_torch-self.V0_torch+eps):
            return False

        # Shift by the reference injections before checking the inverter limits
        P = P + self.P0_torch[self.gen_idx]
        Q = Q + self.Q0_torch[self.gen_idx]
        PQ = torch.stack([P, Q]) # (2, 21)
        if torch.any(torch.norm(PQ, dim = 0) > self.S_rating_torch + eps):
            return False

        if torch.any(P < 0-eps) | torch.any(P > torch.tensor(P_av).to(DEVICE)+eps):
            return False
        else:
            return True
247 |
--------------------------------------------------------------------------------
/agents/nn_policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cvxpy as cp
3 | from cvxpylayers.torch import CvxpyLayer
4 | import pdb
5 | import torch
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from torch.distributions import MultivariateNormal, Normal
9 |
10 | from utils.network import MLP, LSTM
11 | from agents.base import Controller, ControllerGroup
12 |
class NeuralController(Controller):
    '''
    Controller whose actions are proposed by a neural network and then projected
    onto the feasible set of the inherited optimization problem by a CvxpyLayer.
    '''
    def __init__(self, T, dt, network, RC_flag = True,
                 **parameters):
        super().__init__(T, dt, RC_flag = RC_flag, **parameters)
        ## Inherited Properties:
        # cp.Variable: self.u
        # cp.Parameter: self.u_diff; self.v_bar; self.w_bar;
        #               self.x0; self.d;
        #               self.x_lower; self.x_upper;
        #               self.u_lower; self.u_upper;
        # self.objective
        # self.Problem
        # self.constraints = [...]

        ## Inherited Methods:
        # updateState()

        ## Use ADMM update rule for the time being
        # u_update(self, v_bar, w_bar):

        # LSTM Usage:
        #   lstm = LSTM(n_state, n_action, n_dist)
        #   mu, sigma_sq = lstm.forward(state, disturbance)
        #   Input:
        #       state: n x dim
        #       disturbance: T x n x dist
        #   Output:
        #       mu, sigma_sq: T x n x n_action
        self.nn = network
        # The parameter order here fixes the argument order of every
        # self.proj_layer(...) call in forward().
        self.proj_layer = CvxpyLayer(self.Problem, variables = [self.u],
                                     parameters = [self.x0, self.d,
                                                   self.u_diff, self.v_bar, self.w_bar,
                                                   self.x_upper, self.x_lower,
                                                   self.u_upper, self.u_lower])
        self.criterion = nn.MSELoss() # reduction = 'sum'

    def forward(self, state, disturbance, x_lowers = None, x_uppers = None, detach = False):
        '''
        Input:
            state: (n, n_state)
            disturbance: (T, n, n_dist)
            x_lowers, x_uppers: (n, T); when None, the bounds currently stored in
                self.x_lower / self.x_upper are used for every sample
            detach: unused, kept for interface compatibility
        Output:
            actions: (T, n, 1) projected actions
            sigma_sq: as returned by the network
            proj_loss: scalar MSE between the raw network output and the projection
        '''
        _, n_batch, _ = disturbance.shape
        mus, sigma_sqs = self.nn(state, disturbance)# T x n x n_action

        actions = []
        #TODO: Implement multi-threading
        for i in range(n_batch):
            mu = mus[:, i] # T x n_action

            # Resolve each bound independently: per-sample value when supplied,
            # stored problem value otherwise. The previous logic left the bounds
            # undefined for a batch of one with explicit bounds, and indexed
            # None for larger batches without bounds.
            if x_lowers is None:
                x_lower = torch.tensor(self.x_lower.value).float()
            else:
                x_lower = x_lowers[i]
            if x_uppers is None:
                x_upper = torch.tensor(self.x_upper.value).float()
            else:
                x_upper = x_uppers[i]

            # The last value is setpoint; Do not use for projection
            dt = disturbance[:, i, :-1] # T x n_dist
            x0 = state[i]
            mu = mu.squeeze(1) # T x 1 ->T

            try:
                u_pred = self.proj_layer(x0, dt,
                                         mu, torch.zeros_like(mu), torch.zeros_like(mu),
                                         x_upper, x_lower,
                                         torch.tensor(self.u_upper.value).float(),
                                         torch.tensor(self.u_lower.value).float())
                actions.append(u_pred[0])
            except Exception:
                ## The feasible set is empty; Use some heuristics
                # (Exception only, so KeyboardInterrupt / SystemExit propagate.)
                # Full action when the state is below the middle of the bounds,
                # zero action otherwise.
                sp = torch.mean((x_lower+x_upper)/2)
                if x0.item() < sp:
                    actions.append(torch.ones_like(mu))
                else:
                    actions.append(torch.zeros_like(mu))

        actions = torch.stack(actions).transpose(0, 1) # T x n
        proj_loss = self.criterion(mus.squeeze(-1), actions)
        return actions.unsqueeze(-1), sigma_sqs, proj_loss
102 |
class NeuralControllerGroup(ControllerGroup):
    '''
    Group of NeuralController agents, one per entry of `parameters`.
    '''
    def __init__(self, T, dt, parameters, RC_flag = True):
        super().__init__(T, dt, parameters, RC_flag = RC_flag)

        ## Inherited Methods:
        # updateState()
        # u_update()

    def _init_agents(self, parameters):
        '''Build one NeuralController per parameter dict.'''
        return [NeuralController(T = self.T, dt = self.dt, RC_flag = self.RC_flag, **cfg)
                for cfg in parameters]

    def u_warmstart(self, x_list):
        '''
        Stack each controller's prediction for its own state (reshaped to 1 x n_input).

        NOTE(review): forward is called with the state only and its result is
        treated as a single tensor - confirm against the controllers' forward().
        '''
        u_inits = [agent.forward(x_list[k].reshape(1, -1)).detach().numpy()
                   for k, agent in enumerate(self.controller_list)]
        return np.stack(u_inits)

    def append(self, states, u_stars):
        '''Push the (state, u_star) pair of every controller into its memory.'''
        for k, agent in enumerate(self.controller_list):
            agent.memory.append((states[k], u_stars[k]))

    def update_policy(self, batch_size = 32):
        '''
        Call update_policy(batch_size) on every controller; return the losses as an array.

        NOTE(review): relies on each controller exposing update_policy - confirm
        where that method is defined.
        '''
        return np.array([agent.update_policy(batch_size)
                         for agent in self.controller_list])
134 |
135 |
--------------------------------------------------------------------------------
/algo/ppo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.utils.data as data
5 | import torch.optim as optim
6 | from torch.distributions import MultivariateNormal, Normal
7 |
8 | import pdb
9 | from copy import deepcopy
10 |
11 | from utils.ppo_utils import Dataset
12 |
13 | class PPO():  # clipped-surrogate policy update around a policy object that exposes .nn and .forward
14 | def __init__(self, policy, memory, clip_param = 0.2, lam = 10, lr = 5e-4, n_ctrl = 1):
15 | self.memory = memory
16 |
17 | self.policy = policy
18 | self.policy_old = deepcopy(policy)  # separate copy; its nn weights are re-synced from self.policy in update_parameters
19 |
20 | self.clip_param = clip_param
21 | self.optimizer = optim.RMSprop(self.policy.nn.parameters(), lr=lr)  # only the current policy's network weights are optimized
22 | self.lam = lam  # weight of proj_loss in update_parameters
23 |
24 | self.n_ctrl = n_ctrl  # > 1 selects MultivariateNormal, otherwise Normal
25 |
26 | # Use the "current" flag to indicate which set of parameters to use
27 | def forward(self, state, disturbance, x_lowers = None, x_uppers = None, current = True):
28 | T, n_batch, n_dist = disturbance.shape  # values unused; the unpacking requires a rank-3 disturbance
29 | if current == True:
30 | mu, sigma_sq, proj_loss = self.policy.forward(state, disturbance, x_lowers = x_lowers, x_uppers = x_uppers)
31 | else:
32 | mu, sigma_sq, proj_loss = self.policy_old.forward(state, disturbance)  # NOTE: x_lowers / x_uppers are not forwarded to the old policy
33 | return mu, sigma_sq, proj_loss
34 |
35 | def select_action(self, mu, sigma_sq, u_limits = None):
36 | if self.n_ctrl > 1:
37 | m = MultivariateNormal(mu, torch.diag(sigma_sq.squeeze()).unsqueeze(0))  # diagonal covariance built from the variances
38 | else:
39 | m = Normal(mu, sigma_sq**0.5)  # Normal takes a standard deviation, hence the square root
40 | action = m.sample()
41 | if u_limits is not None:
42 | action = torch.clamp(action, min = u_limits[0], max = u_limits[1])  # u_limits is indexed as (min, max)
43 | log_prob = m.log_prob(action)  # evaluated at the (possibly clamped) action
44 | return action, log_prob
45 |
46 | def evaluate_action(self, mu, actions, sigma_sq):
47 | n_batch = len(mu)  # unused
48 | if self.n_ctrl > 1:
49 | cov = torch.diag_embed(sigma_sq)
50 | m = MultivariateNormal(mu, cov)
51 | else:
52 | m = Normal(mu, sigma_sq**0.5)
53 | log_prob = m.log_prob(actions)
54 | entropy = m.entropy()
55 | return log_prob, entropy
56 |
57 | def _get_training_samples(self):
58 | states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers = self.memory.sample()
59 | batch_set = Dataset(states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers)
60 | batch_loader = data.DataLoader(batch_set, batch_size=32, shuffle=True, num_workers=2)  # mini-batch size is fixed at 32
61 | return batch_loader
62 |
63 | def update_parameters(self, sigma=0.1, K = 4):  # K passes over the loader; sigma is the fixed action std used for scoring
64 | loader = self._get_training_samples()
65 | for i in range(K):
66 | for states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers in loader:
67 | n_batch = states.shape[0]  # unused
68 | # pdb.set_trace()
69 | mus, sigma_sqs, proj_loss = self.policy.forward(states, disturbance.transpose(0, 1), x_lowers = x_lowers, x_uppers = x_uppers) # x, u: T x N x Dim.
70 | sigma_sqs = torch.ones_like(mus) * sigma**2  # overrides the variance returned by the policy with the fixed sigma**2
71 | log_probs, entropies = self.evaluate_action(mus[0], actions, sigma_sqs)  # only the first entry along dim 0 of mus is scored
72 |
73 | ratio = torch.exp(log_probs.squeeze()-old_logprobs)  # probability ratio between current and stored log-probs
74 | surr1 = ratio * advantages
75 | surr2 = torch.clamp(ratio, 1-self.clip_param, 1+self.clip_param) * advantages  # ratio clipped to [1 - clip_param, 1 + clip_param]
76 | loss = -torch.min(surr1, surr2).mean()
77 | self.optimizer.zero_grad()
78 | ## Auxiliary losses
79 | loss -= torch.mean(entropies) * 0.01  # entropy bonus with weight 0.01
80 | loss += self.lam * proj_loss  # projection loss weighted by lam
81 |
82 | loss.backward()
83 | nn.utils.clip_grad_norm_(self.policy.nn.parameters(), 100)  # gradient norm clipped at 100
84 | self.optimizer.step()
85 | print("Post Step")
86 | self.policy_old.nn.load_state_dict(self.policy.nn.state_dict())  # copy the updated weights into the old policy
87 |
88 | ##TODO: Move the update_policy to a Trainer class
89 | def behavior_cloning(self, batch_size):
90 | u_hat, u_star, u_nns = self._get_training_samples(batch_size)  # NOTE(review): _get_training_samples above takes no batch_size and returns a DataLoader - this call looks stale, confirm before use
91 |
92 | loss = self.criterion(u_hat, u_star)  # NOTE(review): self.criterion is not assigned in __init__ above - confirm where it is set
93 | loss += self.lam * self.criterion(u_nns, u_hat) # Auxiliary loss
94 |
95 | self.optimizer.zero_grad()
96 | loss.backward()
97 | self.optimizer.step()
98 |
99 | self.predictions = []
100 | self.targets = []
101 | return loss.detach()
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/data/ARX-0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/ARX-0
--------------------------------------------------------------------------------
/data/data_2017_baseline.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_2017_baseline.pkl
--------------------------------------------------------------------------------
/data/data_TMY3_baseline.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/data_TMY3_baseline.pkl
--------------------------------------------------------------------------------
/data/param_IW-nn-1800:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/data/param_IW-nn-1800
--------------------------------------------------------------------------------
/docs/figs/framework.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.pdf
--------------------------------------------------------------------------------
/docs/figs/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/figs/framework.png
--------------------------------------------------------------------------------
/docs/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/docs/slides.pdf
--------------------------------------------------------------------------------
/env/inverter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 | import scipy.io
4 | import torch
5 | import pdb
6 |
7 | from mypypower.newtonpf import newtonpf
8 | from pypower.ppoption import ppoption
9 |
# Per-unit base quantities; the base power follows from Vbase^2 / Zbase.
Zbase = 1
Vbase = 4800
Sbase = Vbase ** 2 / Zbase
13 | '''
14 | def getSbus(P, Q, fac = Sbase/1000):
15 | ## This expects P and Q in kW
16 | P = P/fac;
17 | Q = Q/fac;
18 | return P + 1j*Q
19 | '''
class IEEE37():
    '''
    Simulation environment built from the files under ./network/IEEE-37:
    the admittance matrix for the Newton power flow, the linearized model
    (R, B), and load / irradiance time series read from the data directory.
    '''
    def __init__(self, filePath = './network/IEEE-37',
                 dataPath = './data'):
        self.Ybus = scipy.io.loadmat(f'{filePath}/Ybus.mat')['Ybus']
        self.n = self.Ybus.shape[0]
        self.v_lower = 0.95
        self.v_upper = 1.05

        # Load linearized model
        linearized_dir = f'{filePath}_linearized'
        self.R = scipy.io.loadmat(f'{linearized_dir}/R.mat')['R']
        self.B = scipy.io.loadmat(f'{linearized_dir}/B.mat')['B']

        ## Bus index lists of each type of bus
        self.ref = np.array([0])
        self.pv = np.array([], dtype = np.int32) #np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1
        # Every bus that is neither the reference bus nor a PV bus is a PQ bus
        pq_buses = [bus for bus in range(self.n)
                    if bus not in self.ref and bus not in self.pv]
        self.pq = np.array(pq_buses, dtype = np.int32)
        self.n_pq = len(self.pq)
        self.ppopt = ppoption()

        self._get_reference()
        self._get_load_and_gen(dataPath = dataPath)

    def getSbus(self, t, wrt_reference = True, w_slack = False):
        '''
        Returns the vector of complex bus power injections, that is, generation
        minus load, together with the available generation at the inverter buses.
        Power is expressed in per unit.

        wrt_reference: subtract the reference injections S0
        w_slack: return all buses; otherwise only the last n_pq entries
        '''
        P_av = self.P_gen[t]
        P = P_av - self.P_l[t]
        Q = - self.Q_l[t]
        S = P + 1j*Q

        if wrt_reference:
            S = S - self.S0

        injections = S if w_slack else S[-self.n_pq:]
        return injections, P_av[self.gen_idx]

    def step(self, Sbus, wrt_reference = True):
        '''
        Solve the power flow for the given injections.

        returns:
            voltage magnitude, solver flag
        '''
        S = Sbus
        if wrt_reference:
            # Sbus is a deviation from S0 over the last len(Sbus) buses
            S = self.S0.copy()
            S[-len(Sbus):] += Sbus
        V, success, _ = newtonpf(scipy.sparse.csr_matrix(self.Ybus), S, self.V0, self.ref, self.pv, self.pq, self.ppopt)
        return np.abs(V), success

    def linear_estimate(self, P, Q, wrt_reference = True):
        '''
        Voltage estimate from the linearized model R*P + B*Q.

        wrt_reference=True: P, Q are deviations and the voltage deviation is
        returned (torch tensors and numpy arrays are both handled).
        wrt_reference=False: P, Q are absolute; the estimate is added to V0 over
        the last n_pq buses and the full voltage vector is returned.
        '''
        if not wrt_reference:
            V = self.V0.copy()
            delta_p = P-self.P0
            delta_q = Q-self.Q0
            V[-self.n_pq:] += self.R.dot(delta_p[-self.n_pq:]) + self.B.dot(delta_q[-self.n_pq:])
            return V

        if torch.is_tensor(P):
            return torch.tensor(self.R).float().matmul(P) + torch.tensor(self.B).float().matmul(Q)
        return self.R.dot(P) + self.B.dot(Q)

    ## Reference Point for Linearization
    def _get_reference(self):
        '''Set V0, P0, Q0 and S0 at the flat voltage point (unit magnitude, zero angle).'''
        # Flat voltage point
        self.V0 = np.ones(self.n)
        A0 = np.zeros(self.n)
        V_complex = self.V0*np.exp(1j*A0)
        # Corresponding to current injection
        J0 = self.Ybus.dot(V_complex)
        # Corresponding to power injection
        S0 = V_complex*np.conj(J0)
        self.P0 = np.real(S0)
        self.Q0 = np.imag(S0)
        self.S0 = self.P0 + 1j*self.Q0

    ## Load Demand and Generation
    def _get_load_and_gen(self, dataPath = './data'):
        '''Read the load and irradiance series and build per-unit P_l, Q_l and P_gen.'''
        # Load
        self.load_idx = np.array([2, 5, 6, 7, 9, 10, 11, 13, 14, 16, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36]) -1
        # (Unit in W); transposed to 604800 x 8
        load = scipy.io.loadmat(f'{dataPath}/Loads_1sec.mat')['Loads'].transpose()
        n_steps, n_profiles = load.shape[0], load.shape[1]
        self.P_l = np.zeros((n_steps, self.n))
        # Load buses cycle through the available profiles
        for i, idx in enumerate(self.load_idx):
            self.P_l[:, idx] = load[:, i % n_profiles]
        self.Q_l = 0.5 * self.P_l
        # Convert loads to p.u.
        self.P_l /= Sbase
        self.Q_l /= Sbase

        # Generation; irradiance transposed to 604800 x 1
        solar_rad = scipy.io.loadmat(f'{dataPath}/Irradiance_1sec.mat')['Irr24_seq'].transpose()

        self.gen_idx = np.array([4, 7, 9, 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36])-1

        # PV capacity [kVA]
        self.max_S = np.array([200, 200, 100, 200, 200, 200, 200, 200, 200, 200, 200,
                               200, 200, 200, 200, 200, 200, 350, 350, 300, 300])
        self.max_S = self.max_S * 1000 / Sbase # Convert to p.u.
        # Area of the PV array
        Area_PV = np.array([100, 100, 100, 200, 200, 200, 200, 200, 200, 100,
                            200, 200, 200, 100, 200, 200, 200, 350, 350, 300, 300])
        # PV efficiency
        PV_Irradiance_to_Power_Efficiency = 1

        self.P_gen = np.zeros((n_steps, self.n))
        gen = solar_rad * Area_PV * PV_Irradiance_to_Power_Efficiency
        gen /= Sbase # Convert to p.u.
        # Available generation is capped at each inverter's rating
        self.P_gen[:, self.gen_idx] = gen.clip(max = self.max_S.reshape(1, -1))
136 |
137 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: nn-w-proj
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - _libgcc_mutex=0.1=conda_forge
7 | - _openmp_mutex=4.5=1_gnu
8 | - ampl-mp=3.1.0=h616b090_1004
9 | - backcall=0.2.0=pyhd3eb1b0_0
10 | - ca-certificates=2021.4.13=h06a4308_1
11 | - certifi=2020.12.5=py39h06a4308_0
12 | - cvxpy=1.1.12=py39hf3d152e_0
13 | - cvxpy-base=1.1.12=py39hde0f152_0
14 | - cyipopt=1.0.3=py39h3c5bb4f_0
15 | - decorator=5.0.7=pyhd3eb1b0_0
16 | - ecos=2.0.8=py39hce5d2b2_0
17 | - future=0.18.2=py39hf3d152e_3
18 | - ipopt=3.13.4=h7ede334_0
19 | - ipykernel=5.3.4=py39hb070fc8_0
20 | - ipython=7.22.0=py39hb070fc8_0
21 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
22 | - jedi=0.17.2=py39h06a4308_1
23 | - jupyter_client=6.1.12=pyhd3eb1b0_0
24 | - jupyter_core=4.7.1=py39h06a4308_0
25 | - ld_impl_linux-64=2.35.1=hea4e1c9_2
26 | - libblas=3.9.0=9_openblas
27 | - libcblas=3.9.0=9_openblas
28 | - libffi=3.3=h58526e2_2
29 | - libgcc-ng=9.3.0=h2828fa1_19
30 | - libgfortran-ng=9.3.0=hff62375_19
31 | - libgfortran5=9.3.0=hff62375_19
32 | - libgomp=9.3.0=h2828fa1_19
33 | - liblapack=3.9.0=9_openblas
34 | - libopenblas=0.3.15=pthreads_h8fe5266_0
35 | - libsodium=1.0.18=h7b6447c_0
36 | - libstdcxx-ng=9.3.0=h6de172a_19
37 | - metis=5.1.0=h58526e2_1006
38 | - mumps-include=5.2.1=ha770c72_10
39 | - mumps-seq=5.2.1=h47a8eb5_10
40 | - ncurses=6.2=h58526e2_4
41 | - numpy=1.20.2=py39hdbf815f_0
42 | - openssl=1.1.1k=h27cfd23_0
43 | - osqp=0.6.2=py39hde0f152_1
44 | - parso=0.7.0=py_0
45 | - pexpect=4.8.0=pyhd3eb1b0_3
46 | - pickleshare=0.7.5=pyhd3eb1b0_1003
47 | - pip=21.0.1=py39h06a4308_0
48 | - prompt-toolkit=3.0.17=pyh06a4308_0
49 | - ptyprocess=0.7.0=pyhd3eb1b0_2
50 | - pygments=2.8.1=pyhd3eb1b0_0
51 | - python=3.9.4=hffdb5ce_0_cpython
52 | - python-dateutil=2.8.1=pyhd3eb1b0_0
53 | - python_abi=3.9=1_cp39
54 | - pyzmq=20.0.0=py39h2531618_1
55 | - qdldl-python=0.1.5=py39hde0f152_0
56 | - readline=8.1=h46c0cb4_0
57 | - scipy=1.6.3=py39hee8e79c_0
58 | - scotch=6.0.9=h0eec0ba_1
59 | - scs=2.1.3=py39h3c5bb4f_0
60 | - setuptools=49.6.0=py39hf3d152e_3
61 | - six=1.16.0=pyh6c4a22f_0
62 | - sqlite=3.35.5=h74cdb3f_0
63 | - tk=8.6.10=h21135ba_1
64 | - tornado=6.1=py39h27cfd23_0
65 | - traitlets=5.0.5=pyhd3eb1b0_0
66 | - tzdata=2021a=he74cb21_0
67 | - wcwidth=0.2.5=py_0
68 | - wheel=0.36.2=pyhd3deb0d_0
69 | - xz=5.2.5=h516909a_1
70 | - zeromq=4.3.4=h2531618_0
71 | - zlib=1.2.11=h516909a_1010
72 | - pip:
73 | - absl-py==0.13.0
74 | - cachetools==4.2.2
75 | - charset-normalizer==2.0.5
76 | - cvxpylayers==0.1.5
77 | - cycler==0.10.0
78 | - diffcp==1.0.16
79 | - google-auth==1.35.0
80 | - google-auth-oauthlib==0.4.6
81 | - grpcio==1.40.0
82 | - idna==3.2
83 | - kiwisolver==1.3.1
84 | - markdown==3.3.4
85 | - matplotlib==3.4.2
86 | - oauthlib==3.1.1
87 | - pillow==8.2.0
88 | - protobuf==3.17.3
89 | - pyasn1==0.4.8
90 | - pyasn1-modules==0.2.8
91 | - pybind11==2.6.2
92 | - pyparsing==2.4.7
93 | - pypower==5.1.15
94 | - requests==2.26.0
95 | - requests-oauthlib==1.3.0
96 | - rsa==4.7.2
97 | - tensorboard==2.6.0
98 | - tensorboard-data-server==0.6.1
99 | - tensorboard-plugin-wit==1.8.0
100 | - threadpoolctl==2.1.0
101 | - torch==1.8.1
102 | - typing-extensions==3.10.0.0
103 | - urllib3==1.26.6
104 | - werkzeug==2.0.1
105 | prefix: /home/bingqinc/anaconda3/envs/nn-w-proj
106 |
--------------------------------------------------------------------------------
/inverter_baselines/inverter_QP.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 |
3 | import numpy as np
4 | import cvxpy as cp
5 | import torch
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from torch.utils.tensorboard import SummaryWriter
9 |
10 | main_path = os.path.abspath(os.path.join(__file__, '..'))
11 | sys.path.insert(0, main_path)
12 |
13 | from env.inverter import IEEE37
14 |
15 | from algo.ppo import PPO
16 | from agents.inverter_policy import Net, NeuralController
17 | from utils.inverter_utils import Replay_Memory
18 |
19 |
20 | import pdb
21 |
22 | import torch
# Run on the GPU when one is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Command-line options (parsed at import time of this script)
parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
                    help='discount factor (default: 0.98)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
# The help text used to be a copy of the --seed help
parser.add_argument('--lam', type=int, default=10, metavar='N',
                    help='weight of the projection loss (default: 10)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
                    help='Learning Rate')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The help text now states the actual default (4)
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--exp_name', type=str, default='inverter_QP',
                    help='save name')
parser.add_argument('--network_name', type=str, default='ieee37',
                    help='')
args = parser.parse_args()
43 |
class QP_solver():
    '''
    Optimization baseline: chooses inverter setpoints (P, Q) that minimize
    sum(max(P_av - P, 0)) subject to the linearized voltage limits and the
    inverter limits. The problem is built once; solve() only refreshes the
    parameters.
    '''
    def __init__(self, **env_params):
        self.n_bus = env_params['n_bus']
        H = env_params['H']
        self.V0 = env_params['V0']
        self.P0 = env_params['P0']
        self.Q0 = env_params['Q0']
        self.V_upper = env_params['V_upper']
        self.V_lower = env_params['V_lower']
        self.S_rating = env_params['S_rating']
        self.gen_idx = env_params['gen_idx']

        self.other_idx = [i for i in range(self.n_bus) if i not in self.gen_idx ]

        # Permute rows and columns of R and B so that inverter buses come first,
        # matching the stacking z = [P, P_nc, Q, Q_nc] below.
        bus_order = list(self.gen_idx) + self.other_idx
        reorder = np.ix_(bus_order, bus_order)
        R_new = H[:, :self.n_bus][reorder]
        B_new = H[:, self.n_bus:][reorder]
        H_new = np.hstack([R_new, B_new])

        n_gen = len(self.gen_idx)
        n_other = len(self.other_idx)

        # Decision variables: inverter setpoints
        self.P = cp.Variable(n_gen)
        self.Q = cp.Variable(n_gen)

        # Parameters refreshed on every solve() call
        self.P_nc = cp.Parameter(n_other)
        self.Q_nc = cp.Parameter(n_other)
        self.P_av = cp.Parameter(n_gen)

        # Voltage: Apply to All Buses
        z = cp.hstack([self.P, self.P_nc, self.Q, self.Q_nc]) # z: (70, )
        dV = H_new@z
        constraints = [self.V_lower - self.V0 <= dV,
                       dV <= self.V_upper - self.V0]

        ## Power: Only applies to Inverters
        P_abs = self.P0[self.gen_idx] + self.P
        Q_abs = self.Q0[self.gen_idx] + self.Q
        PQ = cp.vstack([P_abs, Q_abs]) # (2, n)
        constraints += [0 <= P_abs,
                        P_abs <= self.P_av,
                        cp.norm(PQ, axis = 0) <= self.S_rating]

        objective = cp.Minimize(cp.sum(cp.maximum(self.P_av - self.P,
                                                  np.zeros(n_gen))))
        self.problem = cp.Problem(objective, constraints)

    def solve(self, Sbus, P_av):
        '''
        Load the non-inverter injections and the available power into the
        problem, solve it, and return the values of P and Q.

        NOTE(review): the values are returned without checking the solver
        status - confirm how callers handle an unsolved problem.
        '''
        self.P_nc.value = Sbus.real[self.other_idx]
        self.Q_nc.value = Sbus.imag[self.other_idx]
        self.P_av.value = P_av

        self.problem.solve()
        return self.P.value, self.Q.value
118 |
119 | def main():  # runs the QP baseline over the environment and saves V / P / Q records under results/
120 | torch.manual_seed(args.seed)
121 | writer = SummaryWriter(comment = args.exp_name)
122 |
123 | # Create Simulation Environment
124 | if args.network_name == 'ieee37':
125 | env = IEEE37()
126 | else:
127 | print("Not implemented")  # NOTE: env stays undefined on this path, so the next statement fails
128 |
129 | n_bus = env.n - 1  # bus count without the slack bus (see the 'n_bus' entry below)
130 | n_inverters = len(env.gen_idx) # inverters at PV panels
131 |
132 | env_params = {'V0': env.V0[-env.n_pq:],  # reference quantities restricted to the last n_pq buses
133 | 'P0': env.P0[-env.n_pq:],
134 | 'Q0': env.Q0[-env.n_pq:],
135 | 'H': np.hstack([env.R, env.B]), # 35 x 70
136 | 'n_bus':n_bus, # Slack bus is not controllable
137 | 'gen_idx': env.gen_idx - 1, # Excluded the slack bus
138 | 'V_upper': env.v_upper, 'V_lower': env.v_lower,
139 | 'S_rating': env.max_S,
140 | }
141 |
142 | controller = QP_solver(**env_params)
143 |
144 | # 1-week data
145 | num_steps = 900 # 15 minutes
146 | n_episodes = 7*86400//num_steps
147 |
148 | V_prev = np.zeros(n_bus)
149 |
150 | V_record = []
151 | V_est_record = []
152 | P_record = []
153 | Q_record = []
154 |
155 | for i in range(n_episodes):
156 | loss = 0
157 | violation_count = 0
158 |
159 | for k in range(num_steps):
160 | t = i*num_steps + k  # global time index into the data series
161 | Sbus, P_av = env.getSbus(t)
162 |
163 | P_gen, Q_gen = controller.solve(Sbus, P_av)
164 | print(f"P_av = {P_av}, P = {P_gen}")
165 |
166 | P = Sbus.real
167 | Q = Sbus.imag
168 | P[controller.gen_idx] = P_gen  # overwrite the inverter-bus entries with the solver's setpoints
169 | Q[controller.gen_idx] = Q_gen
170 |
171 | V, success = env.step(P + 1j*Q)
172 | V_prev = V[1:]  # drops the first entry of V
173 |
174 | if np.any(V>env.v_upper) | np.any(V0):  # NOTE(review): the text between original lines 174 and 190 appears to be missing from this copy, and V0 is not defined in this function - restore from version control
190 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
191 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
192 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
193 |
194 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
195 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
196 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
197 |
198 | if __name__ == '__main__':
199 | main()
200 |
201 | '''
202 | # Example Usage of the environment
203 | t = 10
204 | Sbus = env.getSbus(t)
205 |
206 | # Solve power flow equations
207 | V, success = env.step(Sbus)
208 | print(np.abs(V))
209 | if success == 0:
210 | print("Something is wrong")
211 |
212 | # Estimation using the linearized model
213 | V_est = env.linear_estimate(Sbus)
214 | print(V_est)
215 | '''
216 |
--------------------------------------------------------------------------------
/inverter_baselines/inverter_acopf.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 |
3 | import numpy as np
4 | import cvxpy as cp
5 | import ipopt
6 | import torch
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from torch.utils.tensorboard import SummaryWriter
10 |
11 | main_path = os.path.abspath(os.path.join(__file__, '..'))
12 | sys.path.insert(0, main_path)
13 |
14 | from env.inverter import IEEE37
15 |
16 | from algo.ppo import PPO
17 | from agents.inverter_policy import Net, NeuralController
18 | from utils.inverter_utils import Replay_Memory
19 |
20 | import ipdb
21 |
22 | import torch
23 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
24 | DEVICE
25 |
# Command-line options. Several of them mirror the RL entry points so that the
# baselines can be launched with the same flags.
parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
                    help='discount factor (default: 0.98)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
# The help text used to be a copy of the --seed help.
parser.add_argument('--lam', type=int, default=10, metavar='N',
                    help='lam coefficient (default: 10)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
                    help='Learning Rate')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The documented default now matches the actual default (4).
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--exp_name', type=str, default='inverter_ACOPF',
                    help='save name')
parser.add_argument('--network_name', type=str, default='ieee37',
                    help='')
args = parser.parse_args()
43 |
class ACOPFController():
    """AC optimal power flow baseline solved with IPOPT.

    Each call to :meth:`solve` builds a nonlinear program whose decision
    vector is laid out as
    ``[P_gen, Q_gen, P_slack, Q_slack, V (all buses), A (all buses)]``
    and hands it to ``ipopt`` together with an ``ACOPFSolver`` callback object.
    """

    def __init__(self, **env_params):
        """Store the network description.

        Required keys in ``env_params``: ``n_bus``, ``V0``, ``P0``, ``Q0``,
        ``V_upper``, ``V_lower``, ``S_rating``, ``gen_idx``, ``slack_idx``
        and ``Ybus``.
        """
        self.n_bus = env_params['n_bus']
        self.V0 = env_params['V0']
        self.P0 = env_params['P0']
        self.Q0 = env_params['Q0']
        self.V_upper = env_params['V_upper']
        self.V_lower = env_params['V_lower']
        self.S_rating = env_params['S_rating']
        self.gen_idx = env_params['gen_idx']
        self.slack_idx = env_params['slack_idx']
        self.Ybus = env_params['Ybus']

        self.A0 = np.zeros(self.V0.shape)  # initial voltage angle
        self.n_gen = len(self.gen_idx)
        self.n_slack = len(self.slack_idx)
        # Buses that are neither controllable generators nor slack buses.
        self.other_idx = [i for i in range(self.n_bus)
                          if i not in self.gen_idx and i not in self.slack_idx]

    def _variable_bounds(self, P_av):
        """Return ``(lb, ub)`` box bounds on the decision vector.

        * ``0 <= P <= P_av`` at the controllable buses
        * no explicit bounds on Q, P_slack or Q_slack
        * V and A are pinned to their initial values at the slack bus(es)
        * ``V_lower <= V <= V_upper`` and free angle at all other buses
        """
        def get_bound_with_slack(bound, slack_bound):
            values = bound * np.ones(self.n_bus)
            values[self.slack_idx] = slack_bound
            return values

        # np.inf instead of np.infty: the latter alias was removed in NumPy 2.0.
        lb = np.hstack([
            np.zeros(self.n_gen),
            -np.inf * np.ones(self.n_gen),
            -np.inf * np.ones(2 * self.n_slack),
            get_bound_with_slack(self.V_lower, self.V0[self.slack_idx]),
            get_bound_with_slack(-np.inf, self.A0[self.slack_idx])])
        ub = np.hstack([
            P_av,
            np.inf * np.ones(self.n_gen),
            np.inf * np.ones(2 * self.n_slack),
            get_bound_with_slack(self.V_upper, self.V0[self.slack_idx]),
            get_bound_with_slack(np.inf, self.A0[self.slack_idx])])
        return lb, ub

    def solve(self, Sbus, P_av):
        """Solve the ACOPF for one time step.

        Args:
            Sbus: complex net injection at every bus; only the entries at the
                non-controllable, non-slack buses are read.
            P_av: available active power at the controllable buses.

        Returns:
            Tuple ``(P, Q)`` with the active and reactive set-points at the
            controllable buses, taken from the first ``2 * n_gen`` entries of
            the solver's solution vector.
        """
        P_nc = Sbus.real[self.other_idx]
        Q_nc = Sbus.imag[self.other_idx]

        # Decision variables: P and Q at controllable buses,
        # Pslack and Qslack at slack bus, V and theta at all buses

        # initial guess for decision variables
        x0 = np.hstack([P_av, self.Q0[self.gen_idx],
                        self.P0[self.slack_idx], self.Q0[self.slack_idx],
                        self.V0, self.A0])

        # upper and lower bounds on decision variables
        lb, ub = self._variable_bounds(P_av)

        # upper and lower bounds on other constraints
        #   power flow constraint: diag(v)conj(Ybus)conj(v) - S = 0
        #       where v = diag(V*exp(1j*A)) and S is net demand at all nodes
        #       (separate out real and imaginary parts)
        #   P^2 + Q^2 \leq S_rating^2
        cl = np.hstack(
            [np.zeros(2*self.n_bus), np.zeros(self.n_gen)])
        cu = np.hstack(
            [np.zeros(2*self.n_bus), self.S_rating**2])

        problem_obj = ACOPFSolver(P_av, P_nc, Q_nc, self.Ybus,
                                  self.n_bus, self.n_gen, self.n_slack,
                                  self.gen_idx, self.slack_idx, self.other_idx)
        nlp = ipopt.problem(
            n=len(x0),                # num decision vars
            m=len(cl),                # num constraints
            problem_obj=problem_obj,
            lb=lb,                    # lower bounds on decision vars
            ub=ub,                    # upper bounds on decision vars
            cl=cl,                    # lower bounds on constraints
            cu=cu                     # upper bounds on constraints
        )

        nlp.addOption('tol', 1e-4)
        nlp.addOption('print_level', 0)  # 3)

        x, info = nlp.solve(x0)
        P = x[:self.n_gen]
        Q = x[self.n_gen:2*self.n_gen]

        return P, Q
130 |
131 |
class ACOPFSolver(object):
    """Callback object exposing objective, gradient, constraints and Jacobian.

    The decision vector is ordered as
    ``[P, Q, Pslack, Qslack, V, A]`` with block sizes
    ``[n_gen, n_gen, n_slack, n_slack, n_bus, n_bus]``.
    """

    def __init__(self, P_av, P_nc, Q_nc, Ybus, n_bus, n_gen, n_slack, gen_idx, slack_idx, other_idx):
        self.P_av = P_av
        self.P_nc = P_nc
        self.Q_nc = Q_nc
        self.Ybus = Ybus
        self.n_bus = n_bus
        self.n_gen = n_gen
        self.n_slack = n_slack
        self.gen_idx = gen_idx
        self.slack_idx = slack_idx
        self.other_idx = other_idx
        # Split points that cut the decision vector into its six blocks.
        block_sizes = [self.n_gen, self.n_gen, self.n_slack,
                       self.n_slack, self.n_bus, self.n_bus]
        self.split_inds = np.cumsum(block_sizes)[:-1]

    def objective(self, x):
        """Total curtailed active power (to be minimized)."""
        curtailed = self.P_av - x[:self.n_gen]
        return np.maximum(curtailed, 0).sum()

    def gradient(self, x):
        """Gradient of :meth:`objective`; only the P block is non-zero."""
        is_curtailed = (self.P_av - x[:self.n_gen]) > 0
        n_remaining = self.n_gen + 2*self.n_slack + 2*self.n_bus
        return np.hstack([-1 * is_curtailed.astype(int), np.zeros(n_remaining)])

    def constraints(self, y):
        """Constraint values, excluding the box bounds on the variables.

        Returns the real parts of the power-flow mismatch, then the imaginary
        parts, then ``P**2 + Q**2`` for every inverter.
        """
        P, Q, Pslack, Qslack, V, A = np.split(y, self.split_inds)

        # Net complex injection specified at every bus.
        specified = np.zeros(self.n_bus, dtype=np.complex128)
        specified[self.gen_idx] = P + 1j*Q
        specified[self.slack_idx] = Pslack + 1j*Qslack
        specified[self.other_idx] = self.P_nc + 1j*self.Q_nc

        # Power flow: diag(v) conj(Ybus) conj(v) - S = 0
        phasor = V * np.exp(1j * A)
        injected = np.diag(phasor)@np.conj(self.Ybus)@np.conj(phasor)
        mismatch = injected - specified

        # Left-hand side of P^2 + Q^2 <= S_rating^2
        inverter_loading = P**2 + Q**2

        return np.hstack([np.real(mismatch), np.imag(mismatch), inverter_loading])

    def jacobian(self, y):
        """Dense Jacobian of :meth:`constraints`, flattened row by row."""
        P, Q, _, _, V, A = np.split(y, self.split_inds)
        n_bus, n_gen, n_slack = self.n_bus, self.n_gen, self.n_slack

        # Jacobian of power flow constraint
        # See: http://www.cs.cmu.edu/~zkolter/course/15-884/eps_power_flow.pdf
        phasor = V * np.exp(1j * A)
        Y_conj = np.conj(self.Ybus)
        current_conj = Y_conj@np.conj(phasor)
        # J_angle: derivative w.r.t. angles A; J_mag: derivative w.r.t. magnitudes V.
        J_angle = 1j * np.diag(phasor) @ (np.diag(current_conj) - Y_conj@np.diag(np.conj(phasor)))
        J_mag = np.diag(phasor)@Y_conj@np.diag(np.exp(-1j * A)) + \
            np.diag(np.exp(1j * A))@np.diag(current_conj)

        identity = np.eye(n_bus)
        neg_gen_cols = -identity[:, self.gen_idx]
        neg_slack_cols = -identity[:, self.slack_idx]
        zeros_gen = np.zeros((n_bus, n_gen))
        zeros_slack = np.zeros((n_bus, n_slack))

        real_rows = np.hstack([neg_gen_cols, zeros_gen, neg_slack_cols, zeros_slack,
                               np.real(J_mag), np.real(J_angle)])
        imag_rows = np.hstack([zeros_gen, neg_gen_cols, zeros_slack, neg_slack_cols,
                               np.imag(J_mag), np.imag(J_angle)])
        power_flow_jac = np.vstack([real_rows, imag_rows])

        # Jacobian of apparent power constraint
        apparent_power_jac = np.hstack([
            np.diag(2*P), np.diag(2*Q),
            np.zeros((n_gen, 2*n_slack + 2*n_bus))])

        return np.concatenate([power_flow_jac.flatten(), apparent_power_jac.flatten()])
200 |
201 |
202 | def main():
203 | torch.manual_seed(args.seed)
204 | writer = SummaryWriter(comment = args.exp_name)
205 |
206 | # Create Simulation Environment
207 | if args.network_name == 'ieee37':
208 | env = IEEE37()
209 | else:
210 | print("Not implemented")
211 |
212 | n_bus = env.n
213 | n_inverters = len(env.gen_idx) # inverters at PV panels
214 |
215 | env_params = {'V0': env.V0,
216 | 'P0': env.P0,
217 | 'Q0': env.Q0,
218 | 'n_bus': n_bus,
219 | 'gen_idx': env.gen_idx,
220 | 'slack_idx': env.ref,
221 | 'V_upper': env.v_upper, 'V_lower': env.v_lower,
222 | 'S_rating': env.max_S,
223 | 'Ybus': env.Ybus
224 | }
225 |
226 | controller = ACOPFController(**env_params)
227 |
228 | # 1-week data
229 | num_steps = 900 # 15 minutes
230 | n_episodes = 7*86400//num_steps
231 |
232 | V_prev = np.zeros(n_bus)
233 |
234 | V_record = []
235 | V_est_record = []
236 | P_record = []
237 | Q_record = []
238 |
239 | start_ep = 600
240 | for i in range(start_ep, min(n_episodes, start_ep + 100)):
241 | loss = 0
242 | violation_count = 0
243 |
244 | for k in range(num_steps):
245 | t = i*num_steps + k
246 | Sbus, P_av = env.getSbus(t, wrt_reference=False, w_slack=True)
247 |
248 | P_gen, Q_gen = controller.solve(Sbus, P_av)
249 | print(f"P_av = {P_av}, P = {P_gen}")
250 |
251 | P = Sbus.real
252 | Q = Sbus.imag
253 | P[controller.gen_idx] = P_gen
254 | Q[controller.gen_idx] = Q_gen
255 |
256 | V, success = env.step(P + 1j*Q, wrt_reference=False)
257 | V_prev = V[1:]
258 |
259 | if np.any(V>env.v_upper) | np.any(V0):
275 | np.save(f"results/V_{args.exp_name}_{start_ep}.npy", np.array(V_record))
276 | np.save(f"results/P_{args.exp_name}_{start_ep}.npy", np.array(P_record))
277 | np.save(f"results/Q_{args.exp_name}_{start_ep}.npy", np.array(Q_record))
278 |
279 | np.save(f"results/V_{args.exp_name}_{start_ep}.npy", np.array(V_record))
280 | np.save(f"results/P_{args.exp_name}_{start_ep}.npy", np.array(P_record))
281 | np.save(f"results/Q_{args.exp_name}_{start_ep}.npy", np.array(Q_record))
282 |
283 |
284 | if __name__ == '__main__':
285 | main()
286 |
287 | '''
288 | # Example Usage of the environment
289 | t = 10
290 | Sbus = env.getSbus(t)
291 |
292 | # Solve power flow equations
293 | V, success = env.step(Sbus)
294 | print(np.abs(V))
295 | if success == 0:
296 | print("Something is wrong")
297 |
298 | # Estimation using the linearized model
299 | V_est = env.linear_estimate(Sbus)
300 | print(V_est)
301 | '''
302 |
--------------------------------------------------------------------------------
/inverter_baselines/inverter_no-control.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 |
3 | import numpy as np
4 | from torch.utils.tensorboard import SummaryWriter
5 |
6 | main_path = os.path.abspath(os.path.join(__file__, '..'))
7 | sys.path.insert(0, main_path)
8 |
9 | from env.inverter import IEEE37
10 |
11 | import pdb
12 |
13 | #import torch
14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15 | #DEVICE
16 |
# Command-line options. The flag set mirrors the RL entry points so the
# baseline can be launched with the same arguments.
parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
                    help='discount factor (default: 0.98)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
                    help='Learning Rate')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The documented default now matches the actual default (4).
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--exp_name', type=str, default='no-control',
                    help='save name')
parser.add_argument('--network_name', type=str, default='ieee37',
                    help='')
args = parser.parse_args()
32 |
33 |
34 | def main():
35 | writer = SummaryWriter(comment = args.exp_name)
36 |
37 | # Create Simulation Environment
38 | if args.network_name == 'ieee37':
39 | env = IEEE37()
40 | else:
41 | print("Not implemented")
42 | n_bus = env.n
43 | env_params = {'V0': env.V0[-env.n_pq:],
44 | 'P0': env.P0[-env.n_pq:],
45 | 'Q0': env.Q0[-env.n_pq:],
46 | 'gen_idx': env.gen_idx, # Including the slack bus
47 | 'V_upper': env.v_upper, 'V_lower': env.v_lower,
48 | 'S_rating': env.max_S,
49 | }
50 |
51 | ## Note: Volt-Var controller considers deviation from 1
52 | #controller = VoltVarController(0.04, **env_params)
53 |
54 | # 1-week data
55 | num_steps = 600 # 10 minutes
56 | n_episodes = 7*86400//num_steps
57 |
58 | V_prev = np.ones(n_bus)
59 | V_record = []
60 |
61 | for i in range(n_episodes):
62 | violation_count = 0
63 | for k in range(num_steps):
64 | t = i*num_steps + k
65 | Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True)
66 |
67 | #Q = controller.forward(V_prev, P_av = P_av) # at Generation buses
68 | #Sbus.imag[env.gen_idx] += Q
69 |
70 | V, success = env.step(Sbus)
71 | V_prev = V
72 |
73 | if np.any(V>env.v_upper) | np.any(V0):
83 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
84 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
85 |
86 | if __name__ == '__main__':
87 | main()
88 |
89 | '''
90 | # Example Usage of the environment
91 | t = 10
92 | Sbus = env.getSbus(t)
93 |
94 | # Solve power flow equations
95 | V, success = env.step(Sbus)
96 | print(np.abs(V))
97 | if success == 0:
98 | print("Something is wrong")
99 |
100 | # Estimation using the linearized model
101 | V_est = env.linear_estimate(Sbus)
102 | print(V_est)
103 | '''
104 |
--------------------------------------------------------------------------------
/inverter_baselines/inverter_volt-var.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 |
3 | import numpy as np
4 | from torch.utils.tensorboard import SummaryWriter
5 |
6 | main_path = os.path.abspath(os.path.join(__file__, '..'))
7 | sys.path.insert(0, main_path)
8 |
9 | from env.inverter import IEEE37
10 |
11 | import pdb
12 |
13 | #import torch
14 | #DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15 | #DEVICE
16 |
# Command-line options. The flag set mirrors the RL entry points so the
# baseline can be launched with the same arguments.
parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
                    help='discount factor (default: 0.98)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
                    help='Learning Rate')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The documented default now matches the actual default (4).
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--exp_name', type=str, default='volt-var',
                    help='save name')
parser.add_argument('--network_name', type=str, default='ieee37',
                    help='')
args = parser.parse_args()
32 |
33 |
class VoltVarController():
    """Piece-wise linear Volt-VAR droop controller with a dead band around 1.

    The normalized output is +1 at or below ``V_lower``, -1 at or above
    ``V_upper``, 0 inside the dead band ``[1 - delta/2, 1 + delta/2]`` and
    linear in between. It is scaled by the reactive power still available
    given the inverter rating and the current active power.
    """

    def __init__(self, delta, **env_params):
        """Store limits and pre-compute the droop slope.

        Args:
            delta: total width of the dead band centred on 1.
            **env_params: must contain ``V_upper``, ``V_lower``, ``gen_idx``
                and ``S_rating``.
        """
        super(VoltVarController, self).__init__()
        self.V_upper = env_params['V_upper']
        self.V_lower = env_params['V_lower']
        self.delta = delta
        self.gen_idx = env_params['gen_idx']
        self.S_rating = env_params['S_rating']
        # Slope of the droop segments. It is derived from V_upper only and
        # reused for the low-voltage segment, so the curve is continuous only
        # when V_lower and V_upper are symmetric around 1.
        self.a = 1/(self.V_upper-1-self.delta/2)

    def forward(self, voltage, P_av):
        """Return the reactive power set-point at each generator bus.

        Args:
            voltage: voltage magnitudes, indexed with ``gen_idx`` inside.
            P_av: active power at the generator buses.

        Returns:
            Array of reactive power values, one per entry of ``gen_idx``.
        """
        # Reactive headroom sqrt(S^2 - P^2). Clamp at zero so that P_av above
        # the rating yields no reactive capability instead of NaN.
        Q = np.sqrt(np.maximum(self.S_rating**2 - P_av**2, 0))

        # Piece-wise Linear Curve
        voltage = voltage[self.gen_idx]
        out = np.zeros_like(voltage)

        out[voltage <= self.V_lower] = 1

        idx = (voltage > self.V_lower) & (voltage < 1 - self.delta/2)
        out[idx] = 1 - self.a*(voltage[idx]-self.V_lower)

        idx = (voltage > 1 + self.delta/2) & (voltage < self.V_upper)
        out[idx] = -self.a*(voltage[idx]-1-self.delta/2)

        out[voltage >= self.V_upper] = -1
        return out * Q
61 |
62 | def main():
63 | writer = SummaryWriter(comment = args.exp_name)
64 |
65 | # Create Simulation Environment
66 | if args.network_name == 'ieee37':
67 | env = IEEE37()
68 | else:
69 | print("Not implemented")
70 | n_bus = env.n
71 | env_params = {'V0': env.V0[-env.n_pq:],
72 | 'P0': env.P0[-env.n_pq:],
73 | 'Q0': env.Q0[-env.n_pq:],
74 | 'gen_idx': env.gen_idx, # Including the slack bus
75 | 'V_upper': env.v_upper, 'V_lower': env.v_lower,
76 | 'S_rating': env.max_S,
77 | }
78 |
79 | ## Note: Volt-Var controller considers deviation from 1
80 | controller = VoltVarController(0.04, **env_params)
81 |
82 | # 1-week data
83 | num_steps = 600 # 10 minutes
84 | n_episodes = 7*86400//num_steps
85 |
86 | V_prev = np.ones(n_bus)
87 | V_record = []
88 |
89 | for i in range(n_episodes):
90 | violation_count = 0
91 | for k in range(num_steps):
92 | t = i*num_steps + k
93 | Sbus, P_av = env.getSbus(t, wrt_reference = False, w_slack = True)
94 |
95 | Q = controller.forward(V_prev, P_av = P_av) # at Generation buses
96 |
97 | Sbus.imag[env.gen_idx] += Q
98 |
99 | V, success = env.step(Sbus)
100 | V_prev = V
101 |
102 | if np.any(V>env.v_upper) | np.any(V0):
112 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
# (unresolved merge-conflict markers removed; both sides of the conflict were blank)
117 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
118 |
119 | if __name__ == '__main__':
120 | main()
121 |
122 | '''
123 | # Example Usage of the environment
124 | t = 10
125 | Sbus = env.getSbus(t)
126 |
127 | # Solve power flow equations
128 | V, success = env.step(Sbus)
129 | print(np.abs(V))
130 | if success == 0:
131 | print("Something is wrong")
132 |
133 | # Estimation using the linearized model
134 | V_est = env.linear_estimate(Sbus)
135 | print(V_est)
136 | '''
137 |
--------------------------------------------------------------------------------
/main_IW.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import gym
5 | import eplus_env
6 |
7 | import warnings
8 | warnings.filterwarnings("ignore", category=UserWarning)
9 |
10 | import argparse
11 | import numpy as np
12 | import pandas as pd
13 | import copy
14 | import pickle
15 | import pdb
16 |
17 | import torch
18 | import torch.nn as nn
19 | import torch.nn.functional as F
20 | import torch.utils.data as data
21 | import torch.optim as optim
22 | from torch.distributions import MultivariateNormal, Normal
23 | from torch.utils.tensorboard import SummaryWriter
24 |
25 | from algo.ppo import PPO
26 | from agents.nn_policy import NeuralController
27 | from utils.network import LSTM
28 | from utils.ppo_utils import make_dict, R_func, Advantage_func, Replay_Memory
29 |
30 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
31 | DEVICE
32 |
# Command-line options for the online-learning experiment.
parser = argparse.ArgumentParser(description='Gnu-RL: Online Learning')
parser.add_argument('--gamma', type=float, default=0.9, metavar='G',
                    help='discount factor (default: 0.9)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
parser.add_argument('--lr', type=float, default=5e-4, metavar='G',
                    help='Learning Rate')
# The help text used to be a copy of the --seed help.
parser.add_argument('--lam', type=int, default=10, metavar='N',
                    help='lam coefficient forwarded to the PPO agent (default: 10)')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The documented default now matches the actual default (4).
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--T', type=int, default=12, metavar='N',
                    help='Planning Horizon (default: 12)')
parser.add_argument('--step', type=int, default=300*3, metavar='N',
                    help='Time Step in Simulation, Unit in Seconds (default: 900)')  # 15 Minutes Now!
parser.add_argument('--exp_name', type=str, default='nn_w_proj',
                    help='save name')
parser.add_argument('--eta', type=int, default=3,
                    help='Hyper Parameter for Balancing Comfort and Energy')
parser.add_argument('--model_no', type = int, default = 1800, help = '')
args = parser.parse_args()
55 |
56 |
def main():
    """Fine-tune the pretrained LSTM policy online with PPO on 'Eplus-IW-test-v0'.

    Reads its configuration from the module-level ``args``. For every episode
    it steps the simulation ``n_step`` times, stores the transitions in the
    agent's memory and, every ``args.update_episode`` episodes, updates the
    policy parameters. Scalars are logged through ``SummaryWriter`` and the
    merged observation/action table is written to
    ``results/obs_<exp_name>.pkl``.
    """
    torch.manual_seed(args.seed)
    writer = SummaryWriter(comment = args.exp_name)

    # Create Simulation Environment
    env = gym.make('Eplus-IW-test-v0')

    # Specify variable names for control problem
    obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "HW Enable OA Setpoint", "IW Average PPD", "HW Supply Setpoint", "Indoor Air Temp.", "Indoor Temp. Setpoint", "Occupancy Flag", "Heating Demand"]
    state_name = ["Indoor Air Temp."]
    dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"]
    ctrl_name = ["HW Enable OA Setpoint", "HW Supply Setpoint"]
    target_name = ["Indoor Temp. Setpoint"]
    dist_name = dist_name + target_name

    n_state = len(state_name)
    n_ctrl = 1 #len(ctrl_name)
    n_dist = len(dist_name)
    eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode
    step = args.step # step: Timestep; Unit in seconds
    T = args.T # T: Number of timesteps in the planning horizon
    tol_eps = 91 # tol_eps: Total number of episodes; Each episode is a natural day

    # Read Information on Weather, Occupancy, and Target Setpoint
    obs_2017 = pd.read_pickle("data/data_2017_baseline.pkl")
    disturbance = obs_2017[dist_name]
    # Min-Max Normalization
    obs_TMY3 = pd.read_pickle("data/data_TMY3_baseline.pkl") # For Min-Max Normalization Only
    dist_min = obs_TMY3[dist_name].min()
    dist_max = obs_TMY3[dist_name].max()
    disturbance = (disturbance - dist_min)/(dist_max - dist_min)
    state_min = obs_TMY3[state_name].min().values
    state_max = obs_TMY3[state_name].max().values
    memory = Replay_Memory()

    ## Load pretrained LSTM policy weights
    # Expects all states, actions, and disturbances are MinMaxNormalized; (Based on TMY3 data)
    # The LSTM also expects "setpoint" as part of the disturbance term.
    network = LSTM(n_state, n_ctrl, n_dist)
    network.load_state_dict(torch.load("data/param_IW-nn-{}".format(args.model_no)))

    ## Load thermodynamics model to construct the polytope
    # New model also expects states, actions, and disturbances to be MinMaxNormalized
    model_dict ={'a': np.array([0.934899]),
                 'bu': np.array([0.024423]),
                 'bd': np.array([5.15795080e-02, -6.92141185e-04, -1.21103548e-02,
                                 2.38717578e-03, -3.52816030e-03, 3.32528746e-03, 7.19267820e-03]),
                 'Pm': 1 # Upper bound of u;
                 }
    policy = NeuralController(T, step, network, RC_flag = False, **model_dict)
    agent = PPO(policy, memory, lr = args.lr, clip_param = args.epsilon, lam = args.lam)

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    results_dir = 'results'
    os.makedirs(results_dir, exist_ok=True)

    reward_scale = 15 # Normalize the reward for better training performance
    n_step = 96 #timesteps per day

    sigma = 0.1
    sigma_min = 0.01
    sigma_step = (sigma-sigma_min) * args.update_episode/tol_eps

    timeStep, obs, isTerminal = env.reset()
    # pd.Timestamp replaces the pd.datetime alias, which was removed in pandas 2.0.
    start_time = pd.Timestamp(year = env.start_year, month = env.start_mon, day = env.start_day)
    cur_time = start_time
    obs_dict = make_dict(obs_name, obs)

    # Save for record
    timeStamp = [start_time]
    observations = [obs]
    actions_taken = []

    for i_episode in range(tol_eps):
        ## Save for Parameter Updates
        rewards = []
        real_rewards = []

        for t in range(n_step):
            state = np.array([obs_dict[name] for name in state_name])
            state = (state-state_min)/(state_max-state_min)

            # Label-based slices covering the T-step planning horizon.
            horizon_end = cur_time + pd.Timedelta(seconds = (T-1) * step)
            x_upper = obs_2017['x_upper'][cur_time : horizon_end].values
            x_lower = obs_2017['x_lower'][cur_time : horizon_end].values
            ## Margin
            #x_lower+=0.025
            #x_upper-=0.025

            x_upper = (x_upper-state_min)/(state_max-state_min)
            x_lower = (x_lower-state_min)/(state_max-state_min)

            dt = disturbance[cur_time : horizon_end].values # T x n_dist

            ## Update the model in the controller
            # CVXPY expects np.array for parameters
            agent.policy_old.updateState(state, x_lower = x_lower, x_upper = x_upper, d = dt[:, :-1])
            agent.memory.x_lowers.append(torch.tensor(x_lower).float())
            agent.memory.x_uppers.append(torch.tensor(x_upper).float())

            state = torch.tensor(state).unsqueeze(0).float() # 1 x n_state
            dt = torch.tensor(dt).float()
            agent.memory.states.append(state)
            agent.memory.disturbance.append(dt)

            ## Use policy_old to select action
            mu, sigma_sq, _ = agent.forward(state, dt.unsqueeze(1), current = False) # mu, sigma_sq: T x 1 x Dim.
            sigma_sq = torch.ones_like(mu) * sigma**2

            ## Myopic Limit: A hack to make sure the projected actions do not result in tiny violations
            margin = 0.1/(state_max-state_min)
            u_limits = np.array([x_lower[0]+margin.item(), x_upper[0]-margin.item()]) - model_dict['a'] * state.item() - model_dict['bd'].dot(dt[0, :-1].numpy())
            u_limits /= model_dict['bu']
            u_limits = np.clip(u_limits, 0, 1)
            #pdb.set_trace()
            action, old_logprob = agent.select_action(mu[0], sigma_sq[0], u_limits = u_limits)
            agent.memory.actions.append(action.detach().clone())
            agent.memory.old_logprobs.append(old_logprob.detach().clone())

            # Map the normalized action to a supply water temperature setpoint.
            SWT = 20 + 45 * action.item()
            if (SWT<30):
                HWOEN = -30 # De Facto Off
                action = torch.zeros_like(action)
                SWT = 20
            else:
                HWOEN = 30 # De Facto On
            # A NaN action fails the `SWT<30` test above, so guard it here.
            if np.isnan(SWT):
                SWT = 20
            action4env = (HWOEN, SWT)

            # Before step
            print(f'{cur_time}: IAT={obs_dict["Indoor Air Temp."]}, Occupied={obs_dict["Occupancy Flag"]}, Control={SWT}')
            # The same action is applied for three consecutive env steps.
            for _ in range(3):
                timeStep, obs, isTerminal = env.step(action4env)

            obs_dict = make_dict(obs_name, obs)
            reward = R_func(obs_dict, SWT-20, eta)

            # Per step
            real_rewards.append(reward)
            bl = 0#obs_2017['rewards'][cur_time]
            rewards.append((reward-bl) / reward_scale)
            # print(f'Reward={reward}, BL={bl}')
            # Save for record
            cur_time = start_time + pd.Timedelta(seconds = timeStep)
            timeStamp.append(cur_time)
            observations.append(obs)
            actions_taken.append(action4env)

        writer.add_scalar('Reward', np.mean(real_rewards), i_episode)
        writer.add_scalar('Reward_Diff', np.mean(rewards), i_episode)
        print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))

        advantages = Advantage_func(rewards, args.gamma)
        agent.memory.advantages.append(advantages)
        # if -1, do not update parameters
        if args.update_episode == -1:
            agent.memory.clear_memory()
        elif i_episode > 0 and i_episode % args.update_episode == 0:
            agent.update_parameters(sigma = sigma, K = 8)
            # Anneal the exploration noise once per parameter update.
            sigma = max(sigma_min, sigma-sigma_step)

    obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name)
    obs_df = obs_df.drop(columns=ctrl_name)
    action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ctrl_name)
    obs_df = obs_df.merge(action_df, how = 'left', right_index = True, left_index = True)
    obs_df.to_pickle("results/obs_"+args.exp_name+".pkl")
227 |
228 | if __name__ == '__main__':
229 | main()
230 |
--------------------------------------------------------------------------------
/main_inverter.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | from torch.utils.tensorboard import SummaryWriter
8 |
9 | from env.inverter import IEEE37
10 |
11 | from algo.ppo import PPO
12 | from agents.inverter_policy import Net, NeuralController
13 | from utils.inverter_utils import Replay_Memory
14 |
15 |
16 | import pdb
17 |
18 | import torch
19 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20 | DEVICE
21 |
# Command-line options for the inverter-control experiment.
parser = argparse.ArgumentParser(description='GnuRL Demo: Online Learning')
parser.add_argument('--gamma', type=float, default=0.98, metavar='G',
                    help='discount factor (default: 0.98)')
parser.add_argument('--seed', type=int, default=42, metavar='N',
                    help='random seed (default: 42)')
# The help text used to be a copy of the --seed help.
parser.add_argument('--lam', type=int, default=10, metavar='N',
                    help='lam coefficient forwarded to NeuralController (default: 10)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='G',
                    help='Learning Rate')
parser.add_argument('--epsilon', type=float, default=0.2, metavar='G', help='PPO Clip Parameter')
# The documented default now matches the actual default (4).
parser.add_argument('--update_episode', type=int, default=4, metavar='N',
                    help='PPO update episode (default: 4); If -1, do not update weights')
parser.add_argument('--exp_name', type=str, default='inverter',
                    help='save name')
parser.add_argument('--network_name', type=str, default='ieee37',
                    help='')
args = parser.parse_args()
39 |
40 |
41 | def main():
42 | torch.manual_seed(args.seed)
43 | writer = SummaryWriter(comment = args.exp_name)
44 |
45 | # Create Simulation Environment
46 | if args.network_name == 'ieee37':
47 | env = IEEE37()
48 | else:
49 | print("Not implemented")
50 |
51 | n_bus = env.n - 1
52 | n_inverters = len(env.gen_idx) # inverters at PV panels
53 |
54 | env_params = {'V0': env.V0[-env.n_pq:],
55 | 'P0': env.P0[-env.n_pq:],
56 | 'Q0': env.Q0[-env.n_pq:],
57 | 'H': np.hstack([env.R, env.B]), # 35 x 70
58 | 'n_bus':n_bus, # Slack bus is not controllable
59 | 'gen_idx': env.gen_idx - 1, # Excluded the slack bus
60 | 'V_upper': env.v_upper, 'V_lower': env.v_lower,
61 | 'S_rating': env.max_S,
62 | }
63 | scaler = 1000 # Note: The value for Sbus is really small; Scale up for better learning
64 |
65 | mbp_nn = Net(n_bus, n_inverters, [256, 128, 64], [16, 4])
66 | memory = Replay_Memory()
67 | mbp_policy = NeuralController(mbp_nn, memory, args.lr, lam = args.lam, scaler = scaler, **env_params)
68 | mbp_policy = mbp_policy.to(DEVICE)
69 |
70 | # 1-week data
71 | num_steps = 900 # 15 minutes
72 | n_episodes = 7*86400//num_steps
73 |
74 | V_prev = np.zeros(n_bus)
75 |
76 | V_record = []
77 | V_est_record = []
78 | P_record = []
79 | Q_record = []
80 |
81 | for i in range(n_episodes):
82 | loss = 0
83 | violation_count = 0
84 |
85 | for k in range(num_steps):
86 | t = i*num_steps + k
87 | Sbus, P_av = env.getSbus(t)
88 | Sbus *= scaler
89 | state = np.concatenate([V_prev, np.real(Sbus), np.imag(Sbus)])
90 | mbp_policy.memory.append((state, Sbus, P_av)) ## Everything is np.array!
91 |
92 | state = torch.tensor(state).float().unsqueeze(0)
93 |
94 | P, Q = mbp_policy(state, Sbus, P_av = P_av)
95 | #pdb.set_trace()
96 |
97 | V, success = env.step(P + 1j*Q)
98 | V_prev = V[1:]
99 |
100 | if np.any(V>env.v_upper) | np.any(V0):
113 | mbp_policy.update()
114 |
115 | writer.add_scalar("Loss", loss.mean().item(), i)
116 | writer.add_scalar("violations", violation_count, i)
117 | ## Number of Projection operation during inference time
118 | writer.add_scalar("proj_count", mbp_policy.proj_count, i)
119 | mbp_policy.proj_count = 0
120 |
121 | if (i % 20 ==0) & (i>0):
122 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
123 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
124 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
125 |
126 | np.save(f"results/V_{args.exp_name}.npy", np.array(V_record))
127 | np.save(f"results/P_{args.exp_name}.npy", np.array(P_record))
128 | np.save(f"results/Q_{args.exp_name}.npy", np.array(Q_record))
129 |
130 | if __name__ == '__main__':
131 | main()
132 |
133 | '''
134 | # Example Usage of the environment
135 | t = 10
136 | Sbus = env.getSbus(t)
137 |
138 | # Solve power flow equations
139 | V, success = env.step(Sbus)
140 | print(np.abs(V))
141 | if success == 0:
142 | print("Something is wrong")
143 |
144 | # Estimation using the linearized model
145 | V_est = env.linear_estimate(Sbus)
146 | print(V_est)
147 | '''
148 |
--------------------------------------------------------------------------------
/mypypower/newtonpf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 1996-2015 PSERC. All rights reserved.
2 | # Use of this source code is governed by a BSD-style
3 | # license that can be found in the LICENSE file.
4 |
5 | """Solves the power flow using a full Newton's method.
6 | """
7 |
8 | import sys
9 |
10 | from numpy import array, angle, exp, linalg, conj, r_, Inf
11 |
12 | #from numpy import hstack, vstack
13 | from scipy.sparse import hstack, vstack
14 | from scipy.sparse.linalg import spsolve
15 |
16 | from pypower.dSbus_dV import dSbus_dV
17 | from pypower.ppoption import ppoption
18 |
19 | import pdb
20 |
def _pf_mismatch(Ybus, Sbus, V, pv, pq):
    """Return the stacked power-balance residual F(x) for the voltages V.

    The residual holds the real-power mismatch of the PV buses (only when
    there are any), followed by the real- and then the reactive-power
    mismatch of the PQ buses.
    """
    # NOTE(review): `Ybus * V` is only a matrix-vector product when Ybus is a
    # scipy sparse matrix (or np.matrix) -- confirm against the callers.
    mis = V * conj(Ybus * V) - Sbus
    if len(pv):
        return r_[mis[pv].real, mis[pq].real, mis[pq].imag]
    return r_[mis[pq].real, mis[pq].imag]


def newtonpf(Ybus, Sbus, V0, ref, pv, pq, ppopt=None):
    """Solves the power flow using a full Newton's method.

    Solves for bus voltages given the full system admittance matrix (for
    all buses), the complex bus power injection vector (for all buses),
    the initial vector of complex bus voltages, and column vectors with
    the lists of bus indices for the swing bus, PV buses, and PQ buses,
    respectively. The bus voltage vector contains the set point for
    generator (including ref bus) buses, and the reference angle of the
    swing bus, as well as an initial guess for remaining magnitudes and
    angles. C{ppopt} is a PYPOWER options vector which can be used to
    set the termination tolerance, maximum number of iterations, and
    output options (see L{ppoption} for details). Uses default options if
    this parameter is not given. Returns the final complex voltages, a
    flag which indicates whether it converged or not, and the number of
    iterations performed.

    Args:
        Ybus: system admittance matrix.
        Sbus: complex bus power injections.
        V0: initial complex bus voltages; it is not modified in place.
        ref: swing bus index; accepted for interface compatibility but not
            read by this function.
        pv: indices of the PV buses (may be empty).
        pq: indices of the PQ buses.
        ppopt: options mapping providing 'PF_TOL', 'PF_MAX_IT' and
            'VERBOSE'; C{ppoption()} is used when omitted.

    Returns:
        Tuple C{(V, converged, i)}: final complex voltages, 1/0 convergence
        flag and the number of Newton iterations performed.

    @see: L{runpf}

    @author: Ray Zimmerman (PSERC Cornell)
    """
    ## default arguments
    if ppopt is None:
        ppopt = ppoption()

    ## options
    tol = ppopt['PF_TOL']
    max_it = ppopt['PF_MAX_IT']
    verbose = ppopt['VERBOSE']

    ## infinity-norm order; spelled with the builtin float so the block does
    ## not depend on numpy's `Inf` alias (removed in NumPy 2.0)
    inf_norm = float("inf")

    ## initialize
    converged = 0
    i = 0
    V = V0
    Va = angle(V)
    Vm = abs(V)

    ## set up indexing for updating V
    pvpq = r_[pv, pq]
    npv = len(pv)
    npq = len(pq)
    j1 = 0
    j2 = npv            ## j1:j2 - V angle of pv buses
    j3 = j2
    j4 = j2 + npq       ## j3:j4 - V angle of pq buses
    j5 = j4
    j6 = j4 + npq       ## j5:j6 - V mag of pq buses

    ## evaluate F(x0)
    F = _pf_mismatch(Ybus, Sbus, V, pv, pq)

    ## check tolerance
    normF = linalg.norm(F, inf_norm)
    if verbose > 1:
        sys.stdout.write('\n it max P & Q mismatch (p.u.)')
        sys.stdout.write('\n---- ---------------------------')
        sys.stdout.write('\n%3d %10.3e' % (i, normF))
    if normF < tol:
        converged = 1
        if verbose > 1:
            sys.stdout.write('\nConverged!\n')

    ## do Newton iterations
    while (not converged and i < max_it):
        ## update iteration counter
        i = i + 1

        ## evaluate Jacobian; the column-vector row index selects the
        ## (rows x cols) sub-block of each partial-derivative matrix
        dS_dVm, dS_dVa = dSbus_dV(Ybus, V)
        J11 = dS_dVa[array([pvpq]).T, pvpq].real
        J12 = dS_dVm[array([pvpq]).T, pq].real
        J21 = dS_dVa[array([pq]).T, pvpq].imag
        J22 = dS_dVm[array([pq]).T, pq].imag
        J = vstack([hstack([J11, J12]), hstack([J21, J22])], format="csr")

        ## compute update step
        dx = -1 * spsolve(J, F)

        ## update voltage
        if npv:
            Va[pv] = Va[pv] + dx[j1:j2]
        if npq:
            Va[pq] = Va[pq] + dx[j3:j4]
            Vm[pq] = Vm[pq] + dx[j5:j6]
        V = Vm * exp(1j * Va)
        Vm = abs(V)         ## update Vm and Va again in case
        Va = angle(V)       ## we wrapped around with a negative Vm

        ## evaluate F(x)
        F = _pf_mismatch(Ybus, Sbus, V, pv, pq)

        ## check for convergence
        normF = linalg.norm(F, inf_norm)
        if verbose > 1:
            sys.stdout.write('\n%3d %10.3e' % (i, normF))
        if normF < tol:
            converged = 1
            if verbose:
                sys.stdout.write("\nNewton's method power flow converged in "
                                 "%d iterations.\n" % i)

    if verbose:
        if not converged:
            sys.stdout.write("\nNewton's method power flow did not converge "
                             "in %d iterations.\n" % i)

    return V, converged, i
144 |
--------------------------------------------------------------------------------
/network/IEEE-37/Ybus.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37/Ybus.mat
--------------------------------------------------------------------------------
/network/IEEE-37_linearized/B.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/B.mat
--------------------------------------------------------------------------------
/network/IEEE-37_linearized/R.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/INFERLab/PROF/be7f77f606d8c7d6505d4b2bad2d09760e9bafe9/network/IEEE-37_linearized/R.mat
--------------------------------------------------------------------------------
/network/bracket.m:
--------------------------------------------------------------------------------
function BRX = bracket(X)
%BRACKET Real block form of a complex matrix.
%   BRX = BRACKET(X) returns [real(X) -imag(X); imag(X) real(X)], i.e. a
%   real matrix with twice as many rows and columns as X.

re = real(X);
im = imag(X);

BRX = [re, -im; ...
       im,  re];
4 |
5 |
--------------------------------------------------------------------------------
/network/extract_phase_37feeder.m:
--------------------------------------------------------------------------------
function [P,Q,Y] = extract_phase_37feeder(phase,Zbase,Sbase)
%EXTRACT_PHASE_37FEEDER Per-phase loads and admittance matrix of the 37-node feeder.
%
%   [P,Q,Y] = EXTRACT_PHASE_37FEEDER(phase,Zbase,Sbase)
%
%   Inputs
%     phase - phase to extract (1, 2 or 3); selects one row of the load
%             tables and every third row/column of the network matrix
%     Zbase - impedance base; series impedances are divided by it and
%             shunt admittances are divided by 1/Zbase
%     Sbase - power base; the load tables are divided by Sbase/1000
%             (presumably the tables are in kW/kVAr and Sbase in VA --
%             TODO confirm)
%
%   Outputs
%     P, Q  - 1-by-36 active / reactive load of the selected phase
%     Y     - 36-by-36 admittance matrix of the selected phase

Nnode = 36;
Ybase = 1/Zbase;

% Load tables, one row per node and one column per phase (transposed below
% so that a phase is a row).
P_l = [  0   0   0;
       140 140 350;
         0   0   0;
         0   0   0;
         0   0  85;
         8  85   0;
         0   0  85;
         0   0   0;
        17  21   0;
        85   0   0;
         0   0  85;
         0   0   0;
         0  42   0;
         0 140  21;
         0   0   0;
         0  42   0;
         0   0   0;
         0   0  42;
        42   0   0;
        42   0   0;
        42  42  42;
         0   0  85;
         0   0   0;
         0  85   0;
         0   0   0;
         0   0  42;
        85   0   0;
         0   0  42;
       140   0   0;
       126   0   0;
         0   0   0;
         0   0  42;
         0   0  85;
         0   0   0;
         0  42   0;
         0   0  85].';

Q_l = [  0   0   0;
        70  70 175;
         0   0   0;
         0   0   0;
         0   0  40;
         4  40   0;
         0   0  40;
         0   0   0;
         8  10   0;
        40   0   0;
         0   0  40;
         0   0   0;
         0  21   0;
         0  70  10;
         0   0   0;
         0  21   0;
         0   0   0;
         0   0  21;
        21   0   0;
        21   0   0;
        21  21  21;
         0   0  40;
         0   0   0;
         0  40   0;
         0   0   0;
         0   0  21;
        40   0   0;
         0   0  21;
        70   0   0;
        62   0   0;
         0   0   0;
         0   0  21;
         0   0  40;
         0   0   0;
         0  21   0;
         0   0  40].';

%------------------------------------------------------------------------
% impedance matrix (per mile) of each line configuration
%------------------------------------------------------------------------

% Configuration 721
Zs1 = [0.2926+0.1973i 0.0673-0.0368i 0.0337-0.0417i;
       0.0673-0.0368i 0.2646+0.1900i 0.0673-0.0368i;
       0.0337-0.0417i 0.0673-0.0368i 0.2926+0.1973i]./Zbase;
Ys1 = sqrt(-1)*159.7919*(10^-6).*eye(3)./Ybase;

% Configuration 722
Zs2 = [0.4751+0.2973i 0.1629-0.0326i 0.1234-0.0607i;
       0.1629-0.0326i 0.4488+0.2678i 0.1629-0.0326i;
       0.1234-0.0607i 0.1629-0.0326i 0.4751+0.2973i]./Zbase;
Ys2 = sqrt(-1)*127.8306*(10^-6).*eye(3)./Ybase;

% Configuration 723
% The (3,2) entry is the mutual impedance 0.4871+0.2111i so that the matrix
% is symmetric like the other configurations; it previously repeated the
% self impedance 1.2936+0.6713i.
Zs3 = [1.2936+0.6713i 0.4871+0.2111i 0.4585+0.1521i;
       0.4871+0.2111i 1.3022+0.6326i 0.4871+0.2111i;
       0.4585+0.1521i 0.4871+0.2111i 1.2936+0.6713i]./Zbase;
Ys3 = sqrt(-1)*74.8405*(10^-6).*eye(3)./Ybase;

% Configuration 724
Zs4 = [2.0952+0.7758i 0.5204+0.2738i 0.4926+0.2123i;
       0.5204+0.2738i 2.1068+0.7398i 0.5204+0.2738i;
       0.4926+0.2123i 0.5204+0.2738i 2.0952+0.7758i]./Zbase;
Ys4 = sqrt(-1)*60.2483*(10^-6).*eye(3)./Ybase;

Zs = {Zs1, Zs2, Zs3, Zs4};
Ys = {Ys1, Ys2, Ys3, Ys4};

%--------------------
% line segments
%--------------------

% mile = 5280 feet
convfm = (1/5280);

% One row per line segment: [from-node  to-node  configuration  length(ft)]
% where configuration 1..4 stands for 721..724.
segments = [ 1  2 1 1850;
             2  3 2  960;
             3  4 4  400;
             4  5 4  240;
             4  6 4  320;
             3  7 3  360;
             7  8 3  520;
             8  9 4   80;
             9 10 4  520;
             8 11 3  800;
            11 12 4  920;
            12 13 4  760;
            12 14 4  120;
            11 15 3  600;
            15 16 4  280;
             3 17 2 1320;
            17 22 3  600;
            22 23 3  200;
            17 18 4  240;
            18 19 3  280;
            19 20 4  280;
            19 21 4  200;
            23 24 3  600;
            23 25 3  320;
            25 26 4  320;
            25 27 3  320;
            27 28 3  560;
            28 29 3  640;
            29 30 3  400;
            30 31 3  400;
            31 32 3  400;
            31 33 4  200;
            28 34 4  520;
            34 35 4 1280;
            34 36 4  200];

%--------------------
% network admittance matrix
%--------------------

% Each segment adds its series admittance plus half of its shunt admittance
% to the diagonal 3x3 blocks of both end nodes, and minus its series
% admittance to the two off-diagonal blocks coupling them.
Y_net = zeros(3*Nnode);
for k = 1:size(segments,1)
    nodeFrom = segments(k,1);
    nodeTo   = segments(k,2);
    cfg      = segments(k,3);
    len      = segments(k,4);

    Zi  = pinv(Zs{cfg}*len*convfm);
    Ysh = .5.*Ys{cfg}*len*convfm;

    f = 3*(nodeFrom-1) + (1:3);
    t = 3*(nodeTo-1)   + (1:3);

    Y_net(f,f) = Y_net(f,f) + Zi + Ysh;
    Y_net(t,t) = Y_net(t,t) + Zi + Ysh;
    Y_net(f,t) = Y_net(f,t) - Zi;
    Y_net(t,f) = Y_net(t,f) - Zi;
end

% Keep only the requested phase.
P = P_l(phase,:);
Q = Q_l(phase,:);
Y = Y_net(phase:3:end,phase:3:end);

fac = Sbase/1000;

P = P./fac;
Q = Q./fac;


end
311 |
--------------------------------------------------------------------------------
/network/ieee37.m:
--------------------------------------------------------------------------------
1 | % This code is modified from the reference below to linearize the IEEE 37-bus feeder system.
2 | %
3 | % S. Bolognani, F. Dörfler (2015)
4 | % "Fast power system analysis via implicit linearization of the power flow manifold."
5 | % In Proc. 53rd Annual Allerton Conference on Communication, Control, and Computing.
6 | % Preprint available at http://control.ee.ethz.ch/~bsaverio/papers/BolognaniDorfler_Allerton2015.pdf
7 | %
8 | % This source code is distributed in the hope that it will be useful, but without any warranty.
9 | %
10 | % MatLab OR GNU Octave, version 3.8.1 available at http://www.gnu.org/software/octave/
11 | % MATPOWER 5.1 available at http://www.pserc.cornell.edu/matpower/
12 |
clear all
close all
clc

% Load grid model
%Vbase = 4160/sqrt(3);
%Sbase = 5e6;
%Zbase = Vbase^2/Sbase;
Zbase = 1;
Vbase = 4800;
Sbase = (Vbase^2)/Zbase;

phase = 1;

[Pbus, Qbus, Ybus] = extract_phase_37feeder(phase, Zbase, Sbase);
Sbus = complex(Pbus, Qbus);
n = size(Ybus, 1);
%%
% Bus classification: bus 1 is the reference bus, there are no PV buses and
% every remaining bus is a PQ bus (range derived from n instead of a
% hard-coded 36).
ref_idx = 1;
pv_idx = [];%[4, 7, 9. 10, 11, 13, 16, 17, 20, 22, 23, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36];
pq_idx = 2:n;%[2, 3, 5, 6, 8, 12, 14, 15, 16, 18, 19, 21, 24, 25, 27];
% Exact solution via MatPower (disabled):
%[results, success, i] = gausspf(Ybus, Sbus, ones(n,1), ref_idx, pv_idx, pq_idx, mpoption('VERBOSE', 1, 'OUT_ALL',0));

%%
%%%%% LINEARIZED MODEL %%%%%

%%%%% Linearization point (given voltage magnitude and angle)
%Vbus = NaN(n,1);
%Vbus(mpc.gen(:,GEN_BUS)) = mpc.gen(:,VG);
Vbus = ones(n,1);

% Flat voltage profile
V0 = ones(n,1);
A0 = zeros(n,1);

% Corresponding current injection
J0 = Ybus*(V0.*exp(1j*A0));

% Corresponding power injection
S0 = V0.*exp(1j*A0).*conj(J0);
P0 = real(S0);
Q0 = imag(S0);

%%%%% Linear system of equations for the grid model

UU = bracket(diag(V0.*exp(1j*A0)));
JJ = bracket(diag(conj(J0)));
NN = Nmatrix(2*n);
YY = bracket(Ybus);
PP = Rmatrix(ones(n,1), zeros(n,1));

AA = zeros(2*n,4*n);
BB = zeros(2*n,1);

% Column offsets of the unknown blocks in x = [dV; dA; dP; dQ]
V_OFFSET = 0;
A_OFFSET = 1*n;
P_OFFSET = 2*n;
Q_OFFSET = 3*n;

% bus models: two rows per bus fixing the quantities prescribed for its type

for bus = 1:n
    row = 2*(bus-1)+1;
    if (any(bus == pq_idx(:)))
        % PQ bus: active and reactive power are prescribed
        AA(row,P_OFFSET+bus) = 1;
        AA(row+1,Q_OFFSET+bus) = 1;
        BB(row) = Pbus(bus) - P0(bus);
        BB(row+1) = Qbus(bus) - Q0(bus);
    elseif (any(bus == pv_idx(:)))
        % PV bus: active power and voltage magnitude are prescribed
        AA(row,P_OFFSET+bus) = 1;
        AA(row+1,V_OFFSET+bus) = 1;
        BB(row) = Pbus(bus) - P0(bus);
        BB(row+1) = Vbus(bus) - V0(bus);
    elseif (any(bus == ref_idx(:)))
        % reference bus: voltage magnitude and angle are prescribed
        AA(row,V_OFFSET+bus) = 1;
        AA(row+1,A_OFFSET+bus) = 1;
        BB(row) = Vbus(bus) - V0(bus);
        BB(row+1) = 0 - A0(bus);
    end
end

Agrid = [(JJ + UU*NN*YY)*PP -eye(2*n)];
Amat = [Agrid; AA];
Bmat = [zeros(2*n,1); BB];

x = Amat\Bmat;

approxVM = V0 + x(1:n);
approxVA = (A0 + x(n+1:2*n))/pi*180;


%%
% Check my implementation is correct: solve the reduced system obtained by
% dropping the first bus and compare with the full solution above.
A11 = (JJ + UU*NN*YY)*PP;
A21 = AA(:, 1:2*n);
A22 = AA(:, 2*n+1:4*n);

n_new = n-1;

delta_P = reshape(Pbus(2:end), n_new, 1)-P0(2:end);
delta_Q = reshape(Qbus(2:end), n_new, 1)-Q0(2:end);

% remove the first bus (its magnitude and angle rows/columns);
A11(n+1, :) = [];
A11(:, n+1) = [];
A11(1, :) = [];
A11(:, 1) = [];

% The explicit inverse is needed below for R and B, so it is formed once
% and reused for the solve.
H = inv(A11);

x_hat = H * [delta_P; delta_Q];
%x_hat = pinv([A11; A21]) * ([eye(2*n); -A22] * [Pbus.'; Qbus.'] + Bmat);

myVM = V0(2:end) + x_hat(1:n_new);
myVA = (A0(2:end) + x_hat(n_new+1:2*n_new))/pi*180;

subplot(211)
%plot(1:n, approxVM, 'k*')
plot(2:n, myVM(1:end), 'ko', 1:n, approxVM, 'k*')
%plot(1:n, results.bus(:,VM), 'ko', 1:n, approxVM, 'k*')
ylabel('magnitudes [p.u.]')
xlim([0 n])

subplot(212)
%plot(1:n, approxVA, 'k*')
plot(2:n, myVA, 'ko', 1:n, approxVA, 'k*')
%plot(1:n, results.bus(:,VA), 'ko', 1:n, approxVA, 'k*')
%ylabel('angles [deg]')
xlim([0 n])
%%
% Blocks of the inverse: the first n_new rows give the voltage-magnitude
% deviation; R multiplies delta_P and B multiplies delta_Q.
R = H(1:n_new, 1:n_new);
B = H(1:n_new, n_new+1:2*n_new);
146 |
147 |
148 |
--------------------------------------------------------------------------------
/run_exp1.sh:
--------------------------------------------------------------------------------
1 |
# Experiment 1: five runs of main_IW.py followed by five runs of the
# IW_ablation.py baseline. Each entry below is "<run index> <lam> <seed>".

# NOTE(review): run 4 uses --lam 100 while runs 0-3 use --lam 10 -- confirm
# this is intended.
for run in "0 10 42" "1 10 0" "2 10 105" "3 10 7" "4 100 59"; do
    set -- $run
    python main_IW.py --exp_name "test_w-aux_$1" --lam "$2" --seed "$3"
done

for run in "0 10 42" "1 10 0" "2 10 105" "3 10 37" "4 10 49"; do
    set -- $run
    python IW_ablation.py --exp_name "vanilla-update_w-aux_$1" --lam "$2" --seed "$3"
done

# Disabled runs, kept for reference.

#python main_IW.py --exp_name clip_no-update --lam 0 --seed 42 --update_episode -1


#python main_IW.py --exp_name nn-w-proj_0 --lam 0 --seed 42
#python main_IW.py --exp_name nn-w-proj_1 --lam 0 --seed 0
#python main_IW.py --exp_name nn-w-proj_2 --lam 0 --seed 15
#python main_IW.py --exp_name nn-w-proj_3 --lam 0 --seed 37
#python main_IW.py --exp_name nn-w-proj_4 --lam 0 --seed 49

#python IW_ablation.py --exp_name _0 --lam 0 --seed 42
#python IW_ablation.py --exp_name vannilla-update_2 --lam 0 --seed 0
#python IW_ablation.py --exp_name vannilla-update_3 --lam 0 --seed 15
#python IW_ablation.py --exp_name vannilla-update_4 --lam 0 --seed 37
#python IW_ablation.py --exp_name vannilla-update_5 --lam 0 --seed 49
28 |
--------------------------------------------------------------------------------
/utils/inverter_utils.py:
--------------------------------------------------------------------------------
1 | # Helper Functions
2 | import numpy as np
3 | import torch
4 | import torch.utils.data as data
5 | import pdb
6 |
class Replay_Memory():
    """Bounded FIFO buffer of ``(state, Sbus, P_av)`` transitions.

    Args:
        memory_size: Maximum number of transitions kept; once exceeded, the
            oldest transitions are dropped.
    """

    def __init__(self, memory_size=86400):
        self.memory_size = memory_size
        self.storage = []

    def sample_batch(self, batch_size=32):
        """Draw ``batch_size`` stored transitions uniformly with replacement.

        Returns:
            A tuple ``(state, Sbus, P_av)`` where ``state`` is a float32
            torch tensor and ``Sbus`` / ``P_av`` are numpy arrays, each
            stacked along a new leading batch axis.

        Raises:
            ValueError: If the memory is empty (raised by
                ``np.random.choice``).
        """
        rand_idx = np.random.choice(len(self.storage), batch_size)
        batch = [self.storage[i] for i in rand_idx]

        state = [transition[0] for transition in batch]
        Sbus = [transition[1] for transition in batch]
        P_av = [transition[2] for transition in batch]
        return torch.tensor(np.stack(state)).float(), np.stack(Sbus), np.stack(P_av)

    def append(self, transition):
        """Store ``transition`` and evict the oldest entries beyond capacity."""
        self.storage.append(transition)
        # Trim in place by the exact overflow. The previous slice
        # `storage[-memory_size:]` kept everything when memory_size == 0
        # (because `[-0:]` is the whole list) and rebuilt the list each time.
        overflow = len(self.storage) - self.memory_size
        if overflow > 0:
            del self.storage[:overflow]
29 |
30 |
31 |
--------------------------------------------------------------------------------
/utils/network.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.utils.data as data
4 | import numpy as np
5 |
6 | # Implement a vanilla MLP here
class MLP(nn.Module):
    """Vanilla multi-layer perceptron with ReLU after every hidden layer.

    Args:
        input_size: Number of input features.
        hiddens: Sequence with the width of each hidden layer (may be empty,
            in which case the network is a single linear map).
        output_size: Number of output features.
    """

    def __init__(self, input_size, hiddens, output_size):
        super(MLP, self).__init__()
        # Copy into a list so any sequence works; `[input_size] + hiddens`
        # raised TypeError when `hiddens` was a tuple.
        widths = [input_size] + list(hiddens)
        self.n_layers = len(widths) - 1

        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
            # layers.append(nn.BatchNorm1d(fan_out))
        # Output layer has no activation.
        layers.append(nn.Linear(widths[-1], output_size))
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Apply the layers in order to ``x`` and return the result."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out
26 |
27 |
class LSTM(nn.Module):
    """LSTM over a disturbance sequence, initialised from an encoded state.

    Two small MLPs map the state to the initial hidden and cell state of the
    LSTM; the LSTM outputs are passed through a shared decoder and two
    separate linear heads.

    Args:
        n_state: Dimension of the state vector.
        n_action: Dimension of each head's output.
        n_dist: Dimension of the disturbance fed to the LSTM at each step.
        lstm_hidden: Hidden size of the LSTM.
        hiddens: Sequence with the widths of the decoder's hidden layers.
        lstm_layer: Number of stacked LSTM layers.
        bi: Whether the LSTM is bidirectional.
    """

    # The default for `hiddens` is an immutable tuple (previously the mutable
    # list [4]); any sequence is accepted and copied into a list.
    def __init__(self, n_state, n_action, n_dist, lstm_hidden = 8, hiddens = (4,), lstm_layer = 2, bi = False):
        super(LSTM, self).__init__()

        self.rnn = nn.LSTM(n_dist, lstm_hidden, lstm_layer, dropout = 0, bidirectional = bi)
        self.n_direction = 2 if bi else 1

        self.lstm_hidden = lstm_hidden
        self.lstm_layer = lstm_layer

        # encoder1 produces h0, encoder2 produces c0 (see forward).
        self.encoder1 = self._make_state_encoder(n_state)
        self.encoder2 = self._make_state_encoder(n_state)

        widths = [self.n_direction * lstm_hidden] + list(hiddens)

        decoder = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            decoder.append(nn.Linear(fan_in, fan_out))
            decoder.append(nn.ReLU())
        self.decoder = nn.ModuleList(decoder)

        # mu and sigma2 are learned separately
        self.final_layer = nn.Linear(widths[-1], n_action)
        self.final_layer_ = nn.Linear(widths[-1], n_action)

    def _make_state_encoder(self, n_state):
        """Build the MLP mapping a state to one flattened initial LSTM state."""
        n_out = self.lstm_hidden * self.n_direction * self.lstm_layer
        return nn.Sequential(
            nn.Linear(n_state, 4),
            nn.ReLU(),
            nn.Linear(4, n_out),
            nn.ReLU())

    def forward(self, state, disturbance):
        """Return ``(mu, sigma_sq)``, each of shape ``T x n x n_action``.

        Args:
            state: ``n x n_state`` tensor.
            disturbance: ``T x n x n_dist`` tensor.

        The second output is the raw output of its linear head; no
        positivity constraint is applied here.
        """
        n = state.shape[0]
        T = disturbance.shape[0]
        n_init = self.n_direction * self.lstm_layer

        # (layer x direction) x n x lstm_hidden, the layout nn.LSTM expects
        h0 = self.encoder1(state).reshape(n, n_init, self.lstm_hidden).transpose(0, 1)
        c0 = self.encoder2(state).reshape(n, n_init, self.lstm_hidden).transpose(0, 1)

        out, (hn, cn) = self.rnn(disturbance, (h0, c0))  # out: T x n x (lstm_hidden x n_direction)
        # Flatten time and batch so the decoder sees one row per (t, sample).
        out = out.reshape(T * n, self.lstm_hidden * self.n_direction)
        for layer in self.decoder:
            out = layer(out)
        mu = self.final_layer(out).reshape(T, n, -1)
        sigma_sq = self.final_layer_(out).reshape(T, n, -1)
        return mu, sigma_sq
86 |
87 | '''
88 | class Replay_Memory():
89 | def __init__(self, memory_size=288, burn_in=32):
90 | self.memory_size = memory_size
91 | self.burn_in = burn_in
92 | # the memory is as a list of transitions (S,A,R,S,D).
93 | self.storage = []
94 |
95 | def sample_batch(self, batch_size=32):
96 | # This function returns a batch of randomly sampled transitions - i.e. state, action, reward, next state, terminal flag tuples.
97 | # You will feed this to your model to train.
98 | rand_idx = np.random.choice(len(self.storage), batch_size)
99 | return [self.storage[i] for i in rand_idx]
100 |
101 | def append(self, transition):
102 | # appends transition to the memory.
103 | self.storage.append(transition)
104 | # only keeps the latest memory_size transitions
105 | if len(self.storage) > self.memory_size:
106 | self.storage = self.storage[-self.memory_size:]
107 | '''
108 |
--------------------------------------------------------------------------------
/utils/ppo_utils.py:
--------------------------------------------------------------------------------
1 | # Helper Functions
2 | import numpy as np
3 | import torch
4 | import torch.utils.data as data
5 | import pdb
6 |
def make_dict(obs_name, obs):
    """Map each observation name to the value at the same position.

    Pairs are formed with ``zip``, so extra entries of the longer input are
    ignored.
    """
    return {name: value for name, value in zip(obs_name, obs)}
10 |
def R_func(obs_dict, action, eta):
    """Return the reward, which is currently just the negated action.

    ``obs_dict`` and ``eta`` are accepted but unused; they were inputs of a
    disabled comfort term:
    - 0.5 * eta[int(obs_dict["Occupancy Flag"])] * (obs_dict["Indoor Air Temp."] - obs_dict["Indoor Temp. Setpoint"] - 1)**2
    """
    return -action
14 |
15 | # Calculate the advantage estimate
def Advantage_func(rewards, gamma):
    """Return the discounted reward-to-go for every step as a (T, 1) tensor.

    Entry ``t`` equals ``rewards[t] + gamma * rewards[t+1] + ...``; no
    baseline is subtracted here. The result is a float64 tensor.
    """
    n_steps = len(rewards)
    advantage = torch.zeros((n_steps, 1)).double()

    # Accumulate backwards so each step reuses the return of the next one.
    running = torch.zeros(1, 1).double()
    for step in range(n_steps - 1, -1, -1):
        running = gamma * running + rewards[step]
        advantage[step] = running
    return advantage
25 |
class Dataset(data.Dataset):
    """Index-aligned view over seven parallel sequences.

    Item ``i`` is the tuple ``(state, action, disturbance, advantage,
    old_logprob, x_upper, x_lower)`` taken at position ``i`` of each input.
    """

    def __init__(self, states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers):
        self.states, self.actions = states, actions
        self.disturbance = disturbance
        self.advantages, self.old_logprobs = advantages, old_logprobs
        self.x_uppers, self.x_lowers = x_uppers, x_lowers

    def __len__(self):
        # The length is defined by `states`.
        return len(self.states)

    def __getitem__(self, index):
        columns = (
            self.states,
            self.actions,
            self.disturbance,
            self.advantages,
            self.old_logprobs,
            self.x_uppers,
            self.x_lowers,
        )
        return tuple(column[index] for column in columns)
41 |
class Replay_Memory():
    """Rollout buffer: seven parallel lists that are stacked by `sample`."""

    def __init__(self, ):
        # Start with empty buffers.
        self.clear_memory()

    def clear_memory(self, ):
        """Reset every buffer to an empty list."""
        self.advantages = []
        self.states = []
        self.old_logprobs = []
        self.actions = []
        self.disturbance = []  # T x n_dist
        self.x_uppers = []
        self.x_lowers = []

    def sample(self):
        """Stack everything stored so far, empty the buffers and return it.

        Returns:
            ``(states, actions, disturbance, advantages, old_logprobs,
            x_uppers, x_lowers)``; ``advantages`` and ``old_logprobs`` are
            flattened to 1-D, ``disturbance`` is stacked along a new leading
            axis and the rest are stacked vertically.
        """
        states = torch.vstack(self.states)
        actions = torch.vstack(self.actions)
        advantages = torch.vstack(self.advantages).reshape(-1)
        old_logprobs = torch.vstack(self.old_logprobs).reshape(-1)
        disturbance = torch.stack(self.disturbance)  # n x T x dist
        x_uppers = torch.vstack(self.x_uppers)
        x_lowers = torch.vstack(self.x_lowers)

        batch = (states, actions, disturbance, advantages, old_logprobs, x_uppers, x_lowers)
        self.clear_memory()
        return batch
72 |
73 |
74 |
--------------------------------------------------------------------------------