├── README.md
├── algorithm
│   ├── DDPG.py
│   ├── QLearning.py
│   ├── VRE.py
│   ├── __init__.py
│   └── model.py
├── market
│   ├── __init__.py
│   ├── thirty_bus.py
│   └── three_bus.py
├── results
│   ├── DDPG_30-bus.png
│   ├── DDPG_3_bus.png
│   ├── Q-Learning.png
│   └── VRE.png
└── run
    ├── run_DDPG_3-bus.py
    ├── run_DDPG_30-bus.py
    ├── run_Q-Learning_3-bus.py
    └── run_VRE_3-bus.py

/README.md:
--------------------------------------------------------------------------------
# agent-based-modeling-in-electricity-market-using-DDPG-algorithm
Agent-Based Modeling in Electricity Market Using Deep Deterministic Policy Gradient Algorithm

## Dependencies
- Python 3.7
- PyTorch 1.0.1

## Run
* VRE algorithm in the 3-bus system:

```
python run_VRE_3-bus.py
```

![VRE](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/VRE.png)

* Q-Learning algorithm in the 3-bus system:

```
python run_Q-Learning_3-bus.py
```

![Q-Learning](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/Q-Learning.png)

* DDPG algorithm in the 3-bus system:

```
python run_DDPG_3-bus.py
```

![DDPG-3](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/DDPG_3_bus.png)

* DDPG algorithm in the IEEE 30-bus system:

```
python run_DDPG_30-bus.py
```

![DDPG-30](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/DDPG_30-bus.png)

## TODO
--------------------------------------------------------------------------------
/algorithm/DDPG.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
import torch.nn as nn


class DDPG:
    def __init__(self, s_dim, a_dim, ANet, CNet, memory_capacity=1000, gamma=0.0, lr_a=0.001, lr_c=0.001):
        self.a_dim, self.s_dim = a_dim, s_dim
        self.gamma = gamma
        self.memory_capacity = memory_capacity
        self.memory = np.zeros((self.memory_capacity, s_dim * 2 + a_dim + 1), dtype=np.float32)
        self.pointer = 0
        self.Actor_eval = ANet(s_dim, a_dim)
        self.Actor_target = ANet(s_dim, a_dim)
        self.Critic_eval = CNet(s_dim, a_dim)
        self.Critic_target = CNet(s_dim, a_dim)
        self.atrain = torch.optim.Adam(self.Actor_eval.parameters(), lr=lr_a)
        self.ctrain = torch.optim.Adam(self.Critic_eval.parameters(), lr=lr_c)
        self.loss_td = nn.MSELoss()

    def choose_action(self, s):
        s = torch.unsqueeze(torch.FloatTensor(s), 0)
        return self.Actor_eval(s)[0].detach()

    def learn(self, batch_size=100, tau=0.01):
        # soft-replace the target networks towards the eval networks
        for x in self.Actor_target.state_dict().keys():
            eval('self.Actor_target.' + x + '.data.mul_((1 - tau))')
            eval('self.Actor_target.' + x + '.data.add_(tau*self.Actor_eval.' + x + '.data)')
        for x in self.Critic_target.state_dict().keys():
            eval('self.Critic_target.' + x + '.data.mul_((1 - tau))')
            eval('self.Critic_target.' + x + '.data.add_(tau*self.Critic_eval.' + x + '.data)')

        # sample a random minibatch from the replay memory
        indices = np.random.choice(self.memory_capacity, batch_size)
        bt = self.memory[indices, :]
        bs = torch.FloatTensor(bt[:, :self.s_dim])
        ba = torch.FloatTensor(bt[:, self.s_dim: self.s_dim + self.a_dim])
        br = torch.FloatTensor(bt[:, -self.s_dim - 1: -self.s_dim])
        bs_ = torch.FloatTensor(bt[:, -self.s_dim:])

        # actor update: maximize the critic's value of the actor's action
        a = self.Actor_eval(bs)
        q = self.Critic_eval(bs, a)
        loss_a = -torch.mean(q)
        self.atrain.zero_grad()
        loss_a.backward()
        self.atrain.step()

        # critic update: one-step TD target (with the default gamma=0 it reduces to the immediate reward)
        a_ = self.Actor_target(bs_)
        q_ = self.Critic_target(bs_, a_)
        q_target = br + self.gamma * q_
        q_v = self.Critic_eval(bs, ba)
        td_error = self.loss_td(q_target, q_v)
        self.ctrain.zero_grad()
        td_error.backward()
        self.ctrain.step()

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.memory_capacity
        self.memory[index, :] = transition
        self.pointer += 1
--------------------------------------------------------------------------------
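The `learn()` method above performs the DDPG soft update by building attribute paths from `state_dict()` keys and running them through `eval()`. That works for the simple `nn.Linear`-only models in `model.py`, but it is fragile. A minimal, eval-free sketch of an equivalent update is shown below; `soft_update` is a hypothetical helper name, not part of the repository, and iterating over `parameters()` touches the same weight and bias tensors because these networks contain no buffers.

```
import torch

def soft_update(target_net, eval_net, tau=0.01):
    # Polyak-average the eval network's parameters into the target network
    with torch.no_grad():
        for t_param, e_param in zip(target_net.parameters(), eval_net.parameters()):
            t_param.data.mul_(1.0 - tau).add_(tau * e_param.data)
```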
/algorithm/QLearning.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd


class QLearningAgents:
    def __init__(self, n_agents, action_space, gamma=0.0):
        self.gamma = gamma
        self.n_agents = n_agents
        self.agents = [QLearningTable(action_space, gamma=self.gamma) for _ in range(self.n_agents)]

    def select_action(self, obs):
        action = [self.agents[i].choose_action(obs) for i in range(self.n_agents)]
        return np.array(action)

    def learn(self, s, a, r, s_):
        if self.n_agents == 1:
            self.agents[0].learn(s, a, r, s_)
        else:
            for i in range(self.n_agents):
                self.agents[i].learn(s, a[i], r[i], s_)


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, gamma=0.0, e_greedy=0.9):
        self.actions = actions  # list/array of available actions
        self.lr = learning_rate
        self.gamma = gamma
        self.epsilon = e_greedy
        self.n_steps = 0
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        self.n_steps += 1
        # anneal the exploration rate towards a floor of 0.05
        if self.epsilon > 0.1:
            self.epsilon = 0.9993 ** self.n_steps
        else:
            self.epsilon = 0.05

        if np.random.uniform() > self.epsilon:
            # choose best action
            state_action = self.q_table.loc[observation, :]
            # some actions may have the same value; randomly choose one of them
            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
        else:
            # choose random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
        else:
            q_target = r  # next state is terminal
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(
                pd.Series(
                    [0]*len(self.actions),
                    index=self.q_table.columns,
                    name=state,
                )
            )
--------------------------------------------------------------------------------
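`check_state_exist` relies on `DataFrame.append`, which exists in the pandas versions contemporary with the pinned Python 3.7 / PyTorch 1.0.1 stack but was removed in pandas 2.0. If the code is run on a newer pandas, a minimal drop-in sketch using `pd.concat` would be (assumption: newer pandas; not needed otherwise):

```
def check_state_exist(self, state):
    # same behaviour as above, but without DataFrame.append (removed in pandas 2.0)
    if state not in self.q_table.index:
        new_row = pd.Series([0.0] * len(self.actions), index=self.q_table.columns, name=state)
        self.q_table = pd.concat([self.q_table, new_row.to_frame().T])
```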
/algorithm/VRE.py:
--------------------------------------------------------------------------------
import numpy as np


class RothErevAgents:
    def __init__(self, n_agents, action_space):
        self.n_agents = n_agents
        self.agents = [RothErev(action_space) for _ in range(self.n_agents)]

    def select_action(self):
        action = [self.agents[i].choose_action() for i in range(self.n_agents)]
        return np.array(action)

    def learn(self, reward, r=0.1, e=0.2):
        for i in range(self.n_agents):
            self.agents[i].learn(reward[i], r=r, e=e)


class RothErev:
    def __init__(self, action_space):
        self.action_space = action_space
        self.n_strategies = len(action_space)
        self.strategy_value = np.ones(self.n_strategies) * 1000
        self.latest_ind = None

    def choose_action(self):
        # Gibbs/Boltzmann choice over the strategy propensities
        k = 2.0
        c = k / self.n_strategies * self.strategy_value.sum()
        exp_strategy_value = np.exp(self.strategy_value / c)
        sum_exp_strategy_value = exp_strategy_value.sum()
        strategy_prob = exp_strategy_value / sum_exp_strategy_value
        cumsum_strategy_prob = strategy_prob.cumsum(0)
        random_number = np.random.rand()
        for ind in range(self.n_strategies):
            if random_number <= cumsum_strategy_prob[ind]:
                self.latest_ind = ind

                return self.action_space[ind]

        return self.action_space[self.latest_ind]

    def learn(self, reward, r=0.1, e=0.9):
        for ind in range(self.n_strategies):
            if ind == self.latest_ind:
                self.strategy_value[ind] = (1 - r) * self.strategy_value[ind] + (1 - e) * reward
            else:
                self.strategy_value[ind] = (1 - r) * self.strategy_value[ind] + \
                                           e * self.strategy_value[ind] / self.n_strategies
--------------------------------------------------------------------------------
/algorithm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/algorithm/__init__.py
--------------------------------------------------------------------------------
/algorithm/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class ANet1(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(ANet1, self).__init__()
        self.FC1 = nn.Linear(s_dim, 128)
        self.FC2 = nn.Linear(128, 64)
        self.FC3 = nn.Linear(64, a_dim)

    def forward(self, obs):
        result = F.relu(self.FC1(obs))
        result = F.relu(self.FC2(result))
        result = torch.tanh(self.FC3(result))
        return result


class CNet1(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(CNet1, self).__init__()
        self.FC1 = nn.Linear(s_dim, 128)
        self.FC2 = nn.Linear(128 + a_dim, 128)
        self.FC3 = nn.Linear(128, 64)
        self.FC4 = nn.Linear(64, 1)

    def forward(self, obs, acts):
        result = F.relu(self.FC1(obs))
        combined = torch.cat([result, acts], 1)
        result = F.relu(self.FC2(combined))
        return self.FC4(F.relu(self.FC3(result)))


class ANet2(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(ANet2, self).__init__()
        self.fc1 = nn.Linear(s_dim, 64)
        self.fc1.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(64, a_dim)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.out(x)
        x = torch.tanh(x)
        return x


class CNet2(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(CNet2, self).__init__()
        self.fcs = nn.Linear(s_dim, 64)
        self.fcs.weight.data.normal_(0, 0.1)
        self.fca = nn.Linear(a_dim, 64)
        self.fca.weight.data.normal_(0, 0.1)
        self.fcsa = nn.Linear(64, 64)
        self.fcsa.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(64, 1)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, s, a):
        x = self.fcs(s)
        y = self.fca(a)
        xy = F.relu(x + y)
        xy = F.relu(self.fcsa(xy))
        actions_value = self.out(xy)
        return actions_value
--------------------------------------------------------------------------------
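All four networks share a simple interface: the actors map a batch of states to tanh-bounded actions, and the critics map (state, action) batches to a scalar Q-value. A quick, illustrative shape check is sketched below; the `6`/`1` dimensions mirror the 30-bus run script, the batch size is arbitrary, and the snippet assumes the repository root is on the Python path. Nothing here is part of the repository itself.

```
import torch
from algorithm.model import ANet1, CNet1

s_dim, a_dim, batch = 6, 1, 4
actor, critic = ANet1(s_dim, a_dim), CNet1(s_dim, a_dim)

s = torch.rand(batch, s_dim)
a = actor(s)        # (4, 1), values in (-1, 1) because of the final tanh
q = critic(s, a)    # (4, 1), one Q-value per (state, action) pair
print(a.shape, q.shape)
```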
/market/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/market/__init__.py
--------------------------------------------------------------------------------
/market/thirty_bus.py:
--------------------------------------------------------------------------------
import numpy as np
from cvxopt import matrix, solvers
solvers.options['show_progress'] = False


# Market clearing with network congestion considered
def market_clearing(a_declare):

    # Intercepts and slopes of the supply curves
    a_real = np.array([18.0, 20.0, 25.0, 22.0, 22.0, 16.0])
    b_real = np.array([0.25, 0.20, 0.20, 0.20, 0.20, 0.25])

    # Intercepts and slopes of the demand curves
    c_real = np.array([120.0, 130.0, 120.0, 135.0, 150.0, 95.0, 150.0, 125.0, 100.0, 150.0,
                       90.0, 95.0, 90.0, 90.0, 160.0, 120.0, 150.0, 100.0, 95.0, 125.0]) * -1
    d_real = np.array([5.0, 5.5, 4.5, 5.0, 5.0, 3.0, 5.5, 4.0, 4.5, 5.0,
                       3.5, 3.5, 3.5, 3.5, 6.0, 5.0, 6.0, 4.5, 3.5, 4.5])

    # Upper and lower limits of generator output
    p_min = np.array([5.0, 5.0, 5.0, 5.0, 5.0, 5.0])
    p_max = np.array([100.0, 80.0, 50.0, 80.0, 50.0, 120.0])

    # Upper and lower limits of load demand
    q_min = np.zeros(20)
    q_max = np.array([24.0, 23.63636364, 26.66666667, 27.0, 30.0,
                      31.66666667, 27.27272727, 31.25, 22.22222222, 30.0,
                      25.71428571, 27.14285714, 25.71428571, 25.71428571, 26.66666667,
                      24.0, 25.0, 22.22222222, 27.14285714, 27.77777778])

    J_g = ([[-0.6616, -0.3384, -0.2243, -0.3384, -0.1357, -0.3016, -0.4039, -0.1357, 0.1357, -0.1287, -0.1260, -0.0720, 0.0000, -0.1260, -0.1588, 0.0000, -0.0279, -0.0986, -0.0323, -0.0279, -0.0323, -0.0038, -0.0038, -0.0038, 0.0038, 0.0323, -0.1463, -0.0878, -0.1463, -0.1227, -0.2340, -0.1227, -0.3567, 0.0000, -0.3567, -0.6433, -0.4082, -0.5918, -0.4082, -0.1287, -0.5146],
            [0.1775, -0.1775, -0.3028, -0.1775, -0.1613, -0.3585, -0.3263, -0.1613, 0.1613, -0.1288, -0.1285, -0.0734, 0.0000, -0.1285, -0.1539, 0.0000, -0.0273, -0.0965, -0.0301, -0.0273, -0.0301, -0.0025, -0.0025, -0.0025, 0.0025, 0.0301, -0.1466, -0.0879, -0.1466, -0.1213, -0.2345, -0.1213, -0.3558, 0.0000, -0.3558, -0.6442, -0.4082, -0.5918, -0.4082, -0.1288, -0.5154],
            [-0.0238, 0.0238, 0.0406, 0.0238, -0.0200, -0.0444, -0.3723, -0.0200, 0.0200, -0.1132, 0.0824, 0.0471, 0.0000, 0.0824, 0.4367, 1.0000, -0.0763, -0.2699, -0.2171, -0.0763, -0.2171, -0.1063, -0.1063, -0.1063, 0.1063, 0.2171, -0.1212, -0.0727, -0.1212, -0.2399, -0.1939, -0.2399, -0.4338, 0.0000, -0.4338, -0.5662, -0.4082, -0.5918, -0.4082, -0.1132, -0.4530],
            [-0.0069, 0.0069, 0.0117, 0.0069, -0.0058, -0.0128, -0.1076, -0.0058, 0.0058, -0.1036, 0.2492, 0.1424, 0.0000, 0.2492, 0.1262, 0.0000, 0.0078, 0.0277, 0.0907, 0.0078, 0.0907, 0.0619, 0.0619, 0.0619, -0.0619, -0.0907, 0.3402, 0.2041, 0.3402, -0.0264, -0.4557, -0.0264, -0.4821, 0.0000, -0.4821, -0.5179, -0.4082, -0.5918, -0.4082, -0.1036, -0.4143],
            [-0.0147, 0.0147, 0.0250, 0.0147, -0.0123, -0.0273, -0.2293, -0.0123, 0.0123, -0.0982, 0.1413, 0.0807, 0.0000, 0.1413, 0.2689, 0.0000, 0.0751, 0.2659, -0.0721, 0.0751, -0.0721, -0.1086, -0.1086, -0.1086, 0.1086, 0.0721, 0.0258, 0.0155, 0.0258, 0.4496, 0.0413, -0.5504, -0.5091, 0.0000, -0.5091, -0.4909, -0.4082, -0.5918, -0.4082, -0.0982, -0.3927],
            [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.4082, -0.5918, -0.4082, 0.0000, 0.0000]])

    J = np.array([[0.1287, 0.1288, 0.1132, 0.1036, 0.0982, 0.0000, -0.1288, -0.1281, -0.1280, -0.1297, 0.7355, -0.1095, -0.1132, -0.1105, -0.1083, -0.1117, -0.1102, -0.1087, -0.1090, -0.1091, -0.1049, -0.0982, -0.0845, -0.0329, 0.0000, 0.0000],
                  [0.0279, 0.0273, 0.0763, -0.0078, -0.0751, 0.0000, -0.0273, -0.0297, -0.0301, -0.0247, -0.0236, 0.0012, -0.0763, 0.4634, 0.1093, -0.0433, -0.0120, 0.0716, 0.0493, 0.0373, 0.0064, 0.0751, 0.0290, 0.0113, 0.0000, 0.0000],
                  [-0.0323, -0.0301, -0.2171, 0.0907, -0.0721, 0.0000, 0.0301, 0.0393, 0.0408, 0.0202, 0.0174, -0.1124, 0.2171, 0.1742, 0.1413, 0.5024, 0.7735, 0.0527, 0.0004, -0.0278, -0.0955, 0.0721, -0.0213, -0.0083, 0.0000, 0.0000]])

    J_max = np.array([10, 8, 10, 10, 8, 10])

    P = matrix(np.diag(np.append(b_real, d_real)))
    q = matrix(np.append(a_declare, c_real))
    G = matrix(np.vstack((J, -J, np.diag(-np.ones(26)), np.diag(np.ones(26)))))
    h = matrix(np.hstack((J_max, -p_min, -q_min, p_max, q_max)))
    A = matrix(np.hstack((-np.ones(6), np.ones(20)))).T
    b = matrix(0.0)

    sv = solvers.qp(P, q, G, h, A, b)

    miu1, miu2 = np.zeros((41, 1)), np.zeros((41, 1))
    miu1[9], miu1[16], miu1[25] = sv['z'][0:3]
    miu2[9], miu2[16], miu2[25] = sv['z'][3:6]

    nodal_price_g = (np.ones((6, 1)) * sv['y'][0] + np.dot(J_g, miu1 - miu2)).squeeze()

    mc_amount = np.array(sv['x'][:6]).squeeze()

    cost_real = 0.5 * b_real * mc_amount ** 2 + a_real * mc_amount
    cost_declare = mc_amount * np.transpose(nodal_price_g)
    profit = cost_declare - cost_real

    return nodal_price_g, profit


if __name__ == '__main__':

    alpha = np.array([21.388, 23.807, 34.317, 27.235, 33.609, 24.848])
    print(market_clearing(alpha))
--------------------------------------------------------------------------------
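For readers unfamiliar with cvxopt: `solvers.qp(P, q, G, h, A, b)` solves a quadratic program of the form

$$\min_{x}\;\tfrac{1}{2}x^{\top}Px + q^{\top}x \quad \text{subject to} \quad Gx \le h,\;\; Ax = b$$

so `market_clearing` is a dispatch that maximizes the declared social welfare (consumer benefit minus declared generation cost). In `thirty_bus.py` the decision vector stacks the six generator outputs and the twenty demands (two and two in `three_bus.py` below); the single equality row enforces the supply-demand balance, the first six inequality rows are the line-flow limits in both directions (`J` and `-J` against `J_max`), and the remaining rows are the box limits on generation and demand. The nodal prices are then assembled from the balance-constraint dual `sv['y']` and the flow-limit duals `sv['z']` through the sensitivity matrix `J_g`. This note is editorial commentary, not part of the repository.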
/market/three_bus.py:
--------------------------------------------------------------------------------
import numpy as np
from cvxopt import matrix, solvers
solvers.options['show_progress'] = False


# Market clearing with network congestion considered
def market_clearing(alpha):

    # Intercepts and slopes of the supply curves
    a_real = np.array([15.0, 18.0])
    b_real = np.array([0.01, 0.008])

    # Intercepts and slopes of the demand curves
    c_real = np.array([40.0, 40.0]) * -1
    d_real = np.array([0.08, 0.06])

    # Upper and lower limits of generator output
    p_min = np.array([0.0, 0.0])
    p_max = np.array([500.0, 500.0])

    # Upper and lower limits of load demand
    q_min = np.zeros(2)
    q_max = np.array([500.0, 666.666666666667])

    J_g = ([[-0.333333333333333, -0.333333333333333, -0.666666666666667],
            [0.333333333333334, -0.666666666666667, -0.333333333333333],
            [0, 0, 0]])

    J = np.array([[-0.333333333333333, 0.0, 0.333333333333333, -0.333333333333334],
                  [-0.333333333333333, 0.0, 0.333333333333333, 0.666666666666667],
                  [-0.666666666666667, 0.0, 0.666666666666667, 0.333333333333333]])

    J_max = np.array([25.0, 1000.0, 1000.0, 25.0, 1000.0, 1000.0])

    P = matrix(np.diag(np.append(b_real, d_real)))
    q = matrix(np.append(alpha, c_real))
    G = matrix(np.vstack((J, -J, np.diag(-np.ones(4)), np.diag(np.ones(4)))))
    h = matrix(np.hstack((J_max, -p_min, -q_min, p_max, q_max)))
    A = matrix(np.hstack((-np.ones(2), np.ones(2)))).T
    b = matrix(0.0)

    sv = solvers.qp(P, q, G, h, A, b)

    miu1 = sv['z'][0:3]
    miu2 = sv['z'][3:6]

    nodal_price = (np.ones((3, 1)) * sv['y'][0] - np.dot(J_g, miu1 - miu2)).squeeze()
    nodal_price_g = np.array([nodal_price[0], nodal_price[2]])
    mc_amount = np.array(sv['x'][:2]).squeeze()
    cost_real = 0.5 * b_real * mc_amount ** 2 + a_real * mc_amount
    cost_declare = mc_amount * np.transpose(nodal_price_g)
    profit = cost_declare - cost_real

    return nodal_price_g, profit


if __name__ == '__main__':

    alpha = np.array([20.29, 22.98])
    print(market_clearing(alpha))
--------------------------------------------------------------------------------
/results/DDPG_30-bus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/DDPG_30-bus.png
--------------------------------------------------------------------------------
/results/DDPG_3_bus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/DDPG_3_bus.png
--------------------------------------------------------------------------------
/results/Q-Learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/Q-Learning.png
--------------------------------------------------------------------------------
/results/VRE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/VRE.png
--------------------------------------------------------------------------------
/run/run_DDPG_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.DDPG import DDPG
from algorithm.model import ANet2, CNet2
import matplotlib.pyplot as plt

n_agents = 2
n_states = 2
n_actions = 1
n_steps = 10000
var = 1

a_real = np.array([15.0, 18.0])

a = np.zeros(n_agents)
s_ = np.random.rand(n_agents)
alpha = np.zeros(n_agents)
strategic_variables = np.zeros((n_steps, n_agents))

gencos = []
for _ in range(n_agents):
    gencos.append(DDPG(n_states, n_actions, ANet2, CNet2))

for step in range(n_steps):
    s = s_
    for g in range(n_agents):
        a[g] = gencos[g].choose_action(s)
        a[g] = np.clip(a[g] + np.random.randn(1) * var, -1, 1)

    alpha = (a + 1) * a_real * 1.5  # strategic variable
    nodal_price, profit = market_clearing(alpha)
    strategic_variables[step] = alpha
    r = profit / 1000

    for g in range(n_agents):
        s_ = nodal_price / 30
        gencos[g].store_transition(s, a[g], r[g], s_)

    if 1000 <= step < 9000:
        for g in range(n_agents):
            gencos[g].learn()
        if var > 0.03:
            var *= 0.999
    elif step >= 9000:
        var = 0

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1], 'r1: %.3f' % profit[0],
              'r2: %.3f' % profit[1], 'Explore: %.2f' % var)


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.1, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.1, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("DDPG (3-Bus System)")
plt.legend()
plt.savefig('DDPG_3_bus.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
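Two practical notes on the run scripts. First, they import the `market` and `algorithm` packages, so they need the repository root on the Python path; one option is to launch them from the root, for example `PYTHONPATH=. python run/run_DDPG_3-bus.py`. Second, the scripts as committed do not fix any random seeds (initial state, exploration noise and network initialisation are all random), so the learned trajectories differ from run to run. A minimal, purely illustrative sketch for repeatable runs is to seed NumPy and PyTorch at the top of a script:

```
# optional: make a run repeatable (the committed scripts do not seed anything)
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)
```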
/run/run_DDPG_30-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.thirty_bus import market_clearing
from algorithm.DDPG import DDPG
from algorithm.model import ANet1, CNet1
import matplotlib.pyplot as plt

n_agents = 6
n_states = 6
n_actions = 1
n_steps = 10000
var = 1

a_real = np.array([18.0, 20.0, 25.0, 22.0, 22.0, 16.0])

a = np.zeros(n_agents)
s_ = np.random.rand(n_agents)
alpha = np.zeros(n_agents)
strategic_variables = np.zeros((n_steps, n_agents))

gencos = []
for _ in range(n_agents):
    gencos.append(DDPG(n_states, n_actions, ANet1, CNet1))

for step in range(n_steps):
    s = s_
    for g in range(n_agents):
        a[g] = gencos[g].choose_action(s)
        a[g] = np.clip(a[g] + np.random.randn(1) * var, -1, 1)

    alpha = (a + 1) * a_real * 1.5  # strategic variable
    nodal_price, profit = market_clearing(alpha)
    strategic_variables[step] = alpha
    r = profit / 1000

    for g in range(n_agents):
        s_ = nodal_price / 30
        gencos[g].store_transition(s, a[g], r[g], s_)

    if 1000 <= step < 9000:
        for g in range(n_agents):
            gencos[g].learn()
        if var > 0.05:
            var *= 0.9993
    elif step >= 9000:
        var = 0

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1], 'a3: %.2f' % alpha[2],
              'a4: %.2f' % alpha[3], 'a5: %.2f' % alpha[4], 'a6: %.2f' % alpha[5], 'Explore: %.2f' % var)


C = np.array([[0.90, 0.19, 0.20],
              [0.29, 0.54, 0.75],
              [0.37, 0.72, 0.36],
              [1.00, 0.55, 0.10],
              [0.96, 0.89, 0.47],
              [0.69, 0.40, 0.24]])

plt.plot(strategic_variables[:, 0], lw=0.1, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.1, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot(strategic_variables[:, 2], lw=0.1, color=C[2], alpha=0.5, label=r"$\alpha_{3t}$")
plt.plot(strategic_variables[:, 3], lw=0.1, color=C[3], alpha=0.5, label=r"$\alpha_{4t}$")
plt.plot(strategic_variables[:, 4], lw=0.1, color=C[4], alpha=0.5, label=r"$\alpha_{5t}$")
plt.plot(strategic_variables[:, 5], lw=0.1, color=C[5], alpha=0.5, label=r"$\alpha_{6t}$")
plt.plot([0, 10000], [21.388, 21.388], '--', color=C[0])
plt.plot([0, 10000], [23.807, 23.807], '--', color=C[1])
plt.plot([0, 10000], [34.317, 34.317], '--', color=C[2])
plt.plot([0, 10000], [27.235, 27.235], '--', color=C[3])
plt.plot([0, 10000], [33.609, 33.609], '--', color=C[4])  # benchmark bid for unit 5, cf. market/thirty_bus.py
plt.plot([0, 10000], [24.848, 24.848], '--', color=C[5])
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("DDPG (IEEE 30-Bus System)")
plt.legend()
plt.savefig('DDPG_30-bus.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
/run/run_Q-Learning_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.QLearning import QLearningAgents
import matplotlib.pyplot as plt

n_agents = 2
action_space = np.arange(0, 3.1, 0.2)
n_steps = 10000
a_real = np.array([15.0, 18.0])
obs = 5
strategic_variables = np.zeros((n_steps, n_agents))

multi_agents = QLearningAgents(n_agents, action_space)

for step in range(n_steps):
    action = multi_agents.select_action(str(obs))
    alpha = action * a_real
    nodal_price, profit = market_clearing(alpha)

    strategic_variables[step] = alpha

    mean_price = nodal_price.mean()
    obs_ = mean_price // 10

    if step != n_steps - 1:
        next_obs = obs_
    else:
        next_obs = None

    multi_agents.learn(str(obs), action, profit, str(next_obs))
    obs = next_obs

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1],
              'r1: %.3f' % profit[0], 'r2: %.3f' % profit[1])


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.5, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.5, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("Q-Learning (3-Bus System)")
plt.legend()
plt.savefig('Q-Learning.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
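Because the tabular agents keep their value estimates in a pandas DataFrame, it is easy to inspect what was learned once the Q-Learning loop above has finished. The snippet below is illustrative only and assumes it is appended to the end of the script; `multi_agents.agents[0]` is the first generator's table.

```
# rows are the discretised mean-price states, columns the bid multipliers from action_space
print(multi_agents.agents[0].q_table.round(2))
```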
/run/run_VRE_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.VRE import RothErevAgents
import matplotlib.pyplot as plt

n_agents = 2
action_space = np.arange(0, 3.1, 0.2)
n_steps = 10000
a_real = np.array([15.0, 18.0])
strategic_variables = np.zeros((n_steps, n_agents))

multi_agents = RothErevAgents(n_agents, action_space)

for step in range(n_steps):
    action = multi_agents.select_action()
    alpha = action * a_real
    nodal_price, profit = market_clearing(alpha)

    strategic_variables[step] = alpha

    multi_agents.learn(profit)

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1],
              'r1: %.3f' % profit[0], 'r2: %.3f' % profit[1])


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.5, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.5, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("VRE (3-Bus System)")
plt.legend()
plt.savefig('VRE.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
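In every run script the dashed lines give the reference bids against which the learned strategic variables are plotted (20.29 and 22.98 $/MWh in the 3-bus case). A rough, purely illustrative way to quantify how close a run ended up to those references is to average the tail of `strategic_variables` after the training loop; nothing below is part of the repository, and the choice of the last 1000 steps is arbitrary.

```
# appended after the training loop of any of the run scripts
final_bids = strategic_variables[-1000:].mean(axis=0)
print('mean bid over the last 1000 steps:', final_bids)   # compare with 20.29 / 22.98 in the 3-bus runs
```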