├── README.md
├── algorithm
│   ├── DDPG.py
│   ├── QLearning.py
│   ├── VRE.py
│   ├── __init__.py
│   └── model.py
├── market
│   ├── __init__.py
│   ├── thirty_bus.py
│   └── three_bus.py
├── results
│   ├── DDPG_30-bus.png
│   ├── DDPG_3_bus.png
│   ├── Q-Learning.png
│   └── VRE.png
└── run
    ├── run_DDPG_3-bus.py
    ├── run_DDPG_30-bus.py
    ├── run_Q-Learning_3-bus.py
    └── run_VRE_3-bus.py

/README.md:
--------------------------------------------------------------------------------
# agent-based-modeling-in-electricity-market-using-DDPG-algorithm
Agent-Based Modeling in Electricity Market Using Deep Deterministic Policy Gradient Algorithm

## Dependencies
- Python 3.7
- PyTorch 1.0.1

## Run
* VRE algorithm in the 3-bus system:

```
python run_VRE_3-bus.py
```

![VRE](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/VRE.png)

* Q-Learning algorithm in the 3-bus system:

```
python run_Q-Learning_3-bus.py
```

![Q-Learning](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/Q-Learning.png)

* DDPG algorithm in the 3-bus system:

```
python run_DDPG_3-bus.py
```

![DDPG-3](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/DDPG_3_bus.png)

* DDPG algorithm in the IEEE 30-bus system:

```
python run_DDPG_30-bus.py
```

![DDPG-30](https://github.com/liangyancang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/blob/master/results/DDPG_30-bus.png)

## TODO
--------------------------------------------------------------------------------
/algorithm/DDPG.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
import torch.nn as nn


class DDPG:
    def __init__(self, s_dim, a_dim, ANet, CNet, memory_capacity=1000, gamma=0.0, lr_a=0.001, lr_c=0.001):
        self.a_dim, self.s_dim = a_dim, s_dim
        self.gamma = gamma
        self.memory_capacity = memory_capacity
        self.memory = np.zeros((self.memory_capacity, s_dim * 2 + a_dim + 1), dtype=np.float32)
        self.pointer = 0
        self.Actor_eval = ANet(s_dim, a_dim)
        self.Actor_target = ANet(s_dim, a_dim)
        self.Critic_eval = CNet(s_dim, a_dim)
        self.Critic_target = CNet(s_dim, a_dim)
        self.atrain = torch.optim.Adam(self.Actor_eval.parameters(), lr=lr_a)
        self.ctrain = torch.optim.Adam(self.Critic_eval.parameters(), lr=lr_c)
        self.loss_td = nn.MSELoss()

    def choose_action(self, s):
        s = torch.unsqueeze(torch.FloatTensor(s), 0)
        return self.Actor_eval(s)[0].detach()

    def learn(self, batch_size=100, tau=0.01):
        # soft-replace the target networks towards the eval networks
        for x in self.Actor_target.state_dict().keys():
            eval('self.Actor_target.' + x + '.data.mul_((1 - tau))')
            eval('self.Actor_target.' + x + '.data.add_(tau*self.Actor_eval.' + x + '.data)')
        for x in self.Critic_target.state_dict().keys():
            eval('self.Critic_target.' + x + '.data.mul_((1 - tau))')
            eval('self.Critic_target.' + x + '.data.add_(tau*self.Critic_eval.' + x + '.data)')

        # sample a random minibatch from the replay memory
        indices = np.random.choice(self.memory_capacity, batch_size)
        bt = self.memory[indices, :]
        bs = torch.FloatTensor(bt[:, :self.s_dim])
        ba = torch.FloatTensor(bt[:, self.s_dim: self.s_dim + self.a_dim])
        br = torch.FloatTensor(bt[:, -self.s_dim - 1: -self.s_dim])
        bs_ = torch.FloatTensor(bt[:, -self.s_dim:])

        # actor update: maximize the critic's value of the actor's action
        a = self.Actor_eval(bs)
        q = self.Critic_eval(bs, a)
        loss_a = -torch.mean(q)
        self.atrain.zero_grad()
        loss_a.backward()
        self.atrain.step()

        # critic update: one-step TD target (with the default gamma=0 it reduces to the immediate reward)
        a_ = self.Actor_target(bs_)
        q_ = self.Critic_target(bs_, a_)
        q_target = br + self.gamma * q_
        q_v = self.Critic_eval(bs, ba)
        td_error = self.loss_td(q_target, q_v)
        self.ctrain.zero_grad()
        td_error.backward()
        self.ctrain.step()

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.memory_capacity
        self.memory[index, :] = transition
        self.pointer += 1
--------------------------------------------------------------------------------
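The `learn()` method above performs the DDPG soft update by building attribute paths from `state_dict()` keys and running them through `eval()`. That works for the simple `nn.Linear`-only models in `model.py`, but it is fragile. A minimal, eval-free sketch of an equivalent update is shown below; `soft_update` is a hypothetical helper name, not part of the repository, and iterating over `parameters()` touches the same weight and bias tensors because these networks contain no buffers.

```
import torch

def soft_update(target_net, eval_net, tau=0.01):
    # Polyak-average the eval network's parameters into the target network
    with torch.no_grad():
        for t_param, e_param in zip(target_net.parameters(), eval_net.parameters()):
            t_param.data.mul_(1.0 - tau).add_(tau * e_param.data)
```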
/algorithm/QLearning.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd


class QLearningAgents:
    def __init__(self, n_agents, action_space, gamma=0.0):
        self.gamma = gamma
        self.n_agents = n_agents
        self.agents = [QLearningTable(action_space, gamma=self.gamma) for _ in range(self.n_agents)]

    def select_action(self, obs):
        action = [self.agents[i].choose_action(obs) for i in range(self.n_agents)]
        return np.array(action)

    def learn(self, s, a, r, s_):
        if self.n_agents == 1:
            self.agents[0].learn(s, a, r, s_)
        else:
            for i in range(self.n_agents):
                self.agents[i].learn(s, a[i], r[i], s_)


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, gamma=0.0, e_greedy=0.9):
        self.actions = actions  # list/array of available actions
        self.lr = learning_rate
        self.gamma = gamma
        self.epsilon = e_greedy
        self.n_steps = 0
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        self.n_steps += 1
        # anneal the exploration rate towards a floor of 0.05
        if self.epsilon > 0.1:
            self.epsilon = 0.9993 ** self.n_steps
        else:
            self.epsilon = 0.05

        if np.random.uniform() > self.epsilon:
            # choose best action
            state_action = self.q_table.loc[observation, :]
            # some actions may have the same value; randomly choose one of them
            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
        else:
            # choose random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
        else:
            q_target = r  # next state is terminal
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(
                pd.Series(
                    [0]*len(self.actions),
                    index=self.q_table.columns,
                    name=state,
                )
            )
--------------------------------------------------------------------------------
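`check_state_exist` relies on `DataFrame.append`, which exists in the pandas versions contemporary with the pinned Python 3.7 / PyTorch 1.0.1 stack but was removed in pandas 2.0. If the code is run on a newer pandas, a minimal drop-in sketch using `pd.concat` would be (assumption: newer pandas; not needed otherwise):

```
def check_state_exist(self, state):
    # same behaviour as above, but without DataFrame.append (removed in pandas 2.0)
    if state not in self.q_table.index:
        new_row = pd.Series([0.0] * len(self.actions), index=self.q_table.columns, name=state)
        self.q_table = pd.concat([self.q_table, new_row.to_frame().T])
```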
/algorithm/VRE.py:
--------------------------------------------------------------------------------
import numpy as np


class RothErevAgents:
    def __init__(self, n_agents, action_space):
        self.n_agents = n_agents
        self.agents = [RothErev(action_space) for _ in range(self.n_agents)]

    def select_action(self):
        action = [self.agents[i].choose_action() for i in range(self.n_agents)]
        return np.array(action)

    def learn(self, reward, r=0.1, e=0.2):
        for i in range(self.n_agents):
            self.agents[i].learn(reward[i], r=r, e=e)


class RothErev:
    def __init__(self, action_space):
        self.action_space = action_space
        self.n_strategies = len(action_space)
        self.strategy_value = np.ones(self.n_strategies) * 1000
        self.latest_ind = None

    def choose_action(self):
        # Gibbs/Boltzmann choice over the strategy propensities
        k = 2.0
        c = k / self.n_strategies * self.strategy_value.sum()
        exp_strategy_value = np.exp(self.strategy_value / c)
        sum_exp_strategy_value = exp_strategy_value.sum()
        strategy_prob = exp_strategy_value / sum_exp_strategy_value
        cumsum_strategy_prob = strategy_prob.cumsum(0)
        random_number = np.random.rand()
        for ind in range(self.n_strategies):
            if random_number <= cumsum_strategy_prob[ind]:
                self.latest_ind = ind

                return self.action_space[ind]

        return self.action_space[self.latest_ind]

    def learn(self, reward, r=0.1, e=0.9):
        for ind in range(self.n_strategies):
            if ind == self.latest_ind:
                self.strategy_value[ind] = (1 - r) * self.strategy_value[ind] + (1 - e) * reward
            else:
                self.strategy_value[ind] = (1 - r) * self.strategy_value[ind] + \
                                           e * self.strategy_value[ind] / self.n_strategies
--------------------------------------------------------------------------------
/algorithm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/algorithm/__init__.py
--------------------------------------------------------------------------------
/algorithm/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class ANet1(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(ANet1, self).__init__()
        self.FC1 = nn.Linear(s_dim, 128)
        self.FC2 = nn.Linear(128, 64)
        self.FC3 = nn.Linear(64, a_dim)

    def forward(self, obs):
        result = F.relu(self.FC1(obs))
        result = F.relu(self.FC2(result))
        result = torch.tanh(self.FC3(result))
        return result


class CNet1(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(CNet1, self).__init__()
        self.FC1 = nn.Linear(s_dim, 128)
        self.FC2 = nn.Linear(128 + a_dim, 128)
        self.FC3 = nn.Linear(128, 64)
        self.FC4 = nn.Linear(64, 1)

    def forward(self, obs, acts):
        result = F.relu(self.FC1(obs))
        combined = torch.cat([result, acts], 1)
        result = F.relu(self.FC2(combined))
        return self.FC4(F.relu(self.FC3(result)))


class ANet2(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(ANet2, self).__init__()
        self.fc1 = nn.Linear(s_dim, 64)
        self.fc1.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(64, a_dim)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.out(x)
        x = torch.tanh(x)
        return x


class CNet2(nn.Module):
    def __init__(self, s_dim, a_dim):
        super(CNet2, self).__init__()
        self.fcs = nn.Linear(s_dim, 64)
        self.fcs.weight.data.normal_(0, 0.1)
        self.fca = nn.Linear(a_dim, 64)
        self.fca.weight.data.normal_(0, 0.1)
        self.fcsa = nn.Linear(64, 64)
        self.fcsa.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(64, 1)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, s, a):
        x = self.fcs(s)
        y = self.fca(a)
        xy = F.relu(x + y)
        xy = F.relu(self.fcsa(xy))
        actions_value = self.out(xy)
        return actions_value
--------------------------------------------------------------------------------
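All four networks share a simple interface: the actors map a batch of states to tanh-bounded actions, and the critics map (state, action) batches to a scalar Q-value. A quick, illustrative shape check is sketched below; the `6`/`1` dimensions mirror the 30-bus run script, the batch size is arbitrary, and the snippet assumes the repository root is on the Python path. Nothing here is part of the repository itself.

```
import torch
from algorithm.model import ANet1, CNet1

s_dim, a_dim, batch = 6, 1, 4
actor, critic = ANet1(s_dim, a_dim), CNet1(s_dim, a_dim)

s = torch.rand(batch, s_dim)
a = actor(s)        # (4, 1), values in (-1, 1) because of the final tanh
q = critic(s, a)    # (4, 1), one Q-value per (state, action) pair
print(a.shape, q.shape)
```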
/market/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/market/__init__.py
--------------------------------------------------------------------------------
/market/thirty_bus.py:
--------------------------------------------------------------------------------
import numpy as np
from cvxopt import matrix, solvers
solvers.options['show_progress'] = False


# Market clearing with network congestion considered
def market_clearing(a_declare):

    # Intercepts and slopes of the supply curves
    a_real = np.array([18.0, 20.0, 25.0, 22.0, 22.0, 16.0])
    b_real = np.array([0.25, 0.20, 0.20, 0.20, 0.20, 0.25])

    # Intercepts and slopes of the demand curves
    c_real = np.array([120.0, 130.0, 120.0, 135.0, 150.0, 95.0, 150.0, 125.0, 100.0, 150.0,
                       90.0, 95.0, 90.0, 90.0, 160.0, 120.0, 150.0, 100.0, 95.0, 125.0]) * -1
    d_real = np.array([5.0, 5.5, 4.5, 5.0, 5.0, 3.0, 5.5, 4.0, 4.5, 5.0,
                       3.5, 3.5, 3.5, 3.5, 6.0, 5.0, 6.0, 4.5, 3.5, 4.5])

    # Upper and lower limits of generator output
    p_min = np.array([5.0, 5.0, 5.0, 5.0, 5.0, 5.0])
    p_max = np.array([100.0, 80.0, 50.0, 80.0, 50.0, 120.0])

    # Upper and lower limits of load demand
    q_min = np.zeros(20)
    q_max = np.array([24.0, 23.63636364, 26.66666667, 27.0, 30.0,
                      31.66666667, 27.27272727, 31.25, 22.22222222, 30.0,
                      25.71428571, 27.14285714, 25.71428571, 25.71428571, 26.66666667,
                      24.0, 25.0, 22.22222222, 27.14285714, 27.77777778])

    J_g = ([[-0.6616, -0.3384, -0.2243, -0.3384, -0.1357, -0.3016, -0.4039, -0.1357, 0.1357, -0.1287, -0.1260, -0.0720, 0.0000, -0.1260, -0.1588, 0.0000, -0.0279, -0.0986, -0.0323, -0.0279, -0.0323, -0.0038, -0.0038, -0.0038, 0.0038, 0.0323, -0.1463, -0.0878, -0.1463, -0.1227, -0.2340, -0.1227, -0.3567, 0.0000, -0.3567, -0.6433, -0.4082, -0.5918, -0.4082, -0.1287, -0.5146],
            [0.1775, -0.1775, -0.3028, -0.1775, -0.1613, -0.3585, -0.3263, -0.1613, 0.1613, -0.1288, -0.1285, -0.0734, 0.0000, -0.1285, -0.1539, 0.0000, -0.0273, -0.0965, -0.0301, -0.0273, -0.0301, -0.0025, -0.0025, -0.0025, 0.0025, 0.0301, -0.1466, -0.0879, -0.1466, -0.1213, -0.2345, -0.1213, -0.3558, 0.0000, -0.3558, -0.6442, -0.4082, -0.5918, -0.4082, -0.1288, -0.5154],
            [-0.0238, 0.0238, 0.0406, 0.0238, -0.0200, -0.0444, -0.3723, -0.0200, 0.0200, -0.1132, 0.0824, 0.0471, 0.0000, 0.0824, 0.4367, 1.0000, -0.0763, -0.2699, -0.2171, -0.0763, -0.2171, -0.1063, -0.1063, -0.1063, 0.1063, 0.2171, -0.1212, -0.0727, -0.1212, -0.2399, -0.1939, -0.2399, -0.4338, 0.0000, -0.4338, -0.5662, -0.4082, -0.5918, -0.4082, -0.1132, -0.4530],
            [-0.0069, 0.0069, 0.0117, 0.0069, -0.0058, -0.0128, -0.1076, -0.0058, 0.0058, -0.1036, 0.2492, 0.1424, 0.0000, 0.2492, 0.1262, 0.0000, 0.0078, 0.0277, 0.0907, 0.0078, 0.0907, 0.0619, 0.0619, 0.0619, -0.0619, -0.0907, 0.3402, 0.2041, 0.3402, -0.0264, -0.4557, -0.0264, -0.4821, 0.0000, -0.4821, -0.5179, -0.4082, -0.5918, -0.4082, -0.1036, -0.4143],
            [-0.0147, 0.0147, 0.0250, 0.0147, -0.0123, -0.0273, -0.2293, -0.0123, 0.0123, -0.0982, 0.1413, 0.0807, 0.0000, 0.1413, 0.2689, 0.0000, 0.0751, 0.2659, -0.0721, 0.0751, -0.0721, -0.1086, -0.1086, -0.1086, 0.1086, 0.0721, 0.0258, 0.0155, 0.0258, 0.4496, 0.0413, -0.5504, -0.5091, 0.0000, -0.5091, -0.4909, -0.4082, -0.5918, -0.4082, -0.0982, -0.3927],
            [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.4082, -0.5918, -0.4082, 0.0000, 0.0000]])

    J = np.array([[0.1287, 0.1288, 0.1132, 0.1036, 0.0982, 0.0000, -0.1288, -0.1281, -0.1280, -0.1297, 0.7355, -0.1095, -0.1132, -0.1105, -0.1083, -0.1117, -0.1102, -0.1087, -0.1090, -0.1091, -0.1049, -0.0982, -0.0845, -0.0329, 0.0000, 0.0000],
                  [0.0279, 0.0273, 0.0763, -0.0078, -0.0751, 0.0000, -0.0273, -0.0297, -0.0301, -0.0247, -0.0236, 0.0012, -0.0763, 0.4634, 0.1093, -0.0433, -0.0120, 0.0716, 0.0493, 0.0373, 0.0064, 0.0751, 0.0290, 0.0113, 0.0000, 0.0000],
                  [-0.0323, -0.0301, -0.2171, 0.0907, -0.0721, 0.0000, 0.0301, 0.0393, 0.0408, 0.0202, 0.0174, -0.1124, 0.2171, 0.1742, 0.1413, 0.5024, 0.7735, 0.0527, 0.0004, -0.0278, -0.0955, 0.0721, -0.0213, -0.0083, 0.0000, 0.0000]])

    J_max = np.array([10, 8, 10, 10, 8, 10])

    P = matrix(np.diag(np.append(b_real, d_real)))
    q = matrix(np.append(a_declare, c_real))
    G = matrix(np.vstack((J, -J, np.diag(-np.ones(26)), np.diag(np.ones(26)))))
    h = matrix(np.hstack((J_max, -p_min, -q_min, p_max, q_max)))
    A = matrix(np.hstack((-np.ones(6), np.ones(20)))).T
    b = matrix(0.0)

    sv = solvers.qp(P, q, G, h, A, b)

    miu1, miu2 = np.zeros((41, 1)), np.zeros((41, 1))
    miu1[9], miu1[16], miu1[25] = sv['z'][0:3]
    miu2[9], miu2[16], miu2[25] = sv['z'][3:6]

    nodal_price_g = (np.ones((6, 1)) * sv['y'][0] + np.dot(J_g, miu1 - miu2)).squeeze()

    mc_amount = np.array(sv['x'][:6]).squeeze()

    cost_real = 0.5 * b_real * mc_amount ** 2 + a_real * mc_amount
    cost_declare = mc_amount * np.transpose(nodal_price_g)
    profit = cost_declare - cost_real

    return nodal_price_g, profit


if __name__ == '__main__':

    alpha = np.array([21.388, 23.807, 34.317, 27.235, 33.609, 24.848])
    print(market_clearing(alpha))
--------------------------------------------------------------------------------
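For readers unfamiliar with cvxopt: `solvers.qp(P, q, G, h, A, b)` solves a quadratic program of the form

$$\min_{x}\;\tfrac{1}{2}x^{\top}Px + q^{\top}x \quad \text{subject to} \quad Gx \le h,\;\; Ax = b$$

so `market_clearing` is a dispatch that maximizes the declared social welfare (consumer benefit minus declared generation cost). In `thirty_bus.py` the decision vector stacks the six generator outputs and the twenty demands (two and two in `three_bus.py` below); the single equality row enforces the supply-demand balance, the first six inequality rows are the line-flow limits in both directions (`J` and `-J` against `J_max`), and the remaining rows are the box limits on generation and demand. The nodal prices are then assembled from the balance-constraint dual `sv['y']` and the flow-limit duals `sv['z']` through the sensitivity matrix `J_g`. This note is editorial commentary, not part of the repository.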
/market/three_bus.py:
--------------------------------------------------------------------------------
import numpy as np
from cvxopt import matrix, solvers
solvers.options['show_progress'] = False


# Market clearing with network congestion considered
def market_clearing(alpha):

    # Intercepts and slopes of the supply curves
    a_real = np.array([15.0, 18.0])
    b_real = np.array([0.01, 0.008])

    # Intercepts and slopes of the demand curves
    c_real = np.array([40.0, 40.0]) * -1
    d_real = np.array([0.08, 0.06])

    # Upper and lower limits of generator output
    p_min = np.array([0.0, 0.0])
    p_max = np.array([500.0, 500.0])

    # Upper and lower limits of load demand
    q_min = np.zeros(2)
    q_max = np.array([500.0, 666.666666666667])

    J_g = ([[-0.333333333333333, -0.333333333333333, -0.666666666666667],
            [0.333333333333334, -0.666666666666667, -0.333333333333333],
            [0, 0, 0]])

    J = np.array([[-0.333333333333333, 0.0, 0.333333333333333, -0.333333333333334],
                  [-0.333333333333333, 0.0, 0.333333333333333, 0.666666666666667],
                  [-0.666666666666667, 0.0, 0.666666666666667, 0.333333333333333]])

    J_max = np.array([25.0, 1000.0, 1000.0, 25.0, 1000.0, 1000.0])

    P = matrix(np.diag(np.append(b_real, d_real)))
    q = matrix(np.append(alpha, c_real))
    G = matrix(np.vstack((J, -J, np.diag(-np.ones(4)), np.diag(np.ones(4)))))
    h = matrix(np.hstack((J_max, -p_min, -q_min, p_max, q_max)))
    A = matrix(np.hstack((-np.ones(2), np.ones(2)))).T
    b = matrix(0.0)

    sv = solvers.qp(P, q, G, h, A, b)

    miu1 = sv['z'][0:3]
    miu2 = sv['z'][3:6]

    nodal_price = (np.ones((3, 1)) * sv['y'][0] - np.dot(J_g, miu1 - miu2)).squeeze()
    nodal_price_g = np.array([nodal_price[0], nodal_price[2]])
    mc_amount = np.array(sv['x'][:2]).squeeze()
    cost_real = 0.5 * b_real * mc_amount ** 2 + a_real * mc_amount
    cost_declare = mc_amount * np.transpose(nodal_price_g)
    profit = cost_declare - cost_real

    return nodal_price_g, profit


if __name__ == '__main__':

    alpha = np.array([20.29, 22.98])
    print(market_clearing(alpha))
--------------------------------------------------------------------------------
/results/DDPG_30-bus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/DDPG_30-bus.png
--------------------------------------------------------------------------------
/results/DDPG_3_bus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/DDPG_3_bus.png
--------------------------------------------------------------------------------
/results/Q-Learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/Q-Learning.png
--------------------------------------------------------------------------------
/results/VRE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanchang-liang/agent-based-modeling-in-electricity-market-using-DDPG-algorithm/e337500606416f92a5ac91fe1f3635b749d4ae21/results/VRE.png
--------------------------------------------------------------------------------
/run/run_DDPG_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.DDPG import DDPG
from algorithm.model import ANet2, CNet2
import matplotlib.pyplot as plt

n_agents = 2
n_states = 2
n_actions = 1
n_steps = 10000
var = 1

a_real = np.array([15.0, 18.0])

a = np.zeros(n_agents)
s_ = np.random.rand(n_agents)
alpha = np.zeros(n_agents)
strategic_variables = np.zeros((n_steps, n_agents))

gencos = []
for _ in range(n_agents):
    gencos.append(DDPG(n_states, n_actions, ANet2, CNet2))

for step in range(n_steps):
    s = s_
    for g in range(n_agents):
        a[g] = gencos[g].choose_action(s)
        a[g] = np.clip(a[g] + np.random.randn(1) * var, -1, 1)

    alpha = (a + 1) * a_real * 1.5  # strategic variable
    nodal_price, profit = market_clearing(alpha)
    strategic_variables[step] = alpha
    r = profit / 1000

    for g in range(n_agents):
        s_ = nodal_price / 30
        gencos[g].store_transition(s, a[g], r[g], s_)

    if 1000 <= step < 9000:
        for g in range(n_agents):
            gencos[g].learn()
        if var > 0.03:
            var *= 0.999
    elif step >= 9000:
        var = 0

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1], 'r1: %.3f' % profit[0],
              'r2: %.3f' % profit[1], 'Explore: %.2f' % var)


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.1, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.1, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("DDPG (3-Bus System)")
plt.legend()
plt.savefig('DDPG_3_bus.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
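Two practical notes on the run scripts. First, they import the `market` and `algorithm` packages, so they need the repository root on the Python path; one option is to launch them from the root, for example `PYTHONPATH=. python run/run_DDPG_3-bus.py`. Second, the scripts as committed do not fix any random seeds (initial state, exploration noise and network initialisation are all random), so the learned trajectories differ from run to run. A minimal, purely illustrative sketch for repeatable runs is to seed NumPy and PyTorch at the top of a script:

```
# optional: make a run repeatable (the committed scripts do not seed anything)
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)
```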
/run/run_DDPG_30-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.thirty_bus import market_clearing
from algorithm.DDPG import DDPG
from algorithm.model import ANet1, CNet1
import matplotlib.pyplot as plt

n_agents = 6
n_states = 6
n_actions = 1
n_steps = 10000
var = 1

a_real = np.array([18.0, 20.0, 25.0, 22.0, 22.0, 16.0])

a = np.zeros(n_agents)
s_ = np.random.rand(n_agents)
alpha = np.zeros(n_agents)
strategic_variables = np.zeros((n_steps, n_agents))

gencos = []
for _ in range(n_agents):
    gencos.append(DDPG(n_states, n_actions, ANet1, CNet1))

for step in range(n_steps):
    s = s_
    for g in range(n_agents):
        a[g] = gencos[g].choose_action(s)
        a[g] = np.clip(a[g] + np.random.randn(1) * var, -1, 1)

    alpha = (a + 1) * a_real * 1.5  # strategic variable
    nodal_price, profit = market_clearing(alpha)
    strategic_variables[step] = alpha
    r = profit / 1000

    for g in range(n_agents):
        s_ = nodal_price / 30
        gencos[g].store_transition(s, a[g], r[g], s_)

    if 1000 <= step < 9000:
        for g in range(n_agents):
            gencos[g].learn()
        if var > 0.05:
            var *= 0.9993
    elif step >= 9000:
        var = 0

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1], 'a3: %.2f' % alpha[2],
              'a4: %.2f' % alpha[3], 'a5: %.2f' % alpha[4], 'a6: %.2f' % alpha[5], 'Explore: %.2f' % var)


C = np.array([[0.90, 0.19, 0.20],
              [0.29, 0.54, 0.75],
              [0.37, 0.72, 0.36],
              [1.00, 0.55, 0.10],
              [0.96, 0.89, 0.47],
              [0.69, 0.40, 0.24]])

plt.plot(strategic_variables[:, 0], lw=0.1, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.1, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot(strategic_variables[:, 2], lw=0.1, color=C[2], alpha=0.5, label=r"$\alpha_{3t}$")
plt.plot(strategic_variables[:, 3], lw=0.1, color=C[3], alpha=0.5, label=r"$\alpha_{4t}$")
plt.plot(strategic_variables[:, 4], lw=0.1, color=C[4], alpha=0.5, label=r"$\alpha_{5t}$")
plt.plot(strategic_variables[:, 5], lw=0.1, color=C[5], alpha=0.5, label=r"$\alpha_{6t}$")
plt.plot([0, 10000], [21.388, 21.388], '--', color=C[0])
plt.plot([0, 10000], [23.807, 23.807], '--', color=C[1])
plt.plot([0, 10000], [34.317, 34.317], '--', color=C[2])
plt.plot([0, 10000], [27.235, 27.235], '--', color=C[3])
plt.plot([0, 10000], [33.609, 33.609], '--', color=C[4])  # benchmark bid for unit 5, cf. market/thirty_bus.py
plt.plot([0, 10000], [24.848, 24.848], '--', color=C[5])
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("DDPG (IEEE 30-Bus System)")
plt.legend()
plt.savefig('DDPG_30-bus.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
/run/run_Q-Learning_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.QLearning import QLearningAgents
import matplotlib.pyplot as plt

n_agents = 2
action_space = np.arange(0, 3.1, 0.2)
n_steps = 10000
a_real = np.array([15.0, 18.0])
obs = 5
strategic_variables = np.zeros((n_steps, n_agents))

multi_agents = QLearningAgents(n_agents, action_space)

for step in range(n_steps):
    action = multi_agents.select_action(str(obs))
    alpha = action * a_real
    nodal_price, profit = market_clearing(alpha)

    strategic_variables[step] = alpha

    mean_price = nodal_price.mean()
    obs_ = mean_price // 10

    if step != n_steps - 1:
        next_obs = obs_
    else:
        next_obs = None

    multi_agents.learn(str(obs), action, profit, str(next_obs))
    obs = next_obs

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1],
              'r1: %.3f' % profit[0], 'r2: %.3f' % profit[1])


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.5, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.5, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("Q-Learning (3-Bus System)")
plt.legend()
plt.savefig('Q-Learning.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
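Because the tabular agents keep their value estimates in a pandas DataFrame, it is easy to inspect what was learned once the Q-Learning loop above has finished. The snippet below is illustrative only and assumes it is appended to the end of the script; `multi_agents.agents[0]` is the first generator's table.

```
# rows are the discretised mean-price states, columns the bid multipliers from action_space
print(multi_agents.agents[0].q_table.round(2))
```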
/run/run_VRE_3-bus.py:
--------------------------------------------------------------------------------
import numpy as np
from market.three_bus import market_clearing
from algorithm.VRE import RothErevAgents
import matplotlib.pyplot as plt

n_agents = 2
action_space = np.arange(0, 3.1, 0.2)
n_steps = 10000
a_real = np.array([15.0, 18.0])
strategic_variables = np.zeros((n_steps, n_agents))

multi_agents = RothErevAgents(n_agents, action_space)

for step in range(n_steps):
    action = multi_agents.select_action()
    alpha = action * a_real
    nodal_price, profit = market_clearing(alpha)

    strategic_variables[step] = alpha

    multi_agents.learn(profit)

    if (step + 1) % 1000 == 0:
        print('Step:', step + 1, 'a1: %.2f' % alpha[0], 'a2: %.2f' % alpha[1],
              'r1: %.3f' % profit[0], 'r2: %.3f' % profit[1])


C = np.array([[0.36, 0.58, 0.75],
              [0.92, 0.28, 0.29]])

plt.plot(strategic_variables[:, 0], lw=0.5, color=C[0], alpha=0.5, label=r"$\alpha_{1t}$")
plt.plot(strategic_variables[:, 1], lw=0.5, color=C[1], alpha=0.5, label=r"$\alpha_{2t}$")
plt.plot([0, 10000], [20.29, 20.29], '--', color=C[0], label=r"$\alpha_{1t}^\ast$")
plt.plot([0, 10000], [22.98, 22.98], '--', color=C[1], label=r"$\alpha_{2t}^\ast$")
plt.xlabel(r"$t$")
plt.ylabel(r"$\alpha_{gt}$ (\$/MWh)")
plt.title("VRE (3-Bus System)")
plt.legend()
plt.savefig('VRE.png', dpi=600)
plt.show()
--------------------------------------------------------------------------------
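In every run script the dashed lines give the reference bids against which the learned strategic variables are plotted (20.29 and 22.98 $/MWh in the 3-bus case). A rough, purely illustrative way to quantify how close a run ended up to those references is to average the tail of `strategic_variables` after the training loop; nothing below is part of the repository, and the choice of the last 1000 steps is arbitrary.

```
# appended after the training loop of any of the run scripts
final_bids = strategic_variables[-1000:].mean(axis=0)
print('mean bid over the last 1000 steps:', final_bids)   # compare with 20.29 / 22.98 in the 3-bus runs
```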