├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── agents ├── agent_aggregator_dqn.py └── agent_customer.py ├── data ├── 15minute_data_austin.csv ├── 15minute_data_austin_fixed_consumption.csv ├── 15minute_data_austin_processed.csv ├── baselines_regr_temp_correction.npy ├── baselines_regr_temp_correction_old.npy └── outdoor_temperatures_noaa.csv ├── environment └── environment.py ├── main.py ├── params.py └── utils ├── consumer_baseline_process.py ├── load_demand.py ├── pre_process.py ├── replay_buffer.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | save_files/ 3 | .idea/ 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: >- 3 | Case study for 4 | MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 5 | message: 'If you use this software, please cite it as below.' 6 | type: software 7 | authors: 8 | - given-names: Jasper 9 | family-names: 'van Tilburg ' 10 | affiliation: University of Technology Delft 11 | - given-names: Luciano Cavalcante 12 | family-names: Siebert 13 | email: L.CavalcanteSiebert@tudelft.nl 14 | affiliation: University of Technology Delft 15 | orcid: 'https://orcid.org/0000-0002-7531-3154' 16 | - given-names: Jochen L. 17 | family-names: Cremer 18 | email: j.l.cremer@tudelft.nl 19 | affiliation: University of Technology Delft 20 | orcid: 'https://orcid.org/0000-0001-9284-5083' 21 | repository-code: >- 22 | https://github.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 23 | abstract: >- 24 | This repository contains the code for the most recent 25 | versions of the model for the paper: 26 | Jasper van Tilburg, Luciano C. Siebert, Jochen L. Cremer, 27 | "MARL-iDR: Multi-Agent Reinforcement Learning for 28 | Incentive-based Residential Demand Response" to appear at 29 | IEEE PowerTech 2023, Belgrade, Serbia 30 | keywords: 31 | - Reinforcement Learning 32 | - Energy Community 33 | - Demand Response 34 | license: MIT 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 TU-Delft-AI-Energy-Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Case study for MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 2 | 3 | This repository contains code for the paper: 4 | 5 | *Jasper van Tilburg, Luciano C. Siebert, Jochen L. Cremer, "MARL-iDR: Multi-Agent Reinforcement Learning for Incentive-based Residential Demand Response" IEEE PowerTech 2023, Belgrade, Serbia, https://arxiv.org/abs/2304.04086* 6 | 7 | ## Data 8 | This repository includes only placeholder Excel files in /data which includes the first and last data samples. The full data that was used in the case studies in our paper can be downloaded from “Pecan Street Inc.” [Online]. Available: https://www.pecanstreet.org/ 9 | 10 | ## License 11 | 12 | This work is licensed under a 13 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 14 | -------------------------------------------------------------------------------- /agents/agent_aggregator_dqn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from keras import initializers 5 | from tensorflow.python.keras import Sequential 6 | from tensorflow.python.keras.layers import Dense 7 | from tensorflow.python.keras.models import load_model 8 | from tensorflow.python.keras.optimizer_v2.adam import Adam 9 | 10 | from environment.environment import sample_action_aggregator 11 | from params import EPSILON_START, EPSILON_MIN, EPSILON_DECAY, \ 12 | BUFFER_SIZE, BATCH_SIZE, TRAINING_INTERVAL, REPLACE_TARGET_INTERVAL, \ 13 | TAU, LEARNING_RATE_DQN, HIDDEN_LAYER_SIZE, AGGREGATOR_ACTION_SIZE, AGGREGATOR_STATE_SIZE, DISCOUNT_RATE_AGGREGATOR 14 | from utils.replay_buffer import ReplayBuffer 15 | 16 | 17 | def construct_network(): 18 | model = Sequential() 19 | model.add(Dense(HIDDEN_LAYER_SIZE, input_shape=(AGGREGATOR_STATE_SIZE,), activation='relu', 20 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 21 | model.add(Dense(HIDDEN_LAYER_SIZE, activation='relu', 22 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 23 | model.add(Dense(AGGREGATOR_ACTION_SIZE, activation='linear', 24 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 25 | model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE_DQN)) 26 | return model 27 | 28 | 29 | def predict(state, network): 30 | state_input = np.reshape(state, (-1, AGGREGATOR_STATE_SIZE)) 31 | return network(state_input) 32 | 33 | 34 | class AggregatorAgent: 35 | """ This AggregatorAgent is a deep Reinforcement Learning agent similar to the CustomerAgent. Currently the 36 | Q-learning AggregatorAgent is used. 
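    A minimal interaction sketch, following the training loop in main.py (episode
    handling and the CustomerAgents are omitted here for brevity):

        env = Environment(AGENT_IDS)
        aggregator = AggregatorAgent(env)
        env.reset(max_steps=TIME_STEPS_TRAIN)
        aggregator.reset()
        while not env.done:
            aggregator.act(train=True)   # reads env.last_aggregator(), picks an incentive rate
            # ... each CustomerAgent acts here ...
            env.step()                   # advances time and computes the aggregator reward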
""" 37 | 38 | def __init__(self, env): 39 | self.env = env 40 | self.epsilon = EPSILON_START 41 | self.acc_reward = 0 42 | self.last_state = None 43 | self.last_action = None 44 | self.last_history = None 45 | self.q_network = construct_network() 46 | self.target_network = construct_network() 47 | self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE) 48 | 49 | def reset(self): 50 | self.last_state = None 51 | self.last_action = None 52 | self.last_history = None 53 | self.acc_reward = 0 54 | self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY) 55 | 56 | def act(self, train=True): 57 | observation, reward, done, _ = self.env.last_aggregator() 58 | 59 | if train: 60 | if self.last_action is not None: 61 | self.step(self.last_state, self.last_action, reward, observation, done) 62 | action = self.choose_action(observation, self.epsilon) 63 | else: 64 | action = self.choose_action(observation) 65 | 66 | self.env.act_aggregator(action) 67 | self.last_state = observation 68 | self.last_action = action 69 | self.acc_reward += reward 70 | 71 | def choose_action(self, s, eps=0.0): 72 | if random.uniform(0, 1) < eps: 73 | return sample_action_aggregator() 74 | else: 75 | actions = predict(s, self.q_network) 76 | action = np.argmax(actions) 77 | return action 78 | 79 | def step(self, state, action, reward, next_state, done, name=None): 80 | self.memory.add(state, action, reward, next_state, done) 81 | 82 | # Train network 83 | if len(self.memory) >= BATCH_SIZE and self.env.curr_step % TRAINING_INTERVAL == 0: 84 | sampled_experiences = self.memory.sample() 85 | self.train(sampled_experiences) 86 | 87 | # Replace target network 88 | if self.env.episode % REPLACE_TARGET_INTERVAL == 0: 89 | self.target_network.set_weights(self.q_network.get_weights()) 90 | 91 | def train(self, experiences): 92 | states, actions, rewards, next_states, dones = experiences 93 | outputs = predict(next_states, self.target_network) 94 | next_actions = np.max(outputs, axis=1) 95 | target_values = rewards + (DISCOUNT_RATE_AGGREGATOR * next_actions * (1 - dones)) 96 | targets = predict(states, self.q_network).numpy() 97 | targets[np.arange(len(states)), actions] = target_values 98 | self.last_history = self.q_network.fit(np.array(states), np.array(targets), verbose=False) 99 | 100 | def update_network(self): 101 | model_weights = self.q_network.get_weights() 102 | target_model_weights = self.target_network.get_weights() 103 | for i in range(len(model_weights)): 104 | target_model_weights[i] = TAU * model_weights[i] + (1 - TAU) * target_model_weights[i] 105 | self.target_network.set_weights(target_model_weights) 106 | 107 | def save(self, path): 108 | self.q_network.save(path + '/Q_network_aggregator.h5') 109 | print("Successfully saved network.") 110 | 111 | def load(self, path): 112 | self.q_network = load_model(path + '/Q_network_aggregator.h5') 113 | -------------------------------------------------------------------------------- /agents/agent_customer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from keras import initializers 5 | from tensorflow.python.keras import Sequential 6 | from tensorflow.python.keras.layers import Dense 7 | from tensorflow.python.keras.models import load_model 8 | from tensorflow.python.keras.optimizer_v2.adam import Adam 9 | 10 | from environment.environment import sample_action_customer 11 | from params import EPSILON_START, DISCOUNT_RATE, EPSILON_MIN, EPSILON_DECAY, CUSTOMER_ACTION_SIZE, BUFFER_SIZE, \ 12 
| BATCH_SIZE, CUSTOMER_STATE_SIZE, TRAINING_INTERVAL, \ 13 | REPLACE_TARGET_INTERVAL, TAU, LEARNING_RATE_DQN, HIDDEN_LAYER_SIZE, POWER_RATES 14 | from utils.replay_buffer import ReplayBuffer 15 | 16 | 17 | def construct_network(): 18 | """ Construct the Deep-Q network. It consists of an input layer with the size of the state variables, 2 hidden 19 | layers and an output layer with the size of the possible actions. """ 20 | model = Sequential() 21 | model.add(Dense(HIDDEN_LAYER_SIZE, input_shape=(CUSTOMER_STATE_SIZE,), activation='relu', 22 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 23 | model.add(Dense(HIDDEN_LAYER_SIZE, activation='relu', 24 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 25 | model.add(Dense(CUSTOMER_ACTION_SIZE, activation='linear', 26 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 27 | model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE_DQN)) 28 | return model 29 | 30 | 31 | def predict(state, network): 32 | """ Predict the Q-values for a given state and network. """ 33 | state_input = np.reshape(state, (-1, CUSTOMER_STATE_SIZE)) 34 | return network(state_input) 35 | 36 | 37 | class CustomerAgent: 38 | """ This CustomerAgent is a Reinforcement Learning agent using a Deep-Q network to predict the Q-values of 39 | state-action pairs. In the act function the agent calls for the previous reward and the next observation. 40 | It updates its network based on the previous reward, observation and action. Then it decides upon the next action. 41 | """ 42 | 43 | def __init__(self, agent_id, data_id, env, dummy=False, q_network=None, target_network=None): 44 | self.agent_id = agent_id 45 | self.data_id = data_id 46 | self.env = env 47 | self.epsilon = EPSILON_START 48 | self.dummy = dummy 49 | self.acc_reward = 0 50 | self.last_state = None 51 | self.last_action = None 52 | self.last_history = None 53 | self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE) 54 | self.visited = {} 55 | self.q_network = q_network 56 | self.target_network = target_network 57 | if q_network is None: 58 | self.q_network = construct_network() 59 | if target_network is None: 60 | self.target_network = construct_network() 61 | 62 | def reset(self): 63 | """ Reset the agent before each episode. """ 64 | self.last_state = None 65 | self.last_action = None 66 | self.last_history = None 67 | self.acc_reward = 0 68 | self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY) 69 | 70 | def act(self, train=True): 71 | """ Select an action based on the observation. If the agent is in training and is not a dummy the agent's 72 | Q-network is also updated in the step function. """ 73 | observation, reward, done, _ = self.env.last_customer(self.agent_id) 74 | 75 | if train and not self.dummy: 76 | if self.last_action is not None: 77 | self.step(self.last_state, self.last_action, reward, observation, done) 78 | action = self.choose_action(observation, self.epsilon) 79 | else: 80 | action = self.choose_action(observation) 81 | 82 | self.env.act(self.agent_id, action) 83 | self.last_state = observation 84 | self.last_action = action 85 | self.acc_reward += reward 86 | 87 | def choose_action(self, s, eps=0.0): 88 | """ Choose an action based on the given state. If the agent is a dummy it will simply consume an amount equal 89 | to its demand (power rate 1.0). Otherwise an action is selected based on epsilon-greedy. 
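    The exploration rate passed in as eps comes from the per-episode decay applied in
    reset(). With the defaults in params.py (EPSILON_START = 1.0, EPSILON_DECAY = 0.999,
    EPSILON_MIN = 0.01) this is roughly:

        eps_after_n_episodes = max(0.01, 1.0 * 0.999 ** n)
        # n = 1000 -> ~0.37; the 0.01 floor is reached after roughly 4600 episodes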
""" 90 | if self.dummy: 91 | return POWER_RATES.index(1.0) 92 | elif random.uniform(0, 1) < eps: 93 | return sample_action_customer() 94 | else: 95 | actions = predict(s, self.q_network) 96 | action = np.argmax(actions) 97 | return action 98 | 99 | def step(self, state, action, reward, next_state, done): 100 | """ Every iteration the agent takes a training step. The agent adds the SARS tuple to the replay buffer. The 101 | replay buffer is then used for sampling a batch for training. """ 102 | self.memory.add(state, action, reward, next_state, done) 103 | 104 | # Train network on a certain interval and if the replay buffer has enough samples 105 | if len(self.memory) >= BATCH_SIZE and self.env.curr_step % TRAINING_INTERVAL == 0: 106 | sampled_experiences = self.memory.sample() 107 | self.train(sampled_experiences) 108 | 109 | # Replace target network on a certain interval 110 | if self.env.episode % REPLACE_TARGET_INTERVAL == 0: 111 | self.target_network.set_weights(self.q_network.get_weights()) 112 | 113 | def train(self, experiences): 114 | """ Train the Q-network. The target values are based on a target network to stabilize training. """ 115 | states, actions, rewards, next_states, dones = experiences 116 | outputs = predict(next_states, self.target_network) 117 | next_actions = np.max(outputs, axis=1) 118 | target_values = rewards + (DISCOUNT_RATE * next_actions * (1 - dones)) 119 | targets = predict(states, self.q_network).numpy() 120 | targets[np.arange(len(states)), actions] = target_values 121 | self.last_history = self.q_network.fit(np.array(states), np.array(targets), verbose=False) 122 | 123 | def update_network(self): 124 | """ Do a soft update on the target network. A soft update can be done every iteration. This is slightly 125 | different from a hard update on an interval. This is currently not used. """ 126 | model_weights = self.q_network.get_weights() 127 | target_model_weights = self.target_network.get_weights() 128 | for i in range(len(model_weights)): 129 | target_model_weights[i] = TAU * model_weights[i] + (1 - TAU) * target_model_weights[i] 130 | self.target_network.set_weights(target_model_weights) 131 | 132 | def save(self, path): 133 | """ Save the network. """ 134 | self.q_network.save(path + '/Q_network_' + str(self.data_id) + '.h5') 135 | np.save(path + '/dissatisfaction_coefficients_' + str(self.data_id) + '.npy', self.env.dissatisfaction_coefficients[self.agent_id]) 136 | print("Successfully saved network for agent " + str(self.data_id)) 137 | 138 | def load(self, path): 139 | """ Load a network give its path. 
""" 140 | self.q_network = load_model(path + '/Q_network_' + str(self.data_id) + '.h5') 141 | self.env.dissatisfaction_coefficients[self.agent_id] = np.load(path + '/dissatisfaction_coefficients_' + str(self.data_id) + '.npy') 142 | -------------------------------------------------------------------------------- /data/15minute_data_austin.csv: -------------------------------------------------------------------------------- 1 | dataid,local_15min,air1,air2,air3,airwindowunit1,aquarium1,bathroom1,bathroom2,bedroom1,bedroom2,bedroom3,bedroom4,bedroom5,battery1,car1,car2,circpump1,clotheswasher1,clotheswasher_dryg1,diningroom1,diningroom2,dishwasher1,disposal1,drye1,dryg1,freezer1,furnace1,furnace2,garage1,garage2,grid,heater1,heater2,heater3,housefan1,icemaker1,jacuzzi1,kitchen1,kitchen2,kitchenapp1,kitchenapp2,lights_plugs1,lights_plugs2,lights_plugs3,lights_plugs4,lights_plugs5,lights_plugs6,livingroom1,livingroom2,microwave1,office1,outsidelights_plugs1,outsidelights_plugs2,oven1,oven2,pool1,pool2,poollight1,poolpump1,pump1,range1,refrigerator1,refrigerator2,security1,sewerpump1,shed1,solar,solar2,sprinkler1,sumppump1,utilityroom1,venthood1,waterheater1,waterheater2,wellpump1,winecooler1,leg1v,leg2v 2 | 661,21/11/2018 15:15,0,,,,,,,,,,,,,0.001,,,,,,,,,0,,,,,,,0.124,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.276,,,,,,,,,,123.915,124.277 3 | 9278,31/12/2018 23:45,0.039,,,,,,,,,,,,,,,,0.001,,,,-0.001,,0.001,,,0.014,,,,1.02,,,,,,,,,0.012,0.003,0.001,,,,,,,,0.003,,,,-0.001,,,,,,,,0.438,0.014,,,,-0.003,,,,,,,,,,122.939,123.402 4 | -------------------------------------------------------------------------------- /data/15minute_data_austin_fixed_consumption.csv: -------------------------------------------------------------------------------- 1 | time,dataid,air,car,clotheswasher,dishwasher,dry,non-shiftable,total 2 | 01/01/2018 00:00,661,0,0,0,0,0,0.434,0.434 3 | 31/12/2018 23:45,9922,0,0,0,0,0,0.684,0.684 4 | -------------------------------------------------------------------------------- /data/15minute_data_austin_processed.csv: -------------------------------------------------------------------------------- 1 | time,dataid,air,car,clotheswasher,dishwasher,dry,non-shiftable,total 2 | 01/01/2018 00:00,661,0,0,0,0.001,0,0.446,0.447 3 | 31/12/2018 23:45,9922,-0.003,0,0.078,0.001,0.001,0.843,0.92 4 | -------------------------------------------------------------------------------- /data/baselines_regr_temp_correction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response/1d972a0a8f7f32824a9d72945524ce77ed889044/data/baselines_regr_temp_correction.npy -------------------------------------------------------------------------------- /data/baselines_regr_temp_correction_old.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response/1d972a0a8f7f32824a9d72945524ce77ed889044/data/baselines_regr_temp_correction_old.npy -------------------------------------------------------------------------------- /data/outdoor_temperatures_noaa.csv: -------------------------------------------------------------------------------- 1 | WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE T_CALC T_HR_AVG T_MAX T_MIN P_CALC SOLARAD SOLARAD_FLAG SOLARAD_MAX 
SOLARAD_MAX_FLAG SOLARAD_MIN SOLARAD_MIN_FLAG SUR_TEMP_TYPE SUR_TEMP SUR_TEMP_FLAG SUR_TEMP_MAX SUR_TEMP_MAX_FLAG SUR_TEMP_MIN SUR_TEMP_MIN_FLAG RH_HR_AVG RH_HR_AVG_FLAG SOIL_MOISTURE_5 SOIL_MOISTURE_10 SOIL_MOISTURE_20 SOIL_MOISTURE_50 SOIL_MOISTURE_100 SOIL_TEMP_5 SOIL_TEMP_10 SOIL_TEMP_20 SOIL_TEMP_50 SOIL_TEMP_100 2 | 23907 20180101 0100 20171231 1900 2.423 -98.08 30.62 -4.1 -3.8 -3.5 -4.1 0.0 0 0 0 0 0 0 C -0.9 0 -0.7 0 -1.3 0 92 0 0.372 0.422 -99.000 -99.000 -99.000 7.0 9.0 -9999.0 -9999.0 -9999.0 3 | 23907 20190101 0000 20181231 1800 2.623 -98.08 30.62 12.9 13.5 14.8 12.7 0.0 29 0 96 0 0 0 C 11.5 0 12.9 0 9.8 0 47 0 0.509 0.502 -99.000 -99.000 -99.000 10.3 10.1 -9999.0 -9999.0 -9999.0 4 | -------------------------------------------------------------------------------- /environment/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from utils.load_demand import load_requests, load_day, get_device_demands, load_baselines, get_peak_demand 4 | from params import RHO, CUSTOMER_ACTION_SIZE, TRAINING_START_DAY, TRAINING_END_DAY, INCENTIVE_RATES, \ 5 | AGGREGATOR_ACTION_SIZE, TIME_STEPS_TRAIN, DEVICE_CONSUMPTION, DISSATISFACTION_COEFFICIENTS, \ 6 | DEVICE_NON_INTERRUPTIBLE, RHO_AGGREGATOR, DEVICES, \ 7 | CRITICAL_THRESHOLD_RELATIVE, MAX_TOTAL_DEMAND, MAX_INCENTIVE, RHO_COMMON, POWER_RATES, BASELINE_START_DAY, \ 8 | NUM_AGENTS, DISSATISFACTION_COEFFICIENTS_STD, DISSATISFACTION_COEFFICIENTS_MIN, TESTING_START_DAY, TESTING_END_DAY 9 | 10 | 11 | def knapsack(values, weights, capacity): 12 | """ Schedule the devices based on their dissatisfaction (the values) and their consumption (the weights). The 13 | customer selected a fraction of demand as consumption (capacity). This function brute-forces the optimal knapsack 14 | solution. 
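    Illustrative example (made-up values, not taken from the data):

        values   = np.array([3.0, 0.4, 0.1])   # dissatisfaction avoided by running each device now
        weights  = np.array([2.5, 4.0, 1.0])   # device consumption in kW
        capacity = 4.0                          # power_rate * shiftable demand
        # knapsack(values, weights, capacity) -> (3.1, 3.5, [True, False, True]):
        # devices 0 and 2 are scheduled, device 1 is delayed.

    With n non-zero values all 2**n subsets are enumerated, which is only practical
    because len(DEVICES) is small (5).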
""" 15 | non_zero_values = np.nonzero(values)[0] 16 | n = len(non_zero_values) 17 | max_value = 0 18 | max_weight = capacity 19 | max_actions = np.zeros(len(values), dtype=bool) 20 | 21 | if n == 0: 22 | return 0, 0, max_actions 23 | 24 | for i in range(2 ** n): 25 | actions = np.array([int(x) for x in list(f'{i:b}'.zfill(n))], dtype=bool) 26 | action_indices = non_zero_values[actions] 27 | value = values[action_indices].sum() 28 | weight = weights[action_indices].sum() 29 | if (weight <= capacity and value > max_value) or (weight <= max_weight and value == max_value): 30 | max_value = value 31 | max_weight = weight 32 | max_actions = np.zeros(len(values), dtype=bool) 33 | max_actions[action_indices] = True 34 | 35 | return max_value, max_weight, max_actions 36 | 37 | 38 | def knapsack_ensemble(values, weights, capacity, dissatisfaction_coefficients): 39 | max_values = [] 40 | max_weights = [] 41 | max_actionss = [] 42 | rates = [] 43 | ac_index = DEVICES.index('air') 44 | ac_consumption = weights[ac_index] 45 | ac_max_value = values[ac_index] 46 | for rate in POWER_RATES[1:]: 47 | ac_weight = ac_consumption * rate 48 | ac_reduction = ac_consumption - ac_weight 49 | ac_value = dissatisfaction_coefficients[ac_index] * np.square(ac_reduction) 50 | ac_value = ac_max_value - ac_value 51 | weights[ac_index] = ac_weight 52 | values[ac_index] = ac_value 53 | max_value, max_weight, max_actions = knapsack(values, weights, capacity) 54 | max_values.append(max_value) 55 | max_weights.append(max_weight) 56 | max_actionss.append(max_actions) 57 | rates.append(rate) 58 | 59 | sorted_values = sorted(zip(max_values, max_weights, max_actionss, rates), key=lambda elem: (-elem[0], elem[1])) 60 | return sorted_values[0] 61 | 62 | 63 | def sample_action_customer(): 64 | """ Sample a random action for the customer. """ 65 | return np.random.randint(0, CUSTOMER_ACTION_SIZE) 66 | 67 | 68 | def sample_action_aggregator(): 69 | """ Sample a random action for the aggregator. """ 70 | return np.random.randint(0, AGGREGATOR_ACTION_SIZE) 71 | 72 | 73 | class Environment: 74 | """ The AggregatorAgent and the CustomerAgents interact with the Environment. The Environment controls input of 75 | device requests and demands. It schedules devices with the knapsack algorithm for the CustomerAgents. 
Finally, 76 | it calculates the rewards.""" 77 | 78 | def __init__(self, data_ids, heterogeneous=False, baseline=False): 79 | self.data_ids = data_ids 80 | self.episode = 0 81 | self.df = load_requests() 82 | self.heterogeneous = heterogeneous 83 | self.baseline = baseline 84 | self.dissatisfaction_coefficients = np.full((len(data_ids), len(DEVICES)), DISSATISFACTION_COEFFICIENTS) 85 | if heterogeneous: 86 | dissatisfaction_coefficients = np.random.normal( 87 | loc=DISSATISFACTION_COEFFICIENTS, scale=DISSATISFACTION_COEFFICIENTS_STD, size=(NUM_AGENTS, len(DEVICES))) 88 | self.dissatisfaction_coefficients = np.maximum(DISSATISFACTION_COEFFICIENTS_MIN, dissatisfaction_coefficients) 89 | 90 | def reset(self, day=None, max_steps=TIME_STEPS_TRAIN): 91 | self.day = day 92 | if day is None: 93 | # self.day = np.random.randint(TRAINING_START_DAY, TRAINING_END_DAY) 94 | day_range = [(TRAINING_START_DAY, TESTING_START_DAY), (TESTING_END_DAY, TRAINING_END_DAY)][np.random.randint(0, 2)] 95 | self.day = np.random.randint(*day_range) 96 | self.curr_step = 0 97 | self.episode += 1 98 | self.done = False 99 | self.max_steps = max_steps 100 | 101 | # Customer agent params 102 | self.demand = np.zeros((max_steps, len(self.data_ids))) 103 | self.non_shiftable_load = np.zeros((max_steps, len(self.data_ids))) 104 | self.requests_new = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # Incoming requests from PecanStreet 105 | self.request_loads = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # The load in kW for open requests 106 | self.requests_started = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # Request for Non-interruptible devices that have been started but are still running 107 | self.requests_open = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # How many time steps are still unfulfilled for a request for a device (the length of the request) 108 | self.requests_delayed = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # How many time steps a device has been delayed 109 | 110 | self.possible_actions = np.zeros((max_steps, len(self.data_ids))) # The devices scheduled by knapsack in each time step 111 | self.power_rates = np.zeros((max_steps, len(self.data_ids))) # The devices scheduled by knapsack in each time step 112 | self.request_actions = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # The devices scheduled by knapsack in each time step 113 | self.ac_rates = np.zeros((max_steps, len(self.data_ids))) 114 | self.consumptions = np.zeros((max_steps, len(self.data_ids))) # Total consumtpion by each agent in each time step # Total consumtpion by each agent in each time step 115 | self.incentive_received = np.zeros((max_steps, len(self.data_ids))) 116 | self.rewards_customers = np.zeros((max_steps, len(self.data_ids))) 117 | self.dissatisfaction = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) 118 | self.customer_reward_matrix = np.zeros((max_steps, len(INCENTIVE_RATES), len(self.data_ids), len(POWER_RATES))) 119 | self.aggregator_reward_matrix = np.zeros((max_steps, len(INCENTIVE_RATES))) 120 | 121 | # Aggregator agent params 122 | self.incentives = np.zeros(max_steps) 123 | self.rewards_aggregator = np.zeros(max_steps) 124 | 125 | # Demand data params 126 | self.day_df = load_day(self.df, self.day, max_steps) 127 | self.baselines = load_baselines() 128 | self.set_demands() 129 | # self.capacity_threshold = CRITICAL_THRESHOLD 130 | self.capacity_threshold = get_peak_demand(self.day_df) * 
CRITICAL_THRESHOLD_RELATIVE 131 | 132 | def last_customer(self, agent_id): 133 | """ The CustomerAgent can call this method to receive the previous reward and the next observation. 134 | The observation consists of the state of the household appliances and the offered incentive. The state of the 135 | household appliances is defined as an integer, 0 for no request or requests for non-interruptible devices that 136 | have been started, 1 for a new request and > 1 if the request has been delayed. """ 137 | incentive = self.incentives[self.curr_step] 138 | baseline = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 139 | new_requests = self.requests_new[self.curr_step][agent_id] 140 | started_requests = self.requests_started[self.curr_step][agent_id] 141 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 142 | delays = self.requests_delayed[self.curr_step][agent_id] 143 | new_delays = new_requests + delays 144 | open_delays = new_delays * np.invert(started_requests) 145 | ac_consumption = self.request_loads[self.curr_step][agent_id][0] 146 | non_shiftable = self.non_shiftable_load[self.curr_step][agent_id] 147 | non_interruptible = (np.logical_and(open_requests, started_requests) * DEVICE_CONSUMPTION).sum() 148 | observation = np.array(np.concatenate(([ac_consumption], open_delays[1:], [non_shiftable + non_interruptible, incentive, baseline]))) 149 | reward = self.rewards_customers[self.curr_step][agent_id] 150 | done = self.done 151 | return observation, reward, done, None 152 | 153 | def last_aggregator(self): 154 | """ The AggregatorAgent can call this method to receive the previous reward and the next observation. 155 | The observation only contains the total demand of the customer together. """ 156 | total_demand = self.get_total_demand(self.curr_step) 157 | # total_demand = self.baselines[:, self.day - TRAINING_START_DAY, self.curr_step].sum(axis=0) 158 | threshold = self.capacity_threshold 159 | reduction = self.get_total_reduction(self.curr_step-1 if self.curr_step > 0 else 0) 160 | observation = np.array([total_demand, threshold, reduction]) 161 | reward = self.rewards_aggregator[self.curr_step] 162 | done = self.done 163 | return observation, reward, done, None 164 | 165 | def act(self, agent_id, action): 166 | """ Apply the action selected by a CustomerAgent. 167 | The agent selects a power rate and sends it to the environment. Based on this power rate this method calls the 168 | knapsack algorithm and determines the devices scheduled for this time step. Afterwards this method calculates 169 | the new state of the appliances taking device-specific constraints into account. 
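    Reward sketch with made-up numbers (RHO = 0.5 as in params.py):

        # baseline_demand = 3.0, consumption = 2.0, incentive_rate = 4
        incentive_received = 4 * max(0, 3.0 - 2.0)    # = 4.0
        dissatisfaction    = 1.5                      # value of the devices that were not scheduled
        reward = 0.5 * incentive_received - (1 - 0.5) * dissatisfaction   # = 1.25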
""" 170 | # Get power rate and incentive rate 171 | incentive_rate = self.incentives[self.curr_step] 172 | baseline_demand = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 173 | car_index = DEVICES.index('car') 174 | ac_index = DEVICES.index('air') 175 | power_rate = POWER_RATES[action] 176 | if self.baseline: 177 | power_rate = POWER_RATES[np.argmax(self.customer_reward_matrix[self.curr_step][int(incentive_rate)][agent_id])] 178 | 179 | # Get requests and demands 180 | started_requests = self.requests_started[self.curr_step][agent_id] 181 | new_requests = self.requests_new[self.curr_step][agent_id] 182 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 183 | delayed_requests = self.requests_delayed[self.curr_step][agent_id] 184 | selectable_requests = np.logical_and(open_requests, np.invert(started_requests)) 185 | non_interruptible_requests = np.logical_and(open_requests, started_requests) 186 | non_interruptible_demand = (non_interruptible_requests * DEVICE_CONSUMPTION).sum() 187 | non_shiftable_demand = self.non_shiftable_load[self.curr_step][agent_id] 188 | device_consumptions = selectable_requests * DEVICE_CONSUMPTION 189 | device_consumptions[ac_index] = self.request_loads[self.curr_step][agent_id][ac_index] 190 | 191 | # Brute-force knapsack 192 | dissatisfaction_values = self.dissatisfaction_coefficients[agent_id] * np.square(delayed_requests + 1) 193 | dissatisfaction_values[ac_index] = self.dissatisfaction_coefficients[agent_id][ac_index] * np.square(device_consumptions[ac_index]) 194 | device_values = dissatisfaction_values * selectable_requests 195 | shiftable_demand = (selectable_requests * device_consumptions).sum() 196 | capacity = power_rate * shiftable_demand 197 | value, weight, actions, ac_rate = knapsack_ensemble(device_values, device_consumptions, capacity, self.dissatisfaction_coefficients[agent_id]) 198 | delayed_devices = np.invert(actions) * selectable_requests 199 | dissatisfaction = device_values.sum() - value 200 | 201 | # Calculate received incentive 202 | consumption = weight + non_interruptible_demand + non_shiftable_demand 203 | energy_diff = baseline_demand - consumption 204 | # energy_diff = self.demand[self.curr_step, agent_id] - consumption 205 | # energy_diff = shiftable_demand - weight 206 | incentive_received = incentive_rate * max(0, energy_diff) 207 | 208 | # Calculate reward 209 | incentive_term = RHO * incentive_received 210 | dissatisfaction_term = (1 - RHO) * -dissatisfaction 211 | reward = incentive_term + dissatisfaction_term 212 | # reward = max(MINIMUM_CUSTOMER_REWARD, incentive_term + dissatisfaction_term) 213 | 214 | # Save selected devices, energy consumption and received incentive 215 | fulfilled_requests = np.logical_or(actions, non_interruptible_requests) 216 | self.possible_actions[self.curr_step][agent_id] = np.count_nonzero(selectable_requests) 217 | self.request_actions[self.curr_step][agent_id] = fulfilled_requests 218 | self.consumptions[self.curr_step][agent_id] = consumption 219 | self.incentive_received[self.curr_step][agent_id] = incentive_received 220 | self.power_rates[self.curr_step][agent_id] = power_rate if selectable_requests.any() else 1 221 | self.ac_rates[self.curr_step][agent_id] = ac_rate * actions[ac_index] if selectable_requests[ac_index] else 1 222 | self.dissatisfaction[self.curr_step][agent_id] = device_values * delayed_devices 223 | self.dissatisfaction[self.curr_step][agent_id][ac_index] = self.dissatisfaction_coefficients[agent_id][ac_index] * 
np.square((1 - ac_rate) * device_consumptions[ac_index]) 224 | 225 | # Update parameters for use in the next time step 226 | if self.curr_step < self.max_steps - 1: 227 | started_non_interruptibles = actions * DEVICE_NON_INTERRUPTIBLE 228 | open_requests_next = open_requests - fulfilled_requests 229 | 230 | self.rewards_customers[self.curr_step + 1][agent_id] = reward 231 | self.requests_open[self.curr_step + 1][agent_id] = open_requests_next 232 | self.requests_started[self.curr_step + 1][agent_id] = started_non_interruptibles + non_interruptible_requests 233 | self.requests_delayed[self.curr_step + 1][agent_id] = delayed_requests + delayed_devices 234 | self.requests_delayed[self.curr_step + 1][agent_id][started_non_interruptibles] = 0 235 | 236 | # If all requested time slots for the EV are fulfilled reset the delay 237 | if open_requests_next[car_index] == 0: 238 | self.requests_delayed[self.curr_step + 1][agent_id][car_index] = 0 239 | 240 | # AC has no delay 241 | self.requests_delayed[self.curr_step + 1][agent_id][ac_index] = 0 242 | self.requests_open[self.curr_step + 1][agent_id][ac_index] = 0 243 | 244 | def act_aggregator(self, action): 245 | """ Apply the action selected by the AggregatorAgent. 246 | The agent selects the incentive rate and sends it to the environment. The environment saves the incentive to 247 | send it to the CustomerAgents later. """ 248 | incentive_rate = INCENTIVE_RATES[action] 249 | self.incentives[self.curr_step] = incentive_rate 250 | if self.baseline: 251 | print('Computing baseline step:', self.curr_step) 252 | self.compute_best_responses() 253 | self.incentives[self.curr_step] = np.argmax(self.aggregator_reward_matrix[self.curr_step]) 254 | 255 | def step(self): 256 | """ This method is called at the end of a time step. 257 | If it was not the final time step, the reward for the aggregator is calculated and demands for the next 258 | time step are retrieved. """ 259 | self.curr_step += 1 260 | self.done = self.curr_step == self.max_steps 261 | if not self.done: 262 | self.reward_aggregator() 263 | self.set_demands() 264 | self.set_incentive() 265 | 266 | def reward_aggregator(self): 267 | """ Calculate the reward for the aggregator. 268 | The reward consists of a consumption term, indicating how much the total consumption exceeds the threshold, and 269 | an incentive term, indicating how much each agent received on average. The term is normalized instead of taking 270 | the total, because the number of RL agents may differ. """ 271 | consumption_term = max(0, self.get_total_consumption(self.curr_step - 1) - self.capacity_threshold) 272 | incentive_term = self.incentive_received[self.curr_step - 1].sum() / 100 273 | reward = - RHO_AGGREGATOR * consumption_term - (1 - RHO_AGGREGATOR) * incentive_term 274 | self.rewards_aggregator[self.curr_step] = reward 275 | 276 | customer_reward = self.rewards_customers[self.curr_step] 277 | customer_bonus = RHO_COMMON * customer_reward - (1 - RHO_COMMON) * consumption_term 278 | self.rewards_customers[self.curr_step] = customer_bonus 279 | 280 | def set_demands(self): 281 | """ Retrieve the demands per customer and per device for the current time step from the demands DataFrame. 282 | If the demand is larger than a certain threshold the device is considered requested by the user. The actual 283 | load in kW that is requested for the device is fixed, except for the total non-shiftable devices. 
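    Illustrative mapping for one customer at one time step (a made-up row, not taken
    from the Pecan Street data):

        # DEVICES order: ['air', 'car', 'clotheswasher', 'dishwasher', 'dry']
        row = {'air': 1.8, 'car': 0.0, 'clotheswasher': 0.0, 'dishwasher': 0.9,
               'dry': 0.0, 'non-shiftable': 0.4, 'total': 3.1}
        # requests_new       -> [True, False, False, True, False]
        # non_shiftable_load -> 0.4
        # demand             -> 3.1
        # When scheduled later, the dishwasher is booked at its fixed
        # DEVICE_CONSUMPTION of 2.0 kW, while the AC uses the requested 1.8 kW.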
""" 284 | df = get_device_demands(self.day_df, self.data_ids, self.day, self.curr_step) 285 | non_shiftable = df['non-shiftable'].to_numpy() 286 | total = df['total'].to_numpy() 287 | requests = df[DEVICES].to_numpy() 288 | request_new = np.greater(requests, 0) 289 | self.non_shiftable_load[self.curr_step] = non_shiftable 290 | self.requests_new[self.curr_step] = request_new 291 | self.request_loads[self.curr_step] = requests 292 | self.demand[self.curr_step] = total 293 | 294 | def get_total_demand(self, step=None): 295 | """ Sum the demands of the customer agents. """ 296 | if step is None: 297 | return self.demand.sum(axis=1) 298 | return self.demand[step].sum() 299 | 300 | def get_total_consumption(self, step=None): 301 | """ Sum the consumptions of the customer agents. """ 302 | if step is None: 303 | return self.consumptions.sum(axis=1) 304 | return self.consumptions[step].sum() 305 | 306 | def get_total_reduction(self, step=None): 307 | if step is None: 308 | return self.get_total_demand() - self.get_total_consumption() 309 | return self.get_total_demand(step) - self.get_total_consumption(step) 310 | 311 | def set_incentive(self): 312 | """ A simple heuristic for calculating incentives without the aggregator as an agent. 313 | The incentive is a linear relation to the demand exceeding the capacity. """ 314 | total_demand = min(MAX_TOTAL_DEMAND, self.get_total_demand(self.curr_step)) 315 | demand_range = MAX_TOTAL_DEMAND - self.capacity_threshold 316 | demand_overflow = max(0, total_demand - self.capacity_threshold) 317 | incentive = np.ceil((demand_overflow / demand_range) * MAX_INCENTIVE) 318 | self.incentives[self.curr_step] = incentive 319 | 320 | def set_baseline(self): 321 | """ Average the pre-computed baseline with the consumption of the last time step for a more accurate result. 
""" 322 | baseline_demand = self.baselines[:, self.day - BASELINE_START_DAY, self.curr_step] 323 | new_baseline_demand = (baseline_demand + self.consumptions[self.curr_step - 1]) / 2 324 | self.baselines[:, self.day - BASELINE_START_DAY, self.curr_step] = new_baseline_demand 325 | 326 | def compute_best_responses(self): 327 | rewards = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 328 | profits = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 329 | consumptions = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 330 | best_profits = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids))) 331 | best_consumptions = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids))) 332 | for i, incentive_rate in enumerate(INCENTIVE_RATES): 333 | for agent_id in range(len(self.data_ids)): 334 | for j, power_rate in enumerate(POWER_RATES): 335 | baseline_demand = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 336 | started_requests = self.requests_started[self.curr_step][agent_id] 337 | new_requests = self.requests_new[self.curr_step][agent_id] 338 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 339 | delayed_requests = self.requests_delayed[self.curr_step][agent_id] 340 | selectable_requests = np.logical_and(open_requests, np.invert(started_requests)) 341 | non_interruptible_requests = np.logical_and(open_requests, started_requests) 342 | non_interruptible_demand = (non_interruptible_requests * DEVICE_CONSUMPTION).sum() 343 | non_shiftable_demand = self.non_shiftable_load[self.curr_step][agent_id] 344 | device_consumptions = selectable_requests * DEVICE_CONSUMPTION 345 | device_consumptions[0] = self.request_loads[self.curr_step][agent_id][0] 346 | dissatisfaction_values = self.dissatisfaction_coefficients[agent_id] * np.square(delayed_requests + 1) 347 | dissatisfaction_values[0] = self.dissatisfaction_coefficients[agent_id][0] * np.square(device_consumptions[0]) 348 | device_values = dissatisfaction_values * selectable_requests 349 | shiftable_demand = (selectable_requests * device_consumptions).sum() 350 | capacity = power_rate * shiftable_demand 351 | value, weight, actions, ac_rate = knapsack_ensemble(device_values, device_consumptions, capacity, self.dissatisfaction_coefficients[agent_id]) 352 | dissatisfaction = device_values.sum() - value 353 | consumption = weight + non_interruptible_demand + non_shiftable_demand 354 | profit = incentive_rate * max(0, baseline_demand - consumption) 355 | reward = profit - dissatisfaction 356 | rewards[i, agent_id, j] = reward 357 | profits[i, agent_id, j] = profit 358 | consumptions[i, agent_id, j] = consumption 359 | 360 | best_profits[i][agent_id] = profits[i][agent_id][np.argmax(rewards[i, agent_id])] 361 | best_consumptions[i][agent_id] = consumptions[i][agent_id][np.argmax(rewards[i, agent_id])] 362 | 363 | aggregator_rewards = - best_profits.sum(axis=1) / 100 - np.maximum(0, best_consumptions.sum(axis=1) - self.capacity_threshold) 364 | self.customer_reward_matrix[self.curr_step] = rewards 365 | self.aggregator_reward_matrix[self.curr_step] = aggregator_rewards 366 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import time 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import tensorflow as tf 7 | import matplotlib.dates as mdates 8 | from matplotlib 
import ticker 9 | 10 | from agents.agent_aggregator_dqn import AggregatorAgent 11 | from agents.agent_customer import CustomerAgent, construct_network 12 | from environment.environment import Environment 13 | from params import EPISODES, TIME_STEPS_TEST, TRAINING_START_DAY, NUM_RL_AGENTS, \ 14 | AGENT_IDS, TIME_STEPS_TRAIN, TRAINING_PERIOD, TRAINING_END_DAY, AGENT_CLASS, CLASS_RHO, CLASS_DC, \ 15 | DISSATISFACTION_COEFFICIENTS, REPLACE_TARGET_INTERVAL, TESTING_START_DAY, TESTING_END_DAY, BASELINE_START_DAY, \ 16 | TESTING_PERIOD 17 | 18 | env = Environment(AGENT_IDS, heterogeneous=False, baseline=False) 19 | aggregator_agent = AggregatorAgent(env) 20 | customer_agents = [CustomerAgent(agent_id, data_id, env, dummy=agent_id >= NUM_RL_AGENTS) for agent_id, data_id in enumerate(AGENT_IDS)] 21 | 22 | 23 | def main(): 24 | train(log=True, save=True) 25 | test_single_day(path=None, day=182) 26 | # test_single_day(path='save_files/MARL_IDR_2', day=182) 27 | # test_average(path='save_files/X2_a') 28 | # for day in range(181, 243): 29 | # test_single_day(path='save_files/Case_1_b', day=day) 30 | 31 | 32 | def train(log, save): 33 | # Use TensorBoard for the learning curves 34 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 35 | start = time.time() 36 | # log_name = 'no_baseline_' + current_time 37 | log_name = 'MARL_IDR_6' 38 | if log: 39 | log_path = os.path.join('logs', log_name) 40 | tf_writer = tf.summary.create_file_writer(log_path) 41 | 42 | # Train with trained customer agents 43 | # for agent in customer_agents: 44 | # agent.load('save_files/Case_2_b') 45 | 46 | # Train with trained aggregator 47 | # aggregator_agent.load('save_files/K8') 48 | 49 | aggregator_turn = True 50 | 51 | for episode in range(EPISODES): 52 | start_episode = time.time() 53 | 54 | # Reset environment and agents 55 | env.reset(max_steps=TIME_STEPS_TRAIN) 56 | aggregator_agent.reset() 57 | for agent in customer_agents: 58 | agent.reset() 59 | 60 | if episode % REPLACE_TARGET_INTERVAL == 0: 61 | aggregator_turn = False if aggregator_turn else True 62 | 63 | # Train single episode 64 | while not env.done: 65 | aggregator_agent.act(train=True) 66 | # aggregator_agent.act(train=True) if aggregator_turn else aggregator_agent.act(train=False) 67 | for agent in customer_agents: 68 | agent.act(train=True) 69 | # agent.act(train=False) if aggregator_turn else agent.act(train=True) 70 | env.step() 71 | 72 | if episode % 1 == 0: 73 | print('Episode:', episode) 74 | print('Aggregator turn:', aggregator_turn) 75 | print('Day:', datetime.datetime.strptime('{} {}'.format(env.day, 2018), '%j %Y')) 76 | print('Episode run time:', (time.time() - start_episode), 'sec') 77 | print('Cumulated run time:', (time.time() - start), 'sec') 78 | 79 | if log: 80 | with tf_writer.as_default(): 81 | tf.summary.scalar("epsilon/agent_{}".format(customer_agents[0].data_id), customer_agents[0].epsilon, episode) 82 | tf.summary.scalar("reward/agent_{}".format(customer_agents[0].data_id), customer_agents[0].acc_reward, episode) 83 | tf.summary.scalar("reward/agent_{}".format(customer_agents[8].data_id), customer_agents[8].acc_reward, episode) 84 | tf.summary.scalar("reward/agent_{}".format(customer_agents[17].data_id), customer_agents[17].acc_reward, episode) 85 | tf.summary.scalar("reward/aggregator", aggregator_agent.acc_reward, episode) 86 | tf.summary.scalar("epsilon/aggregator", aggregator_agent.epsilon, episode) 87 | 88 | # Save trained networks 89 | if save: 90 | path = 'save_files/' + log_name 91 | os.mkdir(path) 92 | 
aggregator_agent.save(path) 93 | for agent in customer_agents: 94 | agent.save(path) 95 | 96 | print('Training done') 97 | print() 98 | 99 | 100 | def test_single_day(path, day=TRAINING_START_DAY): 101 | env.reset(day=day, max_steps=TIME_STEPS_TEST) 102 | 103 | # Load agents 104 | if path is not None: 105 | aggregator_agent.load(path) 106 | for agent in customer_agents: 107 | agent.load(path) 108 | 109 | # Run single day 110 | start = time.time() 111 | for iteration in range(TIME_STEPS_TEST): 112 | aggregator_agent.act(train=False) 113 | for agent in customer_agents: 114 | agent.act(train=False) 115 | env.step() 116 | end = time.time() 117 | 118 | plot_agent = customer_agents[0] 119 | plot_hourly_average = False # Average demands, consumptions and incentives per hour to make the plot more readable 120 | plot_incentive = True # Plot the incentive 121 | time_labels = [datetime.datetime(year=2018, month=1, day=1) + datetime.timedelta(days=day - 1, minutes=i * 15) for i in range(TIME_STEPS_TEST)] 122 | 123 | print_metrics(end - start, path) 124 | for plot_agent in customer_agents: 125 | plot_aggregated_load_curve(time_labels, day, path) 126 | plot_single_load_curve(plot_agent, time_labels, day, path) 127 | plot_ac_reduction(plot_agent, time_labels, path, plot_incentive) 128 | plot_dissatisfaction(plot_agent, time_labels, path, plot_incentive) 129 | plot_schedule(plot_agent, time_labels, path) 130 | plt.show() 131 | 132 | 133 | def test_average(path): 134 | agents_rewards = np.zeros((TESTING_PERIOD, NUM_RL_AGENTS)) 135 | incentives_received = np.zeros((TESTING_PERIOD, NUM_RL_AGENTS)) 136 | aggregator_rewards = np.zeros(TESTING_PERIOD) 137 | total_demands = np.zeros((TESTING_PERIOD, TIME_STEPS_TEST)) 138 | total_consumptions = np.zeros((TESTING_PERIOD, TIME_STEPS_TEST)) 139 | peak_demand = np.zeros(TESTING_PERIOD) 140 | peak_consumption = np.zeros(TESTING_PERIOD) 141 | mean_demand = np.zeros(TESTING_PERIOD) 142 | mean_consumption = np.zeros(TESTING_PERIOD) 143 | thresholds = np.zeros(TESTING_PERIOD) 144 | runtime = np.zeros(TESTING_PERIOD) 145 | 146 | for day in range(TESTING_START_DAY, TESTING_END_DAY): 147 | print('Test on', datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y')) 148 | env.reset(day=day, max_steps=TIME_STEPS_TEST) 149 | 150 | # Load agents 151 | if path is not None: 152 | aggregator_agent.load(path) 153 | for agent in customer_agents: 154 | agent.load(path) 155 | 156 | # Run single day 157 | start = time.time() 158 | for iteration in range(TIME_STEPS_TEST): 159 | aggregator_agent.act(train=False) 160 | for agent in customer_agents: 161 | agent.act(train=False) 162 | env.step() 163 | end = time.time() 164 | 165 | agents_rewards[day - TESTING_START_DAY] = env.rewards_customers.sum(axis=0)[:NUM_RL_AGENTS] 166 | aggregator_rewards[day - TESTING_START_DAY] = env.rewards_aggregator.sum() 167 | incentives_received[day - TESTING_START_DAY] = env.incentive_received.sum(axis=0)[:NUM_RL_AGENTS] 168 | total_demands[day - TESTING_START_DAY] = env.get_total_demand() 169 | total_consumptions[day - TESTING_START_DAY] = env.get_total_consumption() 170 | peak_demand[day - TESTING_START_DAY] = np.max(env.get_total_demand()) 171 | peak_consumption[day - TESTING_START_DAY] = np.max(env.get_total_consumption()) 172 | mean_demand[day - TESTING_START_DAY] = np.mean(env.get_total_demand()) 173 | mean_consumption[day - TESTING_START_DAY] = np.mean(env.get_total_consumption()) 174 | thresholds[day - TESTING_START_DAY] = env.capacity_threshold 175 | runtime[day - TESTING_END_DAY] = end - start 
176 | 177 | print('Path:', str(path)) 178 | print('Test period', datetime.datetime.strptime('{} {}'.format(TESTING_START_DAY, 2018), '%j %Y'), 179 | datetime.datetime.strptime('{} {}'.format(TESTING_END_DAY, 2018), '%j %Y')) 180 | print('Mean run time:', np.mean(runtime)) 181 | print('Mean customer reward:', np.mean(agents_rewards)) 182 | print('Mean customer reward per agent:', np.mean(agents_rewards, axis=0)) 183 | print('Mean customer reward per day:', np.mean(agents_rewards, axis=1)) 184 | print('Mean aggregator reward per day:', np.mean(aggregator_rewards)) 185 | print() 186 | print('Metrics averaged over testing period', ' No DR', ' with DR') 187 | print('Peak load:', np.mean(peak_demand), np.mean(peak_consumption)) 188 | print('Mean load:', np.mean(mean_demand), np.mean(mean_consumption)) 189 | print('PAR:', np.mean(peak_demand) / np.mean(mean_demand), np.mean(peak_consumption) / np.mean(mean_consumption)) 190 | print('Mean incentive paid:', '0', np.mean(np.sum(incentives_received, axis=1))) 191 | print('Mean incentive received per agent:', '0', np.mean(incentives_received)) 192 | print('Mean threshold exceedance:', 193 | np.mean(np.sum(np.maximum(0, total_demands - thresholds[:, None]), axis=1)), 194 | np.mean(np.sum(np.maximum(0, total_consumptions - thresholds[:, None]), axis=1))) 195 | 196 | 197 | def print_metrics(run_time, path): 198 | agents_rewards = env.rewards_customers.sum(axis=0)[:NUM_RL_AGENTS] 199 | incentives_received = env.incentive_received.sum(axis=0)[:NUM_RL_AGENTS] 200 | test_day = datetime.datetime.strptime('{} {}'.format(env.day, 2018), '%j %Y') 201 | peak_demand = np.max(env.get_total_demand()) 202 | peak_consumption = np.max(env.get_total_consumption()) 203 | mean_demand = np.mean(env.get_total_demand()) 204 | mean_consumption = np.mean(env.get_total_consumption()) 205 | print('Path:', str(path)) 206 | print('Test on', test_day) 207 | print('Run time:', run_time, 'sec') 208 | print('Customer agent rewards:', agents_rewards) 209 | print('Mean customer agent reward:', np.mean(agents_rewards)) 210 | print('Aggregator agent reward:', env.rewards_aggregator.sum()) 211 | print('Metrics', ' No DR', ' with DR') 212 | print('Peak load:', peak_demand, peak_consumption) 213 | print('Mean load:', mean_demand, mean_consumption) 214 | print('Std:', np.std(env.get_total_demand()), np.std(env.get_total_consumption())) 215 | print('PAR:', peak_demand / mean_demand, peak_consumption / mean_consumption) 216 | print('Total incentive paid:', '0', np.sum(incentives_received)) 217 | print('Mean incentive received:', '0', np.mean(incentives_received)) 218 | print('Threshold exceedance:', 219 | np.sum(np.maximum(0, env.get_total_demand() - env.capacity_threshold)), 220 | np.sum(np.maximum(0, env.get_total_consumption() - env.capacity_threshold))) 221 | 222 | print('Demand:', repr(env.get_total_demand())) 223 | print('Load curve:', repr(env.get_total_consumption())) 224 | print('Incentives:', repr(env.incentives)) 225 | print('Capacity:', env.capacity_threshold) 226 | 227 | 228 | def plot_aggregated_load_curve(time_labels, day, path): 229 | ax, ax2 = init_plot() 230 | # ax = init_plot(twinx=False) 231 | 232 | ax.hlines(env.capacity_threshold, time_labels[0], time_labels[-1], label='Capacity', colors='black', linestyles='dashed', linewidth=3, alpha=0.8) 233 | # ax.fill_between(time_labels, env.non_shiftable_load.sum(axis=1), label='Non-shiftable demand', color='orange') 234 | ax.plot(time_labels, env.get_total_demand(), label='Without DR', color='C1', linestyle='dashed', linewidth=3) 
235 | # ax.plot(time_labels, env.baselines[:, day - TRAINING_START_DAY, :TIME_STEPS_TEST].sum(axis=0), label='Baseline', color='C3', linestyle='dashed', linewidth=3) 236 | ax.fill_between(time_labels, env.get_total_consumption(), label='With DR', color='C1', alpha=0.5) 237 | ax2.plot(time_labels, env.incentives, label='Incentive', color='C2', marker='x', markersize=8, linewidth=3, markeredgewidth=3) 238 | # ax.bar(time_labels, env.incentives, width=1/len(time_labels[48:]), label='Incentive', color='black') 239 | 240 | # Labels 241 | # ax.yaxis.set_major_locator(ticker.MultipleLocator(1)) 242 | ax.tick_params(axis='both', which='both', labelsize=18) 243 | ax2.tick_params(axis='both', which='both', labelsize=18) 244 | ax.set_xlim(time_labels[47], time_labels[-1]) 245 | ax2.set_xlim(time_labels[47], time_labels[-1]) 246 | ax.set_ylim(bottom=40) 247 | # ax.set_ylim(bottom=0, top=10) 248 | ax2.set_ylim(bottom=0, top=10) 249 | ax.set_ylabel('Aggregated consumption (kW)', fontsize=20) 250 | ax.set_ylabel('Incentive (¢)', fontsize=20) 251 | ax.legend(loc='upper left', fontsize=20) 252 | ax2.legend(loc='upper right', fontsize=20) 253 | ax.set_xlabel('Time (h)', fontsize=20) 254 | ax.grid(which='major', linewidth=1) 255 | ax.grid(which='minor', linewidth=0.5) 256 | plt.title('Load curve' + '\n' + str(path)) 257 | # plt.show() 258 | 259 | 260 | def plot_single_load_curve(plot_agent, time_labels, day, path): 261 | ax = init_plot(twinx=False) 262 | 263 | baseline = env.baselines[plot_agent.agent_id, day - BASELINE_START_DAY, :][:TIME_STEPS_TEST] 264 | 265 | # ax.fill_between(time_labels, env.non_shiftable_load[:, plot_agent.agent_id], label='Agent {} non-shiftable'.format(plot_agent.data_id), color='C0') 266 | ax.plot(time_labels, env.demand[:, plot_agent.agent_id], label='Without DR', color='C0', marker='o', markersize=8, linewidth=3) 267 | ax.plot(time_labels, env.consumptions[:, plot_agent.agent_id], label='MARL-DR', color='C3', marker='^', markersize=8, linewidth=3, alpha=1) 268 | ax.plot(time_labels, baseline, label='Baseline', color='black', linestyle='dashed', linewidth=3) 269 | ax.plot(time_labels, env.incentives, label='Incentive', color='C2', marker='x', markersize=8) 270 | # print('Incenties:', repr(env.incentives)) 271 | 272 | # Labels 273 | ax.tick_params(axis='both', which='both', labelsize=20) 274 | ax.set_xlim(time_labels[48], time_labels[-1]) 275 | # ax2.set_xlim(time_labels[47], time_labels[-1]) 276 | ax.set_ylim(bottom=0) 277 | # ax2.set_ylim(bottom=0) 278 | ax.set_ylabel('Power (kW)', fontsize=32) 279 | # ax2.set_ylabel('Incentive (¢)') 280 | ax.legend(loc='upper left', fontsize=20) 281 | # ax2.legend(loc='upper right') 282 | ax.set_xlabel('Time (h)', fontsize=32) 283 | ax.grid(which='major', linewidth=1) 284 | # ax.grid(which='minor', linewidth=0.5) 285 | # plt.title('Load curve' + '\n' + str(path)) 286 | # plt.show() 287 | 288 | 289 | def plot_ac_reduction(plot_agent, time_labels, path, plot_incentive): 290 | ax, ax2 = init_plot() 291 | 292 | ac_demand = env.request_loads[:, plot_agent.agent_id, 0] 293 | ac_consumption = env.ac_rates[:, plot_agent.agent_id] * env.request_loads[:, plot_agent.agent_id, 0] 294 | 295 | ax.plot(time_labels, ac_demand, label='Agent {} AC original'.format(plot_agent.data_id), color='C0', alpha=0.5) 296 | ax.plot(time_labels, ac_consumption, label='Agent {} AC actual'.format(plot_agent.data_id), linestyle='dashed', color='C0', alpha=0.5) 297 | ax.fill_between(time_labels, env.ac_rates[:, plot_agent.agent_id] * env.request_loads[:, plot_agent.agent_id, 0], 
color='C0', alpha=0.2) 298 | ax2.plot(time_labels, env.ac_rates[:, plot_agent.agent_id] * 10, label='Agent {} AC rate'.format(plot_agent.data_id), color='C1', alpha=0.5) 299 | 300 | if plot_incentive: 301 | ax2.plot(time_labels, env.incentives, label='Incentive', color='C2', alpha=1.0) 302 | 303 | # Labels 304 | ax.tick_params(axis='x', which='both', rotation=45) 305 | ax.set_xlim(time_labels[0], time_labels[-1]) 306 | ax2.set_xlim(time_labels[0], time_labels[-1]) 307 | ax.set_ylim(bottom=0) 308 | ax2.set_ylim(bottom=0) 309 | ax.set_ylabel('Total demand (kW)') 310 | ax2.set_ylabel('Incentive (¢)') 311 | ax.legend(loc='upper left') 312 | ax2.legend(loc='upper right') 313 | ax.set_xlabel('Time') 314 | ax.grid(which='major', linewidth=1) 315 | ax.grid(which='minor', linewidth=0.5) 316 | plt.title('Load curve' + '\n' + str(path)) 317 | # plt.show() 318 | 319 | 320 | def plot_dissatisfaction(plot_agent, time_labels, path, plot_incentive): 321 | # ax, ax2 = init_plot() 322 | ax = init_plot(twinx=False) 323 | a, b, c, d, e = -env.dissatisfaction[:, plot_agent.agent_id, :].T 324 | rewards = np.roll(env.rewards_customers[:, plot_agent.agent_id], -1) 325 | rewards[-1] = 0 326 | 327 | ax.stackplot(time_labels, b, c, d, e, a, labels=['EV', 'WM', 'DW', 'Dryer', 'AC']) 328 | ax.plot(time_labels, env.incentive_received[:, plot_agent.agent_id], label='Profit', color='black', marker='s', markersize=8, linewidth=3) 329 | ax.plot(time_labels, env.rewards_customers[:, plot_agent.agent_id], label='Total reward', color='C2', marker='o', markersize=8, linewidth=3) 330 | 331 | # Labels 332 | ax.tick_params(axis='both', which='both', labelsize=20) 333 | # ax2.tick_params(axis='both', which='both', labelsize=14) 334 | ax.set_xlim(time_labels[48], time_labels[-1]) 335 | # ax.set_ylim(bottom=0, top=15) 336 | ax.set_ylabel('Reward', fontsize=32) 337 | ax.legend(loc='upper left', fontsize=20) 338 | ax.set_xlabel('Time (h)', fontsize=32) 339 | ax.grid(which='major', linewidth=1) 340 | # ax.grid(which='minor', linewidth=0.5) 341 | # plt.title('Dissatisfaction' + '\n' + str(path)) 342 | # plt.show() 343 | 344 | 345 | def plot_schedule(plot_agent, time_labels, path): 346 | # ax, ax2 = init_plot() 347 | ax = init_plot(twinx=False) 348 | 349 | requests = env.requests_new[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 350 | actions = env.request_actions[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 351 | time_labels = time_labels[:TIME_STEPS_TRAIN] 352 | incentives = env.incentives[:TIME_STEPS_TRAIN] 353 | power_rates = env.power_rates[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 354 | ac_rates = env.ac_rates[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 355 | 356 | # Uncomment for hourly average incentives 357 | # incentives = np.mean(incentives.reshape(-1, 4), axis=1) 358 | 359 | for i, (time_label, req, act) in enumerate(zip(time_labels, requests, actions)): 360 | for j, (dev_name, dev_color, dev_req, dev_act) in enumerate(zip(['AC', 'EV', 'WM', 'DW', 'Dryer'], ['C4', 'C0', 'C1', 'C2', 'C3'], req, act)): 361 | height = 0.4 362 | request_bar = ax.barh(y=j + height, height=height, width=dev_req / 96, left=time_label, label=dev_name + ' original', color=dev_color, alpha=0.5, align='edge') 363 | if j == 0: 364 | height *= ac_rates[i] 365 | action_bar = ax.barh(y=dev_name, height=height, width=dev_act / 96, left=time_label, label=dev_name, color=dev_color, align='edge') 366 | 367 | # incentive_bar = ax.bar(time_labels, incentives, width=1 / 96, label='Incentive', align='edge', color='C1', alpha=0.4) 368 | # power_rate_bar = 
ax2.plot(time_labels, power_rates * 10, label='Power rates', color='C2') 369 | 370 | ax.tick_params(axis='both', which='both', labelsize=20) 371 | # ax2.tick_params(axis='both', which='both', labelsize=14) 372 | ax.set_xlim(time_labels[47], time_labels[-1]) 373 | # ax2.set_xlim(time_labels[47], time_labels[-1]) 374 | ax.set_ylim(bottom=0) 375 | # ax2.set_ylim(bottom=0) 376 | ax.grid(which='major', linewidth=1) 377 | # ax.grid(which='minor', linewidth=0.5) 378 | # ax.legend([request_bar, action_bar], ['Appliance requested', 'Appliance scheduled'], loc='upper left', fontsize=20) 379 | # ax2.legend([incentive_bar], ['Incentive'], loc='upper right', fontsize=16) 380 | # ax2.set_ylabel('Incentive (¢)', fontsize=16) 381 | ax.set_xlabel('Time (h)', fontsize=32) 382 | plt.setp(ax.get_yticklabels(), rotation=90, va="bottom", fontsize=20) 383 | # plt.title('Load schedule agent ' + str(plot_agent.data_id) + '\n' + str(path)) 384 | # plt.show() 385 | 386 | 387 | def init_plot(twinx=True): 388 | fig, ax = plt.subplots() 389 | 390 | day_locator = mdates.DayLocator() 391 | hour_locator = mdates.HourLocator(interval=1) 392 | minute_locator = mdates.MinuteLocator(interval=15) 393 | ax.xaxis.set_major_locator(hour_locator) 394 | ax.xaxis.set_minor_locator(minute_locator) 395 | ax.xaxis.set_major_formatter(mdates.DateFormatter('%H')) 396 | # ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M')) 397 | 398 | if twinx: 399 | ax2 = ax.twinx() 400 | ax2.xaxis.set_major_locator(hour_locator) 401 | ax2.xaxis.set_minor_locator(minute_locator) 402 | ax2.xaxis.set_major_formatter(mdates.DateFormatter('%H')) 403 | # ax2.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M')) 404 | return ax, ax2 405 | 406 | return ax 407 | 408 | 409 | main() 410 | -------------------------------------------------------------------------------- /params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Agent params 4 | AGENT_IDS = [661, 1642, 2335, 2361, 2818, 3039, 3456, 3538, 4031, 4373, 4767, 5746, 6139, 7536, 7719, 7800, 7901, 7951, 8156, 8386, 8565, 9019, 9160, 9922, 9278] 5 | NUM_AGENTS = len(AGENT_IDS) 6 | NUM_RL_AGENTS = 25 # Number of agents that are trained. 
The others are dummy agents (always choosing power rate 1.0) 7 | # TRAINING_START_DAY = 182 # Day of the year defining the start of the training period 8 | # TRAINING_END_DAY = 244 # Day of the year defining the end of the training period 9 | TRAINING_START_DAY = 91 # Day of the year defining the start of the training period 10 | TRAINING_END_DAY = 305 # Day of the year defining the end of the training period 11 | TESTING_START_DAY = 182 12 | TESTING_END_DAY = 213 13 | BASELINE_START_DAY = 91 14 | TRAINING_PERIOD = TRAINING_END_DAY - TRAINING_START_DAY # The length of the training period 15 | TESTING_PERIOD = TESTING_END_DAY - TESTING_START_DAY # The length of the testing period 16 | 17 | # RL params 18 | EPSILON = 0.1 # Fixed epsilon 19 | EPSILON_START = 1.0 # Epsilon start when using epsilon decay 20 | EPSILON_MIN = 0.01 # Epsilon minimum when using epsilon decay 21 | EPSILON_DECAY = 0.999 # Epsilon is multiplied by this decay every step (the appropriate value depends on the number of episodes) 22 | DISCOUNT_RATE = 0.9 # Discount rate (gamma) of the Q-learning algorithm 23 | EPISODES = 5000 # Number of episodes to train 24 | 25 | # DQN params 26 | BUFFER_SIZE = 10000 # The maximum number of SARS samples in the replay buffer 27 | BATCH_SIZE = 32 # The batch size for training of the Q-network 28 | LEARNING_RATE_DQN = 0.001 # The learning rate for training the Q-network 29 | TAU = 0.001 # The soft-update parameter for updating the target network 30 | TRAINING_INTERVAL = 16 # After so many steps the agent performs a training update on the network 31 | REPLACE_TARGET_INTERVAL = 50 # After so many episodes the target network is replaced 32 | HIDDEN_LAYER_SIZE = 32 # Size of the hidden layers 33 | 34 | # Environment params 35 | TIME_STEPS_TRAIN = 96 # Number of time steps per episode in training 36 | TIME_STEPS_TEST = 96 # Number of time steps per episode in testing 37 | POWER_RATES = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] # Set of actions (fraction of the demand the agent uses) 38 | CUSTOMER_ACTION_SIZE = len(POWER_RATES) # Number of actions for the customers 39 | CUSTOMER_STATE_SIZE = 8 # Number of state variables for the customers 40 | RHO = 0.5 # Weight of the incentive term (the weight of the dissatisfaction term is 1-RHO) 41 | RHO_COMMON = 1.0 42 | CRITICAL_THRESHOLD = 70 # Above this threshold for the total demand the agents receive incentives > 0 43 | CRITICAL_THRESHOLD_RELATIVE = 0.8 # Relative (fractional) counterpart of CRITICAL_THRESHOLD above which the agents receive incentives > 0 44 | MAX_TOTAL_DEMAND = 110 # Incentives will not increase when total demand is higher than this value 45 | MINIMUM_CUSTOMER_REWARD = -10 # Minimum reward to avoid too large negative rewards 46 | 47 | # Aggregator params 48 | RHO_AGGREGATOR = 0.5 # Weight of the consumption term (the weight of the incentive term is 1-RHO_AGGREGATOR) 49 | INCENTIVE_RATES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # Set of actions (incentives) 50 | AGGREGATOR_ACTION_SIZE = len(INCENTIVE_RATES) # Number of actions for the aggregator 51 | AGGREGATOR_STATE_SIZE = 3 # Number of state variables for the aggregator 52 | MAX_INCENTIVE = 10 53 | DISCOUNT_RATE_AGGREGATOR = 0.9 # Discount rate for the aggregator 54 | 55 | # Device params 56 | DEVICES = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry'] 57 | DEVICE_CONSUMPTION = np.array([2.5, 4.0, 1.0, 2.0, 2.0]) # Fixed consumption of the devices in kW 58 | # DISSATISFACTION_COEFFICIENTS = np.array([3.0, 0.04, 0.1, 0.06, 0.2]) # Delay coefficients 59 | # DISSATISFACTION_COEFFICIENTS = np.array([10.0, 0.2, 0.4, 0.3, 0.6]) # Delay 
coefficients 60 | DISSATISFACTION_COEFFICIENTS = np.array([6.0, 0.05, 0.2, 0.1, 0.4]) # Delay coefficients 61 | DISSATISFACTION_COEFFICIENTS_STD = np.array([2.0, 0.1, 0.1, 0.1, 0.2]) # Standard deviation of the delay coefficients 62 | DISSATISFACTION_COEFFICIENTS_MIN = np.array([1.0, 0.01, 0.01, 0.01, 0.01]) # Minimum delay coefficients 63 | DEVICE_NON_INTERRUPTIBLE = np.array([False, False, True, True, True]) # If the device is non-interruptible 64 | -------------------------------------------------------------------------------- /utils/consumer_baseline_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import numpy as np 4 | from scipy.stats import linregress 5 | 6 | AGENT_IDS = [661, 1642, 2335, 2361, 2818, 3039, 3456, 3538, 4031, 4373, 4767, 5746, 6139, 7536, 7719, 7800, 7901, 7951, 8156, 8386, 8565, 9019, 9160, 9922, 9278] 7 | NON_AC = ['car1', 'clotheswasher1', 'dishwasher1', 'dry1', 'waterheater1', 'non-shiftable'] 8 | pd.set_option('display.max_rows', 500) 9 | pd.set_option('display.max_columns', 500) 10 | pd.set_option('display.width', 1000) 11 | 12 | df_temperature = pd.read_csv('data/outdoor_temperatures_noaa.csv', 13 | delim_whitespace=True, 14 | parse_dates=[['LST_DATE', 'LST_TIME']], 15 | index_col=['LST_DATE_LST_TIME'], 16 | usecols=['LST_DATE', 'LST_TIME', 'T_HR_AVG']) 17 | print(df_temperature.describe()) 18 | 19 | # df_load = pd.read_csv('pecan_street_data/15minute_data_austin_processed_08_04.csv', 20 | df_load = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', 21 | parse_dates=['time'], 22 | index_col=['time']) 23 | print(df_load.describe()) 24 | 25 | start, end, steps = 182, 365, 96 26 | baselines = np.zeros((len(AGENT_IDS), len(range(start, end)), steps)) 27 | for id, agent in enumerate(AGENT_IDS): 28 | df_filter = df_load.loc[df_load['dataid'] == agent] 29 | df_load_resampled = df_filter.resample('H').max() 30 | start_date = datetime.datetime(2018, 1, 1) 31 | temperatures = [] 32 | loads = [] 33 | 34 | for day in range(1, 364): 35 | for step in range(24): 36 | offset = day * 24 + step 37 | time_delta = pd.to_timedelta(offset, 'h') 38 | current_time = start_date + time_delta 39 | temperature = df_temperature.loc[current_time]['T_HR_AVG'] 40 | if temperature == -9999: 41 | df_temperature.loc[current_time, 'T_HR_AVG'] = df_temperature.loc[current_time - pd.to_timedelta(1, 'h'), 'T_HR_AVG']  # fill missing readings from the previous hour; .loc[row, col] so the assignment writes back 42 | load = df_load_resampled.loc[current_time]['air'] 43 | if load > 0 and temperature != -9999: 44 | temperatures.append(temperature) 45 | loads.append(load) 46 | 47 | if temperatures and loads: 48 | slope, intercept, _, _, _ = linregress(temperatures, loads) 49 | else: 50 | slope, intercept = 0, 0 51 | 52 | for day in range(start, end): 53 | for step in range(steps): 54 | 55 | # Select last 10 same weekdays at the same moment 56 | time_delta = datetime.timedelta(minutes=step*15) 57 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 58 | time = start_date + time_delta 59 | time_h = start_date + datetime.timedelta(hours=int(step*0.25)) 60 | similar = [time - datetime.timedelta(days=7 * i) for i in range(1, 11)] 61 | df_load_similar = df_filter.loc[similar] 62 | similar_rounded = [ix.round('1h') for ix in df_load_similar.index] 63 | 64 | # Filter the 5 days with the largest demand 65 | # TODO exclude holidays 66 | df_load_similar = df_load_similar.nlargest(5, 'total') 67 | baseline_total = df_load_similar['total'].mean() 68 | 69 | # Take average of those 5 moments as the baseline 70 | 
df_temperature_similar = df_temperature.loc[similar_rounded] 71 | avg_temp = df_temperature_similar['T_HR_AVG'].mean() 72 | current_temp = df_temperature.loc[time_h]['T_HR_AVG'] 73 | temp_diff = current_temp - avg_temp 74 | 75 | baseline_ac = slope * temp_diff 76 | # baseline = baseline_ac + baseline_total # with temperature correction 77 | baseline = baseline_total # without temperature correction 78 | baselines[id][day-start][step] = baseline 79 | 80 | print(agent, day, step, baseline) 81 | 82 | # np.save('pecan_street_data/baselines_regr_temp_correction.npy', baselines) 83 | np.save('data/baselines_regr_temp_correction_new.npy', baselines) 84 | # np.save('pecan_street_data/baselines_regr.npy', baselines) 85 | -------------------------------------------------------------------------------- /utils/load_demand.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import numpy as np 4 | 5 | 6 | def load_requests(): 7 | df = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', parse_dates=['time'], index_col=['time']) 8 | return df 9 | 10 | 11 | def load_day(df, day, max_steps): 12 | minutes = max_steps * 15 13 | time_delta = pd.to_timedelta(minutes, 'm') 14 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 15 | end_date = start_date + time_delta 16 | df = df.loc[(df.index >= start_date) & (df.index < end_date)] 17 | return df 18 | 19 | 20 | def get_device_demands(df, agent_ids, day, timestep): 21 | minutes = timestep * 15 22 | time_delta = pd.to_timedelta(minutes, 'm') 23 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 24 | time = start_date + time_delta 25 | df = df.loc[(df['dataid'].isin(agent_ids)) & (df.index == time)] 26 | return df 27 | 28 | 29 | def get_peak_demand(df): 30 | df = df.groupby(pd.Grouper(freq='15Min')).sum() 31 | return df['total'].max() 32 | 33 | 34 | def load_baselines(): 35 | return np.load('data/baselines_regr_temp_correction.npy') 36 | -------------------------------------------------------------------------------- /utils/pre_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | 7 | pd.set_option('display.width', 10000) 8 | pd.set_option('display.max_columns', 100) 9 | SHIFTABLE = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry'] 10 | TIME_SHIFTABLE = ['car', 'clotheswasher', 'dishwasher', 'dry'] 11 | NON_SHIFTABLE = ['bathroom1', 'bathroom2', 'bedroom1', 'bedroom2', 'bedroom3', 'diningroom1', 'disposal1', 'furnace1', 12 | 'furnace2', 'garage1', 'kitchen1', 'kitchen2', 'kitchenapp1', 'kitchenapp2', 'lights_plugs1', 13 | 'lights_plugs2', 'lights_plugs3', 'lights_plugs4', 'livingroom1', 'microwave1', 'office1', 'oven1', 14 | 'oven2', 'range1', 'refrigerator1', 'refrigerator2', 'utilityroom1', 'venthood1', 'waterheater1', 15 | 'waterheater2', 'winecooler1'] 16 | DEVICES = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry', 'non-shiftable'] 17 | INCLUDE = ['dataid', 'air', 'car', 'clotheswasher', 'dishwasher', 'dry', 'non-shiftable', 'total'] 18 | 19 | df = pd.read_csv('data/15minute_data_austin.csv', engine='python', encoding="ISO-8859-1", parse_dates=['local_15min'], index_col=['local_15min']) 20 | df.index = pd.to_datetime(df.index, utc=True, infer_datetime_format=True) 21 | df.index.names = ['time'] 22 | df = df.tz_convert(None) 23 | df = 
df.groupby(['dataid']).resample('15T').max() 24 | df = df.drop('dataid', axis=1).reset_index('dataid') 25 | df = df.fillna(0) 26 | df = df.apply(lambda l: np.where(l < 0.1, 0, l)) 27 | 28 | df['air'] = df[['air1', 'air2', 'air3', 'airwindowunit1']].sum(axis=1).clip(upper=4.0) 29 | df['dry'] = df[['drye1', 'dryg1']].sum(axis=1) 30 | df['car'] = df[['car1', 'car2']].sum(axis=1) 31 | df['dishwasher'] = df['dishwasher1'] 32 | df['clotheswasher'] = df['clotheswasher1'] 33 | 34 | for device, consumption, threshold in zip(TIME_SHIFTABLE, [4, 1, 2, 2], [0.1, 0.1, 0.1, 0.1]): 35 | df[device] = df[device].apply(lambda x: consumption if x >= threshold else 0) 36 | 37 | df['non-shiftable'] = df[NON_SHIFTABLE].sum(axis=1).clip(upper=5.0) 38 | df['total'] = df[DEVICES].sum(axis=1) 39 | 40 | # Save processed data to csv (comment out to skip) 41 | df[INCLUDE].to_csv('data/15minute_data_austin_fixed_consumption_new.csv') 42 | 43 | # Filter Household 44 | dataid = 661 45 | # df = df.loc[df['dataid'] != 9019] 46 | df = df.loc[df['dataid'] == dataid] 47 | 48 | # Filter dates 49 | day = 182 50 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 51 | end_date = datetime.datetime.strptime('{} {}'.format(day + 1, 2018), '%j %Y') 52 | # start_date = datetime.datetime(2018, 7, 9) 53 | # end_date = datetime.datetime(2018, 7, 10) 54 | df = df.loc[(df.index >= start_date) & (df.index < end_date)] 55 | 56 | # create the plot 57 | # Use seaborn style defaults and set the default figure size 58 | sns.set(rc={'figure.figsize': (11, 4)}) 59 | solar_plot = df[DEVICES].plot(linewidth=0.5, marker='.') 60 | solar_plot.set_xlabel('Date') 61 | solar_plot.set_ylabel('Grid Usage kW') 62 | 63 | # display the plot 64 | plt.title('Major consumers') 65 | plt.ylabel('Power consumption (kW)') 66 | plt.show() 67 | -------------------------------------------------------------------------------- /utils/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from collections import deque, namedtuple 4 | 5 | 6 | class ReplayBuffer: 7 | def __init__(self, buffer_size, batch_size): 8 | self.batch_size = batch_size 9 | self.memory = deque(maxlen=buffer_size) 10 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 11 | 12 | def add(self, state, action, reward, next_state, done): 13 | experience = self.experience(state, action, reward, next_state, done) 14 | self.memory.append(experience) 15 | 16 | def sample(self): 17 | batch = random.sample(self.memory, k=self.batch_size) 18 | states, actions, rewards, next_states, dones = list(map(np.array, list(zip(*batch)))) 19 | return states, actions, rewards, next_states, dones 20 | 21 | def __len__(self): 22 | return len(self.memory) 23 | -------------------------------------------------------------------------------- /utils/visualize.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | 10 | # This constant limits the number of rows read in from the big CSV file. 
11 | # Set to None if you want to read the whole thing 12 | LIMIT = None 13 | pd.set_option('display.width', 10000) 14 | pd.set_option('display.max_columns', 100) 15 | devices_excl_other = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1'] 16 | # devices = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1', 'non-shiftable', 'solar'] 17 | devices = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1'] 18 | include = ['dataid', 'grid', 'car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1', 'solar', 'non-shiftable', 'total'] 19 | drop = ['dataid', 'leg1v', 'leg2v', 'grid', 'solar'] 20 | 21 | # read the 15 minute data file for Austin 22 | df = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', 23 | engine='python', encoding="ISO-8859-1", parse_dates=['time'], index_col=['time'], nrows=LIMIT) 24 | print(df.describe()) 25 | 26 | # Filter Household 27 | # incl_dataid = 661 28 | # df = df.loc[df['dataid'] == incl_dataid] 29 | # excl_dataid = 9019 30 | # df = df.loc[df['dataid'] != excl_dataid] 31 | print(df.max()) 32 | 33 | # Filter dates 34 | for day in range(182, 200): 35 | # day = random.randint(1, 365) 36 | start_date = datetime.datetime.strptime('{} {} {}'.format(day, 2018, 0), '%j %Y %H') 37 | end_date = datetime.datetime.strptime('{} {} {}'.format(day + 1, 2018, 12), '%j %Y %H') 38 | # start_date = datetime.datetime(2018, 10, 16) 39 | # end_date = datetime.datetime(2018, 10, 17) 40 | df_filter = df.loc[(df.index >= start_date) & (df.index < end_date)] 41 | 42 | # group the data by time or date and take the mean of those 43 | # df.index = df.reset_index()['time'].apply(lambda x: x - pd.Timestamp(x.date())) 44 | # df = df.groupby(pd.Grouper(freq='M')).max() 45 | # y = df.groupby(['dataid']).max() 46 | # print(y.describe) 47 | 48 | # convert from kW to kWh 49 | # df['total_kwh'] = df['total'].apply(lambda x: x) 50 | 51 | # Plot boxplot for device 52 | # threshold = 0.01 53 | # x = df.apply(lambda l: np.where(l < threshold, np.nan, l)) 54 | # print(x[devices].describe()) 55 | # fig = plt.figure(figsize=(8,6)) 56 | # ax = fig.gca() 57 | # x.boxplot(column=devices, ax=ax) 58 | 59 | # create the plot 60 | # df = df.drop(drop, 'columns') 61 | # df = df.dropna('columns', thresh=1) 62 | # Use seaborn style defaults and set the default figure size 63 | sns.set(rc={'figure.figsize': (11, 4)}) 64 | # solar_plot = df_filter[devices + ['total', 'total_incl_solar']].plot(linewidth=0.5, marker='.') 65 | solar_plot = df_filter[devices + ['total']].plot(linewidth=0.5, marker='.') 66 | # solar_plot = df_filter[devices].plot(linewidth=0.5, marker='.') 67 | solar_plot.set_xlabel('Date') 68 | solar_plot.set_ylabel('Grid Usage kW') 69 | 70 | # Plot hist 71 | # plt.hist(x['clotheswasher1'].to_numpy(), bins=50) 72 | 73 | # display the plot 74 | plt.title('Major consumers') 75 | plt.ylabel('Power consumption (kW)') 76 | plt.show() 77 | 78 | print('done') 79 | --------------------------------------------------------------------------------
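
A note on how the hyperparameters in params.py are typically applied: the comments there describe the epsilon-decay exploration schedule, the TAU soft update of the DQN target network, and the RHO weighting between the incentive and dissatisfaction terms of the customer reward (floored at MINIMUM_CUSTOMER_REWARD). The code that actually applies them lives in agents/agent_customer.py, agents/agent_aggregator_dqn.py and environment/environment.py, which are not reproduced in full above. The sketch below is a minimal, hypothetical illustration only: the function names decay_epsilon, soft_update and customer_reward are introduced here for clarity and do not come from the repository, and the reward shape is inferred from the parameter comments rather than from environment/environment.py.

# Illustrative sketch only -- not the repository's implementation.
from params import EPSILON_MIN, EPSILON_DECAY, TAU, RHO, MINIMUM_CUSTOMER_REWARD


def decay_epsilon(epsilon):
    # Multiply epsilon by EPSILON_DECAY every step, never dropping below EPSILON_MIN.
    return max(EPSILON_MIN, epsilon * EPSILON_DECAY)


def soft_update(online_weights, target_weights, tau=TAU):
    # Soft target-network update: w_target <- tau * w_online + (1 - tau) * w_target.
    # Works on the list of numpy arrays returned by a Keras model's get_weights().
    return [tau * w_o + (1.0 - tau) * w_t for w_o, w_t in zip(online_weights, target_weights)]


def customer_reward(incentive_received, dissatisfaction, rho=RHO):
    # Assumed reward shape: RHO-weighted incentive minus (1 - RHO)-weighted dissatisfaction,
    # clipped from below at MINIMUM_CUSTOMER_REWARD to avoid very large negative rewards.
    return max(MINIMUM_CUSTOMER_REWARD, rho * incentive_received - (1.0 - rho) * dissatisfaction)

For example, calling decay_epsilon repeatedly on EPSILON_START = 1.0 anneals exploration towards EPSILON_MIN = 0.01, while soft_update(model.get_weights(), target_model.get_weights()) nudges the target network towards the online network by a factor of TAU = 0.001 per update.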