├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── agents ├── agent_aggregator_dqn.py └── agent_customer.py ├── data ├── 15minute_data_austin.csv ├── 15minute_data_austin_fixed_consumption.csv ├── 15minute_data_austin_processed.csv ├── baselines_regr_temp_correction.npy ├── baselines_regr_temp_correction_old.npy └── outdoor_temperatures_noaa.csv ├── environment └── environment.py ├── main.py ├── params.py └── utils ├── consumer_baseline_process.py ├── load_demand.py ├── pre_process.py ├── replay_buffer.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | save_files/ 3 | .idea/ 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: >- 3 | Case study for 4 | MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 5 | message: 'If you use this software, please cite it as below.' 6 | type: software 7 | authors: 8 | - given-names: Jasper 9 | family-names: 'van Tilburg ' 10 | affiliation: University of Technology Delft 11 | - given-names: Luciano Cavalcante 12 | family-names: Siebert 13 | email: L.CavalcanteSiebert@tudelft.nl 14 | affiliation: University of Technology Delft 15 | orcid: 'https://orcid.org/0000-0002-7531-3154' 16 | - given-names: Jochen L. 17 | family-names: Cremer 18 | email: j.l.cremer@tudelft.nl 19 | affiliation: University of Technology Delft 20 | orcid: 'https://orcid.org/0000-0001-9284-5083' 21 | repository-code: >- 22 | https://github.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 23 | abstract: >- 24 | This repository contains the code for the most recent 25 | versions of the model for the paper: 26 | Jasper van Tilburg, Luciano C. Siebert, Jochen L. Cremer, 27 | "MARL-iDR: Multi-Agent Reinforcement Learning for 28 | Incentive-based Residential Demand Response" to appear at 29 | IEEE PowerTech 2023, Belgrade, Serbia 30 | keywords: 31 | - Reinforcement Learning 32 | - Energy Community 33 | - Demand Response 34 | license: MIT 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 TU-Delft-AI-Energy-Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Case study for MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response 2 | 3 | This repository contains code for the paper: 4 | 5 | *Jasper van Tilburg, Luciano C. Siebert, Jochen L. Cremer, "MARL-iDR: Multi-Agent Reinforcement Learning for Incentive-based Residential Demand Response" IEEE PowerTech 2023, Belgrade, Serbia, https://arxiv.org/abs/2304.04086* 6 | 7 | ## Data 8 | This repository includes only placeholder Excel files in /data which includes the first and last data samples. The full data that was used in the case studies in our paper can be downloaded from “Pecan Street Inc.” [Online]. Available: https://www.pecanstreet.org/ 9 | 10 | ## License 11 | 12 | This work is licensed under a 13 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 14 | -------------------------------------------------------------------------------- /agents/agent_aggregator_dqn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from keras import initializers 5 | from tensorflow.python.keras import Sequential 6 | from tensorflow.python.keras.layers import Dense 7 | from tensorflow.python.keras.models import load_model 8 | from tensorflow.python.keras.optimizer_v2.adam import Adam 9 | 10 | from environment.environment import sample_action_aggregator 11 | from params import EPSILON_START, EPSILON_MIN, EPSILON_DECAY, \ 12 | BUFFER_SIZE, BATCH_SIZE, TRAINING_INTERVAL, REPLACE_TARGET_INTERVAL, \ 13 | TAU, LEARNING_RATE_DQN, HIDDEN_LAYER_SIZE, AGGREGATOR_ACTION_SIZE, AGGREGATOR_STATE_SIZE, DISCOUNT_RATE_AGGREGATOR 14 | from utils.replay_buffer import ReplayBuffer 15 | 16 | 17 | def construct_network(): 18 | model = Sequential() 19 | model.add(Dense(HIDDEN_LAYER_SIZE, input_shape=(AGGREGATOR_STATE_SIZE,), activation='relu', 20 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 21 | model.add(Dense(HIDDEN_LAYER_SIZE, activation='relu', 22 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 23 | model.add(Dense(AGGREGATOR_ACTION_SIZE, activation='linear', 24 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 25 | model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE_DQN)) 26 | return model 27 | 28 | 29 | def predict(state, network): 30 | state_input = np.reshape(state, (-1, AGGREGATOR_STATE_SIZE)) 31 | return network(state_input) 32 | 33 | 34 | class AggregatorAgent: 35 | """ This AggregatorAgent is a deep Reinforcement Learning agent similar to the CustomerAgent. Currently the 36 | Q-learning AggregatorAgent is used. 
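    A minimal interaction sketch, following the training loop in main.py (episode
    handling and the CustomerAgents are omitted here for brevity):

        env = Environment(AGENT_IDS)
        aggregator = AggregatorAgent(env)
        env.reset(max_steps=TIME_STEPS_TRAIN)
        aggregator.reset()
        while not env.done:
            aggregator.act(train=True)   # reads env.last_aggregator(), picks an incentive rate
            # ... each CustomerAgent acts here ...
            env.step()                   # advances time and computes the aggregator reward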
""" 37 | 38 | def __init__(self, env): 39 | self.env = env 40 | self.epsilon = EPSILON_START 41 | self.acc_reward = 0 42 | self.last_state = None 43 | self.last_action = None 44 | self.last_history = None 45 | self.q_network = construct_network() 46 | self.target_network = construct_network() 47 | self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE) 48 | 49 | def reset(self): 50 | self.last_state = None 51 | self.last_action = None 52 | self.last_history = None 53 | self.acc_reward = 0 54 | self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY) 55 | 56 | def act(self, train=True): 57 | observation, reward, done, _ = self.env.last_aggregator() 58 | 59 | if train: 60 | if self.last_action is not None: 61 | self.step(self.last_state, self.last_action, reward, observation, done) 62 | action = self.choose_action(observation, self.epsilon) 63 | else: 64 | action = self.choose_action(observation) 65 | 66 | self.env.act_aggregator(action) 67 | self.last_state = observation 68 | self.last_action = action 69 | self.acc_reward += reward 70 | 71 | def choose_action(self, s, eps=0.0): 72 | if random.uniform(0, 1) < eps: 73 | return sample_action_aggregator() 74 | else: 75 | actions = predict(s, self.q_network) 76 | action = np.argmax(actions) 77 | return action 78 | 79 | def step(self, state, action, reward, next_state, done, name=None): 80 | self.memory.add(state, action, reward, next_state, done) 81 | 82 | # Train network 83 | if len(self.memory) >= BATCH_SIZE and self.env.curr_step % TRAINING_INTERVAL == 0: 84 | sampled_experiences = self.memory.sample() 85 | self.train(sampled_experiences) 86 | 87 | # Replace target network 88 | if self.env.episode % REPLACE_TARGET_INTERVAL == 0: 89 | self.target_network.set_weights(self.q_network.get_weights()) 90 | 91 | def train(self, experiences): 92 | states, actions, rewards, next_states, dones = experiences 93 | outputs = predict(next_states, self.target_network) 94 | next_actions = np.max(outputs, axis=1) 95 | target_values = rewards + (DISCOUNT_RATE_AGGREGATOR * next_actions * (1 - dones)) 96 | targets = predict(states, self.q_network).numpy() 97 | targets[np.arange(len(states)), actions] = target_values 98 | self.last_history = self.q_network.fit(np.array(states), np.array(targets), verbose=False) 99 | 100 | def update_network(self): 101 | model_weights = self.q_network.get_weights() 102 | target_model_weights = self.target_network.get_weights() 103 | for i in range(len(model_weights)): 104 | target_model_weights[i] = TAU * model_weights[i] + (1 - TAU) * target_model_weights[i] 105 | self.target_network.set_weights(target_model_weights) 106 | 107 | def save(self, path): 108 | self.q_network.save(path + '/Q_network_aggregator.h5') 109 | print("Successfully saved network.") 110 | 111 | def load(self, path): 112 | self.q_network = load_model(path + '/Q_network_aggregator.h5') 113 | -------------------------------------------------------------------------------- /agents/agent_customer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from keras import initializers 5 | from tensorflow.python.keras import Sequential 6 | from tensorflow.python.keras.layers import Dense 7 | from tensorflow.python.keras.models import load_model 8 | from tensorflow.python.keras.optimizer_v2.adam import Adam 9 | 10 | from environment.environment import sample_action_customer 11 | from params import EPSILON_START, DISCOUNT_RATE, EPSILON_MIN, EPSILON_DECAY, CUSTOMER_ACTION_SIZE, BUFFER_SIZE, \ 12 
| BATCH_SIZE, CUSTOMER_STATE_SIZE, TRAINING_INTERVAL, \ 13 | REPLACE_TARGET_INTERVAL, TAU, LEARNING_RATE_DQN, HIDDEN_LAYER_SIZE, POWER_RATES 14 | from utils.replay_buffer import ReplayBuffer 15 | 16 | 17 | def construct_network(): 18 | """ Construct the Deep-Q network. It consists of an input layer with the size of the state variables, 2 hidden 19 | layers and an output layer with the size of the possible actions. """ 20 | model = Sequential() 21 | model.add(Dense(HIDDEN_LAYER_SIZE, input_shape=(CUSTOMER_STATE_SIZE,), activation='relu', 22 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 23 | model.add(Dense(HIDDEN_LAYER_SIZE, activation='relu', 24 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 25 | model.add(Dense(CUSTOMER_ACTION_SIZE, activation='linear', 26 | kernel_initializer=initializers.RandomNormal(stddev=0.01), bias_initializer=initializers.Zeros())) 27 | model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE_DQN)) 28 | return model 29 | 30 | 31 | def predict(state, network): 32 | """ Predict the Q-values for a given state and network. """ 33 | state_input = np.reshape(state, (-1, CUSTOMER_STATE_SIZE)) 34 | return network(state_input) 35 | 36 | 37 | class CustomerAgent: 38 | """ This CustomerAgent is a Reinforcement Learning agent using a Deep-Q network to predict the Q-values of 39 | state-action pairs. In the act function the agent calls for the previous reward and the next observation. 40 | It updates its network based on the previous reward, observation and action. Then it decides upon the next action. 41 | """ 42 | 43 | def __init__(self, agent_id, data_id, env, dummy=False, q_network=None, target_network=None): 44 | self.agent_id = agent_id 45 | self.data_id = data_id 46 | self.env = env 47 | self.epsilon = EPSILON_START 48 | self.dummy = dummy 49 | self.acc_reward = 0 50 | self.last_state = None 51 | self.last_action = None 52 | self.last_history = None 53 | self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE) 54 | self.visited = {} 55 | self.q_network = q_network 56 | self.target_network = target_network 57 | if q_network is None: 58 | self.q_network = construct_network() 59 | if target_network is None: 60 | self.target_network = construct_network() 61 | 62 | def reset(self): 63 | """ Reset the agent before each episode. """ 64 | self.last_state = None 65 | self.last_action = None 66 | self.last_history = None 67 | self.acc_reward = 0 68 | self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY) 69 | 70 | def act(self, train=True): 71 | """ Select an action based on the observation. If the agent is in training and is not a dummy the agent's 72 | Q-network is also updated in the step function. """ 73 | observation, reward, done, _ = self.env.last_customer(self.agent_id) 74 | 75 | if train and not self.dummy: 76 | if self.last_action is not None: 77 | self.step(self.last_state, self.last_action, reward, observation, done) 78 | action = self.choose_action(observation, self.epsilon) 79 | else: 80 | action = self.choose_action(observation) 81 | 82 | self.env.act(self.agent_id, action) 83 | self.last_state = observation 84 | self.last_action = action 85 | self.acc_reward += reward 86 | 87 | def choose_action(self, s, eps=0.0): 88 | """ Choose an action based on the given state. If the agent is a dummy it will simply consume an amount equal 89 | to its demand (power rate 1.0). Otherwise an action is selected based on epsilon-greedy. 
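    The exploration rate passed in as eps comes from the per-episode decay applied in
    reset(). With the defaults in params.py (EPSILON_START = 1.0, EPSILON_DECAY = 0.999,
    EPSILON_MIN = 0.01) this is roughly:

        eps_after_n_episodes = max(0.01, 1.0 * 0.999 ** n)
        # n = 1000 -> ~0.37; the 0.01 floor is reached after roughly 4600 episodes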
""" 90 | if self.dummy: 91 | return POWER_RATES.index(1.0) 92 | elif random.uniform(0, 1) < eps: 93 | return sample_action_customer() 94 | else: 95 | actions = predict(s, self.q_network) 96 | action = np.argmax(actions) 97 | return action 98 | 99 | def step(self, state, action, reward, next_state, done): 100 | """ Every iteration the agent takes a training step. The agent adds the SARS tuple to the replay buffer. The 101 | replay buffer is then used for sampling a batch for training. """ 102 | self.memory.add(state, action, reward, next_state, done) 103 | 104 | # Train network on a certain interval and if the replay buffer has enough samples 105 | if len(self.memory) >= BATCH_SIZE and self.env.curr_step % TRAINING_INTERVAL == 0: 106 | sampled_experiences = self.memory.sample() 107 | self.train(sampled_experiences) 108 | 109 | # Replace target network on a certain interval 110 | if self.env.episode % REPLACE_TARGET_INTERVAL == 0: 111 | self.target_network.set_weights(self.q_network.get_weights()) 112 | 113 | def train(self, experiences): 114 | """ Train the Q-network. The target values are based on a target network to stabilize training. """ 115 | states, actions, rewards, next_states, dones = experiences 116 | outputs = predict(next_states, self.target_network) 117 | next_actions = np.max(outputs, axis=1) 118 | target_values = rewards + (DISCOUNT_RATE * next_actions * (1 - dones)) 119 | targets = predict(states, self.q_network).numpy() 120 | targets[np.arange(len(states)), actions] = target_values 121 | self.last_history = self.q_network.fit(np.array(states), np.array(targets), verbose=False) 122 | 123 | def update_network(self): 124 | """ Do a soft update on the target network. A soft update can be done every iteration. This is slightly 125 | different from a hard update on an interval. This is currently not used. """ 126 | model_weights = self.q_network.get_weights() 127 | target_model_weights = self.target_network.get_weights() 128 | for i in range(len(model_weights)): 129 | target_model_weights[i] = TAU * model_weights[i] + (1 - TAU) * target_model_weights[i] 130 | self.target_network.set_weights(target_model_weights) 131 | 132 | def save(self, path): 133 | """ Save the network. """ 134 | self.q_network.save(path + '/Q_network_' + str(self.data_id) + '.h5') 135 | np.save(path + '/dissatisfaction_coefficients_' + str(self.data_id) + '.npy', self.env.dissatisfaction_coefficients[self.agent_id]) 136 | print("Successfully saved network for agent " + str(self.data_id)) 137 | 138 | def load(self, path): 139 | """ Load a network give its path. 
""" 140 | self.q_network = load_model(path + '/Q_network_' + str(self.data_id) + '.h5') 141 | self.env.dissatisfaction_coefficients[self.agent_id] = np.load(path + '/dissatisfaction_coefficients_' + str(self.data_id) + '.npy') 142 | -------------------------------------------------------------------------------- /data/15minute_data_austin.csv: -------------------------------------------------------------------------------- 1 | dataid,local_15min,air1,air2,air3,airwindowunit1,aquarium1,bathroom1,bathroom2,bedroom1,bedroom2,bedroom3,bedroom4,bedroom5,battery1,car1,car2,circpump1,clotheswasher1,clotheswasher_dryg1,diningroom1,diningroom2,dishwasher1,disposal1,drye1,dryg1,freezer1,furnace1,furnace2,garage1,garage2,grid,heater1,heater2,heater3,housefan1,icemaker1,jacuzzi1,kitchen1,kitchen2,kitchenapp1,kitchenapp2,lights_plugs1,lights_plugs2,lights_plugs3,lights_plugs4,lights_plugs5,lights_plugs6,livingroom1,livingroom2,microwave1,office1,outsidelights_plugs1,outsidelights_plugs2,oven1,oven2,pool1,pool2,poollight1,poolpump1,pump1,range1,refrigerator1,refrigerator2,security1,sewerpump1,shed1,solar,solar2,sprinkler1,sumppump1,utilityroom1,venthood1,waterheater1,waterheater2,wellpump1,winecooler1,leg1v,leg2v 2 | 661,21/11/2018 15:15,0,,,,,,,,,,,,,0.001,,,,,,,,,0,,,,,,,0.124,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.276,,,,,,,,,,123.915,124.277 3 | 9278,31/12/2018 23:45,0.039,,,,,,,,,,,,,,,,0.001,,,,-0.001,,0.001,,,0.014,,,,1.02,,,,,,,,,0.012,0.003,0.001,,,,,,,,0.003,,,,-0.001,,,,,,,,0.438,0.014,,,,-0.003,,,,,,,,,,122.939,123.402 4 | -------------------------------------------------------------------------------- /data/15minute_data_austin_fixed_consumption.csv: -------------------------------------------------------------------------------- 1 | time,dataid,air,car,clotheswasher,dishwasher,dry,non-shiftable,total 2 | 01/01/2018 00:00,661,0,0,0,0,0,0.434,0.434 3 | 31/12/2018 23:45,9922,0,0,0,0,0,0.684,0.684 4 | -------------------------------------------------------------------------------- /data/15minute_data_austin_processed.csv: -------------------------------------------------------------------------------- 1 | time,dataid,air,car,clotheswasher,dishwasher,dry,non-shiftable,total 2 | 01/01/2018 00:00,661,0,0,0,0.001,0,0.446,0.447 3 | 31/12/2018 23:45,9922,-0.003,0,0.078,0.001,0.001,0.843,0.92 4 | -------------------------------------------------------------------------------- /data/baselines_regr_temp_correction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response/1d972a0a8f7f32824a9d72945524ce77ed889044/data/baselines_regr_temp_correction.npy -------------------------------------------------------------------------------- /data/baselines_regr_temp_correction_old.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TU-Delft-AI-Energy-Lab/MARL-iDR-Multi-Agent-Reinforcement-Learning-for-Incentive-based-Residential-Demand-Response/1d972a0a8f7f32824a9d72945524ce77ed889044/data/baselines_regr_temp_correction_old.npy -------------------------------------------------------------------------------- /data/outdoor_temperatures_noaa.csv: -------------------------------------------------------------------------------- 1 | WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE T_CALC T_HR_AVG T_MAX T_MIN P_CALC SOLARAD SOLARAD_FLAG SOLARAD_MAX 
SOLARAD_MAX_FLAG SOLARAD_MIN SOLARAD_MIN_FLAG SUR_TEMP_TYPE SUR_TEMP SUR_TEMP_FLAG SUR_TEMP_MAX SUR_TEMP_MAX_FLAG SUR_TEMP_MIN SUR_TEMP_MIN_FLAG RH_HR_AVG RH_HR_AVG_FLAG SOIL_MOISTURE_5 SOIL_MOISTURE_10 SOIL_MOISTURE_20 SOIL_MOISTURE_50 SOIL_MOISTURE_100 SOIL_TEMP_5 SOIL_TEMP_10 SOIL_TEMP_20 SOIL_TEMP_50 SOIL_TEMP_100 2 | 23907 20180101 0100 20171231 1900 2.423 -98.08 30.62 -4.1 -3.8 -3.5 -4.1 0.0 0 0 0 0 0 0 C -0.9 0 -0.7 0 -1.3 0 92 0 0.372 0.422 -99.000 -99.000 -99.000 7.0 9.0 -9999.0 -9999.0 -9999.0 3 | 23907 20190101 0000 20181231 1800 2.623 -98.08 30.62 12.9 13.5 14.8 12.7 0.0 29 0 96 0 0 0 C 11.5 0 12.9 0 9.8 0 47 0 0.509 0.502 -99.000 -99.000 -99.000 10.3 10.1 -9999.0 -9999.0 -9999.0 4 | -------------------------------------------------------------------------------- /environment/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from utils.load_demand import load_requests, load_day, get_device_demands, load_baselines, get_peak_demand 4 | from params import RHO, CUSTOMER_ACTION_SIZE, TRAINING_START_DAY, TRAINING_END_DAY, INCENTIVE_RATES, \ 5 | AGGREGATOR_ACTION_SIZE, TIME_STEPS_TRAIN, DEVICE_CONSUMPTION, DISSATISFACTION_COEFFICIENTS, \ 6 | DEVICE_NON_INTERRUPTIBLE, RHO_AGGREGATOR, DEVICES, \ 7 | CRITICAL_THRESHOLD_RELATIVE, MAX_TOTAL_DEMAND, MAX_INCENTIVE, RHO_COMMON, POWER_RATES, BASELINE_START_DAY, \ 8 | NUM_AGENTS, DISSATISFACTION_COEFFICIENTS_STD, DISSATISFACTION_COEFFICIENTS_MIN, TESTING_START_DAY, TESTING_END_DAY 9 | 10 | 11 | def knapsack(values, weights, capacity): 12 | """ Schedule the devices based on their dissatisfaction (the values) and their consumption (the weights). The 13 | customer selected a fraction of demand as consumption (capacity). This function brute-forces the optimal knapsack 14 | solution. 
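    Illustrative example (made-up values, not taken from the data):

        values   = np.array([3.0, 0.4, 0.1])   # dissatisfaction avoided by running each device now
        weights  = np.array([2.5, 4.0, 1.0])   # device consumption in kW
        capacity = 4.0                          # power_rate * shiftable demand
        # knapsack(values, weights, capacity) -> (3.1, 3.5, [True, False, True]):
        # devices 0 and 2 are scheduled, device 1 is delayed.

    With n non-zero values all 2**n subsets are enumerated, which is only practical
    because len(DEVICES) is small (5).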
""" 15 | non_zero_values = np.nonzero(values)[0] 16 | n = len(non_zero_values) 17 | max_value = 0 18 | max_weight = capacity 19 | max_actions = np.zeros(len(values), dtype=bool) 20 | 21 | if n == 0: 22 | return 0, 0, max_actions 23 | 24 | for i in range(2 ** n): 25 | actions = np.array([int(x) for x in list(f'{i:b}'.zfill(n))], dtype=bool) 26 | action_indices = non_zero_values[actions] 27 | value = values[action_indices].sum() 28 | weight = weights[action_indices].sum() 29 | if (weight <= capacity and value > max_value) or (weight <= max_weight and value == max_value): 30 | max_value = value 31 | max_weight = weight 32 | max_actions = np.zeros(len(values), dtype=bool) 33 | max_actions[action_indices] = True 34 | 35 | return max_value, max_weight, max_actions 36 | 37 | 38 | def knapsack_ensemble(values, weights, capacity, dissatisfaction_coefficients): 39 | max_values = [] 40 | max_weights = [] 41 | max_actionss = [] 42 | rates = [] 43 | ac_index = DEVICES.index('air') 44 | ac_consumption = weights[ac_index] 45 | ac_max_value = values[ac_index] 46 | for rate in POWER_RATES[1:]: 47 | ac_weight = ac_consumption * rate 48 | ac_reduction = ac_consumption - ac_weight 49 | ac_value = dissatisfaction_coefficients[ac_index] * np.square(ac_reduction) 50 | ac_value = ac_max_value - ac_value 51 | weights[ac_index] = ac_weight 52 | values[ac_index] = ac_value 53 | max_value, max_weight, max_actions = knapsack(values, weights, capacity) 54 | max_values.append(max_value) 55 | max_weights.append(max_weight) 56 | max_actionss.append(max_actions) 57 | rates.append(rate) 58 | 59 | sorted_values = sorted(zip(max_values, max_weights, max_actionss, rates), key=lambda elem: (-elem[0], elem[1])) 60 | return sorted_values[0] 61 | 62 | 63 | def sample_action_customer(): 64 | """ Sample a random action for the customer. """ 65 | return np.random.randint(0, CUSTOMER_ACTION_SIZE) 66 | 67 | 68 | def sample_action_aggregator(): 69 | """ Sample a random action for the aggregator. """ 70 | return np.random.randint(0, AGGREGATOR_ACTION_SIZE) 71 | 72 | 73 | class Environment: 74 | """ The AggregatorAgent and the CustomerAgents interact with the Environment. The Environment controls input of 75 | device requests and demands. It schedules devices with the knapsack algorithm for the CustomerAgents. 
Finally, 76 | it calculates the rewards.""" 77 | 78 | def __init__(self, data_ids, heterogeneous=False, baseline=False): 79 | self.data_ids = data_ids 80 | self.episode = 0 81 | self.df = load_requests() 82 | self.heterogeneous = heterogeneous 83 | self.baseline = baseline 84 | self.dissatisfaction_coefficients = np.full((len(data_ids), len(DEVICES)), DISSATISFACTION_COEFFICIENTS) 85 | if heterogeneous: 86 | dissatisfaction_coefficients = np.random.normal( 87 | loc=DISSATISFACTION_COEFFICIENTS, scale=DISSATISFACTION_COEFFICIENTS_STD, size=(NUM_AGENTS, len(DEVICES))) 88 | self.dissatisfaction_coefficients = np.maximum(DISSATISFACTION_COEFFICIENTS_MIN, dissatisfaction_coefficients) 89 | 90 | def reset(self, day=None, max_steps=TIME_STEPS_TRAIN): 91 | self.day = day 92 | if day is None: 93 | # self.day = np.random.randint(TRAINING_START_DAY, TRAINING_END_DAY) 94 | day_range = [(TRAINING_START_DAY, TESTING_START_DAY), (TESTING_END_DAY, TRAINING_END_DAY)][np.random.randint(0, 2)] 95 | self.day = np.random.randint(*day_range) 96 | self.curr_step = 0 97 | self.episode += 1 98 | self.done = False 99 | self.max_steps = max_steps 100 | 101 | # Customer agent params 102 | self.demand = np.zeros((max_steps, len(self.data_ids))) 103 | self.non_shiftable_load = np.zeros((max_steps, len(self.data_ids))) 104 | self.requests_new = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # Incoming requests from PecanStreet 105 | self.request_loads = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # The load in kW for open requests 106 | self.requests_started = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # Request for Non-interruptible devices that have been started but are still running 107 | self.requests_open = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # How many time steps are still unfulfilled for a request for a device (the length of the request) 108 | self.requests_delayed = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) # How many time steps a device has been delayed 109 | 110 | self.possible_actions = np.zeros((max_steps, len(self.data_ids))) # The devices scheduled by knapsack in each time step 111 | self.power_rates = np.zeros((max_steps, len(self.data_ids))) # The devices scheduled by knapsack in each time step 112 | self.request_actions = np.zeros((max_steps, len(self.data_ids), len(DEVICES)), dtype=bool) # The devices scheduled by knapsack in each time step 113 | self.ac_rates = np.zeros((max_steps, len(self.data_ids))) 114 | self.consumptions = np.zeros((max_steps, len(self.data_ids))) # Total consumtpion by each agent in each time step # Total consumtpion by each agent in each time step 115 | self.incentive_received = np.zeros((max_steps, len(self.data_ids))) 116 | self.rewards_customers = np.zeros((max_steps, len(self.data_ids))) 117 | self.dissatisfaction = np.zeros((max_steps, len(self.data_ids), len(DEVICES))) 118 | self.customer_reward_matrix = np.zeros((max_steps, len(INCENTIVE_RATES), len(self.data_ids), len(POWER_RATES))) 119 | self.aggregator_reward_matrix = np.zeros((max_steps, len(INCENTIVE_RATES))) 120 | 121 | # Aggregator agent params 122 | self.incentives = np.zeros(max_steps) 123 | self.rewards_aggregator = np.zeros(max_steps) 124 | 125 | # Demand data params 126 | self.day_df = load_day(self.df, self.day, max_steps) 127 | self.baselines = load_baselines() 128 | self.set_demands() 129 | # self.capacity_threshold = CRITICAL_THRESHOLD 130 | self.capacity_threshold = get_peak_demand(self.day_df) * 
CRITICAL_THRESHOLD_RELATIVE 131 | 132 | def last_customer(self, agent_id): 133 | """ The CustomerAgent can call this method to receive the previous reward and the next observation. 134 | The observation consists of the state of the household appliances and the offered incentive. The state of the 135 | household appliances is defined as an integer, 0 for no request or requests for non-interruptible devices that 136 | have been started, 1 for a new request and > 1 if the request has been delayed. """ 137 | incentive = self.incentives[self.curr_step] 138 | baseline = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 139 | new_requests = self.requests_new[self.curr_step][agent_id] 140 | started_requests = self.requests_started[self.curr_step][agent_id] 141 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 142 | delays = self.requests_delayed[self.curr_step][agent_id] 143 | new_delays = new_requests + delays 144 | open_delays = new_delays * np.invert(started_requests) 145 | ac_consumption = self.request_loads[self.curr_step][agent_id][0] 146 | non_shiftable = self.non_shiftable_load[self.curr_step][agent_id] 147 | non_interruptible = (np.logical_and(open_requests, started_requests) * DEVICE_CONSUMPTION).sum() 148 | observation = np.array(np.concatenate(([ac_consumption], open_delays[1:], [non_shiftable + non_interruptible, incentive, baseline]))) 149 | reward = self.rewards_customers[self.curr_step][agent_id] 150 | done = self.done 151 | return observation, reward, done, None 152 | 153 | def last_aggregator(self): 154 | """ The AggregatorAgent can call this method to receive the previous reward and the next observation. 155 | The observation only contains the total demand of the customer together. """ 156 | total_demand = self.get_total_demand(self.curr_step) 157 | # total_demand = self.baselines[:, self.day - TRAINING_START_DAY, self.curr_step].sum(axis=0) 158 | threshold = self.capacity_threshold 159 | reduction = self.get_total_reduction(self.curr_step-1 if self.curr_step > 0 else 0) 160 | observation = np.array([total_demand, threshold, reduction]) 161 | reward = self.rewards_aggregator[self.curr_step] 162 | done = self.done 163 | return observation, reward, done, None 164 | 165 | def act(self, agent_id, action): 166 | """ Apply the action selected by a CustomerAgent. 167 | The agent selects a power rate and sends it to the environment. Based on this power rate this method calls the 168 | knapsack algorithm and determines the devices scheduled for this time step. Afterwards this method calculates 169 | the new state of the appliances taking device-specific constraints into account. 
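    Reward sketch with made-up numbers (RHO = 0.5 as in params.py):

        # baseline_demand = 3.0, consumption = 2.0, incentive_rate = 4
        incentive_received = 4 * max(0, 3.0 - 2.0)    # = 4.0
        dissatisfaction    = 1.5                      # value of the devices that were not scheduled
        reward = 0.5 * incentive_received - (1 - 0.5) * dissatisfaction   # = 1.25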
""" 170 | # Get power rate and incentive rate 171 | incentive_rate = self.incentives[self.curr_step] 172 | baseline_demand = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 173 | car_index = DEVICES.index('car') 174 | ac_index = DEVICES.index('air') 175 | power_rate = POWER_RATES[action] 176 | if self.baseline: 177 | power_rate = POWER_RATES[np.argmax(self.customer_reward_matrix[self.curr_step][int(incentive_rate)][agent_id])] 178 | 179 | # Get requests and demands 180 | started_requests = self.requests_started[self.curr_step][agent_id] 181 | new_requests = self.requests_new[self.curr_step][agent_id] 182 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 183 | delayed_requests = self.requests_delayed[self.curr_step][agent_id] 184 | selectable_requests = np.logical_and(open_requests, np.invert(started_requests)) 185 | non_interruptible_requests = np.logical_and(open_requests, started_requests) 186 | non_interruptible_demand = (non_interruptible_requests * DEVICE_CONSUMPTION).sum() 187 | non_shiftable_demand = self.non_shiftable_load[self.curr_step][agent_id] 188 | device_consumptions = selectable_requests * DEVICE_CONSUMPTION 189 | device_consumptions[ac_index] = self.request_loads[self.curr_step][agent_id][ac_index] 190 | 191 | # Brute-force knapsack 192 | dissatisfaction_values = self.dissatisfaction_coefficients[agent_id] * np.square(delayed_requests + 1) 193 | dissatisfaction_values[ac_index] = self.dissatisfaction_coefficients[agent_id][ac_index] * np.square(device_consumptions[ac_index]) 194 | device_values = dissatisfaction_values * selectable_requests 195 | shiftable_demand = (selectable_requests * device_consumptions).sum() 196 | capacity = power_rate * shiftable_demand 197 | value, weight, actions, ac_rate = knapsack_ensemble(device_values, device_consumptions, capacity, self.dissatisfaction_coefficients[agent_id]) 198 | delayed_devices = np.invert(actions) * selectable_requests 199 | dissatisfaction = device_values.sum() - value 200 | 201 | # Calculate received incentive 202 | consumption = weight + non_interruptible_demand + non_shiftable_demand 203 | energy_diff = baseline_demand - consumption 204 | # energy_diff = self.demand[self.curr_step, agent_id] - consumption 205 | # energy_diff = shiftable_demand - weight 206 | incentive_received = incentive_rate * max(0, energy_diff) 207 | 208 | # Calculate reward 209 | incentive_term = RHO * incentive_received 210 | dissatisfaction_term = (1 - RHO) * -dissatisfaction 211 | reward = incentive_term + dissatisfaction_term 212 | # reward = max(MINIMUM_CUSTOMER_REWARD, incentive_term + dissatisfaction_term) 213 | 214 | # Save selected devices, energy consumption and received incentive 215 | fulfilled_requests = np.logical_or(actions, non_interruptible_requests) 216 | self.possible_actions[self.curr_step][agent_id] = np.count_nonzero(selectable_requests) 217 | self.request_actions[self.curr_step][agent_id] = fulfilled_requests 218 | self.consumptions[self.curr_step][agent_id] = consumption 219 | self.incentive_received[self.curr_step][agent_id] = incentive_received 220 | self.power_rates[self.curr_step][agent_id] = power_rate if selectable_requests.any() else 1 221 | self.ac_rates[self.curr_step][agent_id] = ac_rate * actions[ac_index] if selectable_requests[ac_index] else 1 222 | self.dissatisfaction[self.curr_step][agent_id] = device_values * delayed_devices 223 | self.dissatisfaction[self.curr_step][agent_id][ac_index] = self.dissatisfaction_coefficients[agent_id][ac_index] * 
np.square((1 - ac_rate) * device_consumptions[ac_index]) 224 | 225 | # Update parameters for use in the next time step 226 | if self.curr_step < self.max_steps - 1: 227 | started_non_interruptibles = actions * DEVICE_NON_INTERRUPTIBLE 228 | open_requests_next = open_requests - fulfilled_requests 229 | 230 | self.rewards_customers[self.curr_step + 1][agent_id] = reward 231 | self.requests_open[self.curr_step + 1][agent_id] = open_requests_next 232 | self.requests_started[self.curr_step + 1][agent_id] = started_non_interruptibles + non_interruptible_requests 233 | self.requests_delayed[self.curr_step + 1][agent_id] = delayed_requests + delayed_devices 234 | self.requests_delayed[self.curr_step + 1][agent_id][started_non_interruptibles] = 0 235 | 236 | # If all requested time slots for the EV are fulfilled reset the delay 237 | if open_requests_next[car_index] == 0: 238 | self.requests_delayed[self.curr_step + 1][agent_id][car_index] = 0 239 | 240 | # AC has no delay 241 | self.requests_delayed[self.curr_step + 1][agent_id][ac_index] = 0 242 | self.requests_open[self.curr_step + 1][agent_id][ac_index] = 0 243 | 244 | def act_aggregator(self, action): 245 | """ Apply the action selected by the AggregatorAgent. 246 | The agent selects the incentive rate and sends it to the environment. The environment saves the incentive to 247 | send it to the CustomerAgents later. """ 248 | incentive_rate = INCENTIVE_RATES[action] 249 | self.incentives[self.curr_step] = incentive_rate 250 | if self.baseline: 251 | print('Computing baseline step:', self.curr_step) 252 | self.compute_best_responses() 253 | self.incentives[self.curr_step] = np.argmax(self.aggregator_reward_matrix[self.curr_step]) 254 | 255 | def step(self): 256 | """ This method is called at the end of a time step. 257 | If it was not the final time step, the reward for the aggregator is calculated and demands for the next 258 | time step are retrieved. """ 259 | self.curr_step += 1 260 | self.done = self.curr_step == self.max_steps 261 | if not self.done: 262 | self.reward_aggregator() 263 | self.set_demands() 264 | self.set_incentive() 265 | 266 | def reward_aggregator(self): 267 | """ Calculate the reward for the aggregator. 268 | The reward consists of a consumption term, indicating how much the total consumption exceeds the threshold, and 269 | an incentive term, indicating how much each agent received on average. The term is normalized instead of taking 270 | the total, because the number of RL agents may differ. """ 271 | consumption_term = max(0, self.get_total_consumption(self.curr_step - 1) - self.capacity_threshold) 272 | incentive_term = self.incentive_received[self.curr_step - 1].sum() / 100 273 | reward = - RHO_AGGREGATOR * consumption_term - (1 - RHO_AGGREGATOR) * incentive_term 274 | self.rewards_aggregator[self.curr_step] = reward 275 | 276 | customer_reward = self.rewards_customers[self.curr_step] 277 | customer_bonus = RHO_COMMON * customer_reward - (1 - RHO_COMMON) * consumption_term 278 | self.rewards_customers[self.curr_step] = customer_bonus 279 | 280 | def set_demands(self): 281 | """ Retrieve the demands per customer and per device for the current time step from the demands DataFrame. 282 | If the demand is larger than a certain threshold the device is considered requested by the user. The actual 283 | load in kW that is requested for the device is fixed, except for the total non-shiftable devices. 
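    Illustrative mapping for one customer at one time step (a made-up row, not taken
    from the Pecan Street data):

        # DEVICES order: ['air', 'car', 'clotheswasher', 'dishwasher', 'dry']
        row = {'air': 1.8, 'car': 0.0, 'clotheswasher': 0.0, 'dishwasher': 0.9,
               'dry': 0.0, 'non-shiftable': 0.4, 'total': 3.1}
        # requests_new       -> [True, False, False, True, False]
        # non_shiftable_load -> 0.4
        # demand             -> 3.1
        # When scheduled later, the dishwasher is booked at its fixed
        # DEVICE_CONSUMPTION of 2.0 kW, while the AC uses the requested 1.8 kW.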
""" 284 | df = get_device_demands(self.day_df, self.data_ids, self.day, self.curr_step) 285 | non_shiftable = df['non-shiftable'].to_numpy() 286 | total = df['total'].to_numpy() 287 | requests = df[DEVICES].to_numpy() 288 | request_new = np.greater(requests, 0) 289 | self.non_shiftable_load[self.curr_step] = non_shiftable 290 | self.requests_new[self.curr_step] = request_new 291 | self.request_loads[self.curr_step] = requests 292 | self.demand[self.curr_step] = total 293 | 294 | def get_total_demand(self, step=None): 295 | """ Sum the demands of the customer agents. """ 296 | if step is None: 297 | return self.demand.sum(axis=1) 298 | return self.demand[step].sum() 299 | 300 | def get_total_consumption(self, step=None): 301 | """ Sum the consumptions of the customer agents. """ 302 | if step is None: 303 | return self.consumptions.sum(axis=1) 304 | return self.consumptions[step].sum() 305 | 306 | def get_total_reduction(self, step=None): 307 | if step is None: 308 | return self.get_total_demand() - self.get_total_consumption() 309 | return self.get_total_demand(step) - self.get_total_consumption(step) 310 | 311 | def set_incentive(self): 312 | """ A simple heuristic for calculating incentives without the aggregator as an agent. 313 | The incentive is a linear relation to the demand exceeding the capacity. """ 314 | total_demand = min(MAX_TOTAL_DEMAND, self.get_total_demand(self.curr_step)) 315 | demand_range = MAX_TOTAL_DEMAND - self.capacity_threshold 316 | demand_overflow = max(0, total_demand - self.capacity_threshold) 317 | incentive = np.ceil((demand_overflow / demand_range) * MAX_INCENTIVE) 318 | self.incentives[self.curr_step] = incentive 319 | 320 | def set_baseline(self): 321 | """ Average the pre-computed baseline with the consumption of the last time step for a more accurate result. 
""" 322 | baseline_demand = self.baselines[:, self.day - BASELINE_START_DAY, self.curr_step] 323 | new_baseline_demand = (baseline_demand + self.consumptions[self.curr_step - 1]) / 2 324 | self.baselines[:, self.day - BASELINE_START_DAY, self.curr_step] = new_baseline_demand 325 | 326 | def compute_best_responses(self): 327 | rewards = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 328 | profits = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 329 | consumptions = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids), len(POWER_RATES))) 330 | best_profits = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids))) 331 | best_consumptions = np.zeros((AGGREGATOR_ACTION_SIZE, len(self.data_ids))) 332 | for i, incentive_rate in enumerate(INCENTIVE_RATES): 333 | for agent_id in range(len(self.data_ids)): 334 | for j, power_rate in enumerate(POWER_RATES): 335 | baseline_demand = self.baselines[agent_id][self.day - BASELINE_START_DAY][self.curr_step] 336 | started_requests = self.requests_started[self.curr_step][agent_id] 337 | new_requests = self.requests_new[self.curr_step][agent_id] 338 | open_requests = self.requests_open[self.curr_step][agent_id] + new_requests 339 | delayed_requests = self.requests_delayed[self.curr_step][agent_id] 340 | selectable_requests = np.logical_and(open_requests, np.invert(started_requests)) 341 | non_interruptible_requests = np.logical_and(open_requests, started_requests) 342 | non_interruptible_demand = (non_interruptible_requests * DEVICE_CONSUMPTION).sum() 343 | non_shiftable_demand = self.non_shiftable_load[self.curr_step][agent_id] 344 | device_consumptions = selectable_requests * DEVICE_CONSUMPTION 345 | device_consumptions[0] = self.request_loads[self.curr_step][agent_id][0] 346 | dissatisfaction_values = self.dissatisfaction_coefficients[agent_id] * np.square(delayed_requests + 1) 347 | dissatisfaction_values[0] = self.dissatisfaction_coefficients[agent_id][0] * np.square(device_consumptions[0]) 348 | device_values = dissatisfaction_values * selectable_requests 349 | shiftable_demand = (selectable_requests * device_consumptions).sum() 350 | capacity = power_rate * shiftable_demand 351 | value, weight, actions, ac_rate = knapsack_ensemble(device_values, device_consumptions, capacity, self.dissatisfaction_coefficients[agent_id]) 352 | dissatisfaction = device_values.sum() - value 353 | consumption = weight + non_interruptible_demand + non_shiftable_demand 354 | profit = incentive_rate * max(0, baseline_demand - consumption) 355 | reward = profit - dissatisfaction 356 | rewards[i, agent_id, j] = reward 357 | profits[i, agent_id, j] = profit 358 | consumptions[i, agent_id, j] = consumption 359 | 360 | best_profits[i][agent_id] = profits[i][agent_id][np.argmax(rewards[i, agent_id])] 361 | best_consumptions[i][agent_id] = consumptions[i][agent_id][np.argmax(rewards[i, agent_id])] 362 | 363 | aggregator_rewards = - best_profits.sum(axis=1) / 100 - np.maximum(0, best_consumptions.sum(axis=1) - self.capacity_threshold) 364 | self.customer_reward_matrix[self.curr_step] = rewards 365 | self.aggregator_reward_matrix[self.curr_step] = aggregator_rewards 366 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import time 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import tensorflow as tf 7 | import matplotlib.dates as mdates 8 | from matplotlib 
import ticker 9 | 10 | from agents.agent_aggregator_dqn import AggregatorAgent 11 | from agents.agent_customer import CustomerAgent, construct_network 12 | from environment.environment import Environment 13 | from params import EPISODES, TIME_STEPS_TEST, TRAINING_START_DAY, NUM_RL_AGENTS, \ 14 | AGENT_IDS, TIME_STEPS_TRAIN, TRAINING_PERIOD, TRAINING_END_DAY, AGENT_CLASS, CLASS_RHO, CLASS_DC, \ 15 | DISSATISFACTION_COEFFICIENTS, REPLACE_TARGET_INTERVAL, TESTING_START_DAY, TESTING_END_DAY, BASELINE_START_DAY, \ 16 | TESTING_PERIOD 17 | 18 | env = Environment(AGENT_IDS, heterogeneous=False, baseline=False) 19 | aggregator_agent = AggregatorAgent(env) 20 | customer_agents = [CustomerAgent(agent_id, data_id, env, dummy=agent_id >= NUM_RL_AGENTS) for agent_id, data_id in enumerate(AGENT_IDS)] 21 | 22 | 23 | def main(): 24 | train(log=True, save=True) 25 | test_single_day(path=None, day=182) 26 | # test_single_day(path='save_files/MARL_IDR_2', day=182) 27 | # test_average(path='save_files/X2_a') 28 | # for day in range(181, 243): 29 | # test_single_day(path='save_files/Case_1_b', day=day) 30 | 31 | 32 | def train(log, save): 33 | # Use TensorBoard for the learning curves 34 | current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 35 | start = time.time() 36 | # log_name = 'no_baseline_' + current_time 37 | log_name = 'MARL_IDR_6' 38 | if log: 39 | log_path = os.path.join('logs', log_name) 40 | tf_writer = tf.summary.create_file_writer(log_path) 41 | 42 | # Train with trained customer agents 43 | # for agent in customer_agents: 44 | # agent.load('save_files/Case_2_b') 45 | 46 | # Train with trained aggregator 47 | # aggregator_agent.load('save_files/K8') 48 | 49 | aggregator_turn = True 50 | 51 | for episode in range(EPISODES): 52 | start_episode = time.time() 53 | 54 | # Reset environment and agents 55 | env.reset(max_steps=TIME_STEPS_TRAIN) 56 | aggregator_agent.reset() 57 | for agent in customer_agents: 58 | agent.reset() 59 | 60 | if episode % REPLACE_TARGET_INTERVAL == 0: 61 | aggregator_turn = False if aggregator_turn else True 62 | 63 | # Train single episode 64 | while not env.done: 65 | aggregator_agent.act(train=True) 66 | # aggregator_agent.act(train=True) if aggregator_turn else aggregator_agent.act(train=False) 67 | for agent in customer_agents: 68 | agent.act(train=True) 69 | # agent.act(train=False) if aggregator_turn else agent.act(train=True) 70 | env.step() 71 | 72 | if episode % 1 == 0: 73 | print('Episode:', episode) 74 | print('Aggregator turn:', aggregator_turn) 75 | print('Day:', datetime.datetime.strptime('{} {}'.format(env.day, 2018), '%j %Y')) 76 | print('Episode run time:', (time.time() - start_episode), 'sec') 77 | print('Cumulated run time:', (time.time() - start), 'sec') 78 | 79 | if log: 80 | with tf_writer.as_default(): 81 | tf.summary.scalar("epsilon/agent_{}".format(customer_agents[0].data_id), customer_agents[0].epsilon, episode) 82 | tf.summary.scalar("reward/agent_{}".format(customer_agents[0].data_id), customer_agents[0].acc_reward, episode) 83 | tf.summary.scalar("reward/agent_{}".format(customer_agents[8].data_id), customer_agents[8].acc_reward, episode) 84 | tf.summary.scalar("reward/agent_{}".format(customer_agents[17].data_id), customer_agents[17].acc_reward, episode) 85 | tf.summary.scalar("reward/aggregator", aggregator_agent.acc_reward, episode) 86 | tf.summary.scalar("epsilon/aggregator", aggregator_agent.epsilon, episode) 87 | 88 | # Save trained networks 89 | if save: 90 | path = 'save_files/' + log_name 91 | os.mkdir(path) 92 | 
aggregator_agent.save(path) 93 | for agent in customer_agents: 94 | agent.save(path) 95 | 96 | print('Training done') 97 | print() 98 | 99 | 100 | def test_single_day(path, day=TRAINING_START_DAY): 101 | env.reset(day=day, max_steps=TIME_STEPS_TEST) 102 | 103 | # Load agents 104 | if path is not None: 105 | aggregator_agent.load(path) 106 | for agent in customer_agents: 107 | agent.load(path) 108 | 109 | # Run single day 110 | start = time.time() 111 | for iteration in range(TIME_STEPS_TEST): 112 | aggregator_agent.act(train=False) 113 | for agent in customer_agents: 114 | agent.act(train=False) 115 | env.step() 116 | end = time.time() 117 | 118 | plot_agent = customer_agents[0] 119 | plot_hourly_average = False # Average demands, consumptions and incentives per hour to make the plot more readable 120 | plot_incentive = True # Plot the incentive 121 | time_labels = [datetime.datetime(year=2018, month=1, day=1) + datetime.timedelta(days=day - 1, minutes=i * 15) for i in range(TIME_STEPS_TEST)] 122 | 123 | print_metrics(end - start, path) 124 | for plot_agent in customer_agents: 125 | plot_aggregated_load_curve(time_labels, day, path) 126 | plot_single_load_curve(plot_agent, time_labels, day, path) 127 | plot_ac_reduction(plot_agent, time_labels, path, plot_incentive) 128 | plot_dissatisfaction(plot_agent, time_labels, path, plot_incentive) 129 | plot_schedule(plot_agent, time_labels, path) 130 | plt.show() 131 | 132 | 133 | def test_average(path): 134 | agents_rewards = np.zeros((TESTING_PERIOD, NUM_RL_AGENTS)) 135 | incentives_received = np.zeros((TESTING_PERIOD, NUM_RL_AGENTS)) 136 | aggregator_rewards = np.zeros(TESTING_PERIOD) 137 | total_demands = np.zeros((TESTING_PERIOD, TIME_STEPS_TEST)) 138 | total_consumptions = np.zeros((TESTING_PERIOD, TIME_STEPS_TEST)) 139 | peak_demand = np.zeros(TESTING_PERIOD) 140 | peak_consumption = np.zeros(TESTING_PERIOD) 141 | mean_demand = np.zeros(TESTING_PERIOD) 142 | mean_consumption = np.zeros(TESTING_PERIOD) 143 | thresholds = np.zeros(TESTING_PERIOD) 144 | runtime = np.zeros(TESTING_PERIOD) 145 | 146 | for day in range(TESTING_START_DAY, TESTING_END_DAY): 147 | print('Test on', datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y')) 148 | env.reset(day=day, max_steps=TIME_STEPS_TEST) 149 | 150 | # Load agents 151 | if path is not None: 152 | aggregator_agent.load(path) 153 | for agent in customer_agents: 154 | agent.load(path) 155 | 156 | # Run single day 157 | start = time.time() 158 | for iteration in range(TIME_STEPS_TEST): 159 | aggregator_agent.act(train=False) 160 | for agent in customer_agents: 161 | agent.act(train=False) 162 | env.step() 163 | end = time.time() 164 | 165 | agents_rewards[day - TESTING_START_DAY] = env.rewards_customers.sum(axis=0)[:NUM_RL_AGENTS] 166 | aggregator_rewards[day - TESTING_START_DAY] = env.rewards_aggregator.sum() 167 | incentives_received[day - TESTING_START_DAY] = env.incentive_received.sum(axis=0)[:NUM_RL_AGENTS] 168 | total_demands[day - TESTING_START_DAY] = env.get_total_demand() 169 | total_consumptions[day - TESTING_START_DAY] = env.get_total_consumption() 170 | peak_demand[day - TESTING_START_DAY] = np.max(env.get_total_demand()) 171 | peak_consumption[day - TESTING_START_DAY] = np.max(env.get_total_consumption()) 172 | mean_demand[day - TESTING_START_DAY] = np.mean(env.get_total_demand()) 173 | mean_consumption[day - TESTING_START_DAY] = np.mean(env.get_total_consumption()) 174 | thresholds[day - TESTING_START_DAY] = env.capacity_threshold 175 | runtime[day - TESTING_END_DAY] = end - start 
176 | 177 | print('Path:', str(path)) 178 | print('Test period', datetime.datetime.strptime('{} {}'.format(TESTING_START_DAY, 2018), '%j %Y'), 179 | datetime.datetime.strptime('{} {}'.format(TESTING_END_DAY, 2018), '%j %Y')) 180 | print('Mean run time:', np.mean(runtime)) 181 | print('Mean customer reward:', np.mean(agents_rewards)) 182 | print('Mean customer reward per agent:', np.mean(agents_rewards, axis=0)) 183 | print('Mean customer reward per day:', np.mean(agents_rewards, axis=1)) 184 | print('Mean aggregator reward per day:', np.mean(aggregator_rewards)) 185 | print() 186 | print('Metrics averaged over testing period', ' No DR', ' with DR') 187 | print('Peak load:', np.mean(peak_demand), np.mean(peak_consumption)) 188 | print('Mean load:', np.mean(mean_demand), np.mean(mean_consumption)) 189 | print('PAR:', np.mean(peak_demand) / np.mean(mean_demand), np.mean(peak_consumption) / np.mean(mean_consumption)) 190 | print('Mean incentive paid:', '0', np.mean(np.sum(incentives_received, axis=1))) 191 | print('Mean incentive received per agent:', '0', np.mean(incentives_received)) 192 | print('Mean threshold exceedance:', 193 | np.mean(np.sum(np.maximum(0, total_demands - thresholds[:, None]), axis=1)), 194 | np.mean(np.sum(np.maximum(0, total_consumptions - thresholds[:, None]), axis=1))) 195 | 196 | 197 | def print_metrics(run_time, path): 198 | agents_rewards = env.rewards_customers.sum(axis=0)[:NUM_RL_AGENTS] 199 | incentives_received = env.incentive_received.sum(axis=0)[:NUM_RL_AGENTS] 200 | test_day = datetime.datetime.strptime('{} {}'.format(env.day, 2018), '%j %Y') 201 | peak_demand = np.max(env.get_total_demand()) 202 | peak_consumption = np.max(env.get_total_consumption()) 203 | mean_demand = np.mean(env.get_total_demand()) 204 | mean_consumption = np.mean(env.get_total_consumption()) 205 | print('Path:', str(path)) 206 | print('Test on', test_day) 207 | print('Run time:', run_time, 'sec') 208 | print('Customer agent rewards:', agents_rewards) 209 | print('Mean customer agent reward:', np.mean(agents_rewards)) 210 | print('Aggregator agent reward:', env.rewards_aggregator.sum()) 211 | print('Metrics', ' No DR', ' with DR') 212 | print('Peak load:', peak_demand, peak_consumption) 213 | print('Mean load:', mean_demand, mean_consumption) 214 | print('Std:', np.std(env.get_total_demand()), np.std(env.get_total_consumption())) 215 | print('PAR:', peak_demand / mean_demand, peak_consumption / mean_consumption) 216 | print('Total incentive paid:', '0', np.sum(incentives_received)) 217 | print('Mean incentive received:', '0', np.mean(incentives_received)) 218 | print('Threshold exceedance:', 219 | np.sum(np.maximum(0, env.get_total_demand() - env.capacity_threshold)), 220 | np.sum(np.maximum(0, env.get_total_consumption() - env.capacity_threshold))) 221 | 222 | print('Demand:', repr(env.get_total_demand())) 223 | print('Load curve:', repr(env.get_total_consumption())) 224 | print('Incentives:', repr(env.incentives)) 225 | print('Capacity:', env.capacity_threshold) 226 | 227 | 228 | def plot_aggregated_load_curve(time_labels, day, path): 229 | ax, ax2 = init_plot() 230 | # ax = init_plot(twinx=False) 231 | 232 | ax.hlines(env.capacity_threshold, time_labels[0], time_labels[-1], label='Capacity', colors='black', linestyles='dashed', linewidth=3, alpha=0.8) 233 | # ax.fill_between(time_labels, env.non_shiftable_load.sum(axis=1), label='Non-shiftable demand', color='orange') 234 | ax.plot(time_labels, env.get_total_demand(), label='Without DR', color='C1', linestyle='dashed', linewidth=3) 
235 | # ax.plot(time_labels, env.baselines[:, day - TRAINING_START_DAY, :TIME_STEPS_TEST].sum(axis=0), label='Baseline', color='C3', linestyle='dashed', linewidth=3) 236 | ax.fill_between(time_labels, env.get_total_consumption(), label='With DR', color='C1', alpha=0.5) 237 | ax2.plot(time_labels, env.incentives, label='Incentive', color='C2', marker='x', markersize=8, linewidth=3, markeredgewidth=3) 238 | # ax.bar(time_labels, env.incentives, width=1/len(time_labels[48:]), label='Incentive', color='black') 239 | 240 | # Labels 241 | # ax.yaxis.set_major_locator(ticker.MultipleLocator(1)) 242 | ax.tick_params(axis='both', which='both', labelsize=18) 243 | ax2.tick_params(axis='both', which='both', labelsize=18) 244 | ax.set_xlim(time_labels[47], time_labels[-1]) 245 | ax2.set_xlim(time_labels[47], time_labels[-1]) 246 | ax.set_ylim(bottom=40) 247 | # ax.set_ylim(bottom=0, top=10) 248 | ax2.set_ylim(bottom=0, top=10) 249 | ax.set_ylabel('Aggregated consumption (kW)', fontsize=20) 250 | ax.set_ylabel('Incentive (¢)', fontsize=20) 251 | ax.legend(loc='upper left', fontsize=20) 252 | ax2.legend(loc='upper right', fontsize=20) 253 | ax.set_xlabel('Time (h)', fontsize=20) 254 | ax.grid(which='major', linewidth=1) 255 | ax.grid(which='minor', linewidth=0.5) 256 | plt.title('Load curve' + '\n' + str(path)) 257 | # plt.show() 258 | 259 | 260 | def plot_single_load_curve(plot_agent, time_labels, day, path): 261 | ax = init_plot(twinx=False) 262 | 263 | baseline = env.baselines[plot_agent.agent_id, day - BASELINE_START_DAY, :][:TIME_STEPS_TEST] 264 | 265 | # ax.fill_between(time_labels, env.non_shiftable_load[:, plot_agent.agent_id], label='Agent {} non-shiftable'.format(plot_agent.data_id), color='C0') 266 | ax.plot(time_labels, env.demand[:, plot_agent.agent_id], label='Without DR', color='C0', marker='o', markersize=8, linewidth=3) 267 | ax.plot(time_labels, env.consumptions[:, plot_agent.agent_id], label='MARL-DR', color='C3', marker='^', markersize=8, linewidth=3, alpha=1) 268 | ax.plot(time_labels, baseline, label='Baseline', color='black', linestyle='dashed', linewidth=3) 269 | ax.plot(time_labels, env.incentives, label='Incentive', color='C2', marker='x', markersize=8) 270 | # print('Incenties:', repr(env.incentives)) 271 | 272 | # Labels 273 | ax.tick_params(axis='both', which='both', labelsize=20) 274 | ax.set_xlim(time_labels[48], time_labels[-1]) 275 | # ax2.set_xlim(time_labels[47], time_labels[-1]) 276 | ax.set_ylim(bottom=0) 277 | # ax2.set_ylim(bottom=0) 278 | ax.set_ylabel('Power (kW)', fontsize=32) 279 | # ax2.set_ylabel('Incentive (¢)') 280 | ax.legend(loc='upper left', fontsize=20) 281 | # ax2.legend(loc='upper right') 282 | ax.set_xlabel('Time (h)', fontsize=32) 283 | ax.grid(which='major', linewidth=1) 284 | # ax.grid(which='minor', linewidth=0.5) 285 | # plt.title('Load curve' + '\n' + str(path)) 286 | # plt.show() 287 | 288 | 289 | def plot_ac_reduction(plot_agent, time_labels, path, plot_incentive): 290 | ax, ax2 = init_plot() 291 | 292 | ac_demand = env.request_loads[:, plot_agent.agent_id, 0] 293 | ac_consumption = env.ac_rates[:, plot_agent.agent_id] * env.request_loads[:, plot_agent.agent_id, 0] 294 | 295 | ax.plot(time_labels, ac_demand, label='Agent {} AC original'.format(plot_agent.data_id), color='C0', alpha=0.5) 296 | ax.plot(time_labels, ac_consumption, label='Agent {} AC actual'.format(plot_agent.data_id), linestyle='dashed', color='C0', alpha=0.5) 297 | ax.fill_between(time_labels, env.ac_rates[:, plot_agent.agent_id] * env.request_loads[:, plot_agent.agent_id, 0], 
color='C0', alpha=0.2) 298 | ax2.plot(time_labels, env.ac_rates[:, plot_agent.agent_id] * 10, label='Agent {} AC rate'.format(plot_agent.data_id), color='C1', alpha=0.5) 299 | 300 | if plot_incentive: 301 | ax2.plot(time_labels, env.incentives, label='Incentive', color='C2', alpha=1.0) 302 | 303 | # Labels 304 | ax.tick_params(axis='x', which='both', rotation=45) 305 | ax.set_xlim(time_labels[0], time_labels[-1]) 306 | ax2.set_xlim(time_labels[0], time_labels[-1]) 307 | ax.set_ylim(bottom=0) 308 | ax2.set_ylim(bottom=0) 309 | ax.set_ylabel('Total demand (kW)') 310 | ax2.set_ylabel('Incentive (¢)') 311 | ax.legend(loc='upper left') 312 | ax2.legend(loc='upper right') 313 | ax.set_xlabel('Time') 314 | ax.grid(which='major', linewidth=1) 315 | ax.grid(which='minor', linewidth=0.5) 316 | plt.title('Load curve' + '\n' + str(path)) 317 | # plt.show() 318 | 319 | 320 | def plot_dissatisfaction(plot_agent, time_labels, path, plot_incentive): 321 | # ax, ax2 = init_plot() 322 | ax = init_plot(twinx=False) 323 | a, b, c, d, e = -env.dissatisfaction[:, plot_agent.agent_id, :].T 324 | rewards = np.roll(env.rewards_customers[:, plot_agent.agent_id], -1) 325 | rewards[-1] = 0 326 | 327 | ax.stackplot(time_labels, b, c, d, e, a, labels=['EV', 'WM', 'DW', 'Dryer', 'AC']) 328 | ax.plot(time_labels, env.incentive_received[:, plot_agent.agent_id], label='Profit', color='black', marker='s', markersize=8, linewidth=3) 329 | ax.plot(time_labels, env.rewards_customers[:, plot_agent.agent_id], label='Total reward', color='C2', marker='o', markersize=8, linewidth=3) 330 | 331 | # Labels 332 | ax.tick_params(axis='both', which='both', labelsize=20) 333 | # ax2.tick_params(axis='both', which='both', labelsize=14) 334 | ax.set_xlim(time_labels[48], time_labels[-1]) 335 | # ax.set_ylim(bottom=0, top=15) 336 | ax.set_ylabel('Reward', fontsize=32) 337 | ax.legend(loc='upper left', fontsize=20) 338 | ax.set_xlabel('Time (h)', fontsize=32) 339 | ax.grid(which='major', linewidth=1) 340 | # ax.grid(which='minor', linewidth=0.5) 341 | # plt.title('Dissatisfaction' + '\n' + str(path)) 342 | # plt.show() 343 | 344 | 345 | def plot_schedule(plot_agent, time_labels, path): 346 | # ax, ax2 = init_plot() 347 | ax = init_plot(twinx=False) 348 | 349 | requests = env.requests_new[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 350 | actions = env.request_actions[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 351 | time_labels = time_labels[:TIME_STEPS_TRAIN] 352 | incentives = env.incentives[:TIME_STEPS_TRAIN] 353 | power_rates = env.power_rates[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 354 | ac_rates = env.ac_rates[:, plot_agent.agent_id][:TIME_STEPS_TRAIN] 355 | 356 | # Uncomment for hourly average incentives 357 | # incentives = np.mean(incentives.reshape(-1, 4), axis=1) 358 | 359 | for i, (time_label, req, act) in enumerate(zip(time_labels, requests, actions)): 360 | for j, (dev_name, dev_color, dev_req, dev_act) in enumerate(zip(['AC', 'EV', 'WM', 'DW', 'Dryer'], ['C4', 'C0', 'C1', 'C2', 'C3'], req, act)): 361 | height = 0.4 362 | request_bar = ax.barh(y=j + height, height=height, width=dev_req / 96, left=time_label, label=dev_name + ' original', color=dev_color, alpha=0.5, align='edge') 363 | if j == 0: 364 | height *= ac_rates[i] 365 | action_bar = ax.barh(y=dev_name, height=height, width=dev_act / 96, left=time_label, label=dev_name, color=dev_color, align='edge') 366 | 367 | # incentive_bar = ax.bar(time_labels, incentives, width=1 / 96, label='Incentive', align='edge', color='C1', alpha=0.4) 368 | # power_rate_bar = 
ax2.plot(time_labels, power_rates * 10, label='Power rates', color='C2') 369 | 370 | ax.tick_params(axis='both', which='both', labelsize=20) 371 | # ax2.tick_params(axis='both', which='both', labelsize=14) 372 | ax.set_xlim(time_labels[47], time_labels[-1]) 373 | # ax2.set_xlim(time_labels[47], time_labels[-1]) 374 | ax.set_ylim(bottom=0) 375 | # ax2.set_ylim(bottom=0) 376 | ax.grid(which='major', linewidth=1) 377 | # ax.grid(which='minor', linewidth=0.5) 378 | # ax.legend([request_bar, action_bar], ['Appliance requested', 'Appliance scheduled'], loc='upper left', fontsize=20) 379 | # ax2.legend([incentive_bar], ['Incentive'], loc='upper right', fontsize=16) 380 | # ax2.set_ylabel('Incentive (¢)', fontsize=16) 381 | ax.set_xlabel('Time (h)', fontsize=32) 382 | plt.setp(ax.get_yticklabels(), rotation=90, va="bottom", fontsize=20) 383 | # plt.title('Load schedule agent ' + str(plot_agent.data_id) + '\n' + str(path)) 384 | # plt.show() 385 | 386 | 387 | def init_plot(twinx=True): 388 | fig, ax = plt.subplots() 389 | 390 | day_locator = mdates.DayLocator() 391 | hour_locator = mdates.HourLocator(interval=1) 392 | minute_locator = mdates.MinuteLocator(interval=15) 393 | ax.xaxis.set_major_locator(hour_locator) 394 | ax.xaxis.set_minor_locator(minute_locator) 395 | ax.xaxis.set_major_formatter(mdates.DateFormatter('%H')) 396 | # ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M')) 397 | 398 | if twinx: 399 | ax2 = ax.twinx() 400 | ax2.xaxis.set_major_locator(hour_locator) 401 | ax2.xaxis.set_minor_locator(minute_locator) 402 | ax2.xaxis.set_major_formatter(mdates.DateFormatter('%H')) 403 | # ax2.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M')) 404 | return ax, ax2 405 | 406 | return ax 407 | 408 | 409 | main() 410 | -------------------------------------------------------------------------------- /params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Agent params 4 | AGENT_IDS = [661, 1642, 2335, 2361, 2818, 3039, 3456, 3538, 4031, 4373, 4767, 5746, 6139, 7536, 7719, 7800, 7901, 7951, 8156, 8386, 8565, 9019, 9160, 9922, 9278] 5 | NUM_AGENTS = len(AGENT_IDS) 6 | NUM_RL_AGENTS = 25 # Number of agents that are trained. 
The others are dummy agents (always choosing power rate 1.0) 7 | # TRAINING_START_DAY = 182 # Day of the year defining the start of the training period 8 | # TRAINING_END_DAY = 244 # Day of the year defining the end of the training period 9 | TRAINING_START_DAY = 91 # Day of the year defining the start of the training period 10 | TRAINING_END_DAY = 305 # Day of the year defining the end of the training period 11 | TESTING_START_DAY = 182 12 | TESTING_END_DAY = 213 13 | BASELINE_START_DAY = 91 14 | TRAINING_PERIOD = TRAINING_END_DAY - TRAINING_START_DAY # The length of the training period 15 | TESTING_PERIOD = TESTING_END_DAY - TESTING_START_DAY # The length of the testing period 16 | 17 | # RL params 18 | EPSILON = 0.1 # Fixed epsilon 19 | EPSILON_START = 1.0 # Epsilon start when using epsilon decay 20 | EPSILON_MIN = 0.01 # Epsilon minimum when using epsilon decay 21 | EPSILON_DECAY = 0.999 # Epsilon is multiplied by this decay every step (the appropriate value depends on the number of episodes) 22 | DISCOUNT_RATE = 0.9 # Discount rate (gamma) of the Q-learning algorithm 23 | EPISODES = 5000 # Number of episodes to train 24 | 25 | # DQN params 26 | BUFFER_SIZE = 10000 # The maximum number of SARS samples in the replay buffer 27 | BATCH_SIZE = 32 # The batch size for training of the Q-network 28 | LEARNING_RATE_DQN = 0.001 # The learning rate for training the Q-network 29 | TAU = 0.001 # The soft-update parameter for updating the target network 30 | TRAINING_INTERVAL = 16 # After so many steps the agent performs a training update on the network 31 | REPLACE_TARGET_INTERVAL = 50 # After so many episodes the target network is replaced 32 | HIDDEN_LAYER_SIZE = 32 # Size of the hidden layers 33 | 34 | # Environment params 35 | TIME_STEPS_TRAIN = 96 # Number of time steps per episode in training 36 | TIME_STEPS_TEST = 96 # Number of time steps per episode in testing 37 | POWER_RATES = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] # Set of actions (fraction of the demand the agent uses) 38 | CUSTOMER_ACTION_SIZE = len(POWER_RATES) # Number of actions for the customers 39 | CUSTOMER_STATE_SIZE = 8 # Number of state variables for the customers 40 | RHO = 0.5 # Weight of the incentive term (the weight of the dissatisfaction term is 1-RHO) 41 | RHO_COMMON = 1.0 42 | CRITICAL_THRESHOLD = 70 # Above this threshold for the total demand the agents receive incentives > 0 43 | CRITICAL_THRESHOLD_RELATIVE = 0.8 # Relative (fractional) counterpart of CRITICAL_THRESHOLD above which the agents receive incentives > 0 44 | MAX_TOTAL_DEMAND = 110 # Incentives will not increase when total demand is higher than this value 45 | MINIMUM_CUSTOMER_REWARD = -10 # Minimum reward to avoid too large negative rewards 46 | 47 | # Aggregator params 48 | RHO_AGGREGATOR = 0.5 # Weight of the consumption term (the weight of the incentive term is 1-RHO_AGGREGATOR) 49 | INCENTIVE_RATES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # Set of actions (incentives) 50 | AGGREGATOR_ACTION_SIZE = len(INCENTIVE_RATES) # Number of actions for the aggregator 51 | AGGREGATOR_STATE_SIZE = 3 # Number of state variables for the aggregator 52 | MAX_INCENTIVE = 10 53 | DISCOUNT_RATE_AGGREGATOR = 0.9 # Discount rate for the aggregator 54 | 55 | # Device params 56 | DEVICES = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry'] 57 | DEVICE_CONSUMPTION = np.array([2.5, 4.0, 1.0, 2.0, 2.0]) # Fixed consumption of the devices in kW 58 | # DISSATISFACTION_COEFFICIENTS = np.array([3.0, 0.04, 0.1, 0.06, 0.2]) # Delay coefficients 59 | # DISSATISFACTION_COEFFICIENTS = np.array([10.0, 0.2, 0.4, 0.3, 0.6]) # Delay 
coefficients 60 | DISSATISFACTION_COEFFICIENTS = np.array([6.0, 0.05, 0.2, 0.1, 0.4]) # Delay coefficients 61 | DISSATISFACTION_COEFFICIENTS_STD = np.array([2.0, 0.1, 0.1, 0.1, 0.2]) # Standard deviation of the delay coefficients 62 | DISSATISFACTION_COEFFICIENTS_MIN = np.array([1.0, 0.01, 0.01, 0.01, 0.01]) # Minimum delay coefficients 63 | DEVICE_NON_INTERRUPTIBLE = np.array([False, False, True, True, True]) # If the device is non-interruptible 64 | -------------------------------------------------------------------------------- /utils/consumer_baseline_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import numpy as np 4 | from scipy.stats import linregress 5 | 6 | AGENT_IDS = [661, 1642, 2335, 2361, 2818, 3039, 3456, 3538, 4031, 4373, 4767, 5746, 6139, 7536, 7719, 7800, 7901, 7951, 8156, 8386, 8565, 9019, 9160, 9922, 9278] 7 | NON_AC = ['car1', 'clotheswasher1', 'dishwasher1', 'dry1', 'waterheater1', 'non-shiftable'] 8 | pd.set_option('display.max_rows', 500) 9 | pd.set_option('display.max_columns', 500) 10 | pd.set_option('display.width', 1000) 11 | 12 | df_temperature = pd.read_csv('data/outdoor_temperatures_noaa.csv', 13 | delim_whitespace=True, 14 | parse_dates=[['LST_DATE', 'LST_TIME']], 15 | index_col=['LST_DATE_LST_TIME'], 16 | usecols=['LST_DATE', 'LST_TIME', 'T_HR_AVG']) 17 | print(df_temperature.describe()) 18 | 19 | # df_load = pd.read_csv('pecan_street_data/15minute_data_austin_processed_08_04.csv', 20 | df_load = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', 21 | parse_dates=['time'], 22 | index_col=['time']) 23 | print(df_load.describe()) 24 | 25 | start, end, steps = 182, 365, 96 26 | baselines = np.zeros((len(AGENT_IDS), len(range(start, end)), steps)) 27 | for id, agent in enumerate(AGENT_IDS): 28 | df_filter = df_load.loc[df_load['dataid'] == agent] 29 | df_load_resampled = df_filter.resample('H').max() 30 | start_date = datetime.datetime(2018, 1, 1) 31 | temperatures = [] 32 | loads = [] 33 | 34 | for day in range(1, 364): 35 | for step in range(24): 36 | offset = day * 24 + step 37 | time_delta = pd.to_timedelta(offset, 'h') 38 | current_time = start_date + time_delta 39 | temperature = df_temperature.loc[current_time]['T_HR_AVG'] 40 | if temperature == -9999: 41 | df_temperature.loc[current_time, 'T_HR_AVG'] = df_temperature.loc[current_time - pd.to_timedelta(1, 'h'), 'T_HR_AVG']  # fill missing readings from the previous hour; .loc[row, col] so the assignment writes back 42 | load = df_load_resampled.loc[current_time]['air'] 43 | if load > 0 and temperature != -9999: 44 | temperatures.append(temperature) 45 | loads.append(load) 46 | 47 | if temperatures and loads: 48 | slope, intercept, _, _, _ = linregress(temperatures, loads) 49 | else: 50 | slope, intercept = 0, 0 51 | 52 | for day in range(start, end): 53 | for step in range(steps): 54 | 55 | # Select last 10 same weekdays at the same moment 56 | time_delta = datetime.timedelta(minutes=step*15) 57 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 58 | time = start_date + time_delta 59 | time_h = start_date + datetime.timedelta(hours=int(step*0.25)) 60 | similar = [time - datetime.timedelta(days=7 * i) for i in range(1, 11)] 61 | df_load_similar = df_filter.loc[similar] 62 | similar_rounded = [ix.round('1h') for ix in df_load_similar.index] 63 | 64 | # Filter the 5 days with the largest demand 65 | # TODO exclude holidays 66 | df_load_similar = df_load_similar.nlargest(5, 'total') 67 | baseline_total = df_load_similar['total'].mean() 68 | 69 | # Take average of those 5 moments as the baseline 70 | 
df_temperature_similar = df_temperature.loc[similar_rounded] 71 | avg_temp = df_temperature_similar['T_HR_AVG'].mean() 72 | current_temp = df_temperature.loc[time_h]['T_HR_AVG'] 73 | temp_diff = current_temp - avg_temp 74 | 75 | baseline_ac = slope * temp_diff 76 | # baseline = baseline_ac + baseline_total # with temperature correction 77 | baseline = baseline_total # without temperature correction 78 | baselines[id][day-start][step] = baseline 79 | 80 | print(agent, day, step, baseline) 81 | 82 | # np.save('pecan_street_data/baselines_regr_temp_correction.npy', baselines) 83 | np.save('data/baselines_regr_temp_correction_new.npy', baselines) 84 | # np.save('pecan_street_data/baselines_regr.npy', baselines) 85 | -------------------------------------------------------------------------------- /utils/load_demand.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import numpy as np 4 | 5 | 6 | def load_requests(): 7 | df = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', parse_dates=['time'], index_col=['time']) 8 | return df 9 | 10 | 11 | def load_day(df, day, max_steps): 12 | minutes = max_steps * 15 13 | time_delta = pd.to_timedelta(minutes, 'm') 14 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 15 | end_date = start_date + time_delta 16 | df = df.loc[(df.index >= start_date) & (df.index < end_date)] 17 | return df 18 | 19 | 20 | def get_device_demands(df, agent_ids, day, timestep): 21 | minutes = timestep * 15 22 | time_delta = pd.to_timedelta(minutes, 'm') 23 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 24 | time = start_date + time_delta 25 | df = df.loc[(df['dataid'].isin(agent_ids)) & (df.index == time)] 26 | return df 27 | 28 | 29 | def get_peak_demand(df): 30 | df = df.groupby(pd.Grouper(freq='15Min')).sum() 31 | return df['total'].max() 32 | 33 | 34 | def load_baselines(): 35 | return np.load('data/baselines_regr_temp_correction.npy') 36 | -------------------------------------------------------------------------------- /utils/pre_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | 7 | pd.set_option('display.width', 10000) 8 | pd.set_option('display.max_columns', 100) 9 | SHIFTABLE = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry'] 10 | TIME_SHIFTABLE = ['car', 'clotheswasher', 'dishwasher', 'dry'] 11 | NON_SHIFTABLE = ['bathroom1', 'bathroom2', 'bedroom1', 'bedroom2', 'bedroom3', 'diningroom1', 'disposal1', 'furnace1', 12 | 'furnace2', 'garage1', 'kitchen1', 'kitchen2', 'kitchenapp1', 'kitchenapp2', 'lights_plugs1', 13 | 'lights_plugs2', 'lights_plugs3', 'lights_plugs4', 'livingroom1', 'microwave1', 'office1', 'oven1', 14 | 'oven2', 'range1', 'refrigerator1', 'refrigerator2', 'utilityroom1', 'venthood1', 'waterheater1', 15 | 'waterheater2', 'winecooler1'] 16 | DEVICES = ['air', 'car', 'clotheswasher', 'dishwasher', 'dry', 'non-shiftable'] 17 | INCLUDE = ['dataid', 'air', 'car', 'clotheswasher', 'dishwasher', 'dry', 'non-shiftable', 'total'] 18 | 19 | df = pd.read_csv('data/15minute_data_austin.csv', engine='python', encoding="ISO-8859-1", parse_dates=['local_15min'], index_col=['local_15min']) 20 | df.index = pd.to_datetime(df.index, utc=True, infer_datetime_format=True) 21 | df.index.names = ['time'] 22 | df = df.tz_convert(None) 23 | df = 
df.groupby(['dataid']).resample('15T').max() 24 | df = df.drop('dataid', axis=1).reset_index('dataid') 25 | df = df.fillna(0) 26 | df = df.apply(lambda l: np.where(l < 0.1, 0, l)) 27 | 28 | df['air'] = df[['air1', 'air2', 'air3', 'airwindowunit1']].sum(axis=1).clip(upper=4.0) 29 | df['dry'] = df[['drye1', 'dryg1']].sum(axis=1) 30 | df['car'] = df[['car1', 'car2']].sum(axis=1) 31 | df['dishwasher'] = df['dishwasher1'] 32 | df['clotheswasher'] = df['clotheswasher1'] 33 | 34 | for device, consumption, threshold in zip(TIME_SHIFTABLE, [4, 1, 2, 2], [0.1, 0.1, 0.1, 0.1]): 35 | df[device] = df[device].apply(lambda x: consumption if x >= threshold else 0) 36 | 37 | df['non-shiftable'] = df[NON_SHIFTABLE].sum(axis=1).clip(upper=5.0) 38 | df['total'] = df[DEVICES].sum(axis=1) 39 | 40 | # Save processed data to csv (comment out to skip) 41 | df[INCLUDE].to_csv('data/15minute_data_austin_fixed_consumption_new.csv') 42 | 43 | # Filter Household 44 | dataid = 661 45 | # df = df.loc[df['dataid'] != 9019] 46 | df = df.loc[df['dataid'] == dataid] 47 | 48 | # Filter dates 49 | day = 182 50 | start_date = datetime.datetime.strptime('{} {}'.format(day, 2018), '%j %Y') 51 | end_date = datetime.datetime.strptime('{} {}'.format(day + 1, 2018), '%j %Y') 52 | # start_date = datetime.datetime(2018, 7, 9) 53 | # end_date = datetime.datetime(2018, 7, 10) 54 | df = df.loc[(df.index >= start_date) & (df.index < end_date)] 55 | 56 | # create the plot 57 | # Use seaborn style defaults and set the default figure size 58 | sns.set(rc={'figure.figsize': (11, 4)}) 59 | solar_plot = df[DEVICES].plot(linewidth=0.5, marker='.') 60 | solar_plot.set_xlabel('Date') 61 | solar_plot.set_ylabel('Grid Usage kW') 62 | 63 | # display the plot 64 | plt.title('Major consumers') 65 | plt.ylabel('Power consumption (kW)') 66 | plt.show() 67 | -------------------------------------------------------------------------------- /utils/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from collections import deque, namedtuple 4 | 5 | 6 | class ReplayBuffer: 7 | def __init__(self, buffer_size, batch_size): 8 | self.batch_size = batch_size 9 | self.memory = deque(maxlen=buffer_size) 10 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 11 | 12 | def add(self, state, action, reward, next_state, done): 13 | experience = self.experience(state, action, reward, next_state, done) 14 | self.memory.append(experience) 15 | 16 | def sample(self): 17 | batch = random.sample(self.memory, k=self.batch_size) 18 | states, actions, rewards, next_states, dones = list(map(np.array, list(zip(*batch)))) 19 | return states, actions, rewards, next_states, dones 20 | 21 | def __len__(self): 22 | return len(self.memory) 23 | -------------------------------------------------------------------------------- /utils/visualize.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | 10 | # This constant limits the number of rows read in from the big CSV file. 
11 | # Set to None if you want to read the whole thing 12 | LIMIT = None 13 | pd.set_option('display.width', 10000) 14 | pd.set_option('display.max_columns', 100) 15 | devices_excl_other = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1'] 16 | # devices = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1', 'non-shiftable', 'solar'] 17 | devices = ['car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1'] 18 | include = ['dataid', 'grid', 'car1', 'air1', 'clotheswasher1', 'dishwasher1', 'drye1', 'dryg1', 'waterheater1', 'solar', 'non-shiftable', 'total'] 19 | drop = ['dataid', 'leg1v', 'leg2v', 'grid', 'solar'] 20 | 21 | # read the 15 minute data file for Austin 22 | df = pd.read_csv('data/15minute_data_austin_fixed_consumption.csv', 23 | engine='python', encoding="ISO-8859-1", parse_dates=['time'], index_col=['time'], nrows=LIMIT) 24 | print(df.describe()) 25 | 26 | # Filter Household 27 | # incl_dataid = 661 28 | # df = df.loc[df['dataid'] == incl_dataid] 29 | # excl_dataid = 9019 30 | # df = df.loc[df['dataid'] != excl_dataid] 31 | print(df.max()) 32 | 33 | # Filter dates 34 | for day in range(182, 200): 35 | # day = random.randint(1, 365) 36 | start_date = datetime.datetime.strptime('{} {} {}'.format(day, 2018, 0), '%j %Y %H') 37 | end_date = datetime.datetime.strptime('{} {} {}'.format(day + 1, 2018, 12), '%j %Y %H') 38 | # start_date = datetime.datetime(2018, 10, 16) 39 | # end_date = datetime.datetime(2018, 10, 17) 40 | df_filter = df.loc[(df.index >= start_date) & (df.index < end_date)] 41 | 42 | # group the data by time or date and take the mean of those 43 | # df.index = df.reset_index()['time'].apply(lambda x: x - pd.Timestamp(x.date())) 44 | # df = df.groupby(pd.Grouper(freq='M')).max() 45 | # y = df.groupby(['dataid']).max() 46 | # print(y.describe) 47 | 48 | # convert from kW to kWh 49 | # df['total_kwh'] = df['total'].apply(lambda x: x) 50 | 51 | # Plot boxplot for device 52 | # threshold = 0.01 53 | # x = df.apply(lambda l: np.where(l < threshold, np.nan, l)) 54 | # print(x[devices].describe()) 55 | # fig = plt.figure(figsize=(8,6)) 56 | # ax = fig.gca() 57 | # x.boxplot(column=devices, ax=ax) 58 | 59 | # create the plot 60 | # df = df.drop(drop, 'columns') 61 | # df = df.dropna('columns', thresh=1) 62 | # Use seaborn style defaults and set the default figure size 63 | sns.set(rc={'figure.figsize': (11, 4)}) 64 | # solar_plot = df_filter[devices + ['total', 'total_incl_solar']].plot(linewidth=0.5, marker='.') 65 | solar_plot = df_filter[devices + ['total']].plot(linewidth=0.5, marker='.') 66 | # solar_plot = df_filter[devices].plot(linewidth=0.5, marker='.') 67 | solar_plot.set_xlabel('Date') 68 | solar_plot.set_ylabel('Grid Usage kW') 69 | 70 | # Plot hist 71 | # plt.hist(x['clotheswasher1'].to_numpy(), bins=50) 72 | 73 | # display the plot 74 | plt.title('Major consumers') 75 | plt.ylabel('Power consumption (kW)') 76 | plt.show() 77 | 78 | print('done') 79 | --------------------------------------------------------------------------------
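
A note on how the hyperparameters in params.py are typically applied: the comments there describe the epsilon-decay exploration schedule, the TAU soft update of the DQN target network, and the RHO weighting between the incentive and dissatisfaction terms of the customer reward (floored at MINIMUM_CUSTOMER_REWARD). The code that actually applies them lives in agents/agent_customer.py, agents/agent_aggregator_dqn.py and environment/environment.py, which are not reproduced in full above. The sketch below is a minimal, hypothetical illustration only: the function names decay_epsilon, soft_update and customer_reward are introduced here for clarity and do not come from the repository, and the reward shape is inferred from the parameter comments rather than from environment/environment.py.

# Illustrative sketch only -- not the repository's implementation.
from params import EPSILON_MIN, EPSILON_DECAY, TAU, RHO, MINIMUM_CUSTOMER_REWARD


def decay_epsilon(epsilon):
    # Multiply epsilon by EPSILON_DECAY every step, never dropping below EPSILON_MIN.
    return max(EPSILON_MIN, epsilon * EPSILON_DECAY)


def soft_update(online_weights, target_weights, tau=TAU):
    # Soft target-network update: w_target <- tau * w_online + (1 - tau) * w_target.
    # Works on the list of numpy arrays returned by a Keras model's get_weights().
    return [tau * w_o + (1.0 - tau) * w_t for w_o, w_t in zip(online_weights, target_weights)]


def customer_reward(incentive_received, dissatisfaction, rho=RHO):
    # Assumed reward shape: RHO-weighted incentive minus (1 - RHO)-weighted dissatisfaction,
    # clipped from below at MINIMUM_CUSTOMER_REWARD to avoid very large negative rewards.
    return max(MINIMUM_CUSTOMER_REWARD, rho * incentive_received - (1.0 - rho) * dissatisfaction)

For example, calling decay_epsilon repeatedly on EPSILON_START = 1.0 anneals exploration towards EPSILON_MIN = 0.01, while soft_update(model.get_weights(), target_model.get_weights()) nudges the target network towards the online network by a factor of TAU = 0.001 per update.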