├── marlagent
│   ├── __init__.py
│   ├── agent
│   │   ├── __init__.py
│   │   ├── dqn
│   │   │   ├── __init__.py
│   │   │   ├── model.py
│   │   │   ├── replay_buffer.py
│   │   │   └── dqn.py
│   │   └── linear
│   │       ├── __init__.py
│   │       └── lin_agent.py
│   ├── agent_actions.py
│   └── rlagent.py
├── prediction
│   ├── __init__.py
│   └── energy_generation.py
├── shutdown.sh
├── .gitignore
├── start.sh
├── experimental.py
├── synchronizer.py
├── cghandler
│   └── httpservice.py
├── feat_extractor.py
├── nameserver.py
├── state.py
├── util.py
└── main.py

/marlagent/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/prediction/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/marlagent/agent/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/marlagent/agent/dqn/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/marlagent/agent/linear/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/shutdown.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | pkill -f main.py
3 | pkill -f synchronizer.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | __pycache__
3 | p1.log
4 | p2.log
5 | p4.log
6 | feat_extractor.pyc
7 | marlagent/*.pyc
8 | *.pyc
9 | assets/ns.pid
10 | assets/Aliceerror.csv
11 | assets/Boberror.csv
12 | sync.log
13 | assets/Charlieerror.csv
14 | p3.log
15 | 
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 synchronizer.py --agentname Steve --nameserver 127.0.0.1:10000 > sync.log 2>&1 &
3 | python3 main.py --agentname Alice --nameserver 127.0.0.1:10000 --allies Bob,Charlie,Dave --battInit 7.5 --nSolarPanel 72 > p1.log 2>&1 &
4 | python3 main.py --agentname Bob --nameserver 127.0.0.1:10000 --allies Alice,Charlie,Dave --battInit 2.5 --nSolarPanel 54 > p2.log 2>&1 &
5 | python3 main.py --agentname Charlie --nameserver 127.0.0.1:10000 --allies Alice,Bob,Dave --battInit 5.0 --nSolarPanel 12 > p3.log 2>&1 &
6 | python3 main.py --agentname Dave --nameserver 127.0.0.1:10000 --allies Alice,Bob,Charlie --battInit 0.0 --nSolarPanel 0 > p4.log 2>&1 &
--------------------------------------------------------------------------------
/marlagent/agent/dqn/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | 
4 | class DQN(nn.Module):
5 | 
6 |     def __init__(self, in_channels):
7 |         """
8 |         Initialize a deep Q-learning network as described in
9 |         https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
10 |         Arguments:
11 |             in_channels: number of channels of input.
12 | i.e The number of most recent frames stacked together as describe in the paper 13 | num_actions: number of action-value to output, one-to-one correspondence to action in game. 14 | """ 15 | super(DQN, self).__init__() 16 | self.fc1 = nn.Linear(in_channels, 100) 17 | self.fc2 = nn.Linear(100, 100) 18 | self.fc3 = nn.Linear(100, 100) 19 | self.fc4 = nn.Linear(100, 1) 20 | 21 | 22 | def forward(self, x): 23 | x = F.sigmoid(self.fc1(x)) 24 | x = F.sigmoid(self.fc2(x)) 25 | x = F.sigmoid(self.fc3(x)) 26 | x = self.fc4(x) 27 | return x 28 | 29 | -------------------------------------------------------------------------------- /experimental.py: -------------------------------------------------------------------------------- 1 | from osbrain import run_agent 2 | from osbrain import run_nameserver 3 | import multiprocessing 4 | 5 | 6 | def method_a(agent, message): 7 | gg = mpns.temp 8 | gg = 10 9 | agent.log_info('Method A Temp: %s' % mpns.temp) 10 | return 'Blah 1' 11 | 12 | def method_b(agent, message): 13 | agent.log_info('Method B Temp: %s' % mpns.temp) 14 | return 'Blah 2' 15 | 16 | 17 | if __name__ == '__main__': 18 | manager = multiprocessing.Manager() 19 | global mpns 20 | mpns = manager.Namespace() 21 | 22 | mpns.temp = 1 23 | 24 | ns = run_nameserver() 25 | 26 | alice = run_agent('Alice') 27 | bob = run_agent('Bob') 28 | 29 | addr1 = alice.bind('REP', alias='main1', handler=method_a) 30 | addr2 = alice.bind('REP', alias='main2', handler=method_b) 31 | 32 | bob.connect(addr1, alias='main1') 33 | bob.send('main1', "Some message") 34 | reply = bob.recv('main1') 35 | 36 | bob.connect(addr2, alias='main2') 37 | bob.send('main2', "Some message") 38 | reply = bob.recv('main2') 39 | agents = ns.agents() 40 | print(agents) 41 | ns.shutdown() 42 | 43 | -------------------------------------------------------------------------------- /prediction/energy_generation.py: -------------------------------------------------------------------------------- 1 | import util 2 | import pandas as pd 3 | from datetime import datetime 4 | 5 | class EnergyGeneration: 6 | 7 | def __init__(self, path_to_file, n_solar_panel): 8 | 9 | self.n_solar_panel = n_solar_panel 10 | 11 | time = [] 12 | dni = [] # Values are w/m2 13 | self.D = pd.read_csv(path_to_file, sep=',', usecols=['Year', 'Month', 'Day', 'Hour', 'Minute', 'DNI']) 14 | 15 | for index, row in self.D.iterrows(): 16 | ts = "{0}/{1:02d}/{2:02d} {3:02d}:{4:02d}".format(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute']) 17 | time.append(ts) 18 | dni.append(row['DNI']) 19 | 20 | d = {'Time':time, 'DNI': dni} 21 | self.D = pd.DataFrame(data=d) 22 | 23 | print("Solar exposure data loaded successfully.") 24 | 25 | 26 | def get_generation(self, ts): 27 | """ 28 | Get the generation at a particular time in kWh. It is assumed that solar exposure at a particular time has been 29 | predicted. 
30 |         :param ts: timestamp (datetime) to look up in the solar exposure data
31 |         :return: kWh
32 |         """
33 |         ts_str = util.cnv_datetime_to_str(ts, '%m/%d %H:%M')
34 |         data = self.D.loc[self.D['Time'].str.contains(ts_str)]
35 | 
36 |         unit_generation = self._calculate_generation(exposure=float(data['DNI'].values[0]))
37 |         total_generation = (unit_generation * self.n_solar_panel) / 1000.0
38 |         print("TOTAL GENERATION: "+str(total_generation))
39 |         return total_generation
40 | 
41 | 
42 |     def _calculate_generation(self, exposure, max_cap = 180.0):
43 |         return max_cap * (exposure / 1000.0)
--------------------------------------------------------------------------------
/synchronizer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import traceback
4 | import argparse
5 | import time
6 | from nameserver import NameServer
7 | from osbrain import run_nameserver, run_agent
8 | 
9 | pidfile = "assets/ns.pid"
10 | 
11 | 
12 | def initiate_nameserver(ns_socket_addr):
13 |     osbrain_ns = None
14 |     # If a PID file exists from a previous run, it is stale; remove it before starting a new nameserver
15 |     if os.path.isfile(pidfile):
16 |         print("PID file already exists. Removing old pid file.")
17 |         os.unlink(pidfile)
18 | 
19 |     try:
20 |         print("Creating a new nameserver...")
21 |         pid = str(os.getpid())
22 |         osbrain_ns = run_nameserver(addr=ns_socket_addr)
23 |         open(pidfile, 'w+').write(pid)
24 | 
25 |     except Exception:
26 |         if osbrain_ns is not None: osbrain_ns.shutdown()
27 |         print(traceback.format_exc())
28 |         print("ERROR: Exception caught when creating nameserver.")
29 |         sys.exit(-1)
30 | 
31 |     return osbrain_ns
32 | 
33 | 
34 | 
35 | def start_server_job(osbrain_ns, agentname):
36 |     time.sleep(3)
37 |     ns_agent = NameServer(osbrain_ns, agentname)
38 | 
39 |     # Start the scheduled job
40 |     steve = run_agent(agentname, serializer='json')
41 |     ns_agent.schedule_job(steve)
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     parser = argparse.ArgumentParser(description='Agent Module')
46 | 
47 |     parser.add_argument('--agentname', required=True, help='Name of the agent')
48 |     parser.add_argument('--nameserver', required=True, help='Socket address of the nameserver')
49 |     args = parser.parse_args()
50 | 
51 |     osbrain_ns = initiate_nameserver(args.nameserver)
52 |     start_server_job(osbrain_ns, args.agentname)
--------------------------------------------------------------------------------
/marlagent/agent/linear/lin_agent.py:
--------------------------------------------------------------------------------
1 | import util
2 | 
3 | from marlagent import rlagent
4 | 
5 | class LinearQAgent(rlagent.RLAgent):
6 | 
7 |     def __init__(self):
8 | 
9 |         print("Linear Approximate Q learning agent instantiated...")
10 |         super(LinearQAgent, self).__init__()
11 | 
12 |         self.weights = util.Counter()
13 | 
14 | 
15 |     def get_qValue(self, state, action):
16 |         """
17 |         Should return Q(state,action) = w * featureVector
18 |         where * is the dotProduct operator
19 | 
20 |         :param state:
21 |         :param action:
22 |         :return:
23 |         """
24 |         features = self.feat_extractor.get_features(state, action)
25 | 
26 |         q_value = 0.0
27 |         for f_key in features:
28 |             q_value = q_value + (features[f_key] * self.weights[f_key])
29 | 
30 |         # print(features)
31 |         # print("Q - VALUE:::::%s"%q_value)
32 |         return q_value
33 | 
34 | 
35 |     def update(self, state, action, next_state, reward):
36 |         """
37 |         Update weights based on transition
38 | 
39 |         :param state:
40 |         :param action:
41 |         :param next_state:
42 |         :param reward:
43 |         :return:
44 |         """
45 |         # TODO
46 |         features = self.feat_extractor.get_features(state, action)
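# [Editor's note: explanatory comment added for clarity; not part of the original file.]
# The block below is the standard approximate Q-learning (TD) weight update:
#     delta = r + gamma * max_a' Q(s', a') - Q(s, a)
#     w_k  <- w_k + alpha * delta * f_k(s, a)
# where gamma is self.discount, alpha is self.alpha, and f_k are the extracted
# feature values; d_error below corresponds to delta.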
47 |         # difference = reward + (self.discount * self.compute_value_from_qValues(next_state)) - self.get_qValue(state, action)
48 |         q_value_next_state = (self.discount * self.compute_value_from_qValues(next_state))
49 |         q_value_curr_state = self.get_qValue(state, action)
50 |         d_error = reward + q_value_next_state - q_value_curr_state
51 | 
52 |         # print("DISCOUNTED Q VALUE NEXT STATE:%s"%q_value_next_state)
53 |         # print("Q VALUE CURR STATE:%s" % q_value_curr_state)
54 |         print("CORRECTION-------------:%s"%d_error)
55 |         self.write_to_file(data = d_error, path_to_file = 'assets/'+state.name+'error.csv')
56 | 
57 |         for f_key in features:
58 |             self.weights[f_key] = self.weights[f_key] + (self.alpha * d_error * features[f_key])
59 | 
60 |         # Write weights into a file to observe learning
61 |         # print("WEIGHTS---------------:")
62 |         # print(self.weights)
63 | 
64 | 
65 | 
66 |     def get_weights(self):
67 |         return self.weights
--------------------------------------------------------------------------------
/marlagent/agent_actions.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import random
3 | 
4 | def update_battery_status(battery_max, battery_curr, amount):
5 |     '''
6 |     Update the battery status
7 |     :param battery_max:
8 |     :param battery_curr:
9 |     :param amount: energy to add (positive) or draw (negative)
10 |     :return: the new battery level and any excess energy that could not be stored
11 |     '''
12 |     excess = 0
13 |     battery_cap_left = battery_max - battery_curr
14 |     new_batt_status = battery_curr
15 | 
16 |     if amount <= 0.0 and abs(amount) <= battery_curr:
17 |         new_batt_status += amount
18 | 
19 |     elif amount <= 0.0 and abs(amount) > battery_curr:
20 |         new_batt_status = 0.0
21 | 
22 |     elif amount > 0.0 and battery_cap_left >= amount:
23 |         new_batt_status += amount
24 | 
25 |     elif amount > 0.0 and battery_cap_left < amount:
26 |         new_batt_status += battery_cap_left
27 |         excess = amount - battery_cap_left
28 | 
29 |     print("Battery status updated: %s." % new_batt_status)
30 |     return new_batt_status, excess
31 | 
32 | 
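# [Editor's note: illustrative examples added for clarity; not part of the original file.]
# Assuming battery_max = 7.2 and battery_curr = 5.0, the four branches above behave as:
#     update_battery_status(7.2, 5.0, -2.0) -> (3.0, 0)      # discharge within the stored charge
#     update_battery_status(7.2, 5.0, -6.0) -> (0.0, 0)      # discharge clamped at empty
#     update_battery_status(7.2, 5.0,  1.5) -> (6.5, 0)      # charge fits in the remaining capacity
#     update_battery_status(7.2, 5.0,  4.0) -> (7.2, ~1.8)   # overflow; ~1.8 kWh returned as excess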
33 | def request_ally(ns, agent, agent_name, allies, energy_amt, time):
34 | 
35 |     allies_remaining = copy.deepcopy(allies)
36 | 
37 |     while (len(allies_remaining) > 0):
38 | 
39 |         # select a random ally
40 |         ally_name = random.choice(allies_remaining)
41 | 
42 |         ally_proxy = ns.proxy(name = ally_name, timeout=0.5)
43 |         ally_proxy_addr = ally_proxy.addr(alias=str('energy_request_'+ally_name))
44 | 
45 |         message = {
46 |             'topic': 'ENERGY_REQUEST',
47 |             'agentName':agent_name,
48 |             'time': time,
49 |             'energy': energy_amt
50 |         }
51 | 
52 |         agent.log_info("Contacting ally ({0}) for: {1}".format(ally_name, message['energy']))
53 |         resp = send_message(agent = agent, server_addr = ally_proxy_addr, alias = str('energy_request_'+ally_name), message = message)
54 | 
55 |         # If energy request is accepted
56 |         if resp['topic'] != 'ENERGY_REQUEST_DECLINE':
57 |             agent.log_info("Energy request granted by ally ({0}) : {1}".format(ally_name, resp['energy']))
58 |             return resp['energy']
59 |         else:
60 |             allies_remaining.remove(ally_name)
61 | 
62 |     return float(0.0)
63 | 
64 | 
65 | def energy_transaction(next_state):
66 | 
67 |     next_state.energy_consumption = 0.0
68 |     next_state.energy_generation = 0.0
69 |     next_state.battery_curr = 0.0
70 | 
71 |     return next_state
72 | 
73 | 
74 | def get_energy_balance(state):
75 |     return (state.energy_generation + state.battery_curr) - state.energy_consumption
76 | 
77 | 
78 | def send_message(agent, server_addr, alias, message):
79 |     agent.connect(server=server_addr, alias=alias)
80 |     agent.send(alias, message=message)
81 |     reply = agent.recv(alias)
82 |     agent.log_info("Received: "+str(reply))
83 |     agent.close(alias=alias)
84 |     return reply
85 | 
--------------------------------------------------------------------------------
/cghandler/httpservice.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | 
4 | class CGHTTPHandler:
5 | 
6 |     def __init__(self, agent_name):
7 |         self.agent_name = agent_name
8 |         self._register_agent()
9 | 
10 | 
11 |     def _register_agent(self):
12 |         print("Registering Agent with Central Monitor...")
13 | 
14 |         url = 'http://localhost:8080/agent/register'
15 |         data = {
16 |             "name": self.agent_name,
17 |             "active": True
18 |         }
19 | 
20 |         response = requests.post(url=url, json=data)
21 |         self.agent_id = json.loads(response.content.decode('utf-8'))['id']
22 | 
23 | 
24 |     def update_energy_status(self, time, iter, batt_init, energy_consumption, energy_generation, borrowed_from_CG):
25 | 
26 |         url = 'http://localhost:8080/energy/status'
27 | 
28 |         data = {
29 |             "timestamp": time,
30 |             "agentId": self.agent_id,
31 |             "iter": iter,
32 |             "batteryInitial": batt_init,
33 |             "energyConsumption": energy_consumption,
34 |             "energyGeneration": energy_generation,
35 |             "borrowedFromCG": borrowed_from_CG
36 |         }
37 |         print(data)
38 |         response = requests.put(url=url, json=data)
39 | 
40 |         if response.status_code == 200:
41 |             print("Energy status updated successfully with central grid.")
42 |         else:
43 |             print("ERROR: %s"%response.content.decode('utf-8'))
44 | 
45 | 
46 |     def register_transaction(self, iter, time, buyer_name, amount):
47 | 
48 |         url = 'http://localhost:8080/energy/trasaction'
49 | 
50 |         data = {
51 |             "iter": iter,
52 |             "timestamp": time,
53 |             "sellerId": self.agent_id,
54 |             "buyerName": buyer_name,
55 |             "price": 0.5,
56 |             "amount": amount
57 |         }
58 | 
59 |         response = requests.post(url=url, json=data)
60 | 
61 |         if response.status_code == 200:
62 | 
print("Energy transaction successfully registered with central grid.") 63 | else: 64 | print("ERROR: %s"%response.content.decode('utf-8')) 65 | 66 | 67 | def get_energy_status(self, iter): 68 | url = 'http://localhost:8080/energy/status/grid/'+str(iter) 69 | response = requests.get(url=url) 70 | 71 | if response.status_code == 200: 72 | print("Grid energy status retrieved successfully.") 73 | return json.loads(response.content.decode('utf-8')) 74 | else: 75 | print("ERROR: Error retrieving grid energy status. %s"%response.content) 76 | return None 77 | 78 | 79 | def log_iteration_status(self, iter, env, nzeb_status): 80 | url = 'http://localhost:8080/energy/log/iteration/status' 81 | 82 | data = { 83 | "iteration": iter, 84 | "agentId": self.agent_id, 85 | "energyGeneration": env.get_total_generated(), 86 | "energyConsumption": env.get_total_consumed(), 87 | "energyBorrowedFromAlly": env.get_energy_borrowed_from_ally(), 88 | "energyBorrowedFromCG": env.get_energy_borrowed_from_CG(), 89 | "nzebStatus": nzeb_status 90 | } 91 | 92 | response = requests.post(url=url, json=data) 93 | 94 | if response.status_code == 200: 95 | print("Iteration status successfully logged to central grid.") 96 | else: 97 | print("ERROR: %s" % response.content.decode('utf-8')) 98 | 99 | 100 | instance = False 101 | cg_http_handler = None 102 | 103 | def get_CG_serivce_instance(agent_name): 104 | 105 | global instance 106 | if not instance: 107 | global cg_http_handler 108 | cg_http_handler = CGHTTPHandler(agent_name) 109 | instance = True 110 | return cg_http_handler 111 | else: 112 | return cg_http_handler -------------------------------------------------------------------------------- /feat_extractor.py: -------------------------------------------------------------------------------- 1 | import util 2 | import numpy as np 3 | from state import EnvironmentState 4 | from datetime import datetime 5 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 6 | from state import AgentState 7 | 8 | class FeatureExtractor: 9 | 10 | def __init__(self): 11 | print("Instantiating feature extractor...") 12 | self._train() 13 | 14 | 15 | def _train(self): 16 | 17 | train_x = np.zeros(shape=[365 * 48, 2]) 18 | 19 | for i in range (0, (365 * 48)): 20 | train_x[i][0] = i%48 21 | 22 | for i in range (0, (365 * 48)): 23 | train_x[i][1] = i%7 24 | 25 | # for i in range (0, (365 * 48)): 26 | # train_x[i][2] = i%12 27 | 28 | self.ohe_time = OneHotEncoder(sparse=False) 29 | self.ohe_time.fit(train_x) 30 | 31 | self.lb_actions = LabelEncoder() 32 | actions_trans = self.lb_actions.fit_transform(AgentState.actions) 33 | self.ohe_actions = OneHotEncoder(sparse=False) 34 | self.ohe_actions.fit(actions_trans.reshape(-1,1)) 35 | 36 | 37 | def get_features(self, state, action): 38 | ''' 39 | Compute the features from the state to extract the q-value 40 | :param state: 41 | :param action: 42 | :return: a list of feature values 43 | ''' 44 | 45 | features = self.encode_state(state) 46 | 47 | # ---------------- ENCODING ACTIONS ---------------- 48 | # Modelling energy request data 49 | if action['action'] == 'grant' or action['action'] == 'deny_request': 50 | # TODO: Discritize by observing the values of data 51 | features.append(int(action['data']/0.2)) 52 | 53 | else: 54 | features.append(0) 55 | 56 | action_trans = self.ohe_actions.transform(self.lb_actions.transform([action['action']]).reshape(1,-1)) 57 | for f in action_trans[0]: 58 | features.append(f) 59 | # ------------------------------------------------ 60 | 61 | #return 
self.__encode_features_to_Counter(features) 62 | 63 | return features 64 | 65 | 66 | def encode_state(self, state): 67 | ''' 68 | Encode the state variable into n features 69 | :param state: 70 | :return: 71 | ''' 72 | 73 | time_feat = util.Counter() 74 | time_feat['hour'] = (state.time.time().hour * 60 + state.time.time().minute) // 30 75 | time_feat['dayofweek'] = state.time.weekday() # monday = 0 76 | # time_feat['month'] = state.time.month - 1 77 | 78 | # Transform and avoid the dummy variable trap 79 | features = self.ohe_time.transform(np.array([time_feat['hour'], time_feat['dayofweek']]) 80 | .reshape(1, -1))[:, :-1] 81 | 82 | features = list(features[0]) 83 | 84 | features.append(self.__encode_energy(state.energy_consumption)) 85 | features.append(self.__encode_energy(state.energy_generation)) 86 | features.append(self.__encode_energy(state.battery_curr)) 87 | 88 | return features 89 | 90 | 91 | def __encode_features_to_Counter(self, features): 92 | # Transforming into apt data structure 93 | feat_dict = util.Counter() 94 | for i in range(len(features)): 95 | feat_dict['f_' + str(i)] = float(features[i]) 96 | 97 | # print(feat_dict) 98 | return feat_dict 99 | 100 | 101 | def get_n_features(self): 102 | ''' 103 | Simulates a fake agent state and returns the numbers of features. 104 | :param state: 105 | :return: 106 | ''' 107 | 108 | environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 109 | fake_agent_state = AgentState(name='Test', iter =0, energy_consumption=0.0, energy_generation=0.0, 110 | battery_curr=float(5), time=datetime.now(), 111 | environment_state=environment_state, 112 | cg_http_service=None) 113 | action = {} 114 | action['action'] = 'consume_and_store' 115 | features = self.get_features(fake_agent_state, action) 116 | 117 | return len(features) 118 | 119 | 120 | def __encode_energy(self, energy): 121 | if energy == 0.0: 122 | return 0 123 | elif energy < 1.0: 124 | return 1.0 125 | elif energy < 2.88: 126 | return 2.0 127 | else: 128 | return 3 129 | 130 | 131 | -------------------------------------------------------------------------------- /nameserver.py: -------------------------------------------------------------------------------- 1 | import time 2 | import util 3 | import traceback 4 | import pandas as pd 5 | from osbrain import run_agent 6 | from datetime import datetime 7 | 8 | class NameServer: 9 | def __init__(self, ns, agentname): 10 | print("Instantiating NameServer class...") 11 | 12 | self.ns = ns 13 | self.agentname = agentname 14 | time.sleep(5) # to let all the agents load due to startup latency 15 | 16 | 17 | def schedule_job(self, server_agent): 18 | self.d1 = self._load_data("assets/house1_consumption.csv") 19 | self.d2 = self._load_data("assets/house2_consumption.csv") 20 | self.d3 = self._load_data("assets/house3_consumption.csv") 21 | 22 | d_map = { 23 | "Alice": self.d1, 24 | "Bob": self.d2, 25 | "Charlie": self.d1, 26 | "Dave": self.d3, 27 | } 28 | 29 | # extracting the list of agents 30 | agents = self.ns.agents() 31 | server_agent.log_info("Registering client details...") 32 | agent_name_arr, agent_addr = self.extract_agents(agents) 33 | server_agent.log_info("Registered clients: %s"%agent_addr) 34 | 35 | 36 | message = { 37 | 'topic': 'ENERGY_CONSUMPTION', 38 | 'time': datetime.now().strftime('%Y/%m/%d %H:%M'), 39 | 'iter': 0, 40 | 'consumption': 0.0, 41 | 'generation': 0.0 42 | } 43 | 44 | max_iter = 500 45 | 46 | for iter in range(max_iter): 47 | message['iter'] = iter 48 | 49 | last_message = 
self.dispatch_energy_data(server_agent, message, agent_name_arr, agent_addr, d_map)
50 |             server_agent.log_info("Iteration (%s) complete!"%iter)
51 | 
52 |             if iter <= (max_iter-11):
53 |                 eoi_message = {
54 |                     'topic': 'END_OF_ITERATION',
55 |                     'iter': iter,
56 |                     'time': last_message['time']
57 |                 }
58 |             else:
59 |                 # the final iterations warn the agents to exploit their learned policies completely
60 |                 eoi_message = {
61 |                     'topic': 'TRAINING_COMPLETE',
62 |                     'iter': iter,
63 |                     'time': last_message['time']
64 |                 }
65 | 
66 |             time.sleep(2)
67 |             # EOI: notify each agent to save its status at the end of each iteration
68 |             for name in agent_name_arr:
69 |                 self._send_message(server_agent, agent_addr[name], alias='consumption', message=eoi_message)
70 |             time.sleep(4)
71 | 
72 |         # Exit Message after iterations done
73 |         # Safe shutdown of all agents for testing
74 |         for name in agent_name_arr:
75 |             self._send_message(server_agent, agent_addr[name], alias='consumption', message={'topic': 'exit'})
76 | 
77 | 
78 |     def _load_data(self, path_to_file):
79 |         '''
80 |         Import data from the specified file
81 |         :param path_to_file:
82 |         :return:
83 |         '''
84 |         print("Loading ("+str(path_to_file)+")...")
85 |         dateparse = lambda dates: datetime.strptime(dates, '%m/%d/%Y %I:%M %p')
86 |         D = pd.read_csv(path_to_file, sep=';', parse_dates=['Time'], date_parser=dateparse)
87 |         D = D.set_index(D['Electricity.Timestep'])
88 |         return D
89 | 
90 | 
91 |     def dispatch_energy_data(self, server_agent, message, agent_name_arr, agent_addr, d_map):
92 | 
93 |         try:
94 |             # for timestep in range(0, 1200, 30):
95 |             for timestep in range(7200, 11490, 30):
96 | 
97 |                 for name in agent_name_arr:
98 |                     d = d_map[name]
99 |                     d_consumption = d.loc[d['Electricity.Timestep'] == timestep]
100 | 
101 |                     message['time'] = util.cnv_datetime_to_str(d_consumption['Time'].get(timestep), '%Y/%m/%d %H:%M')
102 | 
103 |                     message['consumption'] = float(d_consumption['Sum [kWh]'])
104 |                     message['generation'] = float(
105 |                         util.get_generation(d_consumption['Time'].get(timestep), message['consumption']))
106 | 
107 |                     self._send_message(server_agent, agent_addr[name], alias='consumption', message=message)
108 | 
109 |                 time.sleep(1.5)
110 | 
111 |         except Exception:
112 |             print(traceback.format_exc())
113 | 
114 |         return message
115 | 
116 | 
117 |     def extract_agents(self, agents):
118 | 
119 |         agent_name_arr = []
120 |         agent_addr = {}
121 |         for name in agents:
122 |             if name != self.agentname:
123 |                 agent_name_arr.append(name)
124 |                 agent = self.ns.proxy(name)
125 |                 agent_addr[name] = agent.addr(alias='consumption')
126 | 
127 |         return agent_name_arr, agent_addr
128 | 
129 | 
130 |     def _send_message(self, server_agent, client_addr, alias, message):
131 | 
132 |         server_agent.connect(client_addr, alias=alias)
133 |         server_agent.send(alias, message=message)
134 |         reply = server_agent.recv(alias)
135 |         server_agent.log_info("Received: "+str(reply))
136 |         server_agent.close(alias=alias)
137 | 
138 | 
--------------------------------------------------------------------------------
/marlagent/agent/dqn/replay_buffer.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | 
4 | class ReplayBuffer:
5 |     """
6 |     Replay Buffer stores the past observations along with actions
7 |     performed and the reward obtained after performing
8 |     those actions.
9 | 10 | """ 11 | 12 | def __init__(self, size, n_features): 13 | 14 | self.size = size 15 | self.n_features = n_features 16 | 17 | self.idx = 0 18 | self.num_in_buffer = 0 19 | 20 | self.obs = None 21 | self.action = None 22 | self.reward = None 23 | self.eoi = None 24 | 25 | 26 | def store_transition(self, state, action, reward): 27 | 28 | if self.obs is None: 29 | self.obs = np.empty([self.size, self.n_features], dtype=np.float32) 30 | self.action = np.empty([self.size], dtype=np.str) 31 | self.reward = np.empty([self.size], dtype=np.float32) 32 | self.eoi = np.empty([self.size], dtype=np.float32) 33 | 34 | self.obs[self.idx] = state 35 | self.action[self.idx] = action['action'] 36 | self.reward[self.idx] = reward 37 | self.eoi[self.idx] = 0.0 38 | 39 | # set the next idx 40 | # starts from 1st position and overwrites if buffer full 41 | self.idx = (self.idx + 1) % self.size 42 | 43 | # number of elements in the buffer. 44 | # if the buffer is full then the size of buffer is the number of elements present 45 | self.num_in_buffer = min(self.size, self.num_in_buffer + 1) 46 | 47 | 48 | def reset(self): 49 | self.idx = 0 50 | self.obs = np.empty([self.size, self.n_features], dtype=np.float32) 51 | self.action = np.empty([self.size], dtype=np.str) 52 | self.reward = np.empty([self.size], dtype=np.float32) 53 | self.eoi = np.empty([self.size], dtype=np.float32) 54 | 55 | 56 | def sample(self, batch_size): 57 | """Sample `batch_size` different transitions. 58 | i-th sample transition is the following: 59 | when observing `obs_batch[i]`, action `act_batch[i]` was taken, 60 | after which reward `rew_batch[i]` was received and subsequent 61 | observation next_obs_batch[i] was observed, unless the epsiode 62 | was done which is represented by `done_mask[i]` which is equal 63 | to 1 if episode has ended as a result of that action. 64 | Parameters 65 | ---------- 66 | batch_size: int 67 | How many transitions to sample. 68 | Returns 69 | ------- 70 | obs_batch: np.array 71 | Array of shape 72 | act_batch: np.array 73 | Array of shape (batch_size,) and dtype np.int32 74 | rew_batch: np.array 75 | Array of shape (batch_size,) and dtype np.float32 76 | next_obs_batch: np.array 77 | 78 | """ 79 | 80 | # Extract the radom indexes of batch_size from the number of elements in the buffer 81 | idxes = sample_n_unique(lambda: random.randint(0, self.num_in_buffer - 2), batch_size) 82 | 83 | obs = np.concatenate([[self.obs[idx]] for idx in idxes], 0) 84 | next_obs = np.copy(obs[1:, :]) 85 | reward = np.array([np.array([self.reward[idx]]) for idx in idxes]) 86 | eoi = np.array([np.array([self.eoi[idx]]) for idx in idxes]) 87 | 88 | obs = obs[:-1, :] 89 | next_obs = next_obs 90 | reward = reward[:-1,:] 91 | eoi = eoi[:-1,:] 92 | 93 | # sample the latest observation and add it to this batch 94 | # Combined experience replay 95 | l_obs, l_next_obs, l_reward, l_eoi = self.__get_latest_obs() 96 | obs = np.concatenate([obs, [l_obs]], 0) 97 | next_obs = np.concatenate([next_obs, [l_next_obs]], 0) 98 | reward = np.concatenate([reward, [[l_reward]]]) 99 | eoi = np.concatenate([eoi, [[l_eoi]]]) 100 | 101 | return obs, next_obs, reward, eoi 102 | 103 | 104 | def __get_latest_obs(self): 105 | ''' 106 | Fetches the last observation. Helper function 107 | for Combined experience replay. 
108 | :return: Returns a (s,s',r) tuple 109 | ''' 110 | prev_idx, next_idx = self.__get_last_transition_idxs() 111 | 112 | return self.obs[prev_idx], self.obs[next_idx], self.reward[prev_idx], self.eoi[prev_idx] 113 | 114 | 115 | def update_last_transition_with_reward(self, reward): 116 | ''' 117 | Support for EOI rewards 118 | :return: 119 | ''' 120 | prev_idx, next_idx = self.__get_last_transition_idxs() 121 | self.reward[prev_idx] = reward 122 | 123 | 124 | 125 | def __get_last_transition_idxs(self): 126 | if self.idx == 0: 127 | prev_idx = self.size - 2 128 | next_idx = prev_idx + 1 129 | elif self.idx - 2 < 0: 130 | prev_idx = self.size - 1 131 | next_idx = 0 132 | else: 133 | prev_idx = self.idx - 2 134 | next_idx = prev_idx + 1 135 | 136 | return prev_idx, next_idx 137 | 138 | 139 | def sample_n_unique(sampling_f, n): 140 | """Helper function. Given a function `sampling_f` that returns 141 | comparable objects, sample n such unique objects. 142 | """ 143 | res = [] 144 | while len(res) < n: 145 | candidate = sampling_f() 146 | if candidate not in res: 147 | res.append(candidate) 148 | return res -------------------------------------------------------------------------------- /marlagent/agent/dqn/dqn.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import numpy as np 4 | import torch 5 | import torch.autograd as autograd 6 | import torch.optim as optim 7 | 8 | from marlagent import rlagent 9 | from marlagent.agent.dqn.model import DQN 10 | from marlagent.agent.dqn.replay_buffer import ReplayBuffer 11 | 12 | USE_CUDA = torch.cuda.is_available() 13 | dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 14 | 15 | 16 | class Variable(autograd.Variable): 17 | def __init__(self, data, *args, **kwargs): 18 | if USE_CUDA: 19 | data = data.cuda() 20 | super(Variable, self).__init__(data, *args, **kwargs) 21 | 22 | 23 | OptimizerSpec = namedtuple("OptimizerSpec", ["constructor", "kwargs"]) 24 | 25 | optimizer_spec = OptimizerSpec( 26 | constructor=optim.RMSprop, 27 | kwargs=dict(lr=0.00025, alpha=0.95, eps=0.01), 28 | ) 29 | 30 | 31 | class DQNAgent(rlagent.RLAgent): 32 | 33 | def __init__(self): 34 | 35 | super(DQNAgent, self).__init__() 36 | print("DQN initiated...") 37 | 38 | self.learning_freq = 10 39 | self.learning_starts = 1000 40 | self.target_update_freq = 50 41 | self.num_updates = 0 42 | self.num_calls = 0 43 | self.discount = 0.99 44 | 45 | self.n_features = self.feat_extractor.get_n_features() 46 | 47 | # Instantiating a MLP model 48 | self.Q = DQN(self.n_features) 49 | self.target_Q = DQN(self.n_features) 50 | 51 | self.replay_buffer = ReplayBuffer(size = 10000, n_features = self.n_features) 52 | 53 | 54 | # Construct Q network optimizer function 55 | self.optimizer = optimizer_spec.constructor(self.Q.parameters(), **optimizer_spec.kwargs) 56 | 57 | 58 | 59 | def get_qValue(self, state, action): 60 | 61 | features = self.feat_extractor.get_features(state, action) 62 | feat_arr = self.__transform_to_numpy(features) 63 | 64 | state_ts = torch.from_numpy(feat_arr).type(dtype).unsqueeze(0) 65 | q_values_ts = self.Q(Variable(state_ts, volatile=True)).data 66 | 67 | print("Calculated Q-Value for action ({0}): {1}".format(action['action'], q_values_ts)) 68 | 69 | # Use volatile = True if variable is only used in inference mode, i.e. 
don’t save the history 70 | return q_values_ts 71 | 72 | 73 | 74 | def update(self, state, action, next_state, reward, eoi = False): 75 | 76 | if eoi == True: 77 | self.replay_buffer.update_last_transition_with_reward(reward) 78 | else: 79 | features = self.feat_extractor.get_features(state, action) 80 | 81 | # store the converted state in the replay buffer 82 | # if action['action'] != 'consume_and_store': 83 | self.num_calls += 1 84 | self.replay_buffer.store_transition(features, action, reward) 85 | 86 | # Perform the update in a batch. Apply the average error over all fields 87 | 88 | if self.num_calls > self.learning_starts and self.num_calls % self.learning_freq == 0: 89 | self.perform_update(state.name, reward = 0) 90 | 91 | 92 | def perform_update(self, agent_name, reward): 93 | 94 | #TODO: Ignore reward from EOI handler 95 | 96 | print("Updating network...") 97 | obs, next_obs, r, eoi = self.replay_buffer.sample(batch_size=64) 98 | 99 | #reward = reward * np.zeros(obs.shape[0]) 100 | # r[r.shape[0] - 1] = reward 101 | reward = r 102 | 103 | obs_batch = Variable(torch.from_numpy(obs).type(dtype)) 104 | reward_batch = Variable(torch.from_numpy(reward).type(dtype)) 105 | next_obs_batch = Variable(torch.from_numpy(next_obs).type(dtype)) 106 | not_eoi = Variable(torch.from_numpy(1 - eoi)).type(dtype) 107 | 108 | current_Q_values = self.Q(obs_batch) 109 | target_Q_values = self.target_Q(next_obs_batch).detach() 110 | target_Q_values = target_Q_values * not_eoi 111 | 112 | # print("CURR Q VALUE:", current_Q_values) 113 | # print("TARGET Q VALUE:", target_Q_values) 114 | # print("REWARD BATCH", reward_batch) 115 | print("Not EOI", not_eoi) 116 | 117 | 118 | q_value_curr_state = current_Q_values 119 | q_value_next_state = reward_batch + (self.discount * target_Q_values) 120 | # print("Q VALUE NEXT STATE:", q_value_next_state) 121 | 122 | # Compute Bellman error 123 | bellman_error = q_value_next_state - q_value_curr_state 124 | # print("BELLMAN ERROR:", bellman_error) 125 | 126 | # clip the bellman error between [-1 , 1] 127 | clipped_bellman_error = bellman_error.clamp(-1, 1) 128 | # print("Bellman Error:", clipped_bellman_error) 129 | 130 | d_error = clipped_bellman_error * -1.0 131 | # print("Delta Error:", d_error.data.unsqueeze(1)) 132 | print("Delta Error:", d_error.data.mean()) 133 | 134 | self.write_to_file(data=d_error.mean(), path_to_file='assets/' + agent_name + 'error.csv') 135 | 136 | # Clear previous gradients before backward pass 137 | self.optimizer.zero_grad() 138 | 139 | new_q_value_curr_state = Variable(q_value_curr_state.data, requires_grad=True) 140 | # new_q_value_curr_state.backward() 141 | new_q_value_curr_state.backward(d_error.data) 142 | 143 | # Perfom the update 144 | self.optimizer.step() 145 | 146 | # Clear stored values in the replay buffer 147 | # self.replay_buffer.reset() 148 | print("Updating network finished.") 149 | 150 | self.num_updates += 1 151 | 152 | # Periodically update the target network with the Q network 153 | if self.num_updates % self.target_update_freq == 0: 154 | self.target_Q.load_state_dict(self.Q.state_dict()) 155 | print("Updating target Q network finished.") 156 | 157 | 158 | def __transform_to_numpy(self, features): 159 | numpy_arr = np.array(features, dtype=np.float32) 160 | return numpy_arr 161 | -------------------------------------------------------------------------------- /state.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | class AgentState: 4 | 
""" 5 | Class representing the agent's state at any given moment. 6 | """ 7 | 8 | actions = ['request_ally', 'request_grid', 'grant', 'deny_request', 'consume_and_store'] 9 | 10 | def __init__(self, name, iter, energy_consumption, energy_generation, battery_curr, time, environment_state, 11 | cg_http_service): 12 | print("registering state...") 13 | 14 | self.name = name 15 | self.iter = iter 16 | self.energy_consumption = energy_consumption 17 | self.energy_generation = energy_generation 18 | self.battery_max = 7.2 19 | self.battery_curr = battery_curr 20 | self.time = time 21 | 22 | self.environment_state = environment_state 23 | self.cg_http_service = cg_http_service 24 | 25 | 26 | def get_possible_actions(self, actions = None): 27 | ''' 28 | Computes the set of all legal actions allowed in this state 29 | :return: array of legal actions 30 | ''' 31 | possible_actions = [] 32 | 33 | if actions is None: 34 | 35 | if(self.energy_generation + self.battery_curr > self.energy_consumption): 36 | possible_actions.append({'action':'consume_and_store', 'data':None}) 37 | else: 38 | possible_actions.append({'action':'request_ally', 'data':None}) 39 | possible_actions.append({'action':'request_grid', 'data':None}) 40 | 41 | else: 42 | # Case when only options are grant or deny 43 | # Simply deny the request if current battery is 0 44 | if self.battery_curr <= 0: 45 | for action in actions: 46 | if action['action'] == 'deny_request': 47 | possible_actions.append(action) 48 | else: 49 | possible_actions = actions 50 | 51 | return possible_actions 52 | 53 | 54 | def get_score(self): 55 | score = 0.0 56 | # if it is in the positive state 57 | # if (self.energy_generation + self.battery_curr) >= self.energy_consumption: 58 | # score += 1 59 | # elif (self.energy_generation + self.battery_curr) < self.energy_consumption: 60 | # score -= 10 61 | # 62 | # if there is remaining charge in the battery 63 | # if self.battery_curr > 0.0: 64 | # score += 1.0 65 | 66 | # overall impact of the agent on the environment 67 | # if (self.environment_state.get_total_generated() + self.environment_state.get_energy_borrowed_from_ally()) \ 68 | # >= (self.environment_state.get_total_consumed() + self.environment_state.get_energy_borrowed_from_CG()): 69 | # score += 1.0 70 | # elif (self.environment_state.get_total_generated() + self.environment_state.get_energy_borrowed_from_ally()) \ 71 | # < (self.environment_state.get_total_consumed() + self.environment_state.get_energy_borrowed_from_CG()): 72 | # # score -= 1.0 73 | # score += 0.0 74 | 75 | 76 | # Add global state information 77 | # community_status = self.cg_http_service.get_energy_status(self.iter) 78 | 79 | # diff = self.environment_state.get_energy_borrowed_from_ally() - (0.5*self.environment_state.get_energy_borrowed_from_CG()) 80 | # 81 | # if(diff > 0): 82 | # score += 1 83 | 84 | return score 85 | 86 | 87 | def reset(self, battery_init): 88 | self.energy_consumption = 0.0 89 | self.energy_generation = 0.0 90 | self.battery_curr = battery_init 91 | self.time = datetime.strptime('2014/01/01 12:00', '%Y/%m/%d %H:%M') 92 | self.environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 93 | 94 | 95 | def __str__(self): 96 | str_rep = """ 97 | Time: {0} 98 | Energy Generation: {1} 99 | Energy Consumption {2} 100 | Battery Current: {3} 101 | Battery Max: {4} 102 | """.format(self.time, self.energy_generation, self.energy_consumption, self.battery_curr, self.battery_max) 103 | 104 | return str_rep 105 | 106 | def set_environment_state(self, 
environment_state): 107 | self.environment_state = environment_state 108 | 109 | 110 | class EnvironmentState: 111 | """ 112 | Maintains the state of the environment 113 | """ 114 | 115 | def __init__(self, total_consumed, total_generated, central_grid, energy_borrowed_from_ally, energy_granted_to_ally, 116 | net_grid_status): 117 | 118 | self.total_consumed = total_consumed 119 | self.total_generated = total_generated 120 | self.central_grid = central_grid 121 | self.energy_borrowed_from_ally = energy_borrowed_from_ally 122 | self.energy_granted_to_ally = energy_granted_to_ally 123 | self.net_grid_status = net_grid_status 124 | 125 | 126 | def get_total_consumed(self): 127 | return self.total_consumed 128 | 129 | def update_total_consumed(self, energy): 130 | self.total_consumed = self.total_consumed + energy 131 | 132 | def get_total_generated(self): 133 | return self.total_generated 134 | 135 | def set_total_generated(self, energy): 136 | self.total_generated = energy 137 | 138 | def update_total_generated(self, energy): 139 | self.total_generated = self.total_generated + energy 140 | 141 | def get_energy_borrowed_from_CG(self): 142 | return self.central_grid 143 | 144 | def update_energy_borrowed_from_CG(self, energy): 145 | self.central_grid = self.central_grid + energy 146 | 147 | def get_energy_borrowed_from_ally(self): 148 | return self.energy_borrowed_from_ally 149 | 150 | def update_energy_borrowed_from_ally(self, energy): 151 | self.energy_borrowed_from_ally = self.energy_borrowed_from_ally + energy 152 | 153 | def update_energy_granted_to_ally(self, energy): 154 | self.energy_granted_to_ally = self.energy_granted_to_ally + energy 155 | 156 | def get_energy_granted_to_ally(self): 157 | return self.energy_granted_to_ally 158 | 159 | def __str__(self): 160 | str_rep = """ 161 | Total Generated: {0} 162 | Total Consumed: {1} 163 | Total Borrowed From CG: {2} 164 | Total Borrowed From Allies: {3} 165 | Total Granted To Allies: {4} 166 | """.format(self.total_generated, self.total_consumed, self.central_grid, self.energy_borrowed_from_ally, self.energy_granted_to_ally) 167 | 168 | return str_rep 169 | -------------------------------------------------------------------------------- /marlagent/rlagent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import util 3 | import random 4 | import copy 5 | import feat_extractor as fe 6 | from marlagent import agent_actions 7 | 8 | class RLAgent: 9 | 10 | def __init__(self, alpha=0.001, epsilon=1.0, gamma=0.9, numTraining = 10): 11 | 12 | print("RL agent instantiated...") 13 | self.alpha = float(alpha) # learning rate 14 | self.epsilon = float(epsilon) # exploration vs exploitation 15 | self.discount = float(gamma) # significance of future rewards 16 | self.numTraining = int(numTraining) 17 | 18 | self.feat_extractor = fe.FeatureExtractor() 19 | self.central_grid = util.Counter() # note the energy borrowed from central grid 20 | 21 | 22 | 23 | def get_qValue(self, state, action): 24 | pass 25 | 26 | 27 | 28 | def update(self, state, action, next_state, reward, update = False): 29 | pass 30 | 31 | 32 | 33 | def compute_value_from_qValues(self, state): 34 | """ 35 | Compute the q_value for each action and return the max Q-value as the value of that state 36 | 37 | :param state: 38 | :return: 39 | """ 40 | 41 | # No actions available 42 | if len(self._get_legal_actions(state)) == 0: 43 | return 0.0 44 | 45 | q_values_for_this_state = [] 46 | for action in self._get_legal_actions(state): 47 | 
q_values_for_this_state.append(self.get_qValue(state, action)) 48 | 49 | return max(q_values_for_this_state) 50 | 51 | 52 | 53 | def compute_action_from_qValues(self, state, actions = None): 54 | """ 55 | Iterate over all the actions and compute their q-values. Then return the action with the highest q-value. 56 | 57 | :param state: 58 | :return: 59 | """ 60 | actions = self._get_legal_actions(state, actions) 61 | 62 | if len(actions) == 0: 63 | print("Something wrong. Check!. Maybe all actions are done.") 64 | return None 65 | 66 | # Populating a new list of (action, value) pair from list of q_values 67 | action_value_pair = [] 68 | for action in actions: 69 | action_value_pair.append((action, self.get_qValue(state, action))) 70 | 71 | # Returning the action with maximum q_value 72 | #TODO: if q values for multiple action value pairs is the same it picks the first one. Need to randomize this selection 73 | return max(action_value_pair, key=lambda x: x[1])[0] 74 | 75 | 76 | 77 | def get_action(self, state, actions = None): 78 | """ 79 | Compute the action to take in the current state. 80 | Epsilon decides whether to exploit the current policy or choice a new action randomly. 81 | 82 | A small value for epsilon indicates lesser exploration. 83 | :param state: 84 | :return: appropriate action to take in the current state 85 | """ 86 | legal_actions = self._get_legal_actions(state, actions) 87 | action = None 88 | 89 | if util.flip_coin(self.epsilon): 90 | print("Randomizing action...") 91 | action = random.choice(legal_actions) 92 | else: 93 | print("Selecting the best action based on policy...") 94 | action = self.get_policy(state, actions) 95 | 96 | return action 97 | 98 | 99 | 100 | def get_policy(self, state, actions): 101 | return self.compute_action_from_qValues(state, actions) 102 | 103 | 104 | 105 | def do_action(self, state, action, ns, agent, agent_name, allies): 106 | ''' 107 | Perform an action and return the next state 108 | :param state: 109 | :param action: 110 | :return: the next state on taking the action 111 | ''' 112 | next_state = copy.deepcopy(state) 113 | next_state.environment_state.update_total_consumed(state.energy_consumption) 114 | next_state.environment_state.update_total_generated(state.energy_generation) 115 | 116 | usable_generated_energy = state.energy_generation 117 | 118 | time_str = util.cnv_datetime_to_str(state.time, '%Y/%m/%d %H:%M') 119 | if action['action'] == 'consume_and_store': 120 | 121 | diff = state.energy_generation - state.energy_consumption 122 | 123 | # Store the unused energy and return the excess 124 | batt_curr, excess = agent_actions.update_battery_status(state.battery_max, state.battery_curr, diff) 125 | 126 | # Subtract the energy which could not be used 127 | usable_generated_energy = usable_generated_energy - excess 128 | next_state.environment_state.set_total_generated(next_state.environment_state.get_total_generated() - excess) 129 | 130 | next_state.battery_curr = batt_curr 131 | next_state.energy_generation = 0.0 132 | next_state.energy_consumption = 0.0 133 | 134 | 135 | if action['action'] == 'request_ally': 136 | # TODO think about what to do if ally does not serve request 137 | diff = (state.energy_generation + state.battery_curr) - state.energy_consumption 138 | agent.log_info("---------Energy Diff: "+str(diff)) 139 | energy_grant = 0.0 140 | if diff < 0.0: 141 | energy_grant = agent_actions.request_ally(ns=ns, agent=agent, agent_name = agent_name, allies=allies, energy_amt = abs(diff), time = time_str) 142 | # energy_grant = 
abs(diff) 143 | next_state.energy_generation = 0.0 144 | next_state.battery_curr = 0.0 145 | next_state.environment_state.update_energy_borrowed_from_ally(energy_grant) 146 | 147 | # TODO think how to handle energy consumption if 148 | # If energy consumption is positive in next state then penalize agent 149 | next_state.energy_consumption = abs(diff) - energy_grant 150 | 151 | if next_state.energy_consumption > 0: 152 | self.central_grid[time_str] = next_state.energy_consumption 153 | next_state.environment_state.update_energy_borrowed_from_CG(self.central_grid[time_str]) 154 | #next_state.energy_consumption = 0.0 155 | 156 | else: 157 | print("Ally not requested as enough energy available in battery.") 158 | next_state.energy_generation = 0.0 159 | next_state.energy_consumption = 0.0 160 | next_state.battery_curr = diff 161 | 162 | 163 | if action['action'] == 'request_grid': 164 | # calculate the energy difference 165 | energy_diff = abs(agent_actions.get_energy_balance(state)) 166 | self.central_grid[time_str] = energy_diff 167 | 168 | next_state.energy_consumption = 0.0 169 | next_state.energy_generation = 0.0 170 | next_state.battery_curr = 0.0 171 | next_state.environment_state.update_energy_borrowed_from_CG(energy_diff) 172 | 173 | 174 | if action['action'] == 'grant': 175 | energy_request = action['data'] 176 | bal = (state.energy_generation + state.battery_curr) - energy_request 177 | energy_grant = 0.0 178 | 179 | if(bal >= 0): 180 | energy_grant = energy_request 181 | next_state.energy_generation = 0.0 182 | next_state.battery_curr, excess = agent_actions.update_battery_status(state.battery_max, state.battery_curr, 183 | -energy_grant) 184 | agent.log_info("Granting full energy.") 185 | 186 | elif(bal < 0): 187 | energy_grant = (state.energy_generation + state.battery_curr) 188 | next_state.energy_generation = 0.0 189 | next_state.battery_curr = 0.0 190 | agent.log_info("Granting partial energy.") 191 | 192 | # A more complex case can be designed where it gives partial energy 193 | 194 | return (next_state, energy_grant) 195 | 196 | if action['action'] == 'deny_request': 197 | energy_grant = 0.0 198 | return (next_state, energy_grant) 199 | 200 | return (next_state, usable_generated_energy) 201 | 202 | 203 | def write_to_file(self, data, path_to_file = 'assets/error.csv'): 204 | 205 | if os.path.isfile(path_to_file): 206 | with open(path_to_file, mode='a') as f: 207 | f.write(str(data)+str("\n")) 208 | f.close() 209 | 210 | else: 211 | with open(path_to_file, 'w+') as f: 212 | f.write(str(data)+str("\n")) 213 | f.close() 214 | 215 | 216 | 217 | def _get_legal_actions(self, agent_state, actions=None): 218 | """ 219 | Computes the set of actions a agent should take from the set of possible actions 220 | :param agent_state: 221 | :param actions: 222 | :return: legal actions the agent can take 223 | """ 224 | possible_actions = agent_state.get_possible_actions(actions) 225 | 226 | # TODO some filtering of actions 227 | 228 | legal_actions = copy.deepcopy(possible_actions) 229 | 230 | return legal_actions -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility file 3 | """ 4 | import sys 5 | import inspect 6 | import random 7 | from datetime import datetime 8 | 9 | 10 | def raiseNotDefined(): 11 | fileName = inspect.stack()[1][1] 12 | line = inspect.stack()[1][2] 13 | method = inspect.stack()[1][3] 14 | 15 | print("*** Method not implemented: %s at 
line %s of %s" % (method, line, fileName)) 16 | sys.exit(1) 17 | 18 | 19 | def flip_coin( p ): 20 | r = random.random() 21 | return r < p 22 | 23 | 24 | def cnv_datetime_to_str(time, format): 25 | # date = datetime.strptime(time, '%Y-%m-%d %H:%M') 26 | return time.strftime(format) 27 | 28 | 29 | def get_generation(ts, consumption): 30 | generation = 0.0 31 | if ts.time().hour < 18 and ts.time().hour > 6: 32 | if flip_coin(0.7): 33 | generation += consumption + consumption*(0.5) 34 | else: 35 | generation = consumption - consumption*(0.1) 36 | 37 | return generation 38 | 39 | 40 | def randomize_max(max_value, action_value_pairs): 41 | '' 42 | 43 | 44 | def get_reward_for_action(action): 45 | action_reward_dict = { 46 | 'request_ally' : 0, 47 | 'request_grid': 0, 48 | 'grant': 0.0, 49 | 'deny_request': 0, 50 | 'consume_and_store': 0, 51 | } 52 | 53 | return action_reward_dict[action] 54 | 55 | 56 | 57 | def reward_transaction(state, next_state, action, net_curr_grid_status): 58 | reward = 0.0 59 | # if next_state.environment_state.get_energy_borrowed_from_ally() > state.environment_state.get_energy_borrowed_from_ally(): 60 | # reward += 0.5 61 | # 62 | # if next_state.environment_state.get_energy_granted_to_ally() > state.environment_state.get_energy_granted_to_ally(): 63 | # reward += 1 64 | 65 | 66 | # Local NZEB State 67 | # next_state_nzeb = (next_state.environment_state.get_total_generated() + next_state.environment_state.get_energy_borrowed_from_ally()) \ 68 | # - (next_state.environment_state.get_total_consumed() + next_state.environment_state.get_energy_borrowed_from_CG()) 69 | # 70 | # curr_state_nzeb = (state.environment_state.get_total_generated() + state.environment_state.get_energy_borrowed_from_ally()) \ 71 | # - (state.environment_state.get_total_consumed() + state.environment_state.get_energy_borrowed_from_CG()) 72 | # 73 | # if next_state_nzeb > curr_state_nzeb: 74 | # reward += 1 75 | 76 | 77 | # Community NZEB state 78 | 79 | # If the total grid nZEB status is better than the previous status 80 | # if(net_curr_grid_status > state.environment_state.net_grid_status): 81 | # reward += 1 82 | 83 | #OR 84 | 85 | reward = net_curr_grid_status 86 | 87 | return round(reward,1) 88 | 89 | 90 | 91 | def compare(a, b): 92 | if a > b: 93 | return 1 94 | elif a < b: 95 | return -1 96 | else: 97 | return 0 98 | 99 | 100 | def calc_net_grid_status(curr_grid_status): 101 | net_curr_grid_status = curr_grid_status['generation'] \ 102 | - (curr_grid_status['consumption'] - curr_grid_status['borrowedFromCG']) 103 | return net_curr_grid_status 104 | 105 | 106 | class Counter(dict): 107 | """ 108 | A counter keeps track of counts for a set of keys. 109 | 110 | The counter class is an extension of the standard python 111 | dictionary type. It is specialized to have number values 112 | (integers or floats), and includes a handful of additional 113 | functions to ease the task of counting data. In particular, 114 | all keys are defaulted to have value 0. Using a dictionary: 115 | 116 | a = {} 117 | print a['test'] 118 | 119 | would give an error, while the Counter class analogue: 120 | 121 | >>> a = Counter() 122 | >>> print a['test'] 123 | 0 124 | 125 | returns the default 0 value. 
Note that to reference a key 126 | that you know is contained in the counter, 127 | you can still use the dictionary syntax: 128 | 129 | >>> a = Counter() 130 | >>> a['test'] = 2 131 | >>> print a['test'] 132 | 2 133 | 134 | This is very useful for counting things without initializing their counts, 135 | see for example: 136 | 137 | >>> a['blah'] += 1 138 | >>> print a['blah'] 139 | 1 140 | 141 | The counter also includes additional functionality useful in implementing 142 | the classifiers for this assignment. Two counters can be added, 143 | subtracted or multiplied together. See below for details. They can 144 | also be normalized and their total count and arg max can be extracted. 145 | """ 146 | def __getitem__(self, idx): 147 | self.setdefault(idx, 0) 148 | return dict.__getitem__(self, idx) 149 | 150 | def incrementAll(self, keys, count): 151 | """ 152 | Increments all elements of keys by the same count. 153 | 154 | >>> a = Counter() 155 | >>> a.incrementAll(['one','two', 'three'], 1) 156 | >>> a['one'] 157 | 1 158 | >>> a['two'] 159 | 1 160 | """ 161 | for key in keys: 162 | self[key] += count 163 | 164 | def argMax(self): 165 | """ 166 | Returns the key with the highest value. 167 | """ 168 | if len(self.keys()) == 0: return None 169 | all = self.items() 170 | values = [x[1] for x in all] 171 | maxIndex = values.index(max(values)) 172 | return all[maxIndex][0] 173 | 174 | def sortedKeys(self): 175 | """ 176 | Returns a list of keys sorted by their values. Keys 177 | with the highest values will appear first. 178 | 179 | >>> a = Counter() 180 | >>> a['first'] = -2 181 | >>> a['second'] = 4 182 | >>> a['third'] = 1 183 | >>> a.sortedKeys() 184 | ['second', 'third', 'first'] 185 | """ 186 | sortedItems = self.items() 187 | compare = lambda x, y: sign(y[1] - x[1]) 188 | sortedItems.sort(cmp=compare) 189 | return [x[0] for x in sortedItems] 190 | 191 | def totalCount(self): 192 | """ 193 | Returns the sum of counts for all keys. 194 | """ 195 | return sum(self.values()) 196 | 197 | def normalize(self): 198 | """ 199 | Edits the counter such that the total count of all 200 | keys sums to 1. The ratio of counts for all keys 201 | will remain the same. Note that normalizing an empty 202 | Counter will result in an error. 203 | """ 204 | total = float(self.totalCount()) 205 | if total == 0: return 206 | for key in self.keys(): 207 | self[key] = self[key] / total 208 | 209 | def divideAll(self, divisor): 210 | """ 211 | Divides all counts by divisor 212 | """ 213 | divisor = float(divisor) 214 | for key in self: 215 | self[key] /= divisor 216 | 217 | def copy(self): 218 | """ 219 | Returns a copy of the counter 220 | """ 221 | return Counter(dict.copy(self)) 222 | 223 | def __mul__(self, y ): 224 | """ 225 | Multiplying two counters gives the dot product of their vectors where 226 | each unique label is a vector element. 227 | 228 | >>> a = Counter() 229 | >>> b = Counter() 230 | >>> a['first'] = -2 231 | >>> a['second'] = 4 232 | >>> b['first'] = 3 233 | >>> b['second'] = 5 234 | >>> a['third'] = 1.5 235 | >>> a['fourth'] = 2.5 236 | >>> a * b 237 | 14 238 | """ 239 | sum = 0 240 | x = self 241 | if len(x) > len(y): 242 | x,y = y,x 243 | for key in x: 244 | if key not in y: 245 | continue 246 | sum += x[key] * y[key] 247 | return sum 248 | 249 | def __radd__(self, y): 250 | """ 251 | Adding another counter to a counter increments the current counter 252 | by the values stored in the second counter. 
253 | 254 | >>> a = Counter() 255 | >>> b = Counter() 256 | >>> a['first'] = -2 257 | >>> a['second'] = 4 258 | >>> b['first'] = 3 259 | >>> b['third'] = 1 260 | >>> a += b 261 | >>> a['first'] 262 | 1 263 | """ 264 | for key, value in y.items(): 265 | self[key] += value 266 | 267 | def __add__( self, y ): 268 | """ 269 | Adding two counters gives a counter with the union of all keys and 270 | counts of the second added to counts of the first. 271 | 272 | >>> a = Counter() 273 | >>> b = Counter() 274 | >>> a['first'] = -2 275 | >>> a['second'] = 4 276 | >>> b['first'] = 3 277 | >>> b['third'] = 1 278 | >>> (a + b)['first'] 279 | 1 280 | """ 281 | addend = Counter() 282 | for key in self: 283 | if key in y: 284 | addend[key] = self[key] + y[key] 285 | else: 286 | addend[key] = self[key] 287 | for key in y: 288 | if key in self: 289 | continue 290 | addend[key] = y[key] 291 | return addend 292 | 293 | def __sub__( self, y ): 294 | """ 295 | Subtracting a counter from another gives a counter with the union of all keys and 296 | counts of the second subtracted from counts of the first. 297 | 298 | >>> a = Counter() 299 | >>> b = Counter() 300 | >>> a['first'] = -2 301 | >>> a['second'] = 4 302 | >>> b['first'] = 3 303 | >>> b['third'] = 1 304 | >>> (a - b)['first'] 305 | -5 306 | """ 307 | addend = Counter() 308 | for key in self: 309 | if key in y: 310 | addend[key] = self[key] - y[key] 311 | else: 312 | addend[key] = self[key] 313 | for key in y: 314 | if key in self: 315 | continue 316 | addend[key] = -1 * y[key] 317 | return addend 318 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The main program that triggers the application 3 | ''' 4 | 5 | import argparse 6 | import os 7 | import sys 8 | import traceback 9 | import time 10 | import copy 11 | import _thread 12 | import util 13 | import multiprocessing 14 | from random import randint 15 | from state import AgentState, EnvironmentState 16 | from datetime import datetime 17 | from marlagent.agent.linear.lin_agent import LinearQAgent 18 | from marlagent.agent.dqn.dqn import DQNAgent 19 | from osbrain import run_agent 20 | from osbrain import run_nameserver 21 | from osbrain import NSProxy 22 | from nameserver import NameServer 23 | from cghandler import httpservice 24 | from prediction.energy_generation import EnergyGeneration 25 | 26 | 27 | pidfile = "assets/ns.pid" 28 | 29 | def exit_check(msg): 30 | if msg['topic'] == 'exit': 31 | return True 32 | 33 | 34 | def energy_request_handler(agent, message): 35 | 36 | # Acquire the lock 37 | lock_count = 0 38 | while not multiprocessing_lock.acquire(blocking=False): 39 | try: 40 | if lock_count <= 2: 41 | time.sleep(randint(1, 3) / 10) 42 | lock_count += 1 43 | else: 44 | yield {'topic': 'ENERGY_REQUEST_DECLINE'} 45 | agent.log_info("Could not acquire lock! 
Energy request declined.") 46 | return 47 | except: 48 | print(traceback.format_exc()) 49 | 50 | 51 | agent.log_info("Lock Acquired!") 52 | 53 | try: 54 | print("-----------------------Start Transaction-----------------------") 55 | agent.log_info('Received: %s' % message) 56 | 57 | agent.log_info("Deepy copy of global state initiated...") 58 | l_g_agent_state = multiprocessing_ns.g_agent_state 59 | l_curr_state = copy.deepcopy(l_g_agent_state) 60 | 61 | # update with new values of energy consumption and generation 62 | l_curr_state.time = datetime.strptime(message['time'], '%Y/%m/%d %H:%M') 63 | 64 | # amount of requested energy 65 | energy_req = message['energy'] 66 | 67 | actions = [ 68 | { 69 | 'action': 'grant', 70 | 'data': energy_req 71 | }, 72 | { 73 | 'action': 'deny_request', 74 | 'data': energy_req 75 | } 76 | ] 77 | 78 | # call get action with this new state 79 | l_rl_agent = multiprocessing_ns.rl_agent 80 | action = l_rl_agent.get_action(copy.deepcopy(l_curr_state), actions) 81 | 82 | agent.log_info('Performing action (%s).' % action) 83 | 84 | response = None 85 | 86 | # If energy request is declined 87 | if action['action'] == 'deny_request': 88 | response = {'topic':'ENERGY_REQUEST_DECLINE'} 89 | 90 | # perform action and update global agent state 91 | next_state, energy_grant = l_rl_agent.do_action(l_curr_state, action, osbrain_ns, agent, args.agentname, allies) 92 | 93 | # if energy request is accepted 94 | if action['action'] == 'grant': 95 | response = {'topic': 'ENERGY_REQUEST_ACCEPTED', 'energy': energy_grant} 96 | agent.log_info("GRANTING:-----:%s"%energy_grant) 97 | next_state.environment_state.update_energy_granted_to_ally(energy_grant) 98 | print("BATTERY AFTER GRANTING-----:%s"%next_state.battery_curr) 99 | 100 | _thread.start_new_thread(cg_http_service.register_transaction, (l_g_agent_state.iter, 101 | message['time'], message['agentName'], 102 | energy_grant)) 103 | 104 | 105 | l_rl_agent.update(state=l_curr_state, action=action, next_state=next_state, reward=0.0, eoi = False) 106 | 107 | 108 | # update the global state 109 | l_g_agent_state.energy_consumption = 0.0 110 | l_g_agent_state.energy_generation = 0.0 111 | l_g_agent_state.battery_curr = next_state.battery_curr 112 | l_g_agent_state.environment_state = next_state.environment_state 113 | 114 | agent.log_info('Completed update operation. 
Resting!') 115 | 116 | # agent.log_info(next_state) 117 | agent.log_info(l_g_agent_state.environment_state) 118 | 119 | print("-----------------------End of Transaction-----------------------\n\n\n") 120 | 121 | # Synchronize Objects 122 | multiprocessing_ns.g_agent_state = l_g_agent_state 123 | multiprocessing_ns.rl_agent = l_rl_agent 124 | agent.log_info("Finished synchronizing objects across forked processes.") 125 | 126 | yield response 127 | except Exception: 128 | print(traceback.format_exc()) 129 | yield {'topic': 'ENERGY_REQUEST_DECLINE'} 130 | 131 | 132 | finally: 133 | # Release the lock 134 | multiprocessing_lock.release() 135 | agent.log_info("Lock Released!") 136 | 137 | 138 | def energy_consumption_handler(agent, message): 139 | yield {'topic': 'Ok'} # immediate reply 140 | 141 | # Exit check 142 | if exit_check(message): 143 | sys.exit(0) 144 | 145 | global osbrain_ns 146 | 147 | if message['topic'] == 'ENERGY_CONSUMPTION': 148 | _thread.start_new_thread(invoke_agent_ec_handle, (agent, osbrain_ns, message)) 149 | 150 | elif message['topic'] == 'END_OF_ITERATION' or message['topic'] == 'TRAINING_COMPLETE': 151 | _thread.start_new_thread(eoi_handle, (agent, message)) 152 | 153 | 154 | def invoke_agent_ec_handle(agent, osbrain_ns, message): 155 | 156 | try: 157 | print("Trying to acquire lock!") 158 | # Acquire the lock 159 | multiprocessing_lock.acquire() 160 | except Exception: 161 | print(traceback.format_exc()) 162 | return 163 | 164 | print("\n-----------------------Start Transaction-----------------------") 165 | agent.log_info('Received: %s' % message) 166 | 167 | try: 168 | agent.log_info("Deepy copy of global state initiated...") 169 | l_g_agent_state = multiprocessing_ns.g_agent_state 170 | l_curr_state = copy.deepcopy(l_g_agent_state) 171 | 172 | # update with new values of energy consumption and generation 173 | l_curr_state.time = datetime.strptime(message['time'], '%Y/%m/%d %H:%M') 174 | 175 | # Get energy generation 176 | energy_generated = energy_generator.get_generation(l_curr_state.time) 177 | 178 | l_curr_state.energy_consumption = message['consumption'] 179 | l_curr_state.energy_generation = energy_generated 180 | 181 | 182 | # call get action with this new state 183 | l_rl_agent = multiprocessing_ns.rl_agent 184 | action = l_rl_agent.get_action(copy.deepcopy(l_curr_state)) 185 | 186 | agent.log_info('Performing action (%s).' % action) 187 | # perform action and update global agent state 188 | next_state, usable_generated_energy = l_rl_agent.do_action(l_curr_state, action, osbrain_ns, agent, args.agentname, allies) 189 | 190 | agent.log_info('Action complete. Registering action effect with the environment.') 191 | 192 | # Registering information to CG 193 | _thread.start_new_thread(cg_http_service.update_energy_status, (message['time'], 194 | message['iter'], 195 | float(args.battInit), 196 | message['consumption'], 197 | usable_generated_energy, 198 | next_state.environment_state.get_energy_borrowed_from_CG() 199 | - l_curr_state.environment_state.get_energy_borrowed_from_CG())) 200 | 201 | 202 | delta_reward = 0.0 203 | # Get grid status from CG 204 | # curr_grid_status = cg_http_service.get_energy_status(l_curr_state.iter) 205 | # net_curr_grid_status = util.calc_net_grid_status(curr_grid_status) 206 | 207 | # calculate reward 208 | # delta_reward = next_state.get_score() + util.reward_transaction(l_curr_state, next_state, action, 209 | # net_curr_grid_status) 210 | 211 | 212 | agent.log_info('Updating agent with reward %s.' 
% delta_reward) 213 | l_rl_agent.update(state=l_curr_state, action=action, next_state=next_state, reward=0.0) 214 | 215 | # Update grid status 216 | # next_state.environment_state.net_grid_status = net_curr_grid_status 217 | 218 | # update the global state 219 | l_g_agent_state.energy_consumption = 0.0 220 | l_g_agent_state.energy_generation = 0.0 221 | l_g_agent_state.battery_curr = next_state.battery_curr 222 | l_g_agent_state.environment_state = next_state.environment_state 223 | 224 | # agent.log_info(next_state) 225 | # agent.log_info(l_g_agent_state.environment_state) 226 | agent.log_info('Completed update operation. Resting!') 227 | print("-----------------------End of Transaction-----------------------\n\n") 228 | 229 | # Synchronize Objects 230 | multiprocessing_ns.g_agent_state = l_g_agent_state 231 | multiprocessing_ns.rl_agent = l_rl_agent 232 | agent.log_info("Finished synchronizing objects across forked processes.") 233 | 234 | except Exception: 235 | print(traceback.format_exc()) 236 | 237 | finally: 238 | # Release the lock 239 | multiprocessing_lock.release() 240 | agent.log_info("Lock Released!") 241 | 242 | 243 | def eoi_handle(agent, message): 244 | ''' 245 | End of iteration handler. 246 | :return: 247 | ''' 248 | multiprocessing_lock.acquire() 249 | global g_env_state 250 | try: 251 | print("\n\n\-----------------------Iteration (%s) Completed-----------------------\n\n"%message['iter']) 252 | 253 | # Fetching Reference 254 | l_rl_agent = multiprocessing_ns.rl_agent 255 | l_g_agent_state = multiprocessing_ns.g_agent_state 256 | g_env_state = l_g_agent_state.environment_state 257 | 258 | 259 | agent.log_info("Publishing Stats...") 260 | agent.log_info(g_env_state) 261 | 262 | nzeb_status = (g_env_state.get_total_generated() + g_env_state.get_energy_borrowed_from_ally()) \ 263 | - (g_env_state.get_total_consumed() + g_env_state.get_energy_borrowed_from_CG()) 264 | agent.log_info("NZEB Status: %s" % nzeb_status) 265 | 266 | 267 | # Log EOI details to CG 268 | cg_http_service.log_iteration_status(message['iter'], g_env_state, nzeb_status) 269 | 270 | 271 | # --------------------- Updating reward --------------------- 272 | agent.log_info('Calculating reward.') 273 | 274 | # Get grid status from CG 275 | curr_grid_status = cg_http_service.get_energy_status(int(message['iter'])) 276 | net_curr_grid_status = util.calc_net_grid_status(curr_grid_status) 277 | 278 | # calculate reward 279 | # delta_reward = util.compare(net_curr_grid_status, multiprocessing_ns.old_grid_status) 280 | 281 | 282 | # If this grid status is better than the previous best grid status 283 | # if util.compare(net_curr_grid_status, multiprocessing_ns.best_grid_status) > 1 : 284 | # multiprocessing_ns.best_grid_status = net_curr_grid_status 285 | # delta_reward += 3 286 | 287 | # delta_reward = delta_reward - abs(int(multiprocessing_ns.best_grid_status - net_curr_grid_status)) * 0.1 288 | 289 | # multiprocessing_ns.old_grid_status = net_curr_grid_status 290 | 291 | delta_reward = util.reward_transaction(state = None, next_state = None, action = None, net_curr_grid_status = net_curr_grid_status) 292 | l_rl_agent.update(state=None, action=None, next_state=None, reward=delta_reward, eoi = True) 293 | #--------------------------------------------------------------- 294 | 295 | 296 | if int(message['iter']) > 0 and int(message['iter']) % 50 == 0: 297 | l_rl_agent.epsilon = round(l_rl_agent.epsilon * 0.8, 5) 298 | agent.log_info("Updated Epsilon: %s"%l_rl_agent.epsilon) 299 | 300 | 301 | # If training phase 
done then set exploration to 0
302 |         # i.e. complete exploitation
303 |         if message['topic'] == 'TRAINING_COMPLETE':
304 |             l_rl_agent.epsilon = 0.0
305 | 
306 | 
307 |         # reset the agent global state
308 |         print(".......................RESETTING GLOBAL STATE.......................")
309 |         l_g_agent_state.reset(float(args.battInit))
310 |         l_g_agent_state.iter = int(message['iter']) + 1
311 |         agent.log_info(l_g_agent_state.environment_state)
312 | 
313 | 
314 |         # Synchronize Objects
315 |         multiprocessing_ns.rl_agent = l_rl_agent
316 |         multiprocessing_ns.g_agent_state = l_g_agent_state
317 |         agent.log_info("Finished synchronizing objects across forked processes.")
318 | 
319 |     except Exception:
320 |         print(traceback.format_exc())
321 | 
322 |     finally:
323 |         # Release the lock
324 |         multiprocessing_lock.release()
325 | 
326 | def predict_energy_generation(time):
327 |     print("TBD")  # stub: prediction not implemented yet
328 |     return 0.0
329 | 
330 | 
331 | def get_ref_to_nameserver(ns_socket_addr):
332 |     osbrain_ns = None
333 |     print("Fetching reference to existing nameserver...")
334 |     osbrain_ns = NSProxy(nsaddr=ns_socket_addr)
335 |     return osbrain_ns
336 | 
337 | 
338 | def start_server_job(osbrain_ns):
339 |     time.sleep(2)
340 |     ns_agent = NameServer(osbrain_ns)
341 | 
342 |     # Start the scheduled job
343 |     steve = run_agent('Steve', serializer='json')
344 |     ns_agent.schedule_job(steve)
345 | 
346 | 
347 | def args_handler():
348 |     parser = argparse.ArgumentParser(description='Agent Module')
349 | 
350 |     parser.add_argument('--agentname', required=True, help='Name of the agent')
351 |     parser.add_argument('--nameserver', required=True, help='Socket address of the nameserver')
352 |     parser.add_argument('--allies', required=False, help='Comma-separated names of allied agents')
353 |     parser.add_argument('--battInit', required=True, help='Initial battery charge.')
354 |     parser.add_argument('--solarexposure', required=False, help='Path to solar exposure dataset')
355 |     parser.add_argument('--nSolarPanel', required=True, help='Number of solar panels this house has')
356 | 
357 |     global args
358 |     args = parser.parse_args()
359 | 
360 |     if args.solarexposure is None:
361 |         args.solarexposure = 'assets/toronto_solar_exp_2011.csv'
362 | 
363 | 
364 | if __name__ == '__main__':
365 | 
366 |     print("Started process at ("+str(datetime.now())+")")
367 |     args_handler()
368 | 
369 |     print("Hi! I am "+args.agentname+". I am taking command of this process.")
370 | 
371 |     # Initiate name server
372 |     global osbrain_ns
373 |     osbrain_ns = get_ref_to_nameserver(args.nameserver)
374 | 
375 |     global cg_http_service
376 |     cg_http_service = httpservice.CGHTTPHandler(args.agentname)
377 | 
378 |     try:
379 |         from osbrain.logging import pyro_log
380 |         pyro_log()
381 | 
382 |         # instantiate the reinforcement learning module and make it globally accessible
383 |         global multiprocessing_ns, multiprocessing_lock
384 |         manager = multiprocessing.Manager()
385 |         multiprocessing_ns = manager.Namespace()
386 |         multiprocessing_lock = manager.RLock()
387 | 
388 |         multiprocessing_ns.rl_agent = DQNAgent()
389 |         # multiprocessing_ns.rl_agent = LinearQAgent()
390 | 
391 |         global energy_generator
392 |         energy_generator = EnergyGeneration(args.solarexposure, float(args.nSolarPanel))
393 | 
394 |         # Declare an agent state and make it global
395 |         environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
396 |         # global g_agent_state
397 |         multiprocessing_ns.g_agent_state = AgentState(name = args.agentname, iter = 0, energy_consumption = 0.0, energy_generation = 0.0,
398 |                                                       battery_curr = float(args.battInit), time = '2014/01/01 12:00', environment_state = environment_state,
399 |                                                       cg_http_service = cg_http_service)
400 | 
401 |         multiprocessing_ns.old_grid_status = -99999
402 |         multiprocessing_ns.best_grid_status = -99999
403 | 
404 |         global allies
405 |         allies = args.allies.split(",") if args.allies else []
406 |         # allies = []
407 | 
408 |         # Initialize the agent
409 |         agent = run_agent(name = args.agentname, nsaddr = osbrain_ns.addr(), serializer='json', transport='tcp')
410 |         agent.bind('REP', alias=str('energy_request_'+args.agentname), handler=energy_request_handler)
411 |         agent.bind('REP', alias='consumption', handler=energy_consumption_handler)
412 | 
413 | 
414 | 
415 |     except Exception:
416 |         print(traceback.format_exc())
417 | 
418 | 
419 |     finally:
420 |         # keep the process alive so the bound handlers keep serving requests
421 |         while True:
422 |             time.sleep(1)
423 | 
424 |         print("Bye!")
425 | 
426 | 
427 | 
--------------------------------------------------------------------------------
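
Note: the snippet below is an illustrative usage sketch, not a file from the repository. It shows how the Counter helper and the grid-status/reward helpers in util.py fit together; the numeric values and the contents of the grid_status dictionary are made up, but the keys match what calc_net_grid_status() reads, and the call pattern mirrors how eoi_handle() in main.py uses these helpers. It assumes it is run from the project root so that `import util` resolves to the util.py shown above.

# Illustrative sketch only (not part of the repo).
import util

# Counter: a dict whose missing keys default to 0, with argMax, normalize and dot product.
counts = util.Counter()
counts['grant'] += 1
counts['request_ally'] += 2
print(counts.argMax())          # -> 'request_ally' (the key with the highest count)
counts.normalize()              # values now sum to 1

weights = util.Counter({'grant': 0.5, 'request_ally': 2.0})
print(counts * weights)         # dot product over the shared keys

# Net grid status and end-of-iteration reward, as used in eoi_handle():
# net = generation - (consumption - borrowedFromCG); reward is currently just round(net, 1).
grid_status = {'generation': 120.0, 'consumption': 100.0, 'borrowedFromCG': 15.0}
net = util.calc_net_grid_status(grid_status)              # 120 - (100 - 15) = 35.0
reward = util.reward_transaction(None, None, None, net)   # 35.0
print(net, reward)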