├── marlagent
    ├── __init__.py
    ├── agent
    │   ├── __init__.py
    │   ├── dqn
    │   │   ├── __init__.py
    │   │   ├── model.py
    │   │   ├── replay_buffer.py
    │   │   └── dqn.py
    │   └── linear
    │   │   ├── __init__.py
    │   │   └── lin_agent.py
    ├── agent_actions.py
    └── rlagent.py
├── prediction
    ├── __init__.py
    └── energy_generation.py
├── shutdown.sh
├── .gitignore
├── start.sh
├── experimental.py
├── synchronizer.py
├── cghandler
    └── httpservice.py
├── feat_extractor.py
├── nameserver.py
├── state.py
├── util.py
└── main.py


/marlagent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/prediction/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/marlagent/agent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/marlagent/agent/dqn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/marlagent/agent/linear/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/shutdown.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | pkill -f main.py
3 | pkill -f synchronizer.py


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea
 2 | __pycache__
 3 | p1.log
 4 | p2.log
 5 | p4.log
 6 | feat_extractor.pyc
 7 | marlagent/*.pyc
 8 | *.pyc
 9 | assets/ns.pid
10 | assets/Aliceerror.csv
11 | assets/Boberror.csv
12 | sync.log
13 | assets/Charlieerror.csv
14 | p3.log
15 | 


--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 synchronizer.py --agentname Steve --nameserver 127.0.0.1:10000 > sync.log 2>&1 &
3 | python3 main.py --agentname Alice --nameserver 127.0.0.1:10000 --allies Bob,Charlie,Dave --battInit 7.5 --nSolarPanel 72 > p1.log 2>&1 &
4 | python3 main.py --agentname Bob --nameserver 127.0.0.1:10000 --allies Alice,Charlie,Dave --battInit 2.5 --nSolarPanel 54 > p2.log 2>&1 &
5 | python3 main.py --agentname Charlie --nameserver 127.0.0.1:10000 --allies Alice,Bob,Dave --battInit 5.0 --nSolarPanel 12 > p3.log 2>&1 &
6 | python3 main.py --agentname Dave --nameserver 127.0.0.1:10000 --allies Alice,Bob,Charlie --battInit 0.0 --nSolarPanel 0 > p4.log 2>&1 &


--------------------------------------------------------------------------------
/marlagent/agent/dqn/model.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | class DQN(nn.Module):
 5 | 
 6 |     def __init__(self, in_channels):
 7 |         """
 8 |         Initialize a deep Q-learning network as described in
 9 |         https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
10 |         Arguments:
11 |             in_channels: number of channel of input.
12 |                 i.e The number of most recent frames stacked together as describe in the paper
13 |             num_actions: number of action-value to output, one-to-one correspondence to action in game.
14 |         """
15 |         super(DQN, self).__init__()
16 |         self.fc1 = nn.Linear(in_channels, 100)
17 |         self.fc2 = nn.Linear(100, 100)
18 |         self.fc3 = nn.Linear(100, 100)
19 |         self.fc4 = nn.Linear(100, 1)
20 | 
21 | 
22 |     def forward(self, x):
23 |         x = F.sigmoid(self.fc1(x))
24 |         x = F.sigmoid(self.fc2(x))
25 |         x = F.sigmoid(self.fc3(x))
26 |         x = self.fc4(x)
27 |         return x
28 | 
29 |     


--------------------------------------------------------------------------------
/experimental.py:
--------------------------------------------------------------------------------
 1 | from osbrain import run_agent
 2 | from osbrain import run_nameserver
 3 | import multiprocessing
 4 | 
 5 | 
 6 | def method_a(agent, message):
 7 |     gg = mpns.temp
 8 |     gg = 10
 9 |     agent.log_info('Method A Temp: %s' % mpns.temp)
10 |     return 'Blah 1'
11 | 
12 | def method_b(agent, message):
13 |     agent.log_info('Method B Temp: %s' % mpns.temp)
14 |     return 'Blah 2'
15 | 
16 | 
17 | if __name__ == '__main__':
18 |     manager = multiprocessing.Manager()
19 |     global mpns
20 |     mpns = manager.Namespace()
21 | 
22 |     mpns.temp = 1
23 | 
24 |     ns = run_nameserver()
25 | 
26 |     alice = run_agent('Alice')
27 |     bob = run_agent('Bob')
28 | 
29 |     addr1 = alice.bind('REP', alias='main1', handler=method_a)
30 |     addr2 = alice.bind('REP', alias='main2', handler=method_b)
31 | 
32 |     bob.connect(addr1, alias='main1')
33 |     bob.send('main1', "Some message")
34 |     reply = bob.recv('main1')
35 | 
36 |     bob.connect(addr2, alias='main2')
37 |     bob.send('main2', "Some message")
38 |     reply = bob.recv('main2')
39 |     agents = ns.agents()
40 |     print(agents)
41 |     ns.shutdown()
42 | 
43 | 


--------------------------------------------------------------------------------
/prediction/energy_generation.py:
--------------------------------------------------------------------------------
 1 | import util
 2 | import pandas as pd
 3 | from datetime import datetime
 4 | 
 5 | class EnergyGeneration:
 6 | 
 7 |     def __init__(self, path_to_file, n_solar_panel):
 8 | 
 9 |         self.n_solar_panel = n_solar_panel
10 | 
11 |         time = []
12 |         dni = [] # Values are w/m2
13 |         self.D = pd.read_csv(path_to_file, sep=',', usecols=['Year', 'Month', 'Day', 'Hour', 'Minute', 'DNI'])
14 | 
15 |         for index, row in self.D.iterrows():
16 |             ts = "{0}/{1:02d}/{2:02d} {3:02d}:{4:02d}".format(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute'])
17 |             time.append(ts)
18 |             dni.append(row['DNI'])
19 | 
20 |         d = {'Time':time, 'DNI': dni}
21 |         self.D = pd.DataFrame(data=d)
22 | 
23 |         print("Solar exposure data loaded successfully.")
24 | 
25 | 
26 |     def get_generation(self, ts):
27 |         """
28 |         Get the generation at a particular time in kWh. It is assumed that solar exposure at a particular time has been
29 |         predicted.
30 |         :param ts:
31 |         :return: kWh
32 |         """
33 |         ts_str = util.cnv_datetime_to_str(ts, '%m/%d %H:%M')
34 |         data = self.D.loc[self.D['Time'].str.contains(ts_str)]
35 | 
36 |         unit_generation = self._calculate_generation(exposure=float(data['DNI'].values[0]))
37 |         total_generation = (unit_generation * self.n_solar_panel) / 1000.0
38 |         print("TOTAL GENERATION: "+str(total_generation))
39 |         return total_generation
40 | 
41 | 
42 |     def _calculate_generation(self, exposure, max_cap = 180.0):
43 |         return max_cap * (exposure / 1000.0)


--------------------------------------------------------------------------------
/synchronizer.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import traceback
 4 | import argparse
 5 | import time
 6 | from nameserver import NameServer
 7 | from osbrain import run_nameserver, run_agent
 8 | 
 9 | pidfile = "assets/ns.pid"
10 | 
11 | 
12 | def initiate_nameserver(ns_socket_addr):
13 |     osbrain_ns = None
14 |     # If file exists then nameserver has already been started. Return a reference to the name server
15 |     if os.path.isfile(pidfile):
16 |         print("PID file already exists. Removing old pid file.")
17 |         os.unlink(pidfile)
18 | 
19 |     try :
20 |         print("Creating a new nameserver...")
21 |         pid = str(os.getpid())
22 |         osbrain_ns = run_nameserver(addr=ns_socket_addr)
23 |         open(pidfile, 'w+').write(pid)
24 | 
25 |     except Exception:
26 |         osbrain_ns.shutdown()
27 |         print(traceback.format_exc())
28 |         print("ERROR: Exception caught when creating nameserver.")
29 |         sys.exit(-1)
30 | 
31 |     return osbrain_ns
32 | 
33 | 
34 | 
35 | def start_server_job(osbrain_ns, agentname):
36 |     time.sleep(3)
37 |     ns_agent = NameServer(osbrain_ns, agentname)
38 | 
39 |     # Start the scheduled job
40 |     steve = run_agent(agentname, serializer='json')
41 |     ns_agent.schedule_job(steve)
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     parser = argparse.ArgumentParser(description='Agent Module')
46 | 
47 |     parser.add_argument('--agentname', required=True, help='Name of the agent')
48 |     parser.add_argument('--nameserver', required=True, help='Socket address of the nameserver')
49 |     args = parser.parse_args()
50 | 
51 |     osbrain_ns = initiate_nameserver(args.nameserver)
52 |     start_server_job(osbrain_ns, args.agentname)


--------------------------------------------------------------------------------
/marlagent/agent/linear/lin_agent.py:
--------------------------------------------------------------------------------
 1 | import util
 2 | 
 3 | from marlagent import rlagent
 4 | 
 5 | class LinearQAgent(rlagent.RLAgent):
 6 | 
 7 |     def __init__(self):
 8 | 
 9 |         print("Linear Approximate Q learning agent instantiated...")
10 |         super(LinearQAgent, self).__init__()
11 | 
12 |         self.weights = util.Counter()
13 | 
14 | 
15 |     def get_qValue(self, state, action):
16 |         """
17 |         Should return Q(state,action) = w * featureVector
18 |         where * is the dotProduct operator
19 | 
20 |         :param state:
21 |         :param action:
22 |         :return:
23 |         """
24 |         features = self.feat_extractor.get_features(state, action)
25 | 
26 |         q_value = 0.0
27 |         for f_key in features:
28 |             q_value = q_value + (features[f_key] * self.weights[f_key])
29 | 
30 |         # print(features)
31 |         # print("Q - VALUE:::::%s"%q_value)
32 |         return q_value
33 | 
34 | 
35 |     def update(self, state, action, next_state, reward):
36 |         """
37 |         Update weights based on transition
38 | 
39 |         :param state:
40 |         :param action:
41 |         :param nextState:
42 |         :param reward:
43 |         :return:
44 |         """
45 |         # TODO
46 |         features = self.feat_extractor.get_features(state, action)
47 |         # difference = reward + (self.discount * self.compute_value_from_qValues(next_state)) - self.get_qValue(state, action)
48 |         q_value_next_state = (self.discount * self.compute_value_from_qValues(next_state))
49 |         q_value_curr_state = self.get_qValue(state, action)
50 |         d_error = reward + q_value_next_state - q_value_curr_state
51 | 
52 |         # print("DISCOUNTED Q VALUE NEXT STATE:%s"%q_value_next_state)
53 |         # print("Q VALUE CURR STATE:%s" % q_value_curr_state)
54 |         print("CORRECTION-------------:%s"%d_error)
55 |         self.write_to_file(data = d_error, path_to_file = 'assets/'+state.name+'error.csv')
56 | 
57 |         for f_key in features:
58 |             self.weights[f_key] = self.weights[f_key] + (self.alpha * d_error * features[f_key])
59 | 
60 |         # Write weights into a file to observe learning
61 |         # print("WEIGHTS---------------:")
62 |         # print(self.weights)
63 | 
64 | 
65 | 
66 |     def get_weights(self):
67 |         return self.weights


--------------------------------------------------------------------------------
/marlagent/agent_actions.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import random
 3 | 
 4 | def update_battery_status(battery_max, battery_curr, amount):
 5 |     '''
 6 |     Update the battery status
 7 |     :param battery_max:
 8 |     :param battery_curr:
 9 |     :param amount:
10 |     :return: the new current battery status
11 |     '''
12 |     excess = 0
13 |     battery_cap_left = battery_max - battery_curr
14 |     new_batt_status = battery_curr
15 | 
16 |     if amount <= 0.0 and abs(amount) <= battery_curr:
17 |         new_batt_status += amount
18 | 
19 |     elif amount <= 0.0 and abs(amount) > battery_curr:
20 |         new_batt_status = 0.0
21 | 
22 |     elif amount > 0.0 and battery_cap_left >= amount:
23 |         new_batt_status += amount
24 | 
25 |     elif amount > 0.0 and battery_cap_left < amount:
26 |         new_batt_status += battery_cap_left
27 |         excess = amount - battery_cap_left
28 | 
29 |     print("Battery status updated: %s." % new_batt_status)
30 |     return new_batt_status, excess
31 | 
32 | 
33 | def request_ally(ns, agent, agent_name, allies, energy_amt, time):
34 | 
35 |     allies_remaining = copy.deepcopy(allies)
36 | 
37 |     while (len(allies_remaining) > 0):
38 | 
39 |         # select a random ally
40 |         ally_name =  random.choice(allies_remaining)
41 | 
42 |         ally_proxy = ns.proxy(name = ally_name, timeout=0.5)
43 |         ally_proxy_addr = ally_proxy.addr(alias=str('energy_request_'+ally_name))
44 | 
45 |         message = {
46 |             'topic': 'ENERGY_REQUEST',
47 |             'agentName':agent_name,
48 |             'time': time,
49 |             'energy': energy_amt
50 |         }
51 | 
52 |         agent.log_info("Contacting ally ({0}) for: {1}".format(ally_name, message['energy']))
53 |         resp = send_message(agent = agent, server_addr = ally_proxy_addr, alias = str('energy_request_'+ally_name), message = message)
54 | 
55 |         # If energy request is accepted
56 |         if resp['topic'] != 'ENERGY_REQUEST_DECLINE':
57 |             agent.log_info("Energy request granted by ally ({0}) : {1}".format(ally_name, resp['energy']))
58 |             return resp['energy']
59 |         else:
60 |             allies_remaining.remove(ally_name)
61 | 
62 |     return float(0.0)
63 | 
64 | 
65 | def energy_transaction(next_state):
66 | 
67 |     next_state.energy_consumption = 0.0
68 |     next_state.energy_generation = 0.0
69 |     next_state.battery_curr = 0.0
70 | 
71 |     return next_state
72 | 
73 | 
74 | def get_energy_balance(state):
75 |     return (state.energy_generation + state.battery_curr) - state.energy_consumption
76 | 
77 | 
78 | def send_message(agent, server_addr, alias,  message):
79 |     agent.connect(server=server_addr, alias=alias)
80 |     agent.send(alias, message=message)
81 |     reply = agent.recv(alias)
82 |     agent.log_info("Recieved: "+str(reply))
83 |     agent.close(alias=alias)
84 |     return reply
85 | 


--------------------------------------------------------------------------------
/cghandler/httpservice.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import json
  3 | 
  4 | class CGHTTPHandler:
  5 | 
  6 |     def __init__(self, agent_name):
  7 |         self.agent_name = agent_name
  8 |         self._register_agent()
  9 | 
 10 | 
 11 |     def _register_agent(self):
 12 |         print("Registering Agent with Central Monitor...")
 13 | 
 14 |         url = 'http://localhost:8080/agent/register'
 15 |         data = {
 16 |             "name": self.agent_name,
 17 |             "active": True
 18 |         }
 19 | 
 20 |         response = requests.post(url=url, json=data)
 21 |         self.agent_id = json.loads(response.content.decode('utf-8'))['id']
 22 | 
 23 | 
 24 |     def update_energy_status(self, time, iter, batt_init, energy_consumption, energy_generation, borrowed_from_CG):
 25 | 
 26 |         url = 'http://localhost:8080/energy/status'
 27 | 
 28 |         data = {
 29 |             "timestamp": time,
 30 |             "agentId": self.agent_id,
 31 |             "iter": iter,
 32 |             "batteryInitial": batt_init,
 33 |             "energyConsumption": energy_consumption,
 34 |             "energyGeneration": energy_generation,
 35 |             "borrowedFromCG": borrowed_from_CG
 36 |         }
 37 |         print(data)
 38 |         response = requests.put(url=url, json=data)
 39 | 
 40 |         if response.status_code == 200:
 41 |             print("Energy status updated successfully with central grid.")
 42 |         else:
 43 |             print("ERROR: %s"%response.content.decode('utf-8'))
 44 | 
 45 | 
 46 |     def register_transaction(self, iter, time, buyer_name, amount):
 47 | 
 48 |         url = 'http://localhost:8080/energy/trasaction'
 49 | 
 50 |         data = {
 51 |             "iter": iter,
 52 |             "timestamp": time,
 53 |             "sellerId": self.agent_id,
 54 |             "buyerName": buyer_name,
 55 |             "price": 0.5,
 56 |             "amount": amount
 57 |         }
 58 | 
 59 |         response = requests.post(url=url, json=data)
 60 | 
 61 |         if response.status_code == 200:
 62 |             print("Energy transaction successfully registered with central grid.")
 63 |         else:
 64 |             print("ERROR: %s"%response.content.decode('utf-8'))
 65 | 
 66 | 
 67 |     def get_energy_status(self, iter):
 68 |         url = 'http://localhost:8080/energy/status/grid/'+str(iter)
 69 |         response = requests.get(url=url)
 70 | 
 71 |         if response.status_code == 200:
 72 |             print("Grid energy status retrieved successfully.")
 73 |             return json.loads(response.content.decode('utf-8'))
 74 |         else:
 75 |             print("ERROR: Error retrieving grid energy status. %s"%response.content)
 76 |             return None
 77 | 
 78 | 
 79 |     def log_iteration_status(self, iter, env, nzeb_status):
 80 |         url = 'http://localhost:8080/energy/log/iteration/status'
 81 | 
 82 |         data = {
 83 |             "iteration": iter,
 84 |             "agentId": self.agent_id,
 85 |             "energyGeneration": env.get_total_generated(),
 86 |             "energyConsumption": env.get_total_consumed(),
 87 |             "energyBorrowedFromAlly": env.get_energy_borrowed_from_ally(),
 88 |             "energyBorrowedFromCG": env.get_energy_borrowed_from_CG(),
 89 |             "nzebStatus": nzeb_status
 90 |         }
 91 | 
 92 |         response = requests.post(url=url, json=data)
 93 | 
 94 |         if response.status_code == 200:
 95 |             print("Iteration status successfully logged to central grid.")
 96 |         else:
 97 |             print("ERROR: %s" % response.content.decode('utf-8'))
 98 | 
 99 | 
100 | instance = False
101 | cg_http_handler = None
102 | 
103 | def get_CG_serivce_instance(agent_name):
104 | 
105 |     global instance
106 |     if not instance:
107 |         global cg_http_handler
108 |         cg_http_handler = CGHTTPHandler(agent_name)
109 |         instance = True
110 |         return cg_http_handler
111 |     else:
112 |         return cg_http_handler


--------------------------------------------------------------------------------
/feat_extractor.py:
--------------------------------------------------------------------------------
  1 | import util
  2 | import numpy as np
  3 | from state import EnvironmentState
  4 | from datetime import datetime
  5 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
  6 | from state import AgentState
  7 | 
  8 | class FeatureExtractor:
  9 | 
 10 |     def __init__(self):
 11 |         print("Instantiating feature extractor...")
 12 |         self._train()
 13 | 
 14 | 
 15 |     def _train(self):
 16 | 
 17 |         train_x = np.zeros(shape=[365 * 48, 2])
 18 | 
 19 |         for i in range (0, (365 * 48)):
 20 |             train_x[i][0] = i%48
 21 | 
 22 |         for i in range (0, (365 * 48)):
 23 |             train_x[i][1] = i%7
 24 | 
 25 |         # for i in range (0, (365 * 48)):
 26 |         #     train_x[i][2] = i%12
 27 | 
 28 |         self.ohe_time = OneHotEncoder(sparse=False)
 29 |         self.ohe_time.fit(train_x)
 30 | 
 31 |         self.lb_actions = LabelEncoder()
 32 |         actions_trans = self.lb_actions.fit_transform(AgentState.actions)
 33 |         self.ohe_actions = OneHotEncoder(sparse=False)
 34 |         self.ohe_actions.fit(actions_trans.reshape(-1,1))
 35 | 
 36 | 
 37 |     def get_features(self, state, action):
 38 |         '''
 39 |         Compute the features from the state to extract the q-value
 40 |         :param state:
 41 |         :param action:
 42 |         :return: a list of feature values
 43 |         '''
 44 | 
 45 |         features = self.encode_state(state)
 46 | 
 47 |         # ---------------- ENCODING ACTIONS ----------------
 48 |         # Modelling energy request data
 49 |         if action['action'] == 'grant' or action['action'] == 'deny_request':
 50 |             # TODO: Discritize by observing the values of data
 51 |             features.append(int(action['data']/0.2))
 52 | 
 53 |         else:
 54 |             features.append(0)
 55 | 
 56 |         action_trans = self.ohe_actions.transform(self.lb_actions.transform([action['action']]).reshape(1,-1))
 57 |         for f in action_trans[0]:
 58 |             features.append(f)
 59 |         # ------------------------------------------------
 60 | 
 61 |         #return self.__encode_features_to_Counter(features)
 62 | 
 63 |         return features
 64 | 
 65 | 
 66 |     def encode_state(self, state):
 67 |         '''
 68 |         Encode the state variable into n features
 69 |         :param state:
 70 |         :return:
 71 |         '''
 72 | 
 73 |         time_feat = util.Counter()
 74 |         time_feat['hour'] = (state.time.time().hour * 60 + state.time.time().minute) // 30
 75 |         time_feat['dayofweek'] = state.time.weekday()  # monday = 0
 76 |         # time_feat['month'] = state.time.month - 1
 77 | 
 78 |         # Transform and avoid the dummy variable trap
 79 |         features = self.ohe_time.transform(np.array([time_feat['hour'], time_feat['dayofweek']])
 80 |                                            .reshape(1, -1))[:, :-1]
 81 | 
 82 |         features = list(features[0])
 83 | 
 84 |         features.append(self.__encode_energy(state.energy_consumption))
 85 |         features.append(self.__encode_energy(state.energy_generation))
 86 |         features.append(self.__encode_energy(state.battery_curr))
 87 | 
 88 |         return features
 89 | 
 90 | 
 91 |     def __encode_features_to_Counter(self, features):
 92 |         # Transforming into apt data structure
 93 |         feat_dict = util.Counter()
 94 |         for i in range(len(features)):
 95 |             feat_dict['f_' + str(i)] = float(features[i])
 96 | 
 97 |         # print(feat_dict)
 98 |         return feat_dict
 99 | 
100 | 
101 |     def get_n_features(self):
102 |         '''
103 |         Simulates a fake agent state and returns the numbers of features.
104 |         :param state:
105 |         :return:
106 |         '''
107 | 
108 |         environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
109 |         fake_agent_state = AgentState(name='Test', iter =0, energy_consumption=0.0, energy_generation=0.0,
110 |                                       battery_curr=float(5), time=datetime.now(),
111 |                                       environment_state=environment_state,
112 |                                       cg_http_service=None)
113 |         action = {}
114 |         action['action'] = 'consume_and_store'
115 |         features = self.get_features(fake_agent_state, action)
116 | 
117 |         return len(features)
118 | 
119 | 
120 |     def __encode_energy(self, energy):
121 |         if energy == 0.0:
122 |             return 0
123 |         elif energy < 1.0:
124 |             return 1.0
125 |         elif energy < 2.88:
126 |             return 2.0
127 |         else:
128 |             return 3
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/nameserver.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import util
  3 | import traceback
  4 | import pandas as pd
  5 | from osbrain import run_agent
  6 | from datetime import datetime
  7 | 
  8 | class NameServer:
  9 |     def __init__(self, ns, agentname):
 10 |         print("Instantiating NameServer class...")
 11 | 
 12 |         self.ns = ns
 13 |         self.agentname = agentname
 14 |         time.sleep(5) # to let all the agents load due to startup latency
 15 | 
 16 | 
 17 |     def schedule_job(self, server_agent):
 18 |         self.d1 = self._load_data("assets/house1_consumption.csv")
 19 |         self.d2 = self._load_data("assets/house2_consumption.csv")
 20 |         self.d3 = self._load_data("assets/house3_consumption.csv")
 21 | 
 22 |         d_map = {
 23 |             "Alice": self.d1,
 24 |             "Bob": self.d2,
 25 |             "Charlie": self.d1,
 26 |             "Dave": self.d3,
 27 |         }
 28 | 
 29 |         # extracting the list of agents
 30 |         agents = self.ns.agents()
 31 |         server_agent.log_info("Registering client details...")
 32 |         agent_name_arr, agent_addr = self.extract_agents(agents)
 33 |         server_agent.log_info("Registered clients: %s"%agent_addr)
 34 | 
 35 | 
 36 |         message = {
 37 |             'topic': 'ENERGY_CONSUMPTION',
 38 |             'time': datetime.now().strftime('%Y/%m/%d %H:%M'),
 39 |             'iter': 0,
 40 |             'consumption': 0.0,
 41 |             'generation': 0.0
 42 |         }
 43 | 
 44 |         max_iter = 500
 45 | 
 46 |         for iter in range(max_iter):
 47 |             message['iter'] = iter
 48 | 
 49 |             last_message = self.dispatch_energy_data(server_agent, message, agent_name_arr, agent_addr, d_map)
 50 |             server_agent.log_info("Iteration (%s) complete!"%iter)
 51 | 
 52 |             if iter <= (max_iter-11):
 53 |                 eoi_message = {
 54 |                     'topic': 'END_OF_ITERATION',
 55 |                     'iter': iter,
 56 |                     'time': last_message['time']
 57 |                 }
 58 |             else:
 59 |                 # last iteration will warn the agents to exploit their policies completely
 60 |                 eoi_message = {
 61 |                     'topic': 'TRAINING_COMPLETE',
 62 |                     'iter': iter,
 63 |                     'time': last_message['time']
 64 |                 }
 65 | 
 66 |             time.sleep(2)
 67 |             # EOI: notify each agent to save its status at the end of each iteration
 68 |             for name in agent_name_arr:
 69 |                 self._send_message(server_agent, agent_addr[name], alias='consumption', message=eoi_message)
 70 |             time.sleep(4)
 71 | 
 72 |         # Exit Message after iterations done
 73 |         # Safe shutdown of all agents for testing
 74 |         for name in agent_name_arr:
 75 |             self._send_message(server_agent, agent_addr[name], alias='consumption', message={'topic': 'exit'})
 76 | 
 77 | 
 78 |     def _load_data(self, path_to_file):
 79 |         '''
 80 |         Import data from the specified directory
 81 |         :param path_to_file:
 82 |         :return:
 83 |         '''
 84 |         print("Loading ("+str(path_to_file)+")...")
 85 |         dateparse = lambda dates: pd.datetime.strptime(dates, '%m/%d/%Y %I:%M %p')
 86 |         D = pd.read_csv(path_to_file, sep=';', parse_dates=['Time'], date_parser=dateparse)
 87 |         D = D.set_index(D['Electricity.Timestep'])
 88 |         return D
 89 | 
 90 | 
 91 |     def dispatch_energy_data(self, server_agent, message, agent_name_arr, agent_addr, d_map):
 92 | 
 93 |         try:
 94 |             # for timestep in range(0, 1200, 30):
 95 |             for timestep in range(7200, 11490, 30):
 96 | 
 97 |                 for name in agent_name_arr:
 98 |                     d = d_map[name]
 99 |                     d_consumption = d.loc[d['Electricity.Timestep'] == timestep]
100 | 
101 |                     message['time'] = util.cnv_datetime_to_str(d_consumption['Time'].get(timestep), '%Y/%m/%d %H:%M')
102 | 
103 |                     message['consumption'] = float(d_consumption['Sum [kWh]'])
104 |                     message['generation'] = float(
105 |                         util.get_generation(d_consumption['Time'].get(timestep), message['consumption']))
106 | 
107 |                     self._send_message(server_agent, agent_addr[name], alias='consumption', message=message)
108 | 
109 |                 time.sleep(1.5)
110 | 
111 |         except Exception:
112 |             print(traceback.format_exc())
113 | 
114 |         return message
115 | 
116 | 
117 |     def extract_agents(self, agents):
118 | 
119 |         agent_name_arr = []
120 |         agent_addr = {}
121 |         for name in agents:
122 |             if name != self.agentname:
123 |                 agent_name_arr.append(name)
124 |                 agent = self.ns.proxy(name)
125 |                 agent_addr[name] = agent.addr(alias='consumption')
126 | 
127 |         return agent_name_arr, agent_addr
128 | 
129 | 
130 |     def _send_message(self, server_agent, client_addr, alias,  message):
131 | 
132 |         server_agent.connect(client_addr, alias=alias)
133 |         server_agent.send(alias, message=message)
134 |         reply = server_agent.recv(alias)
135 |         server_agent.log_info("Recieved: "+str(reply))
136 |         server_agent.close(alias=alias)
137 | 
138 | 


--------------------------------------------------------------------------------
/marlagent/agent/dqn/replay_buffer.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | class ReplayBuffer:
  5 |     """
  6 |     Replay Buffer stores the past obervations along with actions
  7 |     performed and the reward obtained after performing
  8 |     those actions.
  9 | 
 10 |     """
 11 | 
 12 |     def __init__(self, size, n_features):
 13 | 
 14 |         self.size = size
 15 |         self.n_features = n_features
 16 | 
 17 |         self.idx = 0
 18 |         self.num_in_buffer = 0
 19 | 
 20 |         self.obs = None
 21 |         self.action = None
 22 |         self.reward = None
 23 |         self.eoi = None
 24 | 
 25 | 
 26 |     def store_transition(self, state, action, reward):
 27 | 
 28 |         if self.obs is None:
 29 |             self.obs        = np.empty([self.size, self.n_features],        dtype=np.float32)
 30 |             self.action     = np.empty([self.size],                         dtype=np.str)
 31 |             self.reward     = np.empty([self.size],                         dtype=np.float32)
 32 |             self.eoi        = np.empty([self.size],                         dtype=np.float32)
 33 | 
 34 |         self.obs[self.idx] = state
 35 |         self.action[self.idx] = action['action']
 36 |         self.reward[self.idx] = reward
 37 |         self.eoi[self.idx] = 0.0
 38 | 
 39 |         # set the next idx
 40 |         # starts from 1st position and overwrites if buffer full
 41 |         self.idx = (self.idx + 1) % self.size
 42 | 
 43 |         # number of elements in the buffer.
 44 |         # if the buffer is full then the size of buffer is the number of elements present
 45 |         self.num_in_buffer = min(self.size, self.num_in_buffer + 1)
 46 | 
 47 | 
 48 |     def reset(self):
 49 |         self.idx = 0
 50 |         self.obs = np.empty([self.size, self.n_features], dtype=np.float32)
 51 |         self.action = np.empty([self.size], dtype=np.str)
 52 |         self.reward = np.empty([self.size], dtype=np.float32)
 53 |         self.eoi = np.empty([self.size], dtype=np.float32)
 54 | 
 55 | 
 56 |     def sample(self, batch_size):
 57 |         """Sample `batch_size` different transitions.
 58 |         i-th sample transition is the following:
 59 |         when observing `obs_batch[i]`, action `act_batch[i]` was taken,
 60 |         after which reward `rew_batch[i]` was received and subsequent
 61 |         observation  next_obs_batch[i] was observed, unless the epsiode
 62 |         was done which is represented by `done_mask[i]` which is equal
 63 |         to 1 if episode has ended as a result of that action.
 64 |         Parameters
 65 |         ----------
 66 |         batch_size: int
 67 |             How many transitions to sample.
 68 |         Returns
 69 |         -------
 70 |         obs_batch: np.array
 71 |             Array of shape
 72 |         act_batch: np.array
 73 |             Array of shape (batch_size,) and dtype np.int32
 74 |         rew_batch: np.array
 75 |             Array of shape (batch_size,) and dtype np.float32
 76 |         next_obs_batch: np.array
 77 | 
 78 |         """
 79 | 
 80 |         # Extract the radom indexes of batch_size from the number of elements in the buffer
 81 |         idxes = sample_n_unique(lambda: random.randint(0, self.num_in_buffer - 2), batch_size)
 82 | 
 83 |         obs = np.concatenate([[self.obs[idx]] for idx in idxes], 0)
 84 |         next_obs = np.copy(obs[1:, :])
 85 |         reward = np.array([np.array([self.reward[idx]]) for idx in idxes])
 86 |         eoi = np.array([np.array([self.eoi[idx]]) for idx in idxes])
 87 | 
 88 |         obs = obs[:-1, :]
 89 |         next_obs = next_obs
 90 |         reward = reward[:-1,:]
 91 |         eoi = eoi[:-1,:]
 92 | 
 93 |         # sample the latest observation and add it to this batch
 94 |         # Combined experience replay
 95 |         l_obs, l_next_obs, l_reward, l_eoi =  self.__get_latest_obs()
 96 |         obs = np.concatenate([obs, [l_obs]], 0)
 97 |         next_obs = np.concatenate([next_obs, [l_next_obs]], 0)
 98 |         reward = np.concatenate([reward, [[l_reward]]])
 99 |         eoi = np.concatenate([eoi, [[l_eoi]]])
100 | 
101 |         return obs, next_obs, reward, eoi
102 | 
103 | 
104 |     def __get_latest_obs(self):
105 |         '''
106 |         Fetches the last observation. Helper function
107 |         for Combined experience replay.
108 |         :return: Returns a (s,s',r) tuple
109 |         '''
110 |         prev_idx, next_idx = self.__get_last_transition_idxs()
111 | 
112 |         return self.obs[prev_idx], self.obs[next_idx], self.reward[prev_idx], self.eoi[prev_idx]
113 | 
114 | 
115 |     def update_last_transition_with_reward(self, reward):
116 |         '''
117 |         Support for EOI rewards
118 |         :return:
119 |         '''
120 |         prev_idx, next_idx = self.__get_last_transition_idxs()
121 |         self.reward[prev_idx] = reward
122 | 
123 | 
124 | 
125 |     def __get_last_transition_idxs(self):
126 |         if self.idx == 0:
127 |             prev_idx = self.size - 2
128 |             next_idx = prev_idx + 1
129 |         elif self.idx - 2 < 0:
130 |             prev_idx = self.size - 1
131 |             next_idx = 0
132 |         else:
133 |             prev_idx = self.idx - 2
134 |             next_idx = prev_idx + 1
135 | 
136 |         return prev_idx, next_idx
137 | 
138 | 
139 | def sample_n_unique(sampling_f, n):
140 |     """Helper function. Given a function `sampling_f` that returns
141 |     comparable objects, sample n such unique objects.
142 |     """
143 |     res = []
144 |     while len(res) < n:
145 |         candidate = sampling_f()
146 |         if candidate not in res:
147 |             res.append(candidate)
148 |     return res


--------------------------------------------------------------------------------
/marlagent/agent/dqn/dqn.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | import torch.autograd as autograd
  6 | import torch.optim as optim
  7 | 
  8 | from marlagent import rlagent
  9 | from marlagent.agent.dqn.model import DQN
 10 | from marlagent.agent.dqn.replay_buffer import ReplayBuffer
 11 | 
 12 | USE_CUDA = torch.cuda.is_available()
 13 | dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
 14 | 
 15 | 
 16 | class Variable(autograd.Variable):
 17 |     def __init__(self, data, *args, **kwargs):
 18 |         if USE_CUDA:
 19 |             data = data.cuda()
 20 |         super(Variable, self).__init__(data, *args, **kwargs)
 21 | 
 22 | 
 23 | OptimizerSpec = namedtuple("OptimizerSpec", ["constructor", "kwargs"])
 24 | 
 25 | optimizer_spec = OptimizerSpec(
 26 |         constructor=optim.RMSprop,
 27 |         kwargs=dict(lr=0.00025, alpha=0.95, eps=0.01),
 28 |     )
 29 | 
 30 | 
 31 | class DQNAgent(rlagent.RLAgent):
 32 | 
 33 |     def __init__(self):
 34 | 
 35 |         super(DQNAgent, self).__init__()
 36 |         print("DQN initiated...")
 37 | 
 38 |         self.learning_freq = 10
 39 |         self.learning_starts = 1000
 40 |         self.target_update_freq = 50
 41 |         self.num_updates = 0
 42 |         self.num_calls = 0
 43 |         self.discount = 0.99
 44 | 
 45 |         self.n_features = self.feat_extractor.get_n_features()
 46 | 
 47 |         # Instantiating a MLP model
 48 |         self.Q = DQN(self.n_features)
 49 |         self.target_Q = DQN(self.n_features)
 50 | 
 51 |         self.replay_buffer = ReplayBuffer(size = 10000, n_features = self.n_features)
 52 | 
 53 | 
 54 |         # Construct Q network optimizer function
 55 |         self.optimizer = optimizer_spec.constructor(self.Q.parameters(), **optimizer_spec.kwargs)
 56 | 
 57 | 
 58 | 
 59 |     def get_qValue(self, state, action):
 60 | 
 61 |         features = self.feat_extractor.get_features(state, action)
 62 |         feat_arr = self.__transform_to_numpy(features)
 63 | 
 64 |         state_ts = torch.from_numpy(feat_arr).type(dtype).unsqueeze(0)
 65 |         q_values_ts = self.Q(Variable(state_ts, volatile=True)).data
 66 | 
 67 |         print("Calculated Q-Value for action ({0}): {1}".format(action['action'], q_values_ts))
 68 | 
 69 |         # Use volatile = True if variable is only used in inference mode, i.e. don’t save the history
 70 |         return q_values_ts
 71 | 
 72 | 
 73 | 
 74 |     def update(self, state, action, next_state, reward, eoi = False):
 75 | 
 76 |         if eoi == True:
 77 |             self.replay_buffer.update_last_transition_with_reward(reward)
 78 |         else:
 79 |             features = self.feat_extractor.get_features(state, action)
 80 | 
 81 |             # store the converted state in the replay buffer
 82 |             # if action['action'] != 'consume_and_store':
 83 |             self.num_calls += 1
 84 |             self.replay_buffer.store_transition(features, action, reward)
 85 | 
 86 |             # Perform the update in a batch. Apply the average error over all fields
 87 | 
 88 |             if self.num_calls > self.learning_starts and self.num_calls % self.learning_freq == 0:
 89 |                 self.perform_update(state.name, reward = 0)
 90 | 
 91 | 
 92 |     def perform_update(self, agent_name, reward):
 93 | 
 94 |         #TODO: Ignore reward from EOI handler
 95 | 
 96 |         print("Updating network...")
 97 |         obs, next_obs, r, eoi = self.replay_buffer.sample(batch_size=64)
 98 | 
 99 |         #reward = reward * np.zeros(obs.shape[0])
100 |         # r[r.shape[0] - 1] = reward
101 |         reward = r
102 | 
103 |         obs_batch = Variable(torch.from_numpy(obs).type(dtype))
104 |         reward_batch = Variable(torch.from_numpy(reward).type(dtype))
105 |         next_obs_batch = Variable(torch.from_numpy(next_obs).type(dtype))
106 |         not_eoi = Variable(torch.from_numpy(1 - eoi)).type(dtype)
107 | 
108 |         current_Q_values = self.Q(obs_batch)
109 |         target_Q_values = self.target_Q(next_obs_batch).detach()
110 |         target_Q_values = target_Q_values * not_eoi
111 | 
112 |         # print("CURR Q VALUE:", current_Q_values)
113 |         # print("TARGET Q VALUE:", target_Q_values)
114 |         # print("REWARD BATCH", reward_batch)
115 |         print("Not EOI", not_eoi)
116 | 
117 | 
118 |         q_value_curr_state = current_Q_values
119 |         q_value_next_state = reward_batch + (self.discount * target_Q_values)
120 |         # print("Q VALUE NEXT STATE:", q_value_next_state)
121 | 
122 |         # Compute Bellman error
123 |         bellman_error = q_value_next_state - q_value_curr_state
124 |         # print("BELLMAN ERROR:", bellman_error)
125 | 
126 |         # clip the bellman error between [-1 , 1]
127 |         clipped_bellman_error = bellman_error.clamp(-1, 1)
128 |         # print("Bellman Error:", clipped_bellman_error)
129 | 
130 |         d_error = clipped_bellman_error * -1.0
131 |         # print("Delta Error:", d_error.data.unsqueeze(1))
132 |         print("Delta Error:", d_error.data.mean())
133 | 
134 |         self.write_to_file(data=d_error.mean(), path_to_file='assets/' + agent_name + 'error.csv')
135 | 
136 |         # Clear previous gradients before backward pass
137 |         self.optimizer.zero_grad()
138 | 
139 |         new_q_value_curr_state = Variable(q_value_curr_state.data, requires_grad=True)
140 |         # new_q_value_curr_state.backward()
141 |         new_q_value_curr_state.backward(d_error.data)
142 | 
143 |         # Perfom the update
144 |         self.optimizer.step()
145 | 
146 |         # Clear stored values in the replay buffer
147 |         # self.replay_buffer.reset()
148 |         print("Updating network finished.")
149 | 
150 |         self.num_updates += 1
151 | 
152 |         # Periodically update the target network with the Q network
153 |         if self.num_updates % self.target_update_freq == 0:
154 |             self.target_Q.load_state_dict(self.Q.state_dict())
155 |             print("Updating target Q network finished.")
156 | 
157 | 
158 |     def __transform_to_numpy(self, features):
159 |         numpy_arr = np.array(features, dtype=np.float32)
160 |         return numpy_arr
161 | 


--------------------------------------------------------------------------------
/state.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | class AgentState:
  4 |     """
  5 |     Class representing the agent's state at any given moment.
  6 |     """
  7 | 
  8 |     actions = ['request_ally', 'request_grid', 'grant', 'deny_request', 'consume_and_store']
  9 | 
 10 |     def __init__(self, name, iter, energy_consumption, energy_generation, battery_curr, time, environment_state,
 11 |                  cg_http_service):
 12 |         print("registering state...")
 13 | 
 14 |         self.name = name
 15 |         self.iter = iter
 16 |         self.energy_consumption = energy_consumption
 17 |         self.energy_generation = energy_generation
 18 |         self.battery_max = 7.2
 19 |         self.battery_curr = battery_curr
 20 |         self.time = time
 21 | 
 22 |         self.environment_state = environment_state
 23 |         self.cg_http_service = cg_http_service
 24 | 
 25 | 
 26 |     def get_possible_actions(self, actions = None):
 27 |         '''
 28 |         Computes the set of all legal actions allowed in this state
 29 |         :return: array of legal actions
 30 |         '''
 31 |         possible_actions = []
 32 | 
 33 |         if actions is None:
 34 | 
 35 |             if(self.energy_generation + self.battery_curr > self.energy_consumption):
 36 |                 possible_actions.append({'action':'consume_and_store', 'data':None})
 37 |             else:
 38 |                 possible_actions.append({'action':'request_ally', 'data':None})
 39 |                 possible_actions.append({'action':'request_grid', 'data':None})
 40 | 
 41 |         else:
 42 |             # Case when only options are grant or deny
 43 |             # Simply deny the request if current battery is 0
 44 |             if self.battery_curr <= 0:
 45 |                 for action in actions:
 46 |                     if action['action'] == 'deny_request':
 47 |                         possible_actions.append(action)
 48 |             else:
 49 |                 possible_actions = actions
 50 | 
 51 |         return possible_actions
 52 | 
 53 | 
 54 |     def get_score(self):
 55 |         score = 0.0
 56 |         # if it is in the positive state
 57 |         # if (self.energy_generation + self.battery_curr) >= self.energy_consumption:
 58 |         #     score += 1
 59 |         # elif (self.energy_generation + self.battery_curr) < self.energy_consumption:
 60 |         #     score -= 10
 61 |         #
 62 |         # if there is remaining charge in the battery
 63 |         # if self.battery_curr > 0.0:
 64 |         #     score += 1.0
 65 | 
 66 |         # overall impact of the agent on the environment
 67 |         # if (self.environment_state.get_total_generated() + self.environment_state.get_energy_borrowed_from_ally()) \
 68 |         #         >= (self.environment_state.get_total_consumed() + self.environment_state.get_energy_borrowed_from_CG()):
 69 |         #     score += 1.0
 70 |         # elif (self.environment_state.get_total_generated() + self.environment_state.get_energy_borrowed_from_ally()) \
 71 |         #         < (self.environment_state.get_total_consumed() + self.environment_state.get_energy_borrowed_from_CG()):
 72 |         #     # score -= 1.0
 73 |         #     score += 0.0
 74 | 
 75 | 
 76 |         # Add global state information
 77 |         # community_status = self.cg_http_service.get_energy_status(self.iter)
 78 | 
 79 |         # diff = self.environment_state.get_energy_borrowed_from_ally() - (0.5*self.environment_state.get_energy_borrowed_from_CG())
 80 |         #
 81 |         # if(diff > 0):
 82 |         #     score += 1
 83 | 
 84 |         return score
 85 | 
 86 | 
 87 |     def reset(self, battery_init):
 88 |         self.energy_consumption = 0.0
 89 |         self.energy_generation = 0.0
 90 |         self.battery_curr = battery_init
 91 |         self.time =  datetime.strptime('2014/01/01 12:00', '%Y/%m/%d %H:%M')
 92 |         self.environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
 93 | 
 94 | 
 95 |     def __str__(self):
 96 |         str_rep = """
 97 |         Time: {0}
 98 |         Energy Generation: {1}
 99 |         Energy Consumption {2}
100 |         Battery Current: {3}
101 |         Battery Max: {4}
102 |         """.format(self.time, self.energy_generation, self.energy_consumption, self.battery_curr, self.battery_max)
103 | 
104 |         return str_rep
105 | 
106 |     def set_environment_state(self, environment_state):
107 |         self.environment_state = environment_state
108 | 
109 | 
110 | class EnvironmentState:
111 |     """
112 |     Maintains the state of the environment
113 |     """
114 | 
115 |     def __init__(self, total_consumed, total_generated, central_grid, energy_borrowed_from_ally, energy_granted_to_ally,
116 |                  net_grid_status):
117 | 
118 |         self.total_consumed = total_consumed
119 |         self.total_generated = total_generated
120 |         self.central_grid = central_grid
121 |         self.energy_borrowed_from_ally = energy_borrowed_from_ally
122 |         self.energy_granted_to_ally = energy_granted_to_ally
123 |         self.net_grid_status = net_grid_status
124 | 
125 | 
126 |     def get_total_consumed(self):
127 |         return self.total_consumed
128 | 
129 |     def update_total_consumed(self, energy):
130 |         self.total_consumed = self.total_consumed + energy
131 | 
132 |     def get_total_generated(self):
133 |         return self.total_generated
134 | 
135 |     def set_total_generated(self, energy):
136 |         self.total_generated = energy
137 | 
138 |     def update_total_generated(self, energy):
139 |         self.total_generated = self.total_generated + energy
140 | 
141 |     def get_energy_borrowed_from_CG(self):
142 |         return self.central_grid
143 | 
144 |     def update_energy_borrowed_from_CG(self, energy):
145 |         self.central_grid = self.central_grid + energy
146 | 
147 |     def get_energy_borrowed_from_ally(self):
148 |         return self.energy_borrowed_from_ally
149 | 
150 |     def update_energy_borrowed_from_ally(self, energy):
151 |         self.energy_borrowed_from_ally = self.energy_borrowed_from_ally + energy
152 | 
153 |     def update_energy_granted_to_ally(self, energy):
154 |         self.energy_granted_to_ally = self.energy_granted_to_ally + energy
155 | 
156 |     def get_energy_granted_to_ally(self):
157 |         return self.energy_granted_to_ally
158 | 
159 |     def __str__(self):
160 |         str_rep = """
161 |         Total Generated: {0}
162 |         Total Consumed: {1}
163 |         Total Borrowed From CG: {2}
164 |         Total Borrowed From Allies: {3}
165 |         Total Granted To Allies: {4}
166 |         """.format(self.total_generated, self.total_consumed, self.central_grid, self.energy_borrowed_from_ally, self.energy_granted_to_ally)
167 | 
168 |         return str_rep
169 | 


--------------------------------------------------------------------------------
/marlagent/rlagent.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import util
  3 | import random
  4 | import copy
  5 | import feat_extractor as fe
  6 | from marlagent import agent_actions
  7 | 
  8 | class RLAgent:
  9 | 
 10 |     def __init__(self, alpha=0.001, epsilon=1.0, gamma=0.9, numTraining = 10):
 11 | 
 12 |         print("RL agent instantiated...")
 13 |         self.alpha = float(alpha) # learning rate
 14 |         self.epsilon = float(epsilon) # exploration vs exploitation
 15 |         self.discount = float(gamma) # significance of future rewards
 16 |         self.numTraining = int(numTraining)
 17 | 
 18 |         self.feat_extractor = fe.FeatureExtractor()
 19 |         self.central_grid = util.Counter() # note the energy borrowed from central grid
 20 | 
 21 | 
 22 | 
 23 |     def get_qValue(self, state, action):
 24 |         pass
 25 | 
 26 | 
 27 | 
 28 |     def update(self, state, action, next_state, reward, update = False):
 29 |         pass
 30 | 
 31 | 
 32 | 
 33 |     def compute_value_from_qValues(self, state):
 34 |         """
 35 |         Compute the q_value for each action and return the max Q-value as the value of that state
 36 | 
 37 |         :param state:
 38 |         :return:
 39 |         """
 40 | 
 41 |         # No actions available
 42 |         if len(self._get_legal_actions(state)) == 0:
 43 |             return 0.0
 44 | 
 45 |         q_values_for_this_state = []
 46 |         for action in self._get_legal_actions(state):
 47 |             q_values_for_this_state.append(self.get_qValue(state, action))
 48 | 
 49 |         return max(q_values_for_this_state)
 50 | 
 51 | 
 52 | 
 53 |     def compute_action_from_qValues(self, state, actions = None):
 54 |         """
 55 |         Iterate over all the actions and compute their q-values. Then return the action with the highest q-value.
 56 | 
 57 |         :param state:
 58 |         :return:
 59 |         """
 60 |         actions = self._get_legal_actions(state, actions)
 61 | 
 62 |         if len(actions) == 0:
 63 |             print("Something wrong. Check!. Maybe all actions are done.")
 64 |             return None
 65 | 
 66 |         # Populating a new list of (action, value) pair from list of q_values
 67 |         action_value_pair = []
 68 |         for action in actions:
 69 |             action_value_pair.append((action, self.get_qValue(state, action)))
 70 | 
 71 |         # Returning the action with maximum q_value
 72 |         #TODO: if q values for multiple action value pairs is the same it picks the first one. Need to randomize this selection
 73 |         return max(action_value_pair, key=lambda x: x[1])[0]
 74 | 
 75 | 
 76 | 
 77 |     def get_action(self, state, actions = None):
 78 |         """
 79 |         Compute the action to take in the current state.
 80 |         Epsilon decides whether to exploit the current policy or choice a new action randomly.
 81 | 
 82 |         A small value for epsilon indicates lesser exploration.
 83 |         :param state:
 84 |         :return: appropriate action to take in the current state
 85 |         """
 86 |         legal_actions = self._get_legal_actions(state, actions)
 87 |         action = None
 88 | 
 89 |         if util.flip_coin(self.epsilon):
 90 |             print("Randomizing action...")
 91 |             action = random.choice(legal_actions)
 92 |         else:
 93 |             print("Selecting the best action based on policy...")
 94 |             action = self.get_policy(state, actions)
 95 | 
 96 |         return action
 97 | 
 98 | 
 99 | 
100 |     def get_policy(self, state, actions):
101 |         return self.compute_action_from_qValues(state, actions)
102 | 
103 | 
104 | 
105 |     def do_action(self, state, action, ns, agent, agent_name, allies):
106 |         '''
107 |         Perform an action and return the next state
108 |         :param state:
109 |         :param action:
110 |         :return: the next state on taking the action
111 |         '''
112 |         next_state = copy.deepcopy(state)
113 |         next_state.environment_state.update_total_consumed(state.energy_consumption)
114 |         next_state.environment_state.update_total_generated(state.energy_generation)
115 | 
116 |         usable_generated_energy =  state.energy_generation
117 | 
118 |         time_str = util.cnv_datetime_to_str(state.time, '%Y/%m/%d %H:%M')
119 |         if action['action'] == 'consume_and_store':
120 | 
121 |             diff = state.energy_generation - state.energy_consumption
122 | 
123 |             # Store the unused energy and return the excess
124 |             batt_curr, excess = agent_actions.update_battery_status(state.battery_max, state.battery_curr, diff)
125 | 
126 |             # Subtract the energy which could not be used
127 |             usable_generated_energy = usable_generated_energy - excess
128 |             next_state.environment_state.set_total_generated(next_state.environment_state.get_total_generated() - excess)
129 | 
130 |             next_state.battery_curr = batt_curr
131 |             next_state.energy_generation = 0.0
132 |             next_state.energy_consumption = 0.0
133 | 
134 | 
135 |         if action['action'] == 'request_ally':
136 |             # TODO think about what to do if ally does not serve request
137 |             diff = (state.energy_generation + state.battery_curr) - state.energy_consumption
138 |             agent.log_info("---------Energy Diff: "+str(diff))
139 |             energy_grant = 0.0
140 |             if diff < 0.0:
141 |                 energy_grant = agent_actions.request_ally(ns=ns, agent=agent, agent_name = agent_name, allies=allies, energy_amt = abs(diff), time = time_str)
142 |                 # energy_grant = abs(diff)
143 |                 next_state.energy_generation = 0.0
144 |                 next_state.battery_curr = 0.0
145 |                 next_state.environment_state.update_energy_borrowed_from_ally(energy_grant)
146 | 
147 |                 # TODO think how to handle energy consumption if
148 |                 # If energy consumption is positive in next state then penalize agent
149 |                 next_state.energy_consumption = abs(diff) - energy_grant
150 | 
151 |                 if next_state.energy_consumption > 0:
152 |                     self.central_grid[time_str] = next_state.energy_consumption
153 |                     next_state.environment_state.update_energy_borrowed_from_CG(self.central_grid[time_str])
154 |                     #next_state.energy_consumption = 0.0
155 | 
156 |             else:
157 |                 print("Ally not requested as enough energy available in battery.")
158 |                 next_state.energy_generation = 0.0
159 |                 next_state.energy_consumption = 0.0
160 |                 next_state.battery_curr = diff
161 | 
162 | 
163 |         if action['action'] == 'request_grid':
164 |             # calculate the energy difference
165 |             energy_diff = abs(agent_actions.get_energy_balance(state))
166 |             self.central_grid[time_str] = energy_diff
167 | 
168 |             next_state.energy_consumption = 0.0
169 |             next_state.energy_generation = 0.0
170 |             next_state.battery_curr = 0.0
171 |             next_state.environment_state.update_energy_borrowed_from_CG(energy_diff)
172 | 
173 | 
174 |         if action['action'] == 'grant':
175 |             energy_request = action['data']
176 |             bal = (state.energy_generation + state.battery_curr) - energy_request
177 |             energy_grant = 0.0
178 | 
179 |             if(bal >= 0):
180 |                 energy_grant = energy_request
181 |                 next_state.energy_generation = 0.0
182 |                 next_state.battery_curr, excess = agent_actions.update_battery_status(state.battery_max, state.battery_curr,
183 |                                                                               -energy_grant)
184 |                 agent.log_info("Granting full energy.")
185 | 
186 |             elif(bal < 0):
187 |                 energy_grant = (state.energy_generation + state.battery_curr)
188 |                 next_state.energy_generation = 0.0
189 |                 next_state.battery_curr = 0.0
190 |                 agent.log_info("Granting partial energy.")
191 | 
192 |             # A more complex case can be designed where it gives partial energy
193 | 
194 |             return (next_state, energy_grant)
195 | 
196 |         if action['action'] == 'deny_request':
197 |             energy_grant = 0.0
198 |             return (next_state, energy_grant)
199 | 
200 |         return (next_state, usable_generated_energy)
201 | 
202 | 
203 |     def write_to_file(self, data, path_to_file = 'assets/error.csv'):
204 | 
205 |         if os.path.isfile(path_to_file):
206 |             with open(path_to_file, mode='a') as f:
207 |                 f.write(str(data)+str("\n"))
208 |                 f.close()
209 | 
210 |         else:
211 |             with open(path_to_file, 'w+') as f:
212 |                 f.write(str(data)+str("\n"))
213 |                 f.close()
214 | 
215 | 
216 | 
217 |     def _get_legal_actions(self, agent_state, actions=None):
218 |         """
219 |         Computes the set of actions a agent should take from the set of possible actions
220 |         :param agent_state:
221 |         :param actions:
222 |         :return: legal actions the agent can take
223 |         """
224 |         possible_actions = agent_state.get_possible_actions(actions)
225 | 
226 |         # TODO some filtering of actions
227 | 
228 |         legal_actions = copy.deepcopy(possible_actions)
229 | 
230 |         return legal_actions


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Utility file
  3 | """
  4 | import sys
  5 | import inspect
  6 | import random
  7 | from datetime import datetime
  8 | 
  9 | 
 10 | def raiseNotDefined():
 11 |     fileName = inspect.stack()[1][1]
 12 |     line = inspect.stack()[1][2]
 13 |     method = inspect.stack()[1][3]
 14 | 
 15 |     print("*** Method not implemented: %s at line %s of %s" % (method, line, fileName))
 16 |     sys.exit(1)
 17 | 
 18 | 
 19 | def flip_coin( p ):
 20 |     r = random.random()
 21 |     return r < p
 22 | 
 23 | 
 24 | def cnv_datetime_to_str(time, format):
 25 |     # date = datetime.strptime(time, '%Y-%m-%d %H:%M')
 26 |     return time.strftime(format)
 27 | 
 28 | 
 29 | def get_generation(ts, consumption):
 30 |     generation = 0.0
 31 |     if ts.time().hour < 18 and ts.time().hour > 6:
 32 |         if flip_coin(0.7):
 33 |             generation += consumption + consumption*(0.5)
 34 |         else:
 35 |             generation = consumption - consumption*(0.1)
 36 | 
 37 |     return generation
 38 | 
 39 | 
 40 | def randomize_max(max_value, action_value_pairs):
 41 |     ''
 42 | 
 43 | 
 44 | def get_reward_for_action(action):
 45 |     action_reward_dict = {
 46 |         'request_ally' : 0,
 47 |         'request_grid': 0,
 48 |         'grant': 0.0,
 49 |         'deny_request': 0,
 50 |         'consume_and_store': 0,
 51 |     }
 52 | 
 53 |     return action_reward_dict[action]
 54 | 
 55 | 
 56 | 
 57 | def reward_transaction(state, next_state, action, net_curr_grid_status):
 58 |     reward = 0.0
 59 |     # if next_state.environment_state.get_energy_borrowed_from_ally() > state.environment_state.get_energy_borrowed_from_ally():
 60 |     #     reward += 0.5
 61 |     #
 62 |     # if next_state.environment_state.get_energy_granted_to_ally() > state.environment_state.get_energy_granted_to_ally():
 63 |     #     reward += 1
 64 | 
 65 | 
 66 |     # Local NZEB State
 67 |     # next_state_nzeb = (next_state.environment_state.get_total_generated() + next_state.environment_state.get_energy_borrowed_from_ally()) \
 68 |     #               - (next_state.environment_state.get_total_consumed() + next_state.environment_state.get_energy_borrowed_from_CG())
 69 |     #
 70 |     # curr_state_nzeb = (state.environment_state.get_total_generated() + state.environment_state.get_energy_borrowed_from_ally()) \
 71 |     #                   - (state.environment_state.get_total_consumed() + state.environment_state.get_energy_borrowed_from_CG())
 72 |     #
 73 |     # if next_state_nzeb > curr_state_nzeb:
 74 |     #     reward += 1
 75 | 
 76 | 
 77 |     # Community NZEB state
 78 | 
 79 |     # If the total grid nZEB status is better than the previous status
 80 |     # if(net_curr_grid_status > state.environment_state.net_grid_status):
 81 |     #     reward += 1
 82 | 
 83 |     #OR
 84 | 
 85 |     reward = net_curr_grid_status
 86 | 
 87 |     return round(reward,1)
 88 | 
 89 | 
 90 | 
 91 | def compare(a, b):
 92 |     if a > b:
 93 |         return 1
 94 |     elif a < b:
 95 |         return -1
 96 |     else:
 97 |         return 0
 98 | 
 99 | 
100 | def calc_net_grid_status(curr_grid_status):
101 |     net_curr_grid_status = curr_grid_status['generation'] \
102 |                            - (curr_grid_status['consumption'] - curr_grid_status['borrowedFromCG'])
103 |     return net_curr_grid_status
104 | 
105 | 
106 | class Counter(dict):
107 |     """
108 |     A counter keeps track of counts for a set of keys.
109 | 
110 |     The counter class is an extension of the standard python
111 |     dictionary type.  It is specialized to have number values
112 |     (integers or floats), and includes a handful of additional
113 |     functions to ease the task of counting data.  In particular,
114 |     all keys are defaulted to have value 0.  Using a dictionary:
115 | 
116 |     a = {}
117 |     print a['test']
118 | 
119 |     would give an error, while the Counter class analogue:
120 | 
121 |     >>> a = Counter()
122 |     >>> print a['test']
123 |     0
124 | 
125 |     returns the default 0 value. Note that to reference a key
126 |     that you know is contained in the counter,
127 |     you can still use the dictionary syntax:
128 | 
129 |     >>> a = Counter()
130 |     >>> a['test'] = 2
131 |     >>> print a['test']
132 |     2
133 | 
134 |     This is very useful for counting things without initializing their counts,
135 |     see for example:
136 | 
137 |     >>> a['blah'] += 1
138 |     >>> print a['blah']
139 |     1
140 | 
141 |     The counter also includes additional functionality useful in implementing
142 |     the classifiers for this assignment.  Two counters can be added,
143 |     subtracted or multiplied together.  See below for details.  They can
144 |     also be normalized and their total count and arg max can be extracted.
145 |     """
146 |     def __getitem__(self, idx):
147 |         self.setdefault(idx, 0)
148 |         return dict.__getitem__(self, idx)
149 | 
150 |     def incrementAll(self, keys, count):
151 |         """
152 |         Increments all elements of keys by the same count.
153 | 
154 |         >>> a = Counter()
155 |         >>> a.incrementAll(['one','two', 'three'], 1)
156 |         >>> a['one']
157 |         1
158 |         >>> a['two']
159 |         1
160 |         """
161 |         for key in keys:
162 |             self[key] += count
163 | 
164 |     def argMax(self):
165 |         """
166 |         Returns the key with the highest value.
167 |         """
168 |         if len(self.keys()) == 0: return None
169 |         all = self.items()
170 |         values = [x[1] for x in all]
171 |         maxIndex = values.index(max(values))
172 |         return all[maxIndex][0]
173 | 
174 |     def sortedKeys(self):
175 |         """
176 |         Returns a list of keys sorted by their values.  Keys
177 |         with the highest values will appear first.
178 | 
179 |         >>> a = Counter()
180 |         >>> a['first'] = -2
181 |         >>> a['second'] = 4
182 |         >>> a['third'] = 1
183 |         >>> a.sortedKeys()
184 |         ['second', 'third', 'first']
185 |         """
186 |         sortedItems = self.items()
187 |         compare = lambda x, y:  sign(y[1] - x[1])
188 |         sortedItems.sort(cmp=compare)
189 |         return [x[0] for x in sortedItems]
190 | 
191 |     def totalCount(self):
192 |         """
193 |         Returns the sum of counts for all keys.
194 |         """
195 |         return sum(self.values())
196 | 
197 |     def normalize(self):
198 |         """
199 |         Edits the counter such that the total count of all
200 |         keys sums to 1.  The ratio of counts for all keys
201 |         will remain the same. Note that normalizing an empty
202 |         Counter will result in an error.
203 |         """
204 |         total = float(self.totalCount())
205 |         if total == 0: return
206 |         for key in self.keys():
207 |             self[key] = self[key] / total
208 | 
209 |     def divideAll(self, divisor):
210 |         """
211 |         Divides all counts by divisor
212 |         """
213 |         divisor = float(divisor)
214 |         for key in self:
215 |             self[key] /= divisor
216 | 
217 |     def copy(self):
218 |         """
219 |         Returns a copy of the counter
220 |         """
221 |         return Counter(dict.copy(self))
222 | 
223 |     def __mul__(self, y ):
224 |         """
225 |         Multiplying two counters gives the dot product of their vectors where
226 |         each unique label is a vector element.
227 | 
228 |         >>> a = Counter()
229 |         >>> b = Counter()
230 |         >>> a['first'] = -2
231 |         >>> a['second'] = 4
232 |         >>> b['first'] = 3
233 |         >>> b['second'] = 5
234 |         >>> a['third'] = 1.5
235 |         >>> a['fourth'] = 2.5
236 |         >>> a * b
237 |         14
238 |         """
239 |         sum = 0
240 |         x = self
241 |         if len(x) > len(y):
242 |             x,y = y,x
243 |         for key in x:
244 |             if key not in y:
245 |                 continue
246 |             sum += x[key] * y[key]
247 |         return sum
248 | 
249 |     def __radd__(self, y):
250 |         """
251 |         Adding another counter to a counter increments the current counter
252 |         by the values stored in the second counter.
253 | 
254 |         >>> a = Counter()
255 |         >>> b = Counter()
256 |         >>> a['first'] = -2
257 |         >>> a['second'] = 4
258 |         >>> b['first'] = 3
259 |         >>> b['third'] = 1
260 |         >>> a += b
261 |         >>> a['first']
262 |         1
263 |         """
264 |         for key, value in y.items():
265 |             self[key] += value
266 | 
267 |     def __add__( self, y ):
268 |         """
269 |         Adding two counters gives a counter with the union of all keys and
270 |         counts of the second added to counts of the first.
271 | 
272 |         >>> a = Counter()
273 |         >>> b = Counter()
274 |         >>> a['first'] = -2
275 |         >>> a['second'] = 4
276 |         >>> b['first'] = 3
277 |         >>> b['third'] = 1
278 |         >>> (a + b)['first']
279 |         1
280 |         """
281 |         addend = Counter()
282 |         for key in self:
283 |             if key in y:
284 |                 addend[key] = self[key] + y[key]
285 |             else:
286 |                 addend[key] = self[key]
287 |         for key in y:
288 |             if key in self:
289 |                 continue
290 |             addend[key] = y[key]
291 |         return addend
292 | 
293 |     def __sub__( self, y ):
294 |         """
295 |         Subtracting a counter from another gives a counter with the union of all keys and
296 |         counts of the second subtracted from counts of the first.
297 | 
298 |         >>> a = Counter()
299 |         >>> b = Counter()
300 |         >>> a['first'] = -2
301 |         >>> a['second'] = 4
302 |         >>> b['first'] = 3
303 |         >>> b['third'] = 1
304 |         >>> (a - b)['first']
305 |         -5
306 |         """
307 |         addend = Counter()
308 |         for key in self:
309 |             if key in y:
310 |                 addend[key] = self[key] - y[key]
311 |             else:
312 |                 addend[key] = self[key]
313 |         for key in y:
314 |             if key in self:
315 |                 continue
316 |             addend[key] = -1 * y[key]
317 |         return addend
318 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | The main program that triggers the application
  3 | '''
  4 | 
  5 | import argparse
  6 | import os
  7 | import sys
  8 | import traceback
  9 | import time
 10 | import copy
 11 | import _thread
 12 | import util
 13 | import multiprocessing
 14 | from random import randint
 15 | from state import AgentState, EnvironmentState
 16 | from datetime import datetime
 17 | from marlagent.agent.linear.lin_agent import LinearQAgent
 18 | from marlagent.agent.dqn.dqn import DQNAgent
 19 | from osbrain import run_agent
 20 | from osbrain import run_nameserver
 21 | from osbrain import NSProxy
 22 | from nameserver import NameServer
 23 | from cghandler import httpservice
 24 | from prediction.energy_generation import EnergyGeneration
 25 | 
 26 | 
 27 | pidfile = "assets/ns.pid"
 28 | 
 29 | def exit_check(msg):
 30 |     if msg['topic'] == 'exit':
 31 |         return True
 32 | 
 33 | 
 34 | def energy_request_handler(agent, message):
 35 | 
 36 |     # Acquire the lock
 37 |     lock_count = 0
 38 |     while not multiprocessing_lock.acquire(blocking=False):
 39 |         try:
 40 |             if lock_count <= 2:
 41 |                 time.sleep(randint(1, 3) / 10)
 42 |                 lock_count += 1
 43 |             else:
 44 |                 yield {'topic': 'ENERGY_REQUEST_DECLINE'}
 45 |                 agent.log_info("Could not acquire lock! Energy request declined.")
 46 |                 return
 47 |         except:
 48 |             print(traceback.format_exc())
 49 | 
 50 | 
 51 |     agent.log_info("Lock Acquired!")
 52 | 
 53 |     try:
 54 |         print("-----------------------Start Transaction-----------------------")
 55 |         agent.log_info('Received: %s' % message)
 56 | 
 57 |         agent.log_info("Deepy copy of global state initiated...")
 58 |         l_g_agent_state = multiprocessing_ns.g_agent_state
 59 |         l_curr_state = copy.deepcopy(l_g_agent_state)
 60 | 
 61 |         # update with new values of energy consumption and generation
 62 |         l_curr_state.time = datetime.strptime(message['time'], '%Y/%m/%d %H:%M')
 63 | 
 64 |         # amount of requested energy
 65 |         energy_req = message['energy']
 66 | 
 67 |         actions = [
 68 |             {
 69 |                 'action': 'grant',
 70 |                 'data': energy_req
 71 |             },
 72 |             {
 73 |                 'action': 'deny_request',
 74 |                 'data': energy_req
 75 |             }
 76 |         ]
 77 | 
 78 |         # call get action with this new state
 79 |         l_rl_agent = multiprocessing_ns.rl_agent
 80 |         action = l_rl_agent.get_action(copy.deepcopy(l_curr_state), actions)
 81 | 
 82 |         agent.log_info('Performing action (%s).' % action)
 83 | 
 84 |         response = None
 85 | 
 86 |         # If energy request is declined
 87 |         if action['action'] ==  'deny_request':
 88 |             response = {'topic':'ENERGY_REQUEST_DECLINE'}
 89 | 
 90 |         # perform action and update global agent state
 91 |         next_state, energy_grant = l_rl_agent.do_action(l_curr_state, action, osbrain_ns, agent, args.agentname, allies)
 92 | 
 93 |         # if energy request is accepted
 94 |         if action['action'] == 'grant':
 95 |             response = {'topic': 'ENERGY_REQUEST_ACCEPTED', 'energy': energy_grant}
 96 |             agent.log_info("GRANTING:-----:%s"%energy_grant)
 97 |             next_state.environment_state.update_energy_granted_to_ally(energy_grant)
 98 |             print("BATTERY AFTER GRANTING-----:%s"%next_state.battery_curr)
 99 | 
100 |             _thread.start_new_thread(cg_http_service.register_transaction, (l_g_agent_state.iter,
101 |                                                                             message['time'], message['agentName'],
102 |                                                                             energy_grant))
103 | 
104 | 
105 |         l_rl_agent.update(state=l_curr_state, action=action, next_state=next_state, reward=0.0, eoi = False)
106 | 
107 |    
108 |         # update the global state
109 |         l_g_agent_state.energy_consumption = 0.0
110 |         l_g_agent_state.energy_generation = 0.0
111 |         l_g_agent_state.battery_curr = next_state.battery_curr
112 |         l_g_agent_state.environment_state = next_state.environment_state
113 | 
114 |         agent.log_info('Completed update operation. Resting!')
115 | 
116 |         # agent.log_info(next_state)
117 |         agent.log_info(l_g_agent_state.environment_state)
118 | 
119 |         print("-----------------------End of Transaction-----------------------\n\n\n")
120 | 
121 |         # Synchronize Objects
122 |         multiprocessing_ns.g_agent_state = l_g_agent_state
123 |         multiprocessing_ns.rl_agent = l_rl_agent
124 |         agent.log_info("Finished synchronizing objects across forked processes.")
125 | 
126 |         yield response
127 |     except Exception:
128 |         print(traceback.format_exc())
129 |         yield {'topic': 'ENERGY_REQUEST_DECLINE'}
130 | 
131 | 
132 |     finally:
133 |         # Release the lock
134 |         multiprocessing_lock.release()
135 |         agent.log_info("Lock Released!")
136 | 
137 | 
138 | def energy_consumption_handler(agent, message):
139 |     yield {'topic': 'Ok'}  # immediate reply
140 | 
141 |     # Exit check
142 |     if exit_check(message):
143 |         sys.exit(0)
144 | 
145 |     global osbrain_ns
146 | 
147 |     if message['topic'] == 'ENERGY_CONSUMPTION':
148 |         _thread.start_new_thread(invoke_agent_ec_handle, (agent, osbrain_ns, message))
149 | 
150 |     elif message['topic'] == 'END_OF_ITERATION' or message['topic'] == 'TRAINING_COMPLETE':
151 |         _thread.start_new_thread(eoi_handle, (agent, message))
152 | 
153 | 
154 | def invoke_agent_ec_handle(agent, osbrain_ns, message):
155 | 
156 |     try:
157 |         print("Trying to acquire lock!")
158 |         # Acquire the lock
159 |         multiprocessing_lock.acquire()
160 |     except Exception:
161 |         print(traceback.format_exc())
162 |         return
163 | 
164 |     print("\n-----------------------Start Transaction-----------------------")
165 |     agent.log_info('Received: %s' % message)
166 | 
167 |     try:
168 |         agent.log_info("Deepy copy of global state initiated...")
169 |         l_g_agent_state = multiprocessing_ns.g_agent_state
170 |         l_curr_state = copy.deepcopy(l_g_agent_state)
171 | 
172 |         # update with new values of energy consumption and generation
173 |         l_curr_state.time = datetime.strptime(message['time'], '%Y/%m/%d %H:%M')
174 | 
175 |         # Get energy generation
176 |         energy_generated = energy_generator.get_generation(l_curr_state.time)
177 | 
178 |         l_curr_state.energy_consumption = message['consumption']
179 |         l_curr_state.energy_generation = energy_generated
180 | 
181 | 
182 |         # call get action with this new state
183 |         l_rl_agent = multiprocessing_ns.rl_agent
184 |         action = l_rl_agent.get_action(copy.deepcopy(l_curr_state))
185 | 
186 |         agent.log_info('Performing action (%s).' % action)
187 |         # perform action and update global agent state
188 |         next_state, usable_generated_energy = l_rl_agent.do_action(l_curr_state, action, osbrain_ns, agent, args.agentname, allies)
189 | 
190 |         agent.log_info('Action complete. Registering action effect with the environment.')
191 | 
192 |         # Registering information to CG
193 |         _thread.start_new_thread(cg_http_service.update_energy_status, (message['time'],
194 |                                                                         message['iter'],
195 |                                                                         float(args.battInit),
196 |                                                                         message['consumption'],
197 |                                                                         usable_generated_energy,
198 |                                                                         next_state.environment_state.get_energy_borrowed_from_CG()
199 |                                                                         - l_curr_state.environment_state.get_energy_borrowed_from_CG()))
200 | 
201 | 
202 |         delta_reward = 0.0
203 |         # Get grid status from CG
204 |         # curr_grid_status = cg_http_service.get_energy_status(l_curr_state.iter)
205 |         # net_curr_grid_status = util.calc_net_grid_status(curr_grid_status)
206 | 
207 |         # calculate reward
208 |         # delta_reward = next_state.get_score() + util.reward_transaction(l_curr_state, next_state, action,
209 |         #                                                                 net_curr_grid_status)
210 | 
211 | 
212 |         agent.log_info('Updating agent with reward %s.' % delta_reward)
213 |         l_rl_agent.update(state=l_curr_state, action=action, next_state=next_state, reward=0.0)
214 | 
215 |         # Update grid status
216 |         # next_state.environment_state.net_grid_status = net_curr_grid_status
217 | 
218 |         # update the global state
219 |         l_g_agent_state.energy_consumption = 0.0
220 |         l_g_agent_state.energy_generation = 0.0
221 |         l_g_agent_state.battery_curr = next_state.battery_curr
222 |         l_g_agent_state.environment_state = next_state.environment_state
223 | 
224 |         # agent.log_info(next_state)
225 |         # agent.log_info(l_g_agent_state.environment_state)
226 |         agent.log_info('Completed update operation. Resting!')
227 |         print("-----------------------End of Transaction-----------------------\n\n")
228 | 
229 |         # Synchronize Objects
230 |         multiprocessing_ns.g_agent_state = l_g_agent_state
231 |         multiprocessing_ns.rl_agent = l_rl_agent
232 |         agent.log_info("Finished synchronizing objects across forked processes.")
233 | 
234 |     except Exception:
235 |         print(traceback.format_exc())
236 | 
237 |     finally:
238 |         # Release the lock
239 |         multiprocessing_lock.release()
240 |         agent.log_info("Lock Released!")
241 | 
242 | 
243 | def eoi_handle(agent, message):
244 |     '''
245 |     End of iteration handler.
246 |     :return:
247 |     '''
248 |     multiprocessing_lock.acquire()
249 |     global g_env_state
250 |     try:
251 |         print("\n\n\-----------------------Iteration (%s) Completed-----------------------\n\n"%message['iter'])
252 | 
253 |         # Fetching Reference
254 |         l_rl_agent = multiprocessing_ns.rl_agent
255 |         l_g_agent_state = multiprocessing_ns.g_agent_state
256 |         g_env_state = l_g_agent_state.environment_state
257 | 
258 | 
259 |         agent.log_info("Publishing Stats...")
260 |         agent.log_info(g_env_state)
261 | 
262 |         nzeb_status = (g_env_state.get_total_generated() + g_env_state.get_energy_borrowed_from_ally()) \
263 |                       - (g_env_state.get_total_consumed() + g_env_state.get_energy_borrowed_from_CG())
264 |         agent.log_info("NZEB Status: %s" % nzeb_status)
265 | 
266 | 
267 |         # Log EOI details to CG
268 |         cg_http_service.log_iteration_status(message['iter'], g_env_state, nzeb_status)
269 | 
270 | 
271 |         # --------------------- Updating reward ---------------------
272 |         agent.log_info('Calculating reward.')
273 | 
274 |         # Get grid status from CG
275 |         curr_grid_status = cg_http_service.get_energy_status(int(message['iter']))
276 |         net_curr_grid_status = util.calc_net_grid_status(curr_grid_status)
277 | 
278 |         # calculate reward
279 |         # delta_reward = util.compare(net_curr_grid_status, multiprocessing_ns.old_grid_status)
280 | 
281 | 
282 |         # If this grid status is better than the previous best grid status
283 |         # if util.compare(net_curr_grid_status, multiprocessing_ns.best_grid_status) > 1 :
284 |         #     multiprocessing_ns.best_grid_status = net_curr_grid_status
285 |         #     delta_reward += 3
286 | 
287 |         # delta_reward = delta_reward - abs(int(multiprocessing_ns.best_grid_status - net_curr_grid_status)) * 0.1
288 | 
289 |         # multiprocessing_ns.old_grid_status = net_curr_grid_status
290 | 
291 |         delta_reward = util.reward_transaction(state = None, next_state = None, action = None, net_curr_grid_status = net_curr_grid_status)
292 |         l_rl_agent.update(state=None, action=None, next_state=None, reward=delta_reward, eoi = True)
293 |         #---------------------------------------------------------------
294 | 
295 | 
296 |         if int(message['iter']) > 0 and int(message['iter']) % 50 == 0:
297 |             l_rl_agent.epsilon = round(l_rl_agent.epsilon * 0.8, 5)
298 |             agent.log_info("Updated Epsilon: %s"%l_rl_agent.epsilon)
299 | 
300 | 
301 |         # If training phase done then set exploration to 0
302 |         # i.e. complete exploitation
303 |         if message['topic'] == 'TRAINING_COMPLETE':
304 |             l_rl_agent.epsilon = 0.0
305 | 
306 | 
307 |         # reset the agent global state
308 |         print(".......................RESETTING GLOBAL STATE.......................")
309 |         l_g_agent_state.reset(float(args.battInit))
310 |         l_g_agent_state.iter = int(message['iter']) + 1
311 |         agent.log_info(l_g_agent_state.environment_state)
312 | 
313 | 
314 |         # Synchronize Objects
315 |         multiprocessing_ns.rl_agent = l_rl_agent
316 |         multiprocessing_ns.g_agent_state = l_g_agent_state
317 |         agent.log_info("Finished synchronizing objects across forked processes.")
318 | 
319 |     except Exception:
320 |         print(traceback.format_exc())
321 | 
322 |     finally:
323 |         # Release the lock
324 |         multiprocessing_lock.release()
325 | 
326 | def predict_energy_generation(time):
327 |     print("TBD")
328 |     return 0.0
329 | 
330 | 
331 | def get_ref_to_nameserver(ns_socket_addr):
332 |     osbrain_ns = None
333 |     print("Fetching reference to existing nameserver...")
334 |     osbrain_ns = NSProxy(nsaddr=ns_socket_addr)
335 |     return osbrain_ns
336 | 
337 | 
338 | def start_server_job(osbrain_ns):
339 |     time.sleep(2)
340 |     ns_agent = NameServer(osbrain_ns)
341 | 
342 |     # Start the scheduled job
343 |     steve = run_agent('Steve', serializer='json')
344 |     ns_agent.schedule_job(steve)
345 | 
346 | 
347 | def args_handler():
348 |     parser = argparse.ArgumentParser(description='Agent Module')
349 | 
350 |     parser.add_argument('--agentname', required=True, help='Name of the agent')
351 |     parser.add_argument('--nameserver', required=True, help='Socket address of the nameserver')
352 |     parser.add_argument('--allies', required=False, help='Socket address of the nameserver')
353 |     parser.add_argument('--battInit', required=True, help='Initial battery charge.')
354 |     parser.add_argument('--solarexposure', required=False, help='Path to solar exposure dataset')
355 |     parser.add_argument('--nSolarPanel', required=True, help='Number fo solar panel this house has')
356 | 
357 |     global args
358 |     args = parser.parse_args()
359 | 
360 |     if args.solarexposure is None:
361 |         args.solarexposure = 'assets/toronto_solar_exp_2011.csv'
362 | 
363 | 
364 | if __name__ == '__main__':
365 | 
366 |     print("Started process at ("+str(datetime.now())+")")
367 |     args_handler()
368 | 
369 |     print("Hi! I am "+args.agentname+". I am taking command of this process.")
370 | 
371 |     # Initiate name server
372 |     global osbrain_ns
373 |     osbrain_ns = get_ref_to_nameserver(args.nameserver)
374 | 
375 |     global cg_http_service
376 |     cg_http_service = httpservice.CGHTTPHandler(args.agentname)
377 | 
378 |     try:
379 |         from osbrain.logging import pyro_log
380 |         pyro_log()
381 | 
382 |         # instantiate reinforcement learning module and making it globally accessible
383 |         global multiprocessing_ns, multiprocessing_lock
384 |         manager = multiprocessing.Manager()
385 |         multiprocessing_ns = manager.Namespace()
386 |         multiprocessing_lock = manager.RLock()
387 | 
388 |         multiprocessing_ns.rl_agent = DQNAgent()
389 |         # multiprocessing_ns.rl_agent = LinearQAgent()
390 | 
391 |         global energy_generator
392 |         energy_generator = EnergyGeneration(args.solarexposure, float(args.nSolarPanel))
393 | 
394 |         # Declare a agent state and make it global
395 |         environment_state = EnvironmentState(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
396 |         # global g_agent_state
397 |         multiprocessing_ns.g_agent_state = AgentState(name = args.agentname, iter = 0, energy_consumption = 0.0, energy_generation = 0.0,
398 |                                    battery_curr = float(args.battInit), time = '2014/01/01 12:00', environment_state = environment_state,
399 |                                    cg_http_service = cg_http_service)
400 | 
401 |         multiprocessing_ns.old_grid_status = -99999
402 |         multiprocessing_ns.best_grid_status = -99999
403 | 
404 |         global allies
405 |         allies = [ally for ally in args.allies.split(",") ]
406 |         # allies = []
407 | 
408 |         # Initialize the agent
409 |         agent = run_agent(name = args.agentname, nsaddr = osbrain_ns.addr(), serializer='json', transport='tcp')
410 |         agent.bind('REP', alias=str('energy_request_'+args.agentname), handler=energy_request_handler)
411 |         agent.bind('REP', alias='consumption', handler=energy_consumption_handler)
412 | 
413 | 
414 | 
415 |     except Exception:
416 |         print(traceback.format_exc())
417 | 
418 | 
419 |     finally:
420 | 
421 |         while(1):
422 |             time.sleep(1)
423 | 
424 |         print("Bye!")
425 | 
426 | 
427 | 


--------------------------------------------------------------------------------