├── README.md ├── a3c ├── A3C_model.py ├── README.md ├── environment.py ├── main.py ├── my_optim.py ├── test.py └── train.py ├── a3c_ver1.0 ├── .DS_Store ├── 2022-03-30-22h-49m-02s.txt ├── environment.py ├── experiment.yaml ├── main.py ├── network.py ├── test.py └── train.py ├── choose_action.py ├── data ├── DataPreprocessing.py └── server_and_path_map.py ├── dqn_models.zip ├── dqn_train.py ├── environment.py ├── experiment.yaml ├── main.py ├── network.py ├── new_main.py ├── new_network.py ├── simulate_task.py └── temp_env.py /README.md: -------------------------------------------------------------------------------- 1 | # IoV-Computation-Offloading 2 | 3 | 1. A3C-based 4 | 2. Energy-Efficient and Delay-Aware 5 | 3. Partial Task Offloading 6 | -------------------------------------------------------------------------------- /a3c/A3C_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | def normalized_columns_initializer(weights, std=1.0): 7 | out = torch.randn(weights.size()) 8 | out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True)) 9 | return out 10 | 11 | 12 | def weights_init(m): 13 | classname = m.__class__.__name__ 14 | if classname.find('Conv') != -1: 15 | weight_shape = list(m.weight.data.size()) 16 | fan_in = np.prod(weight_shape[1:4]) 17 | fan_out = np.prod(weight_shape[2:4]) * weight_shape[0] 18 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 19 | m.weight.data.uniform_(-w_bound, w_bound) 20 | m.bias.data.fill_(0) 21 | elif classname.find('Linear') != -1: 22 | weight_shape = list(m.weight.data.size()) 23 | fan_in = weight_shape[1] 24 | fan_out = weight_shape[0] 25 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 26 | m.weight.data.uniform_(-w_bound, w_bound) 27 | m.bias.data.fill_(0) 28 | 29 | 30 | class ActorCritic(torch.nn.Module): 31 | def __init__(self, num_inputs,num_outputs): 32 | super(ActorCritic, self).__init__() 33 | self.fcn1 = nn.Linear(num_inputs, 64) 34 | self.fcn2 = nn.Linear(64, 32) 35 | self.fcn3 = nn.Linear(32, 16) 36 | 37 | self.lstm = nn.LSTMCell(16, 128) 38 | 39 | self.critic_linear = nn.Linear(128, 1) 40 | self.actor_linear = nn.Linear(128, num_outputs) 41 | self.assign_linear=nn.Sequential( 42 | nn.Linear(128, 1), 43 | nn.Sigmoid() 44 | ) 45 | self.apply(weights_init) 46 | self.actor_linear.weight.data = normalized_columns_initializer( 47 | self.actor_linear.weight.data, 0.01) 48 | self.actor_linear.bias.data.fill_(0) 49 | self.critic_linear.weight.data = normalized_columns_initializer( 50 | self.critic_linear.weight.data, 1.0) 51 | self.critic_linear.bias.data.fill_(0) 52 | 53 | self.lstm.bias_ih.data.fill_(0) 54 | self.lstm.bias_hh.data.fill_(0) 55 | 56 | self.train() 57 | 58 | def forward(self, inputs): 59 | inputs, (hx, cx) = inputs 60 | x = F.elu(self.fcn1(inputs)) 61 | x = F.elu(self.fcn2(x)) 62 | x = F.elu(self.fcn3(x)) 63 | 64 | x = x.view(-1, 16) 65 | hx, cx = self.lstm(x, (hx, cx)) 66 | x = hx 67 | 68 | return self.critic_linear(x), self.actor_linear(x), self.assign_linear(x), (hx, cx) 69 | -------------------------------------------------------------------------------- /a3c/README.md: -------------------------------------------------------------------------------- 1 | 이폴더에 simulate_task를 사용해서 test용 train용을 만들어주세요. 2 | train은 simulated_task.csv, test는 simulated_task_test.csv로 이름지어주세요. 3 | 4 | 5 | test.csv , train.csv파일도 만들어주세요 준비끝 6 | 7 | python main.py --help로 설명한번 읽고시작하세요. 
8 | -------------------------------------------------------------------------------- /a3c/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import math 4 | import torch 5 | 6 | # Parameter 7 | # Distance = ~10000m 8 | # 9 | VEH_COMP_RESOURCE = 50 #(MHz) 10 | VEH_TRAN_POWER = 1000 #scaling #0.1(W) 11 | VEC_COMP_RESOURCE = 6300 #(MHz) 12 | VEC_POWER = 0.007 #(W) 13 | BANDWIDTH = 5 #(MHz) 14 | PATH_FADE = 1.75 #scaling #3.75 15 | KAPPA = 10 ** -6 #원래 10^-11~10^-27 (결과에 따라 scaling할것) 16 | 17 | class Vehicle: 18 | def __init__(self, id, distance, velocity): 19 | self.id = id 20 | self.distance = distance 21 | self.v = velocity 22 | self.comp = np.random.normal(VEH_COMP_RESOURCE, 3) 23 | self.tran = np.random.normal(VEH_TRAN_POWER, 10) 24 | 25 | class Task: 26 | def __init__(self, vehicle, threshold, input, comp, e_weight): 27 | self.vehicle = vehicle 28 | self.threshold = threshold 29 | self.input = input 30 | self.comp = comp 31 | self.e_weight = e_weight 32 | 33 | class Server: 34 | def __init__(self, id): 35 | self.id = id 36 | self.comp = np.random.normal(VEC_COMP_RESOURCE, 70) 37 | self.power = np.random.normal(VEC_POWER, 0.002) 38 | self.crowd = 1 #init as 1 (N_j) #몇개의 일 처리중인지 39 | 40 | class Env: 41 | def __init__(self, nv, ns, load_vehicle_position, load_task_position): # num_vehicle,num_server,vehicle.csv,task.csv 42 | print("set environment") 43 | self.num_vehicle = nv 44 | self.vehicles = [] 45 | self.num_server = ns 46 | self.servers = [] 47 | self.tasks = [] 48 | self.actions=[] 49 | self.update = 1 50 | 51 | # .csv파일에서 vehicle 불러오기 52 | self.vehicle_data = pd.read_csv(load_vehicle_position) 53 | self.vehicle_data.set_index("TIMESTAMP", inplace=True) #TIMESTAMP를 index로 설정 loc.[x]하면 x타임스탬프 다나옴 54 | self.update_vehicle() 55 | 56 | # .csv파일에서 task 불러오기 57 | self.task_data = pd.read_csv(load_task_position) 58 | self.task_data.set_index("Timestamp", inplace=True) 59 | self.update_task() 60 | 61 | # server 불러오기 62 | for s in range(self.num_server): 63 | self.servers.append(Server(id=s+1)) 64 | 65 | def get_actions(self,act): 66 | self.actions=act 67 | for i in act: 68 | self.servers[int(i[1])-1].crowd+=1 69 | 70 | def update_vehicle(self): # 71 | sub_data = self.vehicle_data.loc[self.update] #update의 TIMESTAMP 차량 다불러옴 72 | sub_list = sub_data.values # list형식으로 값 다 받아옴 73 | for d in sub_list: 74 | if self.update == 1: 75 | distance_vector = [] 76 | for i in range(self.num_server): 77 | distance_vector.append(d[2+i]) 78 | self.vehicles.append(Vehicle(id=d[0], velocity=d[1], distance=distance_vector)) #vehecle들을 self.vehicles에 저장함 79 | else: 80 | for v in self.vehicles: 81 | if d[0] != v.id: 82 | continue 83 | else: 84 | distance_vector = [] 85 | for i in range(self.num_server): 86 | distance_vector.append(d[2+i]) 87 | v.distance = distance_vector 88 | v.v = d[1] #거리와 속도 업데이트 89 | 90 | def update_task(self): #vehicles와 같은방식 업데이트 91 | sub_data = self.task_data.loc[self.update] 92 | sub_list = sub_data.values 93 | self.tasks = [] 94 | for d in sub_list: 95 | self.tasks.append(Task(vehicle=d[0], threshold=d[1], input=d[2], comp=d[3], e_weight=d[4])) 96 | self.update += 1 97 | 98 | 99 | def construct_state(self): # input vector 생성 100 | """ 101 | Constructs the state to be exploited by the algorithms. 102 | Returns state vector as an input to the RL model calculated for each vehicle. 
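Each per-vehicle vector is [local_time, local_energy] followed by (remote_time, remote_energy) for every server, i.e. 2 + 2 * num_server entries (26 with the default 12 servers).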
103 | * Prerequisite: update_vehicle(), update_task() 104 | """ 105 | 106 | state_vector = [] 107 | 108 | for v in range(self.num_vehicle): 109 | # 논문 순서따름: threshold, velocity, x_i, y_i, distance, N_j 110 | # (논문 수정하기: GPS point --> distance btwn vehicles and servers) 111 | # (논문 수정하기: 1*26 1-dim. vector) 112 | state_vector_by_vehicle = [] 113 | 114 | local_time, local_energy = self.get_local_computing(v+1) 115 | state_vector_by_vehicle.append(local_time) 116 | state_vector_by_vehicle.append(local_energy) 117 | for s in range(self.num_server): 118 | remote_time, remote_energy = self.get_remote_computing(v+1, s+1) 119 | state_vector_by_vehicle.append(remote_time) 120 | state_vector_by_vehicle.append(remote_energy) 121 | 122 | state_vector.append(state_vector_by_vehicle) 123 | return np.array(state_vector) # (# of vehicle, 1*26) dim 124 | 125 | def get_max_tolerance(self, v, s): # Eq 1,2 # ID starts from 1 126 | #todo: .csv speed error --> stay_time~N(5,1) 127 | stay_time = 2 * self.vehicles[v-1].distance[s-1] / self.vehicles[v-1].v 128 | return min(stay_time, self.tasks[v-1].threshold) 129 | 130 | def get_transmission_rate(self, v, s): 131 | shared_bandwidth = BANDWIDTH / self.servers[s-1].crowd 132 | log = self.vehicles[v-1].tran * ((self.vehicles[v-1].distance[s-1] / 1000) ** (-PATH_FADE)) 133 | log /= self.servers[s-1].crowd 134 | return shared_bandwidth * math.log2(log+1) 135 | 136 | def get_local_computing(self, v): 137 | time = self.tasks[v-1].comp / self.vehicles[v].comp 138 | energy = KAPPA * (self.vehicles[v].comp ** 2) * self.tasks[v-1].comp 139 | return time, energy 140 | 141 | def get_remote_computing(self, v, s): 142 | trans = self.tasks[v-1].input / self.get_transmission_rate(v,s) 143 | comp = self.tasks[v-1].comp / (self.servers[s-1].comp / self.servers[s-1].crowd) 144 | time = trans + comp 145 | energy = self.vehicles[v-1].tran * (10 ** -4) * trans + self.servers[s-1].power * comp # ~0.01 146 | return time, energy 147 | 148 | def calculate_reward(self, vehicle, action): # 논문 수정하기 / 수식 이상함 149 | """ 150 | Calculates the reward based on the action of the vehicle. 151 | """ 152 | reward = self.get_max_tolerance(vehicle, int(action[1])) 153 | local_time, local_energy = self.get_local_computing(vehicle) 154 | remote_time, remote_energy = self.get_remote_computing(vehicle, int(action[1])) 155 | time = (1-self.tasks[vehicle].e_weight) * (action[0] * local_time + (1-action[0]) * remote_time) 156 | energy = self.tasks[vehicle].e_weight * (action[0] * local_energy + (1-action[0]) * remote_energy) 157 | return reward - time - energy 158 | 159 | def train_step(self,action): 160 | """ 161 | Step function of the environment. 162 | Calculates the rewards based on the action taken by the vehicles. 
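Each element of `action` is expected to be [assign_rate, server_id], where assign_rate in [0, 1] is the fraction of the task processed locally on the vehicle and server_id is the 1-indexed offloading server (see get_actions() and calculate_reward()).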
163 | :return: 164 | rewards: concatenated reward of each vehicle for the taken actions 165 | """ 166 | rews = np.zeros(self.num_vehicle) 167 | for i in self.servers: 168 | i.crowd=1 169 | self.get_actions(action) # 논문 수정하기: action = [float, int] (vehicle, #server) 170 | for v in range(self.num_vehicle): 171 | rews[v] = self.calculate_reward(v, self.actions[v-1]) 172 | self.update_vehicle() 173 | self.update_task() 174 | state = self.construct_state() 175 | return rews,torch.from_numpy(state).float() 176 | 177 | def reset(self): 178 | self.update=1 179 | self.update_vehicle() 180 | self.update_task() 181 | state = self.construct_state() 182 | return state -------------------------------------------------------------------------------- /a3c/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import os 5 | 6 | import torch 7 | import torch.multiprocessing as mp 8 | 9 | import my_optim 10 | import environment as envs 11 | from A3C_model import ActorCritic 12 | from test import test 13 | from train import train 14 | 15 | # Based on 16 | # https://github.com/pytorch/examples/tree/master/mnist_hogwild 17 | # Training settings 18 | 19 | 20 | parser = argparse.ArgumentParser(description='A3C') 21 | parser.add_argument('--lr', type=float, default=0.0001, 22 | help='learning rate (default: 0.0001)') 23 | parser.add_argument('--gamma', type=float, default=0.99, 24 | help='discount factor for rewards (default: 0.99)') 25 | parser.add_argument('--gae-lambda', type=float, default=1.00, 26 | help='lambda parameter for GAE (default: 1.00)') 27 | parser.add_argument('--entropy-coef', type=float, default=0.01, 28 | help='entropy term coefficient (default: 0.01)') 29 | parser.add_argument('--value-loss-coef', type=float, default=0.5, 30 | help='value loss coefficient (default: 0.5)') 31 | parser.add_argument('--max-grad-norm', type=float, default=50, 32 | help='value loss coefficient (default: 50)') 33 | parser.add_argument('--seed', type=int, default=1, 34 | help='random seed (default: 1)') 35 | parser.add_argument('--num-processes', type=int, default=16, 36 | help='how many training processes to use (default: 16)') 37 | parser.add_argument('--num-steps', type=int, default=64, 38 | help='number of forward steps in A3C (default: 64)') 39 | parser.add_argument('--max-episode-length', type=int, default=1000000, 40 | help='maximum length of an episode (default: 1000000)') 41 | parser.add_argument('--env-name', default='offloading', 42 | help='environment to train on (default: offloading)') 43 | parser.add_argument('--no-shared', default=False, 44 | help='use an optimizer without shared momentum.') 45 | parser.add_argument('--nv',default=100, 46 | help='number of vehicles. (default: 100)') 47 | parser.add_argument('--ns',default=12, 48 | help='number of servers. (default: 12)') 49 | parser.add_argument('--load_vehicle_position',default='./train.csv', 50 | help='number of vehicles. (default: ./train.csv)') 51 | parser.add_argument('--load_task_position',default='./simulated_tasks.csv', 52 | help='number of vehicles. (default: ./simulated_tasks.csv)') 53 | parser.add_argument('--load_vehicle_position_test',default='./test.csv', 54 | help='number of vehicles. (default: ./test.csv)') 55 | parser.add_argument('--load_task_position_test',default='./simulated_tasks_test.csv', 56 | help='number of vehicles. 
(default: ./simulated_tasks_test.csv)') 57 | parser.add_argument('--train_step',default=22359, 58 | help='number of servers. (default: 22359)') 59 | parser.add_argument('--test_step',default=11939, 60 | help='number of servers. (default: 11939)') 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | 66 | args = parser.parse_args() 67 | 68 | torch.manual_seed(args.seed) 69 | shared_model = ActorCritic( 70 | 26, args.ns) 71 | shared_model.share_memory() 72 | 73 | if args.no_shared: 74 | optimizer = None 75 | else: 76 | optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr) 77 | optimizer.share_memory() 78 | 79 | processes = [] 80 | 81 | counter = mp.Value('i', 0) 82 | lock = mp.Lock() 83 | 84 | p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter)) 85 | p.start() 86 | processes.append(p) 87 | 88 | for rank in range(0, args.num_processes): 89 | p = mp.Process(target=train, args=(rank, args, shared_model, counter, lock, optimizer)) 90 | p.start() 91 | processes.append(p) 92 | for p in processes: 93 | p.join() 94 | -------------------------------------------------------------------------------- /a3c/my_optim.py: -------------------------------------------------------------------------------- 1 | 2 | import math 3 | 4 | import torch 5 | import torch.optim as optim 6 | 7 | 8 | class SharedAdam(optim.Adam): 9 | """Implements Adam algorithm with shared states. 10 | """ 11 | 12 | def __init__(self, 13 | params, 14 | lr=1e-3, 15 | betas=(0.9, 0.999), 16 | eps=1e-8, 17 | weight_decay=0): 18 | super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) 19 | 20 | for group in self.param_groups: 21 | for p in group['params']: 22 | state = self.state[p] 23 | state['step'] = torch.zeros(1) 24 | state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() 25 | state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() 26 | 27 | def share_memory(self): 28 | for group in self.param_groups: 29 | for p in group['params']: 30 | state = self.state[p] 31 | state['step'].share_memory_() 32 | state['exp_avg'].share_memory_() 33 | state['exp_avg_sq'].share_memory_() 34 | 35 | def step(self, closure=None): 36 | """Performs a single optimization step. 37 | Arguments: 38 | closure (callable, optional): A closure that reevaluates the model 39 | and returns the loss. 
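Note: the per-parameter state tensors (step, exp_avg, exp_avg_sq) are created in __init__ and placed in shared memory by share_memory(), so all worker processes read and update the same Adam statistics.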
40 | """ 41 | loss = None 42 | if closure is not None: 43 | loss = closure() 44 | 45 | for group in self.param_groups: 46 | for p in group['params']: 47 | if p.grad is None: 48 | continue 49 | grad = p.grad.data 50 | state = self.state[p] 51 | 52 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 53 | beta1, beta2 = group['betas'] 54 | 55 | state['step'] += 1 56 | 57 | if group['weight_decay'] != 0: 58 | grad = grad.add(group['weight_decay'], p.data) 59 | 60 | # Decay the first and second moment running average coefficient 61 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 62 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 63 | 64 | denom = exp_avg_sq.sqrt().add_(group['eps']) 65 | 66 | bias_correction1 = 1 - beta1 ** state['step'].item() 67 | bias_correction2 = 1 - beta2 ** state['step'].item() 68 | step_size = group['lr'] * math.sqrt( 69 | bias_correction2) / bias_correction1 70 | 71 | p.data.addcdiv_(-step_size, exp_avg, denom) 72 | 73 | return loss -------------------------------------------------------------------------------- /a3c/test.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import numpy as np 7 | 8 | import environment as envs 9 | from A3C_model import ActorCritic 10 | 11 | def test(rank, args, shared_model, counter): 12 | torch.manual_seed(args.seed + rank) 13 | 14 | env = envs.Env(args.nv,args.ns,args.load_vehicle_position_test,args.load_task_position_test) 15 | 16 | model = ActorCritic(26,args.ns) 17 | 18 | model.eval() 19 | 20 | state = env.reset() 21 | state = torch.from_numpy(state).float() 22 | reward_sum = 0 23 | done = True 24 | 25 | start_time = time.time() 26 | 27 | episode_length = 0 28 | while True: 29 | episode_length += 1 30 | # Sync with the shared model 31 | if done: 32 | model.load_state_dict(shared_model.state_dict()) 33 | cx = torch.zeros(args.nv, 128) 34 | hx = torch.zeros(args.nv, 128) 35 | else: 36 | cx = cx.detach() 37 | hx = hx.detach() 38 | 39 | with torch.no_grad(): 40 | value, logit,assign_rate, (hx, cx) = model((state,(hx, cx))) 41 | prob = F.softmax(logit, dim=-1) 42 | 43 | action = (prob.multinomial(num_samples=1).detach()+1).numpy() 44 | assign_rate=assign_rate.numpy() 45 | action = np.hstack((assign_rate,action)).tolist() 46 | reward,state=env.train_step(action) 47 | reward_sum+=np.sum(reward) 48 | if (episode_length==args.test_step): 49 | done==True 50 | 51 | 52 | 53 | if done: 54 | print("Time {}, num steps {},".format( 55 | time.strftime("%Hh %Mm %Ss", 56 | time.gmtime(time.time() - start_time)), 57 | counter.value,reward_sum)) 58 | reward_sum = 0 59 | episode_length = 0 60 | state = env.reset() 61 | state = torch.from_numpy(state).float() 62 | time.sleep(60) 63 | 64 | 65 | -------------------------------------------------------------------------------- /a3c/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.optim as optim 4 | import numpy as np 5 | import environment as envs 6 | from A3C_model import ActorCritic 7 | 8 | 9 | def ensure_shared_grads(model, shared_model): 10 | for param, shared_param in zip(model.parameters(), 11 | shared_model.parameters()): 12 | if shared_param.grad is not None: 13 | return 14 | shared_param._grad = param.grad 15 | 16 | 17 | def train(rank, args, shared_model, counter, lock, optimizer=None): 18 | torch.manual_seed(args.seed + rank) 19 | 
env = envs.Env(args.nv,args.ns,args.load_vehicle_position,args.load_task_position) 20 | 21 | model = ActorCritic(26,args.ns) 22 | 23 | if optimizer is None: 24 | optimizer = optim.Adam(shared_model.parameters(), lr=args.lr) 25 | 26 | model.train() 27 | 28 | 29 | state = env.reset() 30 | state = torch.from_numpy(state).float() 31 | done = True 32 | 33 | episode_length = 0 34 | while True: 35 | # Sync with the shared model 36 | model.load_state_dict(shared_model.state_dict()) 37 | if done: 38 | cx = torch.zeros(args.nv, 128) 39 | hx = torch.zeros(args.nv, 128) 40 | else: 41 | cx = cx.detach() 42 | hx = hx.detach() 43 | 44 | 45 | values = [] 46 | log_probs = [] 47 | rewards = [] 48 | entropies = [] 49 | 50 | 51 | for step in range(args.num_steps): 52 | sum_reward=0 53 | 54 | episode_length += 1 55 | value, logit,assign_rate, (hx, cx) = model((state,(hx, cx))) 56 | 57 | prob = F.softmax(logit, dim=-1) 58 | log_prob = F.log_softmax(logit, dim=-1) 59 | entropy = -(log_prob * prob).sum(1, keepdim=True) 60 | entropies.append(entropy) 61 | 62 | action = (prob.multinomial(num_samples=1).detach()+1).numpy() 63 | assign_rate=assign_rate.detach().numpy() 64 | action = np.hstack((assign_rate,action)).tolist() 65 | log_prob = log_prob.gather(1, prob.multinomial(num_samples=1).detach()) 66 | reward,state=env.train_step(action) 67 | sum_reward=np.sum(reward) 68 | 69 | # reward = max(min(reward, 1), -1) 70 | 71 | with lock: 72 | counter.value += 1 73 | 74 | if episode_length==args.train_step: 75 | episode_length = 0 76 | state = env.reset() 77 | done=True 78 | 79 | values.append(value) 80 | log_probs.append(log_prob) 81 | rewards.append(sum_reward) 82 | 83 | if done: 84 | break 85 | 86 | R = torch.zeros(args.nv, 1) 87 | if not done: 88 | value, _, _, _ = model((state, (hx, cx))) 89 | R = value.detach() 90 | 91 | values.append(R) 92 | policy_loss = 0 93 | value_loss = 0 94 | gae = torch.zeros(1, 1) 95 | for i in reversed(range(len(rewards))): 96 | R = args.gamma * R + rewards[i] 97 | advantage = R - values[i] 98 | value_loss = value_loss + 0.5 * advantage.pow(2) 99 | 100 | # Generalized Advantage Estimation 101 | delta_t = rewards[i] + args.gamma * \ 102 | values[i + 1] - values[i] 103 | gae = gae * args.gamma * args.gae_lambda + delta_t 104 | 105 | policy_loss = policy_loss - \ 106 | log_probs[i] * gae.detach() - args.entropy_coef * entropies[i] 107 | 108 | optimizer.zero_grad() 109 | 110 | (torch.mean(policy_loss) + args.value_loss_coef * torch.mean(value_loss)).backward() 111 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) 112 | 113 | ensure_shared_grads(model, shared_model) 114 | optimizer.step() 115 | 116 | done=False 117 | -------------------------------------------------------------------------------- /a3c_ver1.0/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jungyeonkoh/IoV-Computation-Offloading/f163ee8d5406552faf452a7d4269b43ee3d60ef6/a3c_ver1.0/.DS_Store -------------------------------------------------------------------------------- /a3c_ver1.0/2022-03-30-22h-49m-02s.txt: -------------------------------------------------------------------------------- 1 | 2022-03-30-22h-49m-02s에 시작한 훈련의 log입니다. 
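(Reward log of the test process: columns are elapsed wall-clock time, the shared step counter incremented by the training workers, and the average per-step reward, averaged over vehicles, on the test data.)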
2 | Time: 00h 01m 17s, num steps: 226, rewards: -1.7777699907092954 3 | Time: 00h 02m 42s, num steps: 490, rewards: -0.7903762562148832 4 | Time: 00h 04m 14s, num steps: 777, rewards: 0.06636552969263622 5 | Time: 00h 05m 54s, num steps: 1085, rewards: 0.5300425730475546 6 | Time: 00h 07m 40s, num steps: 1418, rewards: 0.8772981063779822 7 | Time: 00h 09m 35s, num steps: 1775, rewards: 1.085076767581779 8 | Time: 00h 11m 36s, num steps: 2146, rewards: 1.3427431037246023 9 | Time: 00h 13m 46s, num steps: 2526, rewards: 1.7388297157114512 10 | Time: 00h 16m 03s, num steps: 2931, rewards: 2.2756265191390512 11 | Time: 00h 18m 28s, num steps: 3358, rewards: 2.358653656356279 12 | Time: 00h 21m 00s, num steps: 3809, rewards: 2.5300825312254824 13 | Time: 00h 23m 39s, num steps: 4267, rewards: 2.602937590664176 14 | Time: 00h 26m 28s, num steps: 4737, rewards: 2.6746091335892546 15 | Time: 00h 29m 23s, num steps: 5230, rewards: 2.7101072289064554 16 | Time: 00h 32m 25s, num steps: 5743, rewards: 2.899284656273266 17 | Time: 00h 35m 36s, num steps: 6264, rewards: 2.8955213617765736 18 | Time: 00h 38m 54s, num steps: 6795, rewards: 2.886341447674358 19 | Time: 00h 42m 20s, num steps: 7347, rewards: 2.971908149790779 20 | Time: 00h 45m 55s, num steps: 7922, rewards: 3.2543003711707073 21 | Time: 00h 49m 38s, num steps: 8491, rewards: 3.0407692941879714 22 | Time: 00h 53m 32s, num steps: 9075, rewards: 3.071349286619688 23 | Time: 00h 57m 34s, num steps: 9682, rewards: 3.2282546486513093 24 | -------------------------------------------------------------------------------- /a3c_ver1.0/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import math 4 | 5 | # Parameter 6 | # Distance = ~10000m 7 | # 8 | VEH_COMP_RESOURCE = 50 #(MHz) 9 | VEH_TRAN_POWER = 1000 #scaling #0.1(W) 10 | VEC_COMP_RESOURCE = 6300 #(MHz) 11 | VEC_POWER = 0.007 #(W) 12 | BANDWIDTH = 5 #(MHz) 13 | PATH_FADE = 1.75 #scaling #3.75 14 | KAPPA = 10 ** -6 #원래 10^-11~10^-27 (결과에 따라 scaling할것) 15 | 16 | class Vehicle: 17 | def __init__(self, id, distance, velocity): 18 | self.id = id 19 | self.distance = distance 20 | self.v = velocity 21 | self.comp = np.random.normal(VEH_COMP_RESOURCE, 3) 22 | self.tran = np.random.normal(VEH_TRAN_POWER, 10) 23 | 24 | class Task: 25 | def __init__(self, vehicle, threshold, input, comp, e_weight): 26 | self.vehicle = vehicle 27 | self.threshold = threshold 28 | self.input = input 29 | self.comp = comp 30 | self.e_weight = e_weight 31 | 32 | class Server: 33 | def __init__(self, id): 34 | self.id = id 35 | self.comp = np.random.normal(VEC_COMP_RESOURCE, 70) 36 | self.power = np.random.normal(VEC_POWER, 0.002) 37 | self.crowd = 1 #init as 1 (N_j) 38 | 39 | class Env: 40 | def __init__(self, nv, ns, vehicle, vehicle_test, task, task_test, train): 41 | self.num_vehicle = nv 42 | self.vehicles = [] 43 | self.num_server = ns 44 | self.servers = [] 45 | self.tasks = [] 46 | 47 | self.update = 1 48 | 49 | if train: 50 | self.vehicle_data = pd.read_csv(vehicle) 51 | self.task_data = pd.read_csv(task) 52 | else: 53 | self.vehicle_data = pd.read_csv(vehicle_test) 54 | self.task_data = pd.read_csv(task_test) 55 | 56 | # .csv파일에서 vehicle 불러오기 57 | self.vehicle_data.set_index("TIMESTAMP", inplace=True) 58 | # .csv파일에서 task 불러오기 59 | self.task_data.set_index("Timestamp", inplace=True) 60 | 61 | # server 불러오기 62 | for s in range(self.num_server): 63 | self.servers.append(Server(id=s+1)) 64 | 65 | def update_vehicle(self): 66 
| sub_data = self.vehicle_data.loc[self.update] 67 | sub_list = sub_data.values 68 | for d in sub_list: 69 | if self.update == 1: 70 | distance_vector = [] 71 | for i in range(self.num_server): 72 | distance_vector.append(d[2+i]) 73 | self.vehicles.append(Vehicle(id=d[0], velocity=d[1], distance=distance_vector)) 74 | else: 75 | for v in self.vehicles: 76 | if d[0] != v.id: 77 | continue 78 | else: 79 | distance_vector = [] 80 | for i in range(self.num_server): 81 | distance_vector.append(d[2+i]) 82 | v.distance = distance_vector 83 | v.v = d[1] 84 | 85 | def update_task(self): 86 | sub_data = self.task_data.loc[self.update] 87 | sub_list = sub_data.values 88 | self.tasks = [] 89 | 90 | # for single vehicle 91 | #self.tasks.append(Task(vehicle=sub_list[0], threshold=sub_list[1], input=sub_list[2], comp=sub_list[3], e_weight=sub_list[4])) 92 | for d in sub_list: 93 | self.tasks.append(Task(vehicle=d[0], threshold=d[1], input=d[2], comp=d[3], e_weight=d[4])) 94 | self.update += 1 95 | def construct_state(self): 96 | """ 97 | Constructs the state to be exploited by the algorithms. 98 | Returns state vector as an input to the RL model calculated for each vehicle. 99 | * Prerequisite: update_vehicle(), update_task() 100 | """ 101 | state_vector = [] 102 | 103 | for v in range(self.num_vehicle): 104 | # 논문 순서따름: threshold, velocity, x_i, y_i, distance, N_j 105 | # (논문 수정하기: GPS point --> distance btwn vehicles and servers) 106 | # (논문 수정하기: 1*26 1-dim. vector) 107 | state_vector_by_vehicle = [] 108 | 109 | local_time, local_energy = self.get_local_computing(v) # vehicle index: 0~ 110 | state_vector_by_vehicle.append(local_time) 111 | state_vector_by_vehicle.append(local_energy) 112 | for s in range(self.num_server): 113 | remote_time, remote_energy = self.get_remote_computing(v, s) 114 | state_vector_by_vehicle.append(remote_time) 115 | state_vector_by_vehicle.append(remote_energy) 116 | 117 | state_vector.append(state_vector_by_vehicle) 118 | return state_vector 119 | 120 | # def get_max_tolerance(self, v, s): # Eq 1,2 # ID starts from 1 121 | # stay_time = 2 * self.vehicles[v-1].distance[s-1] / self.vehicles[v-1].v 122 | # return min(stay_time, self.tasks[v-1].threshold) 123 | 124 | def get_transmission_rate(self, v, s): # vehicle index: 0~, server index: 0~ 125 | shared_bandwidth = BANDWIDTH / self.servers[s].crowd 126 | log = self.vehicles[v].tran * ((self.vehicles[v].distance[s] / 1000) ** (-PATH_FADE)) 127 | log /= self.servers[s].crowd 128 | return shared_bandwidth * math.log2(log+1) 129 | 130 | def get_local_computing(self, v): # vehicle index: 0~ 131 | time = self.tasks[v].comp / self.vehicles[v].comp 132 | energy = KAPPA * (self.vehicles[v].comp ** 2) * self.tasks[v].comp 133 | return time, energy 134 | 135 | def get_remote_computing(self, v, s): # vehicle index: 0~ / server index: 0~ 136 | trans = self.tasks[v].input / self.get_transmission_rate(v,s) 137 | comp = self.tasks[v].comp / (self.servers[s].comp / self.servers[s].crowd) 138 | time = trans + comp 139 | energy = self.vehicles[v].tran * (10 ** -4) * trans + self.servers[s].power * comp # ~0.01 140 | return time, energy 141 | 142 | def calculate_reward(self, vehicle, action, assign_prob): # 논문 수정하기 / 수식 이상함 143 | """ 144 | Calculates the reward based on the action of the vehicle. 
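Uses a constant base reward of 15 and subtracts the weighted cost:
reward = 15 - (1 - e_weight) * (p * t_local + (1 - p) * t_remote) - e_weight * (p * E_local + (1 - p) * E_remote),
where p = assign_prob is the fraction of the task processed locally and `action` is the 0-indexed server chosen for the offloaded part.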
145 | """ 146 | reward = 15 147 | local_time, local_energy = self.get_local_computing(vehicle) 148 | remote_time, remote_energy = self.get_remote_computing(vehicle, action) 149 | time = (1-self.tasks[vehicle].e_weight) * (assign_prob * local_time + (1-assign_prob) * remote_time) 150 | energy = self.tasks[vehicle].e_weight * (assign_prob * local_energy + (1-assign_prob) * remote_energy) 151 | return reward - time - energy 152 | 153 | #def step(self, action): 154 | def step(self, action, assign_prob): # action(server) index: 0~ 155 | """ 156 | Step function of the environment. 157 | Calculates the rewards based on the action taken by the vehicles. 158 | :return: 159 | next_state 160 | rewards: concatenated reward of each vehicle for the taken actions 161 | """ 162 | for i in range(self.num_server): 163 | self.servers[i].crowd = 1 164 | for i in range(self.num_vehicle): 165 | self.servers[action[i]].crowd += 1 166 | 167 | rewards = [] 168 | for i in range(self.num_vehicle): 169 | #reward = self.calculate_reward(i, action[i], 0.) 170 | reward = self.calculate_reward(i, action[i], assign_prob[i]) 171 | rewards.append(reward.item()) 172 | 173 | self.update_vehicle() 174 | self.update_task() 175 | next_state = self.construct_state() 176 | return next_state, rewards 177 | def reset(self): 178 | self.update=1 179 | self.update_vehicle() 180 | self.update_task() 181 | state = self.construct_state() 182 | return state -------------------------------------------------------------------------------- /a3c_ver1.0/experiment.yaml: -------------------------------------------------------------------------------- 1 | isTrain: True 2 | experiment_name: toy1 3 | episode_size: 100 4 | step_size: 10000 5 | batch_size: 50 6 | discount_rate: 0.99 7 | print_reward_interval: 1000 8 | num_processes: 10 9 | seed: 1 10 | max_step: 9999 11 | 12 | EnvironmentParams: 13 | vehicle: "./data/train.csv" 14 | vehicle_test: "./data/test.csv" 15 | task: "./simulated_tasks_100.csv" 16 | task_test: "./simulated_tasks_test.csv" 17 | nv: 100 18 | ns: 12 19 | 20 | ActorParams: 21 | state_space: 26 22 | action_space: 12 23 | num_hidden_layer: 3 24 | hidden_dim: 64 25 | 26 | CriticParams: 27 | state_space: 26 28 | num_hidden_layer: 3 29 | hidden_dim: 64 -------------------------------------------------------------------------------- /a3c_ver1.0/main.py: -------------------------------------------------------------------------------- 1 | import environment 2 | import yaml 3 | import torch 4 | import torch.multiprocessing as mp 5 | import torch.optim as optim 6 | import torch.nn.functional as F 7 | from torch.distributions import Categorical 8 | import random 9 | import os 10 | import numpy as np 11 | from network import Actor, Critic 12 | import os 13 | from test import test 14 | from train import train 15 | #os.environ['KMP_DUPLICATE_LIB_OK']='True' 16 | 17 | def nn(env): 18 | step = 0 19 | score = 0 20 | 21 | while step < max_step: 22 | rewards = [] 23 | for i in range(env.num_vehicle): 24 | action = np.argmin(env.vehicles[i].distance) 25 | reward = env.calculate_reward(i, action, 0.) 26 | rewards.append(reward/100) 27 | env.update_vehicle() 28 | env.update_task() 29 | score += np.mean(rewards) 30 | step += 1 31 | if step % 1000 == 0: 32 | print(step, " : ", score / step) 33 | 34 | def rand(env): 35 | step = 0 36 | score = 0 37 | 38 | while step < max_step: 39 | rewards = [] 40 | for i in range(env.num_vehicle): 41 | action = np.random.randint(0, env.num_server) 42 | reward = env.calculate_reward(i, action, 0.) 
43 | rewards.append(reward / 100) 44 | env.update_vehicle() 45 | env.update_task() 46 | score += np.mean(rewards) 47 | step += 1 48 | if step % 1000 == 0: 49 | print(step, " : ", score / step) 50 | 51 | def seed_torch(seed): 52 | torch.manual_seed(seed) 53 | if torch.backends.cudnn.enabled: 54 | torch.backends.cudnn.benchmark = False 55 | torch.backends.cudnn.deterministic = True 56 | 57 | if __name__ == '__main__': 58 | 59 | 60 | config = yaml.load(open("./experiment.yaml"), Loader=yaml.FullLoader) 61 | seed=config["seed"] 62 | np.random.seed(seed) 63 | random.seed(seed) 64 | seed_torch(seed) 65 | #env = environment.Env(**config["EnvironmentParams"], train=True) 66 | 67 | #nn(env) 68 | #rand(env) 69 | 70 | #test_env = environment.Env(**config["EnvironmentParams"], train=False) 71 | # model = TheModelClass(*args, **kwargs) 72 | # model.load_state_dict(torch.load(PATH)) 73 | # model.eval() 74 | global_Actor = Actor(**config["ActorParams"]) 75 | global_Critic = Critic(**config["CriticParams"]) 76 | global_Actor.share_memory() 77 | global_Critic.share_memory() 78 | 79 | isTrain = config.setdefault("isTrain", True) 80 | experiment_name = config.setdefault("experiment_name", "") 81 | #episode_size = config.setdefault("episode_size", 1000) 82 | step_size = config.setdefault("step_size", 10000) 83 | batch_size = config.setdefault("batch_size", 128) 84 | discount_rate = config.setdefault("discount_rate", 0.99) 85 | #print_reward_interval = config.setdefault("print_reward_interval", 1000) 86 | 87 | processes = [] 88 | num_processes=config["num_processes"] 89 | 90 | counter = mp.Value('i', 0) 91 | lock = mp.Lock() 92 | 93 | p = mp.Process(target=test, args=(config,num_processes, global_Actor,counter,2000)) 94 | p.start() 95 | processes.append(p) 96 | 97 | for rank in range(num_processes): 98 | p = mp.Process(target=train, args=(config, rank, counter, lock, batch_size, discount_rate, global_Actor, global_Critic,config["max_step"])) 99 | p.start() 100 | processes.append(p) 101 | for p in processes: 102 | p.join() -------------------------------------------------------------------------------- /a3c_ver1.0/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions import Categorical 5 | import math 6 | 7 | class Actor(nn.Module): 8 | def __init__(self, state_space=None, action_space=None, num_hidden_layer=2, hidden_dim=None): 9 | super(Actor, self).__init__() 10 | 11 | # state_space, action_space check 12 | assert state_space is not None, "None state_space input: state_space should be assigned." 13 | assert action_space is not None, "None action_space input: action_space should be assigned." 14 | 15 | if hidden_dim is None: 16 | hidden_dim = state_space * 2 17 | 18 | self.layers = nn. 
ModuleList() 19 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 20 | for i in range(num_hidden_layer): # hidden layer 21 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 22 | self.layers.append(nn.LSTMCell(hidden_dim,hidden_dim)) 23 | 24 | self.layers_act=nn.ModuleList() 25 | dim_num=hidden_dim 26 | for i in range(3): # hidden layer 27 | self.layers_act.append(nn.Linear(dim_num, int(dim_num/2))) 28 | dim_num=int(dim_num/2) 29 | self.layers_act.append(nn.Linear(dim_num,action_space)) 30 | 31 | self.layers_partial=nn.ModuleList() 32 | dim_num=hidden_dim+1 33 | for i in range(3): # hidden layer 34 | self.layers_partial.append(nn.Linear(dim_num, int(math.floor(dim_num/2)))) 35 | dim_num=int(math.floor(dim_num/2)) 36 | self.layers_partial.append(nn.Linear(dim_num,1)) 37 | 38 | def forward(self, x): 39 | x,(hx,cx)=x 40 | for layer in self.layers[:-1]: 41 | x = F.relu(layer(x)) 42 | hx,cx=self.layers[-1](x,(hx,cx)) 43 | x=hx 44 | 45 | act_out=x 46 | for layer in self.layers_act[:-1]: 47 | act_out = F.relu(layer(act_out)) 48 | out = F.softmax(self.layers_act[-1](act_out),dim=1) 49 | action_dist = Categorical(out) 50 | action = action_dist.sample() 51 | x=torch.cat([x,action.unsqueeze(1)],dim=1) 52 | 53 | 54 | for layer in self.layers_partial[:-1]: 55 | x = F.relu(layer(x)) 56 | partial=torch.sigmoid(self.layers_partial[-1](x)) 57 | 58 | return out,partial 59 | 60 | class Critic(nn.Module): 61 | def __init__(self, state_space=None, num_hidden_layer=2, hidden_dim=None): 62 | super(Critic, self).__init__() 63 | 64 | # state_space check 65 | assert state_space is not None, "None state_space input: state_space should be assigned." 66 | 67 | if hidden_dim is None: 68 | hidden_dim = state_space * 2 69 | 70 | self.layers = nn.ModuleList() 71 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 72 | for i in range(num_hidden_layer): # hidden layer 73 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 74 | self.layers.append(nn.Linear(hidden_dim,1 )) # output layer 75 | 76 | def forward(self, x): 77 | for layer in self.layers[:-1]: 78 | x = F.relu(layer(x)) 79 | out = self.layers[-1](x) 80 | return out 81 | -------------------------------------------------------------------------------- /a3c_ver1.0/test.py: -------------------------------------------------------------------------------- 1 | import environment 2 | import torch 3 | from torch.distributions import Categorical 4 | import numpy as np 5 | from network import Actor 6 | import time 7 | import os 8 | 9 | 10 | def test(config,rank, global_Actor,counter,max_step): 11 | print("Start test") 12 | torch.manual_seed(config["seed"]+rank) 13 | rewards=[] 14 | env = environment.Env(**config["EnvironmentParams"], train=False) 15 | 16 | start_time = time.time() 17 | name=time.strftime('%Y-%m-%d-%Hh-%Mm-%Ss', time.localtime(time.time())) 18 | f=open("./"+name+".txt",'w') 19 | f.write(name+"에 시작한 훈련의 log입니다.\n") 20 | 21 | while True: 22 | score = 0 23 | step = 0 24 | state=env.reset() 25 | hx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 26 | cx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 27 | 28 | while step < max_step: 29 | action_prob,partial = global_Actor((torch.FloatTensor(state),(hx,cx))) 30 | action_dist = Categorical(action_prob) 31 | action = action_dist.sample() 32 | 33 | next_state, reward = env.step(action,partial) 34 | state = next_state 35 | score += np.mean(reward) 36 | step += 1 37 | hx=hx.detach() 38 | cx=cx.detach() 
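# detaching hx/cx each step keeps the recurrent state from extending the autograd graph over the whole test run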
39 | rewards.append(score) 40 | f.write("Time: {}, num steps: {}, rewards: {}\n".format( 41 | time.strftime("%Hh %Mm %Ss", 42 | time.gmtime(time.time() - start_time)), 43 | counter.value,score/max_step)) 44 | print("test.py: Time: {}, num steps: {}, rewards: {}".format( 45 | time.strftime("%Hh %Mm %Ss", 46 | time.gmtime(time.time() - start_time)), 47 | counter.value,score/max_step)) 48 | 49 | if(counter.value>=1000000): 50 | np.save("reward"+str(env.num_vehicle)+"_test.npy", rewards) -------------------------------------------------------------------------------- /a3c_ver1.0/train.py: -------------------------------------------------------------------------------- 1 | import environment 2 | import torch 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from torch.distributions import Categorical 6 | import numpy as np 7 | from network import Actor, Critic 8 | import os 9 | 10 | def train(config, rank, counter,lock, batch_size, discount_rate, global_Actor, global_Critic,max_step): 11 | torch.manual_seed(config["seed"]+rank) 12 | print("Start "+str(rank)+"process train") 13 | env = environment.Env(**config["EnvironmentParams"], train=True) 14 | 15 | local_Actor = Actor(**config["ActorParams"]) 16 | local_Critic = Critic(**config["CriticParams"]) 17 | local_Actor.load_state_dict(global_Actor.state_dict()) 18 | local_Critic.load_state_dict(global_Critic.state_dict()) 19 | actor_optimizer = optim.Adam(global_Actor.parameters(), lr=1e-4) 20 | critic_optimizer = optim.Adam(global_Critic.parameters(), lr=1e-4) 21 | 22 | batch = [] 23 | rewards = [] 24 | 25 | hx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 26 | cx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 27 | epi=-1 28 | while True: 29 | epi+=1 30 | env.update = 1 31 | state = env.reset() 32 | 33 | score = 0 34 | step = 0 35 | 36 | while step < max_step: 37 | # Get action 38 | 39 | action_prob,partial = local_Actor((torch.FloatTensor(state),(hx,cx))) # shape: (V, S) 40 | action_dist = Categorical(action_prob) 41 | action = action_dist.sample() # server index : 0~ 42 | next_state, reward = env.step(action,partial) 43 | done = np.zeros_like(reward) if len(batch) == batch_size - 1 else np.ones_like(reward) 44 | action_prob_temp = [] 45 | for i in range(len(action)): 46 | action_prob_temp.append(action_prob[i][action[i]]) 47 | reward[i] /= 100 48 | 49 | batch.append([state, next_state, reward, action_prob_temp, done]) 50 | 51 | if len(batch) >= batch_size: 52 | state_buffer = [] 53 | next_state_buffer = [] 54 | reward_buffer = [] 55 | action_prob_buffer = [] 56 | done_buffer = [] 57 | 58 | 59 | for item in batch: 60 | state_buffer.append(item[0]) 61 | next_state_buffer.append(item[1]) 62 | reward_buffer.append(item[2]) 63 | action_prob_buffer.append(item[3]) 64 | done_buffer.append(item[4]) 65 | 66 | state_buffer = torch.FloatTensor(state_buffer) # (batch_size, V, state_size) 67 | next_state_buffer = torch.FloatTensor(next_state_buffer) 68 | reward_buffer = torch.FloatTensor(reward_buffer).unsqueeze(-1) # (batch_size, V, 1) 69 | done_buffer = torch.FloatTensor(done_buffer).unsqueeze(-1) # (batch_size, V, 1) 70 | 71 | value_state = local_Critic(state_buffer).squeeze(1) # (batch_size, V, 1) 72 | value_next_state = local_Critic(next_state_buffer).squeeze(1) # (batch_size, V, 1) 73 | Q = reward_buffer + discount_rate * value_next_state * done_buffer 74 | A = Q - value_state 75 | 76 | # update Critic 77 | critic_optimizer.zero_grad() 78 | critic_loss = 
F.mse_loss(value_state, Q.detach()) # constant 79 | critic_loss.backward(retain_graph=True) 80 | for global_param, local_param in zip(global_Critic.parameters(), local_Critic.parameters()): 81 | global_param._grad = local_param.grad 82 | critic_optimizer.step() 83 | 84 | # update Actor 85 | actor_optimizer.zero_grad() 86 | actor_loss = 0 87 | for idx, prob in enumerate(action_prob_buffer): 88 | for i in range(len(prob)): 89 | actor_loss += -A[idx][i] * torch.log(prob[i]) 90 | actor_loss /= len(action_prob_buffer) 91 | actor_loss.backward() 92 | 93 | 94 | for global_param, local_param in zip(global_Actor.parameters(), local_Actor.parameters()): 95 | global_param._grad = local_param.grad 96 | actor_optimizer.step() 97 | 98 | local_Actor.load_state_dict(global_Actor.state_dict()) 99 | local_Critic.load_state_dict(global_Critic.state_dict()) 100 | with lock: 101 | counter.value+=1 102 | 103 | batch = [] 104 | hx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 105 | cx=torch.zeros(config["EnvironmentParams"]["nv"],config["ActorParams"]["hidden_dim"]) 106 | else: 107 | hx=hx.detach() 108 | cx=cx.detach() 109 | 110 | state = next_state 111 | score += np.mean(reward) 112 | step += 1 113 | #if (step % 1000 == 0 and rank==1): 114 | # print("Episode: ", epi, " Step: ", step, " Reward: ", score/step) 115 | 116 | 117 | #print("Save reward value: ", score/max_step) 118 | rewards.append(score/max_step) 119 | 120 | # print weight values 121 | if ((epi % 5) == 4 and rank==1): 122 | np.save("reward"+str(env.num_vehicle)+"_"+str(epi)+".npy", rewards) 123 | # save model weights 124 | if ((epi % 10) == 0 and rank==1): 125 | save_dir = "./a3c_v"+str(env.num_vehicle) 126 | if not os.path.isdir(save_dir): 127 | os.mkdir(save_dir) 128 | # torch.save(local_Actor.state_dict(), os.path.join(save_dir, str(epi)+"_local_actor.pt")) 129 | # torch.save(local_Critic.state_dict(), os.path.join(save_dir, str(epi)+"_local_critic.pt")) 130 | torch.save(global_Actor.state_dict(), os.path.join(save_dir, str(epi)+"_global_actor.pt")) 131 | torch.save(global_Critic.state_dict(), os.path.join(save_dir, str(epi)+"_global_critic.pt")) 132 | if counter.value>=1000000: 133 | break 134 | -------------------------------------------------------------------------------- /choose_action.py: -------------------------------------------------------------------------------- 1 | import random 2 | import tensorflow as tf 3 | 4 | 5 | num_servers = 12 6 | output_size = num_servers 7 | 8 | 9 | models_loaded = False 10 | dqns = [] 11 | 12 | 13 | def load_models(num_vehicles): 14 | global models_loaded 15 | for index in range(num_vehicles): 16 | dqns.append(tf.keras.models.load_model(f'dqn_models/{num_vehicles}_{index}')) 17 | print('Models loaded!') 18 | models_loaded = True 19 | 20 | 21 | def choose_action(num_vehicles, v_index, state): 22 | if not models_loaded: 23 | print('Loading models...') 24 | load_models(num_vehicles) 25 | 26 | dqn = dqns[v_index] 27 | qualities = dqn(state) 28 | action = tf.math.argmax(qualities[0]) 29 | action = int(action) 30 | 31 | return action 32 | 33 | -------------------------------------------------------------------------------- /data/DataPreprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | from haversine import haversine 5 | from multiprocessing import Pool 6 | import multiprocessing as mp 7 | from functools import partial 8 | print("Number of processors: ", 
mp.cpu_count()) 9 | 10 | 11 | server_location=[(41.9797074301314, 12.517774537227256), 12 | (41.96645724499441, 12.442500521765464), 13 | (41.92362247264185, 12.419533015161926), 14 | (41.859693563285674, 12.424287549712513), 15 | (41.83379583604655, 12.49831550937137), 16 | (41.860946758260795, 12.555531201442555), 17 | (41.94153940428322, 12.553860053429428), 18 | (41.93927042880409, 12.496136876739858), 19 | (41.91832213334783, 12.47306760729969), 20 | (41.887024456510844, 12.472200090859424), 21 | (41.88291790209538, 12.534428295360426), 22 | (41.905086456104385, 12.488293373328746)] 23 | 24 | def load_data(): 25 | file = pd.read_csv("./taxi_february.txt", sep=";") 26 | file = file.rename(columns={ ' TIMESTAMP': 'TIMESTAMP', ' LOCATION': 'LOCATION'}) 27 | 28 | # 날짜 추출의 편의를 위해 TIMESTAMP row를 Datetime 항목으로 변경하여 저장 29 | file["TIMESTAMP"] = pd.to_datetime(file["TIMESTAMP"]) 30 | # 거리 계산의 편의를 위해 LOCATION row를 (float, float) 형식의 튜플로 변경하여 저장 31 | temp_location = file["LOCATION"].str.split() 32 | def createLocation(t): 33 | return (float(t[0][6:]), float(t[1][:-1])) 34 | 35 | file["LOCATION"] = temp_location.map(createLocation) 36 | 37 | # 2014-02-01부터 2014-02-22까지 훈련 데이터 38 | # 2014-02-23부터 2014-03-02까지 검증 데이터 39 | train_mask = (file["TIMESTAMP"] >= '2014-02-01') & (file["TIMESTAMP"] <= '2014-02-22') 40 | test_mask = ~train_mask 41 | train = file.loc[train_mask] 42 | test = file.loc[test_mask] 43 | 44 | train.sort_values(by=["ID", "TIMESTAMP"], inplace=True, ascending=True) 45 | train=train.reset_index(drop=True) 46 | test.sort_values(by=["ID", "TIMESTAMP"], inplace=True, ascending=True) 47 | test=test.reset_index(drop=True) 48 | 49 | group_train=train.groupby(train["ID"]) 50 | train_data=group_train.filter(lambda g: len(g)>50000) 51 | list_train=list(train_data.groupby(train_data["ID"])) 52 | 53 | group_test=test.groupby(test["ID"]) 54 | test_data=group_test.filter(lambda g: len(g)>25000) 55 | list_test=list(test_data.groupby(test_data["ID"])) 56 | 57 | return list_train,list_test 58 | 59 | 60 | def multiprocessing(list_train,num): 61 | data=list_train[num][1] 62 | time_inter=[0] 63 | num=0 64 | j=1 65 | prev_timestamp=data.iloc[0]["TIMESTAMP"] 66 | prev_location=data.iloc[0]["LOCATION"] 67 | for i, row in tqdm(data.iterrows(),total=data.shape[0]): 68 | if(haversine(prev_location, row["LOCATION"]) * 1000 == 0 ): 69 | time_inter.append(0) 70 | else: 71 | time_inter.append((row["TIMESTAMP"]-prev_timestamp).seconds) 72 | prev_timestamp=row["TIMESTAMP"] 73 | prev_location=row["LOCATION"] 74 | data["TIME_INTER"]=time_inter[1:] 75 | data=data.drop(index=data[data["TIME_INTER"]==0].index) 76 | data=data.drop(columns="TIME_INTER") 77 | return data 78 | 79 | def refine(data): 80 | re_data=data[0] 81 | for i in data[1:100]: 82 | re_data=pd.concat([re_data,i]) 83 | return re_data 84 | 85 | # 각 ID의 차량에서 amount개의 데이터만 추출 후 index reset 86 | def preproc(data,amount): 87 | data=data.reset_index(drop=True) 88 | 89 | j=0 90 | file=data.iloc[0:amount] 91 | for i in data.groupby(data["ID"]).size(): 92 | if(j==0): 93 | j+=i 94 | else: 95 | file=pd.concat([file,data[j:j+amount]]) 96 | j+=i 97 | 98 | file=file.reset_index(drop=True) 99 | 100 | 101 | j=0 102 | for i in tqdm(range(100)): 103 | file.at[j:amount+j,"ID"]=i+1 104 | j+=amount 105 | 106 | return file 107 | 108 | #각 시간별 서버와의 위치, 속도, 추출 후 csv 파일 저장 109 | #[ID, TIMESTAMP, LOCATION, DISTANCE] 형식의 파일로 저장될 예정 DISTANCE는 각 서버에대한 거리를 LIST형식으로 받는다. 
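# i.e. get_v() saves rows of [ID, TIMESTAMP (per-vehicle step counter), SPEED (km/h), DIST_1..DIST_12 (metres to each server)] and drops LOCATION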
110 | #각 시간별 서버와의 위치, 속도, 추출 후 csv 파일 저장 111 | #[ID, TIMESTAMP, LOCATION, DISTANCE] 형식의 파일로 저장될 예정 DISTANCE는 각 서버에대한 거리를 LIST형식으로 받는다. 112 | 113 | def get_v(data,server_location,amount,train=True): 114 | test=data 115 | prev_id = -1 116 | 117 | distance_list=[] 118 | num=0 119 | prev_loc=0 120 | 121 | for i, row in tqdm(test.iterrows(),total=test.shape[0]): 122 | distance_list.append(list(map(lambda x: haversine(x, row["LOCATION"])*1000, server_location))) 123 | if(row["ID"]!=prev_id): 124 | prev_id=row["ID"] 125 | test.at[i,"SPEED"]=0.0 126 | num=0 127 | else: 128 | test.at[i,"SPEED"]=haversine(prev_loc, row["LOCATION"])*1000*3.6/(row["TIMESTAMP"]-prev_time).seconds 129 | prev_time=row["TIMESTAMP"] 130 | prev_loc=row["LOCATION"] 131 | test.at[i,"TIMESTAMP"]=num 132 | num+=1 133 | 134 | distance_list=np.array(distance_list) 135 | for i in range(len(server_location)): 136 | dist_name=("DIST_%s"%(i+1)) 137 | test[dist_name]=distance_list[:,i] 138 | test=test.drop(index=test[test["SPEED"]==0].index) 139 | test=test.drop(columns="LOCATION") 140 | test=test.reset_index(drop=True) 141 | if train: 142 | test.to_csv("./train.csv",index = False) 143 | else: 144 | test.to_csv("./test.csv",index = False) 145 | 146 | 147 | 148 | 149 | if __name__=="__main__": 150 | print("===============Start preprocessing===============") 151 | 152 | list_train,list_test=load_data() 153 | 154 | pool=Pool(processes= mp.cpu_count()) 155 | func1 = partial(multiprocessing,list_train) 156 | num_train=list(range(len(list_train))) 157 | outputs1=pool.map(func1,num_train) 158 | pool.close() 159 | pool.join() 160 | 161 | pool=Pool(processes= mp.cpu_count()) 162 | func2 = partial(multiprocessing,list_test) 163 | num_test=list(range(len(list_test))) 164 | outputs2=pool.map(func2,num_test) 165 | pool.close() 166 | pool.join() 167 | 168 | train_data=refine(outputs1) 169 | test_data=refine(outputs2) 170 | 171 | print("===============Get test data===============") 172 | f=preproc(test_data,min(test_data.groupby(test_data["ID"]).size())) 173 | get_v(f,server_location,min(test_data.groupby(test_data["ID"]).size()),False) 174 | 175 | print("===============Get train data===============") 176 | f=preproc(train_data,min(train_data.groupby(train_data["ID"]).size())) 177 | get_v(f,server_location,min(train_data.groupby(train_data["ID"]).size()),True) 178 | -------------------------------------------------------------------------------- /data/server_and_path_map.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | import pandas as pd 8 | import numpy as np 9 | from tqdm import tqdm 10 | from haversine import haversine 11 | 12 | file = pd.read_csv("./taxi_february.txt", sep=";") 13 | file = file.rename(columns={ ' TIMESTAMP': 'TIMESTAMP', ' LOCATION': 'LOCATION'}) 14 | 15 | # 날짜 추출의 편의를 위해 TIMESTAMP row를 Datetime 항목으로 변경하여 저장 16 | file["TIMESTAMP"] = pd.to_datetime(file["TIMESTAMP"]) 17 | # 거리 계산의 편의를 위해 LOCATION row를 (float, float) 형식의 튜플로 변경하여 저장 18 | temp_location = file["LOCATION"].str.split() 19 | def createLoc_x(t): 20 | return float(t[0][6:]) 21 | def createLoc_y(t): 22 | return float(t[1][:-1]) 23 | file["LOC_X"]=temp_location.map(createLoc_x) 24 | file["LOC_Y"]=temp_location.map(createLoc_y) 25 | 26 | 27 | # In[2]: 28 | 29 | 30 | # 2014-02-01부터 2014-02-02까지 데이터 수집 31 | map_data=(file["TIMESTAMP"] >= '2014-02-01') & (file["TIMESTAMP"] <= '2014-02-02') 32 | m=file.loc[map_data] 33 | 34 | 35 | # In[3]: 36 | 37 | 38 | 
server_location=[(41.9797074301314, 12.517774537227256), 39 | (41.96645724499441, 12.442500521765464), 40 | (41.92362247264185, 12.419533015161926), 41 | (41.859693563285674, 12.424287549712513), 42 | (41.83379583604655, 12.49831550937137), 43 | (41.860946758260795, 12.555531201442555), 44 | (41.94153940428322, 12.553860053429428), 45 | (41.93927042880409, 12.496136876739858), 46 | (41.91832213334783, 12.47306760729969), 47 | (41.887024456510844, 12.472200090859424), 48 | (41.88291790209538, 12.534428295360426), 49 | (41.905086456104385, 12.488293373328746)] 50 | 51 | 52 | # In[4]: 53 | 54 | 55 | m.sort_values(by=["ID", "TIMESTAMP"], inplace=True, ascending=True) 56 | 57 | 58 | # In[5]: 59 | 60 | 61 | m = m.iloc[::4,:] 62 | 63 | 64 | # In[6]: 65 | 66 | 67 | import folium 68 | 69 | lat = m['LOC_X'].mean() 70 | long = m['LOC_Y'].mean() 71 | 72 | 73 | # In[7]: 74 | 75 | 76 | #지도 새로 띄우기 77 | q = folium.Map([lat,long],zoom_start=200) 78 | 79 | for i in tqdm(m.index,total=len(m.index)): 80 | sub_lat = m.loc[i,'LOC_X'] 81 | sub_long = m.loc[i,'LOC_Y'] 82 | color = 'green' 83 | 84 | #지도에 동그라미로 데이터 찍기 85 | folium.CircleMarker([sub_lat,sub_long],color=color,radius = 3).add_to(q) 86 | 87 | for i in server_location: 88 | sub_lat = i[0] 89 | sub_long = i[1] 90 | # 91 | folium.Marker([sub_lat,sub_long]).add_to(q) 92 | 93 | #한글이 안나오는 오류로 html로 trouble shooting 94 | q.save('server_and_path_map.html') 95 | 96 | 97 | # In[ ]: 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /dqn_models.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jungyeonkoh/IoV-Computation-Offloading/f163ee8d5406552faf452a7d4269b43ee3d60ef6/dqn_models.zip -------------------------------------------------------------------------------- /dqn_train.py: -------------------------------------------------------------------------------- 1 | # from temp_env import * 2 | from environment import * 3 | import tensorflow as tf 4 | from collections import deque 5 | import random 6 | import numpy as np 7 | 8 | num_vehicles = 5 9 | num_servers = 12 10 | sum_len = 10 # length of weighted reward sum 100 11 | gamma = 0.8 # decay rate of weighted reward sum 12 | num_episodes = 10 13 | num_timesteps = 1000 # number of timesteps in one episode 14 | 15 | mem_capacity = 200 16 | exp_mems = [deque([], maxlen=mem_capacity) for i in range(num_vehicles)] 17 | batch_size = 32 18 | 19 | alloc_unit = 0.1 # the proportion of the task that the vehicle processes (that is allocated to the vehicle) 20 | # is in the interval [0, 1]. 
21 | # we consider only integer multiples of alloc_unit in this interval [0, 1] 22 | input_size = 2 * num_servers + 2 # length of state vector 23 | num_possible_allocs = int(1 / alloc_unit) + 1 # number of possible allocation proportions 24 | output_size = num_possible_allocs * num_servers # number of possible actions 25 | output_size = num_servers # temp code for server picking only (no partial offloading) 26 | 27 | # dqns = [] 28 | # for i in range(num_vehicles): 29 | # dqn = tf.keras.Sequential([ 30 | # tf.keras.layers.Dense(1000, activation='relu'), tf.keras.layers.Dense(1000, activation='relu'), 31 | # tf.keras.layers.Dense(1000, activation='relu'), tf.keras.layers.Dense(1000, activation='relu'), 32 | # tf.keras.layers.Dense(1000, activation='relu'), tf.keras.layers.Dense(1000, activation='relu'), 33 | # tf.keras.layers.Dense(1000, activation='relu'), tf.keras.layers.Dense(1000, activation='relu'), 34 | # tf.keras.layers.Dense(1000, activation='relu'), tf.keras.layers.Dense(1000, activation='relu'), 35 | # tf.keras.layers.Dense(output_size) 36 | # ]) 37 | # dqns.append(dqn) 38 | 39 | num_layers = 6 40 | 41 | 42 | class DDDQN(tf.keras.Model): 43 | """deuling DQN""" 44 | def __init__(self): 45 | super(DDDQN, self).__init__() 46 | self.layer_list = [tf.keras.layers.Dense(128, activation='relu') for i in range(num_layers)] 47 | self.v = tf.keras.layers.Dense(1, activation=None) 48 | self.a = tf.keras.layers.Dense(output_size, activation=None) 49 | 50 | def call(self, x): 51 | for i in range(num_layers): 52 | x = self.layer_list[i](x) 53 | v = self.v(x) 54 | a = self.a(x) 55 | Q = v + (a - tf.math.reduce_mean(a, axis=1, keepdims=True)) 56 | return Q 57 | 58 | 59 | dqns = [DDDQN() for i in range(num_vehicles)] # create a DQN for each vehicle 60 | 61 | 62 | random_chance = 0.9 # probability of choosing action randomly 63 | random_decay_step = 50 64 | random_decay_rate = 0.8 65 | 66 | 67 | def choose_action(dqn, state): 68 | if random.random() > random_chance * random_decay_rate ** (step_count / random_decay_step): 69 | qualities = dqn(state) 70 | action = tf.math.argmax(qualities[0]) 71 | action = int(action) 72 | else: 73 | action = random.randrange(0, output_size) 74 | return action 75 | 76 | 77 | def get_weighted_sum(index, reward_record): 78 | weighted_sum = 0 79 | for i in range(sum_len): 80 | weighted_sum += reward_record[index + i] * gamma ** i 81 | return weighted_sum 82 | 83 | 84 | def timestep(step_count, reward_record, exps): 85 | rews = [] 86 | states = env.construct_state() 87 | np_states = np.array(states) 88 | 89 | actions = [] 90 | 91 | # choose action for each vehicle 92 | for v_index in range(num_vehicles): 93 | action = choose_action(dqns[v_index], np_states[v_index: v_index + 1]) 94 | actions.append(action) 95 | 96 | # old code for old environment 97 | 98 | # for v_index in range(num_vehicles): 99 | # # convert action from scalar representation into vector representation 100 | # action_vector = [int(actions[v_index] / num_servers) * alloc_unit, actions[v_index] % num_servers] # + 1] 101 | # rews.append(env.calculate_reward(v_index, action_vector)) 102 | # env.update_vehicle() 103 | # env.update_task() 104 | # next_states = env.construct_state() 105 | # rews_sum = sum(rews) 106 | # reward_record.append(rews_sum) 107 | 108 | 109 | 110 | 111 | # temp code for server picking only (no partial offloading) 112 | # actions = [int(action) for action in actions] 113 | next_states, rews = env.step(actions) 114 | rews_sum = sum(rews) 115 | reward_record.append(rews_sum) 116 | 117 | 
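# The Q-learning target is the truncated discounted return R_t = sum_{i=0}^{sum_len-1} gamma**i * r_{t+i}
# (see get_weighted_sum), so an experience only enters exp_mems once the next sum_len rewards are known.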
118 | 119 | 120 | # store experience in memory 121 | for v_index in range(num_vehicles): 122 | exp = [states[v_index], actions[v_index], rews_sum, next_states[v_index]] 123 | # exp_mems[v_index].push(exp) 124 | exps[v_index].append(exp) 125 | 126 | if len(reward_record) >= sum_len: 127 | index = len(reward_record) - sum_len 128 | w_sum = get_weighted_sum(index, reward_record) 129 | for v_index in range(num_vehicles): 130 | exp_mems[v_index].append(exps[v_index][index] + [w_sum]) 131 | # experience is in the format: 132 | # [state, action, reward, next state, weighted reward sum] 133 | return w_sum 134 | 135 | 136 | def update_parameters(): 137 | rand_inds = random.sample(range(len(exp_mems[0])), batch_size) # random indices for experiences 138 | for v_index in range(num_vehicles): 139 | exps_batch = [exp_mems[v_index][rand_ind] for rand_ind in rand_inds] 140 | states_batch = [rand_exp[0] for rand_exp in exps_batch] # shape: (batch_size, input_size) 141 | actions_batch = [rand_exp[1] for rand_exp in exps_batch] # shape: (batch_size,) 142 | WRSs_batch = [rand_exp[4] for rand_exp in exps_batch] # WRS means weighted reward sum. shape: (batch_size,) 143 | 144 | np_states_batch = np.array(states_batch) 145 | dqn = dqns[v_index] 146 | WRSs_batch = tf.convert_to_tensor(WRSs_batch) 147 | 148 | with tf.GradientTape() as tape: 149 | qualities_batch = dqn( 150 | np_states_batch) # batch of qualities of all possible actions. shape: (batch_size, output_size) 151 | action_q_batch = [qualities_batch[i][actions_batch[i]] for i in 152 | range(batch_size)] # batch of quality of selected action. shape: (batch_size, ) 153 | # action_q_batch = tf.convert_to_tensor(action_q_batch, dtype=tf.float64) 154 | action_q_batch = tf.convert_to_tensor(action_q_batch, dtype=WRSs_batch.dtype) 155 | loss = sum(abs(action_q_batch - WRSs_batch)) / batch_size 156 | 157 | grads = tape.gradient(loss, dqn.trainable_variables) 158 | optim = optims[v_index] 159 | optim.apply_gradients(zip(grads, dqn.trainable_variables)) 160 | 161 | return loss 162 | 163 | 164 | step_count = 1 165 | optims = [tf.keras.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(0.01, 100, 0.8)) 166 | for i in range(num_vehicles)] # each vehicle has one optimizer 167 | 168 | for i in range(num_episodes): 169 | reward_record = [] 170 | exps = [[] for i in range(num_vehicles)] 171 | env = Env(num_vehicles, num_servers, 'train.csv', 'simulated_tasks.csv') # initialize env (set timestep as 1) 172 | print(f'Starting episode {i}') 173 | for j in range(num_timesteps): 174 | w_sum = timestep(step_count, reward_record, exps) 175 | loss = None 176 | if len(exp_mems[0]) >= batch_size: 177 | loss = update_parameters() 178 | 179 | # if step_count % 100 == 0: 180 | NUM = 200 181 | if step_count % NUM == 0: 182 | print(f'step: {step_count}') 183 | # print(f'loss: {loss}') 184 | # print(f'WRS: {w_sum}') 185 | print(f'reward sum: {sum(reward_record[-NUM:])}') 186 | print() 187 | 188 | step_count += 1 189 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import math 4 | 5 | # Parameter 6 | # Distance = ~10000m 7 | # 8 | VEH_COMP_RESOURCE = 50 #(MHz) 9 | VEH_TRAN_POWER = 1000 #scaling #0.1(W) 10 | VEC_COMP_RESOURCE = 6300 #(MHz) 11 | VEC_POWER = 0.007 #(W) 12 | BANDWIDTH = 5 #(MHz) 13 | PATH_FADE = 1.75 #scaling #3.75 14 | KAPPA = 10 ** -6 #원래 10^-11~10^-27 (결과에 따라 
scaling할것) 15 | 16 | class Vehicle: 17 | def __init__(self, id, distance, velocity): 18 | self.id = id 19 | self.distance = distance 20 | self.v = velocity 21 | self.comp = np.random.normal(VEH_COMP_RESOURCE, 3) 22 | self.tran = np.random.normal(VEH_TRAN_POWER, 10) 23 | 24 | class Task: 25 | def __init__(self, vehicle, threshold, input, comp, e_weight): 26 | self.vehicle = vehicle 27 | self.threshold = threshold 28 | self.input = input 29 | self.comp = comp 30 | self.e_weight = e_weight 31 | 32 | class Server: 33 | def __init__(self, id): 34 | self.id = id 35 | self.comp = np.random.normal(VEC_COMP_RESOURCE, 70) 36 | self.power = np.random.normal(VEC_POWER, 0.002) 37 | self.crowd = 1 #init as 1 (N_j) 38 | 39 | class Env: 40 | def __init__(self, nv, ns, vehicle, vehicle_test, task, task_test, train): 41 | self.num_vehicle = nv 42 | self.vehicles = [] 43 | self.num_server = ns 44 | self.servers = [] 45 | self.tasks = [] 46 | 47 | self.update = 1 48 | 49 | if train: 50 | self.vehicle_data = pd.read_csv(vehicle) 51 | self.task_data = pd.read_csv(task) 52 | else: 53 | self.vehicle_data = pd.read_csv(vehicle_test) 54 | self.task_data = pd.read_csv(task_test) 55 | 56 | # .csv파일에서 vehicle 불러오기 57 | self.vehicle_data.set_index("TIMESTAMP", inplace=True) 58 | self.update_vehicle() 59 | # .csv파일에서 task 불러오기 60 | self.task_data.set_index("Timestamp", inplace=True) 61 | self.update_task() 62 | 63 | # server 불러오기 64 | for s in range(self.num_server): 65 | self.servers.append(Server(id=s+1)) 66 | 67 | def update_vehicle(self): 68 | sub_data = self.vehicle_data.loc[self.update] 69 | sub_list = sub_data.values 70 | for d in sub_list: 71 | if self.update == 1: 72 | distance_vector = [] 73 | for i in range(self.num_server): 74 | distance_vector.append(d[2+i]) 75 | self.vehicles.append(Vehicle(id=d[0], velocity=d[1], distance=distance_vector)) 76 | else: 77 | for v in self.vehicles: 78 | if d[0] != v.id: 79 | continue 80 | else: 81 | distance_vector = [] 82 | for i in range(self.num_server): 83 | distance_vector.append(d[2+i]) 84 | v.distance = distance_vector 85 | v.v = d[1] 86 | 87 | def update_task(self): 88 | sub_data = self.task_data.loc[self.update] 89 | sub_list = sub_data.values 90 | self.tasks = [] 91 | 92 | # for single vehicle 93 | #self.tasks.append(Task(vehicle=sub_list[0], threshold=sub_list[1], input=sub_list[2], comp=sub_list[3], e_weight=sub_list[4])) 94 | for d in sub_list: 95 | self.tasks.append(Task(vehicle=d[0], threshold=d[1], input=d[2], comp=d[3], e_weight=d[4])) 96 | self.update += 1 97 | def construct_state(self): 98 | """ 99 | Constructs the state to be exploited by the algorithms. 100 | Returns state vector as an input to the RL model calculated for each vehicle. 101 | * Prerequisite: update_vehicle(), update_task() 102 | """ 103 | state_vector = [] 104 | 105 | for v in range(self.num_vehicle): 106 | # 논문 순서따름: threshold, velocity, x_i, y_i, distance, N_j 107 | # (논문 수정하기: GPS point --> distance btwn vehicles and servers) 108 | # (논문 수정하기: 1*26 1-dim. 
vector) 109 | state_vector_by_vehicle = [] 110 | 111 | local_time, local_energy = self.get_local_computing(v) # vehicle index: 0~ 112 | state_vector_by_vehicle.append(local_time) 113 | state_vector_by_vehicle.append(local_energy) 114 | for s in range(self.num_server): 115 | remote_time, remote_energy = self.get_remote_computing(v, s) 116 | state_vector_by_vehicle.append(remote_time) 117 | state_vector_by_vehicle.append(remote_energy) 118 | 119 | state_vector.append(state_vector_by_vehicle) 120 | return state_vector 121 | 122 | # def get_max_tolerance(self, v, s): # Eq 1,2 # ID starts from 1 123 | # stay_time = 2 * self.vehicles[v-1].distance[s-1] / self.vehicles[v-1].v 124 | # return min(stay_time, self.tasks[v-1].threshold) 125 | 126 | def get_transmission_rate(self, v, s): # vehicle index: 0~, server index: 0~ 127 | shared_bandwidth = BANDWIDTH / self.servers[s].crowd # bandwidth shared equally among the N_j tasks on server s 128 | log = self.vehicles[v].tran * ((self.vehicles[v].distance[s] / 1000) ** (-PATH_FADE)) # received power with distance-based path loss 129 | log /= self.servers[s].crowd 130 | return shared_bandwidth * math.log2(log+1) # Shannon-style achievable rate (scaled; no explicit noise term) 131 | 132 | def get_local_computing(self, v): # vehicle index: 0~ 133 | time = self.tasks[v].comp / self.vehicles[v].comp # T_local = C / f_vehicle 134 | energy = KAPPA * (self.vehicles[v].comp ** 2) * self.tasks[v].comp # E_local = kappa * f_vehicle^2 * C 135 | return time, energy 136 | 137 | def get_remote_computing(self, v, s): # vehicle index: 0~ / server index: 0~ 138 | trans = self.tasks[v].input / self.get_transmission_rate(v,s) # upload delay 139 | comp = self.tasks[v].comp / (self.servers[s].comp / self.servers[s].crowd) # execution delay, server cycles split over N_j tasks 140 | time = trans + comp 141 | energy = self.vehicles[v].tran * (10 ** -4) * trans + self.servers[s].power * comp # ~0.01 (upload energy + server-side processing energy) 142 | return time, energy 143 | 144 | def calculate_reward(self, vehicle, action, assign_prob): # TODO: revise in the paper (the formula looks inconsistent) 145 | """ 146 | Calculates the reward for one vehicle: a constant bonus minus the e_weight-weighted delay and energy cost of the chosen split. 147 | """ 148 | reward = 15 149 | local_time, local_energy = self.get_local_computing(vehicle) 150 | remote_time, remote_energy = self.get_remote_computing(vehicle, action) 151 | time = (1-self.tasks[vehicle].e_weight) * (assign_prob * local_time + (1-assign_prob) * remote_time) 152 | energy = self.tasks[vehicle].e_weight * (assign_prob * local_energy + (1-assign_prob) * remote_energy) 153 | return reward - time - energy 154 | 155 | def step(self, action): 156 | #def step(self, action, assign_prob): # action(server) index: 0~ 157 | """ 158 | Step function of the environment. 159 | Calculates the rewards based on the actions taken by the vehicles. 160 | :return: 161 | next_state 162 | rewards: concatenated reward of each vehicle for the taken actions 163 | """ 164 | for i in range(self.num_server): 165 | self.servers[i].crowd = 1 166 | for i in range(self.num_vehicle): 167 | self.servers[action[i]].crowd += 1 168 | 169 | rewards = [] 170 | for i in range(self.num_vehicle): 171 | reward = self.calculate_reward(i, action[i], 0.) 
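            # (Reward as implemented in calculate_reward above, with w = e_weight and local fraction p:
            #      r = 15 - (1 - w) * (p * T_local + (1 - p) * T_remote) - w * (p * E_local + (1 - p) * E_remote)
            #  p (assign_prob) is fixed to 0.0 here, i.e. each task is fully offloaded to its chosen server.)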
172 | #reward = self.calculate_reward(i, action[i], assign_prob[i]) 173 | rewards.append(reward.item()) 174 | 175 | self.update_vehicle() 176 | self.update_task() 177 | next_state = self.construct_state() 178 | return next_state, rewards 179 | -------------------------------------------------------------------------------- /experiment.yaml: -------------------------------------------------------------------------------- 1 | isTrain: True 2 | experiment_name: toy1 3 | episode_size: 100 4 | step_size: 10000 5 | batch_size: 50 6 | discount_rate: 0.99 7 | print_reward_interval: 1000 8 | 9 | EnvironmentParams: 10 | vehicle: "./data/train.csv" 11 | vehicle_test: "./data/test.csv" 12 | task: "./simulated_tasks_10.csv" 13 | task_test: "./simulated_tasks_50_test.csv" 14 | nv: 10 15 | ns: 12 16 | 17 | ActorParams: 18 | state_space: 26 19 | action_space: 12 20 | num_hidden_layer: 3 21 | hidden_dim: 64 22 | 23 | CriticParams: 24 | state_space: 26 25 | num_hidden_layer: 3 26 | hidden_dim: 64 -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import environment 2 | import yaml 3 | import torch 4 | import torch.multiprocessing as mp 5 | import torch.optim as optim 6 | import torch.nn.functional as F 7 | from torch.distributions import Categorical 8 | import random 9 | import os 10 | import numpy as np 11 | from network import Actor, Critic 12 | 13 | seed = 1 14 | 15 | def train(config, env, rank, episode_size, step_size, batch_size, discount_rate, global_Actor, global_Critic): 16 | torch.manual_seed(seed+rank) 17 | 18 | local_Actor = Actor(**config["ActorParams"]) 19 | local_Critic = Critic(**config["CriticParams"]) 20 | local_Actor.load_state_dict(global_Actor.state_dict()) 21 | local_Critic.load_state_dict(global_Critic.state_dict()) 22 | actor_optimizer = optim.Adam(global_Actor.parameters(), lr=1e-4) 23 | critic_optimizer = optim.Adam(global_Critic.parameters(), lr=1e-4) 24 | 25 | batch = [] 26 | rewards = [] 27 | 28 | 29 | for epi in range(episode_size): 30 | env.update = 1 31 | state = env.construct_state() 32 | 33 | score = 0 34 | step = 0 35 | while step < step_size: 36 | # Get action 37 | action_prob = local_Actor(torch.FloatTensor(state)) # shape: (V, S) 38 | action_dist = Categorical(action_prob) 39 | action = action_dist.sample() # server index : 0~ 40 | next_state, reward = env.step(action) 41 | done = np.zeros_like(reward) if len(batch) == batch_size - 1 else np.ones_like(reward) 42 | action_prob_temp = [] 43 | for i in range(len(action)): 44 | action_prob_temp.append(action_prob[i][action[i]]) 45 | reward[i] /= 100 46 | 47 | batch.append([state, next_state, reward, action_prob_temp, done]) 48 | 49 | if len(batch) >= batch_size: 50 | state_buffer = [] 51 | next_state_buffer = [] 52 | reward_buffer = [] 53 | action_prob_buffer = [] 54 | done_buffer = [] 55 | 56 | for item in batch: 57 | state_buffer.append(item[0]) 58 | next_state_buffer.append(item[1]) 59 | reward_buffer.append(item[2]) 60 | action_prob_buffer.append(item[3]) 61 | done_buffer.append(item[4]) 62 | 63 | state_buffer = torch.FloatTensor(state_buffer) # (batch_size, V, state_size) 64 | next_state_buffer = torch.FloatTensor(next_state_buffer) 65 | reward_buffer = torch.FloatTensor(reward_buffer).unsqueeze(-1) # (batch_size, V, 1) 66 | done_buffer = torch.FloatTensor(done_buffer).unsqueeze(-1) # (batch_size, V, 1) 67 | 68 | value_state = local_Critic(state_buffer).squeeze(1) # (batch_size, V, 1) 69 | 
value_next_state = local_Critic(next_state_buffer).squeeze(1) # (batch_size, V, 1) 70 | Q = reward_buffer + discount_rate * value_next_state * done_buffer 71 | A = Q - value_state 72 | 73 | # update Critic 74 | critic_optimizer.zero_grad() 75 | critic_loss = F.mse_loss(value_state, Q.detach()) # constant 76 | critic_loss.backward(retain_graph=True) 77 | for global_param, local_param in zip(global_Critic.parameters(), local_Critic.parameters()): 78 | global_param._grad = local_param.grad 79 | critic_optimizer.step() 80 | 81 | # update Actor 82 | actor_optimizer.zero_grad() 83 | actor_loss = 0 84 | for idx, prob in enumerate(action_prob_buffer): 85 | for i in range(len(prob)): 86 | actor_loss += -A[idx][i] * torch.log(prob[i]) 87 | actor_loss /= len(action_prob_buffer) 88 | actor_loss.backward() 89 | 90 | for global_param, local_param in zip(global_Actor.parameters(), local_Actor.parameters()): 91 | global_param._grad = local_param.grad 92 | actor_optimizer.step() 93 | 94 | local_Actor.load_state_dict(global_Actor.state_dict()) 95 | local_Critic.load_state_dict(global_Critic.state_dict()) 96 | batch = [] 97 | 98 | state = next_state 99 | score += np.mean(reward) 100 | step += 1 101 | if (step % 1000 == 0): 102 | print("Episode: ", epi, " Step: ", step, " Reward: ", score/step) 103 | 104 | print("Save reward value: ", score/step_size) 105 | rewards.append(score/step_size) 106 | 107 | # print weight values 108 | if epi % 5 == 4: 109 | np.save("reward"+str(env.num_vehicle)+"_"+str(epi)+".npy", rewards) 110 | # save model weights 111 | if epi % 10 == 0: 112 | print("Save model") 113 | save_dir = "./a3c_v"+str(env.num_vehicle) 114 | if not os.path.isdir(save_dir): 115 | os.mkdir(save_dir) 116 | torch.save(local_Actor.state_dict(), os.path.join(save_dir, str(epi)+"_local_actor.pt")) 117 | torch.save(local_Critic.state_dict(), os.path.join(save_dir, str(epi)+"_local_critic.pt")) 118 | torch.save(global_Actor.state_dict(), os.path.join(save_dir, str(epi)+"_global_actor.pt")) 119 | torch.save(global_Critic.state_dict(), os.path.join(save_dir, str(epi)+"_global_critic.pt")) 120 | return rewards 121 | 122 | def test(env, step_size, print_reward_interval, global_Actor): 123 | iteration = 1 124 | while True: 125 | score = 0 126 | step = 0 127 | env.update = 1 128 | state = env.construct_state() 129 | 130 | while step < step_size: 131 | action_prob = global_Actor(torch.FloatTensor(state)) 132 | action_dist = Categorical(action_prob) 133 | action = action_dist.sample() 134 | 135 | next_state, reward = env.step(action) 136 | state = next_state 137 | score += np.mean(reward) 138 | step += 1 139 | 140 | if step % print_reward_interval == 0: 141 | print("Iteration: ", iteration, " Step: ", step, " Reward: ", score/step) 142 | iteration += 1 143 | 144 | def nn(env, step_size): 145 | step = 0 146 | score = 0 147 | 148 | while step < step_size: 149 | rewards = [] 150 | actions = [] 151 | 152 | for i in range(env.num_server): 153 | env.servers[i].crowd = 1 154 | for i in range(env.num_vehicle): 155 | action = np.argmin(env.vehicles[i].distance) 156 | actions.append(action) 157 | env.servers[action].crowd += 1 158 | for i in range(env.num_vehicle): 159 | reward = env.calculate_reward(i, actions[i], 0.) 
160 | rewards.append(reward/100) 161 | env.update_vehicle() 162 | env.update_task() 163 | score += np.mean(rewards) 164 | step += 1 165 | if step % 1000 == 0: 166 | print(step, " : ", score / step) 167 | 168 | def rand(env, step_size): 169 | step = 0 170 | score = 0 171 | 172 | while step < step_size: 173 | rewards = [] 174 | actions = [] 175 | 176 | for i in range(env.num_server): 177 | env.servers[i].crowd = 1 178 | for i in range(env.num_vehicle): 179 | action = np.random.randint(0, env.num_server) 180 | actions.append(action) 181 | env.servers[action].crowd += 1 182 | for i in range(env.num_vehicle): 183 | reward = env.calculate_reward(i, actions[i], 0.) 184 | rewards.append(reward / 100) 185 | env.update_vehicle() 186 | env.update_task() 187 | score += np.mean(rewards) 188 | step += 1 189 | if step % 1000 == 0: 190 | print(step, " : ", score / step) 191 | 192 | def seed_torch(seed): 193 | torch.manual_seed(seed) 194 | if torch.backends.cudnn.enabled: 195 | torch.backends.cudnn.benchmark = False 196 | torch.backends.cudnn.deterministic = True 197 | 198 | if __name__ == '__main__': 199 | np.random.seed(seed) 200 | random.seed(seed) 201 | seed_torch(seed) 202 | 203 | config = yaml.load(open("./experiment.yaml"), Loader=yaml.FullLoader) 204 | 205 | env = environment.Env(**config["EnvironmentParams"], train=True) 206 | # test_env = environment.Env(**config["EnvironmentParams"], train=False) 207 | # model = TheModelClass(*args, **kwargs) 208 | # model.load_state_dict(torch.load(PATH)) 209 | # model.eval() 210 | global_Actor = Actor(**config["ActorParams"]) 211 | global_Critic = Critic(**config["CriticParams"]) 212 | global_Actor.share_memory() 213 | global_Critic.share_memory() 214 | 215 | isTrain = config.setdefault("isTrain", True) 216 | experiment_name = config.setdefault("experiment_name", "") 217 | episode_size = config.setdefault("episode_size", 1000) 218 | step_size = config.setdefault("step_size", 10000) 219 | batch_size = config.setdefault("batch_size", 128) 220 | discount_rate = config.setdefault("discount_rate", 0.99) 221 | print_reward_interval = config.setdefault("print_reward_interval", 1000) 222 | 223 | #nn(env, step_size) 224 | #rand(env, step_size) 225 | 226 | print("==========") 227 | print("Experiment: " + experiment_name) 228 | processes = [] 229 | process_num = 1 230 | mp.set_start_method("spawn") 231 | print("MP start method: ", mp.get_start_method()) 232 | print("==========") 233 | 234 | # p = mp.Process(target=test, args=(env, step_size, print_reward_interval, global_Actor)) 235 | # p.start() 236 | # processes.append(p) 237 | for rank in range(process_num): 238 | p = mp.Process(target=train, args=(config, env, rank, episode_size, step_size, batch_size, discount_rate, global_Actor, global_Critic)) 239 | p.start() 240 | processes.append(p) 241 | for p in processes: 242 | p.join() 243 | 244 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Actor(nn.Module): 6 | def __init__(self, state_space=None, action_space=None, num_hidden_layer=2, hidden_dim=None): 7 | super(Actor, self).__init__() 8 | 9 | # state_space, action_space check 10 | assert state_space is not None, "None state_space input: state_space should be assigned." 11 | assert action_space is not None, "None action_space input: action_space should be assigned." 
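        # (With the configuration in experiment.yaml, state_space = 26: local (time, energy) plus
        #  (time, energy) for each of the 12 servers, as built by Env.construct_state(), and
        #  action_space = 12, one softmax entry per candidate server.)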
12 | 13 | if hidden_dim is None: 14 | hidden_dim = state_space * 2 15 | 16 | self.layers = nn. ModuleList() 17 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 18 | for i in range(num_hidden_layer): # hidden layer 19 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 20 | self.layers.append(nn.Linear(hidden_dim, action_space)) # output layer 21 | 22 | def forward(self, x): 23 | for layer in self.layers[:-1]: 24 | x = F.relu(layer(x)) 25 | out = F.softmax(self.layers[-1](x)) 26 | return out 27 | 28 | class Critic(nn.Module): 29 | def __init__(self, state_space=None, num_hidden_layer=2, hidden_dim=None): 30 | super(Critic, self).__init__() 31 | 32 | # state_space check 33 | assert state_space is not None, "None state_space input: state_space should be assigned." 34 | 35 | if hidden_dim is None: 36 | hidden_dim = state_space * 2 37 | 38 | self.layers = nn.ModuleList() 39 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 40 | for i in range(num_hidden_layer): # hidden layer 41 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 42 | self.layers.append(nn.Linear(hidden_dim,1 )) # output layer 43 | 44 | def forward(self, x): 45 | for layer in self.layers[:-1]: 46 | x = F.relu(layer(x)) 47 | out = self.layers[-1](x) 48 | return out 49 | -------------------------------------------------------------------------------- /new_main.py: -------------------------------------------------------------------------------- 1 | import environment 2 | import yaml 3 | import torch 4 | import torch.multiprocessing as mp 5 | import torch.optim as optim 6 | import torch.nn.functional as F 7 | from torch.distributions import Categorical 8 | import random 9 | import numpy as np 10 | from new_network import Actor, Critic 11 | 12 | seed = 1 13 | max_step = 5000 14 | 15 | def train(config, env, rank, episode_size, batch_size, discount_rate, global_Actor, global_Critic): 16 | torch.manual_seed(seed+rank) 17 | 18 | local_Actor = Actor(**config["ActorParams"]) 19 | local_Critic = Critic(**config["CriticParams"]) 20 | local_Actor.load_state_dict(global_Actor.state_dict()) 21 | local_Critic.load_state_dict(global_Critic.state_dict()) 22 | actor_optimizer = optim.Adam(global_Actor.parameters(), lr=1e-4) 23 | critic_optimizer = optim.Adam(global_Critic.parameters(), lr=1e-4) 24 | 25 | batch = [] 26 | 27 | for epi in range(episode_size): 28 | env.update = 1 29 | state = env.construct_state() 30 | 31 | score = 0 32 | step = 0 33 | 34 | while step < max_step: 35 | # Get action 36 | action_prob, assign_prob = local_Actor(torch.FloatTensor(state)) #action_prob: (V, S), assign_prob: (V, 1) 37 | action_dist = Categorical(action_prob) 38 | action = action_dist.sample() # server index : 0~ 39 | next_state, reward = env.step(action, assign_prob) 40 | done = np.zeros_like(reward) if len(batch) == batch_size - 1 else np.ones_like(reward) 41 | action_prob_temp = [] 42 | for i in range(len(action)): 43 | action_prob_temp.append(action_prob[i][action[i]]) 44 | reward[i] /= 100 45 | 46 | batch.append([state, next_state, reward, action_prob_temp, done]) 47 | 48 | if len(batch) >= batch_size: 49 | state_buffer = [] 50 | next_state_buffer = [] 51 | reward_buffer = [] 52 | action_prob_buffer = [] 53 | done_buffer = [] 54 | 55 | for item in batch: 56 | state_buffer.append(item[0]) 57 | next_state_buffer.append(item[1]) 58 | reward_buffer.append(item[2]) 59 | action_prob_buffer.append(item[3]) 60 | done_buffer.append(item[4]) 61 | 62 | state_buffer = torch.FloatTensor(state_buffer) # (batch_size, 
V, state_size) 63 | next_state_buffer = torch.FloatTensor(next_state_buffer) 64 | reward_buffer = torch.FloatTensor(reward_buffer).unsqueeze(-1) # (batch_size, V, 1) 65 | done_buffer = torch.FloatTensor(done_buffer).unsqueeze(-1) # (batch_size, V, 1) 66 | 67 | value_state = local_Critic(state_buffer).squeeze(1) # (batch_size, V, 1) 68 | value_next_state = local_Critic(next_state_buffer).squeeze(1) # (batch_size, V, 1) 69 | Q = reward_buffer + discount_rate * value_next_state * done_buffer 70 | A = Q - value_state 71 | 72 | # update Critic 73 | critic_optimizer.zero_grad() 74 | critic_loss = F.mse_loss(value_state, Q.detach()) # constant 75 | critic_loss.backward(retain_graph=True) 76 | for global_param, local_param in zip(global_Critic.parameters(), local_Critic.parameters()): 77 | global_param._grad = local_param.grad 78 | critic_optimizer.step() 79 | 80 | # update Actor 81 | actor_optimizer.zero_grad() 82 | actor_loss = 0 83 | for idx, prob in enumerate(action_prob_buffer): 84 | for i in range(len(prob)): 85 | actor_loss += -A[idx][i] * torch.log(prob[i]) 86 | actor_loss /= len(action_prob_buffer) 87 | actor_loss.backward() 88 | 89 | for global_param, local_param in zip(global_Actor.parameters(), local_Actor.parameters()): 90 | global_param._grad = local_param.grad 91 | actor_optimizer.step() 92 | 93 | local_Actor.load_state_dict(global_Actor.state_dict()) 94 | local_Critic.load_state_dict(global_Critic.state_dict()) 95 | batch = [] 96 | 97 | state = next_state 98 | score += np.mean(reward) 99 | step += 1 100 | if (step % 1000 == 0): 101 | print("Episode: ", epi, " Step: ", step, " Reward: ", score/step) 102 | 103 | 104 | def test(env, print_reward_interval, global_Actor): 105 | iteration = 1 106 | while True: 107 | score = 0 108 | step = 0 109 | env.update = 1 110 | state = env.construct_state() 111 | 112 | while step < max_step: 113 | action_prob = global_Actor(torch.FloatTensor(state)) 114 | action_dist = Categorical(action_prob) 115 | action = action_dist.sample() 116 | 117 | next_state, reward = env.step(action) 118 | state = next_state 119 | score += np.mean(reward) 120 | step += 1 121 | 122 | if step % print_reward_interval == 0: 123 | print("Iteration: ", iteration, " Step: ", step, " Reward: ", score/step) 124 | iteration += 1 125 | 126 | def seed_torch(seed): 127 | torch.manual_seed(seed) 128 | if torch.backends.cudnn.enabled: 129 | torch.backends.cudnn.benchmark = False 130 | torch.backends.cudnn.deterministic = True 131 | 132 | if __name__ == '__main__': 133 | np.random.seed(seed) 134 | random.seed(seed) 135 | seed_torch(seed) 136 | 137 | config = yaml.load(open("./experiment.yaml"), Loader=yaml.FullLoader) 138 | 139 | env = environment.Env(**config["EnvironmentParams"], train=True) 140 | #test_env = environment.Env(**config["EnvironmentParams"], train=False) 141 | global_Actor = Actor(**config["ActorParams"]) 142 | global_Critic = Critic(**config["CriticParams"]) 143 | global_Actor.share_memory() 144 | global_Critic.share_memory() 145 | 146 | isTrain = config.setdefault("isTrain", True) 147 | experiment_name = config.setdefault("experiment_name", "") 148 | episode_size = config.setdefault("episode_size", 1000) 149 | step_size = config.setdefault("step_size", 10000) 150 | batch_size = config.setdefault("batch_size", 128) 151 | discount_rate = config.setdefault("discount_rate", 0.99) 152 | print_reward_interval = config.setdefault("print_reward_interval", 1000) 153 | 154 | print("==========") 155 | print("Experiment: " + experiment_name) 156 | processes = [] 157 | process_num 
= 1 158 | mp.set_start_method("spawn") 159 | print("MP start method: ", mp.get_start_method()) 160 | print("==========") 161 | 162 | #p = mp.Process(target=test, args=(test_env, print_reward_interval, global_Actor)) 163 | #p.start() 164 | #processes.append(p) 165 | for rank in range(process_num): 166 | p = mp.Process(target=train, args=(config, env, rank, episode_size, batch_size, discount_rate, global_Actor, global_Critic)) 167 | p.start() 168 | processes.append(p) 169 | for p in processes: 170 | p.join() 171 | 172 | -------------------------------------------------------------------------------- /new_network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Actor(nn.Module): 6 | def __init__(self, state_space=None, action_space=None, num_hidden_layer=2, hidden_dim=None): 7 | super(Actor, self).__init__() 8 | 9 | # state_space, action_space check 10 | assert state_space is not None, "None state_space input: state_space should be assigned." 11 | assert action_space is not None, "None action_space input: action_space should be assigned." 12 | 13 | if hidden_dim is None: 14 | hidden_dim = state_space * 2 15 | 16 | self.layers = nn.ModuleList() 17 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 18 | for i in range(num_hidden_layer): # hidden layer 19 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 20 | 21 | self.actor_softmax = nn.Linear(hidden_dim, action_space) #which server to offload 22 | self.actor_sigmoid = nn.Linear(hidden_dim, 1) # partial offloading 23 | #self.layers.append(nn.Linear(hidden_dim, action_space)) # which server to offload 24 | 25 | def forward(self, x): 26 | for layer in self.layers[:]: 27 | x = F.relu(layer(x)) 28 | softmax = F.softmax(self.actor_softmax(x), dim=-1) 29 | sigmoid = torch.sigmoid(self.actor_sigmoid(x)) 30 | return softmax, sigmoid 31 | 32 | class Critic(nn.Module): 33 | def __init__(self, state_space=None, num_hidden_layer=2, hidden_dim=None): 34 | super(Critic, self).__init__() 35 | 36 | # state_space check 37 | assert state_space is not None, "None state_space input: state_space should be assigned." 
38 | 39 | if hidden_dim is None: 40 | hidden_dim = state_space * 2 41 | 42 | self.layers = nn.ModuleList() 43 | self.layers.append(nn.Linear(state_space, hidden_dim)) # input layer 44 | for i in range(num_hidden_layer): # hidden layer 45 | self.layers.append(nn.Linear(hidden_dim, hidden_dim)) 46 | self.layers.append(nn.Linear(hidden_dim,1 )) # output layer 47 | 48 | def forward(self, x): 49 | for layer in self.layers[:-1]: 50 | x = F.relu(layer(x)) 51 | out = self.layers[-1](x) 52 | return out 53 | -------------------------------------------------------------------------------- /simulate_task.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import csv 3 | import random 4 | import numpy as np 5 | from PyQt5.QtWidgets import ( 6 | QApplication, QWidget, QLabel, QLineEdit, QPushButton, QVBoxLayout, QHBoxLayout 7 | ) 8 | 9 | class Simulator(QWidget): 10 | def __init__(self): 11 | super().__init__() 12 | self.SimulatorUI() 13 | 14 | def SimulatorUI(self): 15 | # task 16 | self.lb1 = QLabel("Number of Vehicles") 17 | self.lb2 = QLabel("Length of Timestep") 18 | self.input1 = QLineEdit(self) 19 | self.input2 = QLineEdit(self) 20 | self.btnSimulateTask = QPushButton("Simulate Tasks") 21 | self.btnSimulateTask.pressed.connect(self.simulate_tasks) 22 | 23 | vbox1 = QVBoxLayout() 24 | vbox1.addWidget(self.lb1) 25 | vbox1.addWidget(self.input1) 26 | vbox1.addWidget(self.lb2) 27 | vbox1.addWidget(self.input2) 28 | vbox1.addWidget(self.btnSimulateTask) 29 | 30 | self.setLayout(vbox1) 31 | self.setWindowTitle("My Simulator") 32 | self.move(400, 100) 33 | self.resize(200, 200) 34 | self.show() 35 | 36 | def simulate_tasks(self): 37 | self.vehicles = int(self.input1.text()) 38 | self.timestep = int(self.input2.text()) 39 | 40 | f = open("simulated_tasks.csv", "w", newline="") 41 | wr = csv.writer(f) 42 | wr.writerow(["Timestamp", "Vehicle ID", "Threshold", "Input", "Computation", "Energy Weight"]) 43 | 44 | for i in range(self.vehicles): 45 | for j in range(self.timestep): 46 | data_per_task = [] 47 | data_per_task.append(j + 1) #timestamp 48 | data_per_task.append(i + 1) #vehicle_id 49 | data_per_task.append(np.random.normal(5, 1)) #threshold 50 | data_per_task.append(random.randint(1, 100)) #input 51 | data_per_task.append(random.randint(1,100)) #computation 52 | data_per_task.append(random.random()) #weight(0~1) 53 | 54 | wr.writerow(data_per_task) 55 | 56 | if __name__ == "__main__": 57 | app = QApplication(sys.argv) 58 | ex = Simulator() 59 | sys.exit(app.exec_()) 60 | -------------------------------------------------------------------------------- /temp_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import math 4 | 5 | # Parameter 6 | # Distance = ~10000m 7 | # 8 | VEH_COMP_RESOURCE = 50 #(MHz) 9 | VEH_TRAN_POWER = 1000 #scaling #0.1(W) 10 | VEC_COMP_RESOURCE = 6300 #(MHz) 11 | VEC_POWER = 0.007 #(W) 12 | BANDWIDTH = 5 #(MHz) 13 | PATH_FADE = 1.75 #scaling #3.75 14 | KAPPA = 10 ** -6 #원래 10^-11~10^-27 (결과에 따라 scaling할것) 15 | 16 | class Vehicle: 17 | def __init__(self, id, distance, velocity): 18 | self.id = id 19 | self.distance = distance 20 | self.v = velocity 21 | self.comp = np.random.normal(VEH_COMP_RESOURCE, 3) 22 | self.tran = np.random.normal(VEH_TRAN_POWER, 10) 23 | 24 | class Task: 25 | def __init__(self, vehicle, threshold, input, comp, e_weight): 26 | self.vehicle = vehicle 27 | self.threshold = threshold 28 | self.input = input 29 | self.comp 
= comp 30 | self.e_weight = e_weight 31 | 32 | class Server: 33 | def __init__(self, id): 34 | self.id = id 35 | self.comp = np.random.normal(VEC_COMP_RESOURCE, 70) 36 | self.power = np.random.normal(VEC_POWER, 0.002) 37 | self.crowd = 1 #init as 1 (N_j) 38 | 39 | class Env: 40 | def __init__(self, nv, ns, load_vehicle_position, load_task_position): 41 | self.num_vehicle = nv 42 | self.vehicles = [] 43 | self.num_server = ns 44 | self.servers = [] 45 | self.tasks = [] 46 | 47 | self.update = 1 48 | 49 | # .csv파일에서 vehicle 불러오기 50 | self.vehicle_data = pd.read_csv(load_vehicle_position) 51 | self.vehicle_data.set_index("TIMESTAMP", inplace=True) 52 | self.update_vehicle() 53 | 54 | # .csv파일에서 task 불러오기 55 | self.task_data = pd.read_csv(load_task_position) 56 | self.task_data.set_index("Timestamp", inplace=True) 57 | self.update_task() 58 | 59 | # server 불러오기 60 | for s in range(self.num_server): 61 | self.servers.append(Server(id=s+1)) 62 | 63 | def update_vehicle(self): 64 | sub_data = self.vehicle_data.loc[self.update] 65 | sub_list = sub_data.values 66 | for d in sub_list: 67 | if self.update == 1: 68 | distance_vector = [] 69 | for i in range(self.num_server): 70 | distance_vector.append(d[2+i]) 71 | self.vehicles.append(Vehicle(id=d[0], velocity=d[1], distance=distance_vector)) 72 | else: 73 | for v in self.vehicles: 74 | if d[0] != v.id: 75 | continue 76 | else: 77 | distance_vector = [] 78 | for i in range(self.num_server): 79 | distance_vector.append(d[2+i]) 80 | v.distance = distance_vector 81 | v.v = d[1] 82 | 83 | def update_task(self): 84 | sub_data = self.task_data.loc[self.update] 85 | sub_list = sub_data.values 86 | self.tasks = [] 87 | for d in sub_list: 88 | self.tasks.append(Task(vehicle=d[0], threshold=d[1], input=d[2], comp=d[3], e_weight=d[4])) 89 | self.update += 1 90 | 91 | def construct_state(self): 92 | """ 93 | Constructs the state to be exploited by the algorithms. 94 | Returns state vector as an input to the RL model calculated for each vehicle. 95 | * Prerequisite: update_vehicle(), update_task() 96 | """ 97 | state_vector = [] 98 | 99 | for v in range(self.num_vehicle): 100 | # 논문 순서따름: threshold, velocity, x_i, y_i, distance, N_j 101 | # (논문 수정하기: GPS point --> distance btwn vehicles and servers) 102 | # (논문 수정하기: 1*26 1-dim. 
vector) 103 | state_vector_by_vehicle = [] 104 | 105 | local_time, local_energy = self.get_local_computing(v+1) 106 | state_vector_by_vehicle.append(local_time) 107 | state_vector_by_vehicle.append(local_energy) 108 | for s in range(self.num_server): 109 | remote_time, remote_energy = self.get_remote_computing(v+1, s+1) 110 | state_vector_by_vehicle.append(remote_time) 111 | state_vector_by_vehicle.append(remote_energy) 112 | 113 | state_vector.append(state_vector_by_vehicle) 114 | return state_vector 115 | 116 | def get_max_tolerance(self, v, s): # Eq 1,2 # ID starts from 1 117 | #todo: .csv speed error --> stay_time~N(5,1) 118 | stay_time = 2 * self.vehicles[v-1].distance[s-1] / self.vehicles[v-1].v 119 | return min(stay_time, self.tasks[v-1].threshold) 120 | 121 | def get_transmission_rate(self, v, s): 122 | shared_bandwidth = BANDWIDTH / self.servers[s-1].crowd 123 | log = self.vehicles[v-1].tran * ((self.vehicles[v-1].distance[s-1] / 1000) ** (-PATH_FADE)) 124 | log /= self.servers[s-1].crowd 125 | return shared_bandwidth * math.log2(log+1) 126 | 127 | def get_local_computing(self, v): 128 | time = self.tasks[v-1].comp / self.vehicles[v].comp 129 | energy = KAPPA * (self.vehicles[v].comp ** 2) * self.tasks[v-1].comp 130 | return time, energy 131 | 132 | def get_remote_computing(self, v, s): 133 | trans = self.tasks[v-1].input / self.get_transmission_rate(v,s) 134 | comp = self.tasks[v-1].comp / (self.servers[s-1].comp / self.servers[s-1].crowd) 135 | time = trans + comp 136 | energy = self.vehicles[v-1].tran * (10 ** -4) * trans + self.servers[s-1].power * comp # ~0.01 137 | # print(time, energy) 138 | return time, energy 139 | 140 | def calculate_reward(self, vehicle, action): # 논문 수정하기 / 수식 이상함 141 | """ 142 | Calculates the reward based on the action of the vehicle. 143 | """ 144 | # reward = self.get_max_tolerance(vehicle, action[1]) 145 | # local_time, local_energy = self.get_local_computing(vehicle) 146 | # remote_time, remote_energy = self.get_remote_computing(vehicle, action[1]) 147 | # time = (1-self.tasks[vehicle].e_weight) * (action[0] * local_time + (1-action[0]) * remote_time) 148 | # energy = self.tasks[vehicle].e_weight * (action[0] * local_energy + (1-action[0]) * remote_energy) 149 | # return reward - time - energy 150 | 151 | local_time, local_energy = self.get_local_computing(vehicle + 1) 152 | # print(local_time, local_energy) 153 | return -1 * (action[0] - local_time) ** 2 154 | # return action[0] 155 | 156 | def train_step(self): 157 | """ 158 | Step function of the environment. 159 | Calculates the rewards based on the action taken by the vehicles. 160 | :return: 161 | rewards: concatenated reward of each vehicle for the taken actions 162 | """ 163 | rews = np.zeros(self.num_vehicle) 164 | 165 | state = self.construct_state() 166 | # print() 167 | # for s in state: 168 | # print(s[0], s[1]) 169 | # print() 170 | 171 | # print(len(state), len(state[0]), state[0]) 172 | action = [] 173 | for i in range(self.num_vehicle): 174 | action.append([0, 0]) 175 | # action = np.zeros((self.num_vehicle, 2)) # 논문 수정하기: action = [float, int] (vehicle, #server) 176 | for v in range(self.num_vehicle): 177 | #action[v] = model.infer_action() #TODO 178 | rews[v] = self.calculate_reward(v, action[v]) 179 | self.update_vehicle() 180 | self.update_task() 181 | return rews 182 | --------------------------------------------------------------------------------
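Usage note: the snippet below is a minimal evaluation sketch, not a file of this repository. It assumes that main.py has already written a checkpoint such as ./a3c_v10/90_global_actor.pt (the name is hypothetical; main.py saves every 10 episodes into ./a3c_v<num_vehicle>/) and that experiment.yaml points at existing vehicle/task CSV files. It runs the trained actor greedily (argmax over the server softmax) on the test environment instead of sampling as test() does.

import numpy as np
import torch
import yaml

import environment
from network import Actor

config = yaml.load(open("./experiment.yaml"), Loader=yaml.FullLoader)
env = environment.Env(**config["EnvironmentParams"], train=False)    # use the *_test CSVs

actor = Actor(**config["ActorParams"])
actor.load_state_dict(torch.load("./a3c_v10/90_global_actor.pt"))    # hypothetical checkpoint path
actor.eval()

state = env.construct_state()
score = 0.0
steps = 1000
for _ in range(steps):
    with torch.no_grad():
        action_prob = actor(torch.FloatTensor(state))     # shape: (num_vehicle, num_server)
    action = torch.argmax(action_prob, dim=-1)            # greedy server index per vehicle
    state, reward = env.step(action)
    score += np.mean(reward) / 100                        # same 1/100 reward scaling as train()
print("mean scaled reward:", score / steps)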