├── radio_map ├── AABB_plot.m └── map_data.mat ├── _doc ├── simulation_fig.png └── simulation_fig2.png ├── plot_figure ├── FIGURE_2.m ├── FIGURE_1.m ├── FIGURE_5.m ├── FIGURE_4.m └── FIGURE_3.m ├── algorithm ├── MA-TD3_core.py ├── MA-TD3_main.py ├── MA-PPO_main.py ├── MA-SAC_main.py └── MA-DDPG_main.py ├── tradition_baseline ├── A_search.py ├── fig5.m ├── fig3.m └── pso.py ├── README.md └── environment ├── environment.yaml └── requirements.txt /radio_map/AABB_plot.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/AABB_plot.m -------------------------------------------------------------------------------- /radio_map/map_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/map_data.mat -------------------------------------------------------------------------------- /_doc/simulation_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig.png -------------------------------------------------------------------------------- /_doc/simulation_fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig2.png -------------------------------------------------------------------------------- /plot_figure/FIGURE_2.m: -------------------------------------------------------------------------------- 1 | clc; 2 | clear all; 3 | close all; 4 | X=[1:1:400]; 5 | TD3_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\TD3.txt'); 6 | % TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt'); 7 | % 
TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt'); 8 | % TD3_avg=(TD3_1+TD3_2+TD3_3)/3; 9 | 10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt'); 11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\PPO.txt'); 12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\DDPG.txt'); 13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\SAC.txt'); 14 | RANDOM_avg=ones(400)*TD3_avg(1); 15 | % PPO_avg(1)=TD3_avg(1); 16 | % DDPG_avg(20)=TD3_avg(1); 17 | % SAC_avg(20)=TD3_avg(1); 18 | 19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400); 20 | hold on 21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400); 22 | hold on 23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1 0.54902 0],'MarkerIndices',1:100:400); 24 | hold on 25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400); 26 | hold on 27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400); 28 | % p1.MarkerIndices = 400:500:length(y1_ping); 29 | legend([p1 p4 p3 p2 p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex') 30 | xlabel('Episode','Interpreter','latex') 31 | ylabel('Reward','Interpreter','latex') 32 | ylim([-900, 0]) -------------------------------------------------------------------------------- /plot_figure/FIGURE_1.m: -------------------------------------------------------------------------------- 1 | clc; 2 | clear all; 3 | close all; 4 | X=[1:1:400]; 5 | TD3_1=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-1.txt'); 6 | 
TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt'); 7 | TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt'); 8 | TD3_avg=(TD3_1+TD3_2+TD3_3)/3; 9 | 10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt'); 11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\PPO-avg.txt'); 12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\DDPG-avg.txt'); 13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\SAC-avg.txt'); 14 | RANDOM_avg=ones(400)*TD3_avg(1); 15 | PPO_avg(1)=TD3_avg(1); 16 | DDPG_avg(20)=TD3_avg(1); 17 | SAC_avg(20)=TD3_avg(1); 18 | 19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400); 20 | hold on 21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400); 22 | hold on 23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1 0.54902 0],'MarkerIndices',1:100:400); 24 | hold on 25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400); 26 | hold on 27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400); 28 | % p1.MarkerIndices = 400:500:length(y1_ping); 29 | legend([p1 p4 p3 p2 p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex') 30 | xlabel('Episode','Interpreter','latex') 31 | ylabel('Reward','Interpreter','latex') 32 | ylim([-1500, 0]) -------------------------------------------------------------------------------- /algorithm/MA-TD3_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
scipy.signal  # completes the `import scipy.signal` statement that the dump splits across the previous line

import torch
import torch.nn as nn


def combined_shape(length, shape=None):
    """Return the shape tuple of a buffer holding ``length`` items.

    Args:
        length: Number of items (leading dimension).
        shape: ``None`` for scalar items, a scalar for 1-D items, or an
            iterable of ints for multi-dimensional items.

    Returns:
        ``(length,)``, ``(length, shape)`` or ``(length, *shape)``.
    """
    if shape is None:
        return (length,)
    return (length, shape) if np.isscalar(shape) else (length, *shape)


def mlp(sizes, activation, output_activation=nn.Identity):
    """Build a fully connected network as an ``nn.Sequential``.

    Args:
        sizes: Layer widths, input first and output last.
        activation: Module class instantiated after every hidden layer.
        output_activation: Module class instantiated after the last layer.

    Returns:
        ``nn.Sequential`` alternating ``nn.Linear`` layers and activations.
    """
    layers = []
    for j in range(len(sizes) - 1):
        # Only the final Linear layer gets the output activation.
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)


def count_vars(module):
    """Return the total number of scalar parameters of ``module`` as an int."""
    # numel() is exact integer arithmetic; the previous np.prod(p.shape)
    # produced NumPy scalars and built an intermediate list.
    return sum(p.numel() for p in module.parameters())


class MLPActor(nn.Module):
    """Deterministic policy network with a tanh output layer."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        super().__init__()
        pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
        self.pi = mlp(pi_sizes, activation, nn.Tanh)

    def forward(self, obs):
        # The final nn.Tanh bounds every action component to [-1, 1];
        # no further scaling is applied here.
        return self.pi(obs)


class MLPQFunction(nn.Module):
    """State-action value network mapping ``(obs, act)`` to a scalar Q."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        super().__init__()
        self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation)

    def forward(self, obs, act):
        q = self.q(torch.cat([obs, act], dim=-1))
        # Drop the trailing singleton dimension so q has the batch shape.
        return torch.squeeze(q, -1)


class MLPActorCritic(nn.Module):
    """Bundle of one actor and two Q-functions sharing the same layout.

    Args:
        obs_dim: Size of the observation vector.
        act_dim: Size of the action vector.
        hidden_sizes: Widths of the hidden layers of all three networks.
        activation: Module class used after every hidden layer.
    """

    def __init__(self, obs_dim, act_dim, hidden_sizes=(256, 256),
                 activation=nn.ReLU):
        super().__init__()

        # Build the policy and the twin value functions.
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation)
        self.q1 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)
        self.q2 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)

    def act(self, obs):
        """Return the policy output for ``obs`` as a NumPy array (no grad)."""
        with torch.no_grad():
            # .cpu() is a no-op for CPU tensors and makes the NumPy
            # conversion work when the module lives on another device.
            return self.pi(obs).cpu().numpy()


# Remainder of this dump line (header and first lines of plot_figure/FIGURE_5.m), kept verbatim:
# -------------------------------------------------------------------------------- /plot_figure/FIGURE_5.m: -------------------------------------------------------------------------------- 1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1]; 2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16]; 3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052]; 4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX 5 | Y2_OMA = [-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221]; 6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U 7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05]; 8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01 9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846]; 10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg 11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241]; 12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir 13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318]; 14 | 15 | % fig = figure; 16 | % left_color = [0 0 0]; 17 | % right_color = [0 0 0]; 18 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]); 19 | 20 | %激活左侧 21 | % yyaxis left 22 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k',
'MarkerFaceColor','b'); 23 | hold on 24 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 25 | hold on 26 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 27 | % ylabel('Averaged data rate recieved by each robot') 28 | % hold on 29 | % % yyaxis right 30 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 31 | % hold on 32 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 33 | % hold on 34 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 35 | % hold on 36 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 37 | hold on 38 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 39 | hold on 40 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 41 | ylabel('Averaged data rate recieved by each robot') 42 | hold on 43 | % yyaxis right 44 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 45 | hold on 46 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 47 | hold on 48 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 49 | hold on 50 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$') 51 | xlabel('$P_{\max}$') 52 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, NOMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,NOMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, 
# NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA') 53 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot') 54 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot') 55 | 56 | box on 57 | grid off 58 | -------------------------------------------------------------------------------- /tradition_baseline/A_search.py: --------------------------------------------------------------------------------
# (The comment line above is the end of plot_figure/FIGURE_5.m plus the dump header of this file, kept verbatim.)
"""A* baseline: shortest 4-connected grid path between two map cells.

Run as a script to load the obstacle map, search from (40, 40) to (90, 90)
and write the resulting path to a text file.
"""
import numpy as np
import heapq
from scipy.io import loadmat
import math

x_max = 99
y_max = 99

# Absolute path on the original author's machine; adjust before running.
MAP_FILE = "C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat"
OUTPUT_FILE = 'robot_path_2_v2.txt'


def generate_directions(num_directions):
    """Return ``num_directions`` unit vectors spread evenly over 360 degrees.

    Not used by the default 4-connected search; it is an alternative
    neighbour set for ``a_star_search``.
    """
    angle_step = 360 / num_directions
    return [
        (math.cos(math.radians(i * angle_step)),
         math.sin(math.radians(i * angle_step)))
        for i in range(num_directions)
    ]


def is_valid(x, y, MARK, x_max, y_max):
    """Return True if cell ``(x, y)`` is inside the grid and not an obstacle.

    Coordinates are truncated with ``int``; a cell whose ``MARK`` value is 2
    is treated as an obstacle.
    """
    return 0 <= int(x) < x_max and 0 <= int(y) < y_max and MARK[int(x), int(y)] != 2


def heuristic(a, b):
    """Return the Manhattan distance between points ``a`` and ``b``."""
    return abs(a[0] - b[0]) + abs(a[1] - b[1])


def a_star_search(start, goal, MARK, x_max, y_max):
    """Find a shortest 4-connected path from ``start`` to ``goal`` with A*.

    Args:
        start: ``(x, y)`` tuple of the starting cell.
        goal: ``(x, y)`` tuple of the target cell.
        MARK: 2-D array indexed as ``MARK[x, y]``; value 2 marks an obstacle.
        x_max: Exclusive upper bound for valid x indices.
        y_max: Exclusive upper bound for valid y indices.

    Returns:
        List of cells from ``start`` to ``goal`` inclusive, or ``False`` when
        the goal cannot be reached.
    """
    neighbors = ((0, -1), (-1, 0), (1, 0), (0, 1))
    close_set = set()
    came_from = {}
    gscore = {start: 0}
    oheap = [(heuristic(start, goal), start)]

    while oheap:
        current = heapq.heappop(oheap)[1]

        if current == goal:
            # Walk the predecessor chain back to the start, then reverse it.
            data = [current]
            while current in came_from:
                current = came_from[current]
                data.append(current)
            data.reverse()
            return data

        if current in close_set:
            # Stale heap entry: this cell was already expanded with a cost
            # that is at least as good.
            continue
        close_set.add(current)

        tentative_g_score = gscore[current] + 1
        for i, j in neighbors:
            neighbor = current[0] + i, current[1] + j

            if not is_valid(neighbor[0], neighbor[1], MARK, x_max, y_max):
                continue

            # Unknown cells cost "infinity", so one comparison covers both a
            # first visit and a genuine improvement -- no scan of the heap.
            if tentative_g_score >= gscore.get(neighbor, math.inf):
                continue

            came_from[neighbor] = current
            gscore[neighbor] = tentative_g_score
            heapq.heappush(
                oheap, (tentative_g_score + heuristic(neighbor, goal), neighbor))

    return False


def save_path_to_txt(path, filename):
    """Write ``path`` to ``filename`` as one ``x,y`` pair per line."""
    with open(filename, 'w') as f:
        f.writelines(f"{x},{y}\n" for x, y in path)


def main():
    """Load the obstacle map, run the search and save the resulting path."""
    MARK = loadmat(MAP_FILE)["MARK_new"]

    start = (40, 40)  # Starting point
    goal = (90, 90)   # Goal point

    path = a_star_search(start, goal, MARK, x_max, y_max)

    if path:
        save_path_to_txt(path, OUTPUT_FILE)
        # Report the file that was actually written.
        print(f"Path found and saved to {OUTPUT_FILE}")
    else:
        print("No path found")


if __name__ == "__main__":
    main()

# Remainder of this dump line (header and first lines of tradition_baseline/fig5.m), kept verbatim:
# -------------------------------------------------------------------------------- /tradition_baseline/fig5.m: -------------------------------------------------------------------------------- 1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1]; 2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16]; 3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052]; 4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX 5 | Y2_OMA =
[-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221]; 6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U 7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05]; 8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01 9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846]; 10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg 11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241]; 12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir 13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318]; 14 | 15 | Optimal_NOMA = [-5.66606907, -15.94919966, -21.868942625, -25.36415256, -27.00059453]; 16 | 17 | % fig = figure; 18 | % left_color = [0 0 0]; 19 | % right_color = [0 0 0]; 20 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]); 21 | 22 | %激活左侧 23 | % yyaxis left 24 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 25 | hold on 26 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 27 | hold on 28 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 29 | % ylabel('Averaged data rate recieved by each robot') 30 | % hold on 31 | % % yyaxis right 32 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 33 | % hold on 34 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 35 | % hold on 36 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 37 | % hold on 38 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 39 | hold on 40 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 
'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 41 | hold on 42 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b'); 43 | ylabel('Averaged data rate recieved by each robot') 44 | hold on 45 | % yyaxis right 46 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 47 | hold on 48 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 49 | hold on 50 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 51 | hold on 52 | p7=plot(X_1, Optimal_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b'); 53 | hold on 54 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$') 55 | xlabel('$P_{\max}$') 56 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, NOMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,NOMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA', 'Traditional Optimization Method') 57 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot') 58 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot') 59 | 60 | box on 61 | grid off 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MAAC_DRL 2 | This repository contains the Python implementation of our submitted paper titled "Deep Reinforcement Learning Enables Joint Trajectory and Communication in Internet of Robotic Things" . 
3 | ## Quick Links 4 | [[Introduction]](#introduction) [[Installation]](#installation) [[Usage]](#usage) 5 | ## Introduction 6 | We develop multi-agent actor-critic deep reinforcement learning (MAAC-DRL) algorithms to reduce the decoding error rate and the arrival time of robots in the industrial Internet of Robotic Things (IoRT) under ultra-reliable and low-latency communication requirements. 7 | 8 | Here are the settings of the considered IoRT environment. 9 | | Notation | Simulation Value | Physical Meaning | 10 | | ------------ | ------------------ | ------------------------------------------------------------ | 11 | | $K$ | $\{2, 4, 6\}$ | the number of users | 12 | | $L$ | $\{2, 3\}$ | the number of antennas | 13 | | $K_{\rm MU}$ | $\{1, 2, 3\}$ | the number of robots | 14 | | $D$ | $100 \ {\rm bits}$ | packet size | 15 | | $M$ | $50 \ {\rm symbols}$ | the number of transmitted symbols | 16 | | $T_{\max}$ | $2000 \ {\rm s}$ | the moving deadline of robots | 17 | | $H_0$ | $1 \ {\rm m}$ | the height of antennas | 18 | | $P_{\max}$ | $[0.02, 0.1] \ {\rm W}$ | the maximal transmit power | 19 | | $\sigma^2$ | $-100 \ {\rm dBm/Hz}$ | the variance of the additive white Gaussian noise | 20 | | $v$ | $5 \ {\rm m/s}$ | the moving speed | 21 | 22 | 23 | 24 | ## Results 25 | 26 | 27 | 28 | 29 | 30 |
31 | 32 | For more details and simulation results, please check our paper. 33 | 34 | ## Installation 35 | Dependencies can be installed with Conda: 36 | 37 | For example, to install the environment used for the IoRT scenarios with URLLC requirements: 38 | ``` 39 | conda env create -f environment/environment.yaml -n URLLC 40 | conda activate URLLC 41 | ``` 42 | 43 | Then activate it by 44 | ``` 45 | conda activate URLLC 46 | ``` 47 | To run on the Atari environment, please additionally install the required packages by 48 | ``` 49 | pip install -r environment/requirements.txt 50 | ``` 51 | 52 | ## Usage 53 | 54 | Here are the parameters of our simulations. 55 | | Notation | Simulation Value | Physical Meaning | 56 | | ------------ | ------------------ | ------------------------------------------------------------ | 57 | | $lr$ | $\{10^{-4}, 2 \times 10^{-3}\}$ | the learning rate of the DRL algorithms | 58 | | $\kappa_1$ | $\{0, 0.01, 0.1\}$ | the parameters of the reward designs | 59 | | $\|\mathcal{D}_0\|$ | $128$ | the size of the mini-batch buffer | 60 | | $\|\mathcal{D}\|$ | $10^{6}$ | the maximal size of the experience buffer | 61 | 62 | ### algorithm (`python codes of different MA-DRL algorithms`): 63 | - `'MA-DDPG_main.py'` (Main functions and MDP transitions of MA-DDPG) 64 | - `'MA-PPO_main.py'` (Main functions and MDP transitions of MA-PPO) 65 | - `'MA-SAC_main.py'` (Main functions and MDP transitions of MA-SAC) 66 | - `'MA-TD3_core.py'` (MLP operators of MA-TD3) 67 | - `'MA-TD3_main.py'` (Main functions and MDP transitions of MA-TD3) 68 | 69 | ### environment (`documents of considered system`): 70 | - `'environment.yaml'` (Conda environmental document) 71 | - `'requirements.txt'` (Pip environmental document) 72 | 73 | ### plot_figure (`matlab codes of different algorithms`): 74 | - `'FIGURE_1.m'` (Reward comparison under different MA-DRL algorithms) 75 | - `'FIGURE_2.m'` (Robots' trajectory comparison under different reward settings) 76 | - `'FIGURE_3.m'` (Average decoding error 
probability under different clustering and multiple access scheme) 77 | - `'FIGURE_4.m'` (Objective function under different environmental settings) 78 | - `'FIGURE_5.m'` (Arriving time under different environmental settings) 79 | 80 | ### radio_map (`documents of building environment`): 81 | - `'AABB_plot.m'` (Construct a radio map based on the deployment of obstacles and intersection detection) 82 | - `'map_data.mat'` (Raw data of the built radio map) 83 | 84 | -------------------------------------------------------------------------------- /plot_figure/FIGURE_4.m: -------------------------------------------------------------------------------- 1 | y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_0.txt").'; 2 | y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_1.txt").'; 3 | y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_2.txt").'; 4 | y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_3.txt").'; 5 | y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_4.txt").'; 6 | h4=cdfplot(y1_0(200:500)); 7 | hold on 8 | % h2=cdfplot(y2_0(200:500)); 9 | % hold on 10 | h2=cdfplot(y3_0(200:500)); 11 | % hold on 12 | % h4=cdfplot(y4_0(200:500)); 13 | % hold on 14 | h3=cdfplot(y5_0(200:500)); 15 | hold on 16 | 17 | y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_0.txt").'; 18 | y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_1.txt").'; 19 | y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_2.txt").'; 20 | y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_3.txt").'; 21 | 
y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_4.txt").'; 22 | h5=cdfplot(y1_0(200:500)); 23 | hold on 24 | h6=cdfplot(y2_0(200:500)); 25 | hold on 26 | % h3=cdfplot(y3_0(200:500)); 27 | % hold on 28 | % h4=cdfplot(y4_0(200:500)); 29 | % hold on 30 | h1=cdfplot(y5_0(200:500)); 31 | hold on 32 | % h6=cdfplot(y4(1500:4999)); 33 | % hold on 34 | set(h1,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--') 35 | set(h2,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--') 36 | set(h3,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300,'LineStyle','--') 37 | set(h4,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300) 38 | set(h5,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300) 39 | set(h6,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300) 40 | 41 | ax1 = gca; 42 | % set(gca,'XAxisLocation','top') 43 | % set(gca,'YAxisLocation','right') 44 | 45 | xlim([100 350]); 46 | ylim([0 0.85]); 47 | xlabel('Arriving step','Interpreter','latex') 48 | ylabel('Cumulative distribution function','Interpreter','latex') 49 | set(gca,'XTickLabel',{'$90 \%$','$100 \%$','$110 \%$','$120 \%$','$130 \%$','$140 \%$'}); 50 | set(gca,'YTickLabel',{'0','0.2','0.4','0.6','0.8','1.0'}); 51 | set(gca,'xtick',100:250/5:350) 52 | set(gca,'ytick',0:0.85/5:0.85) 53 | 54 | % 去掉上面和右面边框上的刻度 保留边框 55 | % box off; 56 | grid off 57 | 58 | xl=xlim; 59 | yl=ylim; 60 | % line([xl(1),xl(2)],[yl(2),yl(2)],'color',[0 0 0]); %画上边框,线条的颜色设置为黑色 61 | % line([xl(2),xl(2)],[yl(1),yl(2)],'color',[0 0 0]); %画右边框 ,线条的颜色设置为黑色 62 | gs=legend([h1 h2 h3 h4 h5 h6],{'$P_{\max}=0.02, \kappa_1=0.1$','$P_{\max}=0.04, \kappa_1=0.1$','$P_{\max}=0.1, \kappa_1=0.1$','$P_{\max}=0.02, \kappa_1=0.01$','$P_{\max}=0.04, 
\kappa_1=0.01$'},'Interpreter','latex','Location','northwest','NumColumns',1); 63 | % set(gs,'Location',best) 64 | title(" ") 65 | 66 | % ax2=axes('Position',get(ax1,'Position'),... 67 | % 'XAxisLocation','top',... 68 | % 'YAxisLocation','left',... 69 | % 'Color','none',... 70 | % 'XColor','b','YColor','b'); 71 | % hold on 72 | % 73 | % X_1 = [0, 0.02, 0.04, 0.06, 0.08, 0.1]; 74 | % % Y1_1 = [73.32247929, 73.59625821, 73.87003713, 73.58897458, 73.54327335, 73.57012282, 73.48700371, 73.74321622, 73.56069694, 73.43016281, 73.41759497]; 75 | % % Y1_2 = [77.09425878, 77.96301057, 78.32762068, 78.77320765, 78.1179663, 78.80548415, 78.4038846, 78.67466438, 78.16966581, 78.41473865, 78.54641531]; 76 | % % Y1_3 = [82.97800628, 84.27606398, 84.41930877, 83.87746358, 83.93316195, 84.33647529, 84.48843188, 84.8706084, 84.52499286, 85.03199086, 84.51185376]; 77 | % Y2_1 = [70.94397619, 80.87746462, 83.66667014, 85.63058596, 86.90541567, 87.85830977, 88.64414142, 89.31973965, 89.89561792, 90.38694579, 90.87500257]; 78 | % Y2_2 = [70.96419081, 80.91497367, 83.69259198, 85.64478655, 86.8985237, 87.87413717, 88.68100766, 89.43362594, 89.9297314, 90.50355753, 90.8866812]; 79 | % Y2_3 = [70.99211407, 80.94282523, 83.75781121, 85.71406009, 87.01403988, 87.92457729, 88.7456004, 89.4045163, 90.00031461, 90.44857253, 90.92024851]; 80 | % p4=plot(X_1,Y2_1,'b:o','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b'); 81 | % p5=plot(X_1,Y2_2,'b-.*','LineWidth',1.2, 'MarkerSize',6, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b'); 82 | % p6=plot(X_1,Y2_3,'b--^','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b'); 83 | % ylabel('Average sum-rate','Interpreter','latex') 84 | % xlabel('Power budget $P_{\max}$','Interpreter','latex') 85 | % % legend([p4 p5 p6],{'$\kappa_1=0.0001$','$\kappa_1=0.002$','$\kappa_1=0.005$'},'Location','NorthWest','Interpreter','latex','NumColumns',1) 86 | % % 
legend('$\kappa_1=0.002$','$\kappa_1=0.005$','$\kappa_1=0.0001$') 87 | % xlim([0 1]); 88 | % % set(gca,'XTickLabel',{'0','0.2','0.4','0.6','0.8','1'}); 89 | % set(gca,'XAxisLocation','bottom') 90 | % set(gca,'xtick',0:0.2:1) 91 | % grid off 92 | -------------------------------------------------------------------------------- /algorithm/MA-TD3_main.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import itertools 3 | import numpy as np 4 | import torch 5 | from torch.optim import Adam 6 | import core 7 | 8 | 9 | class ReplayBuffer: 10 | """ 11 | A simple FIFO experience replay buffer for TD3 agents. 12 | """ 13 | 14 | def __init__(self, obs_dim, act_dim, size): 15 | self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) 16 | self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) 17 | self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) 18 | self.rew_buf = np.zeros(size, dtype=np.float32) 19 | self.done_buf = np.zeros(size, dtype=np.float32) 20 | self.ptr, self.size, self.max_size = 0, 0, size 21 | 22 | def store(self, obs, act, rew, next_obs, done): 23 | self.obs_buf[self.ptr] = obs 24 | self.obs2_buf[self.ptr] = next_obs 25 | self.act_buf[self.ptr] = act 26 | self.rew_buf[self.ptr] = rew 27 | self.done_buf[self.ptr] = done 28 | self.ptr = (self.ptr+1) % self.max_size 29 | self.size = min(self.size+1, self.max_size) 30 | 31 | def sample_batch(self, batch_size=32): 32 | idxs = np.random.randint(0, self.size, size=batch_size) 33 | batch = dict(obs=self.obs_buf[idxs], 34 | obs2=self.obs2_buf[idxs], 35 | act=self.act_buf[idxs], 36 | rew=self.rew_buf[idxs], 37 | done=self.done_buf[idxs]) 38 | return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()} 39 | 40 | class TD3: 41 | def __init__(self, obs_dim, act_dim, actor_critic=core.MLPActorCritic, 42 | replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, 
q_lr=5e-4, 43 | act_noise=0.5, target_noise=0.25, noise_clip=0.5, policy_delay=2): 44 | self.obs_dim = obs_dim 45 | self.act_dim = act_dim 46 | self.gamma = gamma 47 | self.polyak = polyak 48 | self.act_noise = act_noise 49 | self.target_noise = target_noise 50 | self.noise_clip = noise_clip 51 | self.policy_delay = policy_delay 52 | self.replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim,size=replay_size) 53 | 54 | self.ac = actor_critic(obs_dim, act_dim) 55 | self.ac_targ = deepcopy(self.ac) 56 | 57 | for p in self.ac_targ.parameters(): 58 | p.requires_grad = False 59 | 60 | # List of parameters for both Q-networks (save this for convenience) 61 | self.q_params = itertools.chain(self.ac.q1.parameters(), self.ac.q2.parameters()) 62 | 63 | # Set up optimizers for policy and q-function 64 | self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr) 65 | self.q_optimizer = Adam(self.q_params, lr=q_lr) 66 | 67 | # Experience buffer 68 | replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) 69 | def compute_loss_q(self,data): 70 | o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done'] 71 | 72 | q1 = self.ac.q1(o,a) 73 | q2 = self.ac.q2(o,a) 74 | 75 | # Bellman backup for Q functions 76 | with torch.no_grad(): 77 | pi_targ = self.ac_targ.pi(o2) 78 | 79 | # Target policy smoothing 80 | epsilon = torch.randn_like(pi_targ) * self.target_noise 81 | epsilon = torch.clamp(epsilon, -self.noise_clip, self.noise_clip) 82 | a2 = pi_targ + epsilon 83 | a2 = torch.clamp(a2, -1, 1) 84 | 85 | # Target Q-values 86 | q1_pi_targ = self.ac_targ.q1(o2, a2) 87 | q2_pi_targ = self.ac_targ.q2(o2, a2) 88 | q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ) 89 | backup = r + self.gamma * (1 - d) * q_pi_targ 90 | 91 | # MSE loss against Bellman backup 92 | loss_q1 = ((q1 - backup)**2).mean() 93 | loss_q2 = ((q2 - backup)**2).mean() 94 | loss_q = loss_q1 + loss_q2 95 | 96 | return loss_q 97 | 98 | def compute_loss_pi(self, data): 99 | 
def compute_loss_pi(self, data):
    """Return the policy loss for a sampled batch.

    The loss is the negated mean of ``self.ac.q1`` evaluated at the actions
    that ``self.ac.pi`` proposes for ``data['obs']``; minimising it moves the
    policy towards actions the first critic scores highly.

    Args:
        data: Batch mapping; only ``data['obs']`` is read here.
    """
    o = data['obs']
    q1_pi = self.ac.q1(o, self.ac.pi(o))
    return -q1_pi.mean()


def update(self, batch_size, repeat_times):
    """Run ``repeat_times`` optimisation rounds on replay-buffer batches.

    Every round takes one gradient step on the critic loss.  Every
    ``self.policy_delay``-th round (starting with the first) additionally
    takes one policy step and polyak-averages ``self.ac`` into
    ``self.ac_targ``.

    Args:
        batch_size: Forwarded to ``self.replay_buffer.sample_batch``.
        repeat_times: Number of rounds; truncated with ``int()``.
    """
    # ``self.q_params`` is a one-shot ``itertools.chain`` iterator that was
    # handed to the critic optimizer's constructor, which drains it.  Looping
    # over it here would visit nothing and leave the critics unfrozen, so the
    # parameters are collected afresh from the networks instead.
    q_params = list(self.ac.q1.parameters()) + list(self.ac.q2.parameters())

    for i in range(int(repeat_times)):
        data = self.replay_buffer.sample_batch(batch_size)

        # One gradient-descent step for Q1 and Q2.
        self.q_optimizer.zero_grad()
        loss_q = self.compute_loss_q(data)
        loss_q.backward()
        self.q_optimizer.step()

        # The policy and the target networks are only updated on delayed rounds.
        if i % self.policy_delay != 0:
            continue

        # Freeze the critics so the policy step does not waste effort
        # computing gradients for them.
        for p in q_params:
            p.requires_grad = False
        try:
            self.pi_optimizer.zero_grad()
            loss_pi = self.compute_loss_pi(data)
            loss_pi.backward()
            self.pi_optimizer.step()
        finally:
            # Always unfreeze, so the next critic step can train them again
            # even if the policy step raised.
            for p in q_params:
                p.requires_grad = True

        # Polyak averaging of the target networks.  In-place ``mul_``/``add_``
        # are used so that no new tensors are created.
        with torch.no_grad():
            for p, p_targ in zip(self.ac.parameters(), self.ac_targ.parameters()):
                p_targ.data.mul_(self.polyak)
                p_targ.data.add_((1 - self.polyak) * p.data)


def get_action(self, o, noise_scale):
    """Return an exploratory action for observation ``o``.

    The action from ``self.ac.act`` is perturbed with standard-normal noise of
    length ``self.act_dim`` scaled by ``noise_scale`` and clipped to [-1, 1].

    Args:
        o: Observation; converted to a ``float32`` tensor before the call.
        noise_scale: Multiplier applied to the Gaussian noise.
    """
    a = self.ac.act(torch.as_tensor(o, dtype=torch.float32))
    a += noise_scale * np.random.randn(self.act_dim)
    return np.clip(a, -1, 1)
% FIGURE_3: radio map overlaid with the MU trajectories of the distance-aware
% and communication-aware runs, their start/destination markers and the SUs.

% All inputs live below one directory; change it here only.
dataDir  = "C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/";
shortDir = dataDir + "路径短/";        % trajectories shown as 'Distance-Aware'
sinrDir  = dataDir + "路径短+SINR/";   % trajectories shown as 'Communication-Aware'

radio_map_ = -load(dataDir + "mapdata_0717.mat").MARK_PL;
img = imagesc(radio_map_);  % draw the radio map as the background
colorbar;
hold on

% Trajectories are stored as separate x/y text files and transposed on load.
y1 = importdata(shortDir + "y_k169_38.txt").';
x1 = importdata(shortDir + "x_k169_38.txt").';
y2 = importdata(shortDir + "y_k2131_35.txt").';
x2 = importdata(shortDir + "x_k2131_35.txt").';
y3 = importdata(shortDir + "y_k3255_44.txt").';
x3 = importdata(shortDir + "x_k3255_44.txt").';

y4 = importdata(sinrDir + "y_k1428_95.txt").';
x4 = importdata(sinrDir + "x_k1428_95.txt").';
y5 = importdata(sinrDir + "y_k2424_32.txt").';
x5 = importdata(sinrDir + "x_k2424_32.txt").';
y6 = importdata(sinrDir + "y_k3483_48.txt").';
x6 = importdata(sinrDir + "x_k3483_48.txt").';

% SU positions (p5 is the handle used in the second legend).
p5 = plot(50,17,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2);
plot(50,50,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
plot(50,84,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)

% Distance-aware trajectories (circles); p1 is kept for the legend.
for i = 1:1:length(x1)
    p1 = plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.93,0.69,0.13],'MarkerFaceColor',[0.93,0.69,0.13]);
end
for i = 1:1:length(x2)
    plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.85,0.33,0.10],'MarkerFaceColor',[0.85,0.33,0.10]);
end
for i = 1:1:length(x3)
    plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[1.00,0.41,0.16],'MarkerFaceColor',[1.00,0.41,0.16]);
end

% Communication-aware trajectories (triangles); p2 is kept for the legend.
for i = 1:1:length(x4)
    p2 = plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.76,0.43,0.96],'MarkerFaceColor',[0.76,0.43,0.96]);
end
for i = 1:1:length(x5)
    plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58],'MarkerFaceColor',[0.40,0.14,0.58])
end
for i = 1:1:length(x6)
    plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.58,0.27,0.78],'MarkerFaceColor',[0.58,0.27,0.78])
end

% Start ('+') and destination ('X') of the three distance-aware trajectories.
p3 = plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50],'MarkerFaceColor',[0,0,0],'MarkerSize',13,'LineWidth',3);
p4 = plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50],'MarkerFaceColor',[0,0,0],'MarkerSize',13,'LineWidth',3);
plot(y2(1),x2(1),'+','MarkerEdgeColor',[0 0 0],'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3);
plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0 0 0],'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3)
plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30],'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13,'LineWidth',3);
% Fixed: this end point used '+', i.e. it was drawn as a starting point.
plot(y3(length(x3)),x3(length(x3)),'X','MarkerEdgeColor',[0.31,0.30,0.30],'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13,'LineWidth',3);

view(-90,90);
axis equal
axis([0 100 0 100]);
legend([p1 p2],{'Distance-Aware','Communication-Aware'},'Location','NorthWest','Interpreter','latex','NumColumns',2)

% Ticks sit every 20 grid cells and are labelled 0..50.
set(gca,'XTick',0:20:100)
set(gca,'XTickLabel',{'0','10','20','30','40','50'})
set(gca,'YTick',0:20:100)
set(gca,'YTickLabel',{'0','10','20','30','40','50'})
xlabel('$x \ ({\rm m})$','Interpreter','latex')
ylabel('$y \ ({\rm m})$','Interpreter','latex')
box on

% A second, invisible axes object carries the second legend.
ah = axes('position',get(gca,'position'),'visible','off');
legend(ah,[p3 p4 p5],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
% fig3: radio map overlaid with the MA-DDPG trajectories (distance-aware and
% communication-aware) and the A* search paths, plus start/destination and
% SU markers.

% All inputs live below these two directories; change them here only.
dataDir  = "C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/";
shortDir = dataDir + "路径短/";        % trajectories shown as 'Distance-Aware'
sinrDir  = dataDir + "路径短+SINR/";   % trajectories shown as 'Communication-Aware'
astarDir = 'C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/';

radio_map_ = -load(dataDir + "mapdata_0717.mat").MARK_PL;
img = imagesc(radio_map_);  % draw the radio map as the background
colorbar;
hold on

% Trajectories are stored as separate x/y text files and transposed on load.
y1 = importdata(shortDir + "y_k169_38.txt").';
x1 = importdata(shortDir + "x_k169_38.txt").';
y2 = importdata(shortDir + "y_k2131_35.txt").';
x2 = importdata(shortDir + "x_k2131_35.txt").';
y3 = importdata(shortDir + "y_k3255_44.txt").';
x3 = importdata(shortDir + "x_k3255_44.txt").';

y4 = importdata(sinrDir + "y_k1428_95.txt").';
x4 = importdata(sinrDir + "x_k1428_95.txt").';
y5 = importdata(sinrDir + "y_k2424_32.txt").';
x5 = importdata(sinrDir + "x_k2424_32.txt").';
y6 = importdata(sinrDir + "y_k3483_48.txt").';
x6 = importdata(sinrDir + "x_k3483_48.txt").';

% SU positions (p6 is the handle used in the second legend).
p6 = plot(50,17,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2);
plot(50,50,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
plot(50,84,'p','MarkerEdgeColor',[1 0 0],'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)

% Distance-aware trajectories (circles); p1 is kept for the legend.
for i = 1:1:length(x1)
    p1 = plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.15,0.15,0.15],'MarkerFaceColor',[0.15,0.15,0.15]);
end
for i = 1:1:length(x2)
    plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[1.00,0.41,0.16],'MarkerFaceColor',[1.00,0.41,0.16]);
end
for i = 1:1:length(x3)
    plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[0.58,0.27,0.78],'MarkerFaceColor',[0.58,0.27,0.78]);
end

% Communication-aware trajectories (triangles); p2 is kept for the legend.
for i = 1:1:length(x4)
    p2 = plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.24,0.24,0.24],'MarkerFaceColor',[0.24,0.24,0.24]);
end
for i = 1:1:length(x5)
    plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.85,0.33,0.10],'MarkerFaceColor',[0.85,0.33,0.10])
end
for i = 1:1:length(x6)
    plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58],'MarkerFaceColor',[0.40,0.14,0.58])
end

% Start ('+') and destination ('X') of the three distance-aware trajectories.
p4 = plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50],'MarkerFaceColor',[0,0,0],'MarkerSize',13,'LineWidth',3);
p5 = plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50],'MarkerFaceColor',[0,0,0],'MarkerSize',13,'LineWidth',3);
plot(y2(1),x2(1),'+','MarkerEdgeColor',[0 0 0],'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3);
plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0 0 0],'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3)
plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30],'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13,'LineWidth',3);
% Fixed: this end point used '+', i.e. it was drawn as a starting point.
plot(y3(length(x3)),x3(length(x3)),'X','MarkerEdgeColor',[0.31,0.30,0.30],'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13,'LineWidth',3);

% A* search paths: comma-separated "x,y" rows.  From here on x1..x3 / y1..y3
% hold the A* paths instead of the MA-DDPG trajectories.
fileID = fopen([astarDir 'robot_path_1.txt'], 'r');
data = textscan(fileID, '%f32,%f32', 'Delimiter', ',');
x1 = data{1};
y1 = data{2};
fclose(fileID);
fileID = fopen([astarDir 'robot_path_2.txt'], 'r');
data = textscan(fileID, '%f,%f', 'Delimiter', ',');
x2 = data{1};
y2 = data{2};
fclose(fileID);
fileID = fopen([astarDir 'robot_path_3.txt'], 'r');
data = textscan(fileID, '%f,%f', 'Delimiter', ',');
x3 = data{1};
y3 = data{2};
fclose(fileID);

% Every second A* waypoint is drawn (stars); p3 is kept for the legend.
for i = 1:2:length(x1)
    p3 = plot(y1(i),x1(i),'*','MarkerSize',5,'MarkerEdgeColor',[0.50,0.50,0.50],'MarkerFaceColor',[0.50,0.50,0.50]);
end
for i = 1:2:length(x2)
    plot(y2(i),x2(i),'*','MarkerSize',6,'MarkerEdgeColor',[0.93,0.69,0.13],'MarkerFaceColor',[0.93,0.69,0.13]);
end
for i = 1:2:length(x3)
    plot(y3(i),x3(i),'*','MarkerSize',5,'MarkerEdgeColor',[0.76,0.43,0.96],'MarkerFaceColor',[0.76,0.43,0.96]);
end

view(-90,90);
axis equal
axis([0 100 0 100]);
legend([p1 p2 p3],{'Distance-Aware MA-DDPG ($\kappa_1=0$)','Communication-Aware MA-DDPG ($\kappa_1=0.1$)', '$A^{\ast}$ Search Algorithm'},'Location','NorthWest','Interpreter','latex','NumColumns',2)

% Ticks sit every 20 grid cells and are labelled 0..50.
set(gca,'XTick',0:20:100)
set(gca,'XTickLabel',{'0','10','20','30','40','50'})
set(gca,'YTick',0:20:100)
set(gca,'YTickLabel',{'0','10','20','30','40','50'})
xlabel('$x \ ({\rm m})$','Interpreter','latex')
ylabel('$y \ ({\rm m})$','Interpreter','latex')
box on

% A second, invisible axes object carries the second legend.
ah = axes('position',get(gca,'position'),'visible','off');
legend(ah,[p4 p5 p6],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
tf 2 | from tensorflow import keras 3 | from keras.layers import * 4 | import numpy as np 5 | import gym 6 | import matplotlib.pyplot as plt 7 | import math 8 | from scipy.io import loadmat 9 | np.random.seed(2) 10 | tf.random.set_seed(2) 11 | 12 | EP_MAX = 500 13 | BATCH = 32 14 | EP_LEN = 1000 15 | GAMMA = 0.9 16 | A_LR = 0.0001 17 | C_LR = 0.0005 18 | 19 | A_UPDATE_STEPS = 20 20 | C_UPDATE_STEPS = 20 21 | S_DIM, A_DIM = 2, 10 22 | epsilon=0.2 23 | 24 | n_width=100 25 | n_height = 100 26 | m = loadmat("mapdata_0717.mat") 27 | #correct_action=0 28 | MARK= m["MARK_new"] 29 | PL_AP=m["MARK_PL_real"] 30 | n_mu=3 31 | n_M=5 32 | n_o=6*7 33 | a_bound=1 34 | 35 | class PPO(object): 36 | 37 | def __init__(self): 38 | self.opt_a = tf.compat.v1.train.AdamOptimizer(A_LR) 39 | self.opt_c = tf.compat.v1.train.AdamOptimizer(C_LR) 40 | 41 | self.model_a = self._build_anet(trainable=True) 42 | self.model_a_old = self._build_anet(trainable=False) 43 | self.model_c = self._build_cnet() 44 | 45 | def _build_anet(self,trainable=True): 46 | tfs_a = Input([S_DIM], ) 47 | l1 = Dense(100, 'relu',trainable=trainable)(tfs_a) 48 | mu = a_bound * Dense(A_DIM, 'tanh',trainable=trainable)(l1) 49 | sigma = Dense(A_DIM, 'softplus',trainable=trainable)(l1) 50 | model_a = keras.models.Model(inputs=tfs_a, outputs=[mu, sigma]) 51 | return model_a 52 | 53 | def _build_cnet(self): 54 | tfs_c = Input([S_DIM], ) 55 | l1 = Dense(100, 'relu')(tfs_c) 56 | v = Dense(1)(l1) 57 | model_c = keras.models.Model(inputs=tfs_c, outputs=v) 58 | model_c.compile(optimizer=self.opt_c, loss='mse') 59 | return model_c 60 | 61 | def update(self, s, a, r): 62 | self.model_a_old.set_weights(self.model_a.get_weights()) 63 | 64 | mu, sigma = self.model_a_old(s) 65 | oldpi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma) 66 | old_prob_a = oldpi.prob(a) 67 | 68 | v = self.get_v(s) 69 | adv = r - v 70 | 71 | for i in range(A_UPDATE_STEPS): 72 | with tf.GradientTape() as tape: 73 | mu, sigma = self.model_a(s) 74 | pi = 
tf.compat.v1.distributions.Normal(loc=mu, scale=sigma) 75 | ratio = pi.prob(a) / (old_prob_a + 1e-5) 76 | surr = ratio * adv 77 | x2 = tf.clip_by_value(ratio, 1. - epsilon, 1. + epsilon) * adv 78 | x3 = tf.minimum(surr, x2) 79 | aloss = -tf.reduce_mean(x3) 80 | 81 | a_grads = tape.gradient(aloss, self.model_a.trainable_weights) 82 | a_grads_and_vars = zip(a_grads, self.model_a.trainable_weights) 83 | self.opt_a.apply_gradients(a_grads_and_vars) 84 | 85 | self.model_c.fit(s, r, verbose=0, shuffle=False,epochs=C_UPDATE_STEPS) 86 | 87 | def choose_action(self, s): 88 | mu, sigma = self.model_a(s) 89 | pi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma) 90 | a = tf.squeeze(pi.sample(1), axis=0) 91 | return np.clip(a, -2, 2) 92 | 93 | def get_v(self, s): 94 | v = self.model_c(s) 95 | return v 96 | 97 | def cosVector(x,y): 98 | result1=0.0; 99 | result2=0.0; 100 | result3=0.0; 101 | for i in range(len(x)): 102 | result1+=x[i]*y[i] #sum(X*Y) 103 | result2+=x[i]**2 #sum(X*X) 104 | result3+=y[i]**2 #sum(Y*Y) 105 | return result1/((result2*result3)**0.5) 106 | 107 | ppo = PPO() 108 | end_location = [15*2,32*2] 109 | all_ep_r = [] 110 | all_ep_reward_p=[] 111 | for ep in range(EP_MAX): #train 112 | s = np.array([4*2,5*2], dtype=np.float32) 113 | buffer_s, buffer_a, buffer_r = [], [], [] 114 | ep_r = 0 115 | done1 = False 116 | distance_01_max=math.sqrt((s[0]-end_location[0])*(s[0]-end_location[0])/4+(s[1]-end_location[1])*(s[1]-end_location[1])/4) 117 | 118 | s = np.reshape(s, (-1, S_DIM)) 119 | observation_su1 = np.array([17, 25*2], dtype=np.float32) 120 | for t in range(EP_LEN): # in one episode 121 | a = ppo.choose_action(s) 122 | if not done1: 123 | [old_x, old_y] = s[0] 124 | new_x, new_y = int(old_x), int(old_y) 125 | new_x=int(old_x+a[0,0]) 126 | new_y=int(old_y+a[0,1]) 127 | if int(new_x) <= 0: 128 | new_x = 1 129 | if int(new_x) >= n_width: 130 | new_x = int(n_width)-1 131 | if int(new_y) <= 0: 132 | new_y = 1 133 | if int(new_y) >= n_height: 134 | new_y = 
int(n_height)-1 135 | if MARK[new_x,new_y] == 2: 136 | new_x, new_y = old_x, old_y 137 | s_=np.array([new_x, new_y], dtype=np.float32) 138 | else: 139 | s_ = s 140 | a=a[0] 141 | if a[8]==-1: 142 | a[8]=-0.9999999 143 | # if action2[8]==-1: 144 | # action2[8]=-0.9999999 145 | # if action3[8]==-1: 146 | # action3[8]=-0.9999999 147 | if a[8]==1: 148 | a[8]=0.9999999 149 | 150 | w_1=np.array([a[2]* math.exp(1)**(1j*(1+a[3])*math.pi), a[4]* math.exp(1)**(1j*(1+a[5])*math.pi), a[6]* math.exp(1)**(1j*(1+a[7])*math.pi)]) 151 | # w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)]) 152 | # w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)]) 153 | theta_1=cosVector([1,0,0],[s_[0]-50,s_[1]-100, 1-2]) 154 | a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])# 155 | b_1_AP_LOS=math.sqrt(PL_AP[int(s_[0]), int(s_[1])]) 156 | h_1=b_1_AP_LOS*a_1 157 | interference_1=10**(-9) 158 | # theta_2=cosVector([1,0,0],[observation2_[0]-50,observation2_[1]-100, 1-2]) 159 | # a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])# 160 | # b_2_AP_LOS=math.sqrt(PL_AP[int(observation2_[0]), int(observation2_[1])]) 161 | # h_2=b_2_AP_LOS*a_2 162 | # interference_2=10**(-9) 163 | # theta_3=cosVector([1,0,0],[observation3_[0]-50,observation3_[1]-100, 1-2]) 164 | # a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])# 165 | # b_3_AP_LOS=math.sqrt(PL_AP[int(observation3_[0]), int(observation3_[1])]) 166 | # h_3=b_3_AP_LOS*a_3 167 | # interference_3=10**(-9) 168 | theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2]) 169 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), 
math.exp(1)**(-2*1j*(math.pi*theta_4))])# 170 | b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])]) 171 | h_4=b_4_AP_LOS*a_4 172 | interference_4=10**(-9) 173 | 174 | if a[8]>0: 175 | interference_1+=(1-(a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2 176 | else: 177 | interference_4+=((a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2 178 | 179 | SINR_1=((a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1 180 | # SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2 181 | # SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3 182 | SINR_4=(1-(a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4 183 | 184 | 185 | buffer_s.append(s) 186 | buffer_a.append(a) 187 | distance_01_2=(s_[0]-end_location[0])*(s_[0]-end_location[0])/4+(s_[1]-end_location[1])*(s_[1]-end_location[1])/4 188 | distance_01 = math.sqrt(distance_01_2) 189 | s_ = np.reshape(s_, (-1, S_DIM)) 190 | r= -(distance_01/50) 191 | if distance_01==0: 192 | done1 = True 193 | #os.system("pause") 194 | r=1 195 | r = np.reshape(r, (-1, 1)) 196 | buffer_r.append(r) # normalize reward, find to be useful 197 | a = np.reshape(a, (-1, A_DIM)) 198 | s_ = np.reshape(s_, (-1, S_DIM)) 199 | s = s_ 200 | ep_r += r[0] 201 | 202 | # update ppo 203 | if (t + 1) % BATCH == 0 or t == EP_LEN - 1 or done1: 204 | #print("here") 205 | v_s_ = ppo.get_v(s_)[0,0] 206 | discounted_r = [] 207 | for r in buffer_r[::-1]: 208 | v_s_ = r + GAMMA * v_s_ 209 | discounted_r.append(v_s_) 210 | discounted_r.reverse() 211 | 212 | bs = np.vstack(buffer_s) 213 | ba = np.vstack(buffer_a) 214 | br = np.array(discounted_r) 215 | buffer_s, buffer_a, buffer_r = [], [], [] 216 | ppo.update(bs, ba, br) 217 | if done1: 218 | print("success!!!!!!!!!!!!") 219 | break 220 | if ep == 0: 221 | # all_ep_r.append(ep_r) 222 | all_ep_reward_p.append(ep_r) 223 | else: 224 | # all_ep_r.append(all_ep_r[-1] * 0.9 + ep_r * 0.1) 225 | all_ep_reward_p.append(all_ep_reward_p[-1] * 0.9 + ep_r * 0.1) 226 | print( 227 | 'Ep: 
%i' % ep, 228 | "|Ep_r: %i" % ep_r, 229 | ) 230 | 231 | plt.plot(all_ep_reward_p) 232 | 233 | 234 | 235 | # while 1: #play 236 | # s = env.reset() 237 | # for t in range(EP_LEN): 238 | # s = s.reshape([-1, S_DIM]) 239 | # env.render() 240 | # s, r, done, info = env.step(ppo.choose_action(s)) 241 | # if done: 242 | # break 243 | -------------------------------------------------------------------------------- /tradition_baseline/pso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.special import erfcinv 3 | import numpy as np 4 | from sklearn.cluster import KMeans 5 | import matplotlib.pyplot as plt 6 | from scipy.io import loadmat 7 | import math 8 | import numpy as np 9 | from scipy.special import erfc 10 | 11 | m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat") 12 | #correct_action=0 13 | MARK= m["MARK_new"] 14 | PL_AP=m["MARK_PL_real"] 15 | 16 | def cosVector(x,y): 17 | result1=0.0; 18 | result2=0.0; 19 | result3=0.0; 20 | for i in range(len(x)): 21 | result1+=x[i]*y[i] #sum(X*Y) 22 | result2+=x[i]**2 #sum(X*X) 23 | result3+=y[i]**2 #sum(Y*Y) 24 | return result1/((result2*result3)**0.5) 25 | 26 | # 高斯Q函数的逆函数 27 | def Q_inv(x): 28 | return np.sqrt(2) * erfcinv(2 * x) 29 | 30 | # 计算V(γk(t)) 31 | def V(gamma): 32 | # 根据具体公式定义V(γk(t)) 33 | return gamma # Placeholder, replace with actual formula if necessary 34 | 35 | # 目标函数 36 | def objective_function(w, num_points, A1, A2, h_k, sigma2, M, D): 37 | # 计算gamma_k(t) 38 | term_tol=0 39 | db_tol=0 40 | for i in range(num_points): 41 | gamma_k_t = np.abs(A1[i] @ w)**2 / (i * np.abs(A1[i] @ w)**2 + sigma2) 42 | 43 | # 计算目标函数 44 | term1 = np.log(2) * np.sqrt(M / (1-(1+gamma_k_t)**(-2))) 45 | term2 = np.log2(1 + gamma_k_t) - D / M 46 | term_tol=term1 * term2 47 | db_tol+=math.log10(max(1-0.5 * erfc(term_tol / np.sqrt(2)),10**(-20))) 48 | 49 | return (db_tol/num_points) 50 | 51 | import random 52 | 53 | 
class Particle:
    """A single candidate solution in the particle swarm.

    The particle lives in the unit hyper-cube ``[0, 1]**dimension``: its
    position is initialised uniformly in ``[0, 1)``, its velocity uniformly
    in ``[-0.5, 0.5)``, and ``update_position`` clips the position back into
    ``[0, 1]`` after every move.

    Args:
        dimension: Number of coordinates of the search space.
        rng: Optional ``numpy.random.Generator`` (for example
            ``np.random.default_rng(seed)``) used for every random draw of
            this particle.  When omitted, NumPy's global generator is used,
            so ``np.random.seed(...)`` in the caller makes a run repeatable.
    """

    def __init__(self, dimension, rng=None):
        # All randomness comes from ONE NumPy source.  Previously the
        # velocity update drew from the stdlib ``random`` module while the
        # initial state came from NumPy, so seeding NumPy alone could not
        # reproduce a run.
        self._rng = np.random if rng is None else rng
        self.position = self._rng.random(dimension)
        self.velocity = self._rng.random(dimension) - 0.5
        # Personal best seen so far; -inf guarantees the first evaluated
        # score replaces it.
        self.best_position = self.position.copy()
        self.best_score = -np.inf

    def update_velocity(self, global_best_position, inertia_weight=0.5,
                        cognitive_coeff=2, social_coeff=2):
        """Blend inertia, pull to the personal best and pull to the swarm best.

        One scalar random factor is drawn for the cognitive term and one for
        the social term (in that order), and each is applied to all
        coordinates.

        Args:
            global_best_position: Best position found by the whole swarm.
            inertia_weight: Fraction of the previous velocity that is kept.
            cognitive_coeff: Weight of the pull towards ``best_position``.
            social_coeff: Weight of the pull towards ``global_best_position``.
        """
        cognitive_draw = self._rng.random()
        social_draw = self._rng.random()
        cognitive_component = (
            cognitive_coeff * cognitive_draw * (self.best_position - self.position)
        )
        social_component = (
            social_coeff * social_draw * (global_best_position - self.position)
        )
        self.velocity = (
            inertia_weight * self.velocity + cognitive_component + social_component
        )

    def update_position(self):
        """Move by the current velocity and clip back into ``[0, 1]``."""
        self.position += self.velocity
        self.position = np.clip(self.position, 0, 1)  # Ensure within bounds


class PSO:
    """Particle swarm optimiser that MAXIMISES ``objective_function``.

    Args:
        objective_function: Callable invoked as
            ``objective_function(position, *args)`` that returns a score
            compared with ``>``; larger is better.
        dimension: Number of coordinates of the search space.
        swarm_size: Number of particles.
        iterations: Number of evaluate-then-move rounds.
        rng: Optional ``numpy.random.Generator`` shared by the optimiser and
            all of its particles.  When omitted, NumPy's global generator is
            used.
    """

    def __init__(self, objective_function, dimension, swarm_size=30,
                 iterations=100, rng=None):
        self.objective_function = objective_function
        self.dimension = dimension
        self.swarm_size = swarm_size
        self.iterations = iterations
        self._rng = np.random if rng is None else rng
        self.swarm = [Particle(dimension, rng=rng) for _ in range(swarm_size)]
        # Placeholder until the first evaluation; it is only ever returned
        # when no particle produced a score greater than -inf.
        self.global_best_position = self._rng.random(dimension)
        self.global_best_score = -np.inf

    def optimize(self, *args):
        """Run the swarm and return the best position and score found.

        Args:
            *args: Extra positional arguments forwarded unchanged to
                ``objective_function`` after the particle position.

        Returns:
            A tuple ``(global_best_position, global_best_score)``.
        """
        for _ in range(self.iterations):
            # Phase 1: score every particle, refreshing personal and global
            # bests.  Positions are copied because update_position mutates
            # the particle's array in place.
            for particle in self.swarm:
                score = self.objective_function(particle.position, *args)
                if score > particle.best_score:
                    particle.best_score = score
                    particle.best_position = particle.position.copy()

                if score > self.global_best_score:
                    self.global_best_score = score
                    self.global_best_position = particle.position.copy()

            # Phase 2: move every particle using the bests found above.
            for particle in self.swarm:
                particle.update_velocity(self.global_best_position)
                particle.update_position()

        return self.global_best_position, self.global_best_score
range(3): 101 | import matplotlib.pyplot as plt 102 | exec('''path_file = 'C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_{}.txt' '''.format(k+1)) 103 | with open(path_file, 'r') as f: 104 | lines = f.readlines() 105 | 106 | # 解析路径数据 107 | path = [(int(line.split(',')[0]), int(line.split(',')[1])) for line in lines] 108 | 109 | # 提取 x 和 y 坐标 110 | exec('''x_coords_{} = [point[0] for point in path]'''.format(k+1)) 111 | exec('''y_coords_{} = [point[1] for point in path]'''.format(k+1)) 112 | 113 | def calculate_interference(coords, labels, cluster_num): 114 | interference = 0 115 | cluster_points = coords[labels == cluster_num] 116 | num_points = len(cluster_points) 117 | for i in range(num_points): 118 | for j in range(i + 1, num_points): 119 | interference += np.linalg.norm(cluster_points[i] - cluster_points[j]) 120 | return interference 121 | 122 | power_array=[math.sqrt(0.02), math.sqrt(0.04), math.sqrt(0.06), math.sqrt(0.08),math.sqrt(0.1)] 123 | for power_j in range(len(power_array)): 124 | w = np.random.rand(3) 125 | for t in range(77): 126 | # print('time+++++++++') 127 | # for cl in range(1): 128 | # for cl in range(2): 129 | # # 假设三个机器人的坐标 130 | robot_coords = np.array([[x_coords_1[t], y_coords_1[t]], [x_coords_2[t], y_coords_2[t]]]) 131 | 132 | # 假设三个固定用户的坐标 133 | user_coords = np.array([[17, 50], [50, 50], [84, 50]]) 134 | 135 | # 合并所有坐标 136 | all_coords = np.vstack((robot_coords, user_coords)) 137 | 138 | # 定义 KMeans 模型 139 | kmeans = KMeans(n_clusters=3) 140 | 141 | # 使用所有坐标进行聚类 142 | kmeans.fit(all_coords) 143 | 144 | # 获取聚类结果 145 | labels = kmeans.labels_ 146 | 147 | # 获取聚类中心 148 | cluster_centers = kmeans.cluster_centers_ 149 | 150 | # 打印结果 151 | # print("Labels:", labels) 152 | # print("Cluster centers:", cluster_centers) 153 | 154 | # 绘制聚类结果 155 | # plt.scatter(all_coords[:, 0], all_coords[:, 1], c=labels, cmap='viridis') 156 | # plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1], s=300, c='red', 
marker='x') 157 | # plt.xlabel('X') 158 | # plt.ylabel('Y') 159 | # plt.title('KMeans Clustering of Robots and Users') 160 | # plt.show() 161 | 162 | robot_powers = np.array([10, 20, 30, 10, 20]) 163 | theta_1=cosVector([1,0,0],[all_coords[0][0]-50,all_coords[0][1]-100, 1-2]) 164 | aLP_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])# 165 | b_1_AP_LOS=math.sqrt(PL_AP[int(all_coords[0][0]), int(all_coords[0][1])]) 166 | h_1=b_1_AP_LOS*aLP_1 167 | interference_1=10**(-9) 168 | theta_2=cosVector([1,0,0],[all_coords[1][0]-50,all_coords[1][1]-100, 1-2]) 169 | aLP_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])# 170 | b_2_AP_LOS=math.sqrt(PL_AP[int(all_coords[1][0]), int(all_coords[1][1])]) 171 | h_2=b_2_AP_LOS*aLP_2 172 | interference_2=10**(-9) 173 | theta_3=cosVector([1,0,0],[all_coords[2][0]-50,all_coords[2][1]-100, 1-2]) 174 | aLP_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])# 175 | b_3_AP_LOS=math.sqrt(PL_AP[int(all_coords[2][0]), int(all_coords[2][1])]) 176 | h_3=b_3_AP_LOS*aLP_3 177 | interference_3=10**(-9) 178 | theta_4=cosVector([1,0,0],[all_coords[3][0]-50,all_coords[3][1]-100, 1-2]) 179 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])# 180 | b_4_AP_LOS=math.sqrt(PL_AP[int(all_coords[3][0]), int(all_coords[3][1])]) 181 | h_4=b_4_AP_LOS*a_4 182 | interference_4=10**(-9) 183 | theta_5=cosVector([1,0,0],[all_coords[4][0]-50,all_coords[4][1]-100, 1-2]) 184 | a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), math.exp(1)**(-2*1j*(math.pi*theta_5))])# 185 | b_5_AP_LOS=math.sqrt(PL_AP[int(all_coords[4][0]), int(all_coords[4][1])]) 186 | h_5=b_5_AP_LOS*a_5 187 | interference_5=10**(-9) 188 | # theta_6=cosVector([1,0,0],[all_coords[5][0]-50,all_coords[5][1]-100, 1-2]) 189 | # a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), 
math.exp(1)**(-2*1j*(math.pi*theta_6))])# 190 | # b_6_AP_LOS=math.sqrt(PL_AP[int(all_coords[5][0]), int(all_coords[5][1])]) 191 | # h_6=b_6_AP_LOS*a_6 192 | # interference_6=10**(-9) 193 | H_array=[] 194 | H_array.append(h_1) 195 | H_array.append(h_2) 196 | H_array.append(h_3) 197 | H_array.append(h_4) 198 | H_array.append(h_5) 199 | # H_array.append(h_6) 200 | H_array=np.array(H_array) 201 | 202 | for r in range(len(robot_powers)): 203 | robot_powers[r]=np.abs(H_array[r] @ w*power_array[power_j]/math.sqrt(3))**2 204 | 205 | # 206 | num_clusters = 3 207 | interference_list = [] 208 | 209 | gamma_avg=0 210 | gamma_array=np.zeros(3) 211 | 212 | for cluster_num in range(num_clusters): 213 | # interference = calculate_interference(all_coords, labels, cluster_num) 214 | w = np.random.rand(3) 215 | previous_A = w 216 | counter = 0 217 | for episode in range(500): 218 | interference = 0 219 | cluster_points = all_coords[labels == cluster_num] 220 | cluster_powers = robot_powers[labels == cluster_num] 221 | 222 | # 根据功率对簇内的用户进行排序,功率大的优先 223 | sorted_indices = np.argsort(-cluster_powers) 224 | sorted_points = cluster_points[sorted_indices] 225 | # sorted_powers = cluster_powers[sorted_indices] 226 | cluster_hk = H_array[labels == cluster_num] 227 | # cluster_wk = wk[labels == cluster_num] 228 | 229 | # for i_j in range(len(sorted_points)): 230 | # sorted_powers 231 | 232 | num_points = len(sorted_points) 233 | # for i in range(num_points): 234 | interference = 0 235 | A1 = cluster_hk * power_array[power_j]/math.sqrt(3) 236 | A2 = 1 237 | h_k = H_array # 示例信道向量 238 | # K_c = [0, 1, 2, 3] # 示例其他用户索引 239 | sigma2 = 10**(-9) 240 | M = 50 241 | D = 100 242 | 243 | # 粒子群优化 244 | pso = PSO(objective_function, dimension=3, swarm_size=30, iterations=100) 245 | w, best_score = pso.optimize(num_points, A1, A2, h_k, sigma2, M, D) 246 | 247 | 248 | # sinr = np.abs(A1 @ w)**2 / (i * np.abs(A1 @ w)**2 + sigma2) 249 | 250 | # print("最佳位置(波束成形向量):", best_position) 251 | # if episode==99: 
252 | if w.all() == previous_A.all(): 253 | counter += 1 254 | else: 255 | counter = 0 256 | 257 | previous_A = w 258 | 259 | # 如果A持续十轮不变,则跳出循环 260 | if counter >= 10: 261 | # print(cluster_num, 'sucess!!!!!!!!!!!!') 262 | break 263 | 264 | 265 | 266 | for i in range(num_points): 267 | gamma_k_t = np.abs(A1[i] @ w)**2 / (i * np.abs(A1[i] @ w)**2 + sigma2) 268 | gamma_avg+=gamma_k_t 269 | gamma_k_t=gamma_k_t#/num_points 270 | 271 | gamma_array[cluster_num] = best_score 272 | # print("最佳得分:", best_score)#math.log10(max(1-0.5 * erfc(best_score / np.sqrt(2)),10**(-20)))) 273 | 274 | filename='DB_NOMA_new_'+str(power_j)+'.txt' 275 | with open (filename, 'a') as fileobject: 276 | fileobject.write(str((gamma_array[0]+gamma_array[1]+gamma_array[2])/6)+'\n') 277 | 278 | 279 | # for j in range(0, i): 280 | # # 计算干扰,假设干扰与距离成反比 281 | # interference += np.abs(H_array[i] @ w*power_array[power_j]/math.sqrt(3))**2 282 | 283 | # sinr += sorted_powers[i] / interference # 假设干扰公式 284 | # return interference 285 | # interference_list.append(interference) 286 | 287 | # 打印每个簇的干扰 288 | # for i, interference in enumerate(interference_list): 289 | # print(f"Cluster {i} interference: {interference}") 290 | 291 | 292 | # 示例参数 293 | -------------------------------------------------------------------------------- /environment/environment.yaml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch 4 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ 6 | - defaults 7 | - conda-forge 8 | dependencies: 9 | - alabaster=0.7.12=pyhd3eb1b0_0 10 | - anaconda-client=1.11.2=py310haa95532_0 11 | - anaconda-navigator=2.4.0=py310haa95532_0 12 | - anaconda-project=0.11.1=py310haa95532_0 13 | - anyio=3.5.0=py310haa95532_0 14 | - appdirs=1.4.4=pyhd3eb1b0_0 15 | - argon2-cffi=21.3.0=pyhd3eb1b0_0 16 | - 
argon2-cffi-bindings=21.2.0=py310h2bbff1b_0 17 | - arrow=1.2.3=py310haa95532_1 18 | - astroid=2.14.2=py310haa95532_0 19 | - astropy=5.1=py310h9128911_0 20 | - asttokens=2.0.5=pyhd3eb1b0_0 21 | - atomicwrites=1.4.0=py_0 22 | - attrs=22.1.0=py310haa95532_0 23 | - automat=20.2.0=py_0 24 | - autopep8=1.6.0=pyhd3eb1b0_1 25 | - babel=2.11.0=py310haa95532_0 26 | - backcall=0.2.0=pyhd3eb1b0_0 27 | - backports=1.1=pyhd3eb1b0_0 28 | - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0 29 | - backports.tempfile=1.0=pyhd3eb1b0_1 30 | - backports.weakref=1.0.post1=py_1 31 | - bcrypt=3.2.0=py310h2bbff1b_1 32 | - beautifulsoup4=4.11.1=py310haa95532_0 33 | - binaryornot=0.4.4=pyhd3eb1b0_1 34 | - black=22.6.0=py310haa95532_0 35 | - blas=1.0=mkl 36 | - bleach=4.1.0=pyhd3eb1b0_0 37 | - blosc=1.21.3=h6c2663c_0 38 | - bokeh=2.4.3=py310haa95532_0 39 | - boltons=23.0.0=py310haa95532_0 40 | - bottleneck=1.3.5=py310h9128911_0 41 | - brotli=1.0.9=h2bbff1b_7 42 | - brotli-bin=1.0.9=h2bbff1b_7 43 | - brotlipy=0.7.0=py310h2bbff1b_1002 44 | - bzip2=1.0.8=he774522_0 45 | - ca-certificates=2023.05.30=haa95532_0 46 | - certifi=2023.5.7=py310haa95532_0 47 | - cffi=1.15.1=py310h2bbff1b_3 48 | - cfitsio=3.470=h2bbff1b_7 49 | - chardet=4.0.0=py310haa95532_1003 50 | - charls=2.2.0=h6c2663c_0 51 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 52 | - click=8.0.4=py310haa95532_0 53 | - cloudpickle=2.0.0=pyhd3eb1b0_0 54 | - clyent=1.2.2=py310haa95532_1 55 | - colorama=0.4.6=py310haa95532_0 56 | - colorcet=3.0.1=py310haa95532_0 57 | - comm=0.1.2=py310haa95532_0 58 | - conda=23.3.1=py310haa95532_0 59 | - conda-build=3.24.0=py310haa95532_0 60 | - conda-content-trust=0.1.3=py310haa95532_0 61 | - conda-pack=0.6.0=pyhd3eb1b0_0 62 | - conda-package-handling=2.0.2=py310haa95532_0 63 | - conda-package-streaming=0.7.0=py310haa95532_0 64 | - conda-repo-cli=1.0.41=py310haa95532_0 65 | - conda-token=0.4.0=pyhd3eb1b0_0 66 | - conda-verify=3.4.2=py_1 67 | - console_shortcut=0.1.1=4 68 | - constantly=15.1.0=py310haa95532_0 69 | - 
contourpy=1.0.5=py310h59b6b97_0 70 | - cookiecutter=1.7.3=pyhd3eb1b0_0 71 | - cryptography=39.0.1=py310h21b164f_0 72 | - cssselect=1.1.0=pyhd3eb1b0_0 73 | - curl=7.87.0=h2bbff1b_0 74 | - cycler=0.11.0=pyhd3eb1b0_0 75 | - cytoolz=0.12.0=py310h2bbff1b_0 76 | - daal4py=2023.0.2=py310hf497b98_0 77 | - dal=2023.0.1=h59b6b97_26646 78 | - dask=2022.7.0=py310haa95532_0 79 | - dask-core=2022.7.0=py310haa95532_0 80 | - datashader=0.14.4=py310haa95532_0 81 | - datashape=0.5.4=py310haa95532_1 82 | - debugpy=1.5.1=py310hd77b12b_0 83 | - decorator=5.1.1=pyhd3eb1b0_0 84 | - defusedxml=0.7.1=pyhd3eb1b0_0 85 | - diff-match-patch=20200713=pyhd3eb1b0_0 86 | - dill=0.3.6=py310haa95532_0 87 | - distributed=2022.7.0=py310haa95532_0 88 | - docstring-to-markdown=0.11=py310haa95532_0 89 | - docutils=0.18.1=py310haa95532_3 90 | - entrypoints=0.4=py310haa95532_0 91 | - et_xmlfile=1.1.0=py310haa95532_0 92 | - executing=0.8.3=pyhd3eb1b0_0 93 | - filelock=3.9.0=py310haa95532_0 94 | - flake8=6.0.0=py310haa95532_0 95 | - flask=2.2.2=py310haa95532_0 96 | - flit-core=3.6.0=pyhd3eb1b0_0 97 | - fonttools=4.25.0=pyhd3eb1b0_0 98 | - freetype=2.12.1=ha860e81_0 99 | - fsspec=2022.11.0=py310haa95532_0 100 | - future=0.18.3=py310haa95532_0 101 | - gensim=4.3.0=py310h4ed8f06_0 102 | - giflib=5.2.1=h8cc25b3_3 103 | - glib=2.69.1=h5dc1a3c_2 104 | - glob2=0.7=pyhd3eb1b0_0 105 | - greenlet=2.0.1=py310hd77b12b_0 106 | - gst-plugins-base=1.18.5=h9e645db_0 107 | - gstreamer=1.18.5=hd78058f_0 108 | - hdf5=1.10.6=h1756f20_1 109 | - heapdict=1.0.1=pyhd3eb1b0_0 110 | - holoviews=1.15.4=py310haa95532_0 111 | - huggingface_hub=0.10.1=py310haa95532_0 112 | - hvplot=0.8.2=py310haa95532_0 113 | - hyperlink=21.0.0=pyhd3eb1b0_0 114 | - icc_rt=2022.1.0=h6049295_2 115 | - icu=58.2=ha925a31_3 116 | - idna=3.4=py310haa95532_0 117 | - imagecodecs=2021.8.26=py310h4c966c4_2 118 | - imageio=2.26.0=py310haa95532_0 119 | - imagesize=1.4.1=py310haa95532_0 120 | - imbalanced-learn=0.10.1=py310haa95532_0 121 | - 
importlib-metadata=4.11.3=py310haa95532_0 122 | - importlib_metadata=4.11.3=hd3eb1b0_0 123 | - incremental=21.3.0=pyhd3eb1b0_0 124 | - inflection=0.5.1=py310haa95532_0 125 | - iniconfig=1.1.1=pyhd3eb1b0_0 126 | - intake=0.6.7=py310haa95532_0 127 | - intel-openmp=2021.4.0=haa95532_3556 128 | - intervaltree=3.1.0=pyhd3eb1b0_0 129 | - ipykernel=6.19.2=py310h9909e9c_0 130 | - ipython=8.10.0=py310haa95532_0 131 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 132 | - ipywidgets=7.6.5=pyhd3eb1b0_1 133 | - isort=5.9.3=pyhd3eb1b0_0 134 | - itemadapter=0.3.0=pyhd3eb1b0_0 135 | - itemloaders=1.0.4=pyhd3eb1b0_1 136 | - itsdangerous=2.0.1=pyhd3eb1b0_0 137 | - jedi=0.18.1=py310haa95532_1 138 | - jellyfish=0.9.0=py310h2bbff1b_0 139 | - jinja2=3.1.2=py310haa95532_0 140 | - jinja2-time=0.2.0=pyhd3eb1b0_3 141 | - jmespath=0.10.0=pyhd3eb1b0_0 142 | - joblib=1.1.1=py310haa95532_0 143 | - jpeg=9e=h2bbff1b_1 144 | - jq=1.6=haa95532_1 145 | - json5=0.9.6=pyhd3eb1b0_0 146 | - jsonpatch=1.32=pyhd3eb1b0_0 147 | - jsonpointer=2.1=pyhd3eb1b0_0 148 | - jsonschema=4.17.3=py310haa95532_0 149 | - jupyter=1.0.0=py310haa95532_8 150 | - jupyter_client=7.3.4=py310haa95532_0 151 | - jupyter_console=6.6.2=py310haa95532_0 152 | - jupyter_core=5.2.0=py310haa95532_0 153 | - jupyter_server=1.23.4=py310haa95532_0 154 | - jupyterlab=3.5.3=py310haa95532_0 155 | - jupyterlab_pygments=0.1.2=py_0 156 | - jupyterlab_server=2.19.0=py310haa95532_0 157 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 158 | - jxrlib=1.1=he774522_2 159 | - keyring=23.4.0=py310haa95532_0 160 | - kiwisolver=1.4.4=py310hd77b12b_0 161 | - lazy-object-proxy=1.6.0=py310h2bbff1b_0 162 | - lcms2=2.12=h83e58a3_0 163 | - lerc=3.0=hd77b12b_0 164 | - libaec=1.0.4=h33f27b4_1 165 | - libarchive=3.6.2=hebabd0d_0 166 | - libbrotlicommon=1.0.9=h2bbff1b_7 167 | - libbrotlidec=1.0.9=h2bbff1b_7 168 | - libbrotlienc=1.0.9=h2bbff1b_7 169 | - libcurl=7.87.0=h86230a5_0 170 | - libdeflate=1.17=h2bbff1b_0 171 | - libffi=3.4.2=hd77b12b_6 172 | - libiconv=1.16=h2bbff1b_2 173 | 
- liblief=0.12.3=hd77b12b_0 174 | - libogg=1.3.5=h2bbff1b_1 175 | - libpng=1.6.39=h8cc25b3_0 176 | - libsodium=1.0.18=h62dcd97_0 177 | - libspatialindex=1.9.3=h6c2663c_0 178 | - libssh2=1.10.0=hcd4344a_0 179 | - libtiff=4.5.0=h6c2663c_2 180 | - libuv=1.44.2=h2bbff1b_0 181 | - libvorbis=1.3.7=he774522_0 182 | - libwebp=1.2.4=hbc33d0d_1 183 | - libwebp-base=1.2.4=h2bbff1b_1 184 | - libxml2=2.9.14=h0ad7f3c_0 185 | - libxslt=1.1.35=h2bbff1b_0 186 | - libzopfli=1.0.3=ha925a31_0 187 | - llvmlite=0.39.1=py310h23ce68f_0 188 | - locket=1.0.0=py310haa95532_0 189 | - lxml=4.9.1=py310h1985fb9_0 190 | - lz4=3.1.3=py310h2bbff1b_0 191 | - lz4-c=1.9.4=h2bbff1b_0 192 | - lzo=2.10=he774522_2 193 | - m2-msys2-runtime=2.5.0.17080.65c939c=3 194 | - m2-patch=2.7.5=2 195 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 196 | - markdown=3.4.1=py310haa95532_0 197 | - markupsafe=2.1.1=py310h2bbff1b_0 198 | - matplotlib=3.7.0=py310haa95532_0 199 | - matplotlib-base=3.7.0=py310h4ed8f06_0 200 | - matplotlib-inline=0.1.6=py310haa95532_0 201 | - mccabe=0.7.0=pyhd3eb1b0_0 202 | - menuinst=1.4.19=py310h59b6b97_0 203 | - mistune=0.8.4=py310h2bbff1b_1000 204 | - mkl=2021.4.0=haa95532_640 205 | - mkl-service=2.4.0=py310h2bbff1b_0 206 | - mkl_fft=1.3.1=py310ha0764ea_0 207 | - mkl_random=1.2.2=py310h4ed8f06_0 208 | - mock=4.0.3=pyhd3eb1b0_0 209 | - mpmath=1.2.1=py310haa95532_0 210 | - msgpack-python=1.0.3=py310h59b6b97_0 211 | - msys2-conda-epoch=20160418=1 212 | - multipledispatch=0.6.0=py310haa95532_0 213 | - munkres=1.1.4=py_0 214 | - mypy_extensions=0.4.3=py310haa95532_1 215 | - navigator-updater=0.3.0=py310haa95532_0 216 | - nbclassic=0.5.2=py310haa95532_0 217 | - nbclient=0.5.13=py310haa95532_0 218 | - nbconvert=6.5.4=py310haa95532_0 219 | - nbformat=5.7.0=py310haa95532_0 220 | - nest-asyncio=1.5.6=py310haa95532_0 221 | - networkx=2.8.4=py310haa95532_0 222 | - ninja=1.10.2=haa95532_5 223 | - ninja-base=1.10.2=h6d14046_5 224 | - nltk=3.7=pyhd3eb1b0_0 225 | - notebook=6.5.2=py310haa95532_0 226 | - 
notebook-shim=0.2.2=py310haa95532_0 227 | - numba=0.56.4=py310h4ed8f06_0 228 | - numexpr=2.8.4=py310hd213c9f_0 229 | - numpy=1.23.5=py310h60c9a35_0 230 | - numpy-base=1.23.5=py310h04254f7_0 231 | - numpydoc=1.5.0=py310haa95532_0 232 | - openjpeg=2.4.0=h4fc8c34_0 233 | - openpyxl=3.0.10=py310h2bbff1b_0 234 | - openssl=1.1.1t=h2bbff1b_0 235 | - packaging=22.0=py310haa95532_0 236 | - pandas=1.5.3=py310h4ed8f06_0 237 | - pandocfilters=1.5.0=pyhd3eb1b0_0 238 | - panel=0.14.3=py310haa95532_0 239 | - param=1.12.3=py310haa95532_0 240 | - paramiko=2.8.1=pyhd3eb1b0_0 241 | - parsel=1.6.0=py310haa95532_0 242 | - parso=0.8.3=pyhd3eb1b0_0 243 | - partd=1.2.0=pyhd3eb1b0_1 244 | - pathlib=1.0.1=pyhd3eb1b0_1 245 | - pathspec=0.10.3=py310haa95532_0 246 | - patsy=0.5.3=py310haa95532_0 247 | - pcre=8.45=hd77b12b_0 248 | - pep8=1.7.1=py310haa95532_1 249 | - pexpect=4.8.0=pyhd3eb1b0_3 250 | - pickleshare=0.7.5=pyhd3eb1b0_1003 251 | - pillow=9.4.0=py310hd77b12b_0 252 | - pip=22.3.1=py310haa95532_0 253 | - pkginfo=1.9.6=py310haa95532_0 254 | - platformdirs=2.5.2=py310haa95532_0 255 | - plotly=5.9.0=py310haa95532_0 256 | - pluggy=1.0.0=py310haa95532_1 257 | - ply=3.11=py310haa95532_0 258 | - pooch=1.4.0=pyhd3eb1b0_0 259 | - powershell_shortcut=0.0.1=3 260 | - poyo=0.5.0=pyhd3eb1b0_0 261 | - prometheus_client=0.14.1=py310haa95532_0 262 | - prompt-toolkit=3.0.36=py310haa95532_0 263 | - prompt_toolkit=3.0.36=hd3eb1b0_0 264 | - protego=0.1.16=py_0 265 | - psutil=5.9.0=py310h2bbff1b_0 266 | - ptyprocess=0.7.0=pyhd3eb1b0_2 267 | - pure_eval=0.2.2=pyhd3eb1b0_0 268 | - py=1.11.0=pyhd3eb1b0_0 269 | - py-lief=0.12.3=py310hd77b12b_0 270 | - pyasn1=0.4.8=pyhd3eb1b0_0 271 | - pyasn1-modules=0.2.8=py_0 272 | - pycodestyle=2.10.0=py310haa95532_0 273 | - pycosat=0.6.4=py310h2bbff1b_0 274 | - pycparser=2.21=pyhd3eb1b0_0 275 | - pyct=0.5.0=py310haa95532_0 276 | - pycurl=7.45.1=py310hcd4344a_0 277 | - pydispatcher=2.0.5=py310haa95532_2 278 | - pydocstyle=6.3.0=py310haa95532_0 279 | - 
pyerfa=2.0.0=py310h2bbff1b_0 280 | - pyflakes=3.0.1=py310haa95532_0 281 | - pygments=2.11.2=pyhd3eb1b0_0 282 | - pyhamcrest=2.0.2=pyhd3eb1b0_2 283 | - pyjwt=2.4.0=py310haa95532_0 284 | - pylint=2.16.2=py310haa95532_0 285 | - pyls-spyder=0.4.0=pyhd3eb1b0_0 286 | - pynacl=1.5.0=py310h8cc25b3_0 287 | - pyodbc=4.0.34=py310hd77b12b_0 288 | - pyopenssl=23.0.0=py310haa95532_0 289 | - pyparsing=3.0.9=py310haa95532_0 290 | - pyqt=5.15.7=py310hd77b12b_0 291 | - pyqt5-sip=12.11.0=py310hd77b12b_0 292 | - pyqtwebengine=5.15.7=py310hd77b12b_0 293 | - pyrsistent=0.18.0=py310h2bbff1b_0 294 | - pysocks=1.7.1=py310haa95532_0 295 | - pytables=3.7.0=py310h388bc9b_1 296 | - pytest=7.1.2=py310haa95532_0 297 | - python=3.10.9=h966fe2a_1 298 | - python-dateutil=2.8.2=pyhd3eb1b0_0 299 | - python-fastjsonschema=2.16.2=py310haa95532_0 300 | - python-libarchive-c=2.9=pyhd3eb1b0_1 301 | - python-lsp-black=1.2.1=py310haa95532_0 302 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 303 | - python-lsp-server=1.7.1=py310haa95532_0 304 | - python-slugify=5.0.2=pyhd3eb1b0_0 305 | - python-snappy=0.6.1=py310hd77b12b_0 306 | - pytoolconfig=1.2.5=py310haa95532_1 307 | - pytorch=2.0.1=py3.10_cpu_0 308 | - pytorch-mutex=1.0=cpu 309 | - pytz=2022.7=py310haa95532_0 310 | - pyviz_comms=2.0.2=pyhd3eb1b0_0 311 | - pywavelets=1.4.1=py310h2bbff1b_0 312 | - pywin32=305=py310h2bbff1b_0 313 | - pywin32-ctypes=0.2.0=py310haa95532_1000 314 | - pywinpty=2.0.10=py310h5da7b33_0 315 | - pyyaml=6.0=py310h2bbff1b_1 316 | - pyzmq=23.2.0=py310hd77b12b_0 317 | - qdarkstyle=3.0.2=pyhd3eb1b0_0 318 | - qt-main=5.15.2=he8e5bd7_7 319 | - qt-webengine=5.15.9=hb9a9bb5_5 320 | - qtpy=2.2.0=py310haa95532_0 321 | - qtwebkit=5.212=h3ad3cdb_4 322 | - queuelib=1.5.0=py310haa95532_0 323 | - regex=2022.7.9=py310h2bbff1b_0 324 | - requests=2.28.1=py310haa95532_0 325 | - requests-file=1.5.1=pyhd3eb1b0_0 326 | - requests-toolbelt=0.9.1=pyhd3eb1b0_0 327 | - rope=1.7.0=py310haa95532_0 328 | - rtree=1.0.1=py310h2eaa2aa_0 329 | - 
ruamel.yaml=0.17.21=py310h2bbff1b_0 330 | - ruamel.yaml.clib=0.2.6=py310h2bbff1b_1 331 | - ruamel_yaml=0.17.21=py310h2bbff1b_0 332 | - scikit-image=0.19.3=py310hd77b12b_1 333 | - scikit-learn=1.2.1=py310hd77b12b_0 334 | - scikit-learn-intelex=2023.0.2=py310haa95532_0 335 | - scipy=1.10.0=py310hb9afe5d_1 336 | - scrapy=2.8.0=py310haa95532_0 337 | - seaborn=0.12.2=py310haa95532_0 338 | - send2trash=1.8.0=pyhd3eb1b0_1 339 | - service_identity=18.1.0=pyhd3eb1b0_1 340 | - setuptools=65.6.3=py310haa95532_0 341 | - sip=6.6.2=py310hd77b12b_0 342 | - six=1.16.0=pyhd3eb1b0_1 343 | - smart_open=5.2.1=py310haa95532_0 344 | - snappy=1.1.9=h6c2663c_0 345 | - sniffio=1.2.0=py310haa95532_1 346 | - snowballstemmer=2.2.0=pyhd3eb1b0_0 347 | - sortedcontainers=2.4.0=pyhd3eb1b0_0 348 | - soupsieve=2.3.2.post1=py310haa95532_0 349 | - sphinx=5.0.2=py310haa95532_0 350 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 351 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 352 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 353 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 354 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 355 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 356 | - spyder=5.4.1=py310haa95532_0 357 | - spyder-kernels=2.4.1=py310haa95532_0 358 | - sqlalchemy=1.4.39=py310h2bbff1b_0 359 | - sqlite=3.40.1=h2bbff1b_0 360 | - stack_data=0.2.0=pyhd3eb1b0_0 361 | - statsmodels=0.13.5=py310h9128911_1 362 | - sympy=1.11.1=py310haa95532_0 363 | - tabulate=0.8.10=py310haa95532_0 364 | - tbb=2021.7.0=h59b6b97_0 365 | - tbb4py=2021.7.0=py310h59b6b97_0 366 | - tblib=1.7.0=pyhd3eb1b0_0 367 | - tenacity=8.0.1=py310haa95532_1 368 | - terminado=0.17.1=py310haa95532_0 369 | - text-unidecode=1.3=pyhd3eb1b0_0 370 | - textdistance=4.2.1=pyhd3eb1b0_0 371 | - threadpoolctl=2.2.0=pyh0d69192_0 372 | - three-merge=0.1.1=pyhd3eb1b0_0 373 | - tifffile=2021.7.2=pyhd3eb1b0_2 374 | - tinycss2=1.2.1=py310haa95532_0 375 | - tk=8.6.12=h2bbff1b_0 376 | - tldextract=3.2.0=pyhd3eb1b0_0 377 | - 
tokenizers=0.11.4=py310he5181cf_1 378 | - toml=0.10.2=pyhd3eb1b0_0 379 | - tomli=2.0.1=py310haa95532_0 380 | - tomlkit=0.11.1=py310haa95532_0 381 | - toolz=0.12.0=py310haa95532_0 382 | - tornado=6.1=py310h2bbff1b_0 383 | - tqdm=4.64.1=py310haa95532_0 384 | - traitlets=5.7.1=py310haa95532_0 385 | - transformers=4.24.0=py310haa95532_0 386 | - twisted=22.2.0=py310h2bbff1b_1 387 | - twisted-iocpsupport=1.0.2=py310h2bbff1b_0 388 | - typing-extensions=4.4.0=py310haa95532_0 389 | - typing_extensions=4.4.0=py310haa95532_0 390 | - tzdata=2022g=h04d1e81_0 391 | - ujson=5.4.0=py310hd77b12b_0 392 | - unidecode=1.2.0=pyhd3eb1b0_0 393 | - urllib3=1.26.14=py310haa95532_0 394 | - vc=14.2=h21ff451_1 395 | - vs2015_runtime=14.27.29016=h5e58377_2 396 | - w3lib=1.21.0=pyhd3eb1b0_0 397 | - watchdog=2.1.6=py310haa95532_0 398 | - wcwidth=0.2.5=pyhd3eb1b0_0 399 | - webencodings=0.5.1=py310haa95532_1 400 | - websocket-client=0.58.0=py310haa95532_4 401 | - werkzeug=2.2.2=py310haa95532_0 402 | - whatthepatch=1.0.2=py310haa95532_0 403 | - wheel=0.38.4=py310haa95532_0 404 | - widgetsnbextension=3.5.2=py310haa95532_0 405 | - win_inet_pton=1.1.0=py310haa95532_0 406 | - wincertstore=0.2=py310haa95532_2 407 | - winpty=0.4.3=4 408 | - wrapt=1.14.1=py310h2bbff1b_0 409 | - xarray=2022.11.0=py310haa95532_0 410 | - xlwings=0.29.1=py310haa95532_0 411 | - xz=5.2.10=h8cc25b3_1 412 | - yaml=0.2.5=he774522_0 413 | - yapf=0.31.0=pyhd3eb1b0_0 414 | - zeromq=4.3.4=hd77b12b_0 415 | - zfp=0.5.5=hd77b12b_6 416 | - zict=2.1.0=py310haa95532_0 417 | - zipp=3.11.0=py310haa95532_0 418 | - zlib=1.2.13=h8cc25b3_0 419 | - zope=1.0=py310haa95532_1 420 | - zope.interface=5.4.0=py310h2bbff1b_0 421 | - zstandard=0.19.0=py310h2bbff1b_0 422 | - zstd=1.5.2=h19a0ad4_0 423 | - pip: 424 | - absl-py==1.4.0 425 | - ale-py==0.8.1 426 | - astunparse==1.6.3 427 | - cachetools==5.3.0 428 | - dm-tree==0.1.8 429 | - docker-pycreds==0.4.0 430 | - farama-notifications==0.0.4 431 | - flatbuffers==23.5.9 432 | - gast==0.4.0 433 | - 
gitdb==4.0.10 434 | - gitpython==3.1.31 435 | - google-auth==2.18.1 436 | - google-auth-oauthlib==1.0.0 437 | - google-pasta==0.2.0 438 | - grpcio==1.54.2 439 | - gym==0.26.2 440 | - gym-notices==0.0.8 441 | - gymnasium==0.28.1 442 | - importlib-resources==5.12.0 443 | - jax==0.4.10 444 | - jax-jumpy==1.0.0 445 | - keras==2.12.0 446 | - libclang==16.0.0 447 | - ml-dtypes==0.1.0 448 | - numpy-stl==3.0.1 449 | - oauthlib==3.2.2 450 | - opencv-contrib-python==4.7.0.72 451 | - opt-einsum==3.3.0 452 | - pathtools==0.1.2 453 | - progressbar2==4.2.0 454 | - protobuf==3.20.3 455 | - pyglet==1.5.27 456 | - pylint-venv==2.3.0 457 | - python-utils==3.5.2 458 | - qstylizer==0.2.2 459 | - qtawesome==1.2.2 460 | - qtconsole==5.4.0 461 | - requests-oauthlib==1.3.1 462 | - rsa==4.9 463 | - sentry-sdk==1.24.0 464 | - setproctitle==1.3.2 465 | - smmap==5.0.0 466 | - sumolib==1.17.0 467 | - tensorboard==2.12.3 468 | - tensorboard-data-server==0.7.0 469 | - tensorboardx==2.6 470 | - tensordict==0.1.2 471 | - tensorflow==2.12.0 472 | - tensorflow-estimator==2.12.0 473 | - tensorflow-intel==2.12.0 474 | - tensorflow-io-gcs-filesystem==0.31.0 475 | - tensorflow-probability==0.20.1 476 | - tensorlayer==2.2.5 477 | - termcolor==2.3.0 478 | - torchrl==0.1.1 479 | - traci==1.17.0 480 | - visdom==0.2.4 481 | - wandb==0.15.3 482 | prefix: D:\anaconda 483 | -------------------------------------------------------------------------------- /environment/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work 3 | ale-py==0.8.1 4 | anaconda-client==1.11.2 5 | anaconda-navigator==2.4.0 6 | anaconda-project @ file:///C:/Windows/TEMP/abs_91fu4tfkih/croots/recipe/anaconda-project_1660339890874/work 7 | anyio @ file:///C:/ci/anyio_1644481856696/work/dist 8 | appdirs==1.4.4 9 | argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work 10 | argon2-cffi-bindings 
@ file:///C:/ci/argon2-cffi-bindings_1644569876605/work 11 | arrow @ file:///C:/b/abs_cal7u12ktb/croot/arrow_1676588147908/work 12 | astroid @ file:///C:/b/abs_d4lg3_taxn/croot/astroid_1676904351456/work 13 | astropy @ file:///C:/ci/astropy_1657719642921/work 14 | asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work 15 | astunparse==1.6.3 16 | atomicwrites==1.4.0 17 | attrs @ file:///C:/b/abs_09s3y775ra/croot/attrs_1668696195628/work 18 | Automat @ file:///tmp/build/80754af9/automat_1600298431173/work 19 | autopep8 @ file:///opt/conda/conda-bld/autopep8_1650463822033/work 20 | Babel @ file:///C:/b/abs_a2shv_3tqi/croot/babel_1671782804377/work 21 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work 22 | backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work 23 | backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work 24 | backports.weakref==1.0.post1 25 | bcrypt @ file:///C:/Windows/Temp/abs_36kl66t_aw/croots/recipe/bcrypt_1659554334050/work 26 | beautifulsoup4 @ file:///C:/ci/beautifulsoup4_1650293028159/work 27 | binaryornot @ file:///tmp/build/80754af9/binaryornot_1617751525010/work 28 | black @ file:///C:/ci/black_1660221726201/work 29 | bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work 30 | bokeh @ file:///C:/Windows/TEMP/abs_4a259bc2-ed05-4a1f-808e-ac712cc0900cddqp8sp7/croots/recipe/bokeh_1658136660686/work 31 | boltons @ file:///C:/b/abs_707eo7c09t/croot/boltons_1677628723117/work 32 | Bottleneck @ file:///C:/Windows/Temp/abs_3198ca53-903d-42fd-87b4-03e6d03a8381yfwsuve8/croots/recipe/bottleneck_1657175565403/work 33 | brotlipy==0.7.0 34 | cachetools==5.3.0 35 | certifi @ file:///C:/b/abs_4a0polqwty/croot/certifi_1683875377622/work/certifi 36 | cffi @ file:///C:/b/abs_49n3v2hyhr/croot/cffi_1670423218144/work 37 | chardet @ file:///C:/ci_310/chardet_1642114080098/work 38 | charset-normalizer @ 
file:///tmp/build/80754af9/charset-normalizer_1630003229654/work 39 | click @ file:///C:/ci/click_1646056762388/work 40 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1632508026186/work 41 | clyent==1.2.2 42 | colorama @ file:///C:/b/abs_a9ozq0l032/croot/colorama_1672387194846/work 43 | colorcet @ file:///C:/b/abs_46vyu0rpdl/croot/colorcet_1668084513237/work 44 | comm @ file:///C:/b/abs_1419earm7u/croot/comm_1671231131638/work 45 | conda==23.3.1 46 | conda-build==3.24.0 47 | conda-content-trust @ file:///C:/Windows/TEMP/abs_4589313d-fc62-4ccc-81c0-b801b4449e833j1ajrwu/croots/recipe/conda-content-trust_1658126379362/work 48 | conda-pack @ file:///tmp/build/80754af9/conda-pack_1611163042455/work 49 | conda-package-handling @ file:///C:/b/abs_fcga8w0uem/croot/conda-package-handling_1672865024290/work 50 | conda-repo-cli==1.0.41 51 | conda-token @ file:///Users/paulyim/miniconda3/envs/c3i/conda-bld/conda-token_1662660369760/work 52 | conda-verify==3.4.2 53 | conda_package_streaming @ file:///C:/b/abs_0e5n5hdal3/croot/conda-package-streaming_1670508162902/work 54 | constantly==15.1.0 55 | contourpy @ file:///C:/b/abs_d5rpy288vc/croots/recipe/contourpy_1663827418189/work 56 | cookiecutter @ file:///opt/conda/conda-bld/cookiecutter_1649151442564/work 57 | cryptography @ file:///C:/b/abs_8ecplyc3n2/croot/cryptography_1677533105000/work 58 | cssselect==1.1.0 59 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work 60 | cytoolz @ file:///C:/b/abs_61m9vzb4qh/croot/cytoolz_1667465938275/work 61 | daal4py==2023.0.2 62 | dask @ file:///C:/ci/dask-core_1658497112560/work 63 | datashader @ file:///C:/b/abs_e80f3d7ac0/croot/datashader_1676023254070/work 64 | datashape==0.5.4 65 | debugpy @ file:///C:/ci_310/debugpy_1642079916595/work 66 | decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work 67 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work 68 | diff-match-patch @ 
file:///Users/ktietz/demo/mc3/conda-bld/diff-match-patch_1630511840874/work 69 | dill @ file:///C:/b/abs_42h_07z1yj/croot/dill_1667919550096/work 70 | distributed @ file:///C:/ci/distributed_1658523963030/work 71 | dm-tree==0.1.8 72 | docker-pycreds==0.4.0 73 | docstring-to-markdown @ file:///C:/b/abs_cf10j8nr4q/croot/docstring-to-markdown_1673447652942/work 74 | docutils @ file:///C:/Windows/TEMP/abs_24e5e278-4d1c-47eb-97b9-f761d871f482dy2vg450/croots/recipe/docutils_1657175444608/work 75 | entrypoints @ file:///C:/ci/entrypoints_1649926676279/work 76 | et-xmlfile==1.1.0 77 | executing @ file:///opt/conda/conda-bld/executing_1646925071911/work 78 | Farama-Notifications==0.0.4 79 | fastjsonschema @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_ebruxzvd08/croots/recipe/python-fastjsonschema_1661376484940/work 80 | filelock @ file:///C:/b/abs_c7yrhs9uz2/croot/filelock_1672387617533/work 81 | flake8 @ file:///C:/b/abs_9f6_n1jlpc/croot/flake8_1674581816810/work 82 | Flask @ file:///C:/b/abs_ef16l83sif/croot/flask_1671217367534/work 83 | flatbuffers==23.5.9 84 | flit_core @ file:///opt/conda/conda-bld/flit-core_1644941570762/work/source/flit_core 85 | fonttools==4.25.0 86 | fsspec @ file:///C:/b/abs_5bjz6v0w_f/croot/fsspec_1670336608940/work 87 | future @ file:///C:/b/abs_3dcibf18zi/croot/future_1677599891380/work 88 | gast==0.4.0 89 | gensim @ file:///C:/b/abs_a5vat69tv8/croot/gensim_1674853640591/work 90 | gitdb==4.0.10 91 | GitPython==3.1.31 92 | glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work 93 | google-auth==2.18.1 94 | google-auth-oauthlib==1.0.0 95 | google-pasta==0.2.0 96 | greenlet @ file:///C:/b/abs_47lk_w2ajq/croot/greenlet_1670513248400/work 97 | grpcio==1.54.2 98 | gym==0.26.2 99 | gym-notices==0.0.8 100 | gymnasium==0.28.1 101 | h5py==3.9.0 102 | HeapDict @ file:///Users/ktietz/demo/mc3/conda-bld/heapdict_1630598515714/work 103 | holoviews @ file:///C:/b/abs_bbf97_0kcd/croot/holoviews_1676372911083/work 104 | huggingface-hub @ 
file:///C:/b/abs_8d8wo2z8c6/croot/huggingface_hub_1667490298905/work 105 | hvplot @ file:///C:/b/abs_13un17_4x_/croot/hvplot_1670508919193/work 106 | hyperlink @ file:///tmp/build/80754af9/hyperlink_1610130746837/work 107 | idna @ file:///C:/b/abs_bdhbebrioa/croot/idna_1666125572046/work 108 | imagecodecs @ file:///C:/b/abs_f0cr12h73p/croot/imagecodecs_1677576746499/work 109 | imageio @ file:///C:/b/abs_27kq2gy1us/croot/imageio_1677879918708/work 110 | imagesize @ file:///C:/Windows/TEMP/abs_3cecd249-3fc4-4bfc-b80b-bb227b0d701en12vqzot/croots/recipe/imagesize_1657179501304/work 111 | imbalanced-learn @ file:///C:/b/abs_1911ryuksz/croot/imbalanced-learn_1677191585237/work 112 | importlib-metadata @ file:///C:/ci/importlib-metadata_1648544469310/work 113 | importlib-resources==5.12.0 114 | incremental @ file:///tmp/build/80754af9/incremental_1636629750599/work 115 | inflection==0.5.1 116 | iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work 117 | intake @ file:///C:/b/abs_42yyb2lhwx/croot/intake_1676619887779/work 118 | intervaltree @ file:///Users/ktietz/demo/mc3/conda-bld/intervaltree_1630511889664/work 119 | ipykernel @ file:///C:/b/abs_b4f07tbsyd/croot/ipykernel_1672767104060/work 120 | ipython @ file:///C:/b/abs_d3h279dv3h/croot/ipython_1676582236558/work 121 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work 122 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1634143127070/work 123 | isort @ file:///tmp/build/80754af9/isort_1628603791788/work 124 | itemadapter @ file:///tmp/build/80754af9/itemadapter_1626442940632/work 125 | itemloaders @ file:///opt/conda/conda-bld/itemloaders_1646805235997/work 126 | itsdangerous @ file:///tmp/build/80754af9/itsdangerous_1621432558163/work 127 | jax==0.4.10 128 | jax-jumpy==1.0.0 129 | jedi @ file:///C:/ci/jedi_1644315428305/work 130 | jellyfish @ file:///C:/ci/jellyfish_1647962737334/work 131 | Jinja2 @ file:///C:/b/abs_7cdis66kl9/croot/jinja2_1666908141852/work 132 | 
jinja2-time @ file:///opt/conda/conda-bld/jinja2-time_1649251842261/work 133 | jmespath @ file:///Users/ktietz/demo/mc3/conda-bld/jmespath_1630583964805/work 134 | joblib @ file:///C:/b/abs_e60_bwl1v6/croot/joblib_1666298845728/work 135 | json5 @ file:///tmp/build/80754af9/json5_1624432770122/work 136 | jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work 137 | jsonpointer==2.1 138 | jsonschema @ file:///C:/b/abs_6ccs97j_l8/croot/jsonschema_1676558690963/work 139 | jupyter @ file:///C:/Windows/TEMP/abs_56xfdi__li/croots/recipe/jupyter_1659349053177/work 140 | jupyter-console @ file:///C:/b/abs_68ttzd5p9c/croot/jupyter_console_1677674667636/work 141 | jupyter-server @ file:///C:/b/abs_1cfi3__jl8/croot/jupyter_server_1671707636383/work 142 | jupyter_client @ file:///C:/ci/jupyter_client_1661834530766/work 143 | jupyter_core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work 144 | jupyterlab @ file:///C:/b/abs_513jt6yy74/croot/jupyterlab_1675354138043/work 145 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work 146 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work 147 | jupyterlab_server @ file:///C:/b/abs_d1z_g1swc8/croot/jupyterlab_server_1677153204814/work 148 | keras==2.12.0 149 | keyring @ file:///C:/ci_310/keyring_1642165564669/work 150 | kiwisolver @ file:///C:/b/abs_88mdhvtahm/croot/kiwisolver_1672387921783/work 151 | lazy-object-proxy @ file:///C:/ci_310/lazy-object-proxy_1642083437654/work 152 | libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work 153 | libclang==16.0.0 154 | llvmlite==0.39.1 155 | locket @ file:///C:/ci/locket_1652904090946/work 156 | lxml @ file:///C:/ci/lxml_1657527492694/work 157 | lz4 @ file:///C:/ci_310/lz4_1643300078932/work 158 | Markdown @ file:///C:/b/abs_98lv_ucina/croot/markdown_1671541919225/work 159 | MarkupSafe @ file:///C:/ci/markupsafe_1654508036328/work 160 | matplotlib @ 
file:///C:/b/abs_b2d7uv90hg/croot/matplotlib-suite_1677674332463/work 161 | matplotlib-inline @ file:///C:/ci/matplotlib-inline_1661934094726/work 162 | mccabe @ file:///opt/conda/conda-bld/mccabe_1644221741721/work 163 | menuinst @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_455sf5o0ct/croots/recipe/menuinst_1661805970842/work 164 | mistune @ file:///C:/ci_310/mistune_1642084168466/work 165 | mkl-fft==1.3.1 166 | mkl-random @ file:///C:/ci_310/mkl_random_1643050563308/work 167 | mkl-service==2.4.0 168 | ml-dtypes==0.1.0 169 | mock @ file:///tmp/build/80754af9/mock_1607622725907/work 170 | mpmath==1.2.1 171 | msgpack @ file:///C:/ci/msgpack-python_1652348582618/work 172 | # Editable install with no version control (multiagent==0.0.1) 173 | -e c:\users\administrator.desktop-nlh290a\desktop\cm3_code\cm3-master\env\multiagent-particle-envs 174 | multipledispatch @ file:///C:/ci_310/multipledispatch_1642084438481/work 175 | munkres==1.1.4 176 | mypy-extensions==0.4.3 177 | navigator-updater==0.3.0 178 | nbclassic @ file:///C:/b/abs_d0_ze5q0j2/croot/nbclassic_1676902914817/work 179 | nbclient @ file:///C:/ci/nbclient_1650308592199/work 180 | nbconvert @ file:///C:/b/abs_4av3q4okro/croot/nbconvert_1668450658054/work 181 | nbformat @ file:///C:/b/abs_85_3g7dkt4/croot/nbformat_1670352343720/work 182 | nest-asyncio @ file:///C:/b/abs_3a_4jsjlqu/croot/nest-asyncio_1672387322800/work 183 | networkx @ file:///C:/ci/networkx_1657716953747/work 184 | nltk @ file:///opt/conda/conda-bld/nltk_1645628263994/work 185 | notebook @ file:///C:/b/abs_ca13hqvuzw/croot/notebook_1668179888546/work 186 | notebook_shim @ file:///C:/b/abs_ebfczttg6x/croot/notebook-shim_1668160590914/work 187 | numba @ file:///C:/b/abs_e53pp2e4k7/croot/numba_1670258349527/work 188 | numexpr @ file:///C:/b/abs_a7kbak88hk/croot/numexpr_1668713882979/work 189 | numpy @ file:///C:/b/abs_datssh7cer/croot/numpy_and_numpy_base_1672336199388/work 190 | numpy-stl==3.0.1 191 | numpydoc @ 
file:///C:/b/abs_cfdd4zxbga/croot/numpydoc_1668085912100/work 192 | oauthlib==3.2.2 193 | opencv-contrib-python==4.7.0.72 194 | openpyxl==3.0.10 195 | opt-einsum==3.3.0 196 | packaging @ file:///C:/b/abs_cfsup8ur87/croot/packaging_1671697442297/work 197 | pandas @ file:///C:/b/abs_b9kefbuby2/croot/pandas_1677835593760/work 198 | pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work 199 | panel @ file:///C:/b/abs_55ujq2fpyh/croot/panel_1676379705003/work 200 | param @ file:///C:/b/abs_d799n8xz_7/croot/param_1671697759755/work 201 | paramiko @ file:///opt/conda/conda-bld/paramiko_1640109032755/work 202 | parsel @ file:///C:/ci/parsel_1646722035970/work 203 | parso @ file:///opt/conda/conda-bld/parso_1641458642106/work 204 | partd @ file:///opt/conda/conda-bld/partd_1647245470509/work 205 | pathlib @ file:///Users/ktietz/demo/mc3/conda-bld/pathlib_1629713961906/work 206 | pathspec @ file:///C:/b/abs_9cu5_2yb3i/croot/pathspec_1674681579249/work 207 | pathtools==0.1.2 208 | patsy==0.5.3 209 | pep8==1.7.1 210 | pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work 211 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work 212 | Pillow==9.4.0 213 | pkginfo @ file:///C:/b/abs_d18srtr68x/croot/pkginfo_1679431192239/work 214 | platformdirs @ file:///C:/b/abs_73cc5cz_1u/croots/recipe/platformdirs_1662711386458/work 215 | plotly @ file:///C:/ci/plotly_1658160673416/work 216 | pluggy @ file:///C:/ci/pluggy_1648042746254/work 217 | ply==3.11 218 | pooch @ file:///tmp/build/80754af9/pooch_1623324770023/work 219 | poyo @ file:///tmp/build/80754af9/poyo_1617751526755/work 220 | progressbar2==4.2.0 221 | prometheus-client @ file:///C:/Windows/TEMP/abs_ab9nx8qb08/croots/recipe/prometheus_client_1659455104602/work 222 | prompt-toolkit @ file:///C:/b/abs_6coz5_9f2s/croot/prompt-toolkit_1672387908312/work 223 | Protego @ file:///tmp/build/80754af9/protego_1598657180827/work 224 | protobuf==3.20.3 225 | psutil @ 
file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work 226 | ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl 227 | pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work 228 | py @ file:///opt/conda/conda-bld/py_1644396412707/work 229 | pyasn1 @ file:///Users/ktietz/demo/mc3/conda-bld/pyasn1_1629708007385/work 230 | pyasn1-modules==0.2.8 231 | pycodestyle @ file:///C:/b/abs_d77nxvklcq/croot/pycodestyle_1674267231034/work 232 | pycosat @ file:///C:/b/abs_4b1rrw8pn9/croot/pycosat_1666807711599/work 233 | pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work 234 | pyct @ file:///C:/b/abs_92z17k7ig2/croot/pyct_1675450330889/work 235 | pycurl==7.45.1 236 | PyDispatcher==2.0.5 237 | pydocstyle @ file:///C:/b/abs_6dz687_5i3/croot/pydocstyle_1675221688656/work 238 | pyerfa @ file:///C:/ci_310/pyerfa_1642088497201/work 239 | pyflakes @ file:///C:/b/abs_6dve6e13zh/croot/pyflakes_1674165143327/work 240 | pyglet==1.5.27 241 | Pygments @ file:///opt/conda/conda-bld/pygments_1644249106324/work 242 | PyHamcrest @ file:///tmp/build/80754af9/pyhamcrest_1615748656804/work 243 | PyJWT @ file:///C:/ci/pyjwt_1657529477795/work 244 | pylint @ file:///C:/b/abs_83sq99jc8i/croot/pylint_1676919922167/work 245 | pylint-venv @ file:///C:/b/abs_bf0lepsbij/croot/pylint-venv_1673990138593/work 246 | pyls-spyder==0.4.0 247 | PyNaCl @ file:///C:/Windows/Temp/abs_d5c3ajcm87/croots/recipe/pynacl_1659620667490/work 248 | pyodbc @ file:///C:/Windows/Temp/abs_61e3jz3u05/croots/recipe/pyodbc_1659513801402/work 249 | pyOpenSSL @ file:///C:/b/abs_552w85x1jz/croot/pyopenssl_1677607703691/work 250 | pyparsing @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_7f_7lba6rl/croots/recipe/pyparsing_1661452540662/work 251 | PyQt5==5.15.7 252 | PyQt5-sip @ file:///C:/Windows/Temp/abs_d7gmd2jg8i/croots/recipe/pyqt-split_1659273064801/work/pyqt_sip 253 | 
PyQtWebEngine==5.15.4 254 | pyrsistent @ file:///C:/ci_310/pyrsistent_1642117077485/work 255 | PySocks @ file:///C:/ci_310/pysocks_1642089375450/work 256 | pytest==7.1.2 257 | python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work 258 | python-lsp-black @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_dddk9lhpp1/croots/recipe/python-lsp-black_1661852041405/work 259 | python-lsp-jsonrpc==1.0.0 260 | python-lsp-server @ file:///C:/b/abs_e44khh1wya/croot/python-lsp-server_1677296772730/work 261 | python-slugify @ file:///tmp/build/80754af9/python-slugify_1620405669636/work 262 | python-snappy @ file:///C:/b/abs_61b1fmzxcn/croot/python-snappy_1670943932513/work 263 | python-utils==3.5.2 264 | pytoolconfig @ file:///C:/b/abs_18sf9z_iwl/croot/pytoolconfig_1676315065270/work 265 | pytz @ file:///C:/b/abs_22fofvpn1x/croot/pytz_1671698059864/work 266 | pyviz-comms @ file:///tmp/build/80754af9/pyviz_comms_1623747165329/work 267 | PyWavelets @ file:///C:/b/abs_a8r4b1511a/croot/pywavelets_1670425185881/work 268 | pywin32==305.1 269 | pywin32-ctypes @ file:///C:/ci_310/pywin32-ctypes_1642657835512/work 270 | pywinpty @ file:///C:/b/abs_73vshmevwq/croot/pywinpty_1677609966356/work/target/wheels/pywinpty-2.0.10-cp310-none-win_amd64.whl 271 | PyYAML @ file:///C:/b/abs_d0g7dqt2xw/croot/pyyaml_1670514768165/work 272 | pyzmq @ file:///C:/ci/pyzmq_1657616000714/work 273 | QDarkStyle @ file:///tmp/build/80754af9/qdarkstyle_1617386714626/work 274 | qstylizer @ file:///C:/b/abs_ef86cgllby/croot/qstylizer_1674008538857/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl 275 | QtAwesome @ file:///C:/b/abs_c5evilj98g/croot/qtawesome_1674008690220/work 276 | qtconsole @ file:///C:/b/abs_5bap7f8n0t/croot/qtconsole_1674008444833/work 277 | QtPy @ file:///C:/ci/qtpy_1662015130233/work 278 | queuelib==1.5.0 279 | regex @ file:///C:/ci/regex_1658258299320/work 280 | requests @ file:///C:/ci/requests_1657735340829/work 281 | requests-file @ 
file:///Users/ktietz/demo/mc3/conda-bld/requests-file_1629455781986/work 282 | requests-oauthlib==1.3.1 283 | requests-toolbelt @ file:///Users/ktietz/demo/mc3/conda-bld/requests-toolbelt_1629456163440/work 284 | rope @ file:///C:/b/abs_55g_tm_6ff/croot/rope_1676675029164/work 285 | rsa==4.9 286 | Rtree @ file:///C:/b/abs_e116ltblik/croot/rtree_1675157871717/work 287 | ruamel-yaml-conda @ file:///C:/b/abs_6ejaexx82s/croot/ruamel_yaml_1667489767827/work 288 | ruamel.yaml @ file:///C:/b/abs_30ee5qbthd/croot/ruamel.yaml_1666304562000/work 289 | ruamel.yaml.clib @ file:///C:/b/abs_aarblxbilo/croot/ruamel.yaml.clib_1666302270884/work 290 | scikit-image @ file:///C:/b/abs_63r0vmx78u/croot/scikit-image_1669241746873/work 291 | scikit-learn @ file:///C:/b/abs_7ck_bnw91r/croot/scikit-learn_1676911676133/work 292 | scikit-learn-intelex==20230228.214818 293 | scipy==1.10.0 294 | Scrapy @ file:///C:/b/abs_9fn69i_d86/croot/scrapy_1677738199744/work 295 | seaborn @ file:///C:/b/abs_68ltdkoyoo/croot/seaborn_1673479199997/work 296 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1632406701022/work 297 | sentry-sdk==1.24.0 298 | service-identity @ file:///Users/ktietz/demo/mc3/conda-bld/service_identity_1629460757137/work 299 | setproctitle==1.3.2 300 | sip @ file:///C:/Windows/Temp/abs_b8fxd17m2u/croots/recipe/sip_1659012372737/work 301 | six @ file:///tmp/build/80754af9/six_1644875935023/work 302 | smart-open @ file:///C:/ci/smart_open_1651235038100/work 303 | smmap==5.0.0 304 | sniffio @ file:///C:/ci_310/sniffio_1642092172680/work 305 | snowballstemmer @ file:///tmp/build/80754af9/snowballstemmer_1637937080595/work 306 | sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1623949099177/work 307 | soupsieve @ file:///C:/b/abs_fasraqxhlv/croot/soupsieve_1666296394662/work 308 | Sphinx @ file:///C:/ci/sphinx_1657617157451/work 309 | sphinxcontrib-applehelp @ file:///home/ktietz/src/ci/sphinxcontrib-applehelp_1611920841464/work 310 | sphinxcontrib-devhelp @ 
file:///home/ktietz/src/ci/sphinxcontrib-devhelp_1611920923094/work 311 | sphinxcontrib-htmlhelp @ file:///tmp/build/80754af9/sphinxcontrib-htmlhelp_1623945626792/work 312 | sphinxcontrib-jsmath @ file:///home/ktietz/src/ci/sphinxcontrib-jsmath_1611920942228/work 313 | sphinxcontrib-qthelp @ file:///home/ktietz/src/ci/sphinxcontrib-qthelp_1611921055322/work 314 | sphinxcontrib-serializinghtml @ file:///tmp/build/80754af9/sphinxcontrib-serializinghtml_1624451540180/work 315 | spyder @ file:///C:/b/abs_93s9xkw3pn/croot/spyder_1677776163871/work 316 | spyder-kernels @ file:///C:/b/abs_feh4xo1mrn/croot/spyder-kernels_1673292245176/work 317 | SQLAlchemy @ file:///C:/Windows/Temp/abs_f8661157-660b-49bb-a790-69ab9f3b8f7c8a8s2psb/croots/recipe/sqlalchemy_1657867864564/work 318 | stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work 319 | statsmodels @ file:///C:/b/abs_bdqo3zaryj/croot/statsmodels_1676646249859/work 320 | sumolib==1.17.0 321 | sympy @ file:///C:/b/abs_95fbf1z7n6/croot/sympy_1668202411612/work 322 | tables==3.7.0 323 | tabulate @ file:///C:/ci/tabulate_1657600805799/work 324 | TBB==0.2 325 | tblib @ file:///Users/ktietz/demo/mc3/conda-bld/tblib_1629402031467/work 326 | tenacity @ file:///C:/Windows/TEMP/abs_980d07a6-8e21-4174-9c17-7296219678ads7dhdov_/croots/recipe/tenacity_1657899108023/work 327 | tensorboard==2.12.3 328 | tensorboard-data-server==0.7.0 329 | tensorboardX==2.6 330 | tensordict==0.1.2 331 | tensorflow==2.12.0 332 | tensorflow-estimator==2.12.0 333 | tensorflow-intel==2.12.0 334 | tensorflow-io-gcs-filesystem==0.31.0 335 | tensorflow-probability==0.20.1 336 | tensorlayer==2.2.5 337 | termcolor==2.3.0 338 | terminado @ file:///C:/b/abs_25nakickad/croot/terminado_1671751845491/work 339 | text-unidecode @ file:///Users/ktietz/demo/mc3/conda-bld/text-unidecode_1629401354553/work 340 | textdistance @ file:///tmp/build/80754af9/textdistance_1612461398012/work 341 | threadpoolctl @ 
file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work 342 | three-merge @ file:///tmp/build/80754af9/three-merge_1607553261110/work 343 | tifffile @ file:///tmp/build/80754af9/tifffile_1627275862826/work 344 | tinycss2 @ file:///C:/b/abs_52w5vfuaax/croot/tinycss2_1668168823131/work 345 | tldextract @ file:///opt/conda/conda-bld/tldextract_1646638314385/work 346 | tokenizers @ file:///C:/ci/tokenizers_1651821358528/work 347 | toml @ file:///tmp/build/80754af9/toml_1616166611790/work 348 | tomli @ file:///C:/Windows/TEMP/abs_ac109f85-a7b3-4b4d-bcfd-52622eceddf0hy332ojo/croots/recipe/tomli_1657175513137/work 349 | tomlkit @ file:///C:/Windows/TEMP/abs_3296qo9v6b/croots/recipe/tomlkit_1658946894808/work 350 | toolz @ file:///C:/b/abs_cfvk6rc40d/croot/toolz_1667464080130/work 351 | torch==2.0.1 352 | torchrl==0.1.1 353 | torchvision==0.15.2 354 | tornado @ file:///C:/ci_310/tornado_1642093111997/work 355 | tqdm @ file:///C:/b/abs_0axbz66qik/croots/recipe/tqdm_1664392691071/work 356 | traci==1.17.0 357 | traitlets @ file:///C:/b/abs_e5m_xjjl94/croot/traitlets_1671143896266/work 358 | transformers @ file:///C:/b/abs_8byf5_j714/croot/transformers_1667919454001/work 359 | Twisted @ file:///C:/Windows/Temp/abs_ccblv2rzfa/croots/recipe/twisted_1659592764512/work 360 | twisted-iocpsupport @ file:///C:/ci/twisted-iocpsupport_1646817083730/work 361 | typing_extensions @ file:///C:/b/abs_89eui86zuq/croot/typing_extensions_1669923792806/work 362 | ujson @ file:///C:/ci/ujson_1657525893897/work 363 | Unidecode @ file:///tmp/build/80754af9/unidecode_1614712377438/work 364 | urllib3 @ file:///C:/b/abs_9bcwxczrvm/croot/urllib3_1673575521331/work 365 | visdom==0.2.4 366 | w3lib @ file:///Users/ktietz/demo/mc3/conda-bld/w3lib_1629359764703/work 367 | wandb==0.15.3 368 | watchdog @ file:///C:/ci_310/watchdog_1642113443984/work 369 | wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work 370 | webencodings==0.5.1 371 | websocket-client @ 
file:///C:/ci_310/websocket-client_1642093970919/work 372 | Werkzeug @ file:///C:/b/abs_17q5kgb8bo/croot/werkzeug_1671216014857/work 373 | whatthepatch @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_e7bihs8grh/croots/recipe/whatthepatch_1661796085215/work 374 | widgetsnbextension @ file:///C:/ci/widgetsnbextension_1645009839917/work 375 | win-inet-pton @ file:///C:/ci_310/win_inet_pton_1642658466512/work 376 | wincertstore==0.2 377 | wrapt @ file:///C:/Windows/Temp/abs_7c3dd407-1390-477a-b542-fd15df6a24085_diwiza/croots/recipe/wrapt_1657814452175/work 378 | xarray @ file:///C:/b/abs_2fi_umrauo/croot/xarray_1668776806973/work 379 | xlwings @ file:///C:/b/abs_1ejhh6s00l/croot/xlwings_1677024180629/work 380 | yapf @ file:///tmp/build/80754af9/yapf_1615749224965/work 381 | zict==2.1.0 382 | zipp @ file:///C:/b/abs_b9jfdr908q/croot/zipp_1672387552360/work 383 | zope.interface @ file:///C:/ci_310/zope.interface_1642113633904/work 384 | zstandard==0.19.0 385 | -------------------------------------------------------------------------------- /algorithm/MA-SAC_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 6 | import numpy as np 7 | import math 8 | # import gym 9 | import sympy 10 | from matplotlib import pyplot as plt 11 | from scipy.io import loadmat 12 | import os 13 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' 14 | # env = gym.make("LargeGridWorld-v0").unwrapped 15 | state_number=2 16 | action_number=10 #9 17 | max_action = 1 18 | min_action = -1 19 | RENDER=False 20 | EP_MAX = 500 21 | EP_LEN = 1000 22 | GAMMA = 0.9 23 | q_lr = 5e-5#3e-4 24 | value_lr = 5e-4#3e-3 25 | policy_lr = 1.5e-4#3 26 | BATCH = 128 27 | tau = 1e-2 28 | MemoryCapacity=20000 29 | Switch=0 30 | n_width=100 31 | n_height = 100 32 | m = loadmat("mapdata_0717.mat") 33 | 
#correct_action=0
# Arrays read from the .mat file that was loaded into `m`.
MARK = m["MARK_new"]
PL_AP = m["MARK_PL_real"]


class ActorNet(nn.Module):
    """Policy network producing a Gaussian (mean, std) per action dimension.

    Args:
        inp: Size of the state vector fed to the first linear layer.
        outp: Number of action dimensions (width of both output heads).
    """

    def __init__(self, inp, outp):
        super(ActorNet, self).__init__()
        # Two hidden layers of width 256; every weight matrix is re-drawn
        # from a normal distribution with mean 0 and std 0.1.
        self.in_to_y1 = nn.Linear(inp, 256)
        self.in_to_y1.weight.data.normal_(0, 0.1)
        self.y1_to_y2 = nn.Linear(256, 256)
        self.y1_to_y2.weight.data.normal_(0, 0.1)
        # Separate heads: one for the mean, one for the log-std.
        self.out = nn.Linear(256, outp)
        self.out.weight.data.normal_(0, 0.1)
        self.std_out = nn.Linear(256, outp)
        self.std_out.weight.data.normal_(0, 0.1)

    def forward(self, inputstate):
        """Return ``(mean, std)`` of the action distribution for a state."""
        hidden = F.relu(self.in_to_y1(inputstate))
        hidden = F.relu(self.y1_to_y2(hidden))
        # tanh bounds the mean to [-max_action, max_action].
        mean = max_action * torch.tanh(self.out(hidden))
        # The head is interpreted as log(std); clamping before exp() keeps
        # std strictly positive and within [e^-20, e^2].
        log_std = torch.clamp(self.std_out(hidden), -20, 2)
        std = log_std.exp()
        return mean, std


class CriticNet(nn.Module):
    """Twin Q-network: two independent MLPs scoring a (state, action) pair.

    Args:
        input: Size of the state vector.
        output: Size of the action vector.
    """

    def __init__(self, input, output):
        super(CriticNet, self).__init__()
        # q1
        self.in_to_y1 = nn.Linear(input + output, 256)
        self.in_to_y1.weight.data.normal_(0, 0.1)
        self.y1_to_y2 = nn.Linear(256, 256)
        self.y1_to_y2.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(256, 1)
        self.out.weight.data.normal_(0, 0.1)
        # q2
        self.q2_in_to_y1 = nn.Linear(input + output, 256)
        self.q2_in_to_y1.weight.data.normal_(0, 0.1)
        self.q2_y1_to_y2 = nn.Linear(256, 256)
        self.q2_y1_to_y2.weight.data.normal_(0, 0.1)
        self.q2_out = nn.Linear(256, 1)
        self.q2_out.weight.data.normal_(0, 0.1)

    def forward(self, s, a):
        """Return ``(q1, q2)`` for state batch ``s`` and action batch ``a``.

        ``s`` and ``a`` are concatenated along dim 1, so both must be
        2-D tensors with the same number of rows.
        """
        inputstate = torch.cat((s, a), dim=1)
        # q1
        q1 = F.relu(self.in_to_y1(inputstate))
        q1 = F.relu(self.y1_to_y2(q1))
        q1 = self.out(q1)
        # q2 -- must go through its own layers. Re-using the q1 layers here
        # would make both estimates identical and leave q2_* untrained.
        q2 = F.relu(self.q2_in_to_y1(inputstate))
        q2 = F.relu(self.q2_y1_to_y2(q2))
        q2 = self.q2_out(q2)
        return q1, q2


class Memory():
    """Fixed-capacity ring buffer of flattened (s, a, r, s_) transitions.

    Args:
        capacity: Number of rows in the buffer.
        dims: Length of one flattened transition row.
        type_m: 1 stores the first ten action components; any other value
            stores only ``a[0]``.
    """

    def __init__(self, capacity, dims, type_m):
        self.capacity = capacity
        self.mem = np.zeros((capacity, dims))
        self.memory_counter = 0
        self.type_m = type_m

    def store_transition(self, s, a, r, s_):
        """Store one transition, overwriting the oldest row once full."""
        if self.type_m == 1:
            # Horizontally join s, a[0..9], r and s_ into one row.
            action_and_reward = [a[i] for i in range(10)] + [r]
        else:
            action_and_reward = [a[0], r]
        tran = np.hstack((s, action_and_reward, s_))

        # The modulo turns the running counter into a ring-buffer index.
        index = self.memory_counter % self.capacity
        self.mem[index, :] = tran
        self.memory_counter += 1

    def sample(self, n):
        """Return ``n`` rows drawn uniformly (with replacement) from the buffer.

        Sampling is only allowed once the buffer has been filled completely.
        """
        assert self.memory_counter>=self.capacity,'记忆库没有存满记忆'
        sample_index = np.random.choice(self.capacity, n)
        new_mem = self.mem[sample_index, :]
        return new_mem


class Actor():
    """Wraps the policy network together with its Adam optimizer."""

    def __init__(self):
        self.action_net = ActorNet(state_number, action_number)
        self.optimizer = torch.optim.Adam(self.action_net.parameters(), lr=policy_lr)

    def choose_action(self, s):
        """Sample an action for state ``s`` and return it as a NumPy array."""
        inputstate = torch.FloatTensor(s)
        mean, std = self.action_net(inputstate)
        dist = torch.distributions.Normal(mean, std)
        action = dist.sample()
        action = torch.clamp(action, min_action, max_action)
        return action.detach().numpy()

    def evaluate(self, s):
        """Return ``(action, log_prob, z, mean, std)`` via the reparameterised sample.

        The action is ``tanh(mean + std * z)`` and the log-probability
        includes the tanh change-of-variables correction.
        """
        inputstate = torch.FloatTensor(s)
        mean, std = self.action_net(inputstate)
        dist = torch.distributions.Normal(mean, std)
        noise = torch.distributions.Normal(0, 1)
        # NOTE(review): sample() on a scalar-parameter Normal yields a single
        # 0-dim draw, so the same z is broadcast over every element of
        # mean/std -- confirm this is intended rather than per-element noise.
        z = noise.sample()
        pre_tanh = mean + std * z
        action = torch.tanh(pre_tanh)
        action = torch.clamp(action, min_action, max_action)
        # The 1e-6 term keeps the log finite when |action| reaches 1.
        action_logprob = dist.log_prob(pre_tanh) - torch.log(1 - action.pow(2) + 1e-6)
        return action, action_logprob, z, mean, std

    def learn(self, actor_loss):
        """Take one optimizer step on ``actor_loss``."""
        loss = actor_loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
class Entroy():
    """Learnable entropy temperature (stored as ``log_alpha``) and its optimizer."""

    def __init__(self):
        self.target_entropy = -action_number
        self.log_alpha = torch.zeros(1, requires_grad=True)
        # NOTE(review): alpha is computed once here and is not refreshed by
        # learn(); callers presumably recompute it from log_alpha -- confirm.
        self.alpha = self.log_alpha.exp()
        self.optimizer = torch.optim.Adam([self.log_alpha], lr=q_lr)

    def learn(self, entroy_loss):
        """Take one optimizer step on ``entroy_loss``."""
        loss = entroy_loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


class Critic():
    """Online and target twin-Q networks plus the optimizer for the online one."""

    def __init__(self):
        # Both networks take (state, action) and return two Q-value estimates.
        self.critic_v, self.target_critic_v = CriticNet(state_number, action_number), CriticNet(state_number, action_number)
        self.optimizer = torch.optim.Adam(self.critic_v.parameters(), lr=value_lr, eps=1e-5)
        self.lossfunc = nn.MSELoss()

    def soft_update(self):
        """Polyak-average the online weights into the target: t <- (1-tau)*t + tau*p."""
        for target_param, param in zip(self.target_critic_v.parameters(), self.critic_v.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)

    def get_v(self, s, a):
        """Return the online network's ``(q1, q2)`` for ``(s, a)``."""
        return self.critic_v(s, a)

    def learn(self, current_q1, current_q2, target_q):
        """Minimise the summed MSE of both Q estimates against ``target_q``."""
        loss = self.lossfunc(current_q1, target_q) + self.lossfunc(current_q2, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


def cosVector(x, y):
    """Return the cosine similarity of ``x`` and ``y``.

    Only the first ``len(x)`` components of ``y`` are used. When either
    vector has zero norm (including an empty ``x``) the similarity is
    undefined; 0.0 is returned instead of dividing by zero.
    """
    dot = 0.0        # sum(X*Y)
    norm_x_sq = 0.0  # sum(X*X)
    norm_y_sq = 0.0  # sum(Y*Y)
    for i, x_i in enumerate(x):
        y_i = y[i]
        dot += x_i * y_i
        norm_x_sq += x_i ** 2
        norm_y_sq += y_i ** 2
    denom = (norm_x_sq * norm_y_sq) ** 0.5
    if denom == 0:
        return 0.0
    return dot / denom


if Switch==0:
    print('SAC训练中...')
    actor = Actor()
    critic = Critic()
    entroy=Entroy()
    M = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
    all_ep_r = []
    # actor2 = Actor()
    # critic2 = Critic()
    # entroy2=Entroy()
    # M2 = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
    # all_ep_r2 = []
    # actor3 = Actor()
    # critic3 = Critic()
    # entroy3=Entroy()
    # M3 = Memory(MemoryCapacity, 
2 * state_number + action_number + 1,1) 203 | # all_ep_r3 = [] 204 | # state_number=6 205 | # action_number=1 #9 206 | # actor4 = Actor() 207 | # critic4 = Critic() 208 | # entroy4=Entroy() 209 | # M4 = Memory(MemoryCapacity, 2 * state_number + 1+ 1,2) 210 | # all_ep_r4 = [] 211 | # state_number=2 212 | # action_number=9 #9 213 | # end_location = [26*2,18*2] #8*2,9*2[] 214 | 215 | end_location = [15*2,32*2] 216 | end_location2 = [45*2,45*2] 217 | end_location3 = [47,38*2] 218 | D=100 219 | m_d=100 220 | lambda_q=10 221 | 222 | for episode in range(EP_MAX): 223 | observation = np.array([4*2,5*2], dtype=np.float32) # 环境重置 224 | # observation2 = np.array([20*2,20*2], dtype=np.float32) 225 | # observation3 = np.array([40*2,10*2], dtype=np.float32) 226 | observation_su1 = np.array([17, 25*2], dtype=np.float32) 227 | observation_su2 = np.array([50, 25*2], dtype=np.float32) 228 | observation_su3 = np.array([84, 25*2], dtype=np.float32) 229 | # observation4 = np.array([observation[0], observation[1], observation2[0], observation2[1], observation3[0], observation3[1]]) 230 | 231 | reward_totle = 0 232 | reward_totle2 = 0 233 | reward_totle3 = 0 234 | reward_totle4 = 0 235 | done1=False 236 | done2=False 237 | done3=False 238 | bobao=0 239 | bobao2=0 240 | bobao3=0 241 | for timestep in range(EP_LEN): 242 | # if RENDER: 243 | # env.render() 244 | action = actor.choose_action(observation) 245 | # action2 = actor2.choose_action(observation2) 246 | # action3 = actor3.choose_action(observation3) 247 | # action4 = actor4.choose_action(observation4) 248 | if not done1: 249 | [old_x, old_y] = observation 250 | new_x, new_y = int(old_x), int(old_y) 251 | new_x=int(old_x+action[0]) 252 | new_y=int(old_y+action[1]) 253 | if int(new_x) <= 0: 254 | new_x = 1 255 | if int(new_x) >= n_width: 256 | new_x = int(n_width)-1 257 | if int(new_y) <= 0: 258 | new_y = 1 259 | if int(new_y) >= n_height: 260 | new_y = int(n_height)-1 261 | if MARK[new_x,new_y] == 2: 262 | new_x, new_y = old_x, old_y 
263 | observation_=np.array([new_x, new_y], dtype=np.float32) 264 | else: 265 | observation_ = observation 266 | # if not done2: 267 | # [old_x, old_y] = observation2 268 | # new_x, new_y = int(old_x), int(old_y) 269 | # new_x=int(old_x+action2[0]) 270 | # new_y=int(old_y+action2[1]) 271 | # if int(new_x) <= 0: 272 | # new_x = 1 273 | # if int(new_x) >= n_width: 274 | # new_x = int(n_width)-1 275 | # if int(new_y) <= 0: 276 | # new_y = 1 277 | # if int(new_y) >= n_height: 278 | # new_y = int(n_height)-1 279 | # if MARK[new_x,new_y] == 2: 280 | # new_x, new_y = old_x, old_y 281 | # observation2_=np.array([new_x, new_y], dtype=np.float32) 282 | # else: 283 | # observation2_ = observation2 284 | # if not done3: 285 | # [old_x, old_y] = observation3 286 | # new_x, new_y = int(old_x), int(old_y) 287 | # new_x=int(old_x+action3[0]) 288 | # new_y=int(old_y+action3[1]) 289 | # if int(new_x) <= 0: 290 | # new_x = 1 291 | # if int(new_x) >= n_width: 292 | # new_x = int(n_width)-1 293 | # if int(new_y) <= 0: 294 | # new_y = 1 295 | # if int(new_y) >= n_height: 296 | # new_y = int(n_height)-1 297 | # if MARK[new_x,new_y] == 2: 298 | # new_x, new_y = old_x, old_y 299 | # observation3_=np.array([new_x, new_y], dtype=np.float32) 300 | # else: 301 | # observation3_ = observation3 302 | # observation_ = env.step(observation, 1, action) # 单步交互 303 | # observation4_ = np.array([observation_[0], observation_[1], observation2_[0], observation2_[1], observation3_[0], observation3_[1]]) 304 | # state7_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]]) 305 | # done_sys = done1 and done2 and done3 306 | 307 | if action[8]==-1: 308 | action[8]=-0.9999999 309 | # if action2[8]==-1: 310 | # action2[8]=-0.9999999 311 | # if action3[8]==-1: 312 | # action3[8]=-0.9999999 313 | if action[8]==1: 314 | action[8]=0.9999999 315 | # if action2[8]==1: 316 | # action2[8]=0.9999999 317 | # if action3[8]==1: 318 | # action3[8]=0.9999999 319 | 320 | w_1=np.array([action[2]* 
math.exp(1)**(1j*(1+action[3])*math.pi), action[4]* math.exp(1)**(1j*(1+action[5])*math.pi), action[6]* math.exp(1)**(1j*(1+action[7])*math.pi)]) 321 | # w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)]) 322 | # w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)]) 323 | theta_1=cosVector([1,0,0],[observation_[0]-50,observation_[1]-100, 1-2]) 324 | a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])# 325 | b_1_AP_LOS=math.sqrt(PL_AP[int(observation_[0]), int(observation_[1])]) 326 | h_1=b_1_AP_LOS*a_1 327 | interference_1=10**(-9) 328 | # theta_2=cosVector([1,0,0],[observation2_[0]-50,observation2_[1]-100, 1-2]) 329 | # a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])# 330 | # b_2_AP_LOS=math.sqrt(PL_AP[int(observation2_[0]), int(observation2_[1])]) 331 | # h_2=b_2_AP_LOS*a_2 332 | # interference_2=10**(-9) 333 | # theta_3=cosVector([1,0,0],[observation3_[0]-50,observation3_[1]-100, 1-2]) 334 | # a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])# 335 | # b_3_AP_LOS=math.sqrt(PL_AP[int(observation3_[0]), int(observation3_[1])]) 336 | # h_3=b_3_AP_LOS*a_3 337 | # interference_3=10**(-9) 338 | theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2]) 339 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])# 340 | b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])]) 341 | h_4=b_4_AP_LOS*a_4 342 | interference_4=10**(-9) 343 | theta_5=cosVector([1,0,0],[observation_su2[0]-50,observation_su2[1]-100, 1-2]) 344 | a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), 
math.exp(1)**(-2*1j*(math.pi*theta_5))])# 345 | b_5_AP_LOS=math.sqrt(PL_AP[int(observation_su2[0]), int(observation_su2[1])]) 346 | h_5=b_5_AP_LOS*a_5 347 | interference_5=10**(-9) 348 | theta_6=cosVector([1,0,0],[observation_su3[0]-50,observation_su3[1]-100, 1-2]) 349 | a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])# 350 | b_6_AP_LOS=math.sqrt(PL_AP[int(observation_su3[0]), int(observation_su3[1])]) 351 | h_6=b_6_AP_LOS*a_6 352 | interference_6=10**(-9) 353 | if action[8]>0: 354 | interference_1+=(1-(action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2 355 | else: 356 | interference_4+=((action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2 357 | # if action2[8]>0.5: 358 | # interference_2+=(1-(action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2 359 | # else: 360 | # interference_5+=((action2[8]+1)/2)*(np.linalg.norm(h_5*w_2))**2 361 | # if action3[8]>0.5: 362 | # interference_3+=(1-(action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2 363 | # else: 364 | # interference_6+=((action3[8]+1)/2)*(np.linalg.norm(h_6*w_3))**2 365 | SINR_1=((action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1 366 | # SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2 367 | # SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3 368 | SINR_4=(1-(action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4 369 | # SINR_5=(1-(action2[8]+1)/2)*(np.linalg.norm(h_5*w_2))**2/interference_5 370 | # SINR_6=(1-(action3[8]+1)/2)*(np.linalg.norm(h_6*w_3))**2/interference_6 371 | # calculate reward 372 | # V_sinr_1=1-(1+SINR_1)**(-2) 373 | # # integrate(x**2, (x, 1, 2)) 374 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_1)*(math.log(1+SINR_1, 2)-D/m_d) 375 | # x=sympy.Symbol('x') 376 | # f = sympy.exp(-x**2/2) 377 | # epsilon_d_1=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 378 | # # print(epsilon_d_1) 379 | # #sympy.integrate()*math.exp(1)**(-u_var**2)/2, (u_var, f_x, float('inf'))) 380 | # #ue 2 381 | # 
V_sinr_2=1-(1+SINR_2)**(-2) 382 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_2)*(math.log(1+SINR_2, 2)-D/m_d) 383 | # x=sympy.Symbol('x') 384 | # f = sympy.exp(-x**2/2) 385 | # epsilon_d_2=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 386 | # #ue 3 387 | # V_sinr_3=1-(1+SINR_3)**(-2) 388 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_3)*(math.log(1+SINR_3, 2)-D/m_d) 389 | # x=sympy.Symbol('x') 390 | # f = sympy.exp(-x**2/2) 391 | # epsilon_d_3=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 392 | # #ue 4 393 | # V_sinr_4=1-(1+SINR_4)**(-2) 394 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_4)*(math.log(1+SINR_4, 2)-D/m_d) 395 | # x=sympy.Symbol('x') 396 | # f = sympy.exp(-x**2/2) 397 | # epsilon_d_4=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 398 | # #ue 4 399 | # V_sinr_5=1-(1+SINR_5)**(-2) 400 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_5)*(math.log(1+SINR_5, 2)-D/m_d) 401 | # x=sympy.Symbol('x') 402 | # f = sympy.exp(-x**2/2) 403 | # epsilon_d_5=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 404 | # V_sinr_6=1-(1+SINR_6)**(-2) 405 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_6)*(math.log(1+SINR_6, 2)-D/m_d) 406 | # x=sympy.Symbol('x') 407 | # f = sympy.exp(-x**2/2) 408 | # epsilon_d_6=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo))) 409 | 410 | 411 | 412 | distance_01_2=(observation_[0]-end_location[0])*(observation_[0]-end_location[0])/4+(observation_[1]-end_location[1])*(observation_[1]-end_location[1])/4 413 | distance_01 = math.sqrt(distance_01_2) 414 | #print(distance_01) 415 | # if epsilon_d_1<10**(-14): 416 | # epsilon_d_1=10**(-14) 417 | reward = -(distance_01/50)+max(0.01, min(SINR_1, SINR_4)/1000)-0.01 418 | #reward = -1 419 | #reward=0 420 | if distance_01==0: 421 | reward = 1 422 | if not done1: 423 | reward_totle += reward 424 | # if epsilon_d_2<10**(-14): 425 | # epsilon_d_2=10**(-14) 426 | # 
distance_02_2=(observation2_[0]-end_location2[0])*(observation2_[0]-end_location2[0])/4+(observation2_[1]-end_location2[1])*(observation2_[1]-end_location2[1])/4 427 | # distance_02 = math.sqrt(distance_02_2) 428 | # reward2 = -(distance_02/50)#+max(0.2, min(SINR_2, SINR_5)/50) 429 | # if distance_02==0: 430 | # reward2 = 1 431 | # if not done2: 432 | # reward_totle2 += reward2 433 | # distance_03_2=(observation3_[0]-end_location3[0])*(observation3_[0]-end_location3[0])/4+(observation3_[1]-end_location3[1])*(observation3_[1]-end_location3[1])/4 434 | # distance_03 = math.sqrt(distance_03_2) 435 | # # if epsilon_d_3<10**(-14): 436 | # # epsilon_d_3=10**(-14) 437 | # reward3 = -(distance_03/50)#+max(0.2, min(SINR_3, SINR_6)/50) 438 | # if distance_03==0: 439 | # reward3 = 1 440 | # if not done3: 441 | # reward_totle3 += reward3 442 | 443 | # reward4=(reward+reward2+reward3)/3 444 | 445 | 446 | # distance_01_2=(observation_[0]-end_location[0])*(observation_[0]-end_location[0])/4+(observation_[1]-end_location[1])*(observation_[1]-end_location[1])/4 447 | # distance_01 = math.sqrt(distance_01_2) 448 | # reward= -(distance_01/10) 449 | # if distance_01==0: 450 | # done1 = True 451 | # #os.system("pause") 452 | # reward=10 453 | #print(observation, action, observation_) 454 | M.store_transition(observation, action, reward, observation_) 455 | # M2.store_transition(observation2, action2, reward2, observation2_) 456 | # M3.store_transition(observation3, action3, reward3, observation3_) 457 | # M4.store_transition(observation4, action4, reward4, observation4_) 458 | 459 | 460 | # 记忆库存储 461 | # 有的2000个存储数据就开始学习 462 | if M.memory_counter > MemoryCapacity and not done1: 463 | b_M = M.sample(BATCH) 464 | b_s = b_M[:, :state_number] 465 | b_a = b_M[:, state_number: state_number + action_number] 466 | b_r = b_M[:, -state_number - 1: -state_number] 467 | b_s_ = b_M[:, -state_number:] 468 | b_s = torch.FloatTensor(b_s) 469 | b_a = torch.FloatTensor(b_a) 470 | b_r = 
torch.FloatTensor(b_r) 471 | b_s_ = torch.FloatTensor(b_s_) 472 | new_action, log_prob_, z, mean, log_std = actor.evaluate(b_s_) 473 | target_q1,target_q2=critic.get_v(b_s_,new_action) 474 | target_q=b_r+GAMMA*(torch.min(target_q1,target_q2)-entroy.alpha*log_prob_) 475 | current_q1, current_q2 = critic.get_v(b_s, b_a) 476 | critic.learn(current_q1,current_q2,target_q.detach()) 477 | a,log_prob,_,_,_=actor.evaluate(b_s) 478 | q1,q2=critic.get_v(b_s,a) 479 | q=torch.min(q1,q2) 480 | actor_loss = (entroy.alpha * log_prob - q).mean() 481 | actor.learn(actor_loss) 482 | alpha_loss = -(entroy.log_alpha.exp() * (log_prob + entroy.target_entropy).detach()).mean() 483 | entroy.learn(alpha_loss) 484 | entroy.alpha=entroy.log_alpha.exp() 485 | # 软更新 486 | critic.soft_update() 487 | observation = observation_ 488 | # reward_totle += reward 489 | if distance_01==0: 490 | done1=True 491 | # print("arrive success!!!!!!!!!!!!!!") 492 | # if M2.memory_counter > MemoryCapacity and not done2: 493 | # b_M = M2.sample(BATCH) 494 | # b_s = b_M[:, :state_number] 495 | # b_a = b_M[:, state_number: state_number + action_number] 496 | # b_r = b_M[:, -state_number - 1: -state_number] 497 | # b_s_ = b_M[:, -state_number:] 498 | # b_s = torch.FloatTensor(b_s) 499 | # b_a = torch.FloatTensor(b_a) 500 | # b_r = torch.FloatTensor(b_r) 501 | # b_s_ = torch.FloatTensor(b_s_) 502 | # new_action, log_prob_, z, mean, log_std = actor2.evaluate(b_s_) 503 | # target_q1,target_q2=critic2.get_v(b_s_,new_action) 504 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q2)-entroy2.alpha*log_prob_) 505 | # current_q1, current_q2 = critic2.get_v(b_s, b_a) 506 | # critic2.learn(current_q1,current_q2,target_q.detach()) 507 | # a,log_prob,_,_,_=actor2.evaluate(b_s) 508 | # q1,q2=critic2.get_v(b_s,a) 509 | # q=torch.min(q1,q2) 510 | # actor_loss = (entroy2.alpha * log_prob - q).mean() 511 | # actor2.learn(actor_loss) 512 | # alpha_loss = -(entroy2.log_alpha.exp() * (log_prob + 
entroy2.target_entropy).detach()).mean() 513 | # entroy2.learn(alpha_loss) 514 | # entroy2.alpha=entroy2.log_alpha.exp() 515 | # # 软更新 516 | # critic2.soft_update() 517 | # observation2 = observation2_ 518 | # # reward_totle2 += reward2 519 | # if distance_02==0: 520 | # done2=True 521 | # # print("arrive success 2 !!!!!!!!!!!!!!") 522 | # if M3.memory_counter > MemoryCapacity and not done3: 523 | # b_M = M3.sample(BATCH) 524 | # b_s = b_M[:, :state_number] 525 | # b_a = b_M[:, state_number: state_number + action_number] 526 | # b_r = b_M[:, -state_number - 1: -state_number] 527 | # b_s_ = b_M[:, -state_number:] 528 | # b_s = torch.FloatTensor(b_s) 529 | # b_a = torch.FloatTensor(b_a) 530 | # b_r = torch.FloatTensor(b_r) 531 | # b_s_ = torch.FloatTensor(b_s_) 532 | # new_action, log_prob_, z, mean, log_std = actor3.evaluate(b_s_) 533 | # target_q1,target_q3=critic3.get_v(b_s_,new_action) 534 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q3)-entroy3.alpha*log_prob_) 535 | # current_q1, current_q3 = critic3.get_v(b_s, b_a) 536 | # critic3.learn(current_q1,current_q3,target_q.detach()) 537 | # a,log_prob,_,_,_=actor3.evaluate(b_s) 538 | # q1,q3=critic3.get_v(b_s,a) 539 | # q=torch.min(q1,q3) 540 | # actor_loss = (entroy3.alpha * log_prob - q).mean() 541 | # actor3.learn(actor_loss) 542 | # alpha_loss = -(entroy3.log_alpha.exp() * (log_prob + entroy3.target_entropy).detach()).mean() 543 | # entroy3.learn(alpha_loss) 544 | # entroy3.alpha=entroy3.log_alpha.exp() 545 | # # 软更新 546 | # critic3.soft_update() 547 | # observation3 = observation3_ 548 | # # reward_totle += reward 549 | # if distance_03==0: 550 | # done3=True 551 | # print("arrive success 3!!!!!!!!!!!!!!") 552 | # state_number=6 553 | # action_number=1 554 | # if M4.memory_counter > MemoryCapacity: 555 | # b_M = M4.sample(BATCH) 556 | # b_s = b_M[:, :state_number] 557 | # b_a = b_M[:, state_number: state_number + action_number] 558 | # b_r = b_M[:, -state_number - 1: -state_number] 559 | # b_s_ = b_M[:, 
-state_number:] 560 | # b_s = torch.FloatTensor(b_s) 561 | # b_a = torch.FloatTensor(b_a) 562 | # b_r = torch.FloatTensor(b_r) 563 | # b_s_ = torch.FloatTensor(b_s_) 564 | # new_action, log_prob_, z, mean, log_std = actor4.evaluate(b_s_) 565 | # target_q1,target_q4=critic4.get_v(b_s_,new_action) 566 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q4)-entroy4.alpha*log_prob_) 567 | # current_q1, current_q4 = critic4.get_v(b_s, b_a) 568 | # critic4.learn(current_q1,current_q4,target_q.detach()) 569 | # a,log_prob,_,_,_=actor4.evaluate(b_s) 570 | # q1,q4=critic4.get_v(b_s,a) 571 | # q=torch.min(q1,q4) 572 | # actor_loss = (entroy4.alpha * log_prob - q).mean() 573 | # actor4.learn(actor_loss) 574 | # alpha_loss = -(entroy4.log_alpha.exp() * (log_prob + entroy4.target_entropy).detach()).mean() 575 | # entroy4.learn(alpha_loss) 576 | # entroy4.alpha=entroy4.log_alpha.exp() 577 | # # 软更新 578 | # critic4.soft_update() 579 | # observation4 = observation4_ 580 | if done1: 581 | # print("arrive success!!!!!!!!!!!!!!") 582 | break 583 | print("Ep: {} | rewards: {} {} {} {} | Step: {:.4f} | END: {}".format(episode, reward_totle, reward_totle2, reward_totle3, reward_totle4, timestep, observation)) 584 | # if reward_totle > -10: RENDER = True 585 | all_ep_r.append(reward_totle) 586 | # all_ep_r2.append(reward_totle2) 587 | # all_ep_r3.append(reward_totle3) 588 | # all_ep_r4.append(reward_totle4) 589 | #if episode % 20 == 0 and episode > 200:#保存神经网络参数 590 | # save_data = {'net': actor.action_net.observation_dict(), 'opt': actor.optimizer.state_dict(), 'i': episode} 591 | #torch.save(save_data, "C:\\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\0606\model_SAC.pth") 592 | # env.close() 593 | plt.plot(np.arange(len(all_ep_r)), all_ep_r) 594 | # plt.plot(np.arange(len(all_ep_r2)), all_ep_r2) 595 | # plt.plot(np.arange(len(all_ep_r3)), all_ep_r3) 596 | # plt.plot(np.arange(len(all_ep_r4)), all_ep_r4) 597 | plt.xlabel('Episode') 598 | plt.ylabel('Moving averaged episode 
reward') 599 | plt.show() 600 | else: 601 | print('SAC测试中...') 602 | aa=Actor() 603 | checkpoint_aa = torch.load("C:\\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\0606\model_SAC.pth") 604 | aa.action_net.load_state_dict(checkpoint_aa['net']) 605 | for j in range(10): 606 | # state = env.reset() 607 | total_rewards = 0 608 | for timestep in range(EP_LEN): 609 | # env.render() 610 | # action = aa.choose_action(state) 611 | # new_state, reward, done, info = env.step(action) # 执行动作 612 | total_rewards += reward 613 | # state = new_state 614 | print("Score:", total_rewards) 615 | # env.close() 616 | -------------------------------------------------------------------------------- /algorithm/MA-DDPG_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deep Deterministic Policy Gradient (DDPG) 3 | ----------------------------------------- 4 | An algorithm concurrently learns a Q-function and a policy. 5 | It uses off-policy data and the Bellman equation to learn the Q-function, 6 | and uses the Q-function to learn the policy. 7 | Reference 8 | --------- 9 | Deterministic Policy Gradient Algorithms, Silver et al. 2014 10 | Continuous Control With Deep Reinforcement Learning, Lillicrap et al. 
2016 11 | MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ 12 | Environment 13 | ----------- 14 | Openai Gym Pendulum-v0, continual action space 15 | Prerequisites 16 | ------------- 17 | tensorflow >=2.0.0a0 18 | tensorflow-proactionsbility 0.6.0 19 | tensorlayer >=2.0.0 20 | To run 21 | ------ 22 | python tutorial_DDPG.py --train/test 23 | """ 24 | 25 | import argparse 26 | import os 27 | import random 28 | import time 29 | import math 30 | 31 | #import gym 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | import tensorflow as tf 35 | from scipy.io import loadmat 36 | 37 | import tensorlayer as tl 38 | 39 | # add arguments in command --train/test 40 | parser = argparse.ArgumentParser(description='Train or test neural net motor controller.') 41 | parser.add_argument('--train', dest='train', action='store_true', default=True) 42 | parser.add_argument('--test', dest='test', action='store_true', default=False) 43 | args = parser.parse_args() 44 | 45 | ##################### hyper parameters #################### 46 | 47 | ENV_ID = 'LargeGridWorld-v0' # environment id 48 | RANDOM_SEED = 666 # random seed, can be either an int number or None 49 | RENDER = False # render while training 50 | 51 | ALG_NAME = 'DDPG' 52 | TRAIN_EPISODES = 500 # total number of episodes for training 53 | TEST_EPISODES = 10 # total number of episodes for training 54 | MAX_STEPS = 1000 # 20000total number of steps for each episode 55 | 56 | LR_A = 0.001 # learning rate for actor 57 | LR_C = 0.002 # learning rate for critic 58 | GAMMA = 0.9 # reward discount 59 | TAU = 0.01 # soft replacemen 60 | MEMORY_CAPACITY = 20000 # 500000size of replay buffer 61 | BATCH_SIZE = 64 # update action batch size 62 | VAR = 5 # control exploration 63 | #var_real=VAR 64 | ############################### DDPG #################################### 65 | n_width=93 66 | n_height = 93 67 | m = 
loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat") 68 | #correct_action=0 69 | MARK= m["MARK_new"] 70 | PL_AP=m["MARK_PL_real"] 71 | 72 | 73 | class ReplayBuffer: 74 | """ 75 | a ring buffer for storing transitions and sampling for training 76 | :state: (state_dim,) 77 | :action: (action_dim,) 78 | :reward: (,), scalar 79 | :next_state: (state_dim,) 80 | :done: (,), scalar (0 and 1) or bool (True and False) 81 | """ 82 | 83 | def __init__(self, capacity): 84 | self.capacity = capacity 85 | self.buffer = [] 86 | self.position = 0 87 | 88 | def push(self, state, action, reward, next_state, done): 89 | if len(self.buffer) < self.capacity: 90 | self.buffer.append(None) 91 | self.buffer[self.position] = (state, action, reward, next_state, done) 92 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer 93 | 94 | def sample(self, batch_size): 95 | batch = random.sample(self.buffer, batch_size) 96 | state, action, reward, next_state, done = map(np.stack, zip(*batch)) # stack for each element 97 | return state, action, reward, next_state, done 98 | 99 | def __len__(self): 100 | return len(self.buffer) 101 | 102 | def cosVector(x,y): 103 | result1=0.0; 104 | result2=0.0; 105 | result3=0.0; 106 | for i in range(len(x)): 107 | result1+=x[i]*y[i] #sum(X*Y) 108 | result2+=x[i]**2 #sum(X*X) 109 | result3+=y[i]**2 #sum(Y*Y) 110 | return result1/((result2*result3)**0.5) 111 | 112 | class DDPG(object): 113 | """ 114 | DDPG class 115 | """ 116 | def __init__(self, action_dim, state_dim, action_range, replay_buffer, agent_num=0): 117 | self.replay_buffer = replay_buffer 118 | self.action_dim, self.state_dim, self.action_range = action_dim, state_dim, action_range 119 | self.var = VAR 120 | self.agent_num=agent_num 121 | 122 | W_init = tf.random_normal_initializer(mean=0, stddev=0.3) 123 | b_init = tf.constant_initializer(0.1) 124 | 125 | def get_actor(input_state_shape, name=str(self.agent_num)): 126 | """ 127 | Build 
actor network 128 | :param input_state_shape: state 129 | :param name: name 130 | :return: act 131 | """ 132 | input_layer = tl.layers.Input(input_state_shape, name='A_input'+str(self.agent_num)) 133 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l1')(input_layer) 134 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l2')(layer) 135 | layer = tl.layers.Dense(n_units=action_dim, act=tf.nn.tanh, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_a')(layer) 136 | layer = tl.layers.Lambda(lambda x: action_range * x)(layer) 137 | return tl.models.Model(inputs=input_layer, outputs=layer, name='Actor' + name) 138 | 139 | def get_critic(input_state_shape, input_action_shape, name=str(self.agent_num)): 140 | """ 141 | Build critic network 142 | :param input_state_shape: state 143 | :param input_action_shape: act 144 | :param name: name 145 | :return: Q value Q(s,a) 146 | """ 147 | state_input = tl.layers.Input(input_state_shape, name=str(self.agent_num)+'C_s_input') 148 | action_input = tl.layers.Input(input_action_shape, name=str(self.agent_num)+'C_a_input') 149 | layer = tl.layers.Concat(1)([state_input, action_input]) 150 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l1')(layer) 151 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l2')(layer) 152 | layer = tl.layers.Dense(n_units=1, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_out')(layer) 153 | return tl.models.Model(inputs=[state_input, action_input], outputs=layer, name='Critic' + name) 154 | 155 | self.actor = get_actor([None, state_dim]) 156 | self.critic = get_critic([None, state_dim], [None, action_dim]) 157 | self.actor.train() 158 | self.critic.train() 159 | 160 | def copy_para(from_model, to_model): 161 | """ 162 | Copy parameters for soft 
updating 163 | :param from_model: latest model 164 | :param to_model: target model 165 | :return: None 166 | """ 167 | for i, j in zip(from_model.trainable_weights, to_model.trainable_weights): 168 | j.assign(i) 169 | 170 | self.actor_target = get_actor([None, state_dim], name=str(self.agent_num)+'_target') 171 | copy_para(self.actor, self.actor_target) 172 | self.actor_target.eval() 173 | 174 | self.critic_target = get_critic([None, state_dim], [None, action_dim], name=str(self.agent_num)+'_target') 175 | copy_para(self.critic, self.critic_target) 176 | self.critic_target.eval() 177 | 178 | self.ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) # soft replacement 179 | 180 | self.actor_opt = tf.optimizers.Adam(LR_A) 181 | self.critic_opt = tf.optimizers.Adam(LR_C) 182 | 183 | def ema_update(self): 184 | """ 185 | Soft updating by exponential smoothing 186 | :return: None 187 | """ 188 | paras = self.actor.trainable_weights + self.critic.trainable_weights 189 | self.ema.apply(paras) 190 | for i, j in zip(self.actor_target.trainable_weights + self.critic_target.trainable_weights, paras): 191 | i.assign(self.ema.average(j)) 192 | 193 | def get_action(self, state, greedy=False): 194 | """ 195 | Choose action 196 | :param s: state 197 | :param greedy: get action greedy or not 198 | :return: act 199 | """ 200 | action = self.actor(np.array([state]))[0] 201 | if greedy: 202 | return action 203 | #return np.random.rand(len(action)).astype(np.float32)- action_range 204 | return np.clip( 205 | np.random.normal(action, self.var), -self.action_range, self.action_range 206 | ).astype(np.float32) # add randomness to action selection for exploration 207 | 208 | def learn(self, exact_var): 209 | """, 210 | Update parameters 211 | :return: None 212 | """ 213 | self.var = exact_var 214 | #print(self.var) 215 | states, actions, rewards, states_, done = self.replay_buffer.sample(BATCH_SIZE) 216 | rewards = rewards[:, np.newaxis] 217 | done = done[:, np.newaxis] 218 | 219 | with 
tf.GradientTape() as tape: 220 | actions_ = self.actor_target(states_) 221 | q_ = self.critic_target([states_, actions_]) 222 | target = rewards + (1 - done) * GAMMA * q_ 223 | q_pred = self.critic([states, actions]) 224 | td_error = tf.losses.mean_squared_error(target, q_pred) 225 | critic_grads = tape.gradient(td_error, self.critic.trainable_weights) 226 | self.critic_opt.apply_gradients(zip(critic_grads, self.critic.trainable_weights)) 227 | 228 | with tf.GradientTape() as tape: 229 | actions = self.actor(states) 230 | q = self.critic([states, actions]) 231 | actor_loss = -tf.reduce_mean(q) # maximize the q 232 | actor_grads = tape.gradient(actor_loss, self.actor.trainable_weights) 233 | self.actor_opt.apply_gradients(zip(actor_grads, self.actor.trainable_weights)) 234 | self.ema_update() 235 | 236 | 237 | def save(self): 238 | """ 239 | save trained weights 240 | :return: None 241 | """ 242 | path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID])) 243 | if not os.path.exists(path): 244 | os.makedirs(path) 245 | tl.files.save_weights_to_hdf5(os.path.join(path, 'actor.hdf5'), self.actor) 246 | tl.files.save_weights_to_hdf5(os.path.join(path, 'actor_target.hdf5'), self.actor_target) 247 | tl.files.save_weights_to_hdf5(os.path.join(path, 'critic.hdf5'), self.critic) 248 | tl.files.save_weights_to_hdf5(os.path.join(path, 'critic_target.hdf5'), self.critic_target) 249 | 250 | def load(self): 251 | """ 252 | load trained weights 253 | :return: None 254 | """ 255 | path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID])) 256 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor.hdf5'), self.actor) 257 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor_target.hdf5'), self.actor_target) 258 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic.hdf5'), self.critic) 259 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic_target.hdf5'), self.critic_target) 260 | 261 | 262 | if __name__ == '__main__': 263 | 
n_mu=3 264 | n_M=5 265 | n_o=6*7 266 | 267 | #env = gym.make(ENV_ID).unwrapped 268 | #env = gym.make(ENV_ID).unwrapped 269 | 270 | # reproducible 271 | # env.seed(RANDOM_SEED) 272 | np.random.seed(RANDOM_SEED) 273 | tf.random.set_seed(RANDOM_SEED) 274 | 275 | state_dim = 2 276 | action_dim = 10 277 | action_range = 1 # scale action, [-action_range, action_range] 278 | action_range_su = np.array([1, 1, 1], dtype=np.float32) 279 | action_range_ris = np.array([1]*(2*n_M+1), dtype=np.float32) 280 | action_range_n_o = np.array([1]*(2+1), dtype=np.float32) 281 | 282 | buffer = ReplayBuffer(MEMORY_CAPACITY) #MU1 283 | buffer2 = ReplayBuffer(MEMORY_CAPACITY) #MU2 284 | buffer3 = ReplayBuffer(MEMORY_CAPACITY) #MU3 285 | # buffer4 = ReplayBuffer(MEMORY_CAPACITY) #su1 286 | # buffer5 = ReplayBuffer(MEMORY_CAPACITY) #su2 287 | # buffer6 = ReplayBuffer(MEMORY_CAPACITY) #ris 288 | # buffer7 = ReplayBuffer(MEMORY_CAPACITY) #commu 289 | 290 | 291 | agent = DDPG(action_dim, state_dim, action_range, buffer, 1) #mu 292 | agent2 = DDPG(action_dim, state_dim, action_range, buffer2, 2) 293 | agent3 = DDPG(action_dim, state_dim, action_range, buffer3, 3) 294 | # agent4 = DDPG(action_dim-2, state_dim, action_range_su, buffer4, 4) #su 295 | # agent5 = DDPG(action_dim-2, state_dim, action_range_su, buffer5, 5) 296 | # agent6 = DDPG(2*n_M+1, state_dim*n_mu, action_range_ris, buffer6, 6) #ris 297 | # agent7 = DDPG(2+1, state_dim*n_mu, action_range_n_o, buffer7, 7) #commu n_o 298 | 299 | VAR1=VAR 300 | VAR2=VAR 301 | VAR3=VAR 302 | t0 = time.time() 303 | if args.train: # train 304 | all_episode_reward = [] 305 | all_episode_reward2 = [] 306 | all_episode_reward3 = [] 307 | # all_episode_reward4 = [] 308 | # all_episode_reward5 = [] 309 | # all_episode_reward6 = [] 310 | # all_episode_reward7 = [] 311 | init=0 312 | 313 | # end_location = [38*2,11*2] 314 | # end_location2 = [26*2,18*2] #[8*2,9*2] 315 | # end_location3 = [16*2,32*2] #[35*2,9*2] 316 | end_location = [15*2,32*2] 317 | 
end_location2 = [45 * 2, 45 * 2]
end_location3 = [47, 38 * 2]

# Per-agent counters of environment steps taken while the agent had not yet
# reached its goal (accumulated across all episodes).
study = 0
study2 = 0
study3 = 0


def _advance(position, action):
    """Return the next grid position of one agent.

    The first two action components are added to the current position and
    truncated to integers.  The result is clipped to the open interval
    (0, n_width) x (0, n_height); if the target cell is marked ``2`` in
    ``MARK`` the agent stays where it was.
    """
    old_x, old_y = position
    new_x = int(old_x + action[0])
    new_y = int(old_y + action[1])
    if new_x <= 0:
        new_x = 1
    if new_x >= n_width:
        new_x = int(n_width) - 1
    if new_y <= 0:
        new_y = 1
    if new_y >= n_height:
        new_y = int(n_height) - 1
    if MARK[new_x, new_y] == 2:
        # Blocked cell: cancel the move.
        new_x, new_y = old_x, old_y
    return np.array([new_x, new_y], dtype=np.float32)


def _clamp_split(action):
    """Nudge ``action[8]`` off the exact values -1 and 1 (in place)."""
    if action[8] == -1:
        action[8] = -0.9999999
    if action[8] == 1:
        action[8] = 0.9999999


def _beam_weights(action):
    """Build the 3-element complex weight vector from ``action[2:8]``.

    Components (2, 3), (4, 5) and (6, 7) are (amplitude, phase) pairs; a
    phase value ``p`` is mapped to the angle ``(1 + p) * pi``.
    """
    return np.array([
        action[k] * math.exp(1) ** (1j * (1 + action[k + 1]) * math.pi)
        for k in (2, 4, 6)
    ])


def _channel(position):
    """Return the 3-element channel vector seen at ``position``.

    The steering phase comes from ``cosVector`` between the x axis and the
    offset of ``position`` from the point (50, 100); the gain is the square
    root of the ``PL_AP`` entry of the cell.
    NOTE(review): (50, 100) and the ``1 - 2`` z offset presumably encode the
    access-point location and a height difference -- confirm.
    """
    theta = cosVector([1, 0, 0], [position[0] - 50, position[1] - 100, 1 - 2])
    steering = np.array([
        1,
        math.exp(1) ** (-1 * 1j * (math.pi * theta)),
        math.exp(1) ** (-2 * 1j * (math.pi * theta)),
    ])
    gain = math.sqrt(PL_AP[int(position[0]), int(position[1])])
    return gain * steering


def _beam_power(channel, beam):
    """Squared norm of the element-wise product of ``channel`` and ``beam``."""
    return (np.linalg.norm(channel * beam)) ** 2


def _goal_distance(position, goal):
    """Euclidean distance from ``position`` to ``goal`` with both axes halved."""
    squared = ((position[0] - goal[0]) * (position[0] - goal[0]) / 4
               + (position[1] - goal[1]) * (position[1] - goal[1]) / 4)
    return math.sqrt(squared)


def _step_reward(distance, own_sinr, paired_sinr):
    """Reward of one agent: distance penalty plus a floored SINR bonus.

    Reaching the goal exactly (``distance == 0``) overrides the value with 1.
    """
    if distance == 0:
        return 1
    return -(distance / 50) + max(0.01, min(own_sinr, paired_sinr) / 1000) - 0.01


def _dump_trajectory(prefix, episode, steps, values):
    """Append one value per line to ``<prefix><episode>_<steps>.txt``."""
    filename = prefix + str(episode) + "_" + str(steps) + '.txt'
    with open(filename, 'a') as fileobject:  # append mode
        fileobject.writelines(str(value) + '\n' for value in values)


for episode in range(TRAIN_EPISODES):
    # Reset the environment: three moving agents and three static users.
    state = np.array([4 * 2, 5 * 2], dtype=np.float32)
    state2 = np.array([20 * 2, 20 * 2], dtype=np.float32)
    state3 = np.array([40 * 2, 10 * 2], dtype=np.float32)
    state_su1 = np.array([17, 25 * 2], dtype=np.float32)
    state_su2 = np.array([50, 25 * 2], dtype=np.float32)
    state_su3 = np.array([84, 25 * 2], dtype=np.float32)

    episode_reward = 0
    episode_reward2 = 0
    episode_reward3 = 0
    done1 = False
    done2 = False
    done3 = False
    # "Already announced" flags: make sure each arrival is handled once.
    bobao = 0
    bobao2 = 0
    bobao3 = 0

    # Visited coordinates of each agent, starting with the initial position.
    x_k1_array = [state[0]]
    y_k1_array = [state[1]]
    x_k2_array = [state2[0]]
    y_k2_array = [state2[1]]
    x_k3_array = [state3[0]]
    y_k3_array = [state3[1]]

    for steps in range(MAX_STEPS):
        # Action selection.
        action = agent.get_action(state)
        action2 = agent2.get_action(state2)
        action3 = agent3.get_action(state3)

        # Environment step: agents that already arrived stay in place.
        if not done1:
            state_ = _advance(state, action)
            x_k1_array.append(state_[0])
            y_k1_array.append(state_[1])
        else:
            state_ = state
        if not done2:
            state2_ = _advance(state2, action2)
            x_k2_array.append(state2_[0])
            y_k2_array.append(state2_[1])
        else:
            state2_ = state2
        if not done3:
            state3_ = _advance(state3, action3)
            x_k3_array.append(state3_[0])
            y_k3_array.append(state3_[1])
        else:
            state3_ = state3

        actions = (action, action2, action3)
        for chosen in actions:
            _clamp_split(chosen)

        beams = [_beam_weights(chosen) for chosen in actions]
        agent_channels = [_channel(pos) for pos in (state_, state2_, state3_)]
        su_channels = [_channel(pos) for pos in (state_su1, state_su2, state_su3)]

        # Agents are paired with the static users in ascending order of
        # action[9]: the agent at rank r is paired with static user r.
        order_array = [action[9], action2[9], action3[9]]
        order_index = sorted(range(3), key=lambda idx: order_array[idx])

        # action[8] in [-1, 1] is mapped to a split factor (a + 1) / 2.
        # Depending on its sign, the leftover share is counted as
        # interference either for the agent itself or for its paired user.
        agent_interference = [10 ** (-9)] * 3
        su_interference = [10 ** (-9)] * 3
        for rank, idx in enumerate(order_index):
            if actions[idx][8] > 0:
                agent_interference[idx] += (
                    (1 - (actions[idx][8] + 1) / 2)
                    * _beam_power(agent_channels[idx], beams[idx])
                )
            else:
                su_interference[rank] += (
                    ((actions[idx][8] + 1) / 2)
                    * _beam_power(su_channels[rank], beams[idx])
                )

        agent_sinr = [
            ((actions[idx][8] + 1) / 2)
            * _beam_power(agent_channels[idx], beams[idx])
            / agent_interference[idx]
            for idx in range(3)
        ]
        su_sinr = [
            (1 - (actions[idx][8] + 1) / 2)
            * _beam_power(su_channels[rank], beams[idx])
            / su_interference[rank]
            for rank, idx in enumerate(order_index)
        ]

        # Rewards: each agent is scored against the user it is paired with.
        distance_01 = _goal_distance(state_, end_location)
        reward = _step_reward(distance_01, agent_sinr[0], su_sinr[order_index.index(0)])
        if not done1:
            episode_reward += reward

        distance_02 = _goal_distance(state2_, end_location2)
        reward2 = _step_reward(distance_02, agent_sinr[1], su_sinr[order_index.index(1)])
        if not done2:
            episode_reward2 += reward2

        distance_03 = _goal_distance(state3_, end_location3)
        reward3 = _step_reward(distance_03, agent_sinr[2], su_sinr[order_index.index(2)])
        if not done3:
            episode_reward3 += reward3

        state_ = np.array(state_, dtype=np.float32)
        state2_ = np.array(state2_, dtype=np.float32)
        state3_ = np.array(state3_, dtype=np.float32)

        # NOTE(review): the done flags are still the values from before this
        # step's arrival check, so the transition that reaches the goal is
        # stored with done=False -- confirm this is intended.
        buffer.push(state, action, reward, state_, done1)
        buffer2.push(state2, action2, reward2, state2_, done2)
        buffer3.push(state3, action3, reward3, state3_, done3)

        if not done1:
            study = study + 1
        if not done2:
            study2 = study2 + 1
        if not done3:
            study3 = study3 + 1

        # Learn only once the buffer is full, the warm-up episodes are over
        # and the agent is still en route; the VAR value handed to learn()
        # decays on every update.
        if len(buffer) >= MEMORY_CAPACITY and not done1 and episode >= MEMORY_CAPACITY / MAX_STEPS:
            VAR1 *= math.sqrt(.99995)
            agent.learn(VAR1)

        if len(buffer2) >= MEMORY_CAPACITY and not done2 and episode >= MEMORY_CAPACITY / MAX_STEPS:
            VAR2 *= math.sqrt(.99995)
            agent2.learn(VAR2)

        if len(buffer3) >= MEMORY_CAPACITY and not done3 and episode >= MEMORY_CAPACITY / MAX_STEPS:
            VAR3 *= math.sqrt(.99995)
            agent3.learn(VAR3)

        # Arrival handling: mark the agent done, dump short trajectories
        # (fewer than 100 steps) to text files and announce it once.
        if distance_01 == 0 and bobao == 0:
            done1 = True
            if steps < 100:
                _dump_trajectory('x_k1', episode, steps, x_k1_array)
                _dump_trajectory('y_k1', episode, steps, y_k1_array)
            print("1 arrive success!!!!!!!!!!!!!!")
            bobao = 1
        if distance_02 == 0 and bobao2 == 0:
            if steps < 100:
                _dump_trajectory('x_k2', episode, steps, x_k2_array)
                _dump_trajectory('y_k2', episode, steps, y_k2_array)
            done2 = True
            print("2 arrive success!!!!!!!!!!!!!!")
            bobao2 = 1
        if distance_03 == 0 and bobao3 == 0:
            if steps < 100:
                _dump_trajectory('x_k3', episode, steps, x_k3_array)
                _dump_trajectory('y_k3', episode, steps, y_k3_array)
            done3 = True
            print("3 arrive success!!!!!!!!!!!!!!")
            bobao3 = 1

        if done1 and done2 and done3:
            break

        state = state_
        state2 = state2_
        state3 = state3_

    # Exponentially smoothed episode rewards (factor 0.9) for plotting.
    if episode == 0:
        all_episode_reward.append(episode_reward)
        all_episode_reward2.append(episode_reward2)
        all_episode_reward3.append(episode_reward3)
    else:
        all_episode_reward.append(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)
        all_episode_reward2.append(all_episode_reward2[-1] * 0.9 + episode_reward2 * 0.1)
        all_episode_reward3.append(all_episode_reward3[-1] * 0.9 + episode_reward3 * 0.1)

    print(
        ' Episode: {}/{} | Reward: {:.4f} & {:.4f} & {:.4f} | Step: {:.4f}| END: {}-{} {}-{} {}-{}'.format(
            episode + 1, TRAIN_EPISODES, episode_reward, episode_reward2, episode_reward3,
            steps, end_location, state, end_location2, state2, end_location3, state3
        ))

# Plot the smoothed reward curves of the three agents and save the figure.
plt.plot(all_episode_reward)
plt.plot(all_episode_reward2)
plt.plot(all_episode_reward3)
if not os.path.exists('image'):
    os.makedirs('image')
plt.savefig(os.path.join('image', '_'.join([ALG_NAME, ENV_ID])))

# NOTE: the evaluation path (`args.test`) of the original template is
# disabled in this script.