├── radio_map
    ├── AABB_plot.m
    └── map_data.mat
├── _doc
    ├── simulation_fig.png
    └── simulation_fig2.png
├── plot_figure
    ├── FIGURE_2.m
    ├── FIGURE_1.m
    ├── FIGURE_5.m
    ├── FIGURE_4.m
    └── FIGURE_3.m
├── algorithm
    ├── MA-TD3_core.py
    ├── MA-TD3_main.py
    ├── MA-PPO_main.py
    ├── MA-SAC_main.py
    └── MA-DDPG_main.py
├── tradition_baseline
    ├── A_search.py
    ├── fig5.m
    ├── fig3.m
    └── pso.py
├── README.md
└── environment
    ├── environment.yaml
    └── requirements.txt


/radio_map/AABB_plot.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/AABB_plot.m


--------------------------------------------------------------------------------
/radio_map/map_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/map_data.mat


--------------------------------------------------------------------------------
/_doc/simulation_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig.png


--------------------------------------------------------------------------------
/_doc/simulation_fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig2.png


--------------------------------------------------------------------------------
/plot_figure/FIGURE_2.m:
--------------------------------------------------------------------------------
 1 | clc;
 2 | clear all;
 3 | close all;
 4 | X=[1:1:400];
 5 | TD3_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\TD3.txt');
 6 | % TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
 7 | % TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
 8 | % TD3_avg=(TD3_1+TD3_2+TD3_3)/3;
 9 | 
10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt');
11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\PPO.txt');
12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\DDPG.txt');
13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\SAC.txt');
14 | RANDOM_avg=ones(400)*TD3_avg(1);
15 | % PPO_avg(1)=TD3_avg(1);
16 | % DDPG_avg(20)=TD3_avg(1);
17 | % SAC_avg(20)=TD3_avg(1);
18 | 
19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400);
20 | hold on
21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400);
22 | hold on
23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1	0.54902	0],'MarkerIndices',1:100:400);
24 | hold on
25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400);
26 | hold on
27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400);
28 | % p1.MarkerIndices = 400:500:length(y1_ping);
29 | legend([p1 p4 p3 p2  p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex')
30 | xlabel('Episode','Interpreter','latex')
31 | ylabel('Reward','Interpreter','latex')
32 | ylim([-900, 0])


--------------------------------------------------------------------------------
/plot_figure/FIGURE_1.m:
--------------------------------------------------------------------------------
 1 | clc;
 2 | clear all;
 3 | close all;
 4 | X=[1:1:400];
 5 | TD3_1=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-1.txt');
 6 | TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
 7 | TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
 8 | TD3_avg=(TD3_1+TD3_2+TD3_3)/3;
 9 | 
10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt');
11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\PPO-avg.txt');
12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\DDPG-avg.txt');
13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\SAC-avg.txt');
14 | RANDOM_avg=ones(400)*TD3_avg(1);
15 | PPO_avg(1)=TD3_avg(1);
16 | DDPG_avg(20)=TD3_avg(1);
17 | SAC_avg(20)=TD3_avg(1);
18 | 
19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400);
20 | hold on
21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400);
22 | hold on
23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1	0.54902	0],'MarkerIndices',1:100:400);
24 | hold on
25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400);
26 | hold on
27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400);
28 | % p1.MarkerIndices = 400:500:length(y1_ping);
29 | legend([p1 p4 p3 p2  p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex')
30 | xlabel('Episode','Interpreter','latex')
31 | ylabel('Reward','Interpreter','latex')
32 | ylim([-1500, 0])


--------------------------------------------------------------------------------
/algorithm/MA-TD3_core.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.signal
 3 | 
 4 | import torch
 5 | import torch.nn as nn
 6 | 
 7 | 
 8 | def combined_shape(length, shape=None):
 9 |     if shape is None:
10 |         return (length,)
11 |     return (length, shape) if np.isscalar(shape) else (length, *shape)
12 | 
def mlp(sizes, activation, output_activation=nn.Identity):
    """Assemble a fully connected network as an ``nn.Sequential``.

    Args:
        sizes: Layer widths, input first and output last.
        activation: Module class instantiated after every hidden linear layer.
        output_activation: Module class instantiated after the last linear layer.

    Returns:
        ``nn.Sequential`` alternating ``nn.Linear`` layers and activations.
    """
    final_idx = len(sizes) - 2
    modules = []
    for idx, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        # Only the last linear layer is followed by the output activation.
        act_cls = output_activation if idx >= final_idx else activation
        modules.append(nn.Linear(n_in, n_out))
        modules.append(act_cls())
    return nn.Sequential(*modules)
19 | 
def count_vars(module):
    """Return the total number of scalar entries over all parameters of ``module``."""
    total = 0
    for param in module.parameters():
        total += np.prod(param.shape)
    return total
22 | 
class MLPActor(nn.Module):
    """Policy network: an MLP from observations to actions with a Tanh output layer."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        """Build the network ``obs_dim -> hidden_sizes... -> act_dim``.

        Args:
            obs_dim: Width of the input layer.
            act_dim: Width of the output layer.
            hidden_sizes: Iterable of hidden layer widths.
            activation: Module class used after every hidden layer.
        """
        super().__init__()
        layer_widths = [obs_dim, *hidden_sizes, act_dim]
        self.pi = mlp(layer_widths, activation, nn.Tanh)

    def forward(self, obs):
        """Return the raw network output for ``obs``."""
        # No extra rescaling happens here; the Tanh output layer is the only squashing.
        action = self.pi(obs)
        return action
33 | 
class MLPQFunction(nn.Module):
    """Q-network: an MLP mapping a concatenated (observation, action) pair to one value."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        """Build the network ``obs_dim + act_dim -> hidden_sizes... -> 1``.

        Args:
            obs_dim: Width of the observation part of the input.
            act_dim: Width of the action part of the input.
            hidden_sizes: Iterable of hidden layer widths.
            activation: Module class used after every hidden layer.
        """
        super().__init__()
        layer_widths = [obs_dim + act_dim, *hidden_sizes, 1]
        self.q = mlp(layer_widths, activation)

    def forward(self, obs, act):
        """Return the Q-value for ``obs`` and ``act`` with the trailing unit axis removed."""
        joint_input = torch.cat([obs, act], dim=-1)
        value = self.q(joint_input)
        # Drop the size-1 last axis so the result has one entry per input row.
        return value.squeeze(-1)
43 | 
class MLPActorCritic(nn.Module):
    """Bundle of one policy network (``pi``) and two Q-networks (``q1``, ``q2``)."""

    def __init__(self, obs_dim, act_dim, hidden_sizes=(256,256),
                 activation=nn.ReLU):
        """Create the three sub-networks with identical hidden layout.

        Args:
            obs_dim: Observation width passed to every sub-network.
            act_dim: Action width passed to every sub-network.
            hidden_sizes: Hidden layer widths shared by all sub-networks.
            activation: Module class used after every hidden layer.
        """
        super().__init__()

        # build policy and value functions
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation)
        self.q1 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)
        self.q2 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)

    def act(self, obs):
        """Evaluate the policy on ``obs`` without tracking gradients.

        Args:
            obs: Tensor accepted by the policy network.

        Returns:
            The policy output as a NumPy array.
        """
        with torch.no_grad():
            # .cpu() returns the tensor unchanged when it already lives on the
            # CPU, and makes .numpy() valid when the module was moved elsewhere.
            return self.pi(obs).cpu().numpy()
58 | 


--------------------------------------------------------------------------------
/plot_figure/FIGURE_5.m:
--------------------------------------------------------------------------------
 1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1];
 2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16];
 3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052];
 4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX
 5 | Y2_OMA = [-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221];
 6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U 
 7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05];
 8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01 
 9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846];
10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg 
11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241];
12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir
13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318];
14 | 
15 | % fig = figure;
16 | % left_color = [0 0 0];
17 | % right_color = [0 0 0];
18 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]);
19 | 
20 | %激活左侧
21 | % yyaxis left
22 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
23 | hold on
24 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
25 | hold on
26 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
27 | % ylabel('Averaged data rate recieved by each robot')
28 | % hold on
29 | % % yyaxis right
30 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
31 | % hold on
32 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
33 | % hold on
34 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
35 | % hold on
36 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
37 | hold on
38 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
39 | hold on
40 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
41 | ylabel('Averaged data rate recieved by each robot')
42 | hold on
43 | % yyaxis right
44 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
45 | hold on
46 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
47 | hold on
48 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
49 | hold on
50 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$')
51 | xlabel('$P_{\max}$')
52 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, NOMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,NOMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA')
53 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot')
54 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot')
55 | 
56 | box on
57 | grid off
58 | 


--------------------------------------------------------------------------------
/tradition_baseline/A_search.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import heapq
  3 | from scipy.io import loadmat
  4 | import math
  5 | 
# Exclusive upper bounds handed to is_valid(): an index i passes only when 0 <= int(i) < 99.
x_max=99
y_max=99
# The map is read from a machine-specific absolute path at import time.
m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat") 
#correct_action=0
# Grid stored under the "MARK_new" key of the .mat file; is_valid() rejects cells equal to 2.
MARK= m["MARK_new"]
 11 | 
 12 | def generate_directions(num_directions):
 13 |     # 生成均匀分布的方向向量
 14 |     directions = []
 15 |     angle_step = 360 / num_directions
 16 |     for i in range(num_directions):
 17 |         angle = math.radians(i * angle_step)
 18 |         directions.append((math.cos(angle), math.sin(angle)))
 19 |     return directions
 20 | 
 21 | def is_valid(x, y, MARK, x_max, y_max):
 22 |     # Check if (x, y) is within bounds and not an obstacle
 23 |     return 0 <= int(x) < x_max and 0 <= int(y) < y_max and MARK[int(x), int(y)] != 2
 24 | 
 25 | def heuristic(a, b):
 26 |     # Manhattan distance heuristic
 27 |     return abs(a[0] - b[0]) + abs(a[1] - b[1])
 28 | 
 29 | def a_star_search(start, goal, MARK, x_max, y_max):
 30 |     # A* search to find the shortest path from start to goal
 31 |     neighbors = [ (0, -1), (-1, 0),(1, 0), (0, 1)]
 32 |     # neighbors = generate_directions(8)
 33 |     close_set = set()
 34 |     came_from = {}
 35 |     gscore = {start: 0}
 36 |     fscore = {start: heuristic(start, goal)}
 37 |     oheap = []
 38 | 
 39 |     heapq.heappush(oheap, (fscore[start], start))
 40 |     
 41 |     while oheap:
 42 |         current = heapq.heappop(oheap)[1]
 43 | 
 44 |         if current == goal:
 45 |             data = []
 46 |             while current in came_from:
 47 |                 data.append(current)
 48 |                 current = came_from[current]
 49 |             data.append(start)
 50 |             data.reverse()
 51 |             return data
 52 | 
 53 |         close_set.add(current)
 54 |         for i, j in neighbors:
 55 |             neighbor = current[0] + i, current[1] + j
 56 |             tentative_g_score = gscore[current] + 1
 57 | 
 58 |             if not is_valid(neighbor[0], neighbor[1], MARK, x_max, y_max):
 59 |                 continue
 60 | 
 61 |             if neighbor in close_set and tentative_g_score >= gscore.get(neighbor, 0):
 62 |                 continue
 63 | 
 64 |             if tentative_g_score < gscore.get(neighbor, 0) or neighbor not in [i[1] for i in oheap]:
 65 |                 came_from[neighbor] = current
 66 |                 gscore[neighbor] = tentative_g_score
 67 |                 fscore[neighbor] = tentative_g_score + heuristic(neighbor, goal)
 68 |                 heapq.heappush(oheap, (fscore[neighbor], neighbor))
 69 | 
 70 |     return False
 71 | 
 72 | def save_path_to_txt(path, filename):
 73 |     with open(filename, 'w') as f:
 74 |         for x, y in path:
 75 |             f.write(f"{x},{y}\n")
 76 | 
 77 | # Define start, goal, and MARK matrix
 78 | x1, y1 = 40, 40  # Starting point
 79 | x2, y2 = 90, 90  # Goal point
 80 | # x_max, y_max = 10, 10  # Grid size
 81 | 
 82 | # Example MARK matrix with obstacles
 83 | # MARK = np.zeros((x_max, y_max))
 84 | # MARK[4, 4] = 2
 85 | # MARK[4, 5] = 2
 86 | # MARK[4, 6] = 2
 87 | # MARK[5, 4] = 2
 88 | # MARK[6, 4] = 2
 89 | 
 90 | start = (x1, y1)
 91 | goal = (x2, y2)
 92 | 
 93 | # Find path
 94 | path = a_star_search(start, goal, MARK, x_max, y_max)
 95 | 
 96 | # Save path to txt file
 97 | if path:
 98 |     save_path_to_txt(path, 'robot_path_2_v2.txt')
 99 |     print("Path found and saved to robot_path.txt")
100 | else:
101 |     print("No path found")
102 | 


--------------------------------------------------------------------------------
/tradition_baseline/fig5.m:
--------------------------------------------------------------------------------
 1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1];
 2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16];
 3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052];
 4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX
 5 | Y2_OMA = [-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221];
 6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U 
 7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05];
 8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01 
 9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846];
10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg 
11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241];
12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir
13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318];
14 | 
15 | Optimal_NOMA = [-5.66606907, -15.94919966, -21.868942625, -25.36415256, -27.00059453];
16 | 
17 | % fig = figure;
18 | % left_color = [0 0 0];
19 | % right_color = [0 0 0];
20 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]);
21 | 
22 | %激活左侧
23 | % yyaxis left
24 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
25 | hold on
26 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
27 | hold on
28 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
29 | % ylabel('Averaged data rate recieved by each robot')
30 | % hold on
31 | % % yyaxis right
32 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
33 | % hold on
34 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
35 | % hold on
36 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
37 | % hold on
38 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
39 | hold on
40 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
41 | hold on
42 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
43 | ylabel('Averaged data rate recieved by each robot')
44 | hold on
45 | % yyaxis right
46 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
47 | hold on
48 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
49 | hold on
50 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
51 | hold on
52 | p7=plot(X_1, Optimal_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
53 | hold on
54 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$')
55 | xlabel('$P_{\max}$')
56 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, NOMA','$UE distribution $1$, \kappa_1=0.01$, MA-TD3, OMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,NOMA','$UE distribution $1$, \kappa_1=0.1$, MA-DDPG,OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA', 'Traditional Optimization Method')
57 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot')
58 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot')
59 | 
60 | box on
61 | grid off
62 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MAAC_DRL
 2 | This repository contains the Python implementation of our submitted paper titled "Deep Reinforcement Learning Enables Joint Trajectory and Communication in Internet of Robotic Things".
 3 | ## Quick Links
 4 | [[Introduction]](#introduction)  [[Installation]](#installation) [[Usage]](#usage) 
 5 | ## Introduction
 6 | We train multi-agent actor-critic deep reinforcement learning (MAAC-DRL) algorithms to reduce the decoding error rate and the arrival time of robots in the industrial Internet of Robotic Things (IoRT) under ultra-reliable and low-latency communication (URLLC) requirements.
 7 | 
 8 | Here are the settings of the considered IoRT environment.
 9 | | Notation     | Simulation Value   | Physical Meaning                                             |
10 | | ------------ | ------------------ | ------------------------------------------------------------ |
11 | | $K$      | $\{2, 4, 6\}$                | the number of users    |
12 | | $L$ | $\{2, 3\}$     | the number of antennas    |
13 | | $K_{\rm MU}$ | $\{1, 2, 3\}$     | the number of robots     |
14 | | $D$        | $100 \ {\rm bits}$      | packet size    |
15 | | $M$        | $50 \ {\rm symbols}$     | the number of transmitted symbols    |
16 | | $T_{\max}$   | $2000 \ {\rm s}$ | the moving deadline of robots   |
17 | | $H_0$  | $1 \ {\rm m}$   | the height of antennas     |
18 | | $P_{\max}$  | $[0.02, 0.1] \ {\rm W}$   | the maximal transmit power |
19 | | $\sigma^2$     | $-100 \ {\rm dBm/Hz}$   | the variance of the additive white Gaussian noise                  |
20 | | $v$          | $5 \ {\rm m/s}$    | the moving speed    |
21 | 
22 | 
23 | 
24 | ## Results
25 | <table style="padding: 0; border-spacing: 0;">
26 | <tr style="padding: 0; border-spacing: 0;">
27 | <td style="padding: 0; border-spacing: 0; width: 50%"><img src="./_doc/simulation_fig.png"></td>
28 | <td style="padding: 0; border-spacing: 0; width: 50%"><img src="./_doc/simulation_fig2.png"></td>
29 | </tr>
30 | </table>
31 | 
32 | For more details and simulation results, please check our paper.
33 | 
34 | ## Installation
35 | Dependencies can be installed by Conda:
36 | 
37 | For example, to create the environment used for the IoRT simulations with URLLC requirements:
38 | ```
39 | conda env create -f environment/environment.yaml -n URLLC
40 | conda activate URLLC
41 | ```
42 | 
43 | Then activate it by
44 | ```
45 | conda activate URLLC
46 | ```
47 | To run the simulations, please further install the remaining Python packages by 
48 | ```
49 | pip install -r environment/requirements.txt
50 | ```
51 | 
52 | ## Usage
53 | 
54 | Here are the parameters of our simulations.
55 | | Notation     | Simulation Value   | Physical Meaning                                             |
56 | | ------------ | ------------------ | ------------------------------------------------------------ |
57 | | $lr$      | $\{10^{-4}, 2 \times 10^{-3}\}$                | the learning rate of the DRL algorithms    |
58 | | $\kappa_1$ | $\{0, 0.01, 0.1\}$     | the parameters of the reward designs    |
59 | | $\|\mathcal{D}_0\|$ | $128$     | the size of the mini-batch buffer   |
60 | | $\|\mathcal{D}\|$        | $10^{6}$      | the maximal size of the experience buffer    |
61 | 
62 | ### algorithm (`python codes of different MA-DRL algorithms`):
63 |  - `'MA-DDPG_main.py'` (Main functions and MDP transitions of MA-DDPG)
64 |  - `'MA-PPO_main.py'` (Main functions and MDP transitions of MA-PPO)
65 |  - `'MA-SAC_main.py'` (Main functions and MDP transitions of MA-SAC)
66 |  - `'MA-TD3_core.py'` (MLP operators of MA-TD3)
67 |  - `'MA-TD3_main.py'` (Main functions and MDP transitions of MA-TD3)
68 | 
69 | ### environment (`documents of considered system`):
70 |  - `'environment.yaml'` (Conda environmental document)
71 |  - `'requirements.txt'` (Pip environmental document)
72 | 
73 | ### plot_figure (`matlab codes of different algorithms`):
74 |  - `'FIGURE_1.m'` (Reward comparison under different MA-DRL algorithms)
75 |  - `'FIGURE_2.m'` (Robots' trajectory comparison under different reward settings)
76 |  - `'FIGURE_3.m'` (Average decoding error probability under different clustering and multiple access scheme)
77 |  - `'FIGURE_4.m'` (Objective function under different environmental settings)
78 |  - `'FIGURE_5.m'` (Arriving time under different environmental settings)
79 | 
80 | ### radio_map (`documents of building environment`):
81 |  - `'AABB_plot.m'` (Construct a radio map based on the deployment of obstacles and intersection detection)
82 |  - `'map_data.mat'` (Raw data of the built radio map)
83 | 
84 | 


--------------------------------------------------------------------------------
/plot_figure/FIGURE_4.m:
--------------------------------------------------------------------------------
% FIGURE_4: empirical CDFs of the per-episode step counts read from two result
% folders ("TD3-STEP" and "TD3-STEP-v2"). Samples 200..500 of each trace are
% passed to cdfplot. All paths are machine-specific absolute paths.

% ---- first result folder: TD3-STEP ----
y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_0.txt").';
y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_1.txt").';
y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_2.txt").';
y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_3.txt").';
y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_4.txt").';
% Only traces _0, _2 and _4 of this folder are drawn (handles h4, h2, h3).
h4=cdfplot(y1_0(200:500));
hold on
% h2=cdfplot(y2_0(200:500));
% hold on
h2=cdfplot(y3_0(200:500));
% hold on
% h4=cdfplot(y4_0(200:500));
% hold on
h3=cdfplot(y5_0(200:500));
hold on

% ---- second result folder: TD3-STEP-v2 (the y*_0 variables are reused) ----
y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_0.txt").';
y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_1.txt").';
y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_2.txt").';
y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_3.txt").';
y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_4.txt").';
% Only traces _0, _1 and _4 of this folder are drawn (handles h5, h6, h1).
h5=cdfplot(y1_0(200:500));
hold on
h6=cdfplot(y2_0(200:500));
hold on
% h3=cdfplot(y3_0(200:500));
% hold on
% h4=cdfplot(y4_0(200:500));
% hold on
h1=cdfplot(y5_0(200:500));
hold on
% h6=cdfplot(y4(1500:4999));
% hold on
% Styling: h1-h3 are dashed, h4-h6 keep the default line style; all curves are black.
set(h1,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--')
set(h2,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--')
set(h3,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300,'LineStyle','--')
set(h4,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300)
set(h5,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300)
set(h6,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300)

ax1 = gca;
% set(gca,'XAxisLocation','top')
% set(gca,'YAxisLocation','right')

xlim([100 350]);
ylim([0 0.85]);
xlabel('Arriving step','Interpreter','latex')
ylabel('Cumulative distribution function','Interpreter','latex')
% NOTE(review): the tick labels are fixed strings that do not equal the tick
% positions set two lines below (x ticks 100..350 are labelled 90%..140%, the
% y tick at 0.85 is labelled '1.0') - confirm this relabelling is intended.
set(gca,'XTickLabel',{'$90 \%$','$100 \%$','$110 \%$','$120 \%$','$130 \%$','$140 \%$'});
set(gca,'YTickLabel',{'0','0.2','0.4','0.6','0.8','1.0'});
set(gca,'xtick',100:250/5:350)   
set(gca,'ytick',0:0.85/5:0.85)

% Remove the tick marks on the top and right borders while keeping the border
% box off;
grid off

xl=xlim;
yl=ylim;
% line([xl(1),xl(2)],[yl(2),yl(2)],'color',[0 0 0]);   % draw the top border, line color set to black
% line([xl(2),xl(2)],[yl(1),yl(2)],'color',[0 0 0]);    % draw the right border, line color set to black
% NOTE(review): six handles are passed but only five labels, so h6 has no
% legend text - confirm which label is missing.
gs=legend([h1 h2 h3 h4 h5 h6],{'$P_{\max}=0.02, \kappa_1=0.1$','$P_{\max}=0.04, \kappa_1=0.1$','$P_{\max}=0.1,  \kappa_1=0.1$','$P_{\max}=0.02,  \kappa_1=0.01$','$P_{\max}=0.04, \kappa_1=0.01$'},'Interpreter','latex','Location','northwest','NumColumns',1);
% set(gs,'Location',best)
% cdfplot adds its own title; overwrite it with a blank one.
title(" ")

% Disabled second axes (sum-rate versus power budget), kept for reference.
% ax2=axes('Position',get(ax1,'Position'),...
%            'XAxisLocation','top',...
%            'YAxisLocation','left',...
%            'Color','none',...
%            'XColor','b','YColor','b');
% hold on
% 
% X_1 = [0,  0.02, 0.04, 0.06, 0.08, 0.1];
% % Y1_1 = [73.32247929, 73.59625821, 73.87003713, 73.58897458, 73.54327335, 73.57012282, 73.48700371, 73.74321622, 73.56069694, 73.43016281, 73.41759497];
% % Y1_2 = [77.09425878, 77.96301057, 78.32762068, 78.77320765, 78.1179663, 78.80548415, 78.4038846, 78.67466438, 78.16966581, 78.41473865, 78.54641531];
% % Y1_3 = [82.97800628, 84.27606398, 84.41930877, 83.87746358, 83.93316195, 84.33647529, 84.48843188, 84.8706084, 84.52499286, 85.03199086, 84.51185376];
% Y2_1 = [70.94397619, 80.87746462, 83.66667014, 85.63058596, 86.90541567, 87.85830977, 88.64414142, 89.31973965, 89.89561792, 90.38694579, 90.87500257];
% Y2_2 = [70.96419081, 80.91497367, 83.69259198, 85.64478655, 86.8985237, 87.87413717, 88.68100766, 89.43362594, 89.9297314, 90.50355753, 90.8866812];
% Y2_3 = [70.99211407, 80.94282523, 83.75781121, 85.71406009, 87.01403988, 87.92457729, 88.7456004, 89.4045163, 90.00031461, 90.44857253, 90.92024851];
% p4=plot(X_1,Y2_1,'b:o','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
% p5=plot(X_1,Y2_2,'b-.*','LineWidth',1.2, 'MarkerSize',6, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
% p6=plot(X_1,Y2_3,'b--^','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
% ylabel('Average sum-rate','Interpreter','latex')
% xlabel('Power budget $P_{\max}$','Interpreter','latex')
% % legend([p4 p5 p6],{'$\kappa_1=0.0001$','$\kappa_1=0.002$','$\kappa_1=0.005$'},'Location','NorthWest','Interpreter','latex','NumColumns',1)
% % legend('$\kappa_1=0.002$','$\kappa_1=0.005$','$\kappa_1=0.0001$')
% xlim([0 1]);
% % set(gca,'XTickLabel',{'0','0.2','0.4','0.6','0.8','1'});
% set(gca,'XAxisLocation','bottom')
% set(gca,'xtick',0:0.2:1)
% grid off
92 | 


--------------------------------------------------------------------------------
/algorithm/MA-TD3_main.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | import itertools
  3 | import numpy as np
  4 | import torch
  5 | from torch.optim import Adam
  6 | import core
  7 | 
  8 | 
  9 | class ReplayBuffer:
 10 |     """
 11 |     A simple FIFO experience replay buffer for TD3 agents.
 12 |     """
 13 | 
 14 |     def __init__(self, obs_dim, act_dim, size):
 15 |         self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
 16 |         self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
 17 |         self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32)
 18 |         self.rew_buf = np.zeros(size, dtype=np.float32)
 19 |         self.done_buf = np.zeros(size, dtype=np.float32)
 20 |         self.ptr, self.size, self.max_size = 0, 0, size
 21 | 
 22 |     def store(self, obs, act, rew, next_obs, done):
 23 |         self.obs_buf[self.ptr] = obs
 24 |         self.obs2_buf[self.ptr] = next_obs
 25 |         self.act_buf[self.ptr] = act
 26 |         self.rew_buf[self.ptr] = rew
 27 |         self.done_buf[self.ptr] = done
 28 |         self.ptr = (self.ptr+1) % self.max_size
 29 |         self.size = min(self.size+1, self.max_size)
 30 | 
 31 |     def sample_batch(self, batch_size=32):
 32 |         idxs = np.random.randint(0, self.size, size=batch_size)
 33 |         batch = dict(obs=self.obs_buf[idxs],
 34 |                      obs2=self.obs2_buf[idxs],
 35 |                      act=self.act_buf[idxs],
 36 |                      rew=self.rew_buf[idxs],
 37 |                      done=self.done_buf[idxs])
 38 |         return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()}
 39 | 
 40 | class TD3:
 41 |     def __init__(self, obs_dim, act_dim, actor_critic=core.MLPActorCritic,
 42 |                  replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, q_lr=5e-4,
 43 |                  act_noise=0.5, target_noise=0.25, noise_clip=0.5, policy_delay=2):
 44 |         self.obs_dim = obs_dim
 45 |         self.act_dim = act_dim
 46 |         self.gamma = gamma
 47 |         self.polyak = polyak
 48 |         self.act_noise = act_noise
 49 |         self.target_noise = target_noise
 50 |         self.noise_clip = noise_clip
 51 |         self.policy_delay = policy_delay
 52 |         self.replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim,size=replay_size)
 53 | 
 54 |         self.ac = actor_critic(obs_dim, act_dim)
 55 |         self.ac_targ = deepcopy(self.ac)
 56 | 
 57 |         for p in self.ac_targ.parameters():
 58 |             p.requires_grad = False
 59 | 
 60 |         # List of parameters for both Q-networks (save this for convenience)
 61 |         self.q_params = itertools.chain(self.ac.q1.parameters(), self.ac.q2.parameters())
 62 | 
 63 |         # Set up optimizers for policy and q-function
 64 |         self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr)
 65 |         self.q_optimizer = Adam(self.q_params, lr=q_lr)
 66 | 
 67 |         # Experience buffer
 68 |         replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size)
 69 |     def compute_loss_q(self,data):
 70 |         o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done']
 71 | 
 72 |         q1 = self.ac.q1(o,a)
 73 |         q2 = self.ac.q2(o,a)
 74 | 
 75 |         # Bellman backup for Q functions
 76 |         with torch.no_grad():
 77 |             pi_targ = self.ac_targ.pi(o2)
 78 | 
 79 |             # Target policy smoothing
 80 |             epsilon = torch.randn_like(pi_targ) * self.target_noise
 81 |             epsilon = torch.clamp(epsilon, -self.noise_clip, self.noise_clip)
 82 |             a2 = pi_targ + epsilon
 83 |             a2 = torch.clamp(a2, -1, 1)
 84 | 
 85 |             # Target Q-values
 86 |             q1_pi_targ = self.ac_targ.q1(o2, a2)
 87 |             q2_pi_targ = self.ac_targ.q2(o2, a2)
 88 |             q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ)
 89 |             backup = r + self.gamma * (1 - d) * q_pi_targ
 90 | 
 91 |         # MSE loss against Bellman backup
 92 |         loss_q1 = ((q1 - backup)**2).mean()
 93 |         loss_q2 = ((q2 - backup)**2).mean()
 94 |         loss_q = loss_q1 + loss_q2
 95 | 
 96 |         return loss_q
 97 | 
 98 |     def compute_loss_pi(self, data):
 99 |         o = data['obs']
100 |         q1_pi = self.ac.q1(o, self.ac.pi(o))
101 |         return -q1_pi.mean()
102 | 
103 |     def update(self, batch_size, repeat_times):
104 |         for i in range(int(repeat_times)):
105 |             data = self.replay_buffer.sample_batch(batch_size)
106 |             # First run one gradient descent step for Q1 and Q2
107 |             self.q_optimizer.zero_grad()
108 |             loss_q = self.compute_loss_q(data)
109 |             loss_q.backward()
110 |             self.q_optimizer.step()
111 | 
112 |             # Possibly update pi and target networks
113 |             if i % self.policy_delay == 0:
114 | 
115 |                 # Freeze Q-networks so you don't waste computational effort
116 |                 # computing gradients for them during the policy learning step.
117 |                 for p in self.q_params:
118 |                     p.requires_grad = False
119 | 
120 |                 # Next run one gradient descent step for pi.
121 |                 self.pi_optimizer.zero_grad()
122 |                 loss_pi = self.compute_loss_pi(data)
123 |                 loss_pi.backward()
124 |                 self.pi_optimizer.step()
125 | 
126 |                 # Unfreeze Q-networks so you can optimize it at next DDPG step.
127 |                 for p in self.q_params:
128 |                     p.requires_grad = True
129 | 
130 |                 # Finally, update target networks by polyak averaging.
131 |                 with torch.no_grad():
132 |                     for p, p_targ in zip(self.ac.parameters(), self.ac_targ.parameters()):
133 |                         # NB: We use an in-place operations "mul_", "add_" to update target
134 |                         # params, as opposed to "mul" and "add", which would make new tensors.
135 |                         p_targ.data.mul_(self.polyak)
136 |                         p_targ.data.add_((1 - self.polyak) * p.data)
137 | 
138 |     def get_action(self, o, noise_scale):
139 |         a = self.ac.act(torch.as_tensor(o, dtype=torch.float32))
140 |         a += noise_scale * np.random.randn(self.act_dim)
141 |         return np.clip(a, -1, 1)
142 | 


--------------------------------------------------------------------------------
/plot_figure/FIGURE_3.m:
--------------------------------------------------------------------------------
  1 | % X = [ 8  8   12   12]*6;
  2 | % Y = [8 12 12 8]*6;
  3 | % Z = [0.5 0.5 0.5 0.5];
  4 | % p6=plot(10*6,10*6,'s','MarkerEdgeColor',[0.41176 0.41176 0.41176], 'MarkerFaceColor',[0.41176 0.41176 0.41176],'MarkerSize',10, 'LineWidth',2);
  5 | % 
  6 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
  7 | % hold on
  8 | % 
  9 | % X = [ 28  28   32   32]*6;
 10 | % Y = [8 12 12 8]*6;
 11 | % Z = [0.5 0.5 0.5 0.5];
 12 | % 
 13 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 14 | % 
 15 | % X = [ 28  28   32   32]*6;
 16 | % Y = [28 32 32 28]*6;
 17 | % Z = [0.5 0.5 0.5 0.5];
 18 | % 
 19 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 20 | % 
 21 | % X = [ 8  8   12   12]*6;
 22 | % Y = [28 32 32 28]*6;
 23 | % Z = [0.5 0.5 0.5 0.5];
 24 | % 
 25 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 26 | % 
 27 | % view(90,90)
 28 | % p5=plot(0*6,15*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2);
 29 | % hold on
 30 | % plot(5*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 31 | % hold on
 32 | % plot(13*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 33 | % hold on
 34 | % plot(23*6,1*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 35 | % hold on
 36 | % plot(31*6,3*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 37 | % hold on
 38 | % plot(35*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 39 | % hold on
 40 | % %
 41 | % plot(20*6,35*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2);
 42 | % hold on
 43 | % plot(25*6,37*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 44 | % hold on
 45 | % plot(35*6,37*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 46 | % hold on
 47 | % plot(5*6,23*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 48 | % hold on
 49 | % plot(10*6,25*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 50 | % hold on
 51 | % plot(15*6,25*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 52 | % hold on
 53 | 
 54 | radio_map_=-load("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat").MARK_PL;
 55 | % radio_map=rot90(radio_map_);
 56 | img=imagesc(radio_map_);%画图
 57 | % axis xy
 58 | %imrotated_img = imrotate(img, 90, 'bilinear');
 59 | colorbar;
 60 | hold on
 61 | % figure(2)
 62 | y1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k169_38.txt").';
 63 | x1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k169_38.txt").';
 64 | y2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k2131_35.txt").';
 65 | x2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k2131_35.txt").';
 66 | y3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k3255_44.txt").';
 67 | x3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k3255_44.txt").';
 68 | 
 69 | 
 70 | y4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k1428_95.txt").';
 71 | x4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k1428_95.txt").';
 72 | y5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k2424_32.txt").';
 73 | x5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k2424_32.txt").';
 74 | y6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k3483_48.txt").';
 75 | x6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k3483_48.txt").';
 76 | p5=plot(50,17,'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2);
 77 | hold on
 78 | plot(50,50, 'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2)
 79 | hold on
 80 | plot(50,84, 'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2)
 81 | hold on
 82 | for i=1:1:length(x1)
 83 |    p1=plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.93,0.69,0.13], 'MarkerFaceColor',[0.93,0.69,0.13]); 
 84 | end
 85 | for i=1:1:length(x2)
 86 |    plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.85,0.33,0.10], 'MarkerFaceColor',[0.85,0.33,0.10]) ;
 87 | end
 88 | for i=1:1:length(x3)
 89 |    plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[1.00,0.41,0.16], 'MarkerFaceColor',[1.00,0.41,0.16]) ;
 90 | end
 91 | for i=1:1:length(x4)
 92 |    p2=plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.76,0.43,0.96], 'MarkerFaceColor',[0.76,0.43,0.96]); 
 93 | end
 94 | for i=1:1:length(x5)
 95 |    plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58], 'MarkerFaceColor',[0.40,0.14,0.58]) 
 96 | end
 97 | for i=1:1:length(x6)
 98 |    plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.58,0.27,0.78], 'MarkerFaceColor',[0.58,0.27,0.78]) 
 99 | end
100 | hold on
101 | p3=plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
102 | hold on
103 | p4=plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
104 | hold on
105 | plot(y2(1),x2(1),'+','MarkerEdgeColor',[0	0	0], 'MarkerFaceColor',[0	0	0],'MarkerSize',13,'LineWidth',3);
106 | hold on
107 | plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0	0	0], 'MarkerFaceColor',[0	0	0],'MarkerSize',13,'LineWidth',3)
108 | hold on
109 | plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
110 | hold on
111 | plot(y3(length(x3)),x3(length(x3)),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
112 | hold on
113 | 
114 | % rot90;
115 | view(-90,90);
116 | axis equal
117 | axis([0 100 0 100]);
118 | legend([p1 p2],{'Distance-Aware','Communication-Aware'},'Location','NorthWest','Interpreter','latex','NumColumns',2)
119 | set(gca,'XTick',0:20:100)
120 | set(gca,'XTickLabel',{'0','10','20','30','40','50'})
121 | set(gca,'YTick',0:20:100)
122 | set(gca,'YTickLabel',{'0','10','20','30','40','50'})
123 | xlabel('$x \ ({\rm  m})$','Interpreter','latex')
124 | ylabel('$y \ ({\rm  m})$','Interpreter','latex')
125 | box on
126 | % grid on
127 | ah=axes('position',get(gca,'position'), 'visible','off');
128 | legend(ah,[p3 p4 p5],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
129 | 
130 | 


--------------------------------------------------------------------------------
/tradition_baseline/fig3.m:
--------------------------------------------------------------------------------
  1 | % X = [ 8  8   12   12]*6;
  2 | % Y = [8 12 12 8]*6;
  3 | % Z = [0.5 0.5 0.5 0.5];
  4 | % p6=plot(10*6,10*6,'s','MarkerEdgeColor',[0.41176 0.41176 0.41176], 'MarkerFaceColor',[0.41176 0.41176 0.41176],'MarkerSize',10, 'LineWidth',2);
  5 | % 
  6 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
  7 | % hold on
  8 | % 
  9 | % X = [ 28  28   32   32]*6;
 10 | % Y = [8 12 12 8]*6;
 11 | % Z = [0.5 0.5 0.5 0.5];
 12 | % 
 13 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 14 | % 
 15 | % X = [ 28  28   32   32]*6;
 16 | % Y = [28 32 32 28]*6;
 17 | % Z = [0.5 0.5 0.5 0.5];
 18 | % 
 19 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 20 | % 
 21 | % X = [ 8  8   12   12]*6;
 22 | % Y = [28 32 32 28]*6;
 23 | % Z = [0.5 0.5 0.5 0.5];
 24 | % 
 25 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
 26 | % 
 27 | % view(90,90)
 28 | % p5=plot(0*6,15*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2);
 29 | % hold on
 30 | % plot(5*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 31 | % hold on
 32 | % plot(13*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 33 | % hold on
 34 | % plot(23*6,1*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 35 | % hold on
 36 | % plot(31*6,3*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 37 | % hold on
 38 | % plot(35*6,17*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 39 | % hold on
 40 | % %
 41 | % plot(20*6,35*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2);
 42 | % hold on
 43 | % plot(25*6,37*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 44 | % hold on
 45 | % plot(35*6,37*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 46 | % hold on
 47 | % plot(5*6,23*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',5, 'LineWidth',2)
 48 | % hold on
 49 | % plot(10*6,25*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 50 | % hold on
 51 | % plot(15*6,25*6,'p','MarkerEdgeColor',[1	1	0 ], 'MarkerFaceColor',[1	1	0 ],'MarkerSize',7, 'LineWidth',2)
 52 | % hold on
 53 | 
 54 | radio_map_=-load("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat").MARK_PL;
 55 | % radio_map=rot90(radio_map_);
 56 | img=imagesc(radio_map_);%画图
 57 | % axis xy
 58 | %imrotated_img = imrotate(img, 90, 'bilinear');
 59 | colorbar;
 60 | hold on
 61 | % figure(2)
 62 | y1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k169_38.txt").';
 63 | x1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k169_38.txt").';
 64 | y2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k2131_35.txt").';
 65 | x2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k2131_35.txt").';
 66 | y3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k3255_44.txt").';
 67 | x3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k3255_44.txt").';
 68 | 
 69 | 
 70 | y4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k1428_95.txt").';
 71 | x4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k1428_95.txt").';
 72 | y5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k2424_32.txt").';
 73 | x5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k2424_32.txt").';
 74 | y6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k3483_48.txt").';
 75 | x6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k3483_48.txt").';
 76 | p6=plot(50,17,'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2);
 77 | hold on
 78 | plot(50,50, 'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2)
 79 | hold on
 80 | plot(50,84, 'p','MarkerEdgeColor',[1	0	0], 'MarkerFaceColor',[1	0	0],'MarkerSize',10,'LineWidth',2)
 81 | hold on
 82 | for i=1:1:length(x1)
 83 |    p1=plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.15,0.15,0.15], 'MarkerFaceColor',[0.15,0.15,0.15]); 
 84 | end
 85 | for i=1:1:length(x2)
 86 |    plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[1.00,0.41,0.16], 'MarkerFaceColor',[1.00,0.41,0.16]) ;
 87 | end
 88 | for i=1:1:length(x3)
 89 |    plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[ 0.58,0.27,0.78], 'MarkerFaceColor',[ 0.58,0.27,0.78]) ;
 90 | end
 91 | for i=1:1:length(x4)
 92 |    p2=plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.24,0.24,0.24], 'MarkerFaceColor',[0.24,0.24,0.24]); 
 93 | end
 94 | for i=1:1:length(x5)
 95 |    plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.85,0.33,0.10], 'MarkerFaceColor',[0.85,0.33,0.10]) 
 96 | end
 97 | for i=1:1:length(x6)
 98 |    plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58], 'MarkerFaceColor',[ 0.40,0.14,0.58]) 
 99 | end
100 | hold on
101 | p4=plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
102 | hold on
103 | p5=plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
104 | hold on
105 | plot(y2(1),x2(1),'+','MarkerEdgeColor',[0	0	0], 'MarkerFaceColor',[0	0	0],'MarkerSize',13,'LineWidth',3);
106 | hold on
107 | plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0	0	0], 'MarkerFaceColor',[0	0	0],'MarkerSize',13,'LineWidth',3)
108 | hold on
109 | plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
110 | hold on
111 | plot(y3(length(x3)),x3(length(x3)),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
112 | hold on
113 | 
114 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_1.txt', 'r');
115 | % fileID="C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_1.txt";
116 | data = textscan(fileID, '%f32,%f32', 'Delimiter', '，');
117 | x1=data{1};
118 | y1=data{2};
119 | fclose(fileID);
120 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_2.txt', 'r');
121 | data = textscan(fileID, '%f,%f', 'Delimiter', '，');
122 | x2=data{1};
123 | y2=data{2};
124 | fclose(fileID);
125 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_3.txt', 'r');
126 | data = textscan(fileID, '%f,%f', 'Delimiter', '，');
127 | x3=data{1};
128 | y3=data{2};
129 | fclose(fileID);
130 | for i=1:2:length(x1)
131 |    p3=plot(y1(i),x1(i),'*','MarkerSize',5,'MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0.50,0.50,0.50]); 
132 | end
133 | for i=1:2:length(x2)
134 |    plot(y2(i),x2(i),'*','MarkerSize',6,'MarkerEdgeColor',[  0.93,0.69,0.13], 'MarkerFaceColor',[ 0.93,0.69,0.13 ]) ;
135 | end
136 | for i=1:2:length(x3)
137 |    plot(y3(i),x3(i),'*','MarkerSize',5,'MarkerEdgeColor',[ 0.76,0.43,0.96], 'MarkerFaceColor',[0.76,0.43,0.96]) ;
138 | end
139 | 
140 | % rot90;
141 | view(-90,90);
142 | axis equal
143 | axis([0 100 0 100]);
144 | legend([p1 p2 p3],{'Distance-Aware MA-DDPG ($\kappa_1=0$)','Communication-Aware MA-DDPG ($\kappa_1=0.1$)', '$A^{\ast}$ Search Algorithm'},'Location','NorthWest','Interpreter','latex','NumColumns',2)
145 | set(gca,'XTick',0:20:100)
146 | set(gca,'XTickLabel',{'0','10','20','30','40','50'})
147 | set(gca,'YTick',0:20:100)
148 | set(gca,'YTickLabel',{'0','10','20','30','40','50'})
149 | xlabel('$x \ ({\rm  m})$','Interpreter','latex')
150 | ylabel('$y \ ({\rm  m})$','Interpreter','latex')
151 | box on
152 | % grid on
153 | ah=axes('position',get(gca,'position'), 'visible','off');
154 | legend(ah,[p4 p5 p6],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
155 | 
156 | 


--------------------------------------------------------------------------------
/algorithm/MA-PPO_main.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow import keras
  3 | from keras.layers import *
  4 | import numpy as np
  5 | import gym
  6 | import matplotlib.pyplot as plt
  7 | import math
  8 | from scipy.io import loadmat
# Seed NumPy and TensorFlow so that runs are repeatable.
np.random.seed(2)
tf.random.set_seed(2)

EP_MAX = 500     # number of training episodes
BATCH = 32       # a PPO update is triggered every BATCH steps of an episode
EP_LEN = 1000    # maximum number of steps per episode
GAMMA = 0.9      # discount factor used when building the returns
A_LR = 0.0001    # learning rate of the actor optimizer
C_LR = 0.0005    # learning rate of the critic optimizer

A_UPDATE_STEPS = 20   # actor gradient steps per PPO update
C_UPDATE_STEPS = 20   # critic fit epochs per PPO update
S_DIM, A_DIM = 2, 10  # state and action dimensionality of the networks
epsilon=0.2           # half-width of the clipping interval for the probability ratio

# Grid size; used to keep the agent position strictly inside the map.
n_width=100
n_height = 100
# Map data is loaded from the current working directory.
m = loadmat("mapdata_0717.mat") 
#correct_action=0
MARK= m["MARK_new"]        # grid of cell labels; cells equal to 2 block a move
PL_AP=m["MARK_PL_real"]    # per-cell value whose square root scales the channel vector
# NOTE(review): n_mu, n_M and n_o are not read anywhere in the visible part of this file.
n_mu=3
n_M=5
n_o=6*7
a_bound=1                  # scale applied to the tanh output of the actor mean
 34 | 
 35 | class PPO(object):
 36 | 
 37 |     def __init__(self):
 38 |         self.opt_a = tf.compat.v1.train.AdamOptimizer(A_LR)
 39 |         self.opt_c = tf.compat.v1.train.AdamOptimizer(C_LR)
 40 | 
 41 |         self.model_a = self._build_anet(trainable=True)
 42 |         self.model_a_old = self._build_anet(trainable=False)
 43 |         self.model_c = self._build_cnet()
 44 | 
 45 |     def _build_anet(self,trainable=True):
 46 |         tfs_a = Input([S_DIM], )
 47 |         l1 = Dense(100, 'relu',trainable=trainable)(tfs_a)
 48 |         mu = a_bound * Dense(A_DIM, 'tanh',trainable=trainable)(l1)
 49 |         sigma = Dense(A_DIM, 'softplus',trainable=trainable)(l1)
 50 |         model_a = keras.models.Model(inputs=tfs_a, outputs=[mu, sigma])
 51 |         return model_a
 52 | 
 53 |     def _build_cnet(self):
 54 |         tfs_c = Input([S_DIM], )
 55 |         l1 = Dense(100, 'relu')(tfs_c)
 56 |         v = Dense(1)(l1)
 57 |         model_c = keras.models.Model(inputs=tfs_c, outputs=v)
 58 |         model_c.compile(optimizer=self.opt_c, loss='mse')
 59 |         return model_c
 60 | 
 61 |     def update(self, s, a, r):
 62 |         self.model_a_old.set_weights(self.model_a.get_weights())
 63 | 
 64 |         mu, sigma = self.model_a_old(s)
 65 |         oldpi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
 66 |         old_prob_a = oldpi.prob(a)
 67 | 
 68 |         v = self.get_v(s)
 69 |         adv = r - v
 70 | 
 71 |         for i in range(A_UPDATE_STEPS):
 72 |             with tf.GradientTape() as tape:
 73 |                 mu, sigma = self.model_a(s)
 74 |                 pi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
 75 |                 ratio = pi.prob(a) / (old_prob_a + 1e-5)
 76 |                 surr = ratio * adv
 77 |                 x2 = tf.clip_by_value(ratio, 1. - epsilon, 1. + epsilon) * adv
 78 |                 x3 = tf.minimum(surr, x2)
 79 |                 aloss = -tf.reduce_mean(x3)
 80 | 
 81 |             a_grads = tape.gradient(aloss, self.model_a.trainable_weights)
 82 |             a_grads_and_vars = zip(a_grads, self.model_a.trainable_weights)
 83 |             self.opt_a.apply_gradients(a_grads_and_vars)
 84 | 
 85 |         self.model_c.fit(s, r, verbose=0, shuffle=False,epochs=C_UPDATE_STEPS)
 86 | 
 87 |     def choose_action(self, s):
 88 |         mu, sigma = self.model_a(s)
 89 |         pi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
 90 |         a = tf.squeeze(pi.sample(1), axis=0)
 91 |         return np.clip(a, -2, 2)
 92 | 
 93 |     def get_v(self, s):
 94 |         v = self.model_c(s)
 95 |         return v
 96 | 
 97 | def cosVector(x,y):
 98 |     result1=0.0;
 99 |     result2=0.0;
100 |     result3=0.0;
101 |     for i in range(len(x)):
102 |         result1+=x[i]*y[i]   #sum(X*Y)
103 |         result2+=x[i]**2     #sum(X*X)
104 |         result3+=y[i]**2     #sum(Y*Y)
105 |     return result1/((result2*result3)**0.5)
106 | 
# Training script: a single agent moves on the grid from a fixed start cell
# towards end_location and is rewarded by its (scaled) distance to that goal.
ppo = PPO()
end_location = [15*2,32*2]
all_ep_r = []
all_ep_reward_p=[]   # exponentially smoothed episode reward, plotted at the end
for ep in range(EP_MAX):                    # train
    # Every episode starts from the same cell.
    s = np.array([4*2,5*2], dtype=np.float32) 
    buffer_s, buffer_a, buffer_r = [], [], []
    ep_r = 0
    done1 = False
    # Start-to-goal distance (coordinates halved); not read again in this script.
    distance_01_max=math.sqrt((s[0]-end_location[0])*(s[0]-end_location[0])/4+(s[1]-end_location[1])*(s[1]-end_location[1])/4)

    s = np.reshape(s, (-1, S_DIM))
    observation_su1 = np.array([17, 25*2], dtype=np.float32)    
    for t in range(EP_LEN):  # in one episode
        a = ppo.choose_action(s)
        if not done1:
            # Move by the first two action components, truncated to grid cells.
            [old_x, old_y] = s[0]
            new_x, new_y = int(old_x), int(old_y)
            new_x=int(old_x+a[0,0])
            new_y=int(old_y+a[0,1])
            # Clamp the new cell to the range 1 .. n_width-1 / n_height-1.
            if int(new_x) <= 0: 
                new_x = 1
            if int(new_x) >= n_width: 
                new_x = int(n_width)-1
            if int(new_y) <= 0: 
                new_y = 1
            if int(new_y) >= n_height: 
                new_y = int(n_height)-1
            # Cells marked 2 cannot be entered: stay where we were.
            if MARK[new_x,new_y] == 2:
                new_x, new_y = old_x, old_y
            s_=np.array([new_x, new_y], dtype=np.float32)
        else:
            # NOTE(review): looks unreachable - the loop breaks in the same
            # iteration in which done1 becomes True.
            s_ = s
        a=a[0]
        # Keep a[8] strictly inside (-1, 1) when it sits exactly on a bound.
        if a[8]==-1:
            a[8]=-0.9999999
        # if action2[8]==-1:
        #     action2[8]=-0.9999999
        # if action3[8]==-1:
        #     action3[8]=-0.9999999
        if a[8]==1:
            a[8]=0.9999999
        
        # Complex weight vector: a[2], a[4], a[6] are magnitudes and
        # a[3], a[5], a[7] are mapped to phases (1 + a) * pi.
        w_1=np.array([a[2]* math.exp(1)**(1j*(1+a[3])*math.pi), a[4]* math.exp(1)**(1j*(1+a[5])*math.pi), a[6]* math.exp(1)**(1j*(1+a[7])*math.pi)])
        # w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)])
        # w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)])
        # Channel vector h_1 for the agent's new position: a three-element
        # phase vector scaled by sqrt(PL_AP) at that cell.
        theta_1=cosVector([1,0,0],[s_[0]-50,s_[1]-100, 1-2])
        a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
        b_1_AP_LOS=math.sqrt(PL_AP[int(s_[0]), int(s_[1])])
        h_1=b_1_AP_LOS*a_1
        interference_1=10**(-9)
        # theta_2=cosVector([1,0,0],[observation2_[0]-50,observation2_[1]-100, 1-2])
        # a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])#
        # b_2_AP_LOS=math.sqrt(PL_AP[int(observation2_[0]), int(observation2_[1])])
        # h_2=b_2_AP_LOS*a_2
        # interference_2=10**(-9)
        # theta_3=cosVector([1,0,0],[observation3_[0]-50,observation3_[1]-100, 1-2])
        # a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])#
        # b_3_AP_LOS=math.sqrt(PL_AP[int(observation3_[0]), int(observation3_[1])])
        # h_3=b_3_AP_LOS*a_3
        # interference_3=10**(-9)
        # Same construction for the fixed position observation_su1.
        theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2])
        a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
        b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])])
        h_4=b_4_AP_LOS*a_4
        interference_4=10**(-9)
        
        # (a[8]+1)/2 is used as a split factor: that share weights the
        # agent's term, the remainder weights the observation_su1 term.
        # The sign of a[8] selects which of the two receives the other's
        # share as interference.
        if a[8]>0:
            interference_1+=(1-(a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2
        else:
            interference_4+=((a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
            
        # NOTE(review): SINR_1 and SINR_4 are computed but never read in the
        # visible part of this script (the reward below uses distance only).
        SINR_1=((a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
         # SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2
         # SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3
        SINR_4=(1-(a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4

        
        buffer_s.append(s)
        buffer_a.append(a)
        # Distance from the new position to the goal (coordinates halved).
        distance_01_2=(s_[0]-end_location[0])*(s_[0]-end_location[0])/4+(s_[1]-end_location[1])*(s_[1]-end_location[1])/4
        distance_01 = math.sqrt(distance_01_2)
        s_ = np.reshape(s_, (-1, S_DIM))
        # Reward: negative distance scaled by 1/50; +1 when the goal is reached.
        r= -(distance_01/50)
        if distance_01==0:
            done1 = True
            #os.system("pause")
            r=1
        r = np.reshape(r, (-1, 1))
        buffer_r.append(r)  # stored as a (1, 1) array
        a = np.reshape(a, (-1, A_DIM))
        s_ = np.reshape(s_, (-1, S_DIM))
        s = s_
        ep_r += r[0]

        # update ppo
        if (t + 1) % BATCH == 0 or t == EP_LEN - 1 or done1:
            #print("here")
            # Bootstrap from the critic's value of the latest state, then
            # accumulate discounted returns backwards through the buffer.
            v_s_ = ppo.get_v(s_)[0,0]
            discounted_r = []
            # NOTE: this loop rebinds ``r``; the step reward was already
            # added to ep_r above.
            for r in buffer_r[::-1]:
                v_s_ = r + GAMMA * v_s_
                discounted_r.append(v_s_)
            discounted_r.reverse()

            bs = np.vstack(buffer_s)
            ba = np.vstack(buffer_a)
            # NOTE(review): each return is built from a (1, 1) reward, so br
            # presumably has shape (N, 1, 1) - confirm PPO.update expects that.
            br = np.array(discounted_r)
            buffer_s, buffer_a, buffer_r = [], [], []
            ppo.update(bs, ba, br)
        if done1:
            print("success!!!!!!!!!!!!")
            break
    # Exponential moving average (0.9 / 0.1) of the episode reward.
    if ep == 0:
        # all_ep_r.append(ep_r)
        all_ep_reward_p.append(ep_r)
    else:
        # all_ep_r.append(all_ep_r[-1] * 0.9 + ep_r * 0.1)
        all_ep_reward_p.append(all_ep_reward_p[-1] * 0.9 + ep_r * 0.1)
    print(
        'Ep: %i' % ep,
        "|Ep_r: %i" % ep_r,
    )

plt.plot(all_ep_reward_p)
232 | 
233 | 
234 | 
235 | # while 1:                        #play
236 | #     s = env.reset()
237 | #     for t in range(EP_LEN):
238 | #         s = s.reshape([-1, S_DIM])
239 | #         env.render()
240 | #         s, r, done, info = env.step(ppo.choose_action(s))
241 | #         if done:
242 | #             break
243 | 


--------------------------------------------------------------------------------
/tradition_baseline/pso.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.special import erfcinv
  3 | import numpy as np
  4 | from sklearn.cluster import KMeans
  5 | import matplotlib.pyplot as plt
  6 | from scipy.io import loadmat
  7 | import math
  8 | import numpy as np
  9 | from scipy.special import erfc
 10 | 
 11 | m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat") 
 12 | #correct_action=0
 13 | MARK= m["MARK_new"]
 14 | PL_AP=m["MARK_PL_real"]
 15 | 
 16 | def cosVector(x,y):
 17 |     result1=0.0;
 18 |     result2=0.0;
 19 |     result3=0.0;
 20 |     for i in range(len(x)):
 21 |         result1+=x[i]*y[i]   #sum(X*Y)
 22 |         result2+=x[i]**2     #sum(X*X)
 23 |         result3+=y[i]**2     #sum(Y*Y)
 24 |     return result1/((result2*result3)**0.5)
 25 | 
 26 | # 高斯Q函数的逆函数
 27 | def Q_inv(x):
 28 |     return np.sqrt(2) * erfcinv(2 * x)
 29 | 
 30 | # 计算V(γk(t))
 31 | def V(gamma):
 32 |     # 根据具体公式定义V(γk(t))
 33 |     return gamma  # Placeholder, replace with actual formula if necessary
 34 | 
 35 | # 目标函数
 36 | def objective_function(w, num_points, A1, A2, h_k, sigma2, M, D):
 37 |     # 计算gamma_k(t)
 38 |     term_tol=0
 39 |     db_tol=0
 40 |     for i in range(num_points):
 41 |         gamma_k_t = np.abs(A1[i] @ w)**2 / (i * np.abs(A1[i] @ w)**2 + sigma2)
 42 |         
 43 |         # 计算目标函数
 44 |         term1 = np.log(2) * np.sqrt(M / (1-(1+gamma_k_t)**(-2)))
 45 |         term2 = np.log2(1 + gamma_k_t) - D / M
 46 |         term_tol=term1 * term2
 47 |         db_tol+=math.log10(max(1-0.5 * erfc(term_tol / np.sqrt(2)),10**(-20)))
 48 |     
 49 |     return (db_tol/num_points)
 50 | 
 51 | import random
 52 | 
 53 | class Particle:
 54 |     def __init__(self, dimension):
 55 |         self.position = np.random.rand(dimension)
 56 |         self.velocity = np.random.rand(dimension) - 0.5
 57 |         self.best_position = self.position.copy()
 58 |         self.best_score = -np.inf
 59 | 
 60 |     def update_velocity(self, global_best_position, inertia_weight=0.5, cognitive_coeff=2, social_coeff=2):
 61 |         cognitive_component = cognitive_coeff * random.random() * (self.best_position - self.position)
 62 |         social_component = social_coeff * random.random() * (global_best_position - self.position)
 63 |         self.velocity = inertia_weight * self.velocity + cognitive_component + social_component
 64 | 
 65 |     def update_position(self):
 66 |         self.position += self.velocity
 67 |         self.position = np.clip(self.position, 0, 1)  # Ensure within bounds
 68 | 
 69 | class PSO:
 70 |     def __init__(self, objective_function, dimension, swarm_size=30, iterations=100):
 71 |         self.objective_function = objective_function
 72 |         self.dimension = dimension
 73 |         self.swarm_size = swarm_size
 74 |         self.iterations = iterations
 75 |         self.swarm = [Particle(dimension) for _ in range(swarm_size)]
 76 |         self.global_best_position = np.random.rand(dimension)
 77 |         self.global_best_score = -np.inf
 78 | 
 79 |     def optimize(self, *args):
 80 |         for iteration in range(self.iterations):
 81 |             for particle in self.swarm:
 82 |                 # print(particle.position)
 83 |                 score = self.objective_function(particle.position, *args)
 84 |                 if score > particle.best_score:
 85 |                     particle.best_score = score
 86 |                     particle.best_position = particle.position.copy()
 87 | 
 88 |                 if score > self.global_best_score:
 89 |                     self.global_best_score = score
 90 |                     self.global_best_position = particle.position.copy()
 91 | 
 92 |             for particle in self.swarm:
 93 |                 particle.update_velocity(self.global_best_position)
 94 |                 particle.update_position()
 95 | 
 96 |             # print(f"Iteration {iteration + 1}/{self.iterations}, Best Score: {self.global_best_score}")
 97 | 
 98 |         return self.global_best_position, self.global_best_score
 99 | 
# Load the three robot path files (one "x,y" pair per line) and expose their
# coordinates as x_coords_1..3 / y_coords_1..3 for the simulation below.
# Plain assignments replace the earlier exec()-generated variable names.
_robot_paths = []
for k in range(3):
    path_file = 'C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_{}.txt'.format(k+1)
    with open(path_file, 'r') as f:
        lines = f.readlines()

    # Parse the path data. Blank lines (for example a trailing empty line)
    # are skipped instead of crashing int().
    path = []
    for line in lines:
        if not line.strip():
            continue
        fields = line.split(',')
        path.append((int(fields[0]), int(fields[1])))
    _robot_paths.append(path)

# Extract the x and y coordinates of each path.
x_coords_1 = [point[0] for point in _robot_paths[0]]
y_coords_1 = [point[1] for point in _robot_paths[0]]
x_coords_2 = [point[0] for point in _robot_paths[1]]
y_coords_2 = [point[1] for point in _robot_paths[1]]
x_coords_3 = [point[0] for point in _robot_paths[2]]
y_coords_3 = [point[1] for point in _robot_paths[2]]
112 | 
def calculate_interference(coords, labels, cluster_num):
    """Return the sum of pairwise Euclidean distances inside one cluster.

    ``coords[labels == cluster_num]`` selects the cluster members; every
    unordered pair of members contributes its distance once. A cluster with
    fewer than two members yields 0.
    """
    members = coords[labels == cluster_num]
    total = 0
    for first_idx, first_point in enumerate(members):
        for second_point in members[first_idx + 1:]:
            total += np.linalg.norm(first_point - second_point)
    return total
121 | 
# Transmit amplitudes swept by the experiment: sqrt(0.02), sqrt(0.04), ..., sqrt(0.1).
power_array=[math.sqrt(0.02), math.sqrt(0.04), math.sqrt(0.06), math.sqrt(0.08),math.sqrt(0.1)]

# Fixed coordinates of the three users; the two robot positions come from the
# loaded path files at every time step.
user_coords = np.array([[17, 50], [50, 50], [84, 50]])

# Constants handed to objective_function (A2 is accepted there but not read).
A2 = 1
sigma2 = 10**(-9)
M = 50
D = 100
num_clusters = 3


def _channel_vector(coord):
    """Return the 3-element complex channel vector of the node at ``coord``.

    The vector is ``sqrt(PL_AP[x, y])`` times the steering vector
    ``[1, e^{-j*pi*theta}, e^{-2j*pi*theta}]``, where ``theta`` is the cosine
    between the x axis and the offset ``(x - 50, y - 100, -1)``.
    """
    # NOTE(review): presumably (50, 100) is the access-point position and -1
    # the height difference - confirm against the map definition.
    theta = cosVector([1, 0, 0], [coord[0] - 50, coord[1] - 100, 1 - 2])
    steering = np.exp(-1j * math.pi * theta * np.arange(3))
    gain = math.sqrt(PL_AP[int(coord[0]), int(coord[1])])
    return gain * steering


for power_j in range(len(power_array)):
    # Per-antenna scaling of the channel rows for this power level.
    amplitude = power_array[power_j]
    w = np.random.rand(3)
    for t in range(77):
        # Coordinates of the two robots at time step t, followed by the users.
        robot_coords = np.array([[x_coords_1[t], y_coords_1[t]], [x_coords_2[t], y_coords_2[t]]])
        all_coords = np.vstack((robot_coords, user_coords))

        # Group the five nodes into three clusters by position.
        kmeans = KMeans(n_clusters=3)
        kmeans.fit(all_coords)
        labels = kmeans.labels_

        # One channel row per node, in the same order as all_coords.
        H_array = np.array([_channel_vector(coord) for coord in all_coords])
        h_k = H_array  # passed through to objective_function, which does not read it

        # Received power of every node under the beamformer left over from the
        # previous step. Kept as floats: the earlier integer array truncated
        # each value when it was assigned.
        robot_powers = np.abs(H_array @ w * amplitude / math.sqrt(3)) ** 2

        gamma_array = np.zeros(3)

        for cluster_num in range(num_clusters):
            # Everything below is fixed for the cluster, so it is computed once
            # instead of once per episode.
            in_cluster = labels == cluster_num
            cluster_hk = H_array[in_cluster]
            cluster_powers = robot_powers[in_cluster]
            num_points = len(cluster_hk)
            A1 = cluster_hk * amplitude / math.sqrt(3)

            # Order the members of the cluster by power, strongest first.
            # NOTE(review): this ordering is computed but never applied to the
            # rows of A1, although objective_function uses the row index as
            # the interference multiplier - confirm the intended ordering.
            sorted_indices = np.argsort(-cluster_powers)

            w = np.random.rand(3)
            previous_A = w
            counter = 0
            for episode in range(500):
                # Particle swarm optimisation of the beamforming vector.
                pso = PSO(objective_function, dimension=3, swarm_size=30, iterations=100)
                w, best_score = pso.optimize(num_points, A1, A2, h_k, sigma2, M, D)

                # Count consecutive episodes with an unchanged solution. The
                # vectors are compared element-wise; the earlier
                # `w.all() == previous_A.all()` only compared two booleans.
                # Every PSO run starts from new random particles, so the
                # 500-episode cap is frequently the effective stop condition.
                if np.array_equal(w, previous_A):
                    counter += 1
                else:
                    counter = 0

                previous_A = w

                # Stop once the solution has stayed the same for ten episodes.
                if counter >= 10:
                    break

            # Record the objective value reached for this cluster.
            gamma_array[cluster_num] = best_score

        # Append one result line per time step to the file of this power level.
        # NOTE(review): the sum over three clusters is divided by 6, not 3 -
        # confirm the intended normalisation.
        filename='DB_NOMA_new_'+str(power_j)+'.txt'
        with open (filename, 'a') as fileobject:
            fileobject.write(str((gamma_array[0]+gamma_array[1]+gamma_array[2])/6)+'\n')


--------------------------------------------------------------------------------
/environment/environment.yaml:
--------------------------------------------------------------------------------
  1 | name: base
  2 | channels:
  3 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch
  4 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
  5 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/
  6 |   - defaults
  7 |   - conda-forge
  8 | dependencies:
  9 |   - alabaster=0.7.12=pyhd3eb1b0_0
 10 |   - anaconda-client=1.11.2=py310haa95532_0
 11 |   - anaconda-navigator=2.4.0=py310haa95532_0
 12 |   - anaconda-project=0.11.1=py310haa95532_0
 13 |   - anyio=3.5.0=py310haa95532_0
 14 |   - appdirs=1.4.4=pyhd3eb1b0_0
 15 |   - argon2-cffi=21.3.0=pyhd3eb1b0_0
 16 |   - argon2-cffi-bindings=21.2.0=py310h2bbff1b_0
 17 |   - arrow=1.2.3=py310haa95532_1
 18 |   - astroid=2.14.2=py310haa95532_0
 19 |   - astropy=5.1=py310h9128911_0
 20 |   - asttokens=2.0.5=pyhd3eb1b0_0
 21 |   - atomicwrites=1.4.0=py_0
 22 |   - attrs=22.1.0=py310haa95532_0
 23 |   - automat=20.2.0=py_0
 24 |   - autopep8=1.6.0=pyhd3eb1b0_1
 25 |   - babel=2.11.0=py310haa95532_0
 26 |   - backcall=0.2.0=pyhd3eb1b0_0
 27 |   - backports=1.1=pyhd3eb1b0_0
 28 |   - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0
 29 |   - backports.tempfile=1.0=pyhd3eb1b0_1
 30 |   - backports.weakref=1.0.post1=py_1
 31 |   - bcrypt=3.2.0=py310h2bbff1b_1
 32 |   - beautifulsoup4=4.11.1=py310haa95532_0
 33 |   - binaryornot=0.4.4=pyhd3eb1b0_1
 34 |   - black=22.6.0=py310haa95532_0
 35 |   - blas=1.0=mkl
 36 |   - bleach=4.1.0=pyhd3eb1b0_0
 37 |   - blosc=1.21.3=h6c2663c_0
 38 |   - bokeh=2.4.3=py310haa95532_0
 39 |   - boltons=23.0.0=py310haa95532_0
 40 |   - bottleneck=1.3.5=py310h9128911_0
 41 |   - brotli=1.0.9=h2bbff1b_7
 42 |   - brotli-bin=1.0.9=h2bbff1b_7
 43 |   - brotlipy=0.7.0=py310h2bbff1b_1002
 44 |   - bzip2=1.0.8=he774522_0
 45 |   - ca-certificates=2023.05.30=haa95532_0
 46 |   - certifi=2023.5.7=py310haa95532_0
 47 |   - cffi=1.15.1=py310h2bbff1b_3
 48 |   - cfitsio=3.470=h2bbff1b_7
 49 |   - chardet=4.0.0=py310haa95532_1003
 50 |   - charls=2.2.0=h6c2663c_0
 51 |   - charset-normalizer=2.0.4=pyhd3eb1b0_0
 52 |   - click=8.0.4=py310haa95532_0
 53 |   - cloudpickle=2.0.0=pyhd3eb1b0_0
 54 |   - clyent=1.2.2=py310haa95532_1
 55 |   - colorama=0.4.6=py310haa95532_0
 56 |   - colorcet=3.0.1=py310haa95532_0
 57 |   - comm=0.1.2=py310haa95532_0
 58 |   - conda=23.3.1=py310haa95532_0
 59 |   - conda-build=3.24.0=py310haa95532_0
 60 |   - conda-content-trust=0.1.3=py310haa95532_0
 61 |   - conda-pack=0.6.0=pyhd3eb1b0_0
 62 |   - conda-package-handling=2.0.2=py310haa95532_0
 63 |   - conda-package-streaming=0.7.0=py310haa95532_0
 64 |   - conda-repo-cli=1.0.41=py310haa95532_0
 65 |   - conda-token=0.4.0=pyhd3eb1b0_0
 66 |   - conda-verify=3.4.2=py_1
 67 |   - console_shortcut=0.1.1=4
 68 |   - constantly=15.1.0=py310haa95532_0
 69 |   - contourpy=1.0.5=py310h59b6b97_0
 70 |   - cookiecutter=1.7.3=pyhd3eb1b0_0
 71 |   - cryptography=39.0.1=py310h21b164f_0
 72 |   - cssselect=1.1.0=pyhd3eb1b0_0
 73 |   - curl=7.87.0=h2bbff1b_0
 74 |   - cycler=0.11.0=pyhd3eb1b0_0
 75 |   - cytoolz=0.12.0=py310h2bbff1b_0
 76 |   - daal4py=2023.0.2=py310hf497b98_0
 77 |   - dal=2023.0.1=h59b6b97_26646
 78 |   - dask=2022.7.0=py310haa95532_0
 79 |   - dask-core=2022.7.0=py310haa95532_0
 80 |   - datashader=0.14.4=py310haa95532_0
 81 |   - datashape=0.5.4=py310haa95532_1
 82 |   - debugpy=1.5.1=py310hd77b12b_0
 83 |   - decorator=5.1.1=pyhd3eb1b0_0
 84 |   - defusedxml=0.7.1=pyhd3eb1b0_0
 85 |   - diff-match-patch=20200713=pyhd3eb1b0_0
 86 |   - dill=0.3.6=py310haa95532_0
 87 |   - distributed=2022.7.0=py310haa95532_0
 88 |   - docstring-to-markdown=0.11=py310haa95532_0
 89 |   - docutils=0.18.1=py310haa95532_3
 90 |   - entrypoints=0.4=py310haa95532_0
 91 |   - et_xmlfile=1.1.0=py310haa95532_0
 92 |   - executing=0.8.3=pyhd3eb1b0_0
 93 |   - filelock=3.9.0=py310haa95532_0
 94 |   - flake8=6.0.0=py310haa95532_0
 95 |   - flask=2.2.2=py310haa95532_0
 96 |   - flit-core=3.6.0=pyhd3eb1b0_0
 97 |   - fonttools=4.25.0=pyhd3eb1b0_0
 98 |   - freetype=2.12.1=ha860e81_0
 99 |   - fsspec=2022.11.0=py310haa95532_0
100 |   - future=0.18.3=py310haa95532_0
101 |   - gensim=4.3.0=py310h4ed8f06_0
102 |   - giflib=5.2.1=h8cc25b3_3
103 |   - glib=2.69.1=h5dc1a3c_2
104 |   - glob2=0.7=pyhd3eb1b0_0
105 |   - greenlet=2.0.1=py310hd77b12b_0
106 |   - gst-plugins-base=1.18.5=h9e645db_0
107 |   - gstreamer=1.18.5=hd78058f_0
108 |   - hdf5=1.10.6=h1756f20_1
109 |   - heapdict=1.0.1=pyhd3eb1b0_0
110 |   - holoviews=1.15.4=py310haa95532_0
111 |   - huggingface_hub=0.10.1=py310haa95532_0
112 |   - hvplot=0.8.2=py310haa95532_0
113 |   - hyperlink=21.0.0=pyhd3eb1b0_0
114 |   - icc_rt=2022.1.0=h6049295_2
115 |   - icu=58.2=ha925a31_3
116 |   - idna=3.4=py310haa95532_0
117 |   - imagecodecs=2021.8.26=py310h4c966c4_2
118 |   - imageio=2.26.0=py310haa95532_0
119 |   - imagesize=1.4.1=py310haa95532_0
120 |   - imbalanced-learn=0.10.1=py310haa95532_0
121 |   - importlib-metadata=4.11.3=py310haa95532_0
122 |   - importlib_metadata=4.11.3=hd3eb1b0_0
123 |   - incremental=21.3.0=pyhd3eb1b0_0
124 |   - inflection=0.5.1=py310haa95532_0
125 |   - iniconfig=1.1.1=pyhd3eb1b0_0
126 |   - intake=0.6.7=py310haa95532_0
127 |   - intel-openmp=2021.4.0=haa95532_3556
128 |   - intervaltree=3.1.0=pyhd3eb1b0_0
129 |   - ipykernel=6.19.2=py310h9909e9c_0
130 |   - ipython=8.10.0=py310haa95532_0
131 |   - ipython_genutils=0.2.0=pyhd3eb1b0_1
132 |   - ipywidgets=7.6.5=pyhd3eb1b0_1
133 |   - isort=5.9.3=pyhd3eb1b0_0
134 |   - itemadapter=0.3.0=pyhd3eb1b0_0
135 |   - itemloaders=1.0.4=pyhd3eb1b0_1
136 |   - itsdangerous=2.0.1=pyhd3eb1b0_0
137 |   - jedi=0.18.1=py310haa95532_1
138 |   - jellyfish=0.9.0=py310h2bbff1b_0
139 |   - jinja2=3.1.2=py310haa95532_0
140 |   - jinja2-time=0.2.0=pyhd3eb1b0_3
141 |   - jmespath=0.10.0=pyhd3eb1b0_0
142 |   - joblib=1.1.1=py310haa95532_0
143 |   - jpeg=9e=h2bbff1b_1
144 |   - jq=1.6=haa95532_1
145 |   - json5=0.9.6=pyhd3eb1b0_0
146 |   - jsonpatch=1.32=pyhd3eb1b0_0
147 |   - jsonpointer=2.1=pyhd3eb1b0_0
148 |   - jsonschema=4.17.3=py310haa95532_0
149 |   - jupyter=1.0.0=py310haa95532_8
150 |   - jupyter_client=7.3.4=py310haa95532_0
151 |   - jupyter_console=6.6.2=py310haa95532_0
152 |   - jupyter_core=5.2.0=py310haa95532_0
153 |   - jupyter_server=1.23.4=py310haa95532_0
154 |   - jupyterlab=3.5.3=py310haa95532_0
155 |   - jupyterlab_pygments=0.1.2=py_0
156 |   - jupyterlab_server=2.19.0=py310haa95532_0
157 |   - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
158 |   - jxrlib=1.1=he774522_2
159 |   - keyring=23.4.0=py310haa95532_0
160 |   - kiwisolver=1.4.4=py310hd77b12b_0
161 |   - lazy-object-proxy=1.6.0=py310h2bbff1b_0
162 |   - lcms2=2.12=h83e58a3_0
163 |   - lerc=3.0=hd77b12b_0
164 |   - libaec=1.0.4=h33f27b4_1
165 |   - libarchive=3.6.2=hebabd0d_0
166 |   - libbrotlicommon=1.0.9=h2bbff1b_7
167 |   - libbrotlidec=1.0.9=h2bbff1b_7
168 |   - libbrotlienc=1.0.9=h2bbff1b_7
169 |   - libcurl=7.87.0=h86230a5_0
170 |   - libdeflate=1.17=h2bbff1b_0
171 |   - libffi=3.4.2=hd77b12b_6
172 |   - libiconv=1.16=h2bbff1b_2
173 |   - liblief=0.12.3=hd77b12b_0
174 |   - libogg=1.3.5=h2bbff1b_1
175 |   - libpng=1.6.39=h8cc25b3_0
176 |   - libsodium=1.0.18=h62dcd97_0
177 |   - libspatialindex=1.9.3=h6c2663c_0
178 |   - libssh2=1.10.0=hcd4344a_0
179 |   - libtiff=4.5.0=h6c2663c_2
180 |   - libuv=1.44.2=h2bbff1b_0
181 |   - libvorbis=1.3.7=he774522_0
182 |   - libwebp=1.2.4=hbc33d0d_1
183 |   - libwebp-base=1.2.4=h2bbff1b_1
184 |   - libxml2=2.9.14=h0ad7f3c_0
185 |   - libxslt=1.1.35=h2bbff1b_0
186 |   - libzopfli=1.0.3=ha925a31_0
187 |   - llvmlite=0.39.1=py310h23ce68f_0
188 |   - locket=1.0.0=py310haa95532_0
189 |   - lxml=4.9.1=py310h1985fb9_0
190 |   - lz4=3.1.3=py310h2bbff1b_0
191 |   - lz4-c=1.9.4=h2bbff1b_0
192 |   - lzo=2.10=he774522_2
193 |   - m2-msys2-runtime=2.5.0.17080.65c939c=3
194 |   - m2-patch=2.7.5=2
195 |   - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
196 |   - markdown=3.4.1=py310haa95532_0
197 |   - markupsafe=2.1.1=py310h2bbff1b_0
198 |   - matplotlib=3.7.0=py310haa95532_0
199 |   - matplotlib-base=3.7.0=py310h4ed8f06_0
200 |   - matplotlib-inline=0.1.6=py310haa95532_0
201 |   - mccabe=0.7.0=pyhd3eb1b0_0
202 |   - menuinst=1.4.19=py310h59b6b97_0
203 |   - mistune=0.8.4=py310h2bbff1b_1000
204 |   - mkl=2021.4.0=haa95532_640
205 |   - mkl-service=2.4.0=py310h2bbff1b_0
206 |   - mkl_fft=1.3.1=py310ha0764ea_0
207 |   - mkl_random=1.2.2=py310h4ed8f06_0
208 |   - mock=4.0.3=pyhd3eb1b0_0
209 |   - mpmath=1.2.1=py310haa95532_0
210 |   - msgpack-python=1.0.3=py310h59b6b97_0
211 |   - msys2-conda-epoch=20160418=1
212 |   - multipledispatch=0.6.0=py310haa95532_0
213 |   - munkres=1.1.4=py_0
214 |   - mypy_extensions=0.4.3=py310haa95532_1
215 |   - navigator-updater=0.3.0=py310haa95532_0
216 |   - nbclassic=0.5.2=py310haa95532_0
217 |   - nbclient=0.5.13=py310haa95532_0
218 |   - nbconvert=6.5.4=py310haa95532_0
219 |   - nbformat=5.7.0=py310haa95532_0
220 |   - nest-asyncio=1.5.6=py310haa95532_0
221 |   - networkx=2.8.4=py310haa95532_0
222 |   - ninja=1.10.2=haa95532_5
223 |   - ninja-base=1.10.2=h6d14046_5
224 |   - nltk=3.7=pyhd3eb1b0_0
225 |   - notebook=6.5.2=py310haa95532_0
226 |   - notebook-shim=0.2.2=py310haa95532_0
227 |   - numba=0.56.4=py310h4ed8f06_0
228 |   - numexpr=2.8.4=py310hd213c9f_0
229 |   - numpy=1.23.5=py310h60c9a35_0
230 |   - numpy-base=1.23.5=py310h04254f7_0
231 |   - numpydoc=1.5.0=py310haa95532_0
232 |   - openjpeg=2.4.0=h4fc8c34_0
233 |   - openpyxl=3.0.10=py310h2bbff1b_0
234 |   - openssl=1.1.1t=h2bbff1b_0
235 |   - packaging=22.0=py310haa95532_0
236 |   - pandas=1.5.3=py310h4ed8f06_0
237 |   - pandocfilters=1.5.0=pyhd3eb1b0_0
238 |   - panel=0.14.3=py310haa95532_0
239 |   - param=1.12.3=py310haa95532_0
240 |   - paramiko=2.8.1=pyhd3eb1b0_0
241 |   - parsel=1.6.0=py310haa95532_0
242 |   - parso=0.8.3=pyhd3eb1b0_0
243 |   - partd=1.2.0=pyhd3eb1b0_1
244 |   - pathlib=1.0.1=pyhd3eb1b0_1
245 |   - pathspec=0.10.3=py310haa95532_0
246 |   - patsy=0.5.3=py310haa95532_0
247 |   - pcre=8.45=hd77b12b_0
248 |   - pep8=1.7.1=py310haa95532_1
249 |   - pexpect=4.8.0=pyhd3eb1b0_3
250 |   - pickleshare=0.7.5=pyhd3eb1b0_1003
251 |   - pillow=9.4.0=py310hd77b12b_0
252 |   - pip=22.3.1=py310haa95532_0
253 |   - pkginfo=1.9.6=py310haa95532_0
254 |   - platformdirs=2.5.2=py310haa95532_0
255 |   - plotly=5.9.0=py310haa95532_0
256 |   - pluggy=1.0.0=py310haa95532_1
257 |   - ply=3.11=py310haa95532_0
258 |   - pooch=1.4.0=pyhd3eb1b0_0
259 |   - powershell_shortcut=0.0.1=3
260 |   - poyo=0.5.0=pyhd3eb1b0_0
261 |   - prometheus_client=0.14.1=py310haa95532_0
262 |   - prompt-toolkit=3.0.36=py310haa95532_0
263 |   - prompt_toolkit=3.0.36=hd3eb1b0_0
264 |   - protego=0.1.16=py_0
265 |   - psutil=5.9.0=py310h2bbff1b_0
266 |   - ptyprocess=0.7.0=pyhd3eb1b0_2
267 |   - pure_eval=0.2.2=pyhd3eb1b0_0
268 |   - py=1.11.0=pyhd3eb1b0_0
269 |   - py-lief=0.12.3=py310hd77b12b_0
270 |   - pyasn1=0.4.8=pyhd3eb1b0_0
271 |   - pyasn1-modules=0.2.8=py_0
272 |   - pycodestyle=2.10.0=py310haa95532_0
273 |   - pycosat=0.6.4=py310h2bbff1b_0
274 |   - pycparser=2.21=pyhd3eb1b0_0
275 |   - pyct=0.5.0=py310haa95532_0
276 |   - pycurl=7.45.1=py310hcd4344a_0
277 |   - pydispatcher=2.0.5=py310haa95532_2
278 |   - pydocstyle=6.3.0=py310haa95532_0
279 |   - pyerfa=2.0.0=py310h2bbff1b_0
280 |   - pyflakes=3.0.1=py310haa95532_0
281 |   - pygments=2.11.2=pyhd3eb1b0_0
282 |   - pyhamcrest=2.0.2=pyhd3eb1b0_2
283 |   - pyjwt=2.4.0=py310haa95532_0
284 |   - pylint=2.16.2=py310haa95532_0
285 |   - pyls-spyder=0.4.0=pyhd3eb1b0_0
286 |   - pynacl=1.5.0=py310h8cc25b3_0
287 |   - pyodbc=4.0.34=py310hd77b12b_0
288 |   - pyopenssl=23.0.0=py310haa95532_0
289 |   - pyparsing=3.0.9=py310haa95532_0
290 |   - pyqt=5.15.7=py310hd77b12b_0
291 |   - pyqt5-sip=12.11.0=py310hd77b12b_0
292 |   - pyqtwebengine=5.15.7=py310hd77b12b_0
293 |   - pyrsistent=0.18.0=py310h2bbff1b_0
294 |   - pysocks=1.7.1=py310haa95532_0
295 |   - pytables=3.7.0=py310h388bc9b_1
296 |   - pytest=7.1.2=py310haa95532_0
297 |   - python=3.10.9=h966fe2a_1
298 |   - python-dateutil=2.8.2=pyhd3eb1b0_0
299 |   - python-fastjsonschema=2.16.2=py310haa95532_0
300 |   - python-libarchive-c=2.9=pyhd3eb1b0_1
301 |   - python-lsp-black=1.2.1=py310haa95532_0
302 |   - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0
303 |   - python-lsp-server=1.7.1=py310haa95532_0
304 |   - python-slugify=5.0.2=pyhd3eb1b0_0
305 |   - python-snappy=0.6.1=py310hd77b12b_0
306 |   - pytoolconfig=1.2.5=py310haa95532_1
307 |   - pytorch=2.0.1=py3.10_cpu_0
308 |   - pytorch-mutex=1.0=cpu
309 |   - pytz=2022.7=py310haa95532_0
310 |   - pyviz_comms=2.0.2=pyhd3eb1b0_0
311 |   - pywavelets=1.4.1=py310h2bbff1b_0
312 |   - pywin32=305=py310h2bbff1b_0
313 |   - pywin32-ctypes=0.2.0=py310haa95532_1000
314 |   - pywinpty=2.0.10=py310h5da7b33_0
315 |   - pyyaml=6.0=py310h2bbff1b_1
316 |   - pyzmq=23.2.0=py310hd77b12b_0
317 |   - qdarkstyle=3.0.2=pyhd3eb1b0_0
318 |   - qt-main=5.15.2=he8e5bd7_7
319 |   - qt-webengine=5.15.9=hb9a9bb5_5
320 |   - qtpy=2.2.0=py310haa95532_0
321 |   - qtwebkit=5.212=h3ad3cdb_4
322 |   - queuelib=1.5.0=py310haa95532_0
323 |   - regex=2022.7.9=py310h2bbff1b_0
324 |   - requests=2.28.1=py310haa95532_0
325 |   - requests-file=1.5.1=pyhd3eb1b0_0
326 |   - requests-toolbelt=0.9.1=pyhd3eb1b0_0
327 |   - rope=1.7.0=py310haa95532_0
328 |   - rtree=1.0.1=py310h2eaa2aa_0
329 |   - ruamel.yaml=0.17.21=py310h2bbff1b_0
330 |   - ruamel.yaml.clib=0.2.6=py310h2bbff1b_1
331 |   - ruamel_yaml=0.17.21=py310h2bbff1b_0
332 |   - scikit-image=0.19.3=py310hd77b12b_1
333 |   - scikit-learn=1.2.1=py310hd77b12b_0
334 |   - scikit-learn-intelex=2023.0.2=py310haa95532_0
335 |   - scipy=1.10.0=py310hb9afe5d_1
336 |   - scrapy=2.8.0=py310haa95532_0
337 |   - seaborn=0.12.2=py310haa95532_0
338 |   - send2trash=1.8.0=pyhd3eb1b0_1
339 |   - service_identity=18.1.0=pyhd3eb1b0_1
340 |   - setuptools=65.6.3=py310haa95532_0
341 |   - sip=6.6.2=py310hd77b12b_0
342 |   - six=1.16.0=pyhd3eb1b0_1
343 |   - smart_open=5.2.1=py310haa95532_0
344 |   - snappy=1.1.9=h6c2663c_0
345 |   - sniffio=1.2.0=py310haa95532_1
346 |   - snowballstemmer=2.2.0=pyhd3eb1b0_0
347 |   - sortedcontainers=2.4.0=pyhd3eb1b0_0
348 |   - soupsieve=2.3.2.post1=py310haa95532_0
349 |   - sphinx=5.0.2=py310haa95532_0
350 |   - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0
351 |   - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0
352 |   - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0
353 |   - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
354 |   - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0
355 |   - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0
356 |   - spyder=5.4.1=py310haa95532_0
357 |   - spyder-kernels=2.4.1=py310haa95532_0
358 |   - sqlalchemy=1.4.39=py310h2bbff1b_0
359 |   - sqlite=3.40.1=h2bbff1b_0
360 |   - stack_data=0.2.0=pyhd3eb1b0_0
361 |   - statsmodels=0.13.5=py310h9128911_1
362 |   - sympy=1.11.1=py310haa95532_0
363 |   - tabulate=0.8.10=py310haa95532_0
364 |   - tbb=2021.7.0=h59b6b97_0
365 |   - tbb4py=2021.7.0=py310h59b6b97_0
366 |   - tblib=1.7.0=pyhd3eb1b0_0
367 |   - tenacity=8.0.1=py310haa95532_1
368 |   - terminado=0.17.1=py310haa95532_0
369 |   - text-unidecode=1.3=pyhd3eb1b0_0
370 |   - textdistance=4.2.1=pyhd3eb1b0_0
371 |   - threadpoolctl=2.2.0=pyh0d69192_0
372 |   - three-merge=0.1.1=pyhd3eb1b0_0
373 |   - tifffile=2021.7.2=pyhd3eb1b0_2
374 |   - tinycss2=1.2.1=py310haa95532_0
375 |   - tk=8.6.12=h2bbff1b_0
376 |   - tldextract=3.2.0=pyhd3eb1b0_0
377 |   - tokenizers=0.11.4=py310he5181cf_1
378 |   - toml=0.10.2=pyhd3eb1b0_0
379 |   - tomli=2.0.1=py310haa95532_0
380 |   - tomlkit=0.11.1=py310haa95532_0
381 |   - toolz=0.12.0=py310haa95532_0
382 |   - tornado=6.1=py310h2bbff1b_0
383 |   - tqdm=4.64.1=py310haa95532_0
384 |   - traitlets=5.7.1=py310haa95532_0
385 |   - transformers=4.24.0=py310haa95532_0
386 |   - twisted=22.2.0=py310h2bbff1b_1
387 |   - twisted-iocpsupport=1.0.2=py310h2bbff1b_0
388 |   - typing-extensions=4.4.0=py310haa95532_0
389 |   - typing_extensions=4.4.0=py310haa95532_0
390 |   - tzdata=2022g=h04d1e81_0
391 |   - ujson=5.4.0=py310hd77b12b_0
392 |   - unidecode=1.2.0=pyhd3eb1b0_0
393 |   - urllib3=1.26.14=py310haa95532_0
394 |   - vc=14.2=h21ff451_1
395 |   - vs2015_runtime=14.27.29016=h5e58377_2
396 |   - w3lib=1.21.0=pyhd3eb1b0_0
397 |   - watchdog=2.1.6=py310haa95532_0
398 |   - wcwidth=0.2.5=pyhd3eb1b0_0
399 |   - webencodings=0.5.1=py310haa95532_1
400 |   - websocket-client=0.58.0=py310haa95532_4
401 |   - werkzeug=2.2.2=py310haa95532_0
402 |   - whatthepatch=1.0.2=py310haa95532_0
403 |   - wheel=0.38.4=py310haa95532_0
404 |   - widgetsnbextension=3.5.2=py310haa95532_0
405 |   - win_inet_pton=1.1.0=py310haa95532_0
406 |   - wincertstore=0.2=py310haa95532_2
407 |   - winpty=0.4.3=4
408 |   - wrapt=1.14.1=py310h2bbff1b_0
409 |   - xarray=2022.11.0=py310haa95532_0
410 |   - xlwings=0.29.1=py310haa95532_0
411 |   - xz=5.2.10=h8cc25b3_1
412 |   - yaml=0.2.5=he774522_0
413 |   - yapf=0.31.0=pyhd3eb1b0_0
414 |   - zeromq=4.3.4=hd77b12b_0
415 |   - zfp=0.5.5=hd77b12b_6
416 |   - zict=2.1.0=py310haa95532_0
417 |   - zipp=3.11.0=py310haa95532_0
418 |   - zlib=1.2.13=h8cc25b3_0
419 |   - zope=1.0=py310haa95532_1
420 |   - zope.interface=5.4.0=py310h2bbff1b_0
421 |   - zstandard=0.19.0=py310h2bbff1b_0
422 |   - zstd=1.5.2=h19a0ad4_0
423 |   - pip:
424 |       - absl-py==1.4.0
425 |       - ale-py==0.8.1
426 |       - astunparse==1.6.3
427 |       - cachetools==5.3.0
428 |       - dm-tree==0.1.8
429 |       - docker-pycreds==0.4.0
430 |       - farama-notifications==0.0.4
431 |       - flatbuffers==23.5.9
432 |       - gast==0.4.0
433 |       - gitdb==4.0.10
434 |       - gitpython==3.1.31
435 |       - google-auth==2.18.1
436 |       - google-auth-oauthlib==1.0.0
437 |       - google-pasta==0.2.0
438 |       - grpcio==1.54.2
439 |       - gym==0.26.2
440 |       - gym-notices==0.0.8
441 |       - gymnasium==0.28.1
442 |       - importlib-resources==5.12.0
443 |       - jax==0.4.10
444 |       - jax-jumpy==1.0.0
445 |       - keras==2.12.0
446 |       - libclang==16.0.0
447 |       - ml-dtypes==0.1.0
448 |       - numpy-stl==3.0.1
449 |       - oauthlib==3.2.2
450 |       - opencv-contrib-python==4.7.0.72
451 |       - opt-einsum==3.3.0
452 |       - pathtools==0.1.2
453 |       - progressbar2==4.2.0
454 |       - protobuf==3.20.3
455 |       - pyglet==1.5.27
456 |       - pylint-venv==2.3.0
457 |       - python-utils==3.5.2
458 |       - qstylizer==0.2.2
459 |       - qtawesome==1.2.2
460 |       - qtconsole==5.4.0
461 |       - requests-oauthlib==1.3.1
462 |       - rsa==4.9
463 |       - sentry-sdk==1.24.0
464 |       - setproctitle==1.3.2
465 |       - smmap==5.0.0
466 |       - sumolib==1.17.0
467 |       - tensorboard==2.12.3
468 |       - tensorboard-data-server==0.7.0
469 |       - tensorboardx==2.6
470 |       - tensordict==0.1.2
471 |       - tensorflow==2.12.0
472 |       - tensorflow-estimator==2.12.0
473 |       - tensorflow-intel==2.12.0
474 |       - tensorflow-io-gcs-filesystem==0.31.0
475 |       - tensorflow-probability==0.20.1
476 |       - tensorlayer==2.2.5
477 |       - termcolor==2.3.0
478 |       - torchrl==0.1.1
479 |       - traci==1.17.0
480 |       - visdom==0.2.4
481 |       - wandb==0.15.3
482 | prefix: D:\anaconda
483 | 


--------------------------------------------------------------------------------
/environment/requirements.txt:
--------------------------------------------------------------------------------
  1 | absl-py==1.4.0
  2 | alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
  3 | ale-py==0.8.1
  4 | anaconda-client==1.11.2
  5 | anaconda-navigator==2.4.0
  6 | anaconda-project @ file:///C:/Windows/TEMP/abs_91fu4tfkih/croots/recipe/anaconda-project_1660339890874/work
  7 | anyio @ file:///C:/ci/anyio_1644481856696/work/dist
  8 | appdirs==1.4.4
  9 | argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
 10 | argon2-cffi-bindings @ file:///C:/ci/argon2-cffi-bindings_1644569876605/work
 11 | arrow @ file:///C:/b/abs_cal7u12ktb/croot/arrow_1676588147908/work
 12 | astroid @ file:///C:/b/abs_d4lg3_taxn/croot/astroid_1676904351456/work
 13 | astropy @ file:///C:/ci/astropy_1657719642921/work
 14 | asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
 15 | astunparse==1.6.3
 16 | atomicwrites==1.4.0
 17 | attrs @ file:///C:/b/abs_09s3y775ra/croot/attrs_1668696195628/work
 18 | Automat @ file:///tmp/build/80754af9/automat_1600298431173/work
 19 | autopep8 @ file:///opt/conda/conda-bld/autopep8_1650463822033/work
 20 | Babel @ file:///C:/b/abs_a2shv_3tqi/croot/babel_1671782804377/work
 21 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
 22 | backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work
 23 | backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work
 24 | backports.weakref==1.0.post1
 25 | bcrypt @ file:///C:/Windows/Temp/abs_36kl66t_aw/croots/recipe/bcrypt_1659554334050/work
 26 | beautifulsoup4 @ file:///C:/ci/beautifulsoup4_1650293028159/work
 27 | binaryornot @ file:///tmp/build/80754af9/binaryornot_1617751525010/work
 28 | black @ file:///C:/ci/black_1660221726201/work
 29 | bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work
 30 | bokeh @ file:///C:/Windows/TEMP/abs_4a259bc2-ed05-4a1f-808e-ac712cc0900cddqp8sp7/croots/recipe/bokeh_1658136660686/work
 31 | boltons @ file:///C:/b/abs_707eo7c09t/croot/boltons_1677628723117/work
 32 | Bottleneck @ file:///C:/Windows/Temp/abs_3198ca53-903d-42fd-87b4-03e6d03a8381yfwsuve8/croots/recipe/bottleneck_1657175565403/work
 33 | brotlipy==0.7.0
 34 | cachetools==5.3.0
 35 | certifi @ file:///C:/b/abs_4a0polqwty/croot/certifi_1683875377622/work/certifi
 36 | cffi @ file:///C:/b/abs_49n3v2hyhr/croot/cffi_1670423218144/work
 37 | chardet @ file:///C:/ci_310/chardet_1642114080098/work
 38 | charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
 39 | click @ file:///C:/ci/click_1646056762388/work
 40 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1632508026186/work
 41 | clyent==1.2.2
 42 | colorama @ file:///C:/b/abs_a9ozq0l032/croot/colorama_1672387194846/work
 43 | colorcet @ file:///C:/b/abs_46vyu0rpdl/croot/colorcet_1668084513237/work
 44 | comm @ file:///C:/b/abs_1419earm7u/croot/comm_1671231131638/work
 45 | conda==23.3.1
 46 | conda-build==3.24.0
 47 | conda-content-trust @ file:///C:/Windows/TEMP/abs_4589313d-fc62-4ccc-81c0-b801b4449e833j1ajrwu/croots/recipe/conda-content-trust_1658126379362/work
 48 | conda-pack @ file:///tmp/build/80754af9/conda-pack_1611163042455/work
 49 | conda-package-handling @ file:///C:/b/abs_fcga8w0uem/croot/conda-package-handling_1672865024290/work
 50 | conda-repo-cli==1.0.41
 51 | conda-token @ file:///Users/paulyim/miniconda3/envs/c3i/conda-bld/conda-token_1662660369760/work
 52 | conda-verify==3.4.2
 53 | conda_package_streaming @ file:///C:/b/abs_0e5n5hdal3/croot/conda-package-streaming_1670508162902/work
 54 | constantly==15.1.0
 55 | contourpy @ file:///C:/b/abs_d5rpy288vc/croots/recipe/contourpy_1663827418189/work
 56 | cookiecutter @ file:///opt/conda/conda-bld/cookiecutter_1649151442564/work
 57 | cryptography @ file:///C:/b/abs_8ecplyc3n2/croot/cryptography_1677533105000/work
 58 | cssselect==1.1.0
 59 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
 60 | cytoolz @ file:///C:/b/abs_61m9vzb4qh/croot/cytoolz_1667465938275/work
 61 | daal4py==2023.0.2
 62 | dask @ file:///C:/ci/dask-core_1658497112560/work
 63 | datashader @ file:///C:/b/abs_e80f3d7ac0/croot/datashader_1676023254070/work
 64 | datashape==0.5.4
 65 | debugpy @ file:///C:/ci_310/debugpy_1642079916595/work
 66 | decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
 67 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work
 68 | diff-match-patch @ file:///Users/ktietz/demo/mc3/conda-bld/diff-match-patch_1630511840874/work
 69 | dill @ file:///C:/b/abs_42h_07z1yj/croot/dill_1667919550096/work
 70 | distributed @ file:///C:/ci/distributed_1658523963030/work
 71 | dm-tree==0.1.8
 72 | docker-pycreds==0.4.0
 73 | docstring-to-markdown @ file:///C:/b/abs_cf10j8nr4q/croot/docstring-to-markdown_1673447652942/work
 74 | docutils @ file:///C:/Windows/TEMP/abs_24e5e278-4d1c-47eb-97b9-f761d871f482dy2vg450/croots/recipe/docutils_1657175444608/work
 75 | entrypoints @ file:///C:/ci/entrypoints_1649926676279/work
 76 | et-xmlfile==1.1.0
 77 | executing @ file:///opt/conda/conda-bld/executing_1646925071911/work
 78 | Farama-Notifications==0.0.4
 79 | fastjsonschema @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_ebruxzvd08/croots/recipe/python-fastjsonschema_1661376484940/work
 80 | filelock @ file:///C:/b/abs_c7yrhs9uz2/croot/filelock_1672387617533/work
 81 | flake8 @ file:///C:/b/abs_9f6_n1jlpc/croot/flake8_1674581816810/work
 82 | Flask @ file:///C:/b/abs_ef16l83sif/croot/flask_1671217367534/work
 83 | flatbuffers==23.5.9
 84 | flit_core @ file:///opt/conda/conda-bld/flit-core_1644941570762/work/source/flit_core
 85 | fonttools==4.25.0
 86 | fsspec @ file:///C:/b/abs_5bjz6v0w_f/croot/fsspec_1670336608940/work
 87 | future @ file:///C:/b/abs_3dcibf18zi/croot/future_1677599891380/work
 88 | gast==0.4.0
 89 | gensim @ file:///C:/b/abs_a5vat69tv8/croot/gensim_1674853640591/work
 90 | gitdb==4.0.10
 91 | GitPython==3.1.31
 92 | glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work
 93 | google-auth==2.18.1
 94 | google-auth-oauthlib==1.0.0
 95 | google-pasta==0.2.0
 96 | greenlet @ file:///C:/b/abs_47lk_w2ajq/croot/greenlet_1670513248400/work
 97 | grpcio==1.54.2
 98 | gym==0.26.2
 99 | gym-notices==0.0.8
100 | gymnasium==0.28.1
101 | h5py==3.9.0
102 | HeapDict @ file:///Users/ktietz/demo/mc3/conda-bld/heapdict_1630598515714/work
103 | holoviews @ file:///C:/b/abs_bbf97_0kcd/croot/holoviews_1676372911083/work
104 | huggingface-hub @ file:///C:/b/abs_8d8wo2z8c6/croot/huggingface_hub_1667490298905/work
105 | hvplot @ file:///C:/b/abs_13un17_4x_/croot/hvplot_1670508919193/work
106 | hyperlink @ file:///tmp/build/80754af9/hyperlink_1610130746837/work
107 | idna @ file:///C:/b/abs_bdhbebrioa/croot/idna_1666125572046/work
108 | imagecodecs @ file:///C:/b/abs_f0cr12h73p/croot/imagecodecs_1677576746499/work
109 | imageio @ file:///C:/b/abs_27kq2gy1us/croot/imageio_1677879918708/work
110 | imagesize @ file:///C:/Windows/TEMP/abs_3cecd249-3fc4-4bfc-b80b-bb227b0d701en12vqzot/croots/recipe/imagesize_1657179501304/work
111 | imbalanced-learn @ file:///C:/b/abs_1911ryuksz/croot/imbalanced-learn_1677191585237/work
112 | importlib-metadata @ file:///C:/ci/importlib-metadata_1648544469310/work
113 | importlib-resources==5.12.0
114 | incremental @ file:///tmp/build/80754af9/incremental_1636629750599/work
115 | inflection==0.5.1
116 | iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work
117 | intake @ file:///C:/b/abs_42yyb2lhwx/croot/intake_1676619887779/work
118 | intervaltree @ file:///Users/ktietz/demo/mc3/conda-bld/intervaltree_1630511889664/work
119 | ipykernel @ file:///C:/b/abs_b4f07tbsyd/croot/ipykernel_1672767104060/work
120 | ipython @ file:///C:/b/abs_d3h279dv3h/croot/ipython_1676582236558/work
121 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
122 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1634143127070/work
123 | isort @ file:///tmp/build/80754af9/isort_1628603791788/work
124 | itemadapter @ file:///tmp/build/80754af9/itemadapter_1626442940632/work
125 | itemloaders @ file:///opt/conda/conda-bld/itemloaders_1646805235997/work
126 | itsdangerous @ file:///tmp/build/80754af9/itsdangerous_1621432558163/work
127 | jax==0.4.10
128 | jax-jumpy==1.0.0
129 | jedi @ file:///C:/ci/jedi_1644315428305/work
130 | jellyfish @ file:///C:/ci/jellyfish_1647962737334/work
131 | Jinja2 @ file:///C:/b/abs_7cdis66kl9/croot/jinja2_1666908141852/work
132 | jinja2-time @ file:///opt/conda/conda-bld/jinja2-time_1649251842261/work
133 | jmespath @ file:///Users/ktietz/demo/mc3/conda-bld/jmespath_1630583964805/work
134 | joblib @ file:///C:/b/abs_e60_bwl1v6/croot/joblib_1666298845728/work
135 | json5 @ file:///tmp/build/80754af9/json5_1624432770122/work
136 | jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
137 | jsonpointer==2.1
138 | jsonschema @ file:///C:/b/abs_6ccs97j_l8/croot/jsonschema_1676558690963/work
139 | jupyter @ file:///C:/Windows/TEMP/abs_56xfdi__li/croots/recipe/jupyter_1659349053177/work
140 | jupyter-console @ file:///C:/b/abs_68ttzd5p9c/croot/jupyter_console_1677674667636/work
141 | jupyter-server @ file:///C:/b/abs_1cfi3__jl8/croot/jupyter_server_1671707636383/work
142 | jupyter_client @ file:///C:/ci/jupyter_client_1661834530766/work
143 | jupyter_core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
144 | jupyterlab @ file:///C:/b/abs_513jt6yy74/croot/jupyterlab_1675354138043/work
145 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
146 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work
147 | jupyterlab_server @ file:///C:/b/abs_d1z_g1swc8/croot/jupyterlab_server_1677153204814/work
148 | keras==2.12.0
149 | keyring @ file:///C:/ci_310/keyring_1642165564669/work
150 | kiwisolver @ file:///C:/b/abs_88mdhvtahm/croot/kiwisolver_1672387921783/work
151 | lazy-object-proxy @ file:///C:/ci_310/lazy-object-proxy_1642083437654/work
152 | libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work
153 | libclang==16.0.0
154 | llvmlite==0.39.1
155 | locket @ file:///C:/ci/locket_1652904090946/work
156 | lxml @ file:///C:/ci/lxml_1657527492694/work
157 | lz4 @ file:///C:/ci_310/lz4_1643300078932/work
158 | Markdown @ file:///C:/b/abs_98lv_ucina/croot/markdown_1671541919225/work
159 | MarkupSafe @ file:///C:/ci/markupsafe_1654508036328/work
160 | matplotlib @ file:///C:/b/abs_b2d7uv90hg/croot/matplotlib-suite_1677674332463/work
161 | matplotlib-inline @ file:///C:/ci/matplotlib-inline_1661934094726/work
162 | mccabe @ file:///opt/conda/conda-bld/mccabe_1644221741721/work
163 | menuinst @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_455sf5o0ct/croots/recipe/menuinst_1661805970842/work
164 | mistune @ file:///C:/ci_310/mistune_1642084168466/work
165 | mkl-fft==1.3.1
166 | mkl-random @ file:///C:/ci_310/mkl_random_1643050563308/work
167 | mkl-service==2.4.0
168 | ml-dtypes==0.1.0
169 | mock @ file:///tmp/build/80754af9/mock_1607622725907/work
170 | mpmath==1.2.1
171 | msgpack @ file:///C:/ci/msgpack-python_1652348582618/work
172 | # Editable install with no version control (multiagent==0.0.1)
173 | -e c:\users\administrator.desktop-nlh290a\desktop\cm3_code\cm3-master\env\multiagent-particle-envs
174 | multipledispatch @ file:///C:/ci_310/multipledispatch_1642084438481/work
175 | munkres==1.1.4
176 | mypy-extensions==0.4.3
177 | navigator-updater==0.3.0
178 | nbclassic @ file:///C:/b/abs_d0_ze5q0j2/croot/nbclassic_1676902914817/work
179 | nbclient @ file:///C:/ci/nbclient_1650308592199/work
180 | nbconvert @ file:///C:/b/abs_4av3q4okro/croot/nbconvert_1668450658054/work
181 | nbformat @ file:///C:/b/abs_85_3g7dkt4/croot/nbformat_1670352343720/work
182 | nest-asyncio @ file:///C:/b/abs_3a_4jsjlqu/croot/nest-asyncio_1672387322800/work
183 | networkx @ file:///C:/ci/networkx_1657716953747/work
184 | nltk @ file:///opt/conda/conda-bld/nltk_1645628263994/work
185 | notebook @ file:///C:/b/abs_ca13hqvuzw/croot/notebook_1668179888546/work
186 | notebook_shim @ file:///C:/b/abs_ebfczttg6x/croot/notebook-shim_1668160590914/work
187 | numba @ file:///C:/b/abs_e53pp2e4k7/croot/numba_1670258349527/work
188 | numexpr @ file:///C:/b/abs_a7kbak88hk/croot/numexpr_1668713882979/work
189 | numpy @ file:///C:/b/abs_datssh7cer/croot/numpy_and_numpy_base_1672336199388/work
190 | numpy-stl==3.0.1
191 | numpydoc @ file:///C:/b/abs_cfdd4zxbga/croot/numpydoc_1668085912100/work
192 | oauthlib==3.2.2
193 | opencv-contrib-python==4.7.0.72
194 | openpyxl==3.0.10
195 | opt-einsum==3.3.0
196 | packaging @ file:///C:/b/abs_cfsup8ur87/croot/packaging_1671697442297/work
197 | pandas @ file:///C:/b/abs_b9kefbuby2/croot/pandas_1677835593760/work
198 | pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work
199 | panel @ file:///C:/b/abs_55ujq2fpyh/croot/panel_1676379705003/work
200 | param @ file:///C:/b/abs_d799n8xz_7/croot/param_1671697759755/work
201 | paramiko @ file:///opt/conda/conda-bld/paramiko_1640109032755/work
202 | parsel @ file:///C:/ci/parsel_1646722035970/work
203 | parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
204 | partd @ file:///opt/conda/conda-bld/partd_1647245470509/work
205 | pathlib @ file:///Users/ktietz/demo/mc3/conda-bld/pathlib_1629713961906/work
206 | pathspec @ file:///C:/b/abs_9cu5_2yb3i/croot/pathspec_1674681579249/work
207 | pathtools==0.1.2
208 | patsy==0.5.3
209 | pep8==1.7.1
210 | pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
211 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
212 | Pillow==9.4.0
213 | pkginfo @ file:///C:/b/abs_d18srtr68x/croot/pkginfo_1679431192239/work
214 | platformdirs @ file:///C:/b/abs_73cc5cz_1u/croots/recipe/platformdirs_1662711386458/work
215 | plotly @ file:///C:/ci/plotly_1658160673416/work
216 | pluggy @ file:///C:/ci/pluggy_1648042746254/work
217 | ply==3.11
218 | pooch @ file:///tmp/build/80754af9/pooch_1623324770023/work
219 | poyo @ file:///tmp/build/80754af9/poyo_1617751526755/work
220 | progressbar2==4.2.0
221 | prometheus-client @ file:///C:/Windows/TEMP/abs_ab9nx8qb08/croots/recipe/prometheus_client_1659455104602/work
222 | prompt-toolkit @ file:///C:/b/abs_6coz5_9f2s/croot/prompt-toolkit_1672387908312/work
223 | Protego @ file:///tmp/build/80754af9/protego_1598657180827/work
224 | protobuf==3.20.3
225 | psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
226 | ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
227 | pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work
228 | py @ file:///opt/conda/conda-bld/py_1644396412707/work
229 | pyasn1 @ file:///Users/ktietz/demo/mc3/conda-bld/pyasn1_1629708007385/work
230 | pyasn1-modules==0.2.8
231 | pycodestyle @ file:///C:/b/abs_d77nxvklcq/croot/pycodestyle_1674267231034/work
232 | pycosat @ file:///C:/b/abs_4b1rrw8pn9/croot/pycosat_1666807711599/work
233 | pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
234 | pyct @ file:///C:/b/abs_92z17k7ig2/croot/pyct_1675450330889/work
235 | pycurl==7.45.1
236 | PyDispatcher==2.0.5
237 | pydocstyle @ file:///C:/b/abs_6dz687_5i3/croot/pydocstyle_1675221688656/work
238 | pyerfa @ file:///C:/ci_310/pyerfa_1642088497201/work
239 | pyflakes @ file:///C:/b/abs_6dve6e13zh/croot/pyflakes_1674165143327/work
240 | pyglet==1.5.27
241 | Pygments @ file:///opt/conda/conda-bld/pygments_1644249106324/work
242 | PyHamcrest @ file:///tmp/build/80754af9/pyhamcrest_1615748656804/work
243 | PyJWT @ file:///C:/ci/pyjwt_1657529477795/work
244 | pylint @ file:///C:/b/abs_83sq99jc8i/croot/pylint_1676919922167/work
245 | pylint-venv @ file:///C:/b/abs_bf0lepsbij/croot/pylint-venv_1673990138593/work
246 | pyls-spyder==0.4.0
247 | PyNaCl @ file:///C:/Windows/Temp/abs_d5c3ajcm87/croots/recipe/pynacl_1659620667490/work
248 | pyodbc @ file:///C:/Windows/Temp/abs_61e3jz3u05/croots/recipe/pyodbc_1659513801402/work
249 | pyOpenSSL @ file:///C:/b/abs_552w85x1jz/croot/pyopenssl_1677607703691/work
250 | pyparsing @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_7f_7lba6rl/croots/recipe/pyparsing_1661452540662/work
251 | PyQt5==5.15.7
252 | PyQt5-sip @ file:///C:/Windows/Temp/abs_d7gmd2jg8i/croots/recipe/pyqt-split_1659273064801/work/pyqt_sip
253 | PyQtWebEngine==5.15.4
254 | pyrsistent @ file:///C:/ci_310/pyrsistent_1642117077485/work
255 | PySocks @ file:///C:/ci_310/pysocks_1642089375450/work
256 | pytest==7.1.2
257 | python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work
258 | python-lsp-black @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_dddk9lhpp1/croots/recipe/python-lsp-black_1661852041405/work
259 | python-lsp-jsonrpc==1.0.0
260 | python-lsp-server @ file:///C:/b/abs_e44khh1wya/croot/python-lsp-server_1677296772730/work
261 | python-slugify @ file:///tmp/build/80754af9/python-slugify_1620405669636/work
262 | python-snappy @ file:///C:/b/abs_61b1fmzxcn/croot/python-snappy_1670943932513/work
263 | python-utils==3.5.2
264 | pytoolconfig @ file:///C:/b/abs_18sf9z_iwl/croot/pytoolconfig_1676315065270/work
265 | pytz @ file:///C:/b/abs_22fofvpn1x/croot/pytz_1671698059864/work
266 | pyviz-comms @ file:///tmp/build/80754af9/pyviz_comms_1623747165329/work
267 | PyWavelets @ file:///C:/b/abs_a8r4b1511a/croot/pywavelets_1670425185881/work
268 | pywin32==305.1
269 | pywin32-ctypes @ file:///C:/ci_310/pywin32-ctypes_1642657835512/work
270 | pywinpty @ file:///C:/b/abs_73vshmevwq/croot/pywinpty_1677609966356/work/target/wheels/pywinpty-2.0.10-cp310-none-win_amd64.whl
271 | PyYAML @ file:///C:/b/abs_d0g7dqt2xw/croot/pyyaml_1670514768165/work
272 | pyzmq @ file:///C:/ci/pyzmq_1657616000714/work
273 | QDarkStyle @ file:///tmp/build/80754af9/qdarkstyle_1617386714626/work
274 | qstylizer @ file:///C:/b/abs_ef86cgllby/croot/qstylizer_1674008538857/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl
275 | QtAwesome @ file:///C:/b/abs_c5evilj98g/croot/qtawesome_1674008690220/work
276 | qtconsole @ file:///C:/b/abs_5bap7f8n0t/croot/qtconsole_1674008444833/work
277 | QtPy @ file:///C:/ci/qtpy_1662015130233/work
278 | queuelib==1.5.0
279 | regex @ file:///C:/ci/regex_1658258299320/work
280 | requests @ file:///C:/ci/requests_1657735340829/work
281 | requests-file @ file:///Users/ktietz/demo/mc3/conda-bld/requests-file_1629455781986/work
282 | requests-oauthlib==1.3.1
283 | requests-toolbelt @ file:///Users/ktietz/demo/mc3/conda-bld/requests-toolbelt_1629456163440/work
284 | rope @ file:///C:/b/abs_55g_tm_6ff/croot/rope_1676675029164/work
285 | rsa==4.9
286 | Rtree @ file:///C:/b/abs_e116ltblik/croot/rtree_1675157871717/work
287 | ruamel-yaml-conda @ file:///C:/b/abs_6ejaexx82s/croot/ruamel_yaml_1667489767827/work
288 | ruamel.yaml @ file:///C:/b/abs_30ee5qbthd/croot/ruamel.yaml_1666304562000/work
289 | ruamel.yaml.clib @ file:///C:/b/abs_aarblxbilo/croot/ruamel.yaml.clib_1666302270884/work
290 | scikit-image @ file:///C:/b/abs_63r0vmx78u/croot/scikit-image_1669241746873/work
291 | scikit-learn @ file:///C:/b/abs_7ck_bnw91r/croot/scikit-learn_1676911676133/work
292 | scikit-learn-intelex==20230228.214818
293 | scipy==1.10.0
294 | Scrapy @ file:///C:/b/abs_9fn69i_d86/croot/scrapy_1677738199744/work
295 | seaborn @ file:///C:/b/abs_68ltdkoyoo/croot/seaborn_1673479199997/work
296 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1632406701022/work
297 | sentry-sdk==1.24.0
298 | service-identity @ file:///Users/ktietz/demo/mc3/conda-bld/service_identity_1629460757137/work
299 | setproctitle==1.3.2
300 | sip @ file:///C:/Windows/Temp/abs_b8fxd17m2u/croots/recipe/sip_1659012372737/work
301 | six @ file:///tmp/build/80754af9/six_1644875935023/work
302 | smart-open @ file:///C:/ci/smart_open_1651235038100/work
303 | smmap==5.0.0
304 | sniffio @ file:///C:/ci_310/sniffio_1642092172680/work
305 | snowballstemmer @ file:///tmp/build/80754af9/snowballstemmer_1637937080595/work
306 | sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1623949099177/work
307 | soupsieve @ file:///C:/b/abs_fasraqxhlv/croot/soupsieve_1666296394662/work
308 | Sphinx @ file:///C:/ci/sphinx_1657617157451/work
309 | sphinxcontrib-applehelp @ file:///home/ktietz/src/ci/sphinxcontrib-applehelp_1611920841464/work
310 | sphinxcontrib-devhelp @ file:///home/ktietz/src/ci/sphinxcontrib-devhelp_1611920923094/work
311 | sphinxcontrib-htmlhelp @ file:///tmp/build/80754af9/sphinxcontrib-htmlhelp_1623945626792/work
312 | sphinxcontrib-jsmath @ file:///home/ktietz/src/ci/sphinxcontrib-jsmath_1611920942228/work
313 | sphinxcontrib-qthelp @ file:///home/ktietz/src/ci/sphinxcontrib-qthelp_1611921055322/work
314 | sphinxcontrib-serializinghtml @ file:///tmp/build/80754af9/sphinxcontrib-serializinghtml_1624451540180/work
315 | spyder @ file:///C:/b/abs_93s9xkw3pn/croot/spyder_1677776163871/work
316 | spyder-kernels @ file:///C:/b/abs_feh4xo1mrn/croot/spyder-kernels_1673292245176/work
317 | SQLAlchemy @ file:///C:/Windows/Temp/abs_f8661157-660b-49bb-a790-69ab9f3b8f7c8a8s2psb/croots/recipe/sqlalchemy_1657867864564/work
318 | stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work
319 | statsmodels @ file:///C:/b/abs_bdqo3zaryj/croot/statsmodels_1676646249859/work
320 | sumolib==1.17.0
321 | sympy @ file:///C:/b/abs_95fbf1z7n6/croot/sympy_1668202411612/work
322 | tables==3.7.0
323 | tabulate @ file:///C:/ci/tabulate_1657600805799/work
324 | TBB==0.2
325 | tblib @ file:///Users/ktietz/demo/mc3/conda-bld/tblib_1629402031467/work
326 | tenacity @ file:///C:/Windows/TEMP/abs_980d07a6-8e21-4174-9c17-7296219678ads7dhdov_/croots/recipe/tenacity_1657899108023/work
327 | tensorboard==2.12.3
328 | tensorboard-data-server==0.7.0
329 | tensorboardX==2.6
330 | tensordict==0.1.2
331 | tensorflow==2.12.0
332 | tensorflow-estimator==2.12.0
333 | tensorflow-intel==2.12.0
334 | tensorflow-io-gcs-filesystem==0.31.0
335 | tensorflow-probability==0.20.1
336 | tensorlayer==2.2.5
337 | termcolor==2.3.0
338 | terminado @ file:///C:/b/abs_25nakickad/croot/terminado_1671751845491/work
339 | text-unidecode @ file:///Users/ktietz/demo/mc3/conda-bld/text-unidecode_1629401354553/work
340 | textdistance @ file:///tmp/build/80754af9/textdistance_1612461398012/work
341 | threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work
342 | three-merge @ file:///tmp/build/80754af9/three-merge_1607553261110/work
343 | tifffile @ file:///tmp/build/80754af9/tifffile_1627275862826/work
344 | tinycss2 @ file:///C:/b/abs_52w5vfuaax/croot/tinycss2_1668168823131/work
345 | tldextract @ file:///opt/conda/conda-bld/tldextract_1646638314385/work
346 | tokenizers @ file:///C:/ci/tokenizers_1651821358528/work
347 | toml @ file:///tmp/build/80754af9/toml_1616166611790/work
348 | tomli @ file:///C:/Windows/TEMP/abs_ac109f85-a7b3-4b4d-bcfd-52622eceddf0hy332ojo/croots/recipe/tomli_1657175513137/work
349 | tomlkit @ file:///C:/Windows/TEMP/abs_3296qo9v6b/croots/recipe/tomlkit_1658946894808/work
350 | toolz @ file:///C:/b/abs_cfvk6rc40d/croot/toolz_1667464080130/work
351 | torch==2.0.1
352 | torchrl==0.1.1
353 | torchvision==0.15.2
354 | tornado @ file:///C:/ci_310/tornado_1642093111997/work
355 | tqdm @ file:///C:/b/abs_0axbz66qik/croots/recipe/tqdm_1664392691071/work
356 | traci==1.17.0
357 | traitlets @ file:///C:/b/abs_e5m_xjjl94/croot/traitlets_1671143896266/work
358 | transformers @ file:///C:/b/abs_8byf5_j714/croot/transformers_1667919454001/work
359 | Twisted @ file:///C:/Windows/Temp/abs_ccblv2rzfa/croots/recipe/twisted_1659592764512/work
360 | twisted-iocpsupport @ file:///C:/ci/twisted-iocpsupport_1646817083730/work
361 | typing_extensions @ file:///C:/b/abs_89eui86zuq/croot/typing_extensions_1669923792806/work
362 | ujson @ file:///C:/ci/ujson_1657525893897/work
363 | Unidecode @ file:///tmp/build/80754af9/unidecode_1614712377438/work
364 | urllib3 @ file:///C:/b/abs_9bcwxczrvm/croot/urllib3_1673575521331/work
365 | visdom==0.2.4
366 | w3lib @ file:///Users/ktietz/demo/mc3/conda-bld/w3lib_1629359764703/work
367 | wandb==0.15.3
368 | watchdog @ file:///C:/ci_310/watchdog_1642113443984/work
369 | wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work
370 | webencodings==0.5.1
371 | websocket-client @ file:///C:/ci_310/websocket-client_1642093970919/work
372 | Werkzeug @ file:///C:/b/abs_17q5kgb8bo/croot/werkzeug_1671216014857/work
373 | whatthepatch @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_e7bihs8grh/croots/recipe/whatthepatch_1661796085215/work
374 | widgetsnbextension @ file:///C:/ci/widgetsnbextension_1645009839917/work
375 | win-inet-pton @ file:///C:/ci_310/win_inet_pton_1642658466512/work
376 | wincertstore==0.2
377 | wrapt @ file:///C:/Windows/Temp/abs_7c3dd407-1390-477a-b542-fd15df6a24085_diwiza/croots/recipe/wrapt_1657814452175/work
378 | xarray @ file:///C:/b/abs_2fi_umrauo/croot/xarray_1668776806973/work
379 | xlwings @ file:///C:/b/abs_1ejhh6s00l/croot/xlwings_1677024180629/work
380 | yapf @ file:///tmp/build/80754af9/yapf_1615749224965/work
381 | zict==2.1.0
382 | zipp @ file:///C:/b/abs_b9jfdr908q/croot/zipp_1672387552360/work
383 | zope.interface @ file:///C:/ci_310/zope.interface_1642113633904/work
384 | zstandard==0.19.0
385 | 


--------------------------------------------------------------------------------
/algorithm/MA-SAC_main.py:
--------------------------------------------------------------------------------
  1 |  # -*- coding: utf-8 -*-
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  6 | import numpy as np
  7 | import math
  8 | # import gym
  9 | import sympy
 10 | from matplotlib import pyplot as plt
 11 | from scipy.io import loadmat
 12 | import os
 13 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
 14 | # env = gym.make("LargeGridWorld-v0").unwrapped
# Size of the state vector; passed as the input width of ActorNet/CriticNet.
state_number=2
# Size of the action vector; passed as the output width of ActorNet.
action_number=10 #9
# Bounds used to scale the actor's tanh mean and to clamp sampled actions.
max_action = 1
min_action = -1
# NOTE(review): RENDER / EP_MAX / EP_LEN are not used in the visible part of
# the file; presumably render flag, number of episodes and steps per episode.
RENDER=False
EP_MAX = 500
EP_LEN = 1000
# Presumably the reward discount factor -- TODO confirm against the update rule.
GAMMA = 0.9
# Learning rates. In the visible code q_lr drives the entropy-temperature
# optimizer, value_lr the critic optimizer and policy_lr the actor optimizer.
q_lr = 5e-5#3e-4
value_lr = 5e-4#3e-3
policy_lr = 1.5e-4#3
# Presumably the minibatch size drawn from the replay memory -- TODO confirm.
BATCH = 128
# Presumably the soft-update coefficient for the target critic -- TODO confirm.
tau = 1e-2
# Presumably the number of rows allocated in the replay Memory -- TODO confirm.
MemoryCapacity=20000
# NOTE(review): meaning of Switch is not visible in this part of the file.
Switch=0
# Presumably the grid dimensions of the map -- TODO confirm.
n_width=100
n_height = 100
# Loads the map from a path relative to the current working directory.
# NOTE(review): the repository tree lists radio_map/map_data.mat, not
# mapdata_0717.mat -- confirm the expected file name/location.
m = loadmat("mapdata_0717.mat") 
#correct_action=0
# Arrays stored under the "MARK_new" and "MARK_PL_real" keys of the .mat file.
# NOTE(review): presumably an obstacle/occupancy map and a path-loss map.
MARK= m["MARK_new"]
PL_AP=m["MARK_PL_real"]
 36 | 
 37 | 
 38 | class ActorNet(nn.Module):
 39 |     def __init__(self,inp,outp):
 40 |         super(ActorNet, self).__init__()
 41 |         self.in_to_y1=nn.Linear(inp,256)
 42 |         self.in_to_y1.weight.data.normal_(0,0.1)
 43 |         self.y1_to_y2=nn.Linear(256,256)
 44 |         self.y1_to_y2.weight.data.normal_(0,0.1)
 45 |         self.out=nn.Linear(256,outp)
 46 |         self.out.weight.data.normal_(0,0.1)
 47 |         self.std_out = nn.Linear(256, outp)
 48 |         self.std_out.weight.data.normal_(0, 0.1)
 49 | 
 50 |     def forward(self,inputstate):
 51 |         inputstate=self.in_to_y1(inputstate)
 52 |         inputstate=F.relu(inputstate)
 53 |         inputstate=self.y1_to_y2(inputstate)
 54 |         inputstate=F.relu(inputstate)
 55 |         mean=max_action*torch.tanh(self.out(inputstate))#输出概率分布的均值mean
 56 |         log_std=self.std_out(inputstate)#softplus激活函数的值域>0
 57 |         log_std=torch.clamp(log_std,-20,2)
 58 |         std=log_std.exp()
 59 |         return mean,std
 60 | 
 61 | class CriticNet(nn.Module):
 62 |     def __init__(self,input,output):
 63 |         super(CriticNet, self).__init__()
 64 |         #q1
 65 |         self.in_to_y1=nn.Linear(input+output,256)
 66 |         self.in_to_y1.weight.data.normal_(0,0.1)
 67 |         self.y1_to_y2=nn.Linear(256,256)
 68 |         self.y1_to_y2.weight.data.normal_(0,0.1)
 69 |         self.out=nn.Linear(256,1)
 70 |         self.out.weight.data.normal_(0,0.1)
 71 |         #q2
 72 |         self.q2_in_to_y1 = nn.Linear(input+output, 256)
 73 |         self.q2_in_to_y1.weight.data.normal_(0, 0.1)
 74 |         self.q2_y1_to_y2 = nn.Linear(256, 256)
 75 |         self.q2_y1_to_y2.weight.data.normal_(0, 0.1)
 76 |         self.q2_out = nn.Linear(256, 1)
 77 |         self.q2_out.weight.data.normal_(0, 0.1)
 78 |     def forward(self,s,a):
 79 |         inputstate = torch.cat((s, a), dim=1)
 80 |         #q1
 81 |         q1=self.in_to_y1(inputstate)
 82 |         q1=F.relu(q1)
 83 |         q1=self.y1_to_y2(q1)
 84 |         q1=F.relu(q1)
 85 |         q1=self.out(q1)
 86 |         #q2
 87 |         q2 = self.in_to_y1(inputstate)
 88 |         q2 = F.relu(q2)
 89 |         q2 = self.y1_to_y2(q2)
 90 |         q2 = F.relu(q2)
 91 |         q2 = self.out(q2)
 92 |         return q1,q2
 93 | 
 94 | class Memory():
 95 |     def __init__(self,capacity,dims,type_m):
 96 |         self.capacity=capacity
 97 |         self.mem=np.zeros((capacity,dims))
 98 |         self.memory_counter=0
 99 |         self.type_m=type_m
100 |     '''存储记忆'''
101 |     def store_transition(self,s,a,r,s_):
102 |         if self.type_m==1:
103 |             tran = np.hstack((s, [a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7],a[8],a[9],r], s_))  # 把s,a,r,s_困在一起，水平拼接
104 |         else:
105 |             tran = np.hstack((s, [a[0],r], s_))  # 把s,a,r,s_困在一起，水平拼接
106 | 
107 |         index = self.memory_counter % self.capacity#除余得索引
108 |         self.mem[index, :] = tran  # 给索引存值，第index行所有列都为其中一次的s,a,r,s_；mem会是一个capacity行，（s+a+r+s_）列的数组
109 |         self.memory_counter+=1
110 |     '''随机从记忆库里抽取'''
111 |     def sample(self,n):
112 |         assert self.memory_counter>=self.capacity,'记忆库没有存满记忆'
113 |         sample_index = np.random.choice(self.capacity, n)#从capacity个记忆里随机抽取n个为一批，可得到抽样后的索引号
114 |         new_mem = self.mem[sample_index, :]#由抽样得到的索引号在所有的capacity个记忆中  得到记忆s，a，r，s_
115 |         return new_mem
class Actor():
    """Policy wrapper that owns an ActorNet and the Adam optimizer training it."""

    def __init__(self):
        # The network maps a state to the (mean, std) of a Gaussian over actions.
        self.action_net=ActorNet(state_number,action_number)
        self.optimizer=torch.optim.Adam(self.action_net.parameters(),lr=policy_lr)

    def choose_action(self,s):
        """Sample an action for state ``s`` and return it as a NumPy array.

        The sample is drawn from Normal(mean, std) and clamped to
        [min_action, max_action]; no gradient is kept.
        """
        state = torch.FloatTensor(s)
        mu, sigma = self.action_net(state)
        sampled = torch.distributions.Normal(mu, sigma).sample()
        return torch.clamp(sampled, min_action, max_action).detach().numpy()

    def evaluate(self,s):
        """Return ``(action, action_logprob, z, mean, std)`` for state(s) ``s``.

        The action is ``tanh(mean + std * z)`` with ``z`` drawn from a standard
        normal, so it stays differentiable with respect to the network output.
        The log-probability includes the tanh change-of-variables correction.
        """
        state = torch.FloatTensor(s)
        mean, std = self.action_net(state)
        dist = torch.distributions.Normal(mean, std)
        # NOTE(review): sample() on a scalar Normal(0, 1) returns one scalar,
        # so the same noise value is broadcast to every action component and
        # batch row -- confirm this is intended.
        z = torch.distributions.Normal(0, 1).sample()
        pre_squash = mean + std * z
        action = torch.clamp(torch.tanh(pre_squash), min_action, max_action)
        # 1e-6 keeps the log finite when |action| reaches 1.
        action_logprob = dist.log_prob(pre_squash) - torch.log(1 - action.pow(2) + 1e-6)
        return action,action_logprob,z,mean,std

    def learn(self,actor_loss):
        """Take one optimizer step on ``actor_loss``."""
        self.optimizer.zero_grad()
        actor_loss.backward()
        self.optimizer.step()
144 | 
class Entroy():
    """Learnable entropy temperature: holds ``log_alpha`` and its optimizer."""
    def __init__(self):
        # alpha is parameterised through its logarithm; log_alpha starts at 0,
        # so alpha starts at 1.
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.optimizer = torch.optim.Adam([self.log_alpha], lr=q_lr)
        self.alpha = self.log_alpha.exp()
        self.target_entropy = -action_number

    def learn(self, entroy_loss):
        """Take one optimizer step on ``entroy_loss``."""
        self.optimizer.zero_grad()
        entroy_loss.backward()
        self.optimizer.step()
157 | 
class Critic():
    """Twin-Q critic: online network, target network, optimizer and loss."""
    def __init__(self):
        # Both networks map a (state, action) batch to a pair of Q-values.
        self.critic_v,self.target_critic_v=CriticNet(state_number,action_number),CriticNet(state_number,action_number)
        # Start the target as an exact copy of the online network. Without
        # this the two nets begin from unrelated random weights and
        # soft_update() only slowly pulls the target towards the online one.
        self.target_critic_v.load_state_dict(self.critic_v.state_dict())
        self.optimizer = torch.optim.Adam(self.critic_v.parameters(), lr=value_lr,eps=1e-5)
        self.lossfunc = nn.MSELoss()

    def soft_update(self):
        """Polyak-average the online weights into the target network.

        target <- (1 - tau) * target + tau * online, parameter by parameter.
        """
        for target_param, param in zip(self.target_critic_v.parameters(), self.critic_v.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)

    def get_v(self,s,a):
        """Return the online network's ``(q1, q2)`` for the given batch."""
        return self.critic_v(s,a)

    def learn(self,current_q1,current_q2,target_q):
        """One optimizer step on the summed MSE of both heads against ``target_q``."""
        loss = self.lossfunc(current_q1, target_q) + self.lossfunc(current_q2, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
175 | 
def cosVector(x,y):
    """Return the cosine of the angle between vectors ``x`` and ``y``.

    Computed as dot(x, y) / sqrt(|x|^2 * |y|^2) over the first ``len(x)``
    components; ``y`` must have at least as many components as ``x``.
    """
    dot = 0.0
    sq_norm_x = 0.0
    sq_norm_y = 0.0
    for idx, x_val in enumerate(x):
        y_val = y[idx]
        dot += x_val * y_val          # sum(X*Y)
        sq_norm_x += x_val ** 2       # sum(X*X)
        sq_norm_y += y_val ** 2       # sum(Y*Y)
    return dot / ((sq_norm_x * sq_norm_y) ** 0.5)
185 | 
186 | 
# Entry point. Switch == 0 trains one SAC agent on the grid map; any other
# value runs the checkpoint-evaluation branch at the bottom.
#
# Code for further agents (actor2/3/4, M2/3/4, the matching movement, SINR,
# reward and update steps) used to live here as commented-out copies of the
# agent-1 logic; it was dead code and has been dropped for readability.
if Switch==0:
    print('SAC训练中...')
    actor = Actor()
    critic = Critic()
    entroy=Entroy()
    # One replay row is the flattened [s, a, r, s_].
    M = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
    all_ep_r = []

    # Goal cell of the trained agent; an episode ends when it is reached.
    end_location = [15*2,32*2]
    # The names below are only referenced by the disabled multi-agent /
    # error-rate code, not by the live code in this section.
    end_location2 = [45*2,45*2]
    end_location3 = [47,38*2]
    D=100
    m_d=100
    lambda_q=10

    for episode in range(EP_MAX):
        observation = np.array([4*2,5*2], dtype=np.float32)  # environment reset: start cell
        # Fixed positions of three further nodes used in the channel terms
        # below (presumably static users -- TODO confirm).
        observation_su1 = np.array([17, 25*2], dtype=np.float32)
        observation_su2 = np.array([50, 25*2], dtype=np.float32)
        observation_su3 = np.array([84, 25*2], dtype=np.float32)

        reward_totle = 0
        reward_totle2 = 0
        reward_totle3 = 0
        reward_totle4 = 0
        # done1 is only set right before the loop breaks, so the
        # `not done1` guards below are always true while the loop runs.
        done1=False
        done2=False
        done3=False
        bobao=0
        bobao2=0
        bobao3=0
        for timestep in range(EP_LEN):
            action = actor.choose_action(observation)

            # --- movement: action[0], action[1] are the x / y displacement ---
            if not done1:
                [old_x, old_y] = observation
                new_x, new_y = int(old_x), int(old_y)
                new_x=int(old_x+action[0])
                new_y=int(old_y+action[1])
                # Keep the new cell strictly inside the grid.
                if int(new_x) <= 0:
                    new_x = 1
                if int(new_x) >= n_width:
                    new_x = int(n_width)-1
                if int(new_y) <= 0:
                    new_y = 1
                if int(new_y) >= n_height:
                    new_y = int(n_height)-1
                # Cells marked 2 cannot be entered: the move is cancelled.
                if MARK[new_x,new_y] == 2:
                    new_x, new_y = old_x, old_y
                observation_=np.array([new_x, new_y], dtype=np.float32)
            else:
                observation_ = observation

            # Nudge action[8] off the exact bounds so that (action[8]+1)/2
            # never becomes exactly 0 or 1 in the terms below.
            if action[8]==-1:
                action[8]=-0.9999999
            if action[8]==1:
                action[8]=0.9999999

            # --- channel terms ---
            # w_1: three complex weights built from (amplitude, phase) pairs
            # action[2..7]; each phase is (1 + action[k]) * pi.
            w_1=np.array([action[2]* math.exp(1)**(1j*(1+action[3])*math.pi), action[4]* math.exp(1)**(1j*(1+action[5])*math.pi), action[6]* math.exp(1)**(1j*(1+action[7])*math.pi)])
            # For each node: cosine between the x-axis and the offset from the
            # fixed point (50, 100, 2), a 3-element phase vector derived from
            # it, and a gain taken from the PL_AP map at the node's cell.
            theta_1=cosVector([1,0,0],[observation_[0]-50,observation_[1]-100, 1-2])
            a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
            b_1_AP_LOS=math.sqrt(PL_AP[int(observation_[0]), int(observation_[1])])
            h_1=b_1_AP_LOS*a_1
            interference_1=10**(-9)
            theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2])
            a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
            b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])])
            h_4=b_4_AP_LOS*a_4
            interference_4=10**(-9)
            theta_5=cosVector([1,0,0],[observation_su2[0]-50,observation_su2[1]-100, 1-2])
            a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), math.exp(1)**(-2*1j*(math.pi*theta_5))])#
            b_5_AP_LOS=math.sqrt(PL_AP[int(observation_su2[0]), int(observation_su2[1])])
            h_5=b_5_AP_LOS*a_5
            interference_5=10**(-9)
            theta_6=cosVector([1,0,0],[observation_su3[0]-50,observation_su3[1]-100, 1-2])
            a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])#
            b_6_AP_LOS=math.sqrt(PL_AP[int(observation_su3[0]), int(observation_su3[1])])
            h_6=b_6_AP_LOS*a_6
            interference_6=10**(-9)
            # (action[8]+1)/2 is the share given to node 1, the rest goes to
            # node 4; the sign of action[8] decides which of the two gets the
            # other one's share added to its interference term.
            if action[8]>0:
                interference_1+=(1-(action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2
            else:
                interference_4+=((action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
            SINR_1=((action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
            SINR_4=(1-(action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4

            # --- reward ---
            # Euclidean distance to the goal, with both axes halved.
            distance_01_2=(observation_[0]-end_location[0])*(observation_[0]-end_location[0])/4+(observation_[1]-end_location[1])*(observation_[1]-end_location[1])/4
            distance_01 = math.sqrt(distance_01_2)
            # Distance penalty plus a bonus for the weaker of the two SINRs;
            # the bonus is zero until min(SINR)/1000 exceeds 0.01.
            reward = -(distance_01/50)+max(0.01, min(SINR_1, SINR_4)/1000)-0.01
            if distance_01==0:
                reward = 1
            if not done1:
                reward_totle += reward

            M.store_transition(observation, action, reward, observation_)

            # --- SAC update: runs only once the replay memory has been filled ---
            if M.memory_counter > MemoryCapacity and not done1:
                b_M = M.sample(BATCH)
                # Slice the flattened rows back into s, a, r, s_.
                b_s = b_M[:, :state_number]
                b_a = b_M[:, state_number: state_number + action_number]
                b_r = b_M[:, -state_number - 1: -state_number]
                b_s_ = b_M[:, -state_number:]
                b_s = torch.FloatTensor(b_s)
                b_a = torch.FloatTensor(b_a)
                b_r = torch.FloatTensor(b_r)
                b_s_ = torch.FloatTensor(b_s_)
                new_action, log_prob_, z, mean, log_std = actor.evaluate(b_s_)
                # NOTE(review): the bootstrap target is computed with the
                # online critic (get_v), not critic.target_critic_v, so the
                # soft-updated target network is never read -- confirm intent.
                target_q1,target_q2=critic.get_v(b_s_,new_action)
                target_q=b_r+GAMMA*(torch.min(target_q1,target_q2)-entroy.alpha*log_prob_)
                current_q1, current_q2 = critic.get_v(b_s, b_a)
                critic.learn(current_q1,current_q2,target_q.detach())
                a,log_prob,_,_,_=actor.evaluate(b_s)
                q1,q2=critic.get_v(b_s,a)
                q=torch.min(q1,q2)
                actor_loss = (entroy.alpha * log_prob - q).mean()
                actor.learn(actor_loss)
                alpha_loss = -(entroy.log_alpha.exp() * (log_prob + entroy.target_entropy).detach()).mean()
                entroy.learn(alpha_loss)
                # Refresh the cached alpha so the next update uses the new value.
                entroy.alpha=entroy.log_alpha.exp()
                # Soft update of the target critic.
                critic.soft_update()
            observation = observation_
            if distance_01==0:
                done1=True
            if done1:
                break
        print("Ep: {} | rewards: {} {} {} {} | Step: {:.4f} | END: {}".format(episode, reward_totle, reward_totle2, reward_totle3, reward_totle4, timestep, observation))
        all_ep_r.append(reward_totle)
        # NOTE(review): no checkpoint is written during training (the save
        # call was commented out), so the test branch below has nothing to
        # load unless a file is produced elsewhere.
    plt.plot(np.arange(len(all_ep_r)), all_ep_r)
    plt.xlabel('Episode')
    plt.ylabel('Moving averaged episode reward')
    plt.show()
else:
    print('SAC测试中...')
    aa=Actor()
    # Raw string: in a normal literal "\230" and "\060" are octal escapes
    # (and "\A", "\D", "\m" are invalid escapes), which silently corrupted
    # this Windows path.
    checkpoint_aa = torch.load(r"C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\0606\model_SAC.pth")
    aa.action_net.load_state_dict(checkpoint_aa['net'])
    for j in range(10):
        total_rewards = 0
        for timestep in range(EP_LEN):
            # NOTE(review): the environment step that used to produce
            # `reward` here is disabled, and nothing in this branch assigns
            # it -- the rollout must be restored before this loop can run.
            total_rewards += reward
        print("Score：", total_rewards)
616 | 


--------------------------------------------------------------------------------
/algorithm/MA-DDPG_main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Deep Deterministic Policy Gradient (DDPG)
  3 | -----------------------------------------
  4 | An algorithm concurrently learns a Q-function and a policy.
  5 | It uses off-policy data and the Bellman equation to learn the Q-function,
  6 | and uses the Q-function to learn the policy.
  7 | Reference
  8 | ---------
  9 | Deterministic Policy Gradient Algorithms, Silver et al. 2014
 10 | Continuous Control With Deep Reinforcement Learning, Lillicrap et al. 2016
 11 | MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/
 12 | Environment
 13 | -----------
 14 | Openai Gym Pendulum-v0, continual action space
 15 | Prerequisites
 16 | -------------
 17 | tensorflow >=2.0.0a0
  18 | tensorflow-probability 0.6.0
 19 | tensorlayer >=2.0.0
 20 | To run
 21 | ------
 22 | python tutorial_DDPG.py --train/test
 23 | """
 24 | 
 25 | import argparse
 26 | import os
 27 | import random
 28 | import time
 29 | import math
 30 | 
 31 | #import gym
 32 | import matplotlib.pyplot as plt
 33 | import numpy as np
 34 | import tensorflow as tf
 35 | from scipy.io import loadmat
 36 | 
 37 | import tensorlayer as tl
 38 | 
# Command-line flags: --train / --test.
# NOTE: --train is a store_true flag whose default is already True, so
# args.train is True whether or not the flag is passed.
parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
parser.add_argument('--train', dest='train', action='store_true', default=True)
parser.add_argument('--test', dest='test', action='store_true', default=False)
args = parser.parse_args()

#####################  hyper parameters  ####################

ENV_ID = 'LargeGridWorld-v0'  # environment id
RANDOM_SEED = 666  # random seed, can be either an int number or None
RENDER = False  # render while training

ALG_NAME = 'DDPG'
TRAIN_EPISODES = 500  # total number of episodes for training
TEST_EPISODES = 10  # total number of episodes for testing
MAX_STEPS = 1000  # total number of steps for each episode (alternative value noted by the author: 20000)

LR_A = 0.001  # learning rate for actor
LR_C = 0.002  # learning rate for critic
GAMMA = 0.9  # reward discount
TAU = 0.01  # soft replacement
MEMORY_CAPACITY = 20000  # size of replay buffer (alternative value noted by the author: 500000)
BATCH_SIZE = 64  # update action batch size
VAR = 5  # control exploration
#var_real=VAR
###############################  DDPG  ####################################
# Grid size and map data.
n_width=93
n_height = 93
# NOTE(review): absolute path on the author's machine -- must be adapted
# before this script can run anywhere else.
m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat") 
#correct_action=0
# Two arrays read from the .mat file; presumably a per-cell marker grid and a
# per-cell path-loss map -- TODO confirm against the map-generation script.
MARK= m["MARK_new"]
PL_AP=m["MARK_PL_real"]
 71 | 
 72 | 
 73 | class ReplayBuffer:
 74 |     """
 75 |     a ring buffer for storing transitions and sampling for training
 76 |     :state: (state_dim,)
 77 |     :action: (action_dim,)
 78 |     :reward: (,), scalar
 79 |     :next_state: (state_dim,)
 80 |     :done: (,), scalar (0 and 1) or bool (True and False)
 81 |     """
 82 | 
 83 |     def __init__(self, capacity):
 84 |         self.capacity = capacity
 85 |         self.buffer = []
 86 |         self.position = 0
 87 | 
 88 |     def push(self, state, action, reward, next_state, done):
 89 |         if len(self.buffer) < self.capacity:
 90 |             self.buffer.append(None)
 91 |         self.buffer[self.position] = (state, action, reward, next_state, done)
 92 |         self.position = int((self.position + 1) % self.capacity)  # as a ring buffer
 93 | 
 94 |     def sample(self, batch_size):
 95 |         batch = random.sample(self.buffer, batch_size)
 96 |         state, action, reward, next_state, done = map(np.stack, zip(*batch))  # stack for each element
 97 |         return state, action, reward, next_state, done
 98 | 
 99 |     def __len__(self):
100 |         return len(self.buffer)
101 | 
def cosVector(x,y):
    """Return the cosine of the angle between vectors ``x`` and ``y``.

    Computed as dot(x, y) / sqrt(|x|^2 * |y|^2) over the first ``len(x)``
    components; ``y`` must have at least as many components as ``x``.
    """
    dot = 0.0
    sq_norm_x = 0.0
    sq_norm_y = 0.0
    for idx, x_val in enumerate(x):
        y_val = y[idx]
        dot += x_val * y_val          # sum(X*Y)
        sq_norm_x += x_val ** 2       # sum(X*X)
        sq_norm_y += y_val ** 2       # sum(Y*Y)
    return dot / ((sq_norm_x * sq_norm_y) ** 0.5)
111 | 
class DDPG(object):
    """
    One DDPG agent: actor and critic networks, their target copies, the
    optimizers, and the update step that samples from a replay buffer.
    """
    def __init__(self, action_dim, state_dim, action_range, replay_buffer, agent_num=0):
        """
        Build the networks, target networks and optimizers of one agent
        :param action_dim: number of units of the actor output layer
        :param state_dim: size of the state input of actor and critic
        :param action_range: factor applied to the tanh output of the actor; also the clipping bound in get_action
        :param replay_buffer: buffer that learn() samples from
        :param agent_num: agent index, used to build layer and model names
        """
        self.replay_buffer = replay_buffer
        self.action_dim, self.state_dim, self.action_range = action_dim, state_dim, action_range
        self.var = VAR  # current exploration noise std; overwritten on every learn() call
        self.agent_num=agent_num

        W_init = tf.random_normal_initializer(mean=0, stddev=0.3)
        b_init = tf.constant_initializer(0.1)

        def get_actor(input_state_shape, name=str(self.agent_num)):
            """
            Build actor network
            :param input_state_shape: shape of the state input
            :param name: suffix appended to 'Actor' to form the model name
            :return: actor model mapping a state to an action
            """
            input_layer = tl.layers.Input(input_state_shape, name='A_input'+str(self.agent_num))
            layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l1')(input_layer)
            layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l2')(layer)
            layer = tl.layers.Dense(n_units=action_dim, act=tf.nn.tanh, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_a')(layer)
            # Scale the tanh output by action_range.
            layer = tl.layers.Lambda(lambda x: action_range * x)(layer)
            return tl.models.Model(inputs=input_layer, outputs=layer, name='Actor' + name)

        def get_critic(input_state_shape, input_action_shape, name=str(self.agent_num)):
            """
            Build critic network
            :param input_state_shape: shape of the state input
            :param input_action_shape: shape of the action input
            :param name: suffix appended to 'Critic' to form the model name
            :return: critic model producing the Q value Q(s,a)
            """
            state_input = tl.layers.Input(input_state_shape, name=str(self.agent_num)+'C_s_input')
            action_input = tl.layers.Input(input_action_shape, name=str(self.agent_num)+'C_a_input')
            # State and action are concatenated along axis 1 before the dense layers.
            layer = tl.layers.Concat(1)([state_input, action_input])
            layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l1')(layer)
            layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l2')(layer)
            layer = tl.layers.Dense(n_units=1, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_out')(layer)
            return tl.models.Model(inputs=[state_input, action_input], outputs=layer, name='Critic' + name)

        self.actor = get_actor([None, state_dim])
        self.critic = get_critic([None, state_dim], [None, action_dim])
        self.actor.train()
        self.critic.train()

        def copy_para(from_model, to_model):
            """
            Copy the trainable weights of one model into another
            :param from_model: latest model
            :param to_model: target model
            :return: None
            """
            for i, j in zip(from_model.trainable_weights, to_model.trainable_weights):
                j.assign(i)

        # Target networks start as exact copies of the online networks and stay in eval mode.
        self.actor_target = get_actor([None, state_dim], name=str(self.agent_num)+'_target')
        copy_para(self.actor, self.actor_target)
        self.actor_target.eval()

        self.critic_target = get_critic([None, state_dim], [None, action_dim], name=str(self.agent_num)+'_target')
        copy_para(self.critic, self.critic_target)
        self.critic_target.eval()

        self.ema = tf.train.ExponentialMovingAverage(decay=1 - TAU)  # soft replacement

        self.actor_opt = tf.optimizers.Adam(LR_A)
        self.critic_opt = tf.optimizers.Adam(LR_C)

    def ema_update(self):
        """
        Soft updating by exponential smoothing: refresh the moving averages of
        the online weights and assign them to the target networks
        :return: None
        """
        paras = self.actor.trainable_weights + self.critic.trainable_weights
        self.ema.apply(paras)
        # Targets are listed in the same order as paras (actor first, then critic), so the pairs line up.
        for i, j in zip(self.actor_target.trainable_weights + self.critic_target.trainable_weights, paras):
            i.assign(self.ema.average(j))

    def get_action(self, state, greedy=False):
        """
        Choose action
        :param state: a single state (it is wrapped into a batch of one before the forward pass)
        :param greedy: if True, return the actor output without exploration noise
        :return: the actor output when greedy; otherwise a float32 array sampled from a normal
                 distribution centred on the actor output (std self.var) and clipped to
                 [-action_range, action_range]
        """
        action = self.actor(np.array([state]))[0]
        if greedy:
            return action
            #return np.random.rand(len(action)).astype(np.float32)- action_range
        return np.clip(
            np.random.normal(action, self.var), -self.action_range, self.action_range
        ).astype(np.float32)  # add randomness to action selection for exploration

    def learn(self, exact_var):
        """
        Update parameters with one sampled batch: critic step, actor step, then soft target update
        :param exact_var: exploration noise std to store in self.var for later get_action calls
        :return: None
        """
        self.var = exact_var
        #print(self.var)
        states, actions, rewards, states_, done = self.replay_buffer.sample(BATCH_SIZE)
        # Add a trailing axis so rewards/done line up with the critic output.
        rewards = rewards[:, np.newaxis]
        done = done[:, np.newaxis]

        # Critic update: regress Q(s, a) onto r + (1 - done) * GAMMA * Q_target(s', actor_target(s')).
        with tf.GradientTape() as tape:
            actions_ = self.actor_target(states_)
            q_ = self.critic_target([states_, actions_])
            target = rewards + (1 - done) * GAMMA * q_
            q_pred = self.critic([states, actions])
            td_error = tf.losses.mean_squared_error(target, q_pred)
        critic_grads = tape.gradient(td_error, self.critic.trainable_weights)
        self.critic_opt.apply_gradients(zip(critic_grads, self.critic.trainable_weights))

        # Actor update: gradients are taken w.r.t. the actor weights only.
        with tf.GradientTape() as tape:
            actions = self.actor(states)
            q = self.critic([states, actions])
            actor_loss = -tf.reduce_mean(q)  # maximize the q
        actor_grads = tape.gradient(actor_loss, self.actor.trainable_weights)
        self.actor_opt.apply_gradients(zip(actor_grads, self.actor.trainable_weights))
        self.ema_update()


    def save(self):
        """
        save trained weights of the four networks as hdf5 files under model/<ALG_NAME>_<ENV_ID>
        :return: None
        """
        # NOTE(review): the path does not include agent_num, so several agents
        # saving in the same run would write to the same files.
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        if not os.path.exists(path):
            os.makedirs(path)
        tl.files.save_weights_to_hdf5(os.path.join(path, 'actor.hdf5'), self.actor)
        tl.files.save_weights_to_hdf5(os.path.join(path, 'actor_target.hdf5'), self.actor_target)
        tl.files.save_weights_to_hdf5(os.path.join(path, 'critic.hdf5'), self.critic)
        tl.files.save_weights_to_hdf5(os.path.join(path, 'critic_target.hdf5'), self.critic_target)

    def load(self):
        """
        load trained weights of the four networks from model/<ALG_NAME>_<ENV_ID>
        :return: None
        """
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor.hdf5'), self.actor)
        tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor_target.hdf5'), self.actor_target)
        tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic.hdf5'), self.critic)
        tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic_target.hdf5'), self.critic_target)
260 | 
261 | 
if __name__ == '__main__':
    # Training script: three independent DDPG agents (one replay buffer each) move on
    # the MARK grid towards their own end location. Each 10-dim action carries the
    # displacement (indices 0-1), three amplitude/phase pairs (indices 2-7), a
    # share factor (index 8) and an ordering score (index 9).
    n_mu=3
    n_M=5
    n_o=6*7
    
    #env = gym.make(ENV_ID).unwrapped
    #env = gym.make(ENV_ID).unwrapped

    # reproducible
    # env.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

    state_dim = 2  # state is the (x, y) grid position
    action_dim = 10
    action_range = 1  # scale action, [-action_range, action_range]
    # The three ranges below are only referenced by the commented-out agents 4-7.
    action_range_su = np.array([1, 1, 1], dtype=np.float32)
    action_range_ris = np.array([1]*(2*n_M+1), dtype=np.float32)
    action_range_n_o = np.array([1]*(2+1), dtype=np.float32)
    
    buffer = ReplayBuffer(MEMORY_CAPACITY) #MU1
    buffer2 = ReplayBuffer(MEMORY_CAPACITY)  #MU2
    buffer3 = ReplayBuffer(MEMORY_CAPACITY)  #MU3
    # buffer4 = ReplayBuffer(MEMORY_CAPACITY)  #su1
    # buffer5 = ReplayBuffer(MEMORY_CAPACITY)  #su2
    # buffer6 = ReplayBuffer(MEMORY_CAPACITY)  #ris
    # buffer7 = ReplayBuffer(MEMORY_CAPACITY)  #commu
    
    
    agent = DDPG(action_dim, state_dim, action_range, buffer, 1) #mu
    agent2 = DDPG(action_dim, state_dim, action_range, buffer2, 2)
    agent3 = DDPG(action_dim, state_dim, action_range, buffer3, 3)
    # agent4 = DDPG(action_dim-2, state_dim, action_range_su, buffer4, 4) #su
    # agent5 = DDPG(action_dim-2, state_dim, action_range_su, buffer5, 5)
    # agent6 = DDPG(2*n_M+1, state_dim*n_mu, action_range_ris, buffer6, 6) #ris
    # agent7 = DDPG(2+1, state_dim*n_mu, action_range_n_o, buffer7, 7) #commu n_o
    
    # Per-agent exploration noise std, decayed independently on each learning step.
    VAR1=VAR
    VAR2=VAR
    VAR3=VAR
    t0 = time.time()
    if args.train:  # train
        # Exponentially smoothed episode rewards, one list per agent (plotted at the end).
        all_episode_reward = []
        all_episode_reward2 = []
        all_episode_reward3 = []
        # all_episode_reward4 = []
        # all_episode_reward5 = []
        # all_episode_reward6 = []
        # all_episode_reward7 = []
        init=0
        
        # end_location = [38*2,11*2]
        # end_location2 = [26*2,18*2]  #[8*2,9*2]
        # end_location3 = [16*2,32*2] #[35*2,9*2]
        # Goal cell of each agent.
        end_location = [15*2,32*2]
        end_location2 = [45*2,45*2]
        end_location3 = [47,38*2]
        #end_location = end_location3
        #end_location2 = end_location3
        # Counters of steps taken while the agent was not done; never reset between
        # episodes and not read by any active code in this block.
        study=0
        study2=0
        study3=0
        for episode in range(TRAIN_EPISODES):
            #state initialize
            # Trajectories (x and y coordinates) of the three agents for this episode.
            x_k1_array = []
            y_k1_array = []
            x_k2_array = []
            y_k2_array = []
            x_k3_array = []
            y_k3_array = []
            state = np.array([4*2,5*2], dtype=np.float32)  # reset the environment
            state2 = np.array([20*2,20*2], dtype=np.float32)
            state3 = np.array([40*2,10*2], dtype=np.float32)
            # Positions of three nodes that never move during training
            # (presumably secondary users, given the 'su' naming -- TODO confirm).
            state_su1 = np.array([17, 25*2], dtype=np.float32)
            state_su2 = np.array([50, 25*2], dtype=np.float32)
            state_su3 = np.array([84, 25*2], dtype=np.float32)
            episode_reward = 0
            episode_reward2 = 0
            episode_reward3 = 0
            # episode_reward4 = 0
            # episode_reward5 = 0
            # episode_reward6 = 0
            # episode_reward7 = 0
            done1=False
            done2=False
            done3=False
            # Flags ensuring each agent's arrival is handled (printed/logged) only once per episode.
            bobao=0
            bobao2=0
            bobao3=0
            x_k1_array.append(state[0])
            y_k1_array.append(state[1])
            x_k2_array.append(state2[0])
            y_k2_array.append(state2[1])
            x_k3_array.append(state3[0])
            y_k3_array.append(state3[1])
            #greedy0=True
            for steps in range(MAX_STEPS):
                # if RENDER:
                #     env.render()
                # Add exploration noise
                # action selection
                #if len(buffer) >= MEMORY_CAPACITY:
                #    greedy0=False
                action = agent.get_action(state)
                action2 = agent2.get_action(state2)
                action3 = agent3.get_action(state3)
                
                
                # action4 = agent4.get_action(state4)
                # action5 = agent5.get_action(state5)
                # action6 = agent6.get_action(state6)
                # action7 = agent7.get_action(state7)
                # Step
                # Agent 1 movement: add the displacement (truncated to int), clamp to
                # [1, n_width-1] x [1, n_height-1], and stay in place if the target cell is marked 2.
                if not done1:
                    [old_x, old_y] = state
                    # NOTE(review): this assignment is immediately overwritten by the two lines below.
                    new_x, new_y = int(old_x), int(old_y)
                    new_x=int(old_x+action[0])
                    new_y=int(old_y+action[1])
                    if int(new_x) <= 0: 
                        new_x = 1
                    if int(new_x) >= n_width: 
                        new_x = int(n_width)-1
                    if int(new_y) <= 0: 
                        new_y = 1
                    if int(new_y) >= n_height: 
                        new_y = int(n_height)-1
                    if MARK[new_x,new_y] == 2:
                        new_x, new_y = old_x, old_y
                    state_=np.array([new_x, new_y], dtype=np.float32)
                    x_k1_array.append(state_[0])
                    y_k1_array.append(state_[1])
                else:
                    # An agent that has arrived keeps its position.
                    state_ = state
                # Agent 2 movement (same rules as agent 1).
                if not done2:
                    [old_x, old_y] = state2
                    new_x, new_y = int(old_x), int(old_y)
                    new_x=int(old_x+action2[0])
                    new_y=int(old_y+action2[1])
                    if int(new_x) <= 0: 
                        new_x = 1 
                    if int(new_x) >= n_width: 
                        new_x = int(n_width)-1
                    if int(new_y) <= 0: 
                        new_y = 1
                    if int(new_y) >= n_height: 
                        new_y = int(n_height)-1
                    if MARK[new_x,new_y] == 2:
                        new_x, new_y = old_x, old_y
                    state2_=np.array([new_x, new_y], dtype=np.float32)
                    x_k2_array.append(state2_[0])
                    y_k2_array.append(state2_[1])
                else:
                    state2_ = state2
                # Agent 3 movement (same rules as agent 1).
                if not done3:
                    [old_x, old_y] = state3
                    new_x, new_y = int(old_x), int(old_y)
                    new_x=int(old_x+action3[0])
                    new_y=int(old_y+action3[1])
                    if int(new_x) <= 0: 
                        new_x = 1 
                    if int(new_x) >= n_width: 
                        new_x = int(n_width)-1
                    if int(new_y) <= 0: 
                        new_y = 1
                    if int(new_y) >= n_height: 
                        new_y = int(n_height)-1
                    if MARK[new_x,new_y] == 2:
                        new_x, new_y = old_x, old_y
                    state3_=np.array([new_x, new_y], dtype=np.float32)
                    x_k3_array.append(state3_[0])
                    y_k3_array.append(state3_[1])
                else:
                    state3_ = state3
                # state4+5 static
                
                # state6_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]])
                # state7_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]])
                # NOTE(review): done_sys is not read by any active code in this block.
                done_sys = done1 and done2 and done3
                
                # Nudge action[8] away from exactly -1 / +1 so that neither share
                # (a8+1)/2 nor 1-(a8+1)/2 used below is exactly zero.
                if action[8]==-1:
                    action[8]=-0.9999999
                if action2[8]==-1:
                    action2[8]=-0.9999999
                if action3[8]==-1:
                    action3[8]=-0.9999999
                if action[8]==1:
                    action[8]=0.9999999
                if action2[8]==1:
                    action2[8]=0.9999999
                if action3[8]==1:
                    action3[8]=0.9999999
                
                # w_k: complex 3-vector of agent k; entry i has amplitude action[2+2i]
                # and phase (1 + action[3+2i]) * pi.
                w_1=np.array([action[2]* math.exp(1)**(1j*(1+action[3])*math.pi), action[4]* math.exp(1)**(1j*(1+action[5])*math.pi), action[6]* math.exp(1)**(1j*(1+action[7])*math.pi)])
                w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)])
                w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)])
                # For each of the six nodes (agents 1-3 at their new positions, static nodes 4-6):
                #   theta_k : cosine between the x-axis and (x-50, y-100, -1)
                #             (presumably the offset from an access point at (50, 100) -- TODO confirm)
                #   a_k     : 3-element phase-ramp vector built from theta_k
                #   h_k     : a_k scaled by sqrt(PL_AP[x, y])
                #   interference_k : starts at 1e-9 and is increased by the exec blocks below
                theta_1=cosVector([1,0,0],[state_[0]-50,state_[1]-100, 1-2])
                a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
                b_1_AP_LOS=math.sqrt(PL_AP[int(state_[0]), int(state_[1])])
                h_1=b_1_AP_LOS*a_1
                interference_1=10**(-9)
                theta_2=cosVector([1,0,0],[state2_[0]-50,state2_[1]-100, 1-2])
                a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])#
                b_2_AP_LOS=math.sqrt(PL_AP[int(state2_[0]), int(state2_[1])])
                h_2=b_2_AP_LOS*a_2
                interference_2=10**(-9)
                theta_3=cosVector([1,0,0],[state3_[0]-50,state3_[1]-100, 1-2])
                a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])#
                b_3_AP_LOS=math.sqrt(PL_AP[int(state3_[0]), int(state3_[1])])
                h_3=b_3_AP_LOS*a_3
                interference_3=10**(-9)
                theta_4=cosVector([1,0,0],[state_su1[0]-50,state_su1[1]-100, 1-2])
                a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
                b_4_AP_LOS=math.sqrt(PL_AP[int(state_su1[0]), int(state_su1[1])])
                h_4=b_4_AP_LOS*a_4
                interference_4=10**(-9)
                theta_5=cosVector([1,0,0],[state_su2[0]-50,state_su2[1]-100, 1-2])
                a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), math.exp(1)**(-2*1j*(math.pi*theta_5))])#
                b_5_AP_LOS=math.sqrt(PL_AP[int(state_su2[0]), int(state_su2[1])])
                h_5=b_5_AP_LOS*a_5
                interference_5=10**(-9)
                theta_6=cosVector([1,0,0],[state_su3[0]-50,state_su3[1]-100, 1-2])
                a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])#
                b_6_AP_LOS=math.sqrt(PL_AP[int(state_su3[0]), int(state_su3[1])])
                h_6=b_6_AP_LOS*a_6
                interference_6=10**(-9)
                # Alias so that the generated code below can refer to action1/action2/action3 uniformly.
                action1=action
                # order_index: 0-based agent indices sorted by ascending action[9]; position p
                # in this ordering pairs that agent with static node p+4.
                order_array=[action1[9], action2[9], action3[9]]
                order_index=[b[0] for b in sorted(enumerate(order_array), key=lambda i:i[1])]
                # action1=action
                # for order_i in order_index:
                #     exec('''if action{}[8]>0.5:
                #         interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
                #     else:
                #         interference_4+=((action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
                #          ''')
                
                # The exec calls build statements over the numbered variables (action1..3,
                # h_1..6, w_1..3, interference_1..6). They work because this code runs at
                # module level, where exec reads and writes the module namespace.
                # For the agent at ordering position p: if its action[8] > 0 the extra term
                # is added to the agent's own interference, otherwise to that of node p+4.
                exec('''if action{}[8]>0:
    interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
else:
    interference_4+=((action{}[8]+1)/2)*(np.linalg.norm(h_4*w_{}))**2'''.format(order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1))
                exec('''if action{}[8]>0:
    interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
else:
    interference_5+=((action{}[8]+1)/2)*(np.linalg.norm(h_5*w_{}))**2'''.format(order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1))
                exec('''if action{}[8]>0:
    interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
else:
    interference_6+=((action{}[8]+1)/2)*(np.linalg.norm(h_6*w_{}))**2'''.format(order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1))
    
                # SINR of agent k uses share (a8+1)/2; SINR of node p+4 uses the complementary
                # share of the agent at ordering position p.
                SINR_1=((action1[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
                SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2
                SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3
                exec('''SINR_4=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_4*w_{}))**2/interference_4'''.format(order_index[0]+1, order_index[0]+1))
                exec('''SINR_5=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_5*w_{}))**2/interference_5'''.format(order_index[1]+1, order_index[1]+1))
                exec('''SINR_6=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_6*w_{}))**2/interference_6'''.format(order_index[2]+1, order_index[2]+1))

                
                # calculate reward
                # distance_0k is half the Euclidean distance (in grid cells) to the goal.
                # reward = -(distance/50) + max(0.01, min(own SINR, paired node SINR)/1000) - 0.01,
                # replaced by 1 when the agent is exactly on its goal cell.
                distance_01_2=(state_[0]-end_location[0])*(state_[0]-end_location[0])/4+(state_[1]-end_location[1])*(state_[1]-end_location[1])/4
                distance_01 = math.sqrt(distance_01_2)
                #print(distance_01)
                exec('''reward = -(distance_01/50)+max(0.01, min(SINR_1, SINR_{})/1000)-0.01'''.format(order_index.index(0)+4))
                #reward = -1
                #reward=0
                if distance_01==0:
                    reward = 1
                if not done1:
                    episode_reward += reward
                distance_02_2=(state2_[0]-end_location2[0])*(state2_[0]-end_location2[0])/4+(state2_[1]-end_location2[1])*(state2_[1]-end_location2[1])/4
                distance_02 = math.sqrt(distance_02_2)
                exec('''reward2 = -(distance_02/50)+max(0.01, min(SINR_2, SINR_{})/1000)-0.01'''.format(order_index.index(1)+4))
                if distance_02==0:
                    reward2 = 1
                if not done2:
                    episode_reward2 += reward2
                distance_03_2=(state3_[0]-end_location3[0])*(state3_[0]-end_location3[0])/4+(state3_[1]-end_location3[1])*(state3_[1]-end_location3[1])/4
                distance_03 = math.sqrt(distance_03_2)
                exec('''reward3 = -(distance_03/50)+max(0.01, min(SINR_3, SINR_{})/1000)-0.01'''.format(order_index.index(2)+4))
                if distance_03==0:
                    reward3 = 1
                if not done3:
                    episode_reward3 += reward3
                state_ = np.array(state_, dtype=np.float32)
                state2_ = np.array(state2_, dtype=np.float32)
                state3_ = np.array(state3_, dtype=np.float32)

                # if  len(buffer) >= MEMORY_CAPACITY and steps%100==0:
                #     VAR *= .99995
                #print(state)
                #done = 1 if done is True else 0
                # NOTE(review): the done flags stored here are the values from before this
                # step's arrival check (done1/2/3 are only set further below), so the
                # transition that reaches the goal is stored with done=False, and transitions
                # keep being pushed after an agent is done.
                buffer.push(state, action, reward, state_, done1)
                buffer2.push(state2, action2, reward2, state2_, done2)
                buffer3.push(state3, action3, reward3, state3_, done3)
                if not done1:
                    study=study+1
                if not done2:
                    study2=study2+1 
                if not done3:
                    study3=study3+1 
                # An agent learns only once its buffer is full, it has not arrived yet, and
                # enough episodes have passed; its noise std decays by sqrt(.99995) per update.
                if len(buffer) >= MEMORY_CAPACITY and not done1 and episode >= MEMORY_CAPACITY/MAX_STEPS:
                    #print("in")
                    #for i in range(20):
                    # if study>=10:
                    VAR1 *= math.sqrt(.99995)
                        # study=-1
                    agent.learn(VAR1)
                    
                if len(buffer2) >= MEMORY_CAPACITY and not done2 and episode>=MEMORY_CAPACITY/MAX_STEPS:
                    # if study2>=10:
                    #     study2=-1
                    VAR2 *= math.sqrt(.99995)
                   # for i in range(20):
                    agent2.learn(VAR2)
                    
                if len(buffer3) >= MEMORY_CAPACITY and not done3 and episode>=MEMORY_CAPACITY/MAX_STEPS:
                    #
                    # if study3>=10:
                    #     study3=-1
                    VAR3 *= math.sqrt(.99995)
                    # for i in range(20):
                    agent3.learn(VAR3)
                    
                # First arrival of agent 1 in this episode: mark it done and, if it took
                # fewer than 100 steps, dump its trajectory to text files (one value per line).
                if distance_01==0 and bobao==0:
                    done1=True
                    if steps<100:
                        for x in range(len(x_k1_array)):
                            filename = 'x_k1'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(x_k1_array[x])+'\n')
                        for y in range(len(y_k1_array)):
                            filename = 'y_k1'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(y_k1_array[y])+'\n')
                    print("1 arrive success!!!!!!!!!!!!!!")
                    bobao=1
                # First arrival of agent 2 (same handling as agent 1).
                if distance_02==0 and bobao2==0:
                    if steps<100:
                        for x in range(len(x_k2_array)):
                            filename = 'x_k2'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(x_k2_array[x])+'\n')
                        for y in range(len(y_k2_array)):
                            filename = 'y_k2'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(y_k2_array[y])+'\n')

                    done2=True
                    print("2 arrive success!!!!!!!!!!!!!!")
                    bobao2=1
                # First arrival of agent 3 (same handling as agent 1).
                if distance_03==0 and bobao3==0:
                    if steps<100:
                        for x in range(len(x_k3_array)):
                            filename = 'x_k3'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(x_k3_array[x])+'\n')
                        for y in range(len(y_k3_array)):
                            filename = 'y_k3'+str(episode)+"_"+str(steps)+'.txt'
                            with open(filename,'a') as fileobject: # 'a' opens the file in append mode
                                  fileobject.write(str(y_k3_array[y])+'\n')

                    done3=True
                    print("3 arrive success!!!!!!!!!!!!!!")
                    bobao3=1
                # The episode ends early once all three agents have arrived.
                if done1 and done2 and done3:
                    break
                
                state = state_
                state2 = state2_
                state3 = state3_
                
                
            # Record the episode reward; from the second episode on it is smoothed as
            # 0.9 * previous smoothed value + 0.1 * current episode reward.
            if episode == 0:
                all_episode_reward.append(episode_reward)
                all_episode_reward2.append(episode_reward2)
                all_episode_reward3.append(episode_reward3)
                # filename='Reward_v2_agent1.txt'
                # with open (filename, 'a') as fileobject:
                #     fileobject.write(str(episode_reward)+'\n')
                # filename='Reward_v2_agent2.txt'
                # with open (filename, 'a') as fileobject:
                #      fileobject.write(str(episode_reward2)+'\n')
                # filename='Reward_v2_agent3.txt'
                # with open (filename, 'a') as fileobject:
                #     fileobject.write(str(episode_reward3)+'\n')   
            else:
                all_episode_reward.append(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)
                all_episode_reward2.append(all_episode_reward2[-1] * 0.9 + episode_reward2 * 0.1)
                all_episode_reward3.append(all_episode_reward3[-1] * 0.9 + episode_reward3 * 0.1)
                # filename='Reward_v2_agent1.txt'
                # with open (filename, 'a') as fileobject:
                #     fileobject.write(str(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)+'\n')
                # filename='Reward_v2_agent2.txt'
                # with open (filename, 'a') as fileobject:
                #      fileobject.write(str(all_episode_reward2[-1] * 0.9 + episode_reward2 * 0.1)+'\n')
                # filename='Reward_v2_agent3.txt'
                # with open (filename, 'a') as fileobject:
                #     fileobject.write(str(all_episode_reward3[-1] * 0.9 + episode_reward3 * 0.1)+'\n')   
            #print(var_real)
            # Per-episode summary: raw rewards, last step index, and goal vs. final position of each agent.
            print(
                ' Episode: {}/{} | Reward: {:.4f} & {:.4f} & {:.4f}  | Step: {:.4f}| END: {}-{} {}-{} {}-{}'.format(
                    episode + 1, TRAIN_EPISODES, episode_reward, episode_reward2, episode_reward3,
                    steps, end_location, state, end_location2, state2, end_location3, state3
                ))
            #print(len(buffer3))
            
        #env.close()
        #agent.save()
        # filename = os.path.basename(path) 
        # Plot the smoothed reward curves of the three agents and save the figure
        # under image/<ALG_NAME>_<ENV_ID>.
        plt.plot(all_episode_reward)
        plt.plot(all_episode_reward2)
        plt.plot(all_episode_reward3)
        if not os.path.exists('image'):
            os.makedirs('image')
        plt.savefig(os.path.join('image', '_'.join([ALG_NAME, ENV_ID])))
676 | 
677 |     # if args.test:
678 |     #     # test
679 |     #     agent.load()
680 |     #     for episode in range(TEST_EPISODES):
681 |     #         state = env.reset().astype(np.float32)
682 |     #         episode_reward = 0
683 |     #         for step in range(MAX_STEPS):
684 |     #             env.render()
685 |     #             state, reward, done, info = env.step(agent.get_action(state, greedy=True))
686 |     #             state = state.astype(np.float32)
687 |     #             episode_reward += reward
688 |     #             if done:
689 |     #                 break
690 |     #         print(
691 |     #             'Testing  | Episode: {}/{}  | Episode Reward: {:.4f}  | Running Time: {:.4f} '.format(
692 |     #                 episode + 1, TEST_EPISODES, episode_reward,
693 |     #                 time.time() - t0
694 |     #             )
695 |     #         )
696 |     #     env.close()


--------------------------------------------------------------------------------