├── radio_map
│   ├── AABB_plot.m
│   └── map_data.mat
├── _doc
│   ├── simulation_fig.png
│   └── simulation_fig2.png
├── plot_figure
│   ├── FIGURE_2.m
│   ├── FIGURE_1.m
│   ├── FIGURE_5.m
│   ├── FIGURE_4.m
│   └── FIGURE_3.m
├── algorithm
│   ├── MA-TD3_core.py
│   ├── MA-TD3_main.py
│   ├── MA-PPO_main.py
│   ├── MA-SAC_main.py
│   └── MA-DDPG_main.py
├── tradition_baseline
│   ├── A_search.py
│   ├── fig5.m
│   ├── fig3.m
│   └── pso.py
├── README.md
└── environment
    ├── environment.yaml
    └── requirements.txt
/radio_map/AABB_plot.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/AABB_plot.m
--------------------------------------------------------------------------------
/radio_map/map_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/radio_map/map_data.mat
--------------------------------------------------------------------------------
/_doc/simulation_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig.png
--------------------------------------------------------------------------------
/_doc/simulation_fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lry-bupt/MAAC_DRL/HEAD/_doc/simulation_fig2.png
--------------------------------------------------------------------------------
/plot_figure/FIGURE_2.m:
--------------------------------------------------------------------------------
1 | clc;
2 | clear all;
3 | close all;
4 | X=[1:1:400];
5 | TD3_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\TD3.txt');
6 | % TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
7 | % TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
8 | % TD3_avg=(TD3_1+TD3_2+TD3_3)/3;
9 |
10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt');
11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\PPO.txt');
12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\DDPG.txt');
13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_2\SAC.txt');
14 | RANDOM_avg=ones(400,1)*TD3_avg(1);  % column vector, not a 400x400 matrix
15 | % PPO_avg(1)=TD3_avg(1);
16 | % DDPG_avg(20)=TD3_avg(1);
17 | % SAC_avg(20)=TD3_avg(1);
18 |
19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400);
20 | hold on
21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400);
22 | hold on
23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1 0.54902 0],'MarkerIndices',1:100:400);
24 | hold on
25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400);
26 | hold on
27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400);
28 | % p1.MarkerIndices = 400:500:length(y1_ping);
29 | legend([p1 p4 p3 p2 p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex')
30 | xlabel('Episode','Interpreter','latex')
31 | ylabel('Reward','Interpreter','latex')
32 | ylim([-900, 0])
--------------------------------------------------------------------------------
/plot_figure/FIGURE_1.m:
--------------------------------------------------------------------------------
1 | clc;
2 | clear all;
3 | close all;
4 | X=[1:1:400];
5 | TD3_1=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-1.txt');
6 | TD3_2=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-2.txt');
7 | TD3_3=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-3.txt');
8 | TD3_avg=(TD3_1+TD3_2+TD3_3)/3;
9 |
10 | % importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\TD3-avg.txt');
11 | PPO_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\PPO-avg.txt');
12 | DDPG_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\DDPG-avg.txt');
13 | SAC_avg=importdata('C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\FIGURE_1\MATLAB\SAC-avg.txt');
14 | RANDOM_avg=ones(400,1)*TD3_avg(1);  % column vector, not a 400x400 matrix
15 | PPO_avg(1)=TD3_avg(1);
16 | DDPG_avg(20)=TD3_avg(1);
17 | SAC_avg(20)=TD3_avg(1);
18 |
19 | p1=plot(X, TD3_avg(20:419), '-p', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.00,0.45,0.74],'MarkerIndices',1:100:400);
20 | hold on
21 | p2=plot(X, PPO_avg(1:400), '-o', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.47,0.67,0.19],'MarkerIndices',1:100:400);
22 | hold on
23 | p3=plot(X, DDPG_avg(20:419), '-.*', 'MarkerSize',5, 'LineWidth',1.5,'Color',[1 0.54902 0],'MarkerIndices',1:100:400);
24 | hold on
25 | p4=plot(X, SAC_avg(20:419), '-', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.50,0.50,0.50],'MarkerIndices',1:100:400);
26 | hold on
27 | p5=plot(X, RANDOM_avg(1:400), '--', 'MarkerSize',5, 'LineWidth',1.2,'Color',[0.72,0.27,1.00],'MarkerIndices',1:100:400);
28 | % p1.MarkerIndices = 400:500:length(y1_ping);
29 | legend([p1 p4 p3 p2 p5 ],{'TD3','SAC','DDPG','PPO','Random'},'Location','SouthEast','Interpreter','latex')
30 | xlabel('Episode','Interpreter','latex')
31 | ylabel('Reward','Interpreter','latex')
32 | ylim([-1500, 0])
--------------------------------------------------------------------------------
/algorithm/MA-TD3_core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.signal
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 | def combined_shape(length, shape=None):
9 | if shape is None:
10 | return (length,)
11 | return (length, shape) if np.isscalar(shape) else (length, *shape)
12 |
13 | def mlp(sizes, activation, output_activation=nn.Identity):
14 | layers = []
15 | for j in range(len(sizes)-1):
16 | act = activation if j < len(sizes)-2 else output_activation
17 | layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
18 | return nn.Sequential(*layers)
19 |
20 | def count_vars(module):
21 | return sum([np.prod(p.shape) for p in module.parameters()])
22 |
23 | class MLPActor(nn.Module):
24 |
25 | def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
26 | super().__init__()
27 | pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
28 | self.pi = mlp(pi_sizes, activation, nn.Tanh)
29 |
30 | def forward(self, obs):
31 |         # Tanh output from the network; actions are assumed normalized to [-1, 1].
32 | return self.pi(obs)
33 |
34 | class MLPQFunction(nn.Module):
35 |
36 | def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
37 | super().__init__()
38 | self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation)
39 |
40 | def forward(self, obs, act):
41 | q = self.q(torch.cat([obs, act], dim=-1))
42 | return torch.squeeze(q, -1) # Critical to ensure q has right shape.
43 |
44 | class MLPActorCritic(nn.Module):
45 |
46 | def __init__(self, obs_dim, act_dim, hidden_sizes=(256,256),
47 | activation=nn.ReLU):
48 | super().__init__()
49 |
50 | # build policy and value functions
51 | self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation)
52 | self.q1 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)
53 | self.q2 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation)
54 |
55 | def act(self, obs):
56 | with torch.no_grad():
57 | return self.pi(obs).numpy()
58 |
--------------------------------------------------------------------------------
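
A quick smoke test of the building blocks above (a minimal sketch; it assumes MA-TD3_core.py has been made importable as `core`, e.g. by renaming it to core.py, and the dimensions are illustrative only):

```python
import torch
from core import MLPActorCritic, count_vars  # assumes MA-TD3_core.py is available as core.py

obs_dim, act_dim = 4, 2                       # illustrative dimensions, not the paper's settings
ac = MLPActorCritic(obs_dim, act_dim)         # tanh actor + twin Q-networks

obs = torch.zeros(obs_dim)
a = ac.act(obs)                               # numpy action, each entry in [-1, 1]
q = ac.q1(obs.unsqueeze(0), torch.as_tensor(a).unsqueeze(0))
print(a.shape, q.shape, count_vars(ac.pi))    # (2,) torch.Size([1]) <parameter count>
```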
/plot_figure/FIGURE_5.m:
--------------------------------------------------------------------------------
1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1];
2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16];
3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052];
4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX
5 | Y2_OMA = [-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221];
6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U
7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05];
8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01
9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846];
10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg
11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241];
12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir
13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318];
14 |
15 | % fig = figure;
16 | % left_color = [0 0 0];
17 | % right_color = [0 0 0];
18 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]);
19 |
20 | % activate the left y-axis
21 | % yyaxis left
22 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
23 | hold on
24 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
25 | hold on
26 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
27 | % ylabel('Averaged data rate recieved by each robot')
28 | % hold on
29 | % % yyaxis right
30 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
31 | % hold on
32 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
33 | % hold on
34 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
35 | % hold on
36 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
37 | hold on
38 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
39 | hold on
40 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
41 | ylabel('Averaged data rate received by each robot')
42 | hold on
43 | % yyaxis right
44 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
45 | hold on
46 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
47 | hold on
48 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
49 | hold on
50 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$','Interpreter','latex')
51 | xlabel('$P_{\max}$','Interpreter','latex')
52 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','UE distribution $1$, $\kappa_1=0.01$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.01$, MA-TD3, OMA','UE distribution $1$, $\kappa_1=0.1$, MA-DDPG, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-DDPG, OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA','Interpreter','latex')
53 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot')
54 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot')
55 |
56 | box on
57 | grid off
58 |
--------------------------------------------------------------------------------
/tradition_baseline/A_search.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import heapq
3 | from scipy.io import loadmat
4 | import math
5 |
6 | x_max=99
7 | y_max=99
8 | m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat")
9 | #correct_action=0
10 | MARK= m["MARK_new"]
11 |
12 | def generate_directions(num_directions):
13 |     # Generate evenly spaced unit direction vectors
14 | directions = []
15 | angle_step = 360 / num_directions
16 | for i in range(num_directions):
17 | angle = math.radians(i * angle_step)
18 | directions.append((math.cos(angle), math.sin(angle)))
19 | return directions
20 |
21 | def is_valid(x, y, MARK, x_max, y_max):
22 | # Check if (x, y) is within bounds and not an obstacle
23 | return 0 <= int(x) < x_max and 0 <= int(y) < y_max and MARK[int(x), int(y)] != 2
24 |
25 | def heuristic(a, b):
26 | # Manhattan distance heuristic
27 | return abs(a[0] - b[0]) + abs(a[1] - b[1])
28 |
29 | def a_star_search(start, goal, MARK, x_max, y_max):
30 | # A* search to find the shortest path from start to goal
31 | neighbors = [ (0, -1), (-1, 0),(1, 0), (0, 1)]
32 | # neighbors = generate_directions(8)
33 | close_set = set()
34 | came_from = {}
35 | gscore = {start: 0}
36 | fscore = {start: heuristic(start, goal)}
37 | oheap = []
38 |
39 | heapq.heappush(oheap, (fscore[start], start))
40 |
41 | while oheap:
42 | current = heapq.heappop(oheap)[1]
43 |
44 | if current == goal:
45 | data = []
46 | while current in came_from:
47 | data.append(current)
48 | current = came_from[current]
49 | data.append(start)
50 | data.reverse()
51 | return data
52 |
53 | close_set.add(current)
54 | for i, j in neighbors:
55 | neighbor = current[0] + i, current[1] + j
56 | tentative_g_score = gscore[current] + 1
57 |
58 | if not is_valid(neighbor[0], neighbor[1], MARK, x_max, y_max):
59 | continue
60 |
61 | if neighbor in close_set and tentative_g_score >= gscore.get(neighbor, 0):
62 | continue
63 |
64 |             if tentative_g_score < gscore.get(neighbor, float('inf')):  # default inf: unseen nodes always qualify
65 | came_from[neighbor] = current
66 | gscore[neighbor] = tentative_g_score
67 | fscore[neighbor] = tentative_g_score + heuristic(neighbor, goal)
68 | heapq.heappush(oheap, (fscore[neighbor], neighbor))
69 |
70 | return False
71 |
72 | def save_path_to_txt(path, filename):
73 | with open(filename, 'w') as f:
74 | for x, y in path:
75 | f.write(f"{x},{y}\n")
76 |
77 | # Define start, goal, and MARK matrix
78 | x1, y1 = 40, 40 # Starting point
79 | x2, y2 = 90, 90 # Goal point
80 | # x_max, y_max = 10, 10 # Grid size
81 |
82 | # Example MARK matrix with obstacles
83 | # MARK = np.zeros((x_max, y_max))
84 | # MARK[4, 4] = 2
85 | # MARK[4, 5] = 2
86 | # MARK[4, 6] = 2
87 | # MARK[5, 4] = 2
88 | # MARK[6, 4] = 2
89 |
90 | start = (x1, y1)
91 | goal = (x2, y2)
92 |
93 | # Find path
94 | path = a_star_search(start, goal, MARK, x_max, y_max)
95 |
96 | # Save path to txt file
97 | if path:
98 | save_path_to_txt(path, 'robot_path_2_v2.txt')
99 |     print("Path found and saved to robot_path_2_v2.txt")
100 | else:
101 | print("No path found")
102 |
--------------------------------------------------------------------------------
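
The script above runs A* directly on the `MARK` matrix loaded from the .mat file. The same functions can also be exercised on the small example grid sketched in the commented-out block, which gives a self-contained check (toy values only, no .mat file needed):

```python
import numpy as np

# Toy 10x10 grid matching the commented-out example above; the value 2 marks an obstacle.
toy_size = 10
toy_mark = np.zeros((toy_size, toy_size))
for cell in [(4, 4), (4, 5), (4, 6), (5, 4), (6, 4)]:
    toy_mark[cell] = 2

toy_path = a_star_search((0, 0), (9, 9), toy_mark, toy_size, toy_size)
print(toy_path)  # list of (x, y) cells from start to goal, or False if unreachable
```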
/tradition_baseline/fig5.m:
--------------------------------------------------------------------------------
1 | X_1 = [0.02, 0.04, 0.06, 0.08, 0.1];
2 | Y1_NOMA = [-1.333717993, -7.825136502, -10.44519537, -13.65757053, -16];
3 | Y1_OMA = [-1.42E-05,-0.025458926,-0.356210216,-0.629268766,-0.818900052];
4 | Y2_NOMA = [-0.271909959, -2.990650853,-5.692890644,-6.519637193,-9.841153601]; %FIX
5 | Y2_OMA = [-2.88E-03,-0.027303647,-0.147327698,-0.395702838,-0.600764221];
6 | Y3_NOMA = [-0.002032929,-0.391495296,-1.822073156,-2.974095926,-7.442980097];%3 U
7 | Y3_OMA = [-2.98E-11,-1.93E-07,-8.47E-07,-1.86E-05,-8.91E-05];
8 | Y4_NOMA = [-0.004375222,-1.439502907,-3.918489578,-7.066198785,-9.329658858];%0.01
9 | Y4_OMA = [-2.25E-06,-0.000636687,-0.071654114,-0.217504705,-0.407177846];
10 | Y5_NOMA = [-0.004367413,-2.221372461,-4.209532276,-6.103669332,-7.890873251];%ddpg
11 | Y5_OMA = [-4.12E-11,-9.69E-05,-0.004444109,-0.037339037,-0.142955241];
12 | Y6_NOMA = [-0.859490241,-2.221372461,-5.558103089,-7.355402684,-8.419060732];%2 envir
13 | Y6_OMA = [-2.57E-01,-0.906342075,-1.958136833,-2.708482246,-4.548873318];
14 |
15 | Optimal_NOMA = [-5.66606907, -15.94919966, -21.868942625, -25.36415256, -27.00059453];
16 |
17 | % fig = figure;
18 | % left_color = [0 0 0];
19 | % right_color = [0 0 0];
20 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]);
21 |
22 | % activate the left y-axis
23 | % yyaxis left
24 | p1=plot(X_1,Y1_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
25 | hold on
26 | p2=plot(X_1,Y1_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
27 | hold on
28 | % p3=plot(X_1,Y2_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
29 | % ylabel('Averaged data rate recieved by each robot')
30 | % hold on
31 | % % yyaxis right
32 | % p4=plot(X_1,Y2_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
33 | % hold on
34 | % p5=plot(X_1,Y3_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
35 | % hold on
36 | % p6=plot(X_1,Y3_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
37 | % hold on
38 | p1=plot(X_1,Y4_NOMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
39 | hold on
40 | p2=plot(X_1,Y4_OMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
41 | hold on
42 | p3=plot(X_1,Y5_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'k', 'MarkerFaceColor','b');
43 | ylabel('Averaged data rate received by each robot')
44 | hold on
45 | % yyaxis right
46 | p4=plot(X_1,Y5_OMA,'LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
47 | hold on
48 | p5=plot(X_1,Y6_NOMA,'-o','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
49 | hold on
50 | p6=plot(X_1,Y6_OMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
51 | hold on
52 | p7=plot(X_1, Optimal_NOMA,'-^','LineWidth',2, 'MarkerSize',6, 'MarkerEdgeColor', 'r', 'MarkerFaceColor','b');
53 | hold on
54 | ylabel('Decoding Error Probability $\lg (\mathcal{P}_1)$','Interpreter','latex')
55 | xlabel('$P_{\max}$','Interpreter','latex')
56 | legend('UE distribution $1$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-TD3, OMA','UE distribution $1$, $\kappa_1=0.01$, MA-TD3, NOMA','UE distribution $1$, $\kappa_1=0.01$, MA-TD3, OMA','UE distribution $1$, $\kappa_1=0.1$, MA-DDPG, NOMA','UE distribution $1$, $\kappa_1=0.1$, MA-DDPG, OMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, NOMA','UE distribution $2$, $\kappa_1=0.1$, MA-TD3, OMA','Traditional Optimization Method','Interpreter','latex')
57 | % set(get(a(1),'Ylabel'),'String','Averaged data rate of each robot')
58 | % set(get(a(2),'Ylabel'),'String','Averaged arriving step of each robot')
59 |
60 | box on
61 | grid off
62 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MAAC_DRL
2 | This repository contains the Python implementation of our submitted paper titled "Deep Reinforcement Learning Enables Joint Trajectory and Communication in Internet of Robotic Things".
3 | ## Quick Links
4 | [[Installation]](#installation) [[Usage]](#usage)
5 | ## Introduction
6 | We develop multi-agent actor-critic deep reinforcement learning (MAAC-DRL) algorithms that jointly reduce the decoding error probability and the arrival time of robots in the industrial Internet of Robotic Things (IoRT) under ultra-reliable low-latency communication (URLLC) requirements.
7 |
8 | Here are the settings of the considered IoRT environment.
9 | | Notation | Simulation Value | Physical Meaning |
10 | | ------------ | ------------------ | ------------------------------------------------------------ |
11 | | $K$ | $\{2, 4, 6\}$ | the number of users |
12 | | $L$ | $\{2, 3\}$ | the number of antennas |
13 | | $K_{\rm MU}$ | $\{1, 2, 3\}$ | the number of robots |
14 | | $D$ | $100 \ {\rm bits}$ | packet size |
15 | | $M$ | $50 \ {\rm symbols}$ | the number of transmitted symbols |
16 | | $T_{\max}$ | $2000 \ {\rm s}$ | the moving deadline of robots |
17 | | $H_0$ | $1 \ {\rm m}$ | the height of antennas |
18 | | $P_{\max}$ | $[0.02, 0.1] \ {\rm W}$ | the maximal transmit power |
19 | | $\sigma^2$ | $-100 \ {\rm dBm/Hz}$ | the variance of the additive white Gaussian noise |
20 | | $v$ | $5 \ {\rm m/s}$ | the moving speed |
21 |
22 |
23 |
24 | ## Results
25 |
26 |
27 | | ![](_doc/simulation_fig.png) |
28 | | ![](_doc/simulation_fig2.png) |
29 |
30 |
31 |
32 | For more details and simulation results, please check our paper.
33 |
34 | ## Installation
35 | Dependencies can be installed with Conda.
36 |
37 | For example, to create the environment used for the IoRT simulations with URLLC requirements:
38 | ```
39 | conda env create -n URLLC -f environment/environment.yaml
40 | ```
41 |
42 | Then activate it by
43 | ```
44 | conda activate URLLC
45 | ```
46 |
47 | Then install the remaining Python dependencies of the considered environment by
48 | ```
49 | pip install -r environment/requirements.txt
50 | ```
51 |
52 | ## Usage
53 |
54 | Here are the parameters of our simulations.
55 | | Notation | Simulation Value | Physical Meaning |
56 | | ------------ | ------------------ | ------------------------------------------------------------ |
57 | | $lr$ | $\{10^{-4}, 2 \times 10^{-3}\}$ | the learning rate of the DRL algorithms |
58 | | $\kappa_1$ | $\{0, 0.01, 0.1\}$ | the parameters of the reward designs |
59 | | $\|\mathcal{D}_0\|$ | $128$ | the size of the mini-batch |
60 | | $\|\mathcal{D}\|$ | $10^{6}$ | the maximal size of the experience buffer |
61 |
62 | ### algorithm (`Python codes of the different MA-DRL algorithms`):
63 | - `'MA-DDPG_main.py'` (Main functions and MDP transitions of MA-DDPG)
64 | - `'MA-PPO_main.py'` (Main functions and MDP transitions of MA-PPO)
65 | - `'MA-SAC_main.py'` (Main functions and MDP transitions of MA-SAC)
66 | - `'MA-TD3_core.py'` (MLP operators of MA-TD3)
67 | - `'MA-TD3_main.py'` (Main functions and MDP transitions of MA-TD3)
68 |
69 | ### environment (`dependency files of the considered system`):
70 | - `'environment.yaml'` (Conda environmental document)
71 | - `'requirements.txt'` (Pip environmental document)
72 |
73 | ### plot_figure (`MATLAB codes for the simulation figures`):
74 | - `'FIGURE_1.m'` (Reward comparison under different MA-DRL algorithms)
75 | - `'FIGURE_2.m'` (Robots' trajectory comparison under different reward settings)
76 | - `'FIGURE_3.m'` (Average decoding error probability under different clustering and multiple-access schemes)
77 | - `'FIGURE_4.m'` (Objective function under different environmental settings)
78 | - `'FIGURE_5.m'` (Arriving time under different environmental settings)
79 |
80 | ### radio_map (`files for building the environment's radio map`):
81 | - `'AABB_plot.m'` (Construct a radio map based on the deployment of obstacles and intersection detection)
82 | - `'map_data.mat'` (Raw data of the built radio map)
83 |
84 |
--------------------------------------------------------------------------------
/plot_figure/FIGURE_4.m:
--------------------------------------------------------------------------------
1 | y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_0.txt").';
2 | y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_1.txt").';
3 | y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_2.txt").';
4 | y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_3.txt").';
5 | y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP/STEP_v1_agent1_4.txt").';
6 | h4=cdfplot(y1_0(200:500));
7 | hold on
8 | % h2=cdfplot(y2_0(200:500));
9 | % hold on
10 | h2=cdfplot(y3_0(200:500));
11 | % hold on
12 | % h4=cdfplot(y4_0(200:500));
13 | % hold on
14 | h3=cdfplot(y5_0(200:500));
15 | hold on
16 |
17 | y1_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_0.txt").';
18 | y2_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_1.txt").';
19 | y3_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_2.txt").';
20 | y4_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_3.txt").';
21 | y5_0=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Downloads/matdisk/TD3-STEP-v2/STEP_v1_agent1_4.txt").';
22 | h5=cdfplot(y1_0(200:500));
23 | hold on
24 | h6=cdfplot(y2_0(200:500));
25 | hold on
26 | % h3=cdfplot(y3_0(200:500));
27 | % hold on
28 | % h4=cdfplot(y4_0(200:500));
29 | % hold on
30 | h1=cdfplot(y5_0(200:500));
31 | hold on
32 | % h6=cdfplot(y4(1500:4999));
33 | % hold on
34 | set(h1,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--')
35 | set(h2,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300,'LineStyle','--')
36 | set(h3,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300,'LineStyle','--')
37 | set(h4,'Color',[0,0,0],'LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300)
38 | set(h5,'Color',[0,0,0],'Marker','^','LineWidth',1.2, 'MarkerSize',4,'MarkerIndices',1:60:300)
39 | set(h6,'Color',[0,0,0],'Marker','*','LineWidth',1.2, 'MarkerSize',6,'MarkerIndices',1:60:300)
40 |
41 | ax1 = gca;
42 | % set(gca,'XAxisLocation','top')
43 | % set(gca,'YAxisLocation','right')
44 |
45 | xlim([100 350]);
46 | ylim([0 0.85]);
47 | xlabel('Arriving step','Interpreter','latex')
48 | ylabel('Cumulative distribution function','Interpreter','latex')
49 | set(gca,'XTickLabel',{'$90 \%$','$100 \%$','$110 \%$','$120 \%$','$130 \%$','$140 \%$'},'TickLabelInterpreter','latex');
50 | set(gca,'YTickLabel',{'0','0.2','0.4','0.6','0.8','1.0'});
51 | set(gca,'xtick',100:250/5:350)
52 | set(gca,'ytick',0:0.85/5:0.85)
53 |
54 | % Remove the ticks on the top and right borders but keep the box
55 | % box off;
56 | grid off
57 |
58 | xl=xlim;
59 | yl=ylim;
60 | % line([xl(1),xl(2)],[yl(2),yl(2)],'color',[0 0 0]); % draw the top border in black
61 | % line([xl(2),xl(2)],[yl(1),yl(2)],'color',[0 0 0]); % draw the right border in black
62 | gs=legend([h1 h2 h3 h4 h5 h6],{'$P_{\max}=0.02, \kappa_1=0.1$','$P_{\max}=0.04, \kappa_1=0.1$','$P_{\max}=0.1, \kappa_1=0.1$','$P_{\max}=0.02, \kappa_1=0.01$','$P_{\max}=0.04, \kappa_1=0.01$','$P_{\max}=0.1, \kappa_1=0.01$'},'Interpreter','latex','Location','northwest','NumColumns',1);
63 | % set(gs,'Location',best)
64 | title(" ")
65 |
66 | % ax2=axes('Position',get(ax1,'Position'),...
67 | % 'XAxisLocation','top',...
68 | % 'YAxisLocation','left',...
69 | % 'Color','none',...
70 | % 'XColor','b','YColor','b');
71 | % hold on
72 | %
73 | % X_1 = [0, 0.02, 0.04, 0.06, 0.08, 0.1];
74 | % % Y1_1 = [73.32247929, 73.59625821, 73.87003713, 73.58897458, 73.54327335, 73.57012282, 73.48700371, 73.74321622, 73.56069694, 73.43016281, 73.41759497];
75 | % % Y1_2 = [77.09425878, 77.96301057, 78.32762068, 78.77320765, 78.1179663, 78.80548415, 78.4038846, 78.67466438, 78.16966581, 78.41473865, 78.54641531];
76 | % % Y1_3 = [82.97800628, 84.27606398, 84.41930877, 83.87746358, 83.93316195, 84.33647529, 84.48843188, 84.8706084, 84.52499286, 85.03199086, 84.51185376];
77 | % Y2_1 = [70.94397619, 80.87746462, 83.66667014, 85.63058596, 86.90541567, 87.85830977, 88.64414142, 89.31973965, 89.89561792, 90.38694579, 90.87500257];
78 | % Y2_2 = [70.96419081, 80.91497367, 83.69259198, 85.64478655, 86.8985237, 87.87413717, 88.68100766, 89.43362594, 89.9297314, 90.50355753, 90.8866812];
79 | % Y2_3 = [70.99211407, 80.94282523, 83.75781121, 85.71406009, 87.01403988, 87.92457729, 88.7456004, 89.4045163, 90.00031461, 90.44857253, 90.92024851];
80 | % p4=plot(X_1,Y2_1,'b:o','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
81 | % p5=plot(X_1,Y2_2,'b-.*','LineWidth',1.2, 'MarkerSize',6, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
82 | % p6=plot(X_1,Y2_3,'b--^','LineWidth',1.2, 'MarkerSize',4, 'MarkerEdgeColor', 'b', 'MarkerFaceColor','b');
83 | % ylabel('Average sum-rate','Interpreter','latex')
84 | % xlabel('Power budget $P_{\max}$','Interpreter','latex')
85 | % % legend([p4 p5 p6],{'$\kappa_1=0.0001$','$\kappa_1=0.002$','$\kappa_1=0.005$'},'Location','NorthWest','Interpreter','latex','NumColumns',1)
86 | % % legend('$\kappa_1=0.002$','$\kappa_1=0.005$','$\kappa_1=0.0001$')
87 | % xlim([0 1]);
88 | % % set(gca,'XTickLabel',{'0','0.2','0.4','0.6','0.8','1'});
89 | % set(gca,'XAxisLocation','bottom')
90 | % set(gca,'xtick',0:0.2:1)
91 | % grid off
92 |
--------------------------------------------------------------------------------
/algorithm/MA-TD3_main.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | import itertools
3 | import numpy as np
4 | import torch
5 | from torch.optim import Adam
6 | import core  # MA-TD3_core.py; rename/copy it to core.py, since a hyphenated filename cannot be imported
7 |
8 |
9 | class ReplayBuffer:
10 | """
11 | A simple FIFO experience replay buffer for TD3 agents.
12 | """
13 |
14 | def __init__(self, obs_dim, act_dim, size):
15 | self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
16 | self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
17 | self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32)
18 | self.rew_buf = np.zeros(size, dtype=np.float32)
19 | self.done_buf = np.zeros(size, dtype=np.float32)
20 | self.ptr, self.size, self.max_size = 0, 0, size
21 |
22 | def store(self, obs, act, rew, next_obs, done):
23 | self.obs_buf[self.ptr] = obs
24 | self.obs2_buf[self.ptr] = next_obs
25 | self.act_buf[self.ptr] = act
26 | self.rew_buf[self.ptr] = rew
27 | self.done_buf[self.ptr] = done
28 | self.ptr = (self.ptr+1) % self.max_size
29 | self.size = min(self.size+1, self.max_size)
30 |
31 | def sample_batch(self, batch_size=32):
32 | idxs = np.random.randint(0, self.size, size=batch_size)
33 | batch = dict(obs=self.obs_buf[idxs],
34 | obs2=self.obs2_buf[idxs],
35 | act=self.act_buf[idxs],
36 | rew=self.rew_buf[idxs],
37 | done=self.done_buf[idxs])
38 | return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()}
39 |
40 | class TD3:
41 | def __init__(self, obs_dim, act_dim, actor_critic=core.MLPActorCritic,
42 | replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, q_lr=5e-4,
43 | act_noise=0.5, target_noise=0.25, noise_clip=0.5, policy_delay=2):
44 | self.obs_dim = obs_dim
45 | self.act_dim = act_dim
46 | self.gamma = gamma
47 | self.polyak = polyak
48 | self.act_noise = act_noise
49 | self.target_noise = target_noise
50 | self.noise_clip = noise_clip
51 | self.policy_delay = policy_delay
52 | self.replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim,size=replay_size)
53 |
54 | self.ac = actor_critic(obs_dim, act_dim)
55 | self.ac_targ = deepcopy(self.ac)
56 |
57 | for p in self.ac_targ.parameters():
58 | p.requires_grad = False
59 |
60 | # List of parameters for both Q-networks (save this for convenience)
61 |         self.q_params = list(itertools.chain(self.ac.q1.parameters(), self.ac.q2.parameters()))  # a list (not a raw chain iterator) so update() can re-iterate it
62 |
63 | # Set up optimizers for policy and q-function
64 | self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr)
65 | self.q_optimizer = Adam(self.q_params, lr=q_lr)
66 |
67 |         # Experience buffer: self.replay_buffer, created above
68 |
69 | def compute_loss_q(self,data):
70 | o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done']
71 |
72 | q1 = self.ac.q1(o,a)
73 | q2 = self.ac.q2(o,a)
74 |
75 | # Bellman backup for Q functions
76 | with torch.no_grad():
77 | pi_targ = self.ac_targ.pi(o2)
78 |
79 | # Target policy smoothing
80 | epsilon = torch.randn_like(pi_targ) * self.target_noise
81 | epsilon = torch.clamp(epsilon, -self.noise_clip, self.noise_clip)
82 | a2 = pi_targ + epsilon
83 | a2 = torch.clamp(a2, -1, 1)
84 |
85 | # Target Q-values
86 | q1_pi_targ = self.ac_targ.q1(o2, a2)
87 | q2_pi_targ = self.ac_targ.q2(o2, a2)
88 | q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ)
89 | backup = r + self.gamma * (1 - d) * q_pi_targ
90 |
91 | # MSE loss against Bellman backup
92 | loss_q1 = ((q1 - backup)**2).mean()
93 | loss_q2 = ((q2 - backup)**2).mean()
94 | loss_q = loss_q1 + loss_q2
95 |
96 | return loss_q
97 |
98 | def compute_loss_pi(self, data):
99 | o = data['obs']
100 | q1_pi = self.ac.q1(o, self.ac.pi(o))
101 | return -q1_pi.mean()
102 |
103 | def update(self, batch_size, repeat_times):
104 | for i in range(int(repeat_times)):
105 | data = self.replay_buffer.sample_batch(batch_size)
106 | # First run one gradient descent step for Q1 and Q2
107 | self.q_optimizer.zero_grad()
108 | loss_q = self.compute_loss_q(data)
109 | loss_q.backward()
110 | self.q_optimizer.step()
111 |
112 | # Possibly update pi and target networks
113 | if i % self.policy_delay == 0:
114 |
115 | # Freeze Q-networks so you don't waste computational effort
116 | # computing gradients for them during the policy learning step.
117 | for p in self.q_params:
118 | p.requires_grad = False
119 |
120 | # Next run one gradient descent step for pi.
121 | self.pi_optimizer.zero_grad()
122 | loss_pi = self.compute_loss_pi(data)
123 | loss_pi.backward()
124 | self.pi_optimizer.step()
125 |
126 |                 # Unfreeze Q-networks so you can optimize them at the next TD3 step.
127 | for p in self.q_params:
128 | p.requires_grad = True
129 |
130 | # Finally, update target networks by polyak averaging.
131 | with torch.no_grad():
132 | for p, p_targ in zip(self.ac.parameters(), self.ac_targ.parameters()):
133 |                         # NB: We use in-place operations "mul_" and "add_" to update target
134 | # params, as opposed to "mul" and "add", which would make new tensors.
135 | p_targ.data.mul_(self.polyak)
136 | p_targ.data.add_((1 - self.polyak) * p.data)
137 |
138 | def get_action(self, o, noise_scale):
139 | a = self.ac.act(torch.as_tensor(o, dtype=torch.float32))
140 | a += noise_scale * np.random.randn(self.act_dim)
141 | return np.clip(a, -1, 1)
142 |
--------------------------------------------------------------------------------
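
MA-TD3_main.py defines the agent class but no driver loop appears in this dump. A minimal single-agent sketch of how `TD3` would be exercised, assuming a gym-style object `env` with `reset()`/`step()` and using the mini-batch size of 128 from the README; all other numbers are illustrative:

```python
# Hypothetical driver for the TD3 class above; `env` is an assumed gym-like object.
agent = TD3(obs_dim=2, act_dim=10)

o = env.reset()
for t in range(100000):
    a = agent.get_action(o, noise_scale=agent.act_noise)
    o2, r, done, _ = env.step(a)
    agent.replay_buffer.store(o, a, r, o2, done)
    o = env.reset() if done else o2
    if t >= 1000 and t % 50 == 0:   # warm up the buffer, then update periodically
        agent.update(batch_size=128, repeat_times=50)
```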
/plot_figure/FIGURE_3.m:
--------------------------------------------------------------------------------
1 | % X = [ 8 8 12 12]*6;
2 | % Y = [8 12 12 8]*6;
3 | % Z = [0.5 0.5 0.5 0.5];
4 | % p6=plot(10*6,10*6,'s','MarkerEdgeColor',[0.41176 0.41176 0.41176], 'MarkerFaceColor',[0.41176 0.41176 0.41176],'MarkerSize',10, 'LineWidth',2);
5 | %
6 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
7 | % hold on
8 | %
9 | % X = [ 28 28 32 32]*6;
10 | % Y = [8 12 12 8]*6;
11 | % Z = [0.5 0.5 0.5 0.5];
12 | %
13 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
14 | %
15 | % X = [ 28 28 32 32]*6;
16 | % Y = [28 32 32 28]*6;
17 | % Z = [0.5 0.5 0.5 0.5];
18 | %
19 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
20 | %
21 | % X = [ 8 8 12 12]*6;
22 | % Y = [28 32 32 28]*6;
23 | % Z = [0.5 0.5 0.5 0.5];
24 | %
25 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
26 | %
27 | % view(90,90)
28 | % p5=plot(0*6,15*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2);
29 | % hold on
30 | % plot(5*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
31 | % hold on
32 | % plot(13*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
33 | % hold on
34 | % plot(23*6,1*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
35 | % hold on
36 | % plot(31*6,3*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
37 | % hold on
38 | % plot(35*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
39 | % hold on
40 | % %
41 | % plot(20*6,35*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2);
42 | % hold on
43 | % plot(25*6,37*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
44 | % hold on
45 | % plot(35*6,37*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
46 | % hold on
47 | % plot(5*6,23*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
48 | % hold on
49 | % plot(10*6,25*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
50 | % hold on
51 | % plot(15*6,25*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
52 | % hold on
53 |
54 | radio_map_=-load("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat").MARK_PL;
55 | % radio_map=rot90(radio_map_);
56 | img=imagesc(radio_map_); % plot the radio map
57 | % axis xy
58 | %imrotated_img = imrotate(img, 90, 'bilinear');
59 | colorbar;
60 | hold on
61 | % figure(2)
62 | y1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k169_38.txt").';
63 | x1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k169_38.txt").';
64 | y2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k2131_35.txt").';
65 | x2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k2131_35.txt").';
66 | y3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k3255_44.txt").';
67 | x3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k3255_44.txt").';
68 |
69 |
70 | y4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k1428_95.txt").';
71 | x4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k1428_95.txt").';
72 | y5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k2424_32.txt").';
73 | x5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k2424_32.txt").';
74 | y6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k3483_48.txt").';
75 | x6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k3483_48.txt").';
76 | p5=plot(50,17,'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2);
77 | hold on
78 | plot(50,50, 'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
79 | hold on
80 | plot(50,84, 'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
81 | hold on
82 | for i=1:1:length(x1)
83 | p1=plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.93,0.69,0.13], 'MarkerFaceColor',[0.93,0.69,0.13]);
84 | end
85 | for i=1:1:length(x2)
86 | plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.85,0.33,0.10], 'MarkerFaceColor',[0.85,0.33,0.10]) ;
87 | end
88 | for i=1:1:length(x3)
89 | plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[1.00,0.41,0.16], 'MarkerFaceColor',[1.00,0.41,0.16]) ;
90 | end
91 | for i=1:1:length(x4)
92 | p2=plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.76,0.43,0.96], 'MarkerFaceColor',[0.76,0.43,0.96]);
93 | end
94 | for i=1:1:length(x5)
95 | plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58], 'MarkerFaceColor',[0.40,0.14,0.58])
96 | end
97 | for i=1:1:length(x6)
98 | plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.58,0.27,0.78], 'MarkerFaceColor',[0.58,0.27,0.78])
99 | end
100 | hold on
101 | p3=plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
102 | hold on
103 | p4=plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
104 | hold on
105 | plot(y2(1),x2(1),'+','MarkerEdgeColor',[0 0 0], 'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3);
106 | hold on
107 | plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0 0 0], 'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3)
108 | hold on
109 | plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
110 | hold on
111 | plot(y3(length(x3)),x3(length(x3)),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
112 | hold on
113 |
114 | % rot90;
115 | view(-90,90);
116 | axis equal
117 | axis([0 100 0 100]);
118 | legend([p1 p2],{'Distance-Aware','Communication-Aware'},'Location','NorthWest','Interpreter','latex','NumColumns',2)
119 | set(gca,'XTick',0:20:100)
120 | set(gca,'XTickLabel',{'0','10','20','30','40','50'})
121 | set(gca,'YTick',0:20:100)
122 | set(gca,'YTickLabel',{'0','10','20','30','40','50'})
123 | xlabel('$x \ ({\rm m})$','Interpreter','latex')
124 | ylabel('$y \ ({\rm m})$','Interpreter','latex')
125 | box on
126 | % grid on
127 | ah=axes('position',get(gca,'position'), 'visible','off');
128 | legend(ah,[p3 p4 p5],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
129 |
130 |
--------------------------------------------------------------------------------
/tradition_baseline/fig3.m:
--------------------------------------------------------------------------------
1 | % X = [ 8 8 12 12]*6;
2 | % Y = [8 12 12 8]*6;
3 | % Z = [0.5 0.5 0.5 0.5];
4 | % p6=plot(10*6,10*6,'s','MarkerEdgeColor',[0.41176 0.41176 0.41176], 'MarkerFaceColor',[0.41176 0.41176 0.41176],'MarkerSize',10, 'LineWidth',2);
5 | %
6 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
7 | % hold on
8 | %
9 | % X = [ 28 28 32 32]*6;
10 | % Y = [8 12 12 8]*6;
11 | % Z = [0.5 0.5 0.5 0.5];
12 | %
13 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
14 | %
15 | % X = [ 28 28 32 32]*6;
16 | % Y = [28 32 32 28]*6;
17 | % Z = [0.5 0.5 0.5 0.5];
18 | %
19 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
20 | %
21 | % X = [ 8 8 12 12]*6;
22 | % Y = [28 32 32 28]*6;
23 | % Z = [0.5 0.5 0.5 0.5];
24 | %
25 | % patch(Y,X,Z,[0.41176 0.41176 0.41176])
26 | %
27 | % view(90,90)
28 | % p5=plot(0*6,15*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2);
29 | % hold on
30 | % plot(5*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
31 | % hold on
32 | % plot(13*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
33 | % hold on
34 | % plot(23*6,1*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
35 | % hold on
36 | % plot(31*6,3*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
37 | % hold on
38 | % plot(35*6,17*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
39 | % hold on
40 | % %
41 | % plot(20*6,35*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2);
42 | % hold on
43 | % plot(25*6,37*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
44 | % hold on
45 | % plot(35*6,37*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
46 | % hold on
47 | % plot(5*6,23*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',5, 'LineWidth',2)
48 | % hold on
49 | % plot(10*6,25*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
50 | % hold on
51 | % plot(15*6,25*6,'p','MarkerEdgeColor',[1 1 0 ], 'MarkerFaceColor',[1 1 0 ],'MarkerSize',7, 'LineWidth',2)
52 | % hold on
53 |
54 | radio_map_=-load("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat").MARK_PL;
55 | % radio_map=rot90(radio_map_);
56 | img=imagesc(radio_map_); % plot the radio map
57 | % axis xy
58 | %imrotated_img = imrotate(img, 90, 'bilinear');
59 | colorbar;
60 | hold on
61 | % figure(2)
62 | y1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k169_38.txt").';
63 | x1=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k169_38.txt").';
64 | y2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k2131_35.txt").';
65 | x2=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k2131_35.txt").';
66 | y3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/y_k3255_44.txt").';
67 | x3=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短/x_k3255_44.txt").';
68 |
69 |
70 | y4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k1428_95.txt").';
71 | x4=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k1428_95.txt").';
72 | y5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k2424_32.txt").';
73 | x5=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k2424_32.txt").';
74 | y6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/y_k3483_48.txt").';
75 | x6=importdata("C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/路径短+SINR/x_k3483_48.txt").';
76 | p6=plot(50,17,'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2);
77 | hold on
78 | plot(50,50, 'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
79 | hold on
80 | plot(50,84, 'p','MarkerEdgeColor',[1 0 0], 'MarkerFaceColor',[1 0 0],'MarkerSize',10,'LineWidth',2)
81 | hold on
82 | for i=1:1:length(x1)
83 | p1=plot(y1(i),x1(i),'o','MarkerSize',3,'MarkerEdgeColor',[0.15,0.15,0.15], 'MarkerFaceColor',[0.15,0.15,0.15]);
84 | end
85 | for i=1:1:length(x2)
86 | plot(y2(i),x2(i),'o','MarkerSize',3,'MarkerEdgeColor',[1.00,0.41,0.16], 'MarkerFaceColor',[1.00,0.41,0.16]) ;
87 | end
88 | for i=1:1:length(x3)
89 | plot(y3(i),x3(i),'o','MarkerSize',4,'MarkerEdgeColor',[ 0.58,0.27,0.78], 'MarkerFaceColor',[ 0.58,0.27,0.78]) ;
90 | end
91 | for i=1:1:length(x4)
92 | p2=plot(y4(i),x4(i),'^','MarkerSize',3,'MarkerEdgeColor',[0.24,0.24,0.24], 'MarkerFaceColor',[0.24,0.24,0.24]);
93 | end
94 | for i=1:1:length(x5)
95 | plot(y5(i),x5(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.85,0.33,0.10], 'MarkerFaceColor',[0.85,0.33,0.10])
96 | end
97 | for i=1:1:length(x6)
98 | plot(y6(i),x6(i),'^','MarkerSize',4,'MarkerEdgeColor',[0.40,0.14,0.58], 'MarkerFaceColor',[ 0.40,0.14,0.58])
99 | end
100 | hold on
101 | p4=plot(y1(1),x1(1),'+','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
102 | hold on
103 | p5=plot(y1(length(x1)),x1(length(x1)),'X','MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0,0,0],'MarkerSize',13, 'LineWidth',3);
104 | hold on
105 | plot(y2(1),x2(1),'+','MarkerEdgeColor',[0 0 0], 'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3);
106 | hold on
107 | plot(y2(length(x2)),x2(length(x2)),'X','MarkerEdgeColor',[0 0 0], 'MarkerFaceColor',[0 0 0],'MarkerSize',13,'LineWidth',3)
108 | hold on
109 | plot(y3(1),x3(1),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
110 | hold on
111 | plot(y3(length(x3)),x3(length(x3)),'+','MarkerEdgeColor',[0.31,0.30,0.30], 'MarkerFaceColor',[0.31,0.30,0.30],'MarkerSize',13, 'LineWidth',3);
112 | hold on
113 |
114 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_1.txt', 'r');
115 | % fileID="C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_1.txt";
116 | data = textscan(fileID, '%f32,%f32', 'Delimiter', ',');
117 | x1=data{1};
118 | y1=data{2};
119 | fclose(fileID);
120 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_2.txt', 'r');
121 | data = textscan(fileID, '%f,%f', 'Delimiter', ',');
122 | x2=data{1};
123 | y2=data{2};
124 | fclose(fileID);
125 | fileID = fopen('C:/Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_3.txt', 'r');
126 | data = textscan(fileID, '%f,%f', 'Delimiter', ',');
127 | x3=data{1};
128 | y3=data{2};
129 | fclose(fileID);
130 | for i=1:2:length(x1)
131 | p3=plot(y1(i),x1(i),'*','MarkerSize',5,'MarkerEdgeColor',[0.50,0.50,0.50], 'MarkerFaceColor',[0.50,0.50,0.50]);
132 | end
133 | for i=1:2:length(x2)
134 | plot(y2(i),x2(i),'*','MarkerSize',6,'MarkerEdgeColor',[ 0.93,0.69,0.13], 'MarkerFaceColor',[ 0.93,0.69,0.13 ]) ;
135 | end
136 | for i=1:2:length(x3)
137 | plot(y3(i),x3(i),'*','MarkerSize',5,'MarkerEdgeColor',[ 0.76,0.43,0.96], 'MarkerFaceColor',[0.76,0.43,0.96]) ;
138 | end
139 |
140 | % rot90;
141 | view(-90,90);
142 | axis equal
143 | axis([0 100 0 100]);
144 | legend([p1 p2 p3],{'Distance-Aware MA-DDPG ($\kappa_1=0$)','Communication-Aware MA-DDPG ($\kappa_1=0.1$)', '$A^{\ast}$ Search Algorithm'},'Location','NorthWest','Interpreter','latex','NumColumns',2)
145 | set(gca,'XTick',0:20:100)
146 | set(gca,'XTickLabel',{'0','10','20','30','40','50'})
147 | set(gca,'YTick',0:20:100)
148 | set(gca,'YTickLabel',{'0','10','20','30','40','50'})
149 | xlabel('$x \ ({\rm m})$','Interpreter','latex')
150 | ylabel('$y \ ({\rm m})$','Interpreter','latex')
151 | box on
152 | % grid on
153 | ah=axes('position',get(gca,'position'), 'visible','off');
154 | legend(ah,[p4 p5 p6],{'MU Starting Point ','MU Destination','SU'},'Location','NorthEast','Interpreter','latex','NumColumns',4)
155 |
156 |
--------------------------------------------------------------------------------
/algorithm/MA-PPO_main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow import keras
3 | from tensorflow.keras.layers import *
4 | import numpy as np
5 | import gym
6 | import matplotlib.pyplot as plt
7 | import math
8 | from scipy.io import loadmat
9 | np.random.seed(2)
10 | tf.random.set_seed(2)
11 |
12 | EP_MAX = 500
13 | BATCH = 32
14 | EP_LEN = 1000
15 | GAMMA = 0.9
16 | A_LR = 0.0001
17 | C_LR = 0.0005
18 |
19 | A_UPDATE_STEPS = 20
20 | C_UPDATE_STEPS = 20
21 | S_DIM, A_DIM = 2, 10
22 | epsilon=0.2
23 |
24 | n_width=100
25 | n_height = 100
26 | m = loadmat("mapdata_0717.mat")
27 | #correct_action=0
28 | MARK= m["MARK_new"]
29 | PL_AP=m["MARK_PL_real"]
30 | n_mu=3
31 | n_M=5
32 | n_o=6*7
33 | a_bound=1
34 |
35 | class PPO(object):
36 |
37 | def __init__(self):
38 | self.opt_a = tf.compat.v1.train.AdamOptimizer(A_LR)
39 | self.opt_c = tf.compat.v1.train.AdamOptimizer(C_LR)
40 |
41 | self.model_a = self._build_anet(trainable=True)
42 | self.model_a_old = self._build_anet(trainable=False)
43 | self.model_c = self._build_cnet()
44 |
45 | def _build_anet(self,trainable=True):
46 | tfs_a = Input([S_DIM], )
47 | l1 = Dense(100, 'relu',trainable=trainable)(tfs_a)
48 | mu = a_bound * Dense(A_DIM, 'tanh',trainable=trainable)(l1)
49 | sigma = Dense(A_DIM, 'softplus',trainable=trainable)(l1)
50 | model_a = keras.models.Model(inputs=tfs_a, outputs=[mu, sigma])
51 | return model_a
52 |
53 | def _build_cnet(self):
54 | tfs_c = Input([S_DIM], )
55 | l1 = Dense(100, 'relu')(tfs_c)
56 | v = Dense(1)(l1)
57 | model_c = keras.models.Model(inputs=tfs_c, outputs=v)
58 | model_c.compile(optimizer=self.opt_c, loss='mse')
59 | return model_c
60 |
61 | def update(self, s, a, r):
62 | self.model_a_old.set_weights(self.model_a.get_weights())
63 |
64 | mu, sigma = self.model_a_old(s)
65 | oldpi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
66 | old_prob_a = oldpi.prob(a)
67 |
68 | v = self.get_v(s)
69 | adv = r - v
70 |
71 | for i in range(A_UPDATE_STEPS):
72 | with tf.GradientTape() as tape:
73 | mu, sigma = self.model_a(s)
74 | pi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
75 | ratio = pi.prob(a) / (old_prob_a + 1e-5)
76 | surr = ratio * adv
77 | x2 = tf.clip_by_value(ratio, 1. - epsilon, 1. + epsilon) * adv
78 | x3 = tf.minimum(surr, x2)
79 | aloss = -tf.reduce_mean(x3)
80 |
81 | a_grads = tape.gradient(aloss, self.model_a.trainable_weights)
82 | a_grads_and_vars = zip(a_grads, self.model_a.trainable_weights)
83 | self.opt_a.apply_gradients(a_grads_and_vars)
84 |
85 | self.model_c.fit(s, r, verbose=0, shuffle=False,epochs=C_UPDATE_STEPS)
86 |
87 | def choose_action(self, s):
88 | mu, sigma = self.model_a(s)
89 | pi = tf.compat.v1.distributions.Normal(loc=mu, scale=sigma)
90 | a = tf.squeeze(pi.sample(1), axis=0)
91 | return np.clip(a, -2, 2)
92 |
93 | def get_v(self, s):
94 | v = self.model_c(s)
95 | return v
96 |
97 | def cosVector(x,y):
98 |     result1=0.0
99 |     result2=0.0
100 |     result3=0.0
101 | for i in range(len(x)):
102 | result1+=x[i]*y[i] #sum(X*Y)
103 | result2+=x[i]**2 #sum(X*X)
104 | result3+=y[i]**2 #sum(Y*Y)
105 | return result1/((result2*result3)**0.5)
106 |
107 | ppo = PPO()
108 | end_location = [15*2,32*2]
109 | all_ep_r = []
110 | all_ep_reward_p=[]
111 | for ep in range(EP_MAX): #train
112 | s = np.array([4*2,5*2], dtype=np.float32)
113 | buffer_s, buffer_a, buffer_r = [], [], []
114 | ep_r = 0
115 | done1 = False
116 | distance_01_max=math.sqrt((s[0]-end_location[0])*(s[0]-end_location[0])/4+(s[1]-end_location[1])*(s[1]-end_location[1])/4)
117 |
118 | s = np.reshape(s, (-1, S_DIM))
119 | observation_su1 = np.array([17, 25*2], dtype=np.float32)
120 | for t in range(EP_LEN): # in one episode
121 | a = ppo.choose_action(s)
122 | if not done1:
123 | [old_x, old_y] = s[0]
124 | new_x, new_y = int(old_x), int(old_y)
125 | new_x=int(old_x+a[0,0])
126 | new_y=int(old_y+a[0,1])
127 | if int(new_x) <= 0:
128 | new_x = 1
129 | if int(new_x) >= n_width:
130 | new_x = int(n_width)-1
131 | if int(new_y) <= 0:
132 | new_y = 1
133 | if int(new_y) >= n_height:
134 | new_y = int(n_height)-1
135 | if MARK[new_x,new_y] == 2:
136 | new_x, new_y = old_x, old_y
137 | s_=np.array([new_x, new_y], dtype=np.float32)
138 | else:
139 | s_ = s
140 | a=a[0]
141 | if a[8]==-1:
142 | a[8]=-0.9999999
143 | # if action2[8]==-1:
144 | # action2[8]=-0.9999999
145 | # if action3[8]==-1:
146 | # action3[8]=-0.9999999
147 | if a[8]==1:
148 | a[8]=0.9999999
149 |
150 | w_1=np.array([a[2]* math.exp(1)**(1j*(1+a[3])*math.pi), a[4]* math.exp(1)**(1j*(1+a[5])*math.pi), a[6]* math.exp(1)**(1j*(1+a[7])*math.pi)])
151 | # w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)])
152 | # w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)])
153 | theta_1=cosVector([1,0,0],[s_[0]-50,s_[1]-100, 1-2])
154 | a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
155 | b_1_AP_LOS=math.sqrt(PL_AP[int(s_[0]), int(s_[1])])
156 | h_1=b_1_AP_LOS*a_1
157 | interference_1=10**(-9)
158 | # theta_2=cosVector([1,0,0],[observation2_[0]-50,observation2_[1]-100, 1-2])
159 | # a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])#
160 | # b_2_AP_LOS=math.sqrt(PL_AP[int(observation2_[0]), int(observation2_[1])])
161 | # h_2=b_2_AP_LOS*a_2
162 | # interference_2=10**(-9)
163 | # theta_3=cosVector([1,0,0],[observation3_[0]-50,observation3_[1]-100, 1-2])
164 | # a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])#
165 | # b_3_AP_LOS=math.sqrt(PL_AP[int(observation3_[0]), int(observation3_[1])])
166 | # h_3=b_3_AP_LOS*a_3
167 | # interference_3=10**(-9)
168 | theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2])
169 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
170 | b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])])
171 | h_4=b_4_AP_LOS*a_4
172 | interference_4=10**(-9)
173 |
174 | if a[8]>0:
175 | interference_1+=(1-(a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2
176 | else:
177 | interference_4+=((a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
178 |
179 | SINR_1=((a[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
180 | # SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2
181 | # SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3
182 | SINR_4=(1-(a[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4
183 |
184 |
185 | buffer_s.append(s)
186 | buffer_a.append(a)
187 | distance_01_2=(s_[0]-end_location[0])*(s_[0]-end_location[0])/4+(s_[1]-end_location[1])*(s_[1]-end_location[1])/4
188 | distance_01 = math.sqrt(distance_01_2)
189 | s_ = np.reshape(s_, (-1, S_DIM))
190 | r= -(distance_01/50)
191 | if distance_01==0:
192 | done1 = True
193 | #os.system("pause")
194 | r=1
195 | r = np.reshape(r, (-1, 1))
196 |             buffer_r.append(r)
197 | a = np.reshape(a, (-1, A_DIM))
198 | s_ = np.reshape(s_, (-1, S_DIM))
199 | s = s_
200 | ep_r += r[0]
201 |
202 | # update ppo
203 | if (t + 1) % BATCH == 0 or t == EP_LEN - 1 or done1:
204 | #print("here")
205 | v_s_ = ppo.get_v(s_)[0,0]
206 | discounted_r = []
207 | for r in buffer_r[::-1]:
208 | v_s_ = r + GAMMA * v_s_
209 | discounted_r.append(v_s_)
210 | discounted_r.reverse()
211 |
212 | bs = np.vstack(buffer_s)
213 | ba = np.vstack(buffer_a)
214 | br = np.array(discounted_r)
215 | buffer_s, buffer_a, buffer_r = [], [], []
216 | ppo.update(bs, ba, br)
217 | if done1:
218 | print("success!!!!!!!!!!!!")
219 | break
220 | if ep == 0:
221 | # all_ep_r.append(ep_r)
222 | all_ep_reward_p.append(ep_r)
223 | else:
224 | # all_ep_r.append(all_ep_r[-1] * 0.9 + ep_r * 0.1)
225 | all_ep_reward_p.append(all_ep_reward_p[-1] * 0.9 + ep_r * 0.1)
226 | print(
227 | 'Ep: %i' % ep,
228 | "|Ep_r: %i" % ep_r,
229 | )
230 |
231 | plt.plot(all_ep_reward_p)
232 |
233 |
234 |
235 | # while 1: #play
236 | # s = env.reset()
237 | # for t in range(EP_LEN):
238 | # s = s.reshape([-1, S_DIM])
239 | # env.render()
240 | # s, r, done, info = env.step(ppo.choose_action(s))
241 | # if done:
242 | # break
243 |
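244 |
245 | # --- Added note: a minimal, self-contained sketch of the bootstrapped
246 | # discounted-return computation used in the update block above. Returns are
247 | # built backwards from the critic's value estimate of the last state, i.e.
248 | # G_t = r_t + GAMMA * G_{t+1}, with the recursion seeded by v(s_).
249 | # (Illustrative helper only; it is not called by the training loop.)
250 | def discounted_returns(rewards, v_last, gamma=GAMMA):
251 |     returns = []
252 |     v = v_last
253 |     for r in reversed(rewards):
254 |         v = r + gamma * v
255 |         returns.append(v)
256 |     returns.reverse()
257 |     return returns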
--------------------------------------------------------------------------------
/tradition_baseline/pso.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from scipy.io import loadmat
6 | from scipy.special import erfc, erfcinv
7 | from sklearn.cluster import KMeans
8 |
9 | # PSO baseline: beamforming optimization with K-means clustering of robots and users
10 |
11 | m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat")
12 | #correct_action=0
13 | MARK= m["MARK_new"]
14 | PL_AP=m["MARK_PL_real"]
15 |
16 | def cosVector(x, y):
17 |     result1 = 0.0
18 |     result2 = 0.0
19 |     result3 = 0.0
20 |     for i in range(len(x)):
21 |         result1 += x[i]*y[i]  # sum(X*Y)
22 |         result2 += x[i]**2    # sum(X*X)
23 |         result3 += y[i]**2    # sum(Y*Y)
24 |     return result1/((result2*result3)**0.5)
25 |
26 | # Inverse of the Gaussian Q-function
27 | def Q_inv(x):
28 | return np.sqrt(2) * erfcinv(2 * x)
29 |
30 | # Channel dispersion V(gamma_k(t))
31 | def V(gamma):
32 |     # Placeholder: substitute the actual dispersion formula if necessary
33 |     return gamma
34 |
35 | # Objective: average log10 success probability under the finite-blocklength model
36 | def objective_function(w, num_points, A1, A2, h_k, sigma2, M, D):
37 |     # SINR gamma_k(t) of the i-th user after successive decoding
38 |     term_tol=0
39 |     db_tol=0
40 |     for i in range(num_points):
41 |         gamma_k_t = np.abs(A1[i] @ w)**2 / (i * np.abs(A1[i] @ w)**2 + sigma2)
42 |
43 |         # Decoding error eps = Q(ln2 * sqrt(M/(1-(1+gamma)^-2)) * (log2(1+gamma) - D/M))
44 |         term1 = np.log(2) * np.sqrt(M / (1-(1+gamma_k_t)**(-2)))
45 |         term2 = np.log2(1 + gamma_k_t) - D / M
46 |         term_tol = term1 * term2
47 |         db_tol += math.log10(max(1-0.5 * erfc(term_tol / np.sqrt(2)), 10**(-20)))
48 |
49 |     return (db_tol/num_points)
50 |
51 |
52 |
53 | class Particle:
54 | def __init__(self, dimension):
55 | self.position = np.random.rand(dimension)
56 | self.velocity = np.random.rand(dimension) - 0.5
57 | self.best_position = self.position.copy()
58 | self.best_score = -np.inf
59 |
60 | def update_velocity(self, global_best_position, inertia_weight=0.5, cognitive_coeff=2, social_coeff=2):
61 | cognitive_component = cognitive_coeff * random.random() * (self.best_position - self.position)
62 | social_component = social_coeff * random.random() * (global_best_position - self.position)
63 | self.velocity = inertia_weight * self.velocity + cognitive_component + social_component
64 |
65 | def update_position(self):
66 | self.position += self.velocity
67 | self.position = np.clip(self.position, 0, 1) # Ensure within bounds
68 |
69 | class PSO:
70 | def __init__(self, objective_function, dimension, swarm_size=30, iterations=100):
71 | self.objective_function = objective_function
72 | self.dimension = dimension
73 | self.swarm_size = swarm_size
74 | self.iterations = iterations
75 | self.swarm = [Particle(dimension) for _ in range(swarm_size)]
76 | self.global_best_position = np.random.rand(dimension)
77 | self.global_best_score = -np.inf
78 |
79 | def optimize(self, *args):
80 | for iteration in range(self.iterations):
81 | for particle in self.swarm:
82 | # print(particle.position)
83 | score = self.objective_function(particle.position, *args)
84 | if score > particle.best_score:
85 | particle.best_score = score
86 | particle.best_position = particle.position.copy()
87 |
88 | if score > self.global_best_score:
89 | self.global_best_score = score
90 | self.global_best_position = particle.position.copy()
91 |
92 | for particle in self.swarm:
93 | particle.update_velocity(self.global_best_position)
94 | particle.update_position()
95 |
96 | # print(f"Iteration {iteration + 1}/{self.iterations}, Best Score: {self.global_best_score}")
97 |
98 | return self.global_best_position, self.global_best_score
99 |
100 | x_coords, y_coords = [], []
101 | for k in range(3):
102 |     path_file = 'C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/response_code/fig3/A算法/robot_path_{}.txt'.format(k+1)
103 |     with open(path_file, 'r') as f:
104 |         lines = f.readlines()
105 |
106 |     # Parse the path data into (x, y) integer pairs
107 |     path = [(int(line.split(',')[0]), int(line.split(',')[1])) for line in lines]
108 |
109 |     # Collect the x and y coordinates of robot k+1
110 |     x_coords.append([point[0] for point in path])
111 |     y_coords.append([point[1] for point in path])
112 |
113 | def calculate_interference(coords, labels, cluster_num):
114 | interference = 0
115 | cluster_points = coords[labels == cluster_num]
116 | num_points = len(cluster_points)
117 | for i in range(num_points):
118 | for j in range(i + 1, num_points):
119 | interference += np.linalg.norm(cluster_points[i] - cluster_points[j])
120 | return interference
121 |
122 | power_array=[math.sqrt(0.02), math.sqrt(0.04), math.sqrt(0.06), math.sqrt(0.08),math.sqrt(0.1)]
123 | for power_j in range(len(power_array)):
124 | w = np.random.rand(3)
125 | for t in range(77):
126 | # print('time+++++++++')
127 | # for cl in range(1):
128 | # for cl in range(2):
129 |         # Coordinates of the two mobile robots at time t
130 |         robot_coords = np.array([[x_coords[0][t], y_coords[0][t]], [x_coords[1][t], y_coords[1][t]]])
131 |
132 |         # Coordinates of the three fixed users
133 |         user_coords = np.array([[17, 50], [50, 50], [84, 50]])
134 |
135 |         # Stack all coordinates
136 |         all_coords = np.vstack((robot_coords, user_coords))
137 |
138 |         # Define the KMeans model
139 |         kmeans = KMeans(n_clusters=3)
140 |
141 |         # Cluster all of the coordinates
142 |         kmeans.fit(all_coords)
143 |
144 |         # Cluster labels
145 |         labels = kmeans.labels_
146 |
147 |         # Cluster centers
148 |         cluster_centers = kmeans.cluster_centers_
149 |
150 |         # Print the results
151 |         # print("Labels:", labels)
152 |         # print("Cluster centers:", cluster_centers)
153 |
154 |         # Plot the clustering result
155 |         # plt.scatter(all_coords[:, 0], all_coords[:, 1], c=labels, cmap='viridis')
156 |         # plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1], s=300, c='red', marker='x')
157 |         # plt.xlabel('X')
158 |         # plt.ylabel('Y')
159 |         # plt.title('KMeans Clustering of Robots and Users')
160 |         # plt.show()
161 |
162 |         robot_powers = np.array([10., 20., 30., 10., 20.])  # float placeholders, overwritten below
163 |         theta_1=cosVector([1,0,0],[all_coords[0][0]-50,all_coords[0][1]-100, 1-2])  # direction cosine toward the AP at (50, 100, 2)
164 |         aLP_1=np.array([1, np.exp(-1j*math.pi*theta_1), np.exp(-2j*math.pi*theta_1)])  # 3-antenna ULA steering vector
165 |         b_1_AP_LOS=math.sqrt(PL_AP[int(all_coords[0][0]), int(all_coords[0][1])])
166 |         h_1=b_1_AP_LOS*aLP_1
167 |         interference_1=10**(-9)
168 |         theta_2=cosVector([1,0,0],[all_coords[1][0]-50,all_coords[1][1]-100, 1-2])
169 |         aLP_2=np.array([1, np.exp(-1j*math.pi*theta_2), np.exp(-2j*math.pi*theta_2)])
170 |         b_2_AP_LOS=math.sqrt(PL_AP[int(all_coords[1][0]), int(all_coords[1][1])])
171 |         h_2=b_2_AP_LOS*aLP_2
172 |         interference_2=10**(-9)
173 |         theta_3=cosVector([1,0,0],[all_coords[2][0]-50,all_coords[2][1]-100, 1-2])
174 |         aLP_3=np.array([1, np.exp(-1j*math.pi*theta_3), np.exp(-2j*math.pi*theta_3)])
175 |         b_3_AP_LOS=math.sqrt(PL_AP[int(all_coords[2][0]), int(all_coords[2][1])])
176 |         h_3=b_3_AP_LOS*aLP_3
177 |         interference_3=10**(-9)
178 |         theta_4=cosVector([1,0,0],[all_coords[3][0]-50,all_coords[3][1]-100, 1-2])
179 |         a_4=np.array([1, np.exp(-1j*math.pi*theta_4), np.exp(-2j*math.pi*theta_4)])
180 |         b_4_AP_LOS=math.sqrt(PL_AP[int(all_coords[3][0]), int(all_coords[3][1])])
181 |         h_4=b_4_AP_LOS*a_4
182 |         interference_4=10**(-9)
183 |         theta_5=cosVector([1,0,0],[all_coords[4][0]-50,all_coords[4][1]-100, 1-2])
184 |         a_5=np.array([1, np.exp(-1j*math.pi*theta_5), np.exp(-2j*math.pi*theta_5)])
185 |         b_5_AP_LOS=math.sqrt(PL_AP[int(all_coords[4][0]), int(all_coords[4][1])])
186 |         h_5=b_5_AP_LOS*a_5
187 |         interference_5=10**(-9)
188 | # theta_6=cosVector([1,0,0],[all_coords[5][0]-50,all_coords[5][1]-100, 1-2])
189 | # a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])#
190 | # b_6_AP_LOS=math.sqrt(PL_AP[int(all_coords[5][0]), int(all_coords[5][1])])
191 | # h_6=b_6_AP_LOS*a_6
192 | # interference_6=10**(-9)
193 | H_array=[]
194 | H_array.append(h_1)
195 | H_array.append(h_2)
196 | H_array.append(h_3)
197 | H_array.append(h_4)
198 | H_array.append(h_5)
199 | # H_array.append(h_6)
200 | H_array=np.array(H_array)
201 |
202 | for r in range(len(robot_powers)):
203 | robot_powers[r]=np.abs(H_array[r] @ w*power_array[power_j]/math.sqrt(3))**2
204 |
205 | #
206 | num_clusters = 3
207 | interference_list = []
208 |
209 | gamma_avg=0
210 | gamma_array=np.zeros(3)
211 |
212 | for cluster_num in range(num_clusters):
213 | # interference = calculate_interference(all_coords, labels, cluster_num)
214 | w = np.random.rand(3)
215 | previous_A = w
216 | counter = 0
217 | for episode in range(500):
218 | interference = 0
219 | cluster_points = all_coords[labels == cluster_num]
220 | cluster_powers = robot_powers[labels == cluster_num]
221 |
222 |                 # Sort the users in the cluster by power, strongest first
223 | sorted_indices = np.argsort(-cluster_powers)
224 | sorted_points = cluster_points[sorted_indices]
225 | # sorted_powers = cluster_powers[sorted_indices]
226 | cluster_hk = H_array[labels == cluster_num]
227 | # cluster_wk = wk[labels == cluster_num]
228 |
229 | # for i_j in range(len(sorted_points)):
230 | # sorted_powers
231 |
232 | num_points = len(sorted_points)
233 | # for i in range(num_points):
234 | interference = 0
235 | A1 = cluster_hk * power_array[power_j]/math.sqrt(3)
236 | A2 = 1
237 |                 h_k = H_array  # example channel vectors (unused by objective_function)
238 |                 # K_c = [0, 1, 2, 3]  # example indices of the other users
239 | sigma2 = 10**(-9)
240 | M = 50
241 | D = 100
242 |
243 |                 # Particle swarm optimization
244 | pso = PSO(objective_function, dimension=3, swarm_size=30, iterations=100)
245 | w, best_score = pso.optimize(num_points, A1, A2, h_k, sigma2, M, D)
246 |
247 |
248 | # sinr = np.abs(A1 @ w)**2 / (i * np.abs(A1 @ w)**2 + sigma2)
249 |
250 |                 # print("Best position (beamforming vector):", best_position)
251 | # if episode==99:
252 |                 if np.allclose(w, previous_A):  # convergence: w essentially unchanged from the previous round
253 | counter += 1
254 | else:
255 | counter = 0
256 |
257 | previous_A = w
258 |
259 |                 # Break once w has stayed unchanged for ten consecutive rounds
260 |                 if counter >= 10:
261 |                     # print(cluster_num, 'success!!!!!!!!!!!!')
262 | break
263 |
264 |
265 |
266 | for i in range(num_points):
267 | gamma_k_t = np.abs(A1[i] @ w)**2 / (i * np.abs(A1[i] @ w)**2 + sigma2)
268 | gamma_avg+=gamma_k_t
269 | gamma_k_t=gamma_k_t#/num_points
270 |
271 | gamma_array[cluster_num] = best_score
272 |             # print("Best score:", best_score)  # math.log10(max(1-0.5 * erfc(best_score / np.sqrt(2)),10**(-20)))
273 |
274 | filename='DB_NOMA_new_'+str(power_j)+'.txt'
275 | with open (filename, 'a') as fileobject:
276 | fileobject.write(str((gamma_array[0]+gamma_array[1]+gamma_array[2])/6)+'\n')
277 |
278 |
279 | # for j in range(0, i):
280 |                 # # Compute the interference, assumed inversely proportional to distance
281 | # interference += np.abs(H_array[i] @ w*power_array[power_j]/math.sqrt(3))**2
282 |
283 |                 # sinr += sorted_powers[i] / interference  # assumed interference formula
284 | # return interference
285 | # interference_list.append(interference)
286 |
287 |     # Print the interference of each cluster
288 | # for i, interference in enumerate(interference_list):
289 | # print(f"Cluster {i} interference: {interference}")
290 |
291 |
292 | # Example parameters
293 |
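294 | # --- Added note: a minimal usage sketch of the PSO class above on a hypothetical
295 | # toy objective (a concave quadratic maximized at w = [0.5, 0.5, 0.5]). It
296 | # exercises the same optimize() API as the beamforming objective and is not part
297 | # of the experiment; run this file directly to try it.
298 | def _toy_objective(w):
299 |     return -np.sum((w - 0.5)**2)  # maximized at w = 0.5 in every dimension
300 |
301 | if __name__ == "__main__":
302 |     demo = PSO(_toy_objective, dimension=3, swarm_size=10, iterations=50)
303 |     demo_w, demo_score = demo.optimize()
304 |     print("toy PSO optimum:", demo_w, "score:", demo_score)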
--------------------------------------------------------------------------------
/environment/environment.yaml:
--------------------------------------------------------------------------------
1 | name: base
2 | channels:
3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch
4 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/
6 | - defaults
7 | - conda-forge
8 | dependencies:
9 | - alabaster=0.7.12=pyhd3eb1b0_0
10 | - anaconda-client=1.11.2=py310haa95532_0
11 | - anaconda-navigator=2.4.0=py310haa95532_0
12 | - anaconda-project=0.11.1=py310haa95532_0
13 | - anyio=3.5.0=py310haa95532_0
14 | - appdirs=1.4.4=pyhd3eb1b0_0
15 | - argon2-cffi=21.3.0=pyhd3eb1b0_0
16 | - argon2-cffi-bindings=21.2.0=py310h2bbff1b_0
17 | - arrow=1.2.3=py310haa95532_1
18 | - astroid=2.14.2=py310haa95532_0
19 | - astropy=5.1=py310h9128911_0
20 | - asttokens=2.0.5=pyhd3eb1b0_0
21 | - atomicwrites=1.4.0=py_0
22 | - attrs=22.1.0=py310haa95532_0
23 | - automat=20.2.0=py_0
24 | - autopep8=1.6.0=pyhd3eb1b0_1
25 | - babel=2.11.0=py310haa95532_0
26 | - backcall=0.2.0=pyhd3eb1b0_0
27 | - backports=1.1=pyhd3eb1b0_0
28 | - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0
29 | - backports.tempfile=1.0=pyhd3eb1b0_1
30 | - backports.weakref=1.0.post1=py_1
31 | - bcrypt=3.2.0=py310h2bbff1b_1
32 | - beautifulsoup4=4.11.1=py310haa95532_0
33 | - binaryornot=0.4.4=pyhd3eb1b0_1
34 | - black=22.6.0=py310haa95532_0
35 | - blas=1.0=mkl
36 | - bleach=4.1.0=pyhd3eb1b0_0
37 | - blosc=1.21.3=h6c2663c_0
38 | - bokeh=2.4.3=py310haa95532_0
39 | - boltons=23.0.0=py310haa95532_0
40 | - bottleneck=1.3.5=py310h9128911_0
41 | - brotli=1.0.9=h2bbff1b_7
42 | - brotli-bin=1.0.9=h2bbff1b_7
43 | - brotlipy=0.7.0=py310h2bbff1b_1002
44 | - bzip2=1.0.8=he774522_0
45 | - ca-certificates=2023.05.30=haa95532_0
46 | - certifi=2023.5.7=py310haa95532_0
47 | - cffi=1.15.1=py310h2bbff1b_3
48 | - cfitsio=3.470=h2bbff1b_7
49 | - chardet=4.0.0=py310haa95532_1003
50 | - charls=2.2.0=h6c2663c_0
51 | - charset-normalizer=2.0.4=pyhd3eb1b0_0
52 | - click=8.0.4=py310haa95532_0
53 | - cloudpickle=2.0.0=pyhd3eb1b0_0
54 | - clyent=1.2.2=py310haa95532_1
55 | - colorama=0.4.6=py310haa95532_0
56 | - colorcet=3.0.1=py310haa95532_0
57 | - comm=0.1.2=py310haa95532_0
58 | - conda=23.3.1=py310haa95532_0
59 | - conda-build=3.24.0=py310haa95532_0
60 | - conda-content-trust=0.1.3=py310haa95532_0
61 | - conda-pack=0.6.0=pyhd3eb1b0_0
62 | - conda-package-handling=2.0.2=py310haa95532_0
63 | - conda-package-streaming=0.7.0=py310haa95532_0
64 | - conda-repo-cli=1.0.41=py310haa95532_0
65 | - conda-token=0.4.0=pyhd3eb1b0_0
66 | - conda-verify=3.4.2=py_1
67 | - console_shortcut=0.1.1=4
68 | - constantly=15.1.0=py310haa95532_0
69 | - contourpy=1.0.5=py310h59b6b97_0
70 | - cookiecutter=1.7.3=pyhd3eb1b0_0
71 | - cryptography=39.0.1=py310h21b164f_0
72 | - cssselect=1.1.0=pyhd3eb1b0_0
73 | - curl=7.87.0=h2bbff1b_0
74 | - cycler=0.11.0=pyhd3eb1b0_0
75 | - cytoolz=0.12.0=py310h2bbff1b_0
76 | - daal4py=2023.0.2=py310hf497b98_0
77 | - dal=2023.0.1=h59b6b97_26646
78 | - dask=2022.7.0=py310haa95532_0
79 | - dask-core=2022.7.0=py310haa95532_0
80 | - datashader=0.14.4=py310haa95532_0
81 | - datashape=0.5.4=py310haa95532_1
82 | - debugpy=1.5.1=py310hd77b12b_0
83 | - decorator=5.1.1=pyhd3eb1b0_0
84 | - defusedxml=0.7.1=pyhd3eb1b0_0
85 | - diff-match-patch=20200713=pyhd3eb1b0_0
86 | - dill=0.3.6=py310haa95532_0
87 | - distributed=2022.7.0=py310haa95532_0
88 | - docstring-to-markdown=0.11=py310haa95532_0
89 | - docutils=0.18.1=py310haa95532_3
90 | - entrypoints=0.4=py310haa95532_0
91 | - et_xmlfile=1.1.0=py310haa95532_0
92 | - executing=0.8.3=pyhd3eb1b0_0
93 | - filelock=3.9.0=py310haa95532_0
94 | - flake8=6.0.0=py310haa95532_0
95 | - flask=2.2.2=py310haa95532_0
96 | - flit-core=3.6.0=pyhd3eb1b0_0
97 | - fonttools=4.25.0=pyhd3eb1b0_0
98 | - freetype=2.12.1=ha860e81_0
99 | - fsspec=2022.11.0=py310haa95532_0
100 | - future=0.18.3=py310haa95532_0
101 | - gensim=4.3.0=py310h4ed8f06_0
102 | - giflib=5.2.1=h8cc25b3_3
103 | - glib=2.69.1=h5dc1a3c_2
104 | - glob2=0.7=pyhd3eb1b0_0
105 | - greenlet=2.0.1=py310hd77b12b_0
106 | - gst-plugins-base=1.18.5=h9e645db_0
107 | - gstreamer=1.18.5=hd78058f_0
108 | - hdf5=1.10.6=h1756f20_1
109 | - heapdict=1.0.1=pyhd3eb1b0_0
110 | - holoviews=1.15.4=py310haa95532_0
111 | - huggingface_hub=0.10.1=py310haa95532_0
112 | - hvplot=0.8.2=py310haa95532_0
113 | - hyperlink=21.0.0=pyhd3eb1b0_0
114 | - icc_rt=2022.1.0=h6049295_2
115 | - icu=58.2=ha925a31_3
116 | - idna=3.4=py310haa95532_0
117 | - imagecodecs=2021.8.26=py310h4c966c4_2
118 | - imageio=2.26.0=py310haa95532_0
119 | - imagesize=1.4.1=py310haa95532_0
120 | - imbalanced-learn=0.10.1=py310haa95532_0
121 | - importlib-metadata=4.11.3=py310haa95532_0
122 | - importlib_metadata=4.11.3=hd3eb1b0_0
123 | - incremental=21.3.0=pyhd3eb1b0_0
124 | - inflection=0.5.1=py310haa95532_0
125 | - iniconfig=1.1.1=pyhd3eb1b0_0
126 | - intake=0.6.7=py310haa95532_0
127 | - intel-openmp=2021.4.0=haa95532_3556
128 | - intervaltree=3.1.0=pyhd3eb1b0_0
129 | - ipykernel=6.19.2=py310h9909e9c_0
130 | - ipython=8.10.0=py310haa95532_0
131 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
132 | - ipywidgets=7.6.5=pyhd3eb1b0_1
133 | - isort=5.9.3=pyhd3eb1b0_0
134 | - itemadapter=0.3.0=pyhd3eb1b0_0
135 | - itemloaders=1.0.4=pyhd3eb1b0_1
136 | - itsdangerous=2.0.1=pyhd3eb1b0_0
137 | - jedi=0.18.1=py310haa95532_1
138 | - jellyfish=0.9.0=py310h2bbff1b_0
139 | - jinja2=3.1.2=py310haa95532_0
140 | - jinja2-time=0.2.0=pyhd3eb1b0_3
141 | - jmespath=0.10.0=pyhd3eb1b0_0
142 | - joblib=1.1.1=py310haa95532_0
143 | - jpeg=9e=h2bbff1b_1
144 | - jq=1.6=haa95532_1
145 | - json5=0.9.6=pyhd3eb1b0_0
146 | - jsonpatch=1.32=pyhd3eb1b0_0
147 | - jsonpointer=2.1=pyhd3eb1b0_0
148 | - jsonschema=4.17.3=py310haa95532_0
149 | - jupyter=1.0.0=py310haa95532_8
150 | - jupyter_client=7.3.4=py310haa95532_0
151 | - jupyter_console=6.6.2=py310haa95532_0
152 | - jupyter_core=5.2.0=py310haa95532_0
153 | - jupyter_server=1.23.4=py310haa95532_0
154 | - jupyterlab=3.5.3=py310haa95532_0
155 | - jupyterlab_pygments=0.1.2=py_0
156 | - jupyterlab_server=2.19.0=py310haa95532_0
157 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
158 | - jxrlib=1.1=he774522_2
159 | - keyring=23.4.0=py310haa95532_0
160 | - kiwisolver=1.4.4=py310hd77b12b_0
161 | - lazy-object-proxy=1.6.0=py310h2bbff1b_0
162 | - lcms2=2.12=h83e58a3_0
163 | - lerc=3.0=hd77b12b_0
164 | - libaec=1.0.4=h33f27b4_1
165 | - libarchive=3.6.2=hebabd0d_0
166 | - libbrotlicommon=1.0.9=h2bbff1b_7
167 | - libbrotlidec=1.0.9=h2bbff1b_7
168 | - libbrotlienc=1.0.9=h2bbff1b_7
169 | - libcurl=7.87.0=h86230a5_0
170 | - libdeflate=1.17=h2bbff1b_0
171 | - libffi=3.4.2=hd77b12b_6
172 | - libiconv=1.16=h2bbff1b_2
173 | - liblief=0.12.3=hd77b12b_0
174 | - libogg=1.3.5=h2bbff1b_1
175 | - libpng=1.6.39=h8cc25b3_0
176 | - libsodium=1.0.18=h62dcd97_0
177 | - libspatialindex=1.9.3=h6c2663c_0
178 | - libssh2=1.10.0=hcd4344a_0
179 | - libtiff=4.5.0=h6c2663c_2
180 | - libuv=1.44.2=h2bbff1b_0
181 | - libvorbis=1.3.7=he774522_0
182 | - libwebp=1.2.4=hbc33d0d_1
183 | - libwebp-base=1.2.4=h2bbff1b_1
184 | - libxml2=2.9.14=h0ad7f3c_0
185 | - libxslt=1.1.35=h2bbff1b_0
186 | - libzopfli=1.0.3=ha925a31_0
187 | - llvmlite=0.39.1=py310h23ce68f_0
188 | - locket=1.0.0=py310haa95532_0
189 | - lxml=4.9.1=py310h1985fb9_0
190 | - lz4=3.1.3=py310h2bbff1b_0
191 | - lz4-c=1.9.4=h2bbff1b_0
192 | - lzo=2.10=he774522_2
193 | - m2-msys2-runtime=2.5.0.17080.65c939c=3
194 | - m2-patch=2.7.5=2
195 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
196 | - markdown=3.4.1=py310haa95532_0
197 | - markupsafe=2.1.1=py310h2bbff1b_0
198 | - matplotlib=3.7.0=py310haa95532_0
199 | - matplotlib-base=3.7.0=py310h4ed8f06_0
200 | - matplotlib-inline=0.1.6=py310haa95532_0
201 | - mccabe=0.7.0=pyhd3eb1b0_0
202 | - menuinst=1.4.19=py310h59b6b97_0
203 | - mistune=0.8.4=py310h2bbff1b_1000
204 | - mkl=2021.4.0=haa95532_640
205 | - mkl-service=2.4.0=py310h2bbff1b_0
206 | - mkl_fft=1.3.1=py310ha0764ea_0
207 | - mkl_random=1.2.2=py310h4ed8f06_0
208 | - mock=4.0.3=pyhd3eb1b0_0
209 | - mpmath=1.2.1=py310haa95532_0
210 | - msgpack-python=1.0.3=py310h59b6b97_0
211 | - msys2-conda-epoch=20160418=1
212 | - multipledispatch=0.6.0=py310haa95532_0
213 | - munkres=1.1.4=py_0
214 | - mypy_extensions=0.4.3=py310haa95532_1
215 | - navigator-updater=0.3.0=py310haa95532_0
216 | - nbclassic=0.5.2=py310haa95532_0
217 | - nbclient=0.5.13=py310haa95532_0
218 | - nbconvert=6.5.4=py310haa95532_0
219 | - nbformat=5.7.0=py310haa95532_0
220 | - nest-asyncio=1.5.6=py310haa95532_0
221 | - networkx=2.8.4=py310haa95532_0
222 | - ninja=1.10.2=haa95532_5
223 | - ninja-base=1.10.2=h6d14046_5
224 | - nltk=3.7=pyhd3eb1b0_0
225 | - notebook=6.5.2=py310haa95532_0
226 | - notebook-shim=0.2.2=py310haa95532_0
227 | - numba=0.56.4=py310h4ed8f06_0
228 | - numexpr=2.8.4=py310hd213c9f_0
229 | - numpy=1.23.5=py310h60c9a35_0
230 | - numpy-base=1.23.5=py310h04254f7_0
231 | - numpydoc=1.5.0=py310haa95532_0
232 | - openjpeg=2.4.0=h4fc8c34_0
233 | - openpyxl=3.0.10=py310h2bbff1b_0
234 | - openssl=1.1.1t=h2bbff1b_0
235 | - packaging=22.0=py310haa95532_0
236 | - pandas=1.5.3=py310h4ed8f06_0
237 | - pandocfilters=1.5.0=pyhd3eb1b0_0
238 | - panel=0.14.3=py310haa95532_0
239 | - param=1.12.3=py310haa95532_0
240 | - paramiko=2.8.1=pyhd3eb1b0_0
241 | - parsel=1.6.0=py310haa95532_0
242 | - parso=0.8.3=pyhd3eb1b0_0
243 | - partd=1.2.0=pyhd3eb1b0_1
244 | - pathlib=1.0.1=pyhd3eb1b0_1
245 | - pathspec=0.10.3=py310haa95532_0
246 | - patsy=0.5.3=py310haa95532_0
247 | - pcre=8.45=hd77b12b_0
248 | - pep8=1.7.1=py310haa95532_1
249 | - pexpect=4.8.0=pyhd3eb1b0_3
250 | - pickleshare=0.7.5=pyhd3eb1b0_1003
251 | - pillow=9.4.0=py310hd77b12b_0
252 | - pip=22.3.1=py310haa95532_0
253 | - pkginfo=1.9.6=py310haa95532_0
254 | - platformdirs=2.5.2=py310haa95532_0
255 | - plotly=5.9.0=py310haa95532_0
256 | - pluggy=1.0.0=py310haa95532_1
257 | - ply=3.11=py310haa95532_0
258 | - pooch=1.4.0=pyhd3eb1b0_0
259 | - powershell_shortcut=0.0.1=3
260 | - poyo=0.5.0=pyhd3eb1b0_0
261 | - prometheus_client=0.14.1=py310haa95532_0
262 | - prompt-toolkit=3.0.36=py310haa95532_0
263 | - prompt_toolkit=3.0.36=hd3eb1b0_0
264 | - protego=0.1.16=py_0
265 | - psutil=5.9.0=py310h2bbff1b_0
266 | - ptyprocess=0.7.0=pyhd3eb1b0_2
267 | - pure_eval=0.2.2=pyhd3eb1b0_0
268 | - py=1.11.0=pyhd3eb1b0_0
269 | - py-lief=0.12.3=py310hd77b12b_0
270 | - pyasn1=0.4.8=pyhd3eb1b0_0
271 | - pyasn1-modules=0.2.8=py_0
272 | - pycodestyle=2.10.0=py310haa95532_0
273 | - pycosat=0.6.4=py310h2bbff1b_0
274 | - pycparser=2.21=pyhd3eb1b0_0
275 | - pyct=0.5.0=py310haa95532_0
276 | - pycurl=7.45.1=py310hcd4344a_0
277 | - pydispatcher=2.0.5=py310haa95532_2
278 | - pydocstyle=6.3.0=py310haa95532_0
279 | - pyerfa=2.0.0=py310h2bbff1b_0
280 | - pyflakes=3.0.1=py310haa95532_0
281 | - pygments=2.11.2=pyhd3eb1b0_0
282 | - pyhamcrest=2.0.2=pyhd3eb1b0_2
283 | - pyjwt=2.4.0=py310haa95532_0
284 | - pylint=2.16.2=py310haa95532_0
285 | - pyls-spyder=0.4.0=pyhd3eb1b0_0
286 | - pynacl=1.5.0=py310h8cc25b3_0
287 | - pyodbc=4.0.34=py310hd77b12b_0
288 | - pyopenssl=23.0.0=py310haa95532_0
289 | - pyparsing=3.0.9=py310haa95532_0
290 | - pyqt=5.15.7=py310hd77b12b_0
291 | - pyqt5-sip=12.11.0=py310hd77b12b_0
292 | - pyqtwebengine=5.15.7=py310hd77b12b_0
293 | - pyrsistent=0.18.0=py310h2bbff1b_0
294 | - pysocks=1.7.1=py310haa95532_0
295 | - pytables=3.7.0=py310h388bc9b_1
296 | - pytest=7.1.2=py310haa95532_0
297 | - python=3.10.9=h966fe2a_1
298 | - python-dateutil=2.8.2=pyhd3eb1b0_0
299 | - python-fastjsonschema=2.16.2=py310haa95532_0
300 | - python-libarchive-c=2.9=pyhd3eb1b0_1
301 | - python-lsp-black=1.2.1=py310haa95532_0
302 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0
303 | - python-lsp-server=1.7.1=py310haa95532_0
304 | - python-slugify=5.0.2=pyhd3eb1b0_0
305 | - python-snappy=0.6.1=py310hd77b12b_0
306 | - pytoolconfig=1.2.5=py310haa95532_1
307 | - pytorch=2.0.1=py3.10_cpu_0
308 | - pytorch-mutex=1.0=cpu
309 | - pytz=2022.7=py310haa95532_0
310 | - pyviz_comms=2.0.2=pyhd3eb1b0_0
311 | - pywavelets=1.4.1=py310h2bbff1b_0
312 | - pywin32=305=py310h2bbff1b_0
313 | - pywin32-ctypes=0.2.0=py310haa95532_1000
314 | - pywinpty=2.0.10=py310h5da7b33_0
315 | - pyyaml=6.0=py310h2bbff1b_1
316 | - pyzmq=23.2.0=py310hd77b12b_0
317 | - qdarkstyle=3.0.2=pyhd3eb1b0_0
318 | - qt-main=5.15.2=he8e5bd7_7
319 | - qt-webengine=5.15.9=hb9a9bb5_5
320 | - qtpy=2.2.0=py310haa95532_0
321 | - qtwebkit=5.212=h3ad3cdb_4
322 | - queuelib=1.5.0=py310haa95532_0
323 | - regex=2022.7.9=py310h2bbff1b_0
324 | - requests=2.28.1=py310haa95532_0
325 | - requests-file=1.5.1=pyhd3eb1b0_0
326 | - requests-toolbelt=0.9.1=pyhd3eb1b0_0
327 | - rope=1.7.0=py310haa95532_0
328 | - rtree=1.0.1=py310h2eaa2aa_0
329 | - ruamel.yaml=0.17.21=py310h2bbff1b_0
330 | - ruamel.yaml.clib=0.2.6=py310h2bbff1b_1
331 | - ruamel_yaml=0.17.21=py310h2bbff1b_0
332 | - scikit-image=0.19.3=py310hd77b12b_1
333 | - scikit-learn=1.2.1=py310hd77b12b_0
334 | - scikit-learn-intelex=2023.0.2=py310haa95532_0
335 | - scipy=1.10.0=py310hb9afe5d_1
336 | - scrapy=2.8.0=py310haa95532_0
337 | - seaborn=0.12.2=py310haa95532_0
338 | - send2trash=1.8.0=pyhd3eb1b0_1
339 | - service_identity=18.1.0=pyhd3eb1b0_1
340 | - setuptools=65.6.3=py310haa95532_0
341 | - sip=6.6.2=py310hd77b12b_0
342 | - six=1.16.0=pyhd3eb1b0_1
343 | - smart_open=5.2.1=py310haa95532_0
344 | - snappy=1.1.9=h6c2663c_0
345 | - sniffio=1.2.0=py310haa95532_1
346 | - snowballstemmer=2.2.0=pyhd3eb1b0_0
347 | - sortedcontainers=2.4.0=pyhd3eb1b0_0
348 | - soupsieve=2.3.2.post1=py310haa95532_0
349 | - sphinx=5.0.2=py310haa95532_0
350 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0
351 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0
352 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0
353 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
354 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0
355 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0
356 | - spyder=5.4.1=py310haa95532_0
357 | - spyder-kernels=2.4.1=py310haa95532_0
358 | - sqlalchemy=1.4.39=py310h2bbff1b_0
359 | - sqlite=3.40.1=h2bbff1b_0
360 | - stack_data=0.2.0=pyhd3eb1b0_0
361 | - statsmodels=0.13.5=py310h9128911_1
362 | - sympy=1.11.1=py310haa95532_0
363 | - tabulate=0.8.10=py310haa95532_0
364 | - tbb=2021.7.0=h59b6b97_0
365 | - tbb4py=2021.7.0=py310h59b6b97_0
366 | - tblib=1.7.0=pyhd3eb1b0_0
367 | - tenacity=8.0.1=py310haa95532_1
368 | - terminado=0.17.1=py310haa95532_0
369 | - text-unidecode=1.3=pyhd3eb1b0_0
370 | - textdistance=4.2.1=pyhd3eb1b0_0
371 | - threadpoolctl=2.2.0=pyh0d69192_0
372 | - three-merge=0.1.1=pyhd3eb1b0_0
373 | - tifffile=2021.7.2=pyhd3eb1b0_2
374 | - tinycss2=1.2.1=py310haa95532_0
375 | - tk=8.6.12=h2bbff1b_0
376 | - tldextract=3.2.0=pyhd3eb1b0_0
377 | - tokenizers=0.11.4=py310he5181cf_1
378 | - toml=0.10.2=pyhd3eb1b0_0
379 | - tomli=2.0.1=py310haa95532_0
380 | - tomlkit=0.11.1=py310haa95532_0
381 | - toolz=0.12.0=py310haa95532_0
382 | - tornado=6.1=py310h2bbff1b_0
383 | - tqdm=4.64.1=py310haa95532_0
384 | - traitlets=5.7.1=py310haa95532_0
385 | - transformers=4.24.0=py310haa95532_0
386 | - twisted=22.2.0=py310h2bbff1b_1
387 | - twisted-iocpsupport=1.0.2=py310h2bbff1b_0
388 | - typing-extensions=4.4.0=py310haa95532_0
389 | - typing_extensions=4.4.0=py310haa95532_0
390 | - tzdata=2022g=h04d1e81_0
391 | - ujson=5.4.0=py310hd77b12b_0
392 | - unidecode=1.2.0=pyhd3eb1b0_0
393 | - urllib3=1.26.14=py310haa95532_0
394 | - vc=14.2=h21ff451_1
395 | - vs2015_runtime=14.27.29016=h5e58377_2
396 | - w3lib=1.21.0=pyhd3eb1b0_0
397 | - watchdog=2.1.6=py310haa95532_0
398 | - wcwidth=0.2.5=pyhd3eb1b0_0
399 | - webencodings=0.5.1=py310haa95532_1
400 | - websocket-client=0.58.0=py310haa95532_4
401 | - werkzeug=2.2.2=py310haa95532_0
402 | - whatthepatch=1.0.2=py310haa95532_0
403 | - wheel=0.38.4=py310haa95532_0
404 | - widgetsnbextension=3.5.2=py310haa95532_0
405 | - win_inet_pton=1.1.0=py310haa95532_0
406 | - wincertstore=0.2=py310haa95532_2
407 | - winpty=0.4.3=4
408 | - wrapt=1.14.1=py310h2bbff1b_0
409 | - xarray=2022.11.0=py310haa95532_0
410 | - xlwings=0.29.1=py310haa95532_0
411 | - xz=5.2.10=h8cc25b3_1
412 | - yaml=0.2.5=he774522_0
413 | - yapf=0.31.0=pyhd3eb1b0_0
414 | - zeromq=4.3.4=hd77b12b_0
415 | - zfp=0.5.5=hd77b12b_6
416 | - zict=2.1.0=py310haa95532_0
417 | - zipp=3.11.0=py310haa95532_0
418 | - zlib=1.2.13=h8cc25b3_0
419 | - zope=1.0=py310haa95532_1
420 | - zope.interface=5.4.0=py310h2bbff1b_0
421 | - zstandard=0.19.0=py310h2bbff1b_0
422 | - zstd=1.5.2=h19a0ad4_0
423 | - pip:
424 | - absl-py==1.4.0
425 | - ale-py==0.8.1
426 | - astunparse==1.6.3
427 | - cachetools==5.3.0
428 | - dm-tree==0.1.8
429 | - docker-pycreds==0.4.0
430 | - farama-notifications==0.0.4
431 | - flatbuffers==23.5.9
432 | - gast==0.4.0
433 | - gitdb==4.0.10
434 | - gitpython==3.1.31
435 | - google-auth==2.18.1
436 | - google-auth-oauthlib==1.0.0
437 | - google-pasta==0.2.0
438 | - grpcio==1.54.2
439 | - gym==0.26.2
440 | - gym-notices==0.0.8
441 | - gymnasium==0.28.1
442 | - importlib-resources==5.12.0
443 | - jax==0.4.10
444 | - jax-jumpy==1.0.0
445 | - keras==2.12.0
446 | - libclang==16.0.0
447 | - ml-dtypes==0.1.0
448 | - numpy-stl==3.0.1
449 | - oauthlib==3.2.2
450 | - opencv-contrib-python==4.7.0.72
451 | - opt-einsum==3.3.0
452 | - pathtools==0.1.2
453 | - progressbar2==4.2.0
454 | - protobuf==3.20.3
455 | - pyglet==1.5.27
456 | - pylint-venv==2.3.0
457 | - python-utils==3.5.2
458 | - qstylizer==0.2.2
459 | - qtawesome==1.2.2
460 | - qtconsole==5.4.0
461 | - requests-oauthlib==1.3.1
462 | - rsa==4.9
463 | - sentry-sdk==1.24.0
464 | - setproctitle==1.3.2
465 | - smmap==5.0.0
466 | - sumolib==1.17.0
467 | - tensorboard==2.12.3
468 | - tensorboard-data-server==0.7.0
469 | - tensorboardx==2.6
470 | - tensordict==0.1.2
471 | - tensorflow==2.12.0
472 | - tensorflow-estimator==2.12.0
473 | - tensorflow-intel==2.12.0
474 | - tensorflow-io-gcs-filesystem==0.31.0
475 | - tensorflow-probability==0.20.1
476 | - tensorlayer==2.2.5
477 | - termcolor==2.3.0
478 | - torchrl==0.1.1
479 | - traci==1.17.0
480 | - visdom==0.2.4
481 | - wandb==0.15.3
482 | prefix: D:\anaconda
483 |
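484 | # Added note: this is a full export of the authors' Windows Anaconda base
485 | # environment. A typical way to restore it (assuming conda is installed) is:
486 | #   conda env create -f environment.yaml -n maac_drl
487 | # The `prefix:` entry above is machine-specific and can be deleted or ignored.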
--------------------------------------------------------------------------------
/environment/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==1.4.0
2 | alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
3 | ale-py==0.8.1
4 | anaconda-client==1.11.2
5 | anaconda-navigator==2.4.0
6 | anaconda-project @ file:///C:/Windows/TEMP/abs_91fu4tfkih/croots/recipe/anaconda-project_1660339890874/work
7 | anyio @ file:///C:/ci/anyio_1644481856696/work/dist
8 | appdirs==1.4.4
9 | argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
10 | argon2-cffi-bindings @ file:///C:/ci/argon2-cffi-bindings_1644569876605/work
11 | arrow @ file:///C:/b/abs_cal7u12ktb/croot/arrow_1676588147908/work
12 | astroid @ file:///C:/b/abs_d4lg3_taxn/croot/astroid_1676904351456/work
13 | astropy @ file:///C:/ci/astropy_1657719642921/work
14 | asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
15 | astunparse==1.6.3
16 | atomicwrites==1.4.0
17 | attrs @ file:///C:/b/abs_09s3y775ra/croot/attrs_1668696195628/work
18 | Automat @ file:///tmp/build/80754af9/automat_1600298431173/work
19 | autopep8 @ file:///opt/conda/conda-bld/autopep8_1650463822033/work
20 | Babel @ file:///C:/b/abs_a2shv_3tqi/croot/babel_1671782804377/work
21 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
22 | backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work
23 | backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work
24 | backports.weakref==1.0.post1
25 | bcrypt @ file:///C:/Windows/Temp/abs_36kl66t_aw/croots/recipe/bcrypt_1659554334050/work
26 | beautifulsoup4 @ file:///C:/ci/beautifulsoup4_1650293028159/work
27 | binaryornot @ file:///tmp/build/80754af9/binaryornot_1617751525010/work
28 | black @ file:///C:/ci/black_1660221726201/work
29 | bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work
30 | bokeh @ file:///C:/Windows/TEMP/abs_4a259bc2-ed05-4a1f-808e-ac712cc0900cddqp8sp7/croots/recipe/bokeh_1658136660686/work
31 | boltons @ file:///C:/b/abs_707eo7c09t/croot/boltons_1677628723117/work
32 | Bottleneck @ file:///C:/Windows/Temp/abs_3198ca53-903d-42fd-87b4-03e6d03a8381yfwsuve8/croots/recipe/bottleneck_1657175565403/work
33 | brotlipy==0.7.0
34 | cachetools==5.3.0
35 | certifi @ file:///C:/b/abs_4a0polqwty/croot/certifi_1683875377622/work/certifi
36 | cffi @ file:///C:/b/abs_49n3v2hyhr/croot/cffi_1670423218144/work
37 | chardet @ file:///C:/ci_310/chardet_1642114080098/work
38 | charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
39 | click @ file:///C:/ci/click_1646056762388/work
40 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1632508026186/work
41 | clyent==1.2.2
42 | colorama @ file:///C:/b/abs_a9ozq0l032/croot/colorama_1672387194846/work
43 | colorcet @ file:///C:/b/abs_46vyu0rpdl/croot/colorcet_1668084513237/work
44 | comm @ file:///C:/b/abs_1419earm7u/croot/comm_1671231131638/work
45 | conda==23.3.1
46 | conda-build==3.24.0
47 | conda-content-trust @ file:///C:/Windows/TEMP/abs_4589313d-fc62-4ccc-81c0-b801b4449e833j1ajrwu/croots/recipe/conda-content-trust_1658126379362/work
48 | conda-pack @ file:///tmp/build/80754af9/conda-pack_1611163042455/work
49 | conda-package-handling @ file:///C:/b/abs_fcga8w0uem/croot/conda-package-handling_1672865024290/work
50 | conda-repo-cli==1.0.41
51 | conda-token @ file:///Users/paulyim/miniconda3/envs/c3i/conda-bld/conda-token_1662660369760/work
52 | conda-verify==3.4.2
53 | conda_package_streaming @ file:///C:/b/abs_0e5n5hdal3/croot/conda-package-streaming_1670508162902/work
54 | constantly==15.1.0
55 | contourpy @ file:///C:/b/abs_d5rpy288vc/croots/recipe/contourpy_1663827418189/work
56 | cookiecutter @ file:///opt/conda/conda-bld/cookiecutter_1649151442564/work
57 | cryptography @ file:///C:/b/abs_8ecplyc3n2/croot/cryptography_1677533105000/work
58 | cssselect==1.1.0
59 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
60 | cytoolz @ file:///C:/b/abs_61m9vzb4qh/croot/cytoolz_1667465938275/work
61 | daal4py==2023.0.2
62 | dask @ file:///C:/ci/dask-core_1658497112560/work
63 | datashader @ file:///C:/b/abs_e80f3d7ac0/croot/datashader_1676023254070/work
64 | datashape==0.5.4
65 | debugpy @ file:///C:/ci_310/debugpy_1642079916595/work
66 | decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
67 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work
68 | diff-match-patch @ file:///Users/ktietz/demo/mc3/conda-bld/diff-match-patch_1630511840874/work
69 | dill @ file:///C:/b/abs_42h_07z1yj/croot/dill_1667919550096/work
70 | distributed @ file:///C:/ci/distributed_1658523963030/work
71 | dm-tree==0.1.8
72 | docker-pycreds==0.4.0
73 | docstring-to-markdown @ file:///C:/b/abs_cf10j8nr4q/croot/docstring-to-markdown_1673447652942/work
74 | docutils @ file:///C:/Windows/TEMP/abs_24e5e278-4d1c-47eb-97b9-f761d871f482dy2vg450/croots/recipe/docutils_1657175444608/work
75 | entrypoints @ file:///C:/ci/entrypoints_1649926676279/work
76 | et-xmlfile==1.1.0
77 | executing @ file:///opt/conda/conda-bld/executing_1646925071911/work
78 | Farama-Notifications==0.0.4
79 | fastjsonschema @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_ebruxzvd08/croots/recipe/python-fastjsonschema_1661376484940/work
80 | filelock @ file:///C:/b/abs_c7yrhs9uz2/croot/filelock_1672387617533/work
81 | flake8 @ file:///C:/b/abs_9f6_n1jlpc/croot/flake8_1674581816810/work
82 | Flask @ file:///C:/b/abs_ef16l83sif/croot/flask_1671217367534/work
83 | flatbuffers==23.5.9
84 | flit_core @ file:///opt/conda/conda-bld/flit-core_1644941570762/work/source/flit_core
85 | fonttools==4.25.0
86 | fsspec @ file:///C:/b/abs_5bjz6v0w_f/croot/fsspec_1670336608940/work
87 | future @ file:///C:/b/abs_3dcibf18zi/croot/future_1677599891380/work
88 | gast==0.4.0
89 | gensim @ file:///C:/b/abs_a5vat69tv8/croot/gensim_1674853640591/work
90 | gitdb==4.0.10
91 | GitPython==3.1.31
92 | glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work
93 | google-auth==2.18.1
94 | google-auth-oauthlib==1.0.0
95 | google-pasta==0.2.0
96 | greenlet @ file:///C:/b/abs_47lk_w2ajq/croot/greenlet_1670513248400/work
97 | grpcio==1.54.2
98 | gym==0.26.2
99 | gym-notices==0.0.8
100 | gymnasium==0.28.1
101 | h5py==3.9.0
102 | HeapDict @ file:///Users/ktietz/demo/mc3/conda-bld/heapdict_1630598515714/work
103 | holoviews @ file:///C:/b/abs_bbf97_0kcd/croot/holoviews_1676372911083/work
104 | huggingface-hub @ file:///C:/b/abs_8d8wo2z8c6/croot/huggingface_hub_1667490298905/work
105 | hvplot @ file:///C:/b/abs_13un17_4x_/croot/hvplot_1670508919193/work
106 | hyperlink @ file:///tmp/build/80754af9/hyperlink_1610130746837/work
107 | idna @ file:///C:/b/abs_bdhbebrioa/croot/idna_1666125572046/work
108 | imagecodecs @ file:///C:/b/abs_f0cr12h73p/croot/imagecodecs_1677576746499/work
109 | imageio @ file:///C:/b/abs_27kq2gy1us/croot/imageio_1677879918708/work
110 | imagesize @ file:///C:/Windows/TEMP/abs_3cecd249-3fc4-4bfc-b80b-bb227b0d701en12vqzot/croots/recipe/imagesize_1657179501304/work
111 | imbalanced-learn @ file:///C:/b/abs_1911ryuksz/croot/imbalanced-learn_1677191585237/work
112 | importlib-metadata @ file:///C:/ci/importlib-metadata_1648544469310/work
113 | importlib-resources==5.12.0
114 | incremental @ file:///tmp/build/80754af9/incremental_1636629750599/work
115 | inflection==0.5.1
116 | iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work
117 | intake @ file:///C:/b/abs_42yyb2lhwx/croot/intake_1676619887779/work
118 | intervaltree @ file:///Users/ktietz/demo/mc3/conda-bld/intervaltree_1630511889664/work
119 | ipykernel @ file:///C:/b/abs_b4f07tbsyd/croot/ipykernel_1672767104060/work
120 | ipython @ file:///C:/b/abs_d3h279dv3h/croot/ipython_1676582236558/work
121 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
122 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1634143127070/work
123 | isort @ file:///tmp/build/80754af9/isort_1628603791788/work
124 | itemadapter @ file:///tmp/build/80754af9/itemadapter_1626442940632/work
125 | itemloaders @ file:///opt/conda/conda-bld/itemloaders_1646805235997/work
126 | itsdangerous @ file:///tmp/build/80754af9/itsdangerous_1621432558163/work
127 | jax==0.4.10
128 | jax-jumpy==1.0.0
129 | jedi @ file:///C:/ci/jedi_1644315428305/work
130 | jellyfish @ file:///C:/ci/jellyfish_1647962737334/work
131 | Jinja2 @ file:///C:/b/abs_7cdis66kl9/croot/jinja2_1666908141852/work
132 | jinja2-time @ file:///opt/conda/conda-bld/jinja2-time_1649251842261/work
133 | jmespath @ file:///Users/ktietz/demo/mc3/conda-bld/jmespath_1630583964805/work
134 | joblib @ file:///C:/b/abs_e60_bwl1v6/croot/joblib_1666298845728/work
135 | json5 @ file:///tmp/build/80754af9/json5_1624432770122/work
136 | jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
137 | jsonpointer==2.1
138 | jsonschema @ file:///C:/b/abs_6ccs97j_l8/croot/jsonschema_1676558690963/work
139 | jupyter @ file:///C:/Windows/TEMP/abs_56xfdi__li/croots/recipe/jupyter_1659349053177/work
140 | jupyter-console @ file:///C:/b/abs_68ttzd5p9c/croot/jupyter_console_1677674667636/work
141 | jupyter-server @ file:///C:/b/abs_1cfi3__jl8/croot/jupyter_server_1671707636383/work
142 | jupyter_client @ file:///C:/ci/jupyter_client_1661834530766/work
143 | jupyter_core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
144 | jupyterlab @ file:///C:/b/abs_513jt6yy74/croot/jupyterlab_1675354138043/work
145 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
146 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work
147 | jupyterlab_server @ file:///C:/b/abs_d1z_g1swc8/croot/jupyterlab_server_1677153204814/work
148 | keras==2.12.0
149 | keyring @ file:///C:/ci_310/keyring_1642165564669/work
150 | kiwisolver @ file:///C:/b/abs_88mdhvtahm/croot/kiwisolver_1672387921783/work
151 | lazy-object-proxy @ file:///C:/ci_310/lazy-object-proxy_1642083437654/work
152 | libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work
153 | libclang==16.0.0
154 | llvmlite==0.39.1
155 | locket @ file:///C:/ci/locket_1652904090946/work
156 | lxml @ file:///C:/ci/lxml_1657527492694/work
157 | lz4 @ file:///C:/ci_310/lz4_1643300078932/work
158 | Markdown @ file:///C:/b/abs_98lv_ucina/croot/markdown_1671541919225/work
159 | MarkupSafe @ file:///C:/ci/markupsafe_1654508036328/work
160 | matplotlib @ file:///C:/b/abs_b2d7uv90hg/croot/matplotlib-suite_1677674332463/work
161 | matplotlib-inline @ file:///C:/ci/matplotlib-inline_1661934094726/work
162 | mccabe @ file:///opt/conda/conda-bld/mccabe_1644221741721/work
163 | menuinst @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_455sf5o0ct/croots/recipe/menuinst_1661805970842/work
164 | mistune @ file:///C:/ci_310/mistune_1642084168466/work
165 | mkl-fft==1.3.1
166 | mkl-random @ file:///C:/ci_310/mkl_random_1643050563308/work
167 | mkl-service==2.4.0
168 | ml-dtypes==0.1.0
169 | mock @ file:///tmp/build/80754af9/mock_1607622725907/work
170 | mpmath==1.2.1
171 | msgpack @ file:///C:/ci/msgpack-python_1652348582618/work
172 | # Editable install with no version control (multiagent==0.0.1)
173 | -e c:\users\administrator.desktop-nlh290a\desktop\cm3_code\cm3-master\env\multiagent-particle-envs
174 | multipledispatch @ file:///C:/ci_310/multipledispatch_1642084438481/work
175 | munkres==1.1.4
176 | mypy-extensions==0.4.3
177 | navigator-updater==0.3.0
178 | nbclassic @ file:///C:/b/abs_d0_ze5q0j2/croot/nbclassic_1676902914817/work
179 | nbclient @ file:///C:/ci/nbclient_1650308592199/work
180 | nbconvert @ file:///C:/b/abs_4av3q4okro/croot/nbconvert_1668450658054/work
181 | nbformat @ file:///C:/b/abs_85_3g7dkt4/croot/nbformat_1670352343720/work
182 | nest-asyncio @ file:///C:/b/abs_3a_4jsjlqu/croot/nest-asyncio_1672387322800/work
183 | networkx @ file:///C:/ci/networkx_1657716953747/work
184 | nltk @ file:///opt/conda/conda-bld/nltk_1645628263994/work
185 | notebook @ file:///C:/b/abs_ca13hqvuzw/croot/notebook_1668179888546/work
186 | notebook_shim @ file:///C:/b/abs_ebfczttg6x/croot/notebook-shim_1668160590914/work
187 | numba @ file:///C:/b/abs_e53pp2e4k7/croot/numba_1670258349527/work
188 | numexpr @ file:///C:/b/abs_a7kbak88hk/croot/numexpr_1668713882979/work
189 | numpy @ file:///C:/b/abs_datssh7cer/croot/numpy_and_numpy_base_1672336199388/work
190 | numpy-stl==3.0.1
191 | numpydoc @ file:///C:/b/abs_cfdd4zxbga/croot/numpydoc_1668085912100/work
192 | oauthlib==3.2.2
193 | opencv-contrib-python==4.7.0.72
194 | openpyxl==3.0.10
195 | opt-einsum==3.3.0
196 | packaging @ file:///C:/b/abs_cfsup8ur87/croot/packaging_1671697442297/work
197 | pandas @ file:///C:/b/abs_b9kefbuby2/croot/pandas_1677835593760/work
198 | pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work
199 | panel @ file:///C:/b/abs_55ujq2fpyh/croot/panel_1676379705003/work
200 | param @ file:///C:/b/abs_d799n8xz_7/croot/param_1671697759755/work
201 | paramiko @ file:///opt/conda/conda-bld/paramiko_1640109032755/work
202 | parsel @ file:///C:/ci/parsel_1646722035970/work
203 | parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
204 | partd @ file:///opt/conda/conda-bld/partd_1647245470509/work
205 | pathlib @ file:///Users/ktietz/demo/mc3/conda-bld/pathlib_1629713961906/work
206 | pathspec @ file:///C:/b/abs_9cu5_2yb3i/croot/pathspec_1674681579249/work
207 | pathtools==0.1.2
208 | patsy==0.5.3
209 | pep8==1.7.1
210 | pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
211 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
212 | Pillow==9.4.0
213 | pkginfo @ file:///C:/b/abs_d18srtr68x/croot/pkginfo_1679431192239/work
214 | platformdirs @ file:///C:/b/abs_73cc5cz_1u/croots/recipe/platformdirs_1662711386458/work
215 | plotly @ file:///C:/ci/plotly_1658160673416/work
216 | pluggy @ file:///C:/ci/pluggy_1648042746254/work
217 | ply==3.11
218 | pooch @ file:///tmp/build/80754af9/pooch_1623324770023/work
219 | poyo @ file:///tmp/build/80754af9/poyo_1617751526755/work
220 | progressbar2==4.2.0
221 | prometheus-client @ file:///C:/Windows/TEMP/abs_ab9nx8qb08/croots/recipe/prometheus_client_1659455104602/work
222 | prompt-toolkit @ file:///C:/b/abs_6coz5_9f2s/croot/prompt-toolkit_1672387908312/work
223 | Protego @ file:///tmp/build/80754af9/protego_1598657180827/work
224 | protobuf==3.20.3
225 | psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
226 | ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
227 | pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work
228 | py @ file:///opt/conda/conda-bld/py_1644396412707/work
229 | pyasn1 @ file:///Users/ktietz/demo/mc3/conda-bld/pyasn1_1629708007385/work
230 | pyasn1-modules==0.2.8
231 | pycodestyle @ file:///C:/b/abs_d77nxvklcq/croot/pycodestyle_1674267231034/work
232 | pycosat @ file:///C:/b/abs_4b1rrw8pn9/croot/pycosat_1666807711599/work
233 | pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
234 | pyct @ file:///C:/b/abs_92z17k7ig2/croot/pyct_1675450330889/work
235 | pycurl==7.45.1
236 | PyDispatcher==2.0.5
237 | pydocstyle @ file:///C:/b/abs_6dz687_5i3/croot/pydocstyle_1675221688656/work
238 | pyerfa @ file:///C:/ci_310/pyerfa_1642088497201/work
239 | pyflakes @ file:///C:/b/abs_6dve6e13zh/croot/pyflakes_1674165143327/work
240 | pyglet==1.5.27
241 | Pygments @ file:///opt/conda/conda-bld/pygments_1644249106324/work
242 | PyHamcrest @ file:///tmp/build/80754af9/pyhamcrest_1615748656804/work
243 | PyJWT @ file:///C:/ci/pyjwt_1657529477795/work
244 | pylint @ file:///C:/b/abs_83sq99jc8i/croot/pylint_1676919922167/work
245 | pylint-venv @ file:///C:/b/abs_bf0lepsbij/croot/pylint-venv_1673990138593/work
246 | pyls-spyder==0.4.0
247 | PyNaCl @ file:///C:/Windows/Temp/abs_d5c3ajcm87/croots/recipe/pynacl_1659620667490/work
248 | pyodbc @ file:///C:/Windows/Temp/abs_61e3jz3u05/croots/recipe/pyodbc_1659513801402/work
249 | pyOpenSSL @ file:///C:/b/abs_552w85x1jz/croot/pyopenssl_1677607703691/work
250 | pyparsing @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_7f_7lba6rl/croots/recipe/pyparsing_1661452540662/work
251 | PyQt5==5.15.7
252 | PyQt5-sip @ file:///C:/Windows/Temp/abs_d7gmd2jg8i/croots/recipe/pyqt-split_1659273064801/work/pyqt_sip
253 | PyQtWebEngine==5.15.4
254 | pyrsistent @ file:///C:/ci_310/pyrsistent_1642117077485/work
255 | PySocks @ file:///C:/ci_310/pysocks_1642089375450/work
256 | pytest==7.1.2
257 | python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work
258 | python-lsp-black @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_dddk9lhpp1/croots/recipe/python-lsp-black_1661852041405/work
259 | python-lsp-jsonrpc==1.0.0
260 | python-lsp-server @ file:///C:/b/abs_e44khh1wya/croot/python-lsp-server_1677296772730/work
261 | python-slugify @ file:///tmp/build/80754af9/python-slugify_1620405669636/work
262 | python-snappy @ file:///C:/b/abs_61b1fmzxcn/croot/python-snappy_1670943932513/work
263 | python-utils==3.5.2
264 | pytoolconfig @ file:///C:/b/abs_18sf9z_iwl/croot/pytoolconfig_1676315065270/work
265 | pytz @ file:///C:/b/abs_22fofvpn1x/croot/pytz_1671698059864/work
266 | pyviz-comms @ file:///tmp/build/80754af9/pyviz_comms_1623747165329/work
267 | PyWavelets @ file:///C:/b/abs_a8r4b1511a/croot/pywavelets_1670425185881/work
268 | pywin32==305.1
269 | pywin32-ctypes @ file:///C:/ci_310/pywin32-ctypes_1642657835512/work
270 | pywinpty @ file:///C:/b/abs_73vshmevwq/croot/pywinpty_1677609966356/work/target/wheels/pywinpty-2.0.10-cp310-none-win_amd64.whl
271 | PyYAML @ file:///C:/b/abs_d0g7dqt2xw/croot/pyyaml_1670514768165/work
272 | pyzmq @ file:///C:/ci/pyzmq_1657616000714/work
273 | QDarkStyle @ file:///tmp/build/80754af9/qdarkstyle_1617386714626/work
274 | qstylizer @ file:///C:/b/abs_ef86cgllby/croot/qstylizer_1674008538857/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl
275 | QtAwesome @ file:///C:/b/abs_c5evilj98g/croot/qtawesome_1674008690220/work
276 | qtconsole @ file:///C:/b/abs_5bap7f8n0t/croot/qtconsole_1674008444833/work
277 | QtPy @ file:///C:/ci/qtpy_1662015130233/work
278 | queuelib==1.5.0
279 | regex @ file:///C:/ci/regex_1658258299320/work
280 | requests @ file:///C:/ci/requests_1657735340829/work
281 | requests-file @ file:///Users/ktietz/demo/mc3/conda-bld/requests-file_1629455781986/work
282 | requests-oauthlib==1.3.1
283 | requests-toolbelt @ file:///Users/ktietz/demo/mc3/conda-bld/requests-toolbelt_1629456163440/work
284 | rope @ file:///C:/b/abs_55g_tm_6ff/croot/rope_1676675029164/work
285 | rsa==4.9
286 | Rtree @ file:///C:/b/abs_e116ltblik/croot/rtree_1675157871717/work
287 | ruamel-yaml-conda @ file:///C:/b/abs_6ejaexx82s/croot/ruamel_yaml_1667489767827/work
288 | ruamel.yaml @ file:///C:/b/abs_30ee5qbthd/croot/ruamel.yaml_1666304562000/work
289 | ruamel.yaml.clib @ file:///C:/b/abs_aarblxbilo/croot/ruamel.yaml.clib_1666302270884/work
290 | scikit-image @ file:///C:/b/abs_63r0vmx78u/croot/scikit-image_1669241746873/work
291 | scikit-learn @ file:///C:/b/abs_7ck_bnw91r/croot/scikit-learn_1676911676133/work
292 | scikit-learn-intelex==20230228.214818
293 | scipy==1.10.0
294 | Scrapy @ file:///C:/b/abs_9fn69i_d86/croot/scrapy_1677738199744/work
295 | seaborn @ file:///C:/b/abs_68ltdkoyoo/croot/seaborn_1673479199997/work
296 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1632406701022/work
297 | sentry-sdk==1.24.0
298 | service-identity @ file:///Users/ktietz/demo/mc3/conda-bld/service_identity_1629460757137/work
299 | setproctitle==1.3.2
300 | sip @ file:///C:/Windows/Temp/abs_b8fxd17m2u/croots/recipe/sip_1659012372737/work
301 | six @ file:///tmp/build/80754af9/six_1644875935023/work
302 | smart-open @ file:///C:/ci/smart_open_1651235038100/work
303 | smmap==5.0.0
304 | sniffio @ file:///C:/ci_310/sniffio_1642092172680/work
305 | snowballstemmer @ file:///tmp/build/80754af9/snowballstemmer_1637937080595/work
306 | sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1623949099177/work
307 | soupsieve @ file:///C:/b/abs_fasraqxhlv/croot/soupsieve_1666296394662/work
308 | Sphinx @ file:///C:/ci/sphinx_1657617157451/work
309 | sphinxcontrib-applehelp @ file:///home/ktietz/src/ci/sphinxcontrib-applehelp_1611920841464/work
310 | sphinxcontrib-devhelp @ file:///home/ktietz/src/ci/sphinxcontrib-devhelp_1611920923094/work
311 | sphinxcontrib-htmlhelp @ file:///tmp/build/80754af9/sphinxcontrib-htmlhelp_1623945626792/work
312 | sphinxcontrib-jsmath @ file:///home/ktietz/src/ci/sphinxcontrib-jsmath_1611920942228/work
313 | sphinxcontrib-qthelp @ file:///home/ktietz/src/ci/sphinxcontrib-qthelp_1611921055322/work
314 | sphinxcontrib-serializinghtml @ file:///tmp/build/80754af9/sphinxcontrib-serializinghtml_1624451540180/work
315 | spyder @ file:///C:/b/abs_93s9xkw3pn/croot/spyder_1677776163871/work
316 | spyder-kernels @ file:///C:/b/abs_feh4xo1mrn/croot/spyder-kernels_1673292245176/work
317 | SQLAlchemy @ file:///C:/Windows/Temp/abs_f8661157-660b-49bb-a790-69ab9f3b8f7c8a8s2psb/croots/recipe/sqlalchemy_1657867864564/work
318 | stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work
319 | statsmodels @ file:///C:/b/abs_bdqo3zaryj/croot/statsmodels_1676646249859/work
320 | sumolib==1.17.0
321 | sympy @ file:///C:/b/abs_95fbf1z7n6/croot/sympy_1668202411612/work
322 | tables==3.7.0
323 | tabulate @ file:///C:/ci/tabulate_1657600805799/work
324 | TBB==0.2
325 | tblib @ file:///Users/ktietz/demo/mc3/conda-bld/tblib_1629402031467/work
326 | tenacity @ file:///C:/Windows/TEMP/abs_980d07a6-8e21-4174-9c17-7296219678ads7dhdov_/croots/recipe/tenacity_1657899108023/work
327 | tensorboard==2.12.3
328 | tensorboard-data-server==0.7.0
329 | tensorboardX==2.6
330 | tensordict==0.1.2
331 | tensorflow==2.12.0
332 | tensorflow-estimator==2.12.0
333 | tensorflow-intel==2.12.0
334 | tensorflow-io-gcs-filesystem==0.31.0
335 | tensorflow-probability==0.20.1
336 | tensorlayer==2.2.5
337 | termcolor==2.3.0
338 | terminado @ file:///C:/b/abs_25nakickad/croot/terminado_1671751845491/work
339 | text-unidecode @ file:///Users/ktietz/demo/mc3/conda-bld/text-unidecode_1629401354553/work
340 | textdistance @ file:///tmp/build/80754af9/textdistance_1612461398012/work
341 | threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work
342 | three-merge @ file:///tmp/build/80754af9/three-merge_1607553261110/work
343 | tifffile @ file:///tmp/build/80754af9/tifffile_1627275862826/work
344 | tinycss2 @ file:///C:/b/abs_52w5vfuaax/croot/tinycss2_1668168823131/work
345 | tldextract @ file:///opt/conda/conda-bld/tldextract_1646638314385/work
346 | tokenizers @ file:///C:/ci/tokenizers_1651821358528/work
347 | toml @ file:///tmp/build/80754af9/toml_1616166611790/work
348 | tomli @ file:///C:/Windows/TEMP/abs_ac109f85-a7b3-4b4d-bcfd-52622eceddf0hy332ojo/croots/recipe/tomli_1657175513137/work
349 | tomlkit @ file:///C:/Windows/TEMP/abs_3296qo9v6b/croots/recipe/tomlkit_1658946894808/work
350 | toolz @ file:///C:/b/abs_cfvk6rc40d/croot/toolz_1667464080130/work
351 | torch==2.0.1
352 | torchrl==0.1.1
353 | torchvision==0.15.2
354 | tornado @ file:///C:/ci_310/tornado_1642093111997/work
355 | tqdm @ file:///C:/b/abs_0axbz66qik/croots/recipe/tqdm_1664392691071/work
356 | traci==1.17.0
357 | traitlets @ file:///C:/b/abs_e5m_xjjl94/croot/traitlets_1671143896266/work
358 | transformers @ file:///C:/b/abs_8byf5_j714/croot/transformers_1667919454001/work
359 | Twisted @ file:///C:/Windows/Temp/abs_ccblv2rzfa/croots/recipe/twisted_1659592764512/work
360 | twisted-iocpsupport @ file:///C:/ci/twisted-iocpsupport_1646817083730/work
361 | typing_extensions @ file:///C:/b/abs_89eui86zuq/croot/typing_extensions_1669923792806/work
362 | ujson @ file:///C:/ci/ujson_1657525893897/work
363 | Unidecode @ file:///tmp/build/80754af9/unidecode_1614712377438/work
364 | urllib3 @ file:///C:/b/abs_9bcwxczrvm/croot/urllib3_1673575521331/work
365 | visdom==0.2.4
366 | w3lib @ file:///Users/ktietz/demo/mc3/conda-bld/w3lib_1629359764703/work
367 | wandb==0.15.3
368 | watchdog @ file:///C:/ci_310/watchdog_1642113443984/work
369 | wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work
370 | webencodings==0.5.1
371 | websocket-client @ file:///C:/ci_310/websocket-client_1642093970919/work
372 | Werkzeug @ file:///C:/b/abs_17q5kgb8bo/croot/werkzeug_1671216014857/work
373 | whatthepatch @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_e7bihs8grh/croots/recipe/whatthepatch_1661796085215/work
374 | widgetsnbextension @ file:///C:/ci/widgetsnbextension_1645009839917/work
375 | win-inet-pton @ file:///C:/ci_310/win_inet_pton_1642658466512/work
376 | wincertstore==0.2
377 | wrapt @ file:///C:/Windows/Temp/abs_7c3dd407-1390-477a-b542-fd15df6a24085_diwiza/croots/recipe/wrapt_1657814452175/work
378 | xarray @ file:///C:/b/abs_2fi_umrauo/croot/xarray_1668776806973/work
379 | xlwings @ file:///C:/b/abs_1ejhh6s00l/croot/xlwings_1677024180629/work
380 | yapf @ file:///tmp/build/80754af9/yapf_1615749224965/work
381 | zict==2.1.0
382 | zipp @ file:///C:/b/abs_b9jfdr908q/croot/zipp_1672387552360/work
383 | zope.interface @ file:///C:/ci_310/zope.interface_1642113633904/work
384 | zstandard==0.19.0
385 |
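386 | # Added note: this file is a raw `pip freeze` of the same Windows environment;
387 | # the `@ file:///...` and `-e c:\...` entries reference local build artifacts
388 | # that will not resolve on other machines. A sketch of a portable install is to
389 | # keep only the plain `name==version` pins, for example:
390 | #   grep -v '@ file' requirements.txt | grep -v '^-e' > requirements-clean.txt
391 | #   pip install -r requirements-clean.txt
392 | # (The filter is illustrative; any equivalent works.)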
--------------------------------------------------------------------------------
/algorithm/MA-SAC_main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
6 | import numpy as np
7 | import math
8 | # import gym
9 | import sympy
10 | from matplotlib import pyplot as plt
11 | from scipy.io import loadmat
12 | import os
13 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
14 | # env = gym.make("LargeGridWorld-v0").unwrapped
15 | state_number=2
16 | action_number=10 #9
17 | max_action = 1
18 | min_action = -1
19 | RENDER=False
20 | EP_MAX = 500
21 | EP_LEN = 1000
22 | GAMMA = 0.9
23 | q_lr = 5e-5#3e-4
24 | value_lr = 5e-4#3e-3
25 | policy_lr = 1.5e-4#3
26 | BATCH = 128
27 | tau = 1e-2
28 | MemoryCapacity=20000
29 | Switch=0
30 | n_width=100
31 | n_height = 100
32 | m = loadmat("mapdata_0717.mat")
33 | #correct_action=0
34 | MARK= m["MARK_new"]
35 | PL_AP=m["MARK_PL_real"]
36 |
37 |
38 | class ActorNet(nn.Module):
39 | def __init__(self,inp,outp):
40 | super(ActorNet, self).__init__()
41 | self.in_to_y1=nn.Linear(inp,256)
42 | self.in_to_y1.weight.data.normal_(0,0.1)
43 | self.y1_to_y2=nn.Linear(256,256)
44 | self.y1_to_y2.weight.data.normal_(0,0.1)
45 | self.out=nn.Linear(256,outp)
46 | self.out.weight.data.normal_(0,0.1)
47 | self.std_out = nn.Linear(256, outp)
48 | self.std_out.weight.data.normal_(0, 0.1)
49 |
50 | def forward(self,inputstate):
51 | inputstate=self.in_to_y1(inputstate)
52 | inputstate=F.relu(inputstate)
53 | inputstate=self.y1_to_y2(inputstate)
54 | inputstate=F.relu(inputstate)
55 |         mean=max_action*torch.tanh(self.out(inputstate))  # mean of the Gaussian policy, scaled to the action range
56 |         log_std=self.std_out(inputstate)  # log standard deviation, clamped to [-20, 2] below
57 | log_std=torch.clamp(log_std,-20,2)
58 | std=log_std.exp()
59 | return mean,std
60 |
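   | # Note on the actor head above: log_std is clamped to [-20, 2] before
   | # exponentiation, so std stays within roughly [2e-9, 7.4]. This is the standard
   | # SAC stabilisation trick: it keeps the Gaussian policy from collapsing to a
   | # deterministic one or blowing up early in training.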
61 | class CriticNet(nn.Module):
62 | def __init__(self,input,output):
63 | super(CriticNet, self).__init__()
64 | #q1
65 | self.in_to_y1=nn.Linear(input+output,256)
66 | self.in_to_y1.weight.data.normal_(0,0.1)
67 | self.y1_to_y2=nn.Linear(256,256)
68 | self.y1_to_y2.weight.data.normal_(0,0.1)
69 | self.out=nn.Linear(256,1)
70 | self.out.weight.data.normal_(0,0.1)
71 | #q2
72 | self.q2_in_to_y1 = nn.Linear(input+output, 256)
73 | self.q2_in_to_y1.weight.data.normal_(0, 0.1)
74 | self.q2_y1_to_y2 = nn.Linear(256, 256)
75 | self.q2_y1_to_y2.weight.data.normal_(0, 0.1)
76 | self.q2_out = nn.Linear(256, 1)
77 | self.q2_out.weight.data.normal_(0, 0.1)
78 | def forward(self,s,a):
79 | inputstate = torch.cat((s, a), dim=1)
80 | #q1
81 | q1=self.in_to_y1(inputstate)
82 | q1=F.relu(q1)
83 | q1=self.y1_to_y2(q1)
84 | q1=F.relu(q1)
85 | q1=self.out(q1)
86 |         #q2 (use the dedicated q2 layers so the twin critics actually differ)
87 |         q2 = self.q2_in_to_y1(inputstate)
88 |         q2 = F.relu(q2)
89 |         q2 = self.q2_y1_to_y2(q2)
90 |         q2 = F.relu(q2)
91 |         q2 = self.q2_out(q2)
92 | return q1,q2
93 |
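   | # CriticNet above implements twin Q-heads (clipped double-Q): both heads score the
   | # same (s, a) pair and the learner takes torch.min(q1, q2) when forming targets,
   | # which counteracts the overestimation bias of a single Q-function.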
94 | class Memory():
95 | def __init__(self,capacity,dims,type_m):
96 | self.capacity=capacity
97 | self.mem=np.zeros((capacity,dims))
98 | self.memory_counter=0
99 | self.type_m=type_m
100 |     '''Store one transition'''
101 | def store_transition(self,s,a,r,s_):
102 | if self.type_m==1:
103 |             tran = np.hstack((s, [a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7],a[8],a[9],r], s_))  # concatenate s, a, r, s_ horizontally into one row
104 | else:
105 |             tran = np.hstack((s, [a[0],r], s_))  # concatenate s, a, r, s_ horizontally into one row
106 |
107 |         index = self.memory_counter % self.capacity  # modulo gives the ring-buffer index
108 |         self.mem[index, :] = tran  # row `index` holds one transition; mem is a (capacity x (s+a+r+s_)) array
109 | self.memory_counter+=1
110 |     '''Randomly sample a batch from memory'''
111 | def sample(self,n):
112 |         assert self.memory_counter>=self.capacity,'replay memory is not full yet'
113 |         sample_index = np.random.choice(self.capacity, n)  # draw n random indices from the capacity slots as one batch
114 |         new_mem = self.mem[sample_index, :]  # gather the sampled transitions (s, a, r, s_) by those indices
115 | return new_mem
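   | # Memory row layout for type_m == 1 (the case used in this script):
   | #   [ s (state_number) | a (action_number) | r (1) | s_ (state_number) ]
   | # The batch slicing in the training loop (b_s, b_a, b_r, b_s_) relies on exactly
   | # this ordering.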
116 | class Actor():
117 | def __init__(self):
118 |         self.action_net=ActorNet(state_number,action_number)  # outputs the mean and log-std of the Gaussian policy
119 | self.optimizer=torch.optim.Adam(self.action_net.parameters(),lr=policy_lr)
120 |
121 | def choose_action(self,s):
122 | inputstate = torch.FloatTensor(s)
123 | mean,std=self.action_net(inputstate)
124 | dist = torch.distributions.Normal(mean, std)
125 | action=dist.sample()
126 | action=torch.clamp(action,min_action,max_action)
127 | return action.detach().numpy()
128 | def evaluate(self,s):
129 | inputstate = torch.FloatTensor(s)
130 | mean,std=self.action_net(inputstate)
131 | dist = torch.distributions.Normal(mean, std)
132 | noise = torch.distributions.Normal(0, 1)
133 | z = noise.sample()
134 | action=torch.tanh(mean+std*z)
135 | action=torch.clamp(action,min_action,max_action)
136 | action_logprob=dist.log_prob(mean+std*z)-torch.log(1-action.pow(2)+1e-6)
137 | return action,action_logprob,z,mean,std
138 |
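   |     # evaluate() implements the squashed-Gaussian log-likelihood used by SAC:
   |     #   a = tanh(u),  u ~ N(mean, std)
   |     #   log pi(a|s) = log N(u; mean, std) - log(1 - tanh(u)^2 + eps)
   |     # where eps = 1e-6 guards the log against tanh saturation as |a| -> 1.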
139 | def learn(self,actor_loss):
140 | loss=actor_loss
141 | self.optimizer.zero_grad()
142 | loss.backward()
143 | self.optimizer.step()
144 |
145 | class Entroy():
146 | def __init__(self):
147 | self.target_entropy = -action_number
148 | self.log_alpha = torch.zeros(1, requires_grad=True)
149 | self.alpha = self.log_alpha.exp()
150 | self.optimizer = torch.optim.Adam([self.log_alpha], lr=q_lr)
151 |
152 | def learn(self,entroy_loss):
153 | loss=entroy_loss
154 | self.optimizer.zero_grad()
155 | loss.backward()
156 | self.optimizer.step()
157 |
158 | class Critic():
159 | def __init__(self):
160 |         self.critic_v,self.target_critic_v=CriticNet(state_number,action_number),CriticNet(state_number,action_number)  # twin critics mapping (state, action) to Q values
161 | self.optimizer = torch.optim.Adam(self.critic_v.parameters(), lr=value_lr,eps=1e-5)
162 | self.lossfunc = nn.MSELoss()
163 | def soft_update(self):
164 | for target_param, param in zip(self.target_critic_v.parameters(), self.critic_v.parameters()):
165 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)
166 |
167 | def get_v(self,s,a):
168 | return self.critic_v(s,a)
169 |
170 | def learn(self,current_q1,current_q2,target_q):
171 | loss = self.lossfunc(current_q1, target_q) + self.lossfunc(current_q2, target_q)
172 | self.optimizer.zero_grad()
173 | loss.backward()
174 | self.optimizer.step()
175 |
176 | def cosVector(x,y):
177 |     result1=0.0
178 |     result2=0.0
179 |     result3=0.0
180 | for i in range(len(x)):
181 | result1+=x[i]*y[i] #sum(X*Y)
182 | result2+=x[i]**2 #sum(X*X)
183 | result3+=y[i]**2 #sum(Y*Y)
184 | return result1/((result2*result3)**0.5)
185 |
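   | # cosVector(x, y) above is plain cosine similarity. A vectorised equivalent
   | # (functionally the same, shown only for clarity) would be:
   | #
   | #   def cos_vector(x, y):
   | #       x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
   | #       return float(np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y)))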
186 |
187 | if Switch==0:
188 |     print('SAC training...')
189 | actor = Actor()
190 | critic = Critic()
191 | entroy=Entroy()
192 | M = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
193 | all_ep_r = []
194 | # actor2 = Actor()
195 | # critic2 = Critic()
196 | # entroy2=Entroy()
197 | # M2 = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
198 | # all_ep_r2 = []
199 | # actor3 = Actor()
200 | # critic3 = Critic()
201 | # entroy3=Entroy()
202 | # M3 = Memory(MemoryCapacity, 2 * state_number + action_number + 1,1)
203 | # all_ep_r3 = []
204 | # state_number=6
205 | # action_number=1 #9
206 | # actor4 = Actor()
207 | # critic4 = Critic()
208 | # entroy4=Entroy()
209 | # M4 = Memory(MemoryCapacity, 2 * state_number + 1+ 1,2)
210 | # all_ep_r4 = []
211 | # state_number=2
212 | # action_number=9 #9
213 | # end_location = [26*2,18*2] #8*2,9*2[]
214 |
215 | end_location = [15*2,32*2]
216 | end_location2 = [45*2,45*2]
217 | end_location3 = [47,38*2]
218 | D=100
219 | m_d=100
220 | lambda_q=10
221 |
222 | for episode in range(EP_MAX):
223 |         observation = np.array([4*2,5*2], dtype=np.float32)  # reset environment
224 | # observation2 = np.array([20*2,20*2], dtype=np.float32)
225 | # observation3 = np.array([40*2,10*2], dtype=np.float32)
226 | observation_su1 = np.array([17, 25*2], dtype=np.float32)
227 | observation_su2 = np.array([50, 25*2], dtype=np.float32)
228 | observation_su3 = np.array([84, 25*2], dtype=np.float32)
229 | # observation4 = np.array([observation[0], observation[1], observation2[0], observation2[1], observation3[0], observation3[1]])
230 |
231 | reward_totle = 0
232 | reward_totle2 = 0
233 | reward_totle3 = 0
234 | reward_totle4 = 0
235 | done1=False
236 | done2=False
237 | done3=False
238 | bobao=0
239 | bobao2=0
240 | bobao3=0
241 | for timestep in range(EP_LEN):
242 | # if RENDER:
243 | # env.render()
244 | action = actor.choose_action(observation)
245 | # action2 = actor2.choose_action(observation2)
246 | # action3 = actor3.choose_action(observation3)
247 | # action4 = actor4.choose_action(observation4)
248 | if not done1:
249 | [old_x, old_y] = observation
250 | new_x, new_y = int(old_x), int(old_y)
251 | new_x=int(old_x+action[0])
252 | new_y=int(old_y+action[1])
253 | if int(new_x) <= 0:
254 | new_x = 1
255 | if int(new_x) >= n_width:
256 | new_x = int(n_width)-1
257 | if int(new_y) <= 0:
258 | new_y = 1
259 | if int(new_y) >= n_height:
260 | new_y = int(n_height)-1
261 | if MARK[new_x,new_y] == 2:
262 | new_x, new_y = old_x, old_y
263 | observation_=np.array([new_x, new_y], dtype=np.float32)
264 | else:
265 | observation_ = observation
266 | # if not done2:
267 | # [old_x, old_y] = observation2
268 | # new_x, new_y = int(old_x), int(old_y)
269 | # new_x=int(old_x+action2[0])
270 | # new_y=int(old_y+action2[1])
271 | # if int(new_x) <= 0:
272 | # new_x = 1
273 | # if int(new_x) >= n_width:
274 | # new_x = int(n_width)-1
275 | # if int(new_y) <= 0:
276 | # new_y = 1
277 | # if int(new_y) >= n_height:
278 | # new_y = int(n_height)-1
279 | # if MARK[new_x,new_y] == 2:
280 | # new_x, new_y = old_x, old_y
281 | # observation2_=np.array([new_x, new_y], dtype=np.float32)
282 | # else:
283 | # observation2_ = observation2
284 | # if not done3:
285 | # [old_x, old_y] = observation3
286 | # new_x, new_y = int(old_x), int(old_y)
287 | # new_x=int(old_x+action3[0])
288 | # new_y=int(old_y+action3[1])
289 | # if int(new_x) <= 0:
290 | # new_x = 1
291 | # if int(new_x) >= n_width:
292 | # new_x = int(n_width)-1
293 | # if int(new_y) <= 0:
294 | # new_y = 1
295 | # if int(new_y) >= n_height:
296 | # new_y = int(n_height)-1
297 | # if MARK[new_x,new_y] == 2:
298 | # new_x, new_y = old_x, old_y
299 | # observation3_=np.array([new_x, new_y], dtype=np.float32)
300 | # else:
301 | # observation3_ = observation3
302 | # observation_ = env.step(observation, 1, action) # 单步交互
303 | # observation4_ = np.array([observation_[0], observation_[1], observation2_[0], observation2_[1], observation3_[0], observation3_[1]])
304 | # state7_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]])
305 | # done_sys = done1 and done2 and done3
306 |
307 | if action[8]==-1:
308 | action[8]=-0.9999999
309 | # if action2[8]==-1:
310 | # action2[8]=-0.9999999
311 | # if action3[8]==-1:
312 | # action3[8]=-0.9999999
313 | if action[8]==1:
314 | action[8]=0.9999999
315 | # if action2[8]==1:
316 | # action2[8]=0.9999999
317 | # if action3[8]==1:
318 | # action3[8]=0.9999999
319 |
320 | w_1=np.array([action[2]* math.exp(1)**(1j*(1+action[3])*math.pi), action[4]* math.exp(1)**(1j*(1+action[5])*math.pi), action[6]* math.exp(1)**(1j*(1+action[7])*math.pi)])
321 | # w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)])
322 | # w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)])
323 | theta_1=cosVector([1,0,0],[observation_[0]-50,observation_[1]-100, 1-2])
324 | a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
325 | b_1_AP_LOS=math.sqrt(PL_AP[int(observation_[0]), int(observation_[1])])
326 | h_1=b_1_AP_LOS*a_1
327 | interference_1=10**(-9)
328 | # theta_2=cosVector([1,0,0],[observation2_[0]-50,observation2_[1]-100, 1-2])
329 | # a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])#
330 | # b_2_AP_LOS=math.sqrt(PL_AP[int(observation2_[0]), int(observation2_[1])])
331 | # h_2=b_2_AP_LOS*a_2
332 | # interference_2=10**(-9)
333 | # theta_3=cosVector([1,0,0],[observation3_[0]-50,observation3_[1]-100, 1-2])
334 | # a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])#
335 | # b_3_AP_LOS=math.sqrt(PL_AP[int(observation3_[0]), int(observation3_[1])])
336 | # h_3=b_3_AP_LOS*a_3
337 | # interference_3=10**(-9)
338 | theta_4=cosVector([1,0,0],[observation_su1[0]-50,observation_su1[1]-100, 1-2])
339 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
340 | b_4_AP_LOS=math.sqrt(PL_AP[int(observation_su1[0]), int(observation_su1[1])])
341 | h_4=b_4_AP_LOS*a_4
342 | interference_4=10**(-9)
343 | theta_5=cosVector([1,0,0],[observation_su2[0]-50,observation_su2[1]-100, 1-2])
344 | a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), math.exp(1)**(-2*1j*(math.pi*theta_5))])#
345 | b_5_AP_LOS=math.sqrt(PL_AP[int(observation_su2[0]), int(observation_su2[1])])
346 | h_5=b_5_AP_LOS*a_5
347 | interference_5=10**(-9)
348 | theta_6=cosVector([1,0,0],[observation_su3[0]-50,observation_su3[1]-100, 1-2])
349 | a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])#
350 | b_6_AP_LOS=math.sqrt(PL_AP[int(observation_su3[0]), int(observation_su3[1])])
351 | h_6=b_6_AP_LOS*a_6
352 | interference_6=10**(-9)
353 | if action[8]>0:
354 | interference_1+=(1-(action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2
355 | else:
356 | interference_4+=((action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
357 | # if action2[8]>0.5:
358 | # interference_2+=(1-(action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2
359 | # else:
360 | # interference_5+=((action2[8]+1)/2)*(np.linalg.norm(h_5*w_2))**2
361 | # if action3[8]>0.5:
362 | # interference_3+=(1-(action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2
363 | # else:
364 | # interference_6+=((action3[8]+1)/2)*(np.linalg.norm(h_6*w_3))**2
365 | SINR_1=((action[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
366 | # SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2
367 | # SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3
368 | SINR_4=(1-(action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2/interference_4
369 | # SINR_5=(1-(action2[8]+1)/2)*(np.linalg.norm(h_5*w_2))**2/interference_5
370 | # SINR_6=(1-(action3[8]+1)/2)*(np.linalg.norm(h_6*w_3))**2/interference_6
371 | # calculate reward
372 | # V_sinr_1=1-(1+SINR_1)**(-2)
373 | # # integrate(x**2, (x, 1, 2))
374 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_1)*(math.log(1+SINR_1, 2)-D/m_d)
375 | # x=sympy.Symbol('x')
376 | # f = sympy.exp(-x**2/2)
377 | # epsilon_d_1=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
378 | # # print(epsilon_d_1)
379 | # #sympy.integrate()*math.exp(1)**(-u_var**2)/2, (u_var, f_x, float('inf')))
380 | # #ue 2
381 | # V_sinr_2=1-(1+SINR_2)**(-2)
382 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_2)*(math.log(1+SINR_2, 2)-D/m_d)
383 | # x=sympy.Symbol('x')
384 | # f = sympy.exp(-x**2/2)
385 | # epsilon_d_2=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
386 | # #ue 3
387 | # V_sinr_3=1-(1+SINR_3)**(-2)
388 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_3)*(math.log(1+SINR_3, 2)-D/m_d)
389 | # x=sympy.Symbol('x')
390 | # f = sympy.exp(-x**2/2)
391 | # epsilon_d_3=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
392 | # #ue 4
393 | # V_sinr_4=1-(1+SINR_4)**(-2)
394 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_4)*(math.log(1+SINR_4, 2)-D/m_d)
395 | # x=sympy.Symbol('x')
396 | # f = sympy.exp(-x**2/2)
397 | # epsilon_d_4=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
398 | # #ue 4
399 | # V_sinr_5=1-(1+SINR_5)**(-2)
400 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_5)*(math.log(1+SINR_5, 2)-D/m_d)
401 | # x=sympy.Symbol('x')
402 | # f = sympy.exp(-x**2/2)
403 | # epsilon_d_5=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
404 | # V_sinr_6=1-(1+SINR_6)**(-2)
405 | # f_x=math.log(2)*math.sqrt(m_d/V_sinr_6)*(math.log(1+SINR_6, 2)-D/m_d)
406 | # x=sympy.Symbol('x')
407 | # f = sympy.exp(-x**2/2)
408 | # epsilon_d_6=1/(math.sqrt(2*math.pi))*(sympy.integrate(f, (x, f_x, sympy.oo)))
409 |
410 |
411 |
412 | distance_01_2=(observation_[0]-end_location[0])*(observation_[0]-end_location[0])/4+(observation_[1]-end_location[1])*(observation_[1]-end_location[1])/4
413 | distance_01 = math.sqrt(distance_01_2)
414 | #print(distance_01)
415 | # if epsilon_d_1<10**(-14):
416 | # epsilon_d_1=10**(-14)
417 | reward = -(distance_01/50)+max(0.01, min(SINR_1, SINR_4)/1000)-0.01
418 | #reward = -1
419 | #reward=0
420 | if distance_01==0:
421 | reward = 1
422 | if not done1:
423 | reward_totle += reward
424 | # if epsilon_d_2<10**(-14):
425 | # epsilon_d_2=10**(-14)
426 | # distance_02_2=(observation2_[0]-end_location2[0])*(observation2_[0]-end_location2[0])/4+(observation2_[1]-end_location2[1])*(observation2_[1]-end_location2[1])/4
427 | # distance_02 = math.sqrt(distance_02_2)
428 | # reward2 = -(distance_02/50)#+max(0.2, min(SINR_2, SINR_5)/50)
429 | # if distance_02==0:
430 | # reward2 = 1
431 | # if not done2:
432 | # reward_totle2 += reward2
433 | # distance_03_2=(observation3_[0]-end_location3[0])*(observation3_[0]-end_location3[0])/4+(observation3_[1]-end_location3[1])*(observation3_[1]-end_location3[1])/4
434 | # distance_03 = math.sqrt(distance_03_2)
435 | # # if epsilon_d_3<10**(-14):
436 | # # epsilon_d_3=10**(-14)
437 | # reward3 = -(distance_03/50)#+max(0.2, min(SINR_3, SINR_6)/50)
438 | # if distance_03==0:
439 | # reward3 = 1
440 | # if not done3:
441 | # reward_totle3 += reward3
442 |
443 | # reward4=(reward+reward2+reward3)/3
444 |
445 |
446 | # distance_01_2=(observation_[0]-end_location[0])*(observation_[0]-end_location[0])/4+(observation_[1]-end_location[1])*(observation_[1]-end_location[1])/4
447 | # distance_01 = math.sqrt(distance_01_2)
448 | # reward= -(distance_01/10)
449 | # if distance_01==0:
450 | # done1 = True
451 | # #os.system("pause")
452 | # reward=10
453 | #print(observation, action, observation_)
454 | M.store_transition(observation, action, reward, observation_)
455 | # M2.store_transition(observation2, action2, reward2, observation2_)
456 | # M3.store_transition(observation3, action3, reward3, observation3_)
457 | # M4.store_transition(observation4, action4, reward4, observation4_)
458 |
459 |
460 |             # replay buffer storage
461 |             # learning starts once the buffer has filled with stored transitions
462 |             if M.memory_counter > MemoryCapacity and not done1:
463 |                 b_M = M.sample(BATCH)
464 |                 b_s = b_M[:, :state_number]
465 |                 b_a = b_M[:, state_number: state_number + action_number]
466 |                 b_r = b_M[:, -state_number - 1: -state_number]
467 |                 b_s_ = b_M[:, -state_number:]
468 |                 b_s = torch.FloatTensor(b_s)
469 |                 b_a = torch.FloatTensor(b_a)
470 |                 b_r = torch.FloatTensor(b_r)
471 |                 b_s_ = torch.FloatTensor(b_s_)
472 |                 new_action, log_prob_, z, mean, log_std = actor.evaluate(b_s_)
473 |                 target_q1,target_q2=critic.get_v(b_s_,new_action)
474 |                 target_q=b_r+GAMMA*(torch.min(target_q1,target_q2)-entroy.alpha*log_prob_)
475 |                 current_q1, current_q2 = critic.get_v(b_s, b_a)
476 |                 critic.learn(current_q1,current_q2,target_q.detach())
477 |                 a,log_prob,_,_,_=actor.evaluate(b_s)
478 |                 q1,q2=critic.get_v(b_s,a)
479 |                 q=torch.min(q1,q2)
480 |                 actor_loss = (entroy.alpha * log_prob - q).mean()
481 |                 actor.learn(actor_loss)
482 |                 alpha_loss = -(entroy.log_alpha.exp() * (log_prob + entroy.target_entropy).detach()).mean()
483 |                 entroy.learn(alpha_loss)
484 |                 entroy.alpha=entroy.log_alpha.exp()
485 |                 # soft update of the target critics
486 |                 critic.soft_update()
487 |             observation = observation_
488 |             # reward_totle += reward
489 |             if distance_01==0:
490 |                 done1=True
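   |             # The learning block above performs one SAC update, in order:
   |             #   (1) critic regression toward y = r + gamma * (min(Q1', Q2') - alpha * log pi(a'|s')),
   |             #   (2) actor ascent on E[min(Q1, Q2) - alpha * log pi],
   |             #   (3) temperature alpha tuned toward the entropy target -|A| = -action_number,
   |             #   (4) Polyak soft update of the target critics with rate tau.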
491 | # print("arrive success!!!!!!!!!!!!!!")
492 | # if M2.memory_counter > MemoryCapacity and not done2:
493 | # b_M = M2.sample(BATCH)
494 | # b_s = b_M[:, :state_number]
495 | # b_a = b_M[:, state_number: state_number + action_number]
496 | # b_r = b_M[:, -state_number - 1: -state_number]
497 | # b_s_ = b_M[:, -state_number:]
498 | # b_s = torch.FloatTensor(b_s)
499 | # b_a = torch.FloatTensor(b_a)
500 | # b_r = torch.FloatTensor(b_r)
501 | # b_s_ = torch.FloatTensor(b_s_)
502 | # new_action, log_prob_, z, mean, log_std = actor2.evaluate(b_s_)
503 | # target_q1,target_q2=critic2.get_v(b_s_,new_action)
504 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q2)-entroy2.alpha*log_prob_)
505 | # current_q1, current_q2 = critic2.get_v(b_s, b_a)
506 | # critic2.learn(current_q1,current_q2,target_q.detach())
507 | # a,log_prob,_,_,_=actor2.evaluate(b_s)
508 | # q1,q2=critic2.get_v(b_s,a)
509 | # q=torch.min(q1,q2)
510 | # actor_loss = (entroy2.alpha * log_prob - q).mean()
511 | # actor2.learn(actor_loss)
512 | # alpha_loss = -(entroy2.log_alpha.exp() * (log_prob + entroy2.target_entropy).detach()).mean()
513 | # entroy2.learn(alpha_loss)
514 | # entroy2.alpha=entroy2.log_alpha.exp()
515 |             # # soft update
516 | # critic2.soft_update()
517 | # observation2 = observation2_
518 | # # reward_totle2 += reward2
519 | # if distance_02==0:
520 | # done2=True
521 | # # print("arrive success 2 !!!!!!!!!!!!!!")
522 | # if M3.memory_counter > MemoryCapacity and not done3:
523 | # b_M = M3.sample(BATCH)
524 | # b_s = b_M[:, :state_number]
525 | # b_a = b_M[:, state_number: state_number + action_number]
526 | # b_r = b_M[:, -state_number - 1: -state_number]
527 | # b_s_ = b_M[:, -state_number:]
528 | # b_s = torch.FloatTensor(b_s)
529 | # b_a = torch.FloatTensor(b_a)
530 | # b_r = torch.FloatTensor(b_r)
531 | # b_s_ = torch.FloatTensor(b_s_)
532 | # new_action, log_prob_, z, mean, log_std = actor3.evaluate(b_s_)
533 | # target_q1,target_q3=critic3.get_v(b_s_,new_action)
534 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q3)-entroy3.alpha*log_prob_)
535 | # current_q1, current_q3 = critic3.get_v(b_s, b_a)
536 | # critic3.learn(current_q1,current_q3,target_q.detach())
537 | # a,log_prob,_,_,_=actor3.evaluate(b_s)
538 | # q1,q3=critic3.get_v(b_s,a)
539 | # q=torch.min(q1,q3)
540 | # actor_loss = (entroy3.alpha * log_prob - q).mean()
541 | # actor3.learn(actor_loss)
542 | # alpha_loss = -(entroy3.log_alpha.exp() * (log_prob + entroy3.target_entropy).detach()).mean()
543 | # entroy3.learn(alpha_loss)
544 | # entroy3.alpha=entroy3.log_alpha.exp()
545 |             # # soft update
546 | # critic3.soft_update()
547 | # observation3 = observation3_
548 | # # reward_totle += reward
549 | # if distance_03==0:
550 | # done3=True
551 | # print("arrive success 3!!!!!!!!!!!!!!")
552 | # state_number=6
553 | # action_number=1
554 | # if M4.memory_counter > MemoryCapacity:
555 | # b_M = M4.sample(BATCH)
556 | # b_s = b_M[:, :state_number]
557 | # b_a = b_M[:, state_number: state_number + action_number]
558 | # b_r = b_M[:, -state_number - 1: -state_number]
559 | # b_s_ = b_M[:, -state_number:]
560 | # b_s = torch.FloatTensor(b_s)
561 | # b_a = torch.FloatTensor(b_a)
562 | # b_r = torch.FloatTensor(b_r)
563 | # b_s_ = torch.FloatTensor(b_s_)
564 | # new_action, log_prob_, z, mean, log_std = actor4.evaluate(b_s_)
565 | # target_q1,target_q4=critic4.get_v(b_s_,new_action)
566 | # target_q=b_r+GAMMA*(torch.min(target_q1,target_q4)-entroy4.alpha*log_prob_)
567 | # current_q1, current_q4 = critic4.get_v(b_s, b_a)
568 | # critic4.learn(current_q1,current_q4,target_q.detach())
569 | # a,log_prob,_,_,_=actor4.evaluate(b_s)
570 | # q1,q4=critic4.get_v(b_s,a)
571 | # q=torch.min(q1,q4)
572 | # actor_loss = (entroy4.alpha * log_prob - q).mean()
573 | # actor4.learn(actor_loss)
574 | # alpha_loss = -(entroy4.log_alpha.exp() * (log_prob + entroy4.target_entropy).detach()).mean()
575 | # entroy4.learn(alpha_loss)
576 | # entroy4.alpha=entroy4.log_alpha.exp()
577 |             # # soft update
578 | # critic4.soft_update()
579 | # observation4 = observation4_
580 |             if done1:
581 |                 # print("arrive success!!!!!!!!!!!!!!")
582 |                 break
583 | print("Ep: {} | rewards: {} {} {} {} | Step: {:.4f} | END: {}".format(episode, reward_totle, reward_totle2, reward_totle3, reward_totle4, timestep, observation))
584 | # if reward_totle > -10: RENDER = True
585 | all_ep_r.append(reward_totle)
586 | # all_ep_r2.append(reward_totle2)
587 | # all_ep_r3.append(reward_totle3)
588 | # all_ep_r4.append(reward_totle4)
589 |         #if episode % 20 == 0 and episode > 200:  # periodically save network parameters
590 |         #    save_data = {'net': actor.action_net.state_dict(), 'opt': actor.optimizer.state_dict(), 'i': episode}
591 |         #    torch.save(save_data, r"C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\0606\model_SAC.pth")
592 | # env.close()
593 | plt.plot(np.arange(len(all_ep_r)), all_ep_r)
594 | # plt.plot(np.arange(len(all_ep_r2)), all_ep_r2)
595 | # plt.plot(np.arange(len(all_ep_r3)), all_ep_r3)
596 | # plt.plot(np.arange(len(all_ep_r4)), all_ep_r4)
597 | plt.xlabel('Episode')
598 | plt.ylabel('Moving averaged episode reward')
599 | plt.show()
600 | else:
601 |     print('SAC testing...')
602 | aa=Actor()
603 |     checkpoint_aa = torch.load(r"C:\Users\Administrator.DESKTOP-NLH290A\Desktop\2302_code\0606\model_SAC.pth")  # raw string avoids backslash escapes in the Windows path
604 | aa.action_net.load_state_dict(checkpoint_aa['net'])
605 | for j in range(10):
606 | # state = env.reset()
607 | total_rewards = 0
608 | for timestep in range(EP_LEN):
609 | # env.render()
610 | # action = aa.choose_action(state)
611 |             # new_state, reward, done, info = env.step(action)  # step the environment
612 |             # total_rewards += reward  # disabled: 'reward' is undefined while env.step above is commented out
613 |             # state = new_state
614 |             pass  # placeholder loop body; restore the commented env interaction above to run the test
614 | print("Score:", total_rewards)
615 | # env.close()
616 |
--------------------------------------------------------------------------------
/algorithm/MA-DDPG_main.py:
--------------------------------------------------------------------------------
1 | """
2 | Deep Deterministic Policy Gradient (DDPG)
3 | -----------------------------------------
4 | An algorithm that concurrently learns a Q-function and a policy.
5 | It uses off-policy data and the Bellman equation to learn the Q-function,
6 | and uses the Q-function to learn the policy.
7 | Reference
8 | ---------
9 | Deterministic Policy Gradient Algorithms, Silver et al. 2014
10 | Continuous Control With Deep Reinforcement Learning, Lillicrap et al. 2016
11 | MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/
12 | Environment
13 | -----------
14 | OpenAI Gym Pendulum-v0, continuous action space
15 | Prerequisites
16 | -------------
17 | tensorflow >=2.0.0a0
18 | tensorflow-probability 0.6.0
19 | tensorlayer >=2.0.0
20 | To run
21 | ------
22 | python tutorial_DDPG.py --train/test
23 | """
24 |
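   | # DDPG in two lines (as implemented in DDPG.learn below):
   | #   critic:  minimise ( r + gamma * (1 - done) * Q'(s', mu'(s')) - Q(s, a) )^2
   | #   actor:   maximise Q(s, mu(s)) by gradient ascent through the critic
   | # where the primed networks are slowly-tracking targets updated by an
   | # exponential moving average with rate TAU.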
25 | import argparse
26 | import os
27 | import random
28 | import time
29 | import math
30 |
31 | #import gym
32 | import matplotlib.pyplot as plt
33 | import numpy as np
34 | import tensorflow as tf
35 | from scipy.io import loadmat
36 |
37 | import tensorlayer as tl
38 |
39 | # add arguments in command --train/test
40 | parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
41 | parser.add_argument('--train', dest='train', action='store_true', default=True)
42 | parser.add_argument('--test', dest='test', action='store_true', default=False)
43 | args = parser.parse_args()
44 |
45 | ##################### hyper parameters ####################
46 |
47 | ENV_ID = 'LargeGridWorld-v0' # environment id
48 | RANDOM_SEED = 666 # random seed, can be either an int number or None
49 | RENDER = False # render while training
50 |
51 | ALG_NAME = 'DDPG'
52 | TRAIN_EPISODES = 500 # total number of episodes for training
53 | TEST_EPISODES = 10  # total number of episodes for testing
54 | MAX_STEPS = 1000  # total number of steps per episode (was 20000)
55 |
56 | LR_A = 0.001 # learning rate for actor
57 | LR_C = 0.002 # learning rate for critic
58 | GAMMA = 0.9 # reward discount
59 | TAU = 0.01  # soft replacement rate
60 | MEMORY_CAPACITY = 20000  # size of replay buffer (was 500000)
61 | BATCH_SIZE = 64  # batch size for network updates
62 | VAR = 5 # control exploration
63 | #var_real=VAR
64 | ############################### DDPG ####################################
65 | n_width=93
66 | n_height = 93
67 | m = loadmat("C://Users/Administrator.DESKTOP-NLH290A/Desktop/2302_code/FIGURE_3/mapdata_0717.mat")
68 | #correct_action=0
69 | MARK= m["MARK_new"]
70 | PL_AP=m["MARK_PL_real"]
71 |
72 |
73 | class ReplayBuffer:
74 | """
75 | a ring buffer for storing transitions and sampling for training
76 | :state: (state_dim,)
77 | :action: (action_dim,)
78 | :reward: (,), scalar
79 | :next_state: (state_dim,)
80 | :done: (,), scalar (0 and 1) or bool (True and False)
81 | """
82 |
83 | def __init__(self, capacity):
84 | self.capacity = capacity
85 | self.buffer = []
86 | self.position = 0
87 |
88 | def push(self, state, action, reward, next_state, done):
89 | if len(self.buffer) < self.capacity:
90 | self.buffer.append(None)
91 | self.buffer[self.position] = (state, action, reward, next_state, done)
92 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer
93 |
94 | def sample(self, batch_size):
95 | batch = random.sample(self.buffer, batch_size)
96 | state, action, reward, next_state, done = map(np.stack, zip(*batch)) # stack for each element
97 | return state, action, reward, next_state, done
98 |
99 | def __len__(self):
100 | return len(self.buffer)
101 |
102 | def cosVector(x,y):
103 |     result1=0.0
104 |     result2=0.0
105 |     result3=0.0
106 | for i in range(len(x)):
107 | result1+=x[i]*y[i] #sum(X*Y)
108 | result2+=x[i]**2 #sum(X*X)
109 | result3+=y[i]**2 #sum(Y*Y)
110 | return result1/((result2*result3)**0.5)
111 |
112 | class DDPG(object):
113 | """
114 | DDPG class
115 | """
116 | def __init__(self, action_dim, state_dim, action_range, replay_buffer, agent_num=0):
117 | self.replay_buffer = replay_buffer
118 | self.action_dim, self.state_dim, self.action_range = action_dim, state_dim, action_range
119 | self.var = VAR
120 | self.agent_num=agent_num
121 |
122 | W_init = tf.random_normal_initializer(mean=0, stddev=0.3)
123 | b_init = tf.constant_initializer(0.1)
124 |
125 | def get_actor(input_state_shape, name=str(self.agent_num)):
126 | """
127 | Build actor network
128 | :param input_state_shape: state
129 | :param name: name
130 | :return: act
131 | """
132 | input_layer = tl.layers.Input(input_state_shape, name='A_input'+str(self.agent_num))
133 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l1')(input_layer)
134 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_l2')(layer)
135 | layer = tl.layers.Dense(n_units=action_dim, act=tf.nn.tanh, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'A_a')(layer)
136 | layer = tl.layers.Lambda(lambda x: action_range * x)(layer)
137 | return tl.models.Model(inputs=input_layer, outputs=layer, name='Actor' + name)
138 |
139 | def get_critic(input_state_shape, input_action_shape, name=str(self.agent_num)):
140 | """
141 | Build critic network
142 | :param input_state_shape: state
143 | :param input_action_shape: act
144 | :param name: name
145 | :return: Q value Q(s,a)
146 | """
147 | state_input = tl.layers.Input(input_state_shape, name=str(self.agent_num)+'C_s_input')
148 | action_input = tl.layers.Input(input_action_shape, name=str(self.agent_num)+'C_a_input')
149 | layer = tl.layers.Concat(1)([state_input, action_input])
150 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l1')(layer)
151 | layer = tl.layers.Dense(n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_l2')(layer)
152 | layer = tl.layers.Dense(n_units=1, W_init=W_init, b_init=b_init, name=str(self.agent_num)+'C_out')(layer)
153 | return tl.models.Model(inputs=[state_input, action_input], outputs=layer, name='Critic' + name)
154 |
155 | self.actor = get_actor([None, state_dim])
156 | self.critic = get_critic([None, state_dim], [None, action_dim])
157 | self.actor.train()
158 | self.critic.train()
159 |
160 | def copy_para(from_model, to_model):
161 | """
162 | Copy parameters for soft updating
163 | :param from_model: latest model
164 | :param to_model: target model
165 | :return: None
166 | """
167 | for i, j in zip(from_model.trainable_weights, to_model.trainable_weights):
168 | j.assign(i)
169 |
170 | self.actor_target = get_actor([None, state_dim], name=str(self.agent_num)+'_target')
171 | copy_para(self.actor, self.actor_target)
172 | self.actor_target.eval()
173 |
174 | self.critic_target = get_critic([None, state_dim], [None, action_dim], name=str(self.agent_num)+'_target')
175 | copy_para(self.critic, self.critic_target)
176 | self.critic_target.eval()
177 |
178 | self.ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) # soft replacement
179 |
180 | self.actor_opt = tf.optimizers.Adam(LR_A)
181 | self.critic_opt = tf.optimizers.Adam(LR_C)
182 |
183 | def ema_update(self):
184 | """
185 | Soft updating by exponential smoothing
186 | :return: None
187 | """
188 | paras = self.actor.trainable_weights + self.critic.trainable_weights
189 | self.ema.apply(paras)
190 | for i, j in zip(self.actor_target.trainable_weights + self.critic_target.trainable_weights, paras):
191 | i.assign(self.ema.average(j))
192 |
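   |     # ema_update() above is Polyak averaging: after self.ema.apply, each target
   |     # weight tracks theta_target <- (1 - TAU) * theta_target + TAU * theta, so with
   |     # TAU = 0.01 the targets lag the online networks by roughly 100 updates.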
193 | def get_action(self, state, greedy=False):
194 | """
195 | Choose action
196 |         :param state: state
197 |         :param greedy: whether to act greedily (without exploration noise)
198 |         :return: action
199 | """
200 | action = self.actor(np.array([state]))[0]
201 | if greedy:
202 | return action
203 | #return np.random.rand(len(action)).astype(np.float32)- action_range
204 | return np.clip(
205 | np.random.normal(action, self.var), -self.action_range, self.action_range
206 | ).astype(np.float32) # add randomness to action selection for exploration
207 |
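   |     # Exploration: the behaviour action is a ~ clip(N(mu(s), var), -range, range),
   |     # where var starts at VAR = 5 and is decayed by sqrt(0.99995) on every learn()
   |     # call in the training loop, i.e. annealed Gaussian exploration noise.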
208 | def learn(self, exact_var):
209 |         """
210 | Update parameters
211 | :return: None
212 | """
213 | self.var = exact_var
214 | #print(self.var)
215 | states, actions, rewards, states_, done = self.replay_buffer.sample(BATCH_SIZE)
216 | rewards = rewards[:, np.newaxis]
217 | done = done[:, np.newaxis]
218 |
219 | with tf.GradientTape() as tape:
220 | actions_ = self.actor_target(states_)
221 | q_ = self.critic_target([states_, actions_])
222 | target = rewards + (1 - done) * GAMMA * q_
223 | q_pred = self.critic([states, actions])
224 | td_error = tf.losses.mean_squared_error(target, q_pred)
225 | critic_grads = tape.gradient(td_error, self.critic.trainable_weights)
226 | self.critic_opt.apply_gradients(zip(critic_grads, self.critic.trainable_weights))
227 |
228 | with tf.GradientTape() as tape:
229 | actions = self.actor(states)
230 | q = self.critic([states, actions])
231 | actor_loss = -tf.reduce_mean(q) # maximize the q
232 | actor_grads = tape.gradient(actor_loss, self.actor.trainable_weights)
233 | self.actor_opt.apply_gradients(zip(actor_grads, self.actor.trainable_weights))
234 | self.ema_update()
235 |
236 |
237 | def save(self):
238 | """
239 | save trained weights
240 | :return: None
241 | """
242 | path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
243 | if not os.path.exists(path):
244 | os.makedirs(path)
245 | tl.files.save_weights_to_hdf5(os.path.join(path, 'actor.hdf5'), self.actor)
246 | tl.files.save_weights_to_hdf5(os.path.join(path, 'actor_target.hdf5'), self.actor_target)
247 | tl.files.save_weights_to_hdf5(os.path.join(path, 'critic.hdf5'), self.critic)
248 | tl.files.save_weights_to_hdf5(os.path.join(path, 'critic_target.hdf5'), self.critic_target)
249 |
250 | def load(self):
251 | """
252 | load trained weights
253 | :return: None
254 | """
255 | path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
256 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor.hdf5'), self.actor)
257 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'actor_target.hdf5'), self.actor_target)
258 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic.hdf5'), self.critic)
259 | tl.files.load_hdf5_to_weights_in_order(os.path.join(path, 'critic_target.hdf5'), self.critic_target)
260 |
261 |
262 | if __name__ == '__main__':
263 | n_mu=3
264 | n_M=5
265 | n_o=6*7
266 |
267 | #env = gym.make(ENV_ID).unwrapped
268 | #env = gym.make(ENV_ID).unwrapped
269 |
270 | # reproducible
271 | # env.seed(RANDOM_SEED)
272 | np.random.seed(RANDOM_SEED)
273 | tf.random.set_seed(RANDOM_SEED)
274 |
275 | state_dim = 2
276 | action_dim = 10
277 | action_range = 1 # scale action, [-action_range, action_range]
278 | action_range_su = np.array([1, 1, 1], dtype=np.float32)
279 | action_range_ris = np.array([1]*(2*n_M+1), dtype=np.float32)
280 | action_range_n_o = np.array([1]*(2+1), dtype=np.float32)
281 |
282 | buffer = ReplayBuffer(MEMORY_CAPACITY) #MU1
283 | buffer2 = ReplayBuffer(MEMORY_CAPACITY) #MU2
284 | buffer3 = ReplayBuffer(MEMORY_CAPACITY) #MU3
285 | # buffer4 = ReplayBuffer(MEMORY_CAPACITY) #su1
286 | # buffer5 = ReplayBuffer(MEMORY_CAPACITY) #su2
287 | # buffer6 = ReplayBuffer(MEMORY_CAPACITY) #ris
288 | # buffer7 = ReplayBuffer(MEMORY_CAPACITY) #commu
289 |
290 |
291 | agent = DDPG(action_dim, state_dim, action_range, buffer, 1) #mu
292 | agent2 = DDPG(action_dim, state_dim, action_range, buffer2, 2)
293 | agent3 = DDPG(action_dim, state_dim, action_range, buffer3, 3)
294 | # agent4 = DDPG(action_dim-2, state_dim, action_range_su, buffer4, 4) #su
295 | # agent5 = DDPG(action_dim-2, state_dim, action_range_su, buffer5, 5)
296 | # agent6 = DDPG(2*n_M+1, state_dim*n_mu, action_range_ris, buffer6, 6) #ris
297 | # agent7 = DDPG(2+1, state_dim*n_mu, action_range_n_o, buffer7, 7) #commu n_o
298 |
299 | VAR1=VAR
300 | VAR2=VAR
301 | VAR3=VAR
302 | t0 = time.time()
303 | if args.train: # train
304 | all_episode_reward = []
305 | all_episode_reward2 = []
306 | all_episode_reward3 = []
307 | # all_episode_reward4 = []
308 | # all_episode_reward5 = []
309 | # all_episode_reward6 = []
310 | # all_episode_reward7 = []
311 | init=0
312 |
313 | # end_location = [38*2,11*2]
314 | # end_location2 = [26*2,18*2] #[8*2,9*2]
315 | # end_location3 = [16*2,32*2] #[35*2,9*2]
316 | end_location = [15*2,32*2]
317 | end_location2 = [45*2,45*2]
318 | end_location3 = [47,38*2]
319 | #end_location = end_location3
320 | #end_location2 = end_location3
321 | study=0
322 | study2=0
323 | study3=0
324 | for episode in range(TRAIN_EPISODES):
325 | #state initialize
326 | x_k1_array = []
327 | y_k1_array = []
328 | x_k2_array = []
329 | y_k2_array = []
330 | x_k3_array = []
331 | y_k3_array = []
332 |         state = np.array([4*2,5*2], dtype=np.float32)  # reset environment
333 | state2 = np.array([20*2,20*2], dtype=np.float32)
334 | state3 = np.array([40*2,10*2], dtype=np.float32)
335 | state_su1 = np.array([17, 25*2], dtype=np.float32)
336 | state_su2 = np.array([50, 25*2], dtype=np.float32)
337 | state_su3 = np.array([84, 25*2], dtype=np.float32)
338 | episode_reward = 0
339 | episode_reward2 = 0
340 | episode_reward3 = 0
341 | # episode_reward4 = 0
342 | # episode_reward5 = 0
343 | # episode_reward6 = 0
344 | # episode_reward7 = 0
345 | done1=False
346 | done2=False
347 | done3=False
348 | bobao=0
349 | bobao2=0
350 | bobao3=0
351 | x_k1_array.append(state[0])
352 | y_k1_array.append(state[1])
353 | x_k2_array.append(state2[0])
354 | y_k2_array.append(state2[1])
355 | x_k3_array.append(state3[0])
356 | y_k3_array.append(state3[1])
357 | #greedy0=True
358 | for steps in range(MAX_STEPS):
359 | # if RENDER:
360 | # env.render()
361 | # Add exploration noise
362 | # action selection
363 | #if len(buffer) >= MEMORY_CAPACITY:
364 | # greedy0=False
365 | action = agent.get_action(state)
366 | action2 = agent2.get_action(state2)
367 | action3 = agent3.get_action(state3)
368 |
369 |
370 | # action4 = agent4.get_action(state4)
371 | # action5 = agent5.get_action(state5)
372 | # action6 = agent6.get_action(state6)
373 | # action7 = agent7.get_action(state7)
374 | # Step
375 | if not done1:
376 | [old_x, old_y] = state
377 | new_x, new_y = int(old_x), int(old_y)
378 | new_x=int(old_x+action[0])
379 | new_y=int(old_y+action[1])
380 | if int(new_x) <= 0:
381 | new_x = 1
382 | if int(new_x) >= n_width:
383 | new_x = int(n_width)-1
384 | if int(new_y) <= 0:
385 | new_y = 1
386 | if int(new_y) >= n_height:
387 | new_y = int(n_height)-1
388 | if MARK[new_x,new_y] == 2:
389 | new_x, new_y = old_x, old_y
390 | state_=np.array([new_x, new_y], dtype=np.float32)
391 | x_k1_array.append(state_[0])
392 | y_k1_array.append(state_[1])
393 | else:
394 | state_ = state
395 | if not done2:
396 | [old_x, old_y] = state2
397 | new_x, new_y = int(old_x), int(old_y)
398 | new_x=int(old_x+action2[0])
399 | new_y=int(old_y+action2[1])
400 | if int(new_x) <= 0:
401 | new_x = 1
402 | if int(new_x) >= n_width:
403 | new_x = int(n_width)-1
404 | if int(new_y) <= 0:
405 | new_y = 1
406 | if int(new_y) >= n_height:
407 | new_y = int(n_height)-1
408 | if MARK[new_x,new_y] == 2:
409 | new_x, new_y = old_x, old_y
410 | state2_=np.array([new_x, new_y], dtype=np.float32)
411 | x_k2_array.append(state2_[0])
412 | y_k2_array.append(state2_[1])
413 | else:
414 | state2_ = state2
415 | if not done3:
416 | [old_x, old_y] = state3
417 | new_x, new_y = int(old_x), int(old_y)
418 | new_x=int(old_x+action3[0])
419 | new_y=int(old_y+action3[1])
420 | if int(new_x) <= 0:
421 | new_x = 1
422 | if int(new_x) >= n_width:
423 | new_x = int(n_width)-1
424 | if int(new_y) <= 0:
425 | new_y = 1
426 | if int(new_y) >= n_height:
427 | new_y = int(n_height)-1
428 | if MARK[new_x,new_y] == 2:
429 | new_x, new_y = old_x, old_y
430 | state3_=np.array([new_x, new_y], dtype=np.float32)
431 | x_k3_array.append(state3_[0])
432 | y_k3_array.append(state3_[1])
433 | else:
434 | state3_ = state3
435 | # state4+5 static
436 |
437 | # state6_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]])
438 | # state7_ = np.array([state_[0], state_[1], state2_[0], state2_[1], state3_[0], state3_[1]])
439 | done_sys = done1 and done2 and done3
440 |
441 | if action[8]==-1:
442 | action[8]=-0.9999999
443 | if action2[8]==-1:
444 | action2[8]=-0.9999999
445 | if action3[8]==-1:
446 | action3[8]=-0.9999999
447 | if action[8]==1:
448 | action[8]=0.9999999
449 | if action2[8]==1:
450 | action2[8]=0.9999999
451 | if action3[8]==1:
452 | action3[8]=0.9999999
453 |
454 | w_1=np.array([action[2]* math.exp(1)**(1j*(1+action[3])*math.pi), action[4]* math.exp(1)**(1j*(1+action[5])*math.pi), action[6]* math.exp(1)**(1j*(1+action[7])*math.pi)])
455 | w_2=np.array([action2[2]* math.exp(1)**(1j*(1+action2[3])*math.pi), action2[4]* math.exp(1)**(1j*(1+action2[5])*math.pi), action2[6]* math.exp(1)**(1j*(1+action2[7])*math.pi)])
456 | w_3=np.array([action3[2]* math.exp(1)**(1j*(1+action3[3])*math.pi), action3[4]* math.exp(1)**(1j*(1+action3[5])*math.pi), action3[6]* math.exp(1)**(1j*(1+action3[7])*math.pi)])
457 | theta_1=cosVector([1,0,0],[state_[0]-50,state_[1]-100, 1-2])
458 | a_1=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_1)), math.exp(1)**(-2*1j*(math.pi*theta_1))])#
459 | b_1_AP_LOS=math.sqrt(PL_AP[int(state_[0]), int(state_[1])])
460 | h_1=b_1_AP_LOS*a_1
461 | interference_1=10**(-9)
462 | theta_2=cosVector([1,0,0],[state2_[0]-50,state2_[1]-100, 1-2])
463 | a_2=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_2)), math.exp(1)**(-2*1j*(math.pi*theta_2))])#
464 | b_2_AP_LOS=math.sqrt(PL_AP[int(state2_[0]), int(state2_[1])])
465 | h_2=b_2_AP_LOS*a_2
466 | interference_2=10**(-9)
467 | theta_3=cosVector([1,0,0],[state3_[0]-50,state3_[1]-100, 1-2])
468 | a_3=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_3)), math.exp(1)**(-2*1j*(math.pi*theta_3))])#
469 | b_3_AP_LOS=math.sqrt(PL_AP[int(state3_[0]), int(state3_[1])])
470 | h_3=b_3_AP_LOS*a_3
471 | interference_3=10**(-9)
472 | theta_4=cosVector([1,0,0],[state_su1[0]-50,state_su1[1]-100, 1-2])
473 | a_4=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_4)), math.exp(1)**(-2*1j*(math.pi*theta_4))])#
474 | b_4_AP_LOS=math.sqrt(PL_AP[int(state_su1[0]), int(state_su1[1])])
475 | h_4=b_4_AP_LOS*a_4
476 | interference_4=10**(-9)
477 | theta_5=cosVector([1,0,0],[state_su2[0]-50,state_su2[1]-100, 1-2])
478 | a_5=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_5)), math.exp(1)**(-2*1j*(math.pi*theta_5))])#
479 | b_5_AP_LOS=math.sqrt(PL_AP[int(state_su2[0]), int(state_su2[1])])
480 | h_5=b_5_AP_LOS*a_5
481 | interference_5=10**(-9)
482 | theta_6=cosVector([1,0,0],[state_su3[0]-50,state_su3[1]-100, 1-2])
483 | a_6=np.array([1, math.exp(1)**(-1*1j*(math.pi*theta_6)), math.exp(1)**(-2*1j*(math.pi*theta_6))])#
484 | b_6_AP_LOS=math.sqrt(PL_AP[int(state_su3[0]), int(state_su3[1])])
485 | h_6=b_6_AP_LOS*a_6
486 | interference_6=10**(-9)
487 | action1=action
488 | order_array=[action1[9], action2[9], action3[9]]
489 | order_index=[b[0] for b in sorted(enumerate(order_array), key=lambda i:i[1])]
490 | # action1=action
491 | # for order_i in order_index:
492 | # exec('''if action{}[8]>0.5:
493 | # interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
494 | # else:
495 | # interference_4+=((action[8]+1)/2)*(np.linalg.norm(h_4*w_1))**2
496 | # ''')
497 |
498 |                 exec('''
499 | if action{}[8]>0:
500 |     interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
501 | else:
502 |     interference_4+=((action{}[8]+1)/2)*(np.linalg.norm(h_4*w_{}))**2
503 | '''.format(order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1, order_index[0]+1))
504 |                 exec('''
505 | if action{}[8]>0:
506 |     interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
507 | else:
508 |     interference_5+=((action{}[8]+1)/2)*(np.linalg.norm(h_5*w_{}))**2
509 | '''.format(order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1, order_index[1]+1))
510 |                 exec('''
511 | if action{}[8]>0:
512 |     interference_{}+=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_{}*w_{}))**2
513 | else:
514 |     interference_6+=((action{}[8]+1)/2)*(np.linalg.norm(h_6*w_{}))**2
515 | '''.format(order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1, order_index[2]+1))
510 |
511 | SINR_1=((action1[8]+1)/2)*(np.linalg.norm(h_1*w_1))**2/interference_1
512 | SINR_2=((action2[8]+1)/2)*(np.linalg.norm(h_2*w_2))**2/interference_2
513 | SINR_3=((action3[8]+1)/2)*(np.linalg.norm(h_3*w_3))**2/interference_3
514 | exec('''SINR_4=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_4*w_{}))**2/interference_4'''.format(order_index[0]+1, order_index[0]+1))
515 | exec('''SINR_5=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_5*w_{}))**2/interference_5'''.format(order_index[1]+1, order_index[1]+1))
516 | exec('''SINR_6=(1-(action{}[8]+1)/2)*(np.linalg.norm(h_6*w_{}))**2/interference_6'''.format(order_index[2]+1, order_index[2]+1))
517 |
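   |                 # The exec() calls above dispatch on the NOMA decoding order held in
   |                 # order_index. An exec-free sketch of the same arithmetic (hypothetical
   |                 # list-based names, untested restructuring) for the interference update:
   |                 #   acts, hs, ws = [action1, action2, action3], [h_1, h_2, h_3], [w_1, w_2, w_3]
   |                 #   h_su = [h_4, h_5, h_6]
   |                 #   for su, mu in enumerate(order_index):
   |                 #       p = (acts[mu][8] + 1) / 2          # power share of the mobile user
   |                 #       if acts[mu][8] > 0:
   |                 #           interference_mu[mu] += (1 - p) * np.linalg.norm(hs[mu] * ws[mu])**2
   |                 #       else:
   |                 #           interference_su[su] += p * np.linalg.norm(h_su[su] * ws[mu])**2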
518 |
519 | # calculate reward
520 | distance_01_2=(state_[0]-end_location[0])*(state_[0]-end_location[0])/4+(state_[1]-end_location[1])*(state_[1]-end_location[1])/4
521 | distance_01 = math.sqrt(distance_01_2)
522 | #print(distance_01)
523 | exec('''reward = -(distance_01/50)+max(0.01, min(SINR_1, SINR_{})/1000)-0.01'''.format(order_index.index(0)+4))
524 | #reward = -1
525 | #reward=0
526 | if distance_01==0:
527 | reward = 1
528 | if not done1:
529 | episode_reward += reward
530 | distance_02_2=(state2_[0]-end_location2[0])*(state2_[0]-end_location2[0])/4+(state2_[1]-end_location2[1])*(state2_[1]-end_location2[1])/4
531 | distance_02 = math.sqrt(distance_02_2)
532 | exec('''reward2 = -(distance_02/50)+max(0.01, min(SINR_2, SINR_{})/1000)-0.01'''.format(order_index.index(1)+4))
533 | if distance_02==0:
534 | reward2 = 1
535 | if not done2:
536 | episode_reward2 += reward2
537 | distance_03_2=(state3_[0]-end_location3[0])*(state3_[0]-end_location3[0])/4+(state3_[1]-end_location3[1])*(state3_[1]-end_location3[1])/4
538 | distance_03 = math.sqrt(distance_03_2)
539 | exec('''reward3 = -(distance_03/50)+max(0.01, min(SINR_3, SINR_{})/1000)-0.01'''.format(order_index.index(2)+4))
540 | if distance_03==0:
541 | reward3 = 1
542 | if not done3:
543 | episode_reward3 += reward3
544 | state_ = np.array(state_, dtype=np.float32)
545 | state2_ = np.array(state2_, dtype=np.float32)
546 | state3_ = np.array(state3_, dtype=np.float32)
547 |
548 | # if len(buffer) >= MEMORY_CAPACITY and steps%100==0:
549 | # VAR *= .99995
550 | #print(state)
551 | #done = 1 if done is True else 0
552 | buffer.push(state, action, reward, state_, done1)
553 | buffer2.push(state2, action2, reward2, state2_, done2)
554 | buffer3.push(state3, action3, reward3, state3_, done3)
555 | if not done1:
556 | study=study+1
557 | if not done2:
558 | study2=study2+1
559 | if not done3:
560 | study3=study3+1
561 | if len(buffer) >= MEMORY_CAPACITY and not done1 and episode >= MEMORY_CAPACITY/MAX_STEPS:
562 | #print("in")
563 | #for i in range(20):
564 | # if study>=10:
565 | VAR1 *= math.sqrt(.99995)
566 | # study=-1
567 | agent.learn(VAR1)
568 |
569 | if len(buffer2) >= MEMORY_CAPACITY and not done2 and episode>=MEMORY_CAPACITY/MAX_STEPS:
570 | # if study2>=10:
571 | # study2=-1
572 | VAR2 *= math.sqrt(.99995)
573 | # for i in range(20):
574 | agent2.learn(VAR2)
575 |
576 | if len(buffer3) >= MEMORY_CAPACITY and not done3 and episode>=MEMORY_CAPACITY/MAX_STEPS:
577 | #
578 | # if study3>=10:
579 | # study3=-1
580 | VAR3 *= math.sqrt(.99995)
581 | # for i in range(20):
582 | agent3.learn(VAR3)
583 |
584 | if distance_01==0 and bobao==0:
585 | done1=True
586 |                 if steps<100:
587 |                     for x in range(len(x_k1_array)):
588 |                         filename = 'x_k1'+str(episode)+"_"+str(steps)+'.txt'
589 |                         with open(filename,'a') as fileobject:  # open in append mode
590 |                             fileobject.write(str(x_k1_array[x])+'\n')
591 |                     for y in range(len(y_k1_array)):
592 |                         filename = 'y_k1'+str(episode)+"_"+str(steps)+'.txt'
593 |                         with open(filename,'a') as fileobject:  # open in append mode
594 |                             fileobject.write(str(y_k1_array[y])+'\n')
595 | print("1 arrive success!!!!!!!!!!!!!!")
596 | bobao=1
597 | if distance_02==0 and bobao2==0:
598 |                 if steps<100:
599 |                     for x in range(len(x_k2_array)):
600 |                         filename = 'x_k2'+str(episode)+"_"+str(steps)+'.txt'
601 |                         with open(filename,'a') as fileobject:  # open in append mode
602 |                             fileobject.write(str(x_k2_array[x])+'\n')
603 |                     for y in range(len(y_k2_array)):
604 |                         filename = 'y_k2'+str(episode)+"_"+str(steps)+'.txt'
605 |                         with open(filename,'a') as fileobject:  # open in append mode
606 |                             fileobject.write(str(y_k2_array[y])+'\n')
607 |
608 | done2=True
609 | print("2 arrive success!!!!!!!!!!!!!!")
610 | bobao2=1
611 | if distance_03==0 and bobao3==0:
612 |                 if steps<100:
613 |                     for x in range(len(x_k3_array)):
614 |                         filename = 'x_k3'+str(episode)+"_"+str(steps)+'.txt'
615 |                         with open(filename,'a') as fileobject:  # open in append mode
616 |                             fileobject.write(str(x_k3_array[x])+'\n')
617 |                     for y in range(len(y_k3_array)):
618 |                         filename = 'y_k3'+str(episode)+"_"+str(steps)+'.txt'
619 |                         with open(filename,'a') as fileobject:  # open in append mode
620 |                             fileobject.write(str(y_k3_array[y])+'\n')
621 |
622 | done3=True
623 | print("3 arrive success!!!!!!!!!!!!!!")
624 | bobao3=1
625 | if done1 and done2 and done3:
626 | break
627 |
628 | state = state_
629 | state2 = state2_
630 | state3 = state3_
631 |
632 |
633 | if episode == 0:
634 | all_episode_reward.append(episode_reward)
635 | all_episode_reward2.append(episode_reward2)
636 | all_episode_reward3.append(episode_reward3)
637 | # filename='Reward_v2_agent1.txt'
638 | # with open (filename, 'a') as fileobject:
639 | # fileobject.write(str(episode_reward)+'\n')
640 | # filename='Reward_v2_agent2.txt'
641 | # with open (filename, 'a') as fileobject:
642 | # fileobject.write(str(episode_reward2)+'\n')
643 | # filename='Reward_v2_agent3.txt'
644 | # with open (filename, 'a') as fileobject:
645 | # fileobject.write(str(episode_reward3)+'\n')
646 | else:
647 | all_episode_reward.append(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)
648 | all_episode_reward2.append(all_episode_reward2[-1] * 0.9 + episode_reward2 * 0.1)
649 | all_episode_reward3.append(all_episode_reward3[-1] * 0.9 + episode_reward3 * 0.1)
650 | # filename='Reward_v2_agent1.txt'
651 | # with open (filename, 'a') as fileobject:
652 | # fileobject.write(str(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)+'\n')
653 | # filename='Reward_v2_agent2.txt'
654 | # with open (filename, 'a') as fileobject:
655 | # fileobject.write(str(all_episode_reward2[-1] * 0.9 + episode_reward2 * 0.1)+'\n')
656 | # filename='Reward_v2_agent3.txt'
657 | # with open (filename, 'a') as fileobject:
658 | # fileobject.write(str(all_episode_reward3[-1] * 0.9 + episode_reward3 * 0.1)+'\n')
659 | #print(var_real)
660 | print(
661 | ' Episode: {}/{} | Reward: {:.4f} & {:.4f} & {:.4f} | Step: {:.4f}| END: {}-{} {}-{} {}-{}'.format(
662 | episode + 1, TRAIN_EPISODES, episode_reward, episode_reward2, episode_reward3,
663 | steps, end_location, state, end_location2, state2, end_location3, state3
664 | ))
665 | #print(len(buffer3))
666 |
667 | #env.close()
668 | #agent.save()
669 | # filename = os.path.basename(path)
670 | plt.plot(all_episode_reward)
671 | plt.plot(all_episode_reward2)
672 | plt.plot(all_episode_reward3)
673 | if not os.path.exists('image'):
674 | os.makedirs('image')
675 | plt.savefig(os.path.join('image', '_'.join([ALG_NAME, ENV_ID])))
676 |
677 | # if args.test:
678 | # # test
679 | # agent.load()
680 | # for episode in range(TEST_EPISODES):
681 | # state = env.reset().astype(np.float32)
682 | # episode_reward = 0
683 | # for step in range(MAX_STEPS):
684 | # env.render()
685 | # state, reward, done, info = env.step(agent.get_action(state, greedy=True))
686 | # state = state.astype(np.float32)
687 | # episode_reward += reward
688 | # if done:
689 | # break
690 | # print(
691 | # 'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f} '.format(
692 | # episode + 1, TEST_EPISODES, episode_reward,
693 | # time.time() - t0
694 | # )
695 | # )
696 | # env.close()
--------------------------------------------------------------------------------