├── DDQN-AO.jpg ├── DRL ├── location │ ├── cue.npy │ ├── cue_100001.npy │ ├── move_gbu_location.py │ ├── init_gbu_location.py │ ├── move_cue_location.py │ └── init_cue_location.py ├── save_dir │ └── my_dict.npy ├── gen_omega_ZF.m ├── G_gain_cal.m ├── main_optmization_NoRIS.m ├── main_optmization_record.m ├── cvx_optimization_RIS.m ├── rs_validate.m ├── main_optmization.m ├── cvx_optimization_SDR.m ├── rs_validate2.m ├── TFAgent │ ├── Prioritized_Replay.py │ ├── DDPG_update.py │ ├── DqnMultiDimension.py │ └── DQN.py ├── cvx_optimization_Taylor.m ├── function_all.py └── env.py ├── README.md └── config.py /DDQN-AO.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xjl1998/FoV-MEC-DRL/HEAD/DDQN-AO.jpg -------------------------------------------------------------------------------- /DRL/location/cue.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xjl1998/FoV-MEC-DRL/HEAD/DRL/location/cue.npy -------------------------------------------------------------------------------- /DRL/save_dir/my_dict.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xjl1998/FoV-MEC-DRL/HEAD/DRL/save_dir/my_dict.npy -------------------------------------------------------------------------------- /DRL/location/cue_100001.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xjl1998/FoV-MEC-DRL/HEAD/DRL/location/cue_100001.npy -------------------------------------------------------------------------------- /DRL/gen_omega_ZF.m: -------------------------------------------------------------------------------- 1 | function [omegas]= gen_omega_ZF(NumberOfFov,NumberOfBS,NumberofAntenna,G,power)%定义命名函数 2 | omegas = zeros([NumberOfBS,NumberOfFov,NumberofAntenna]) 3 | for b=1:NumberOfBS 4 | H = reshape(G(b,:,:),[NumberOfFov,NumberofAntenna]); 5 | PZF = ZF(H,power); 6 | omegas(b,:,:) = PZF'; 7 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FoV-MEC-DRL 2 | codes for "Cooperative Caching, Rendering and Beamforming for RIS-assisted Wireless Virtual Reality Networks" 3 | 4 | ![DDQN-AO](DDQN-AO.jpg) 5 | 6 | ## Environments 7 | 8 | python=3.6.0 9 | 10 | numpy=1.24.4 11 | 12 | tensorflow-gpu=1.14.0 +cuda=11.3 13 | 14 | scipy=1.10.1 15 | 16 | matlab 2020b 17 | 18 | matlab.engine for python 19 | 20 | CVX Tool for Matlab2020b 21 | 22 | ## The DRL model implementation 23 | 24 | The implementation is in `DRL/model_train.py` 25 | -------------------------------------------------------------------------------- /DRL/G_gain_cal.m: -------------------------------------------------------------------------------- 1 | function [g]= G_gain_cal(h_cue_bs, h_irs_bs, h_cue_irs, reflect) 2 | % ''' 3 | % 计算综合信道增益G 4 | % h_cue_bs:用户到基站的信道增益 5 | % h_irs_bs:IRS到基站的信道增益,是个一行K列的矩阵 6 | % h_cue_irs:用户到IRS的信道增益,是一个K行一列的矩阵 7 | % reflect:反射矩阵,是一个K行K列的矩阵 8 | % :return:一个综合信道增益的值 9 | % ''' 10 | % # print("h_irs_bs",h_irs_bs) 11 | % # print("reflect",reflect) 12 | temp = h_irs_bs'*reflect; 13 | % # print("temp",temp) 14 | h_cue_irs_bs = temp*h_cue_irs; 15 | g = h_cue_bs + h_cue_irs_bs; -------------------------------------------------------------------------------- /DRL/location/move_gbu_location.py: 
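A note on the MATLAB/Python bridge listed in the README above: the Python environment presumably drives the CVX-based solvers (main_optmization.m and friends) through matlab.engine. A minimal sketch, assuming the channel tensors have already been written to DRL/data.mat and using the default sizes from config.py; the call site and the epsilon masks are illustrative, not taken from this repository:

    import matlab.engine
    eng = matlab.engine.start_matlab()
    eng.cd('DRL', nargout=0)
    # (bs_num x ue_num) scheduling mask; all-ones here purely for illustration
    epsilon = matlab.double([[1.0] * 14 for _ in range(3)])
    outs = eng.main_optmization(3.0, 14.0, 8.0, 40.0,     # BS, FoV, antennas, RIS units
                                1e-12, 80.0, 1.0,         # N_0 (a guess), P_max, R_min
                                epsilon, epsilon, 400.0,  # epsilon, epsilon_fix, BW
                                nargout=7)                # the .m file returns 7 outputs
    init_power, init_rates, opt_power, opt_rates, opt_rates_noCoMP, opt_G, rs = outs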
-------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from os.path import dirname, abspath 4 | 5 | import numpy as np 6 | np.random.seed(1) 7 | 8 | if __name__ == "__main__": 9 | timeplot = 10000 10 | max_cue = 10 11 | dir_path = dirname(abspath(__file__)) 12 | CUE_all_coord = np.load(dir_path + '\\gbu.npy') 13 | coord_cue_random = np.zeros((timeplot,max_cue, 3)) 14 | 15 | 16 | cue_coord_now = CUE_all_coord 17 | cue_generate_limit = 5 18 | for t1 in range(timeplot): 19 | if t1 ==0: 20 | a= coord_cue_random[t1,:,:] 21 | coord_cue_random[t1,:,:] = cue_coord_now 22 | else: 23 | for k in range(max_cue): 24 | coord_cue_random[t1,k,0] = -30 + (2*np.random.rand()-1)*cue_generate_limit 25 | coord_cue_random[t1,k,1] = (2*np.random.rand()-1)*cue_generate_limit 26 | coord_cue_random[t1,k, -1] = 0 27 | np.save(dir_path+'/gbu_100001.npy',coord_cue_random) 28 | print('保存完成') 29 | 30 | -------------------------------------------------------------------------------- /DRL/main_optmization_NoRIS.m: -------------------------------------------------------------------------------- 1 | function [power_noRIS,rates_noRIS,opt_rates_noCoMP,rs] = main_optmization_NoRIS(NumberOfBS,NumberOfFov,NumberofAntenna,N_0,P_max,R_min,epsilon,epsilon_fix,BW) 2 | rs=1; 3 | init_power=0; 4 | init_rates=0; 5 | opt_power=0; 6 | opt_rates=0; 7 | rates_noRIS= 0; 8 | opt_rates_noCoMP=0; 9 | power_noRIS = 0; 10 | data = load("data.mat"); 11 | h_bs_ue =data.gbn; 12 | G = data.G; 13 | opt_G_real = 0; 14 | opt_G_imag = 0; 15 | prior_omegas =0.1*(ones([NumberOfBS,NumberOfFov,NumberofAntenna])-ones([NumberOfBS,NumberOfFov,NumberofAntenna])*j); 16 | current_omegas_noRIS= prior_omegas; 17 | for i =1:2 18 | prior_omegas = current_omegas_noRIS; 19 | [opt_omegas_noRIS,status]=cvx_optimization_Taylor(BW,current_omegas_noRIS,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0,P_max,R_min); 20 | current_omegas_noRIS=opt_omegas_noRIS; 21 | if(status ~="Solved") 22 | disp("No_RIS taylor pahse failed without RIS"); 23 | rs=0; 24 | return; 25 | end 26 | end 27 | 28 | [power_noRIS,rates_noRIS,opt_rates_noCoMP]=rs_validate(BW,current_omegas_noRIS,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 29 | end 30 | 31 | -------------------------------------------------------------------------------- /DRL/location/init_gbu_location.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname, abspath 3 | import numpy as np 4 | from DRL.function_all import dist_calc_x, dist_calc_y 5 | 6 | np.random.seed(223) 7 | 8 | # from env import IrsNoma 9 | dir_path = dirname(abspath(__file__)) 10 | if __name__ == "__main__": 11 | max_step = 200010 12 | max_gfu = 10 13 | coord_cue = np.zeros((max_step,max_gfu, 3)) 14 | cue_generate_limit = 20 15 | max_speed = 3 16 | x_min_edge = -35 17 | y_min_edge = -10 18 | x_max_edge = -25 19 | y_max_edge = 10 20 | gfu_min_x = 26 21 | gfu_max_x = 46 22 | gbu_min_x = -35 23 | user_angle = "random" 24 | 25 | for i in range(max_gfu): 26 | coord_cue[0,i, 0] = -30 + (2*np.random.rand()-1)*5 27 | coord_cue[0, i,1] = (2*np.random.rand()-1)*10 28 | coord_cue[0, i,-1] = 0 29 | 30 | print("第",0,"个坐标生成成功:",coord_cue[0]) 31 | for k in range(1,max_step): 32 | for i in range(max_gfu): 33 | speed_lst=np.random.uniform(0,max_speed) 34 | angle_lst=np.random.uniform(0,2*np.pi) 35 | coord_cue[k,i, 0] = dist_calc_x(0,coord_cue[k-1,i, 0],speed_lst,angle_lst,[],gbu_min_x,x_max_edge,max_speed) 36 | 
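# dist_calc_x / dist_calc_y come from DRL/function_all.py, which is not part of
# this dump. A minimal sketch of the assumed behaviour (a random-walk step whose
# result is clipped to the map edges); the real helpers may well differ:
#   def dist_calc_x(_, x_prev, speed, angle, _obstacles, x_min, x_max, max_speed):
#       return float(np.clip(x_prev + speed * np.cos(angle), x_min, x_max))
#   def dist_calc_y(_, y_prev, speed, angle, _obstacles, y_min, y_max, max_speed):
#       return float(np.clip(y_prev + speed * np.sin(angle), y_min, y_max))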
coord_cue[k, i,1] = dist_calc_y(0,coord_cue[k-1,i, 1],speed_lst,angle_lst,[],y_min_edge,y_max_edge,max_speed) 37 | coord_cue[k, i,-1] = 0 38 | print(dir_path) 39 | np.save(dir_path+'/gbu.npy',coord_cue) 40 | 41 | -------------------------------------------------------------------------------- /DRL/location/move_cue_location.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | from os.path import dirname, abspath 5 | 6 | import numpy as np 7 | from tqdm import tqdm 8 | 9 | np.random.seed(1) 10 | if __name__ == "__main__": 11 | timeplot = 2000 12 | max_cue = 20 13 | dir_path = dirname(abspath(__file__)) 14 | CUE_coord = np.load(dir_path + '\\cue.npy') 15 | coord_cue_random = np.zeros((timeplot,max_cue, 3)) 16 | cue_coord_now = CUE_coord 17 | cue_generate_limit = 3 18 | loader = tqdm(range(timeplot)) 19 | 20 | 21 | for t1 in loader: 22 | loader.set_description_str(("[timeslot %d| %d]" % (t1, timeplot))) 23 | if t1 ==0: 24 | coord_cue_random[t1,:,:] = cue_coord_now[1,:,:] 25 | else: 26 | for k in range(max_cue): 27 | coord_cue_random[t1,k,0] = 8 + (2*np.random.rand()-1)*cue_generate_limit 28 | coord_cue_random[t1,k,1] = 10+(2*np.random.rand()-1)*cue_generate_limit 29 | coord_cue_random[t1,k, -1] = 0 30 | 31 | 32 | np.save(dir_path+'/cue.npy',coord_cue_random) 33 | 34 | # for t1 in range(timeplot): 35 | # if(t1%2!=0): 36 | # for k in range(max_cue): 37 | # coord_cue_mix_random[t1, k, 0] = coord_cue_near_random[t1,k,0] 38 | # coord_cue_mix_random[t1, k, 1] = coord_cue_near_random[t1, k, 1] 39 | # coord_cue_mix_random[t1, k, -1] = coord_cue_near_random[t1, k, -1] 40 | # else: 41 | # for k in range(max_cue): 42 | # coord_cue_mix_random[t1, k, 0] = coord_cue_far_random[t1, k, 0] 43 | # coord_cue_mix_random[t1, k, 1] = coord_cue_far_random[t1, k, 1] 44 | # coord_cue_mix_random[t1, k, -1] = coord_cue_far_random[t1, k, -1] 45 | # np.save(dir_path + '/cue_mix.npy', coord_cue_mix_random) 46 | print('保存完成') 47 | 48 | -------------------------------------------------------------------------------- /DRL/main_optmization_record.m: -------------------------------------------------------------------------------- 1 | function [record] = main_optmization_record(NumberOfBS,NumberOfFov,NumberofAntenna,NumberofRISUnit,N_0,P_max,R_min,epsilon,epsilon_fix,BW,total_iters,mode) 2 | init_power=0; 3 | init_rates=0; 4 | opt_power=0; 5 | opt_rates=0; 6 | opt_rates_noCoMP = 0; 7 | data = load("data.mat"); 8 | G = data.G; 9 | h_ue_ris= data.gnr; 10 | h_bs_ris=data.gbr; 11 | h_bs_ue =data.gbn; 12 | opt_G = 0; 13 | prior_u=zeros(NumberofRISUnit,1); 14 | for i =1:NumberofRISUnit 15 | init_theta = pi/2; 16 | prior_u(i)=sin(init_theta)+cos(init_theta)*j; 17 | end 18 | target = 4; 19 | prior_omegas =0.1*(ones([NumberOfBS,NumberOfFov,NumberofAntenna])+ones([NumberOfBS,NumberOfFov,NumberofAntenna])*j); 20 | current_omegas = prior_omegas; 21 | [init_power,init_rates,init_rates_noComp]=rs_validate(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 22 | u = prior_u; 23 | record = zeros(1,total_iters+1); 24 | record(1,1)=mean(init_power); 25 | %record_all 26 | rate=1e-4; 27 | for i=1:total_iters 28 | prior_omegas = current_omegas; 29 | if(mode==0||mode==1) 30 | [opt_omegas,status]=cvx_optimization_Taylor(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0,P_max,R_min); 31 | if(i>target) 32 | rate=rate*(0.6+rand()*0.1); 33 | current_omegas=(1-rate)*prior_omegas+rate*opt_omegas; 34 | else 35 | 
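% Note: from iteration target+1 onward the beamformer update is damped,
% current_omegas = (1-rate)*prior_omegas + rate*opt_omegas, with the step `rate`
% shrunk by a factor in [0.6, 0.7] each round -- presumably to stabilise the
% alternating optimisation near convergence. Up to iteration `target` (this
% branch) the CVX output is taken as-is.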
current_omegas=opt_omegas; 36 | end 37 | if(status ~="Solved" )%&& status ~= "Inaccurate/Solved" 38 | disp("omega failed with RIS"); 39 | rs=0; 40 | return; 41 | end 42 | end 43 | [opt_power,opt_rates,opt_rates_noCoMP]=rs_validate(BW,current_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 44 | record(1,i+1)=mean(opt_power); 45 | 46 | if(i==total_iters) 47 | break; 48 | end 49 | if(mode==0||mode==2) 50 | [u,G,a,status] = cvx_optimization_RIS(BW,u,NumberofRISUnit,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,h_bs_ue,h_ue_ris,h_bs_ris,N_0,R_min); 51 | if(status ~="Solved") 52 | disp("RIS phase failed"); 53 | rs=0; 54 | return; 55 | end 56 | end 57 | end 58 | 59 | opt_G = G; 60 | end 61 | 62 | -------------------------------------------------------------------------------- /DRL/location/init_cue_location.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname, abspath 3 | 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | from DRL.function_all import dist_calc_x,dist_calc_y 8 | 9 | 10 | 11 | np.random.seed(1) 12 | 13 | # from env import IrsNoma 14 | dir_path = dirname(abspath(__file__)) 15 | 16 | 17 | 18 | # def user_move(): 19 | # for i in range(self.gfu): 20 | # 21 | # speed_lst=np.random.uniform(0,self.max_speed) 22 | # angle_lst=np.random.uniform(0,2*np.pi) 23 | # self.gfu_coord[i,0] = dist_calc_x(0,self.gfu_coord[i,0],speed_lst,angle_lst,[],self.gfu_min_x,self.gfu_max_x,self.max_speed) 24 | # self.gfu_coord[i,1] = dist_calc_y(0,self.gfu_coord[i,1],speed_lst,angle_lst,[],self.y_min_edge,self.y_max_edge,self.max_speed) 25 | # for j in range(self.gbu): 26 | # speed_lst=np.random.uniform(0,self.max_speed) 27 | # angle_lst=np.random.uniform(0,2*np.pi) 28 | # self.gbu_coord[j,0] = dist_calc_x(0,self.gbu_coord[j,0],speed_lst,angle_lst,[],self.gbu_min_x,self.x_max_edge,self.max_speed) 29 | # self.gbu_coord[j,1] = dist_calc_y(0,self.gbu_coord[j,1] ,speed_lst,angle_lst,[],self.y_min_edge,self.y_max_edge,self.max_speed) 30 | # 31 | 32 | if __name__ == "__main__": 33 | max_step = 2000 34 | max_gfu = 20 35 | coord_cue = np.zeros((max_step,max_gfu, 3)) 36 | cue_generate_limit = 20 37 | max_speed = 0.5 38 | x_min_edge = 6*6 39 | y_min_edge = 0*6 40 | x_max_edge = 13*6 41 | y_max_edge = 15*6 42 | gfu_min_x = 0*6 43 | gfu_max_x = 20*6 44 | user_angle = "random" 45 | 46 | # coord_cue[0, 0, 0] = -15 47 | # coord_cue[0, 0, 1] = 10 48 | # coord_cue[0, 0, -1] = 0 49 | # coord_cue[0,1, 0] = -13 50 | # coord_cue[0, 1, 1] = 0 51 | # coord_cue[0, 1, -1] = 0 52 | # coord_cue[0, 2, 0] = 80 53 | # coord_cue[0, 2, 1] = 25 54 | # coord_cue[0, 2, -1] = 0 55 | # coord_cue[0, 3, 0] = 85 56 | # coord_cue[0, 3, 1] = 35 57 | # coord_cue[0, 3, -1] = 0 58 | # coord_cue[0, 4, 0] = 35 59 | # coord_cue[0, 4, 1] = 75 60 | # coord_cue[0, 4, -1] = 0 61 | # coord_cue[0,5, 0] = 30 62 | # coord_cue[0, 5, 1] = 70 63 | # coord_cue[0, 5, -1] = 0 64 | # coord_cue[0, 6, 0] = 25 65 | # coord_cue[0, 6, 1] = 71 66 | # coord_cue[0, 6, -1] = 0 67 | # coord_cue[0, 7, 0] = -5 68 | # coord_cue[0, 7, 1] = 5 69 | # coord_cue[0, 7, -1] = 0 70 | for i in range(max_gfu): 71 | coord_cue[0,i, 0] = (12+ 15*(np.random.rand()-0.5)) 72 | coord_cue[0, i,1] = (10+10*(np.random.rand()-0.5)) 73 | coord_cue[0, i,-1] = 0 74 | loader = tqdm(range(1,max_step)) 75 | for k in loader: 76 | loader.set_description_str( ("[step %d| %d]"%(k,max_step))) 77 | 78 | for i in range(max_gfu): 79 | speed_lst = np.random.uniform(0, max_speed) 80 | 
angle_lst = np.random.uniform(0, 2 * np.pi) 81 | coord_cue[k, i, 0] = dist_calc_x(0, coord_cue[k - 1, i, 0], speed_lst, angle_lst, [], gfu_min_x, 82 | gfu_max_x, max_speed) 83 | coord_cue[k, i, 1] = dist_calc_y(0, coord_cue[k - 1, i, 1], speed_lst, angle_lst, [], y_min_edge, 84 | y_max_edge, max_speed) 85 | coord_cue[k, i, -1] = 0 86 | 87 | print(dir_path) 88 | np.save(dir_path + '/cue.npy', coord_cue) 89 | 90 | 91 | -------------------------------------------------------------------------------- /DRL/cvx_optimization_RIS.m: -------------------------------------------------------------------------------- 1 | function [result,result1,result2,status] = cvx_optimization_RIS(BW,old_u,unit_num,omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,h_bn,h_rn,h_br,N_0,R_min) 2 | 3 | cvx_begin quiet 4 | cvx_solver Mosek 5 | expression a(NumberOfFov); 6 | expression rest; 7 | variable u(unit_num,1) complex; 8 | expression sum_up(NumberOfFov); 9 | expression sum_down(NumberOfFov); 10 | expression p; 11 | expression sum_P(NumberOfBS); 12 | for f = 1:NumberOfFov 13 | sum_down(f)= N_0; 14 | for b = 1:NumberOfBS 15 | for f2 = 1:NumberOfFov 16 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 17 | hbn=reshape(h_bn(b,f,:),[NumberofAntenna,1]); 18 | hrn=reshape(h_rn(b,f,:),[unit_num,1]); 19 | hbr=reshape(h_br(b,:,:),[unit_num,NumberofAntenna]); 20 | h = hbr'*diag(old_u)*hrn+hbn; 21 | zeta = omega_r'*hbn; 22 | if(epsilon(b,f2)==1) 23 | 24 | beta= diag(hrn')*hbr*omega_r; 25 | t=pow_abs(zeta,2)+2*real(u'*beta*zeta'); 26 | if (f2==f&&epsilon_fix(b,f2)==1) 27 | % compare(f,f2)=2*real(old_u'*beta*beta'*u)-pow_abs(old_u'*beta,2); 28 | sum_up(f) = sum_up(f)+2*real((old_u'*beta)*beta'*u)-pow_abs(old_u'*beta,2)+t; 29 | else 30 | sum_down(f)= sum_down(f)+ pow_abs(omega_r'*h,2); 31 | end 32 | end 33 | end 34 | end 35 | end 36 | for i=1:NumberOfFov 37 | a(i)=sum_up(i)-(2^(R_min/BW)-1)*sum_down(i); 38 | rest = rest+a(i); 39 | end 40 | 41 | 42 | maximize sum(a); 43 | subject to 44 | 45 | norm(u)<=1; 46 | 47 | for i=1:NumberOfFov 48 | a(i)>=0; 49 | end 50 | for i=1:NumberOfFov 51 | % (sum_up(i)/N_0)/(sum_down(i)/N_0)>=2^(R_min/BW)-1 52 | 1e4*(pow_p(2,R_min/BW)-1)*(sum_down(i))<=1e4*(sum_up(i)); 53 | % pow_p(2,R_min/BW)*(sum_down(i)/N_0+1)-sum_down(i)/N_0<=1+sum_up(i)/N_0; 54 | end 55 | 56 | % (pow_p(2,R_min/BW)-1)*sum_down<=1e2*sum_up(i); 57 | 58 | cvx_end 59 | 60 | Rates=zeros([NumberOfFov]); 61 | for i=1:NumberOfFov 62 | Rates(i)=BW*log2(1+sum_up(f)/sum_down(f) ); 63 | end 64 | 65 | h_new = zeros([NumberOfBS,NumberOfFov,NumberofAntenna]); 66 | u=u/norm(u); 67 | for b=1:NumberOfBS 68 | for f=1:NumberOfFov 69 | % omega_r=reshape(omegas(b,f,:),[NumberofAntenna,1]); 70 | hbn=reshape(h_bn(b,f,:),[NumberofAntenna,1]); 71 | hrn=reshape(h_rn(b,f,:),[unit_num,1]); 72 | hbr=reshape(h_br(b,:,:),[unit_num,NumberofAntenna]); 73 | %h_new(b,f,:)=(hrn'*diag(u)*hbr+hbn')'; 74 | h_new(b,f,:)=(hbn+hbr'*diag(u)*hrn); 75 | end 76 | end 77 | 78 | % 79 | result = u; 80 | result1 = h_new; 81 | result2 = a; 82 | status = cvx_status; 83 | 84 | 85 | end -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser() 3 | parser.add_argument( 4 | '--inputSize', type=int, default=1, 5 | help='inputSize') 6 | parser.add_argument( 7 | '--outputSize', type=int, default=1, 8 | help='outputSize') 9 | parser.add_argument( 10 | '--timeStep', type=int, default=5, 11 | 
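# These flags are presumably consumed elsewhere as `from config import FLAGS`
# (e.g. FLAGS.bs_num, FLAGS.ue_num, FLAGS.antenna_num). Note that several
# arguments declare a `type` that does not match their default (e.g. --e_greedy:
# type=int, default=0.90); argparse applies `type` only to command-line values,
# so the float defaults survive, but overriding such flags on the command line
# would fail to parse or be truncated.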
help='timeStep') 12 | 13 | parser.add_argument( 14 | '--dimension', type=int, default=3, 15 | help='dimension') 16 | 17 | parser.add_argument( 18 | '--interval', type=int, default=1, 19 | help='interval') 20 | 21 | parser.add_argument( 22 | '--fOV_2DShape', type=int, default=[1024,1024], 23 | help='fOV_2DShape') 24 | 25 | parser.add_argument( 26 | '--eyesight', type=int, default=2, 27 | help='eyesight') 28 | 29 | parser.add_argument( 30 | '--e_greedy', type=int, default=0.90, 31 | help='e_greedy') 32 | parser.add_argument( 33 | '--e_greedy_increment_c', type=int, default=0.0002, 34 | help='e_greedy_increment_c 0.0005') 35 | #0.0001 36 | parser.add_argument( 37 | '--gfu_bs_a', type=float, default=3.5, 38 | help='gfu_bs_a') 39 | parser.add_argument( 40 | '--irs_bs_a', type=float, default=2.5, 41 | help='f_mec') 42 | parser.add_argument( 43 | '--ue_irs_a', type=float, default= 2.5, 44 | help='ue_irs_a') 45 | parser.add_argument( 46 | '--ue_bs_a', type=float, default= 3.5, 47 | help='ue_bs_a') 48 | 49 | parser.add_argument( 50 | '--batch_size', type=float, default=64, 51 | help='batch_size') 52 | parser.add_argument( 53 | '--dqn_lrc', type=float, default= 1e-4, 54 | help='dqn_lrc') 55 | parser.add_argument( 56 | '--e_vr', type=float, default=10**(15), 57 | help='e_vr') 58 | parser.add_argument( 59 | '--r_min', type=int, default=1, 60 | help='r_min') 61 | parser.add_argument( 62 | '--fov_patch_num', type=int, default=64, 63 | help='fov_patch_num') 64 | parser.add_argument( 65 | '--BW', type=int, default=400, 66 | help='BW') 67 | parser.add_argument( 68 | '--training_interval', type=int, default=2, 69 | help='training_interval') 70 | parser.add_argument( 71 | '--double_q', type=bool, default=True, 72 | help='double_q') 73 | parser.add_argument( 74 | '--prioritized_r', type=bool, default=False, 75 | help='prioritized_r') 76 | 77 | parser.add_argument( 78 | '--replace_target_iter', type=float, default=50, 79 | help='replace_target_iter') 80 | parser.add_argument( 81 | '--antenna_num', type=int, default=8, 82 | help='antenna_num') 83 | parser.add_argument( 84 | '--bs_num', type=int, default=3, 85 | help='bs_num') 86 | parser.add_argument( 87 | '--ue_num', type=int, default=14, 88 | help='ue_num') 89 | parser.add_argument( 90 | '--irs_num', type=float, default=2, 91 | help='irs_num') 92 | parser.add_argument( 93 | '--p_max', type=float, default=80, 94 | help='p_max') 95 | parser.add_argument( 96 | '--irs_units_num', type=int, default=40, 97 | help='irs_units_num') 98 | parser.add_argument( 99 | '--memory_size', type=int, default=30000, 100 | help='irs_units_num') 101 | 102 | 103 | # ue_bs_a = 3.5 104 | # ue_irs_a = 2.5 105 | # irs_bs_a = 2.5 106 | # gfu_bs_a = 3.5#2.5 107 | # F_VR = 3 * 10**9 108 | # F_MEC = 10 * 10**9 109 | 110 | # f_VR = 15 111 | # f_MEC = 15 112 | 113 | # k_m = 10**(-9) 114 | # k_v = 10**(-9) 115 | 116 | # E_MEC = 10**(20) 117 | # E_VR = 10**(15) 118 | 119 | # np.random.seed(1) 120 | # BW = 40 121 | # N_0_dbm = -174 + 10 * log10(BW) 122 | 123 | # N_0 = np.power(10,(N_0_dbm - 30) / 10) 124 | # N_0 = 10 ** ((N_0_dbm - 30) / 10) 125 | # N_0 =0.00001 126 | # ue_bs_a = 3 127 | # ue_irs_a = 2.2 128 | # irs_bs_a = 2.2 129 | FLAGS, _ = parser.parse_known_args() 130 | 131 | 132 | -------------------------------------------------------------------------------- /DRL/rs_validate.m: -------------------------------------------------------------------------------- 1 | function [opt_power,opt_rates,opt_rates_noCoMP] = 
rs_validate(BW,omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,h,N_0) 2 | % W2 = 40; 3 | 4 | sum_up = zeros([NumberOfFov]); 5 | sum_down = zeros([NumberOfFov]); 6 | sum_up2 = zeros([NumberOfFov]); 7 | sum_down2 = zeros([NumberOfFov]); 8 | p=0; 9 | 10 | sum_P = zeros([NumberOfBS,1]); 11 | 12 | 13 | %计算功率 14 | for b = 1:NumberOfBS 15 | for f = 1:NumberOfFov 16 | if(epsilon(b,f)==1) 17 | omega_r=reshape(omegas(b,f,:),[NumberofAntenna,1]); 18 | n2=sum(pow_abs(omega_r,2)); 19 | sum_P(b,1)=sum_P(b,1)+n2; 20 | p = p +n2; 21 | end 22 | 23 | end 24 | end 25 | 26 | opt_power = sum_P; 27 | 28 | 29 | 30 | % for idx1 = 1:1:K 31 | % p1 = reshape(P(idx1,:),[K,1]); 32 | % ds = abs(H(idx1,:)*p1)^2; 33 | % int = 0; 34 | % for idx2 = 1:1:K 35 | % if idx2 ~= idx1 36 | % p2=reshape(P(idx2,:),[K,1]); 37 | % int = int + abs(H(idx1,:)*p2)^2; 38 | % end 39 | % end 40 | % sinr_k = ds/(sigma2+int); 41 | % c(idx1) = log2(1+sinr_k); 42 | % end 43 | 44 | 45 | %计算有CoMP速率 46 | for f = 1:NumberOfFov 47 | sum_down(f,1)= sum_down(f,1)+ N_0; 48 | for b = 1:NumberOfBS 49 | for f2 = 1:NumberOfFov 50 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 51 | h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 52 | if(epsilon(b,f2)==1) 53 | if (f2==f) 54 | sum_up(f,1) = sum_up(f)+pow_abs(omega_r'*h_r,2); 55 | else 56 | sum_down(f,1)= sum_down(f)+pow_abs(omega_r'*h_r,2); 57 | end 58 | end 59 | end 60 | end 61 | end 62 | 63 | 64 | 65 | Rates=zeros([NumberOfFov,1]); 66 | for i=1:NumberOfFov 67 | Rates(i,1)=BW*log2(1+(sum_up(i,1)/sum_down(i,1)) ); 68 | end 69 | opt_rates = Rates; 70 | 71 | 72 | 73 | %计算无CoMP速率 74 | for f = 1:NumberOfFov 75 | sum_down2(f)= sum_down2(f)+ N_0; 76 | for b = 1:NumberOfBS 77 | for f2 = 1:NumberOfFov 78 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 79 | h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 80 | if(epsilon(b,f2)==1) 81 | if (f2==f&&epsilon_fix(b,f2)==1) 82 | sum_up2(f) = sum_up2(f)+pow_abs(omega_r'*h_r,2); 83 | else 84 | sum_down2(f)= sum_down2(f)+pow_abs(omega_r'*h_r,2); 85 | end 86 | end 87 | end 88 | end 89 | end 90 | 91 | Rates2=zeros([NumberOfFov,1]); 92 | for i=1:NumberOfFov 93 | Rates2(i,1)=BW*log2(1+sum_up2(i,1)/sum_down2(i,1) ); 94 | end 95 | opt_rates_noCoMP=Rates2; 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | end -------------------------------------------------------------------------------- /DRL/main_optmization.m: -------------------------------------------------------------------------------- 1 | function [init_power,init_rates,opt_power,opt_rates,opt_rates_noCoMP,opt_G,rs] = main_optmization(NumberOfBS,NumberOfFov,NumberofAntenna,NumberofRISUnit,N_0,P_max,R_min,epsilon,epsilon_fix,BW) 2 | init_power=0; 3 | init_rates=0; 4 | opt_power=0; 5 | opt_rates=0; 6 | opt_rates_noCoMP = 0; 7 | data = load("data.mat"); 8 | h_ue_ris= data.gnr; 9 | h_bs_ris=data.gbr; 10 | h_bs_ue =data.gbn; 11 | G = data.G; 12 | opt_G = 0; 13 | prior_u=zeros(NumberofRISUnit,1); 14 | for i =1:NumberofRISUnit 15 | init_theta = pi/2; 16 | prior_u(i)=sin(init_theta)+cos(init_theta)*j; 17 | end 18 | rs = 1; 19 | prior_omegas =0.1*(ones([NumberOfBS,NumberOfFov,NumberofAntenna])-ones([NumberOfBS,NumberOfFov,NumberofAntenna])*j); 20 | current_omegas = prior_omegas; 21 | [init_power,init_rates,init_rates_noComp]=rs_validate(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 22 | %[a,init_power_noRIS,init_rates_noRIS]=rs_validate(BW,prior_omegas,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,h_bn,N_0); 23 | % [current_omegas]= 
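% Structure of this routine, as far as it can be inferred from the code (the
% "AO" half of DDQN-AO): for `target` rounds it alternates (1) a Taylor/SCA
% beamforming step over omegas with the RIS phases fixed (cvx_optimization_Taylor)
% and (2) a RIS phase-shift step over u with the beamformers fixed
% (cvx_optimization_RIS), then validates the resulting power and rates with
% rs_validate.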
gen_omega_ZF(NumberOfFov,NumberOfBS,NumberofAntenna,G,1); 24 | % [current_omegas_noRIS,sum_p_noRIS,Rates_noRIS,status_noRIS]=cvx_optimization_Taylor(BW,prior_omegas_noRIS,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,h_bn,N_0,P_max,R_min); 25 | % [a_noRIS,power_noRIS,rates_noRIS]=rs_validate(BW,current_omegas_noRIS,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,h_bn,N_0);%验证omega得到真实功率 26 | % prior_omegas_noRIS=current_omegas_noRIS; 27 | target = 4; 28 | u = prior_u; 29 | 30 | for i=1:target 31 | prior_omegas = current_omegas; 32 | [opt_omegas,status]=cvx_optimization_Taylor(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0,P_max,R_min); 33 | current_omegas=opt_omegas; 34 | % [opt_power0,opt_rates0]=rs_validate(BW,current_omegas,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0);%验证新u得到的真实速率 35 | % [opt_power,opt_rates,opt_rates_noCoMP]=rs_validate2(BW,prior_omegas,current_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0);%验证速率 36 | 37 | % [opt_power0,opt_rates0,opt_rates0_noComp]=rs_validate(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 38 | if(status ~="Solved" )%&& status ~= "Inaccurate/Solved" 39 | disp("omega failed with RIS"); 40 | rs=0; 41 | return; 42 | end 43 | 44 | % [opt_power,opt_rates,opt_rates_noCoMP]=rs_validate2(BW,prior_omegas,current_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0);%验证速率 45 | if(i==target) 46 | break; 47 | end 48 | [u,G,a,status] = cvx_optimization_RIS(BW,u,NumberofRISUnit,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,h_bs_ue,h_ue_ris,h_bs_ris,N_0,R_min); 49 | % [opt_power,opt_rates]=rs_validate(BW,current_omegas,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0);%验证新u得到的真实速率 50 | if(status ~="Solved") 51 | disp("RIS phase failed"); 52 | rs=0; 53 | return; 54 | end 55 | 56 | 57 | 58 | end 59 | % [current_omegas,status]=cvx_optimization_Taylor(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0,P_max,R_min); 60 | % if(status~="Solved" ) 61 | % disp("taylor phase2 failed with RIS"); 62 | % rs=0; 63 | % return; 64 | % end 65 | % [p,r,r2]=rs_validate(BW,current_omegas,epsilon,epsilon,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 66 | % [opt_power,opt_rates,opt_rates_noCoMP]=rs_validate2(BW,prior_omegas,current_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0);%验证速率 67 | [opt_power,opt_rates,opt_rates_noCoMP]=rs_validate(BW,current_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,G,N_0); 68 | 69 | % if(opt_rates =0; 88 | % end 89 | % end 90 | % end 91 | 92 | % 93 | 94 | cvx_end 95 | 96 | % 97 | Rates=zeros([NumberOfFov,1]); 98 | for i=1:NumberOfFov 99 | Rates(i,1)=BW*log2(1+sum_up(f)/sum_down(f) ); 100 | end 101 | 102 | result = full(omegas); 103 | % result1 = sum(sum_P); 104 | status = cvx_status; 105 | 106 | 107 | end -------------------------------------------------------------------------------- /DRL/rs_validate2.m: -------------------------------------------------------------------------------- 1 | function [opt_power,opt_rates,opt_rates_noCoMP] = rs_validate2(BW,prior_omegas,omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,h,N_0) 2 | % W2 = 40; 3 | 4 | sum_up = zeros([NumberOfFov,1]); 5 | sum_down = zeros([NumberOfFov,1]); 6 | sum_up2 = zeros([NumberOfFov,1]); 7 | sum_down2 = zeros([NumberOfFov,1]); 8 | p=0; 9 | 10 | sum_P = zeros([NumberOfBS,1]); 11 | 12 | 13 | %计算功率 14 | for b = 1:NumberOfBS 15 | for f = 1:NumberOfFov 16 | 
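% Transmit power of BS b: P_b = sum over its scheduled FoVs f (epsilon(b,f)==1)
% of ||omega_{b,f}||^2. Unlike rs_validate.m, the rate terms further below use
% the first-order Taylor surrogate 2*Re{h'*omega_prev*omega'*h} - |omega_prev'*h|^2
% for the desired-signal power, i.e. the same lower bound as in
% cvx_optimization_Taylor.m, rather than the exact |omega'*h|^2.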
if(epsilon(b,f)==1) 17 | omega_r=reshape(omegas(b,f,:),[NumberofAntenna,1]); 18 | n2=sum(pow_abs(omega_r,2)); 19 | sum_P(b,1)=sum_P(b,1)+n2; 20 | p = p +n2; 21 | end 22 | end 23 | end 24 | 25 | opt_power = sum_P; 26 | 27 | 28 | %计算有CoMP速率 29 | 30 | for f = 1:NumberOfFov 31 | sum_down(f,1)= sum_down(f,1)+ N_0; 32 | for b = 1:NumberOfBS 33 | for f2 = 1:NumberOfFov 34 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 35 | h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 36 | prior_omega = reshape(prior_omegas(b,f2,:),[NumberofAntenna,1]); 37 | if(epsilon(b,f2)==1) 38 | if (f2==f) 39 | sum_up(f,1) = sum_up(f,1)+2*real(h_r'*prior_omega*omega_r'*h_r)-pow_abs(prior_omega'*h_r,2); 40 | else 41 | sum_down(f,1)= sum_down(f,1)+pow_abs(omega_r'*h_r,2); 42 | end 43 | end 44 | end 45 | end 46 | end 47 | 48 | % for f = 1:NumberOfFov 49 | % sum_down(f)= sum_down(f)+ N_0; 50 | % for b = 1:NumberOfBS 51 | % for f2 = 1:NumberOfFov 52 | % omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 53 | % h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 54 | % prior_omega = reshape(prior_omegas(b,f2,:),[NumberofAntenna,1]); 55 | % if(epsilon(b,f2)==1) 56 | % if (f2==f) 57 | % sum_up(f) = sum_up(f)+ 2*real(h_r'*prior_omega*omega_r'*h_r)-pow_abs(prior_omega'*h_r,2); 58 | % else 59 | % sum_down(f)= sum_down(f)+pow_abs(omega_r'*h_r,2); 60 | % end 61 | % end 62 | % end 63 | % end 64 | % end 65 | 66 | 67 | 68 | Rates=zeros([NumberOfFov,1]); 69 | for i=1:NumberOfFov 70 | Rates(i,1)=BW*log2(1+sum_up(i,1)/sum_down(i,1) ); 71 | end 72 | opt_rates = Rates; 73 | 74 | 75 | 76 | %计算无CoMP速率 77 | for f = 1:NumberOfFov 78 | sum_down2(f,1)= sum_down2(f,1)+ N_0; 79 | for b = 1:NumberOfBS 80 | for f2 = 1:NumberOfFov 81 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 82 | h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 83 | prior_omega = reshape(prior_omegas(b,f2,:),[NumberofAntenna,1]); 84 | if(epsilon(b,f2)==1) 85 | if (f2==f&&epsilon_fix(b,f2)==1) 86 | sum_up2(f,1) = sum_up2(f,1)+2*real(h_r'*prior_omega*omega_r'*h_r)-pow_abs(prior_omega'*h_r,2); 87 | else 88 | sum_down2(f,1)= sum_down2(f,1)+pow_abs(omega_r'*h_r,2); 89 | end 90 | end 91 | end 92 | end 93 | end 94 | 95 | Rates2=zeros([NumberOfFov,1]); 96 | for i=1:NumberOfFov 97 | Rates2(i,1)=BW*log2(1+sum_up2(i,1)/sum_down2(i,1) ); 98 | end 99 | opt_rates_noCoMP=Rates2; 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | end -------------------------------------------------------------------------------- /DRL/TFAgent/Prioritized_Replay.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.random.seed(1) 3 | 4 | class SumTree(object): 5 | """ 6 | This SumTree code is a modified version and the original code is from: 7 | https://github.com/jaara/AI-blog/blob/master/SumTree.py 8 | 9 | Story data with its priority in the tree. 
10 | """ 11 | data_pointer = 0 12 | 13 | def __init__(self, capacity): 14 | self.capacity = capacity # for all priority values 15 | self.tree = np.zeros(2 * capacity - 1) 16 | # [--------------Parent nodes-------------][-------leaves to recode priority-------] 17 | # size: capacity - 1 size: capacity 18 | self.data = np.zeros(capacity, dtype=object) # for all transitions 19 | # [--------------data frame-------------] 20 | # size: capacity 21 | 22 | def add(self, p, data): 23 | tree_idx = self.data_pointer + self.capacity - 1 24 | self.data[self.data_pointer] = data # update data_frame 25 | self.update(tree_idx, p) # update tree_frame 26 | 27 | self.data_pointer += 1 28 | if self.data_pointer >= self.capacity: # replace when exceed the capacity 29 | self.data_pointer = 0 30 | 31 | def update(self, tree_idx, p): 32 | change = p - self.tree[tree_idx] 33 | self.tree[tree_idx] = p 34 | # then propagate the change through tree 35 | while tree_idx != 0: # this method is faster than the recursive loop in the reference code 36 | tree_idx = (tree_idx - 1) // 2 37 | self.tree[tree_idx] += change 38 | 39 | def get_leaf(self, v): 40 | """ 41 | Tree structure and array storage: 42 | 43 | Tree index: 44 | 0 -> storing priority sum 45 | / \ 46 | 1 2 47 | / \ / \ 48 | 3 4 5 6 -> storing priority for transitions 49 | 50 | Array type for storing: 51 | [0,1,2,3,4,5,6] 52 | """ 53 | parent_idx = 0 54 | while True: # the while loop is faster than the method in the reference code 55 | cl_idx = 2 * parent_idx + 1 # this leaf's left and right kids 56 | cr_idx = cl_idx + 1 57 | if cl_idx >= len(self.tree): # reach bottom, end search 58 | leaf_idx = parent_idx 59 | break 60 | else: # downward search, always search for a higher priority node 61 | if v <= self.tree[cl_idx]: 62 | parent_idx = cl_idx 63 | else: 64 | v -= self.tree[cl_idx] 65 | parent_idx = cr_idx 66 | 67 | data_idx = leaf_idx - self.capacity + 1 68 | return leaf_idx, self.tree[leaf_idx], self.data[data_idx],data_idx 69 | 70 | @property 71 | def total_p(self): 72 | return self.tree[0] # the root 73 | 74 | 75 | class Memory(object): # stored as ( s, a, r, s_ ) in SumTree 76 | """ 77 | This Memory class is modified based on the original code from: 78 | https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py 79 | """ 80 | epsilon = 0.01 # small amount to avoid zero priority 81 | alpha = 0.8 # [0~1] convert the importance of TD error to priority 82 | beta = 0.4 # importance-sampling, from initial value increasing to 1 83 | beta_increment_per_sampling = 0.001 84 | abs_err_upper = 1. 
# clipped abs error 85 | 86 | def __init__(self, capacity): 87 | self.tree = SumTree(capacity) 88 | 89 | def store(self, transition): 90 | max_p = np.max(self.tree.tree[-self.tree.capacity:]) 91 | if max_p == 0: 92 | max_p = self.abs_err_upper 93 | self.tree.add(max_p, transition) # set the max p for new p 94 | 95 | def sample(self, n): 96 | b_idx, b_memory, ISWeights = np.empty((n,), dtype=np.int32), np.empty((n, self.tree.data[0].size)), np.zeros((n,1),dtype=np.float32) 97 | pri_seg = self.tree.total_p / n # priority segment 98 | self.beta = np.min([1., self.beta + self.beta_increment_per_sampling]) # max = 1 99 | 100 | min_prob = np.min(self.tree.tree[-self.tree.capacity:]) / self.tree.total_p # for later calculate ISweight 101 | index = [] 102 | for i in range(n): 103 | a, b = pri_seg * i, pri_seg * (i + 1) 104 | v = np.random.uniform(a, b) 105 | idx, p, data,data_idx = self.tree.get_leaf(v) 106 | index.append(data_idx) 107 | prob = p / self.tree.total_p 108 | ISWeights[i,0] = np.power(prob/min_prob, -self.beta) 109 | b_idx[i], b_memory[i, :] = idx, data 110 | 111 | return b_idx, b_memory, ISWeights,index 112 | 113 | def batch_update(self, tree_idx, abs_errors): 114 | abs_errors += self.epsilon # convert to abs and avoid 0 115 | clipped_errors = np.minimum(abs_errors, self.abs_err_upper) 116 | ps = np.power(clipped_errors, self.alpha) 117 | for ti, p in zip(tree_idx, ps): 118 | self.tree.update(ti, p) 119 | -------------------------------------------------------------------------------- /DRL/cvx_optimization_Taylor.m: -------------------------------------------------------------------------------- 1 | function [result,status] = cvx_optimization_Taylor(BW,prior_omegas,epsilon,epsilon_fix,NumberOfFov,NumberOfBS,NumberofAntenna,h,N_0,P_max,R_min) 2 | % W2 = 40; 3 | 4 | cvx_begin quiet 5 | cvx_solver Mosek 6 | 7 | variable omegas(NumberOfBS,NumberOfFov,NumberofAntenna,1) complex; 8 | expression sum_up(NumberOfFov); 9 | expression sum_down(NumberOfFov); 10 | expression mr(NumberOfFov); 11 | expression im(NumberOfFov); 12 | expression diff(NumberOfFov,NumberOfBS,NumberOfFov); 13 | expression target; 14 | expression p; 15 | expression sum_P(NumberOfBS); 16 | 17 | for f = 1:NumberOfFov 18 | sum_down(f)= sum_down(f)+ N_0; 19 | for b = 1:NumberOfBS 20 | for f2 = 1:NumberOfFov 21 | omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 22 | h_r=reshape(h(b,f,:),[NumberofAntenna,1]); 23 | prior_omega = reshape(prior_omegas(b,f2,:),[NumberofAntenna,1]); 24 | if(epsilon(b,f2)==1) 25 | if (f2==f&&epsilon_fix(b,f2)==1) 26 | % im(f)=2*real(h_r'*prior_omega*omega_r'*h_r)-pow_abs(prior_omega'*h_r,2); 27 | sum_up(f) = sum_up(f)+2*real(h_r'*prior_omega*omega_r'*h_r)-pow_abs(prior_omega'*h_r,2); 28 | else 29 | sum_down(f)= sum_down(f)+pow_abs(omega_r'*h_r,2); 30 | end 31 | end 32 | end 33 | end 34 | end 35 | 36 | 37 | %计算功率相关的约束 38 | for b = 1:NumberOfBS 39 | for f = 1:NumberOfFov 40 | if(epsilon(b,f)==1) 41 | omega_r=reshape(omegas(b,f,:),[NumberofAntenna,1]); 42 | n2=sum(pow_abs(omega_r,2)); 43 | sum_P(b)=sum_P(b)+n2; 44 | p = p +n2; 45 | end 46 | end 47 | end 48 | 49 | % for f1=1:NumberOfFov 50 | % for b=1:NumberOfBS 51 | % h_r=reshape(h(b,f1,:),[NumberofAntenna,1]); 52 | % for f2=1:NumberOfFov 53 | % if(f2~=f1&&epsilon(b,f2)==1) 54 | % omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 55 | % diff(f1,b,f2) = omega_r'*h_r; 56 | % end 57 | % end 58 | % end 59 | % end 60 | % 61 | 62 | minimize p; 63 | subject to 64 | 65 | for i=1:NumberOfFov 66 | 1e4*(pow_p(2,R_min/BW)-1)*(sum_down(i))<=1e4*(sum_up(i)); 67 
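% sum_up(i) is the first-order Taylor (SCA) lower bound on the desired-signal
% power |omega'*h|^2, linearised around prior_omegas:
%   |omega'*h|^2 >= 2*Re{h'*omega_prev*omega'*h} - |omega_prev'*h|^2,
% which keeps this minimum-rate constraint convex in omegas. Scaling both sides
% by 1e4 changes nothing mathematically; it is apparently there to help the
% solver's numerical conditioning.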
| end 68 | % 1e4*(pow_p(2,R_min/BW)-1)*sum_down(i)<=1e4*sum_up(i); 69 | % for i=1:NumberOfFov 70 | % im(i)>=0; 71 | % end 72 | for i=1:NumberOfBS 73 | sum_P(i)<=P_max; 74 | end 75 | % 76 | % for f1=1:NumberOfFov 77 | % for b=1:NumberOfBS 78 | % for f2=1:NumberOfFov 79 | % if(f2~=f1) 80 | % diff(f1,b,f2) == 0; 81 | % end 82 | % end 83 | % end 84 | % end 85 | % for f=1:NumberOfFov 86 | % for b=1:NumberOfBS 87 | % for f2=1:NumberOfFov 88 | % im(f,b,f2)>=0; 89 | % end 90 | % end 91 | % end 92 | 93 | % 94 | 95 | cvx_end 96 | 97 | % 98 | % Rates=zeros([NumberOfFov,1]); 99 | % for i=1:NumberOfFov 100 | % Rates(i,1)=BW*log2(1+sum_up(f)/sum_down(f) ); 101 | % end 102 | 103 | Rates=zeros([NumberOfFov,1]); 104 | for i=1:NumberOfFov 105 | Rates(i,1)=BW*log2(1+sum_up(i)/sum_down(i) ); 106 | end 107 | % diffs = zeros([NumberOfFov,NumberOfBS,NumberOfFov]); 108 | % for f1=1:NumberOfFov 109 | % for b=1:NumberOfBS 110 | % h_r=reshape(h(b,f1,:),[NumberofAntenna,1]); 111 | % for f2=1:NumberOfFov 112 | % if(f2~=f1&&epsilon(b,f2)==1) 113 | % omega_r=reshape(omegas(b,f2,:),[NumberofAntenna,1]); 114 | % diffs(f1,b,f2) = omega_r'*h_r; 115 | % end 116 | % end 117 | % end 118 | % end 119 | 120 | 121 | result = full(omegas); 122 | % result1 = sum(sum_P); 123 | status = cvx_status; 124 | 125 | 126 | end -------------------------------------------------------------------------------- /DRL/TFAgent/DDPG_update.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deep Deterministic Policy Gradient (DDPG), Reinforcement Learning. 3 | DDPG is Actor Critic based algorithm. 4 | Pendulum example. 5 | 6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/ 7 | 8 | Using: 9 | tensorflow 1.0 10 | gym 0.8.0 11 | """ 12 | 13 | import tensorflow as tf 14 | tf.disable_v2_behavior() 15 | import numpy as np 16 | import time 17 | tf.set_random_seed(1) 18 | np.random.seed(1) 19 | 20 | 21 | ##################### hyper parameters #################### 22 | 23 | MAX_EPISODES = 200 24 | MAX_EP_STEPS = 200 25 | # LR_A = 0.001 # learning rate for actor 26 | # LR_C = 0.002 # learning rate for critic 27 | GAMMA = 0.9 #0.99 reward discount 28 | TAU = 0.2 #0.0001 soft replacement 29 | # MEMORY_CAPACITY = 10000 30 | # BATCH_SIZE = 64 31 | # 32 | # RENDER = False 33 | # ENV_NAME = 'Pendulum-v0' 34 | 35 | ############################### DDPG #################################### 36 | 37 | class DDPGUP(object): 38 | def __init__(self, a_dim, s_dim, a_bound,MEMORY_CAPACITY,BATCH_SIZE,name_str,replace_iter,L_A,L_C): 39 | 40 | self.MEMORY_CAPACITY = MEMORY_CAPACITY 41 | self.BATCH_SIZE=BATCH_SIZE 42 | self.memory = np.zeros((MEMORY_CAPACITY, s_dim * 2 + a_dim + 1), dtype=complex) 43 | self.pointer = 0 44 | self.sess = tf.Session() 45 | 46 | self.learn_step_counter = 0 47 | self.name_str = name_str 48 | self.a_dim, self.s_dim, self.a_bound = a_dim, s_dim, a_bound, 49 | self.S = tf.placeholder(tf.float32, [None, s_dim], 's'+self.name_str) 50 | self.S_ = tf.placeholder(tf.float32, [None, s_dim], 's_'+self.name_str) 51 | self.R = tf.placeholder(tf.float32, [None, 1], 'r'+self.name_str) 52 | self.replace_target_iter = replace_iter 53 | 54 | self.L_A = L_A 55 | self.L_C = L_C 56 | self.cost_his = [] 57 | 58 | with tf.variable_scope('Actor'+name_str): 59 | self.a = self._build_a(self.S, scope='eval'+name_str, trainable=True) 60 | self.a_ = self._build_a(self.S_, scope='target'+name_str, trainable=False) 61 | with tf.variable_scope('Critic'+name_str): 62 | # assign self.a = a in memory when calculating 
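# Target networks are not copied hard; they track the eval networks softly,
#   theta_target <- (1 - TAU) * theta_target + TAU * theta_eval
# (self.soft_replace below), applied every `replace_target_iter` calls to learn().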
q for td_error, 63 | # otherwise the self.a is from Actor when updating Actor 64 | self.q = self._build_c(self.S, self.a, scope='eval'+name_str, trainable=True) 65 | self.q_ = self._build_c(self.S_,self.a_, scope='target'+name_str, trainable=False) 66 | 67 | # networks parameters 68 | self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor'+name_str+'/eval'+name_str) 69 | self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor'+name_str+'/target'+name_str) 70 | self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic'+name_str+'/eval'+name_str) 71 | self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic'+name_str+'/target'+name_str) 72 | 73 | # target net replacement 74 | self.soft_replace = [tf.assign(t, (1 - TAU) * t + TAU * e) 75 | for t, e in zip(self.at_params + self.ct_params, self.ae_params + self.ce_params)] 76 | self.a_loss = - tf.reduce_mean(self.q) # maximize the q 77 | self.atrain = tf.train.AdamOptimizer(self.L_A).minimize(self.a_loss, var_list=self.ae_params) 78 | 79 | self.q_target = self.R + GAMMA * self.q_ 80 | # in the feed_dic for the td_error, the self.a should change to actions in memoryAdamOptimizer 81 | self.td_error = tf.losses.mean_squared_error(labels=self.q_target, predictions=self.q) 82 | self.ctrain = tf.train.AdamOptimizer(self.L_C).minimize(self.td_error, var_list=self.ce_params) 83 | self.sess.run(tf.global_variables_initializer()) 84 | 85 | def choose_action(self, s): 86 | # print(self.sess.run(self.a, {self.S: s})) 87 | # return self.sess.run(self.a, {self.S: s[np.newaxis, :]})[0] 88 | # print(np.transpose(s).reshape(-1,len(s))) 89 | test = np.transpose(s).reshape(-1,len(s)) 90 | test2 = self.sess.run(self.a, {self.S: np.transpose(s).reshape(-1,len(s))}) 91 | return self.sess.run(self.a, {self.S: np.transpose(s).reshape(-1,len(s))})#self.sess.run(self.a, {self.S: s[np.newaxis, :]})[0] 92 | 93 | 94 | def learn(self,sample_index): 95 | if len(sample_index) == 0: 96 | if self.BATCH_SIZE<=self.pointer=self.MEMORY_CAPACITY: 99 | indices = np.random.choice(self.MEMORY_CAPACITY, size=self.BATCH_SIZE) 100 | if self.pointer < self.BATCH_SIZE: 101 | return [] 102 | else: 103 | indices = sample_index 104 | for cou in range(1): 105 | bt = self.memory[indices, :] 106 | # max_r = max(self.memory[:, self.s_dim + 1]) 107 | # max_r = 250 108 | # bt[:, -self.s_dim - 1: -self.s_dim] = bt[:, -self.s_dim - 1: -self.s_dim]/max_r 109 | bs = bt[:, :self.s_dim] 110 | ba = bt[:, self.s_dim: self.s_dim + self.a_dim] 111 | br = bt[:, -self.s_dim - 1: -self.s_dim] 112 | bs_ = bt[:, -self.s_dim:] 113 | self.sess.run(self.atrain, {self.S: bs}) 114 | _,cost=self.sess.run([self.ctrain,self.td_error], {self.S: bs, self.a: ba, self.R: br, self.S_: bs_}) 115 | # soft target replacement 116 | if self.learn_step_counter % self.replace_target_iter == 0: # 更新权重值 117 | self.sess.run(self.soft_replace) 118 | self.cost_his.append(cost) 119 | self.learn_step_counter += 1 120 | return sample_index 121 | 122 | 123 | def store_transition(self, s,a, r, s_): 124 | transition = np.hstack((s, a, [r], s_)) 125 | index = self.pointer % self.MEMORY_CAPACITY # replace the old memory with new memory 126 | self.memory[index, :] = transition 127 | self.pointer += 1 128 | 129 | def _build_a(self, s, scope, trainable): 130 | with tf.variable_scope(scope): 131 | net = tf.layers.dense(s, 256, activation=tf.nn.tanh, name='l1'+self.name_str, trainable=trainable) 132 | net1 = tf.layers.dense(net, 128, activation=tf.nn.tanh, 
name='l2'+self.name_str, trainable=trainable) 133 | # drop_layer = tf.layers.dropout(net1,0.3) 134 | net2 = tf.layers.dense(net1, 64, activation=tf.nn.tanh, name='l3'+self.name_str, trainable=trainable) 135 | a = tf.layers.dense(net2, self.a_dim, activation=tf.nn.tanh, name='a'+self.name_str, trainable=trainable) 136 | return tf.multiply(a, self.a_bound, name='scaled_a'+self.name_str) 137 | 138 | def _build_c(self, s, a, scope, trainable): 139 | with tf.variable_scope(scope): 140 | n_l1 = 128 141 | w1_s = tf.get_variable('w1_s'+self.name_str, [self.s_dim, n_l1], trainable=trainable) 142 | w1_a = tf.get_variable('w1_a'+self.name_str, [self.a_dim, n_l1], trainable=trainable) 143 | b1 = tf.get_variable('b1'+self.name_str, [1, n_l1], trainable=trainable) 144 | net = tf.nn.tanh(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1) 145 | # net2 = tf.layers.dense(net, 128, activation=tf.nn.tanh, name='l4', trainable=trainable) 146 | # drop_layer = tf.layers.dropout(net2,0.3) 147 | net1 = tf.layers.dense(net, 64, activation=tf.nn.tanh, name='l3', trainable=trainable) 148 | return tf.layers.dense(net1, 1, trainable=trainable) # Q(s,a) 149 | 150 | def plot_cost(self): 151 | import matplotlib.pyplot as plt 152 | plt.plot(np.arange(len(self.cost_his)), self.cost_his) 153 | plt.ylabel('Cost'+self.name_str) 154 | plt.xlabel('training steps'+self.name_str) 155 | plt.title(self.name_str) 156 | plt.show() 157 | 158 | def clear_sess(self): 159 | self.sess.close() 160 | tf.reset_default_graph() 161 | ############################### training #################################### 162 | 163 | # env = gym.make(ENV_NAME) 164 | # env = env.unwrapped 165 | # env.seed(1) 166 | # 167 | # s_dim = env.observation_space.shape[0] 168 | # a_dim = env.action_space.shape[0] 169 | # a_bound = env.action_space.high 170 | # 171 | # ddpg = DDPG(a_dim, s_dim, a_bound) 172 | # 173 | # var = 3 # control exploration 174 | # t1 = time.time() 175 | # for i in range(MAX_EPISODES): 176 | # s = env.reset() 177 | # ep_reward = 0 178 | # for j in range(MAX_EP_STEPS): 179 | # if RENDER: 180 | # env.render() 181 | # 182 | # # Add exploration noise 183 | # a = ddpg.choose_action(s) 184 | # a = np.clip(np.random.normal(a, var), -2, 2) # add randomness to action selection for exploration 185 | # s_, r, done, info = env.step(a) 186 | # 187 | # ddpg.store_transition(s, a, r / 10, s_) 188 | # 189 | # if ddpg.pointer > MEMORY_CAPACITY: 190 | # var *= .9995 # decay the action randomness 191 | # ddpg.learn() 192 | # 193 | # s = s_ 194 | # ep_reward += r 195 | # if j == MAX_EP_STEPS-1: 196 | # print('Episode:', i, ' Reward: %i' % int(ep_reward), 'Explore: %.2f' % var, ) 197 | # # if ep_reward > -300:RENDER = True 198 | # break 199 | # print('Running time: ', time.time() - t1) 200 | -------------------------------------------------------------------------------- /DRL/TFAgent/DqnMultiDimension.py: -------------------------------------------------------------------------------- 1 | from os.path import abspath, dirname 2 | import numpy as np 3 | import tensorflow as tf 4 | from DRL.TFAgent.Prioritized_Replay import Memory 5 | 6 | tf.set_random_seed(1) 7 | np.random.seed(1) 8 | 9 | ''' 10 | DQN off-policy 11 | 使用CNN方法实现多维度动作 12 | ''' 13 | class DqnAgentMD: 14 | def __init__( 15 | self, 16 | dim_actions,# 17 | n_actions, # 可以看作动作空间的统计个数 18 | n_features, #其实是state 19 | learning_rate=0.02, 20 | reward_decay=0.9, 21 | e_greedy=0.9, 22 | replace_target_iter=100,#表示多少次更新目标权重 23 | memory_size=500, 24 | batch_size=32, 25 | e_greedy_increment=None, 26 | output_graph=False, 
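# Example construction (a sketch only -- the training script that builds this
# agent is not part of this dump, and the sizes below are illustrative):
#   agent = DqnAgentMD(dim_actions=1, n_actions=64, n_features=state_len,
#                      learning_rate=FLAGS.dqn_lrc, e_greedy=FLAGS.e_greedy,
#                      e_greedy_increment=FLAGS.e_greedy_increment_c,
#                      memory_size=FLAGS.memory_size, batch_size=FLAGS.batch_size,
#                      replace_target_iter=FLAGS.replace_target_iter,
#                      double_q=FLAGS.double_q, prioritized_replay=FLAGS.prioritized_r,
#                      name_str='fov')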
27 | name_str = '', 28 | double_q = False, 29 | prioritized_replay = False, 30 | ): 31 | ''' 32 | :param dim_actions: 动作空间的维度,至少为1维 33 | :param n_actions: 可以选择的离散动作个数 34 | :param n_features: state的长度 35 | :param learning_rate: 学习率 36 | :param reward_decay: 37 | :param e_greedy: 采用e-greedy时需要设置的epsilon 38 | :param replace_target_iter: evaluate-network更新多少次后,复制evaluate-network的权重给target-network 39 | :param memory_size: 重播队列 40 | :param batch_size: 一次训练多少批 41 | :param e_greedy_increment: 42 | :param output_graph: 43 | :param name_str: 44 | :param double_q: 是否为double-Dqn 45 | :param prioritized_replay: 是否采用prioritized_replay这一经验重放策略 46 | ''' 47 | 48 | 49 | 50 | self.params = { 51 | 'dim_actions':dim_actions, 52 | 'n_actions': n_actions, 53 | 'n_features': n_features, 54 | 'learning_rate': learning_rate, 55 | 'reward_decay': reward_decay, 56 | 'e_greedy': e_greedy, 57 | 'replace_target_iter': replace_target_iter, 58 | 'memory_size': memory_size, 59 | 'batch_size': batch_size, 60 | 'e_greedy_increment': e_greedy_increment, 61 | 'output_graph':output_graph, 62 | 'name_str':name_str, 63 | 'double_q':double_q, 64 | 'prioritized_replay':prioritized_replay, 65 | } 66 | 67 | self.actions = n_actions 68 | # self.actions_f = actions_f 69 | self.double_q = double_q 70 | self.prioritized_replay = prioritized_replay 71 | # self.actions_space = actions_space 72 | self.n_features = n_features 73 | self.lr = learning_rate 74 | self.gamma = reward_decay 75 | self.epsilon_max = e_greedy 76 | self.replace_target_iter = replace_target_iter 77 | self.memory_size = memory_size 78 | self.batch_size = batch_size 79 | self.epsilon_increment = e_greedy_increment 80 | self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max 81 | self.name_str = name_str 82 | # total learning step 83 | self.learn_step_counter = 0 84 | 85 | # initialize zero memory [s, a, r, s_] 86 | if self.prioritized_replay: 87 | self.memory = Memory(capacity=memory_size) 88 | else: 89 | self.memory = np.zeros((self.memory_size, n_features * 2 + 1 + 1),dtype=complex) 90 | 91 | # consist of [target_net, evaluate_net] 92 | self._build_net() 93 | 94 | t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net'+self.name_str) 95 | e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net'+self.name_str) 96 | with tf.variable_scope('hard_replacement'+self.name_str): 97 | self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)] 98 | 99 | # def weight_assign(): 100 | # self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)] 101 | 102 | self.sess = tf.Session() 103 | 104 | if output_graph: 105 | # $ tensorboard --logdir=logs 106 | tf.FileWriter("logs/", self.sess.graph) 107 | 108 | self.sess.run(tf.global_variables_initializer()) 109 | self.cost_his = [] 110 | 111 | def _build_net(self): 112 | tf.reset_default_graph() 113 | # ------------------ all inputs ------------------------ 114 | self.s = tf.placeholder(shape=(None, self.n_features), dtype=tf.complex64, name='s'+self.name_str) # input State 115 | self.s_ = tf.placeholder(shape=(None, self.n_features), dtype=tf.complex64, name='s_'+self.name_str) 116 | self.r = tf.placeholder(shape=(None,), dtype=tf.float32, name='r'+self.name_str) 117 | self.a = tf.placeholder(shape=(None,self.params['dim_actions']), dtype=tf.int32, name='a'+self.name_str) 118 | self.qwe = tf.placeholder(shape=(None,self.actions ), dtype=tf.float32, name='double_q_next'+self.name_str) 119 | # self.s_ = tf.placeholder(tf.float32, [None, 
self.n_features], name='s_') # input Next State 120 | # self.r = tf.placeholder(tf.float32, [None, ], name='r') # input Reward 121 | # self.a = tf.placeholder(tf.int32, [None, ], name='a') # input Action 122 | 123 | w_initializer, b_initializer = tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1) 124 | if self.prioritized_replay: 125 | self.ISWeights = tf.compat.v1.placeholder(tf.float32, [None, 1], name='IS_weights') 126 | # ------------------ build evaluate_net ------------------ 127 | with tf.variable_scope('eval_net'+self.name_str): 128 | e1 =tf.layers.dense(self.s,128, tf.nn.relu, kernel_initializer=w_initializer, 129 | bias_initializer=b_initializer, name='e1'+self.name_str) 130 | e2 = tf.layers.dense(e1, 64, tf.nn.relu, kernel_initializer=w_initializer, 131 | bias_initializer=b_initializer, name='e2'+self.name_str) 132 | # e1 =tf.layers.dense(self.s,256, tf.nn.relu, kernel_initializer=w_initializer, 133 | # bias_initializer=b_initializer, name='e1'+self.name_str) 134 | # e2 = tf.layers.dense(e1, 128, tf.nn.relu, kernel_initializer=w_initializer, 135 | # bias_initializer=b_initializer, name='e2'+self.name_str) 136 | self.q_eval = tf.layers.dense(e2, self.actions, kernel_initializer=w_initializer, 137 | bias_initializer=b_initializer, name='q'+self.name_str)#(?,actions_f) 138 | 139 | # ------------------ build target_net ------------------ 140 | with tf.variable_scope('target_net'+self.name_str): 141 | t1 = tf.layers.dense(self.s_, 128, tf.nn.relu, kernel_initializer=w_initializer, 142 | bias_initializer=b_initializer, name='t1'+self.name_str) 143 | t2 = tf.layers.dense(t1, 64, tf.nn.relu, kernel_initializer=w_initializer, 144 | bias_initializer=b_initializer, name='t2'+self.name_str) 145 | # t1 = tf.layers.dense(self.s_, 256, tf.nn.relu, kernel_initializer=w_initializer, 146 | # bias_initializer=b_initializer, name='t1'+self.name_str) 147 | # t2 = tf.layers.dense(t1, 128, tf.nn.relu, kernel_initializer=w_initializer, 148 | # bias_initializer=b_initializer, name='t2'+self.name_str) 149 | self.q_next = tf.layers.dense(t2, self.actions, kernel_initializer=w_initializer, 150 | bias_initializer=b_initializer, name='q_'+self.name_str) 151 | 152 | with tf.variable_scope('q_target'+self.name_str): 153 | if self.double_q: 154 | index = tf.argmax(self.qwe, axis=1, name='Qmax_s_'+self.name_str) 155 | index = tf.to_int32(index) 156 | q_indices = tf.stack([tf.range(tf.shape(index)[0]), index], axis=1) 157 | q_next_v = tf.gather_nd(params=self.q_next, indices= q_indices) 158 | q_target = self.r + self.gamma * q_next_v 159 | else: 160 | q_target = self.r + self.gamma * tf.reduce_max(self.q_next, axis=1, name='Qmax_s_'+self.name_str) # shape=(None, ) 161 | self.q_target = tf.stop_gradient(q_target) 162 | with tf.variable_scope('q_eval'+self.name_str): 163 | a_indices = tf.stack([tf.range(tf.shape(self.a)[0]), self.a], axis=1) 164 | self.q_eval_wrt_a = tf.gather_nd(params=self.q_eval, indices=a_indices) # shape=(None, )索引切片 165 | with tf.variable_scope('loss'+self.name_str): 166 | if self.prioritized_replay: 167 | self.abs_errors = tf.abs(self.q_target - self.q_eval_wrt_a) # for updating Sumtree 168 | #tf.squared_difference(x,y)=(x-y)的平方 169 | self.loss = tf.reduce_mean(self.ISWeights * tf.squared_difference(self.q_target, self.q_eval_wrt_a), name='TD_error'+self.name_str) 170 | else: 171 | # self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval)) 172 | self.loss = tf.reduce_mean(tf.math.squared_difference(self.q_target, self.q_eval_wrt_a, 
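# Double-DQN target (assembled in the q_target scope above):
#   a* = argmax_a Q_eval(s', a),   y = r + gamma * Q_target(s', a*)
# versus the vanilla target y = r + gamma * max_a Q_target(s', a); the loss here
# is the mean squared TD error between y and Q_eval(s, a_taken).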
name='TD_error'+self.name_str))#先求差的平方,然后取均值 173 | with tf.variable_scope('train'+self.name_str): 174 | self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss) 175 | # GradientDescentOptimizer, AdagradOptimizer, MomentumOptimizer.RMSPropOptimizer,AdamOptimizer 176 | def store_transition(self, s, a, r, s_): #记忆存储 177 | if self.prioritized_replay: 178 | # prioritized replay 179 | transition = np.hstack((s, [a, r], s_)) 180 | self.memory.store(transition) 181 | else: 182 | if not hasattr(self, 'memory_counter'): # 183 | self.memory_counter = 0 184 | transition = np.hstack((s, [a, r], s_)) 185 | if self.memory_counter < self.memory_size: 186 | index = self.memory_counter % self.memory_size 187 | else: 188 | index = self.memory_counter % self.memory_size 189 | # replace the old memory with new memory 190 | self.memory[index, :] = transition 191 | self.memory_counter += 1 192 | 193 | def choose_action(self, observation):# channe动作选择 194 | # to have batch dimension when feed into tf placeholder 195 | # observation = observation[np.newaxis, :]#多加了一个维度 196 | action = [] 197 | if np.random.uniform() < self.epsilon: 198 | # forward feed the observation and get q value for every actions 199 | actions_value = self.sess.run(self.q_eval, feed_dict={self.s: np.transpose(observation).reshape(-1,len(observation))})#每个动作的价值 200 | action = np.argmax(actions_value) #返回具有最大价值的动作序号 201 | else: 202 | action = np.random.randint(0, self.actions) 203 | return action 204 | 205 | def learn(self,sample_index): 206 | if self.prioritized_replay: 207 | tree_idx, batch_memory, ISWeights,sample_index = self.memory.sample(self.batch_size) 208 | else: 209 | if len(sample_index) == 0: 210 | if self.memory_counter <= self.batch_size: 211 | return [] 212 | if self.memory_counter >= self.memory_size: 213 | sample_index = np.random.choice(self.memory_size, size=self.batch_size) 214 | if self.batch_size < self.memory_counter < self.memory_size: 215 | sample_index = np.random.choice(self.memory_counter, size=self.batch_size) 216 | else: 217 | sample_index =sample_index 218 | # max_r = max(self.memory[:, self.n_features + 1]) 219 | # max_r = 250 220 | batch_memory = self.memory[sample_index, :] 221 | # batch_memory[:, self.n_features + 1] = batch_memory[:, self.n_features + 1]/max_r 222 | for e in range(1): 223 | if self.prioritized_replay is True and self.double_q is False: 224 | _, abs_errors, cost = self.sess.run([self._train_op, self.abs_errors, self.loss], 225 | feed_dict={self.s: batch_memory[:, :self.n_features], 226 | self.a: batch_memory[:, self.n_features], 227 | self.r: batch_memory[:, self.n_features + 1], 228 | self.s_: batch_memory[:, -self.n_features:], 229 | # self.q_target: q_target, 230 | 231 | self.ISWeights: ISWeights}) 232 | self.memory.batch_update(tree_idx, abs_errors) # update priority 233 | elif self.double_q is True and self.prioritized_replay is False: 234 | q_eval_double = self.sess.run(self.q_eval,feed_dict={ 235 | self.s: batch_memory[:, -self.n_features:], # 236 | 237 | }) 238 | 239 | _, cost = self.sess.run( 240 | [self._train_op, self.loss], 241 | feed_dict={ 242 | self.s: batch_memory[:, :self.n_features], 243 | self.a: batch_memory[:, self.n_features], 244 | self.r: batch_memory[:, self.n_features + 1], 245 | self.s_: batch_memory[:, -self.n_features:], 246 | self.qwe : q_eval_double, 247 | }) 248 | elif self.double_q and self.prioritized_replay : 249 | q_eval_double = self.sess.run(self.q_eval,feed_dict={ 250 | self.s: batch_memory[:, -self.n_features:], # 251 | }) 252 | _, 
abs_errors, cost = self.sess.run([self._train_op, self.abs_errors, self.loss], 253 | feed_dict={self.s: batch_memory[:, :self.n_features], 254 | self.a: batch_memory[:, self.n_features], 255 | self.r: batch_memory[:, self.n_features + 1], 256 | self.s_: batch_memory[:, -self.n_features:], 257 | # self.q_target: q_target, 258 | self.qwe : q_eval_double, 259 | self.ISWeights: ISWeights}) 260 | self.memory.batch_update(tree_idx, abs_errors) # update priority 261 | else: 262 | _, cost = self.sess.run( 263 | [self._train_op, self.loss], 264 | feed_dict={ 265 | self.s: batch_memory[:, :self.n_features], 266 | self.a: batch_memory[:, self.n_features], 267 | self.r: batch_memory[:, self.n_features + 1], 268 | self.s_: batch_memory[:, -self.n_features:], 269 | }) 270 | self.cost_his.append(cost) 271 | if self.learn_step_counter % self.replace_target_iter == 0: # 更新权重值 272 | self.sess.run(self.target_replace_op) 273 | if self.epsilon <= self.epsilon_max: 274 | self.epsilon += self.epsilon_increment 275 | else: 276 | self.epsilon =self.epsilon_max 277 | self.learn_step_counter += 1 278 | return sample_index 279 | def plot_cost(self): 280 | import matplotlib.pyplot as plt 281 | plt.figure() 282 | plt.plot(np.arange(len(self.cost_his)), self.cost_his) 283 | plt.ylabel('Cost'+self.name_str) 284 | plt.xlabel('training steps'+self.name_str) 285 | plt.title('Cost'+self.name_str) 286 | # plt.show() 287 | 288 | def save_model(self): 289 | dir_path = dirname(abspath(__file__)) + '\\model' 290 | self.eval_model.save_weights(dir_path+'\\eval_weights_'+self.params['name_str']+'.h5') 291 | self.eval_model.save_weights(dir_path+'\\target_weights_'+self.params['name_str']+'.h5') 292 | def clear_sess(self): 293 | self.sess.close() 294 | tf.reset_default_graph() -------------------------------------------------------------------------------- /DRL/TFAgent/DQN.py: -------------------------------------------------------------------------------- 1 | from os.path import abspath, dirname 2 | import numpy as np 3 | import tensorflow as tf 4 | from DRL.TFAgent.Prioritized_Replay import Memory 5 | from itertools import product 6 | tf.set_random_seed(1) 7 | np.random.seed(1) 8 | 9 | ''' 10 | DQN off-policy 11 | ''' 12 | class DqnAgent: 13 | def __init__( 14 | self, 15 | n_actions, # 可以看作动作空间的统计个数 16 | n_features, #其实是state 17 | learning_rate=0.02, 18 | reward_decay=0.9, 19 | e_greedy=0.9, 20 | replace_target_iter=100,#表示多少次更新目标权重 21 | memory_size=500, 22 | batch_size=32, 23 | e_greedy_increment=None, 24 | output_graph=False, 25 | name_str = '', 26 | double_q = False, 27 | prioritized_replay = False, 28 | agent_num = 3, 29 | ): 30 | ''' 31 | :param dim_actions: 动作空间的维度,至少为1维 32 | :param n_actions: 可以选择的离散动作个数 33 | :param n_features: state的长度 34 | :param learning_rate: 学习率 35 | :param reward_decay: 36 | :param e_greedy: 采用e-greedy时需要设置的epsilon 37 | :param replace_target_iter: evaluate-network更新多少次后,复制evaluate-network的权重给target-network 38 | :param memory_size: 重播队列 39 | :param batch_size: 一次训练多少批 40 | :param e_greedy_increment: 41 | :param output_graph: 42 | :param name_str: 43 | :param double_q: 是否为double-Dqn 44 | :param prioritized_replay: 是否采用prioritized_replay这一经验重放策略 45 | ''' 46 | 47 | self.params = { 48 | 'n_actions': n_actions, 49 | 'n_features': n_features, 50 | 'learning_rate': learning_rate, 51 | 'reward_decay': reward_decay, 52 | 'e_greedy': e_greedy, 53 | 'replace_target_iter': replace_target_iter, 54 | 'memory_size': memory_size, 55 | 'batch_size': batch_size, 56 | 'e_greedy_increment': e_greedy_increment, 57 | 
'output_graph':output_graph, 58 | 'name_str':name_str, 59 | 'double_q':double_q, 60 | 'prioritized_replay':prioritized_replay, 61 | } 62 | self.agent_num = agent_num 63 | self.actions = n_actions 64 | # self.action_combinations = self.gen_combination(self.params['n_actions'],caching_capacity) 65 | # self.actions_f = actions_f 66 | self.double_q = double_q 67 | self.prioritized_replay = prioritized_replay 68 | # self.actions_space = actions_space 69 | self.n_features = n_features 70 | self.lr = learning_rate 71 | self.gamma = reward_decay 72 | self.epsilon_max = e_greedy 73 | self.replace_target_iter = replace_target_iter 74 | self.memory_size = memory_size 75 | self.batch_size = batch_size 76 | self.epsilon_increment = e_greedy_increment 77 | self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max 78 | self. epsilons= [] 79 | for i in range (agent_num): 80 | self.epsilons.append(self.epsilon) 81 | print("current epsilon",self.epsilon,"|max epsilon",self.epsilon_max) 82 | self.name_str = name_str 83 | # total learning step 84 | self.learn_step_counter = 0 85 | self.memorys = [] 86 | # initialize zero memory [s, a, r, s_] 87 | if self.prioritized_replay: 88 | for i in range(self.agent_num): 89 | self.memorys.append(Memory(capacity=memory_size)) 90 | else: 91 | for i in range(self.agent_num): 92 | self.memorys.append(np.zeros((self.memory_size, n_features * 2 + 1 + 1),dtype=complex)) 93 | 94 | # consist of [target_net, evaluate_net] 95 | self._build_net() 96 | 97 | t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net'+self.name_str) 98 | e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net'+self.name_str) 99 | with tf.variable_scope('hard_replacement'+self.name_str): 100 | self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)] 101 | 102 | # def weight_assign(): 103 | # self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)] 104 | 105 | self.sess = tf.Session() 106 | 107 | if output_graph: 108 | # $ tensorboard --logdir=logs 109 | tf.FileWriter("logs/", self.sess.graph) 110 | 111 | self.sess.run(tf.global_variables_initializer()) 112 | self.cost_his = [] 113 | 114 | def _build_net(self): 115 | tf.reset_default_graph() 116 | # ------------------ all inputs ------------------------ 117 | 118 | 119 | # self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_') # input Next State 120 | # self.r = tf.placeholder(tf.float32, [None, ], name='r') # input Reward 121 | # self.a = tf.placeholder(tf.int32, [None, ], nam 122 | # 123 | # e='a') # input Action 124 | self.abs_errors = [] 125 | self.loss=[] 126 | self.eval_nets = [] 127 | self.target_nets = [] 128 | self._train_op = [] 129 | self.s=tf.placeholder(shape=(None, self.n_features), dtype=tf.float32, name='s' ) # input State 130 | self.s_=tf.placeholder(shape=(None, self.n_features), dtype=tf.float32, name='s_') 131 | self.r=tf.placeholder(shape=(None,), dtype=tf.float32, name='r') 132 | self.a = tf.placeholder(shape=(None,), dtype=tf.int32, name='a') 133 | self.qwe = [] 134 | for bs in range(self.agent_num): 135 | self.qwe.append(tf.placeholder(shape=(None, self.actions[bs]), dtype=tf.float32, 136 | name='double_q_next' + self.name_str)) 137 | 138 | with tf.variable_scope('reshape'): 139 | w_initializer, b_initializer = tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1) 140 | if self.prioritized_replay: 141 | self.ISWeights = tf.placeholder(tf.float32, (None, 1), name='IS_weights') 142 | for bs in 
range(self.agent_num): 143 | # ------------------ build evaluate_net ------------------ 144 | with tf.variable_scope('eval_net'+str(bs)): 145 | e1 =tf.layers.dense(self.s,128, tf.nn.relu, kernel_initializer=w_initializer, 146 | bias_initializer=b_initializer, name='e1'+str(bs)) 147 | e2 = tf.layers.dense(e1, 64, tf.nn.relu, kernel_initializer=w_initializer, 148 | bias_initializer=b_initializer, name='e2'+str(bs)) 149 | q_eval = tf.layers.dense(e2, self.actions[bs], kernel_initializer=w_initializer, 150 | bias_initializer=b_initializer, name='q'+str(bs))#(?,actions_f) 151 | self.eval_nets.append(q_eval) 152 | # ------------------ build target_net ------------------ 153 | with tf.variable_scope('target_net'+str(bs)): 154 | t1 = tf.layers.dense(self.s_, 128, tf.nn.relu, kernel_initializer=w_initializer, 155 | bias_initializer=b_initializer, name='t1'+str(bs)) 156 | t2 = tf.layers.dense(t1, 64, tf.nn.relu, kernel_initializer=w_initializer, 157 | bias_initializer=b_initializer, name='t2'+str(bs)) 158 | q_next = tf.layers.dense(t2, self.actions[bs], kernel_initializer=w_initializer, 159 | bias_initializer=b_initializer, name='q_'+str(bs)) 160 | 161 | with tf.variable_scope('q_target'+str(bs)): 162 | if self.double_q: 163 | index = tf.argmax(self.qwe[bs], axis=1, name='Qmax_s_'+str(bs)) 164 | index = tf.to_int32(index) 165 | q_indices = tf.stack([tf.range(tf.shape(index)[0]), index], axis=1) 166 | q_next_v = tf.gather_nd(params=q_next, indices= q_indices) 167 | q_target = self.r + self.gamma * q_next_v 168 | else: 169 | q_target = self.r + self.gamma * tf.reduce_max(q_next, axis=1, name='Qmax_s_'+str(bs)) # shape=(None, ) 170 | q_target = tf.stop_gradient(q_target) 171 | self.target_nets.append(q_target) 172 | with tf.variable_scope('q_eval'+str(bs)): 173 | a_indices = tf.stack([tf.range(tf.shape(self.a)[0]), self.a], axis=1) 174 | q_eval_wrt_a = tf.gather_nd(params=q_eval, indices=a_indices) # shape=(None, )索引切片 175 | 176 | with tf.variable_scope('loss'+str(bs)): 177 | if self.prioritized_replay: 178 | self.abs_errors.append(tf.abs(q_target - q_eval_wrt_a)) # for updating Sumtree 179 | #tf.squared_difference(x,y)=(x-y)的平方 180 | self.loss.append(tf.reduce_mean(self.ISWeights * tf.squared_difference(q_target, q_eval_wrt_a), name='TD_error'+str(bs))) 181 | else: 182 | # self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval)) 183 | self.loss.append(tf.reduce_mean(tf.math.squared_difference(q_target,q_eval_wrt_a, name='TD_error'+str(bs))))#先求差的平方,然后取均值) 184 | with tf.variable_scope('train'+str(bs)): 185 | self._train_op.append(tf.train.RMSPropOptimizer(self.lr).minimize(self.loss[bs])) 186 | # self._train_op.append(tf.train.AdamOptimizer(self.lr).minimize(self.loss[bs])) 187 | # GradientDescentOptimizer, AdagradOptimizer, MomentumOptimizer.RMSPropOptimizer,AdamOptimizer 188 | def store_transition(self,agent, s, a, r, s_): #记忆存储 189 | memory =self.memorys[agent] 190 | if self.prioritized_replay: 191 | # prioritized replay 192 | transition = np.hstack((s, [a, r], s_)) 193 | memory.store(transition) 194 | else: 195 | if not hasattr(self, 'memory_counter'): # 196 | self.memory_counter = 0 197 | transition = np.hstack((s, [a, r], s_)) 198 | if self.memory_counter < self.memory_size: 199 | index = self.memory_counter % self.memory_size 200 | else: 201 | index = self.memory_counter % self.memory_size 202 | # replace the old memory with new memory 203 | memory[index, :] = transition 204 | self.memory_counter += 1 205 | 206 | def choose_action(self,agent, 
observation,fov_patch_num,cache_matrix):# channe动作选择 207 | # to have batch dimension when feed into tf placeholder 208 | # observation = observation[np.newaxis, :]#多加了一个维度 209 | actions = [] 210 | # forward feed the observation and get q value for every actions 211 | for i in range(self.agent_num): 212 | if( i!=agent): 213 | actions_value = self.sess.run(self.eval_nets[i], feed_dict={ 214 | self.s: np.transpose(observation).reshape(-1, len(observation))}) # 每个动作的价值 215 | action = np.argmax(actions_value) # 返回具有最大价值的动作序号 216 | else: 217 | if (np.random.uniform() < self.epsilons[agent]): 218 | actions_value = self.sess.run(self.eval_nets[i], feed_dict={self.s: np.transpose(observation).reshape(-1,len(observation))})#每个动作的价值 219 | action = np.argmax(actions_value) #返回具有最大价值的动作序号 220 | # print("tf choose") 221 | else: 222 | # print("randomly choose") 223 | action = np.random.randint(0, self.actions[i]) 224 | actions.append(action) 225 | 226 | actions = np.array(actions) 227 | 228 | return actions 229 | 230 | def choose_action_base(self, observation, fov_patch_num, cache_matrix): # channe动作选择 231 | # to have batch dimension when feed into tf placeholder 232 | # observation = observation[np.newaxis, :]#多加了一个维度 233 | actions = [] 234 | # forward feed the observation and get q value for every actions 235 | for i in range(self.agent_num): 236 | if (np.random.uniform() < self.epsilons[i]): 237 | actions_value = self.sess.run(self.eval_nets[i], feed_dict={ 238 | self.s: np.transpose(observation).reshape(-1, len(observation))}) # 每个动作的价值 239 | action = np.argmax(actions_value) # 返回具有最大价值的动作序号 240 | # print("tf choose") 241 | else: 242 | # print("randomly choose") 243 | action = np.random.randint(0, self.actions[i]) 244 | actions.append(action) 245 | 246 | actions = np.array(actions) 247 | 248 | return actions 249 | 250 | def learn(self,agent,sample_index): 251 | memory = self.memorys[agent] 252 | # print("current epsilon = ",self.epsilon) 253 | if self.prioritized_replay: 254 | tree_idx, batch_memory, ISWeights,sample_index = memory.sample(self.batch_size) 255 | else: 256 | if len(sample_index) == 0: 257 | if self.memory_counter <= self.batch_size: 258 | return [] 259 | if self.memory_counter >= self.memory_size: 260 | sample_index = np.random.choice(self.memory_size, size=self.batch_size) 261 | if self.batch_size < self.memory_counter < self.memory_size: 262 | sample_index = np.random.choice(self.memory_counter, size=self.batch_size) 263 | else: 264 | sample_index =sample_index 265 | batch_memory = memory[sample_index, :] 266 | for e in range(1): 267 | if self.prioritized_replay is True and self.double_q is False: 268 | _, abs_errors, cost = self.sess.run([self._train_op[agent], self.abs_errors[agent], self.loss[agent]], 269 | feed_dict={self.s: batch_memory[:, :self.n_features], 270 | self.a: batch_memory[:, self.n_features], 271 | self.r: batch_memory[:, self.n_features + 1], 272 | self.s_: batch_memory[:, -self.n_features:], 273 | # self.q_target: q_target, 274 | self.ISWeights: ISWeights}) 275 | memory.batch_update(tree_idx, abs_errors) # update priority 276 | elif self.double_q is True and self.prioritized_replay is False: 277 | q_eval_double = self.sess.run(self.eval_nets[agent],feed_dict={ 278 | self.s: batch_memory[:, -self.n_features:], # 279 | 280 | }) 281 | 282 | _, cost = self.sess.run( 283 | [self._train_op[agent], self.loss[agent]], 284 | feed_dict={ 285 | self.s: batch_memory[:, :self.n_features], 286 | self.a: batch_memory[:, self.n_features], 287 | self.r: batch_memory[:, 
self.n_features + 1], 288 | self.s_: batch_memory[:, -self.n_features:], 289 | self.qwe : q_eval_double, 290 | }) 291 | elif self.double_q and self.prioritized_replay : 292 | q_eval_double = self.sess.run(self.eval_nets[agent],feed_dict={ 293 | self.s: batch_memory[:, -self.n_features:], # 294 | }) 295 | s = batch_memory[:, :self.n_features] 296 | a = batch_memory[:, self.n_features] 297 | r = batch_memory[:, self.n_features + 1] 298 | s_=batch_memory[:, -self.n_features:] 299 | _, abs_errors, cost = self.sess.run([self._train_op[agent], self.abs_errors[agent], self.loss[agent]], 300 | feed_dict={self.s: s, 301 | self.a: a, 302 | self.r: r, 303 | self.s_: s_, 304 | # self.q_target: q_target, 305 | self.ISWeights: ISWeights, 306 | self.qwe [agent]: q_eval_double, 307 | }) 308 | memory.batch_update(tree_idx, abs_errors) # update priority 309 | else: 310 | _, cost = self.sess.run( 311 | [self._train_op[agent], self.loss[agent]], 312 | feed_dict={ 313 | self.s: batch_memory[:, :self.n_features], 314 | self.a: batch_memory[:, self.n_features], 315 | self.r: batch_memory[:, self.n_features + 1], 316 | self.s_: batch_memory[:, -self.n_features:], 317 | }) 318 | self.cost_his.append(cost) 319 | if self.learn_step_counter % self.replace_target_iter == 0: # 更新权重值 320 | self.sess.run(self.target_replace_op) 321 | if self.epsilons[agent] < self.epsilon_max: 322 | self.epsilons[agent]+= self.epsilon_increment 323 | # print('epsilon=',self.epsilon) 324 | else: 325 | self.epsilons[agent]=self.epsilon_max 326 | self.learn_step_counter += 1 327 | return sample_index 328 | def plot_cost(self): 329 | import matplotlib.pyplot as plt 330 | plt.figure() 331 | plt.plot(np.arange(len(self.cost_his)), self.cost_his) 332 | plt.ylabel('Cost'+self.name_str) 333 | plt.xlabel('training steps'+self.name_str) 334 | plt.title('Cost'+self.name_str) 335 | # plt.show() 336 | # def gen_combination(self,num_actions,caching_capacity): 337 | # combination = np.array(list(product((np.arange(-1,num_actions,1)),repeat=caching_capacity))) 338 | # return combination 339 | 340 | def load_model(self,file_name): 341 | with self.sess.as_default(): 342 | with self.sess.graph.as_default(): 343 | self.sess.run(tf.global_variables_initializer()) 344 | self.sess.run(tf.local_variables_initializer()) 345 | all_parameters_saver = tf.train.Saver(max_to_keep=None) 346 | all_parameters_saver.restore(sess=self.sess, save_path=file_name) 347 | def save_model(self, file_name): 348 | all_parameters_saver = tf.train.Saver(max_to_keep=None) 349 | dir_path = '.\simulation_result\\full\\model' + file_name + '.ckpt' 350 | all_parameters_saver.save(sess=self.sess, save_path=dir_path) 351 | def clear_sess(self): 352 | self.sess.close() 353 | tf.reset_default_graph() -------------------------------------------------------------------------------- /DRL/function_all.py: -------------------------------------------------------------------------------- 1 | from itertools import product as product 2 | from itertools import combinations 3 | import math 4 | import matlab 5 | import numpy as np 6 | import re 7 | import os 8 | import platform 9 | import random 10 | import matplotlib.pyplot as plt 11 | # from math import log10, log2, cos, sin,exp 12 | import matlab.engine 13 | import openpyxl 14 | from numpy.ma import log10, log2, cos, sin, exp 15 | from os.path import dirname, abspath, exists 16 | 17 | from tqdm import tqdm 18 | import numba as nb 19 | from numba.pycc import CC 20 | cc = CC('yin') 21 | from config import FLAGS 22 | FOV_2D = 
2*FLAGS.fOV_2DShape[0]*FLAGS.fOV_2DShape[1] 23 | FOV_3D = (4/3)*FOV_2D 24 | # F_VR = 3 * 10**9 25 | # F_MEC = 10 * 10**9 26 | # 27 | # f_VR = 15 28 | # f_MEC = 15 29 | # 30 | # k_m = 10**(-9) 31 | # k_v = 10**(-9) 32 | # 33 | # E_MEC = 10**(20) 34 | # E_VR = 10**(15) 35 | 36 | # np.random.seed(1) 37 | # BW = 40 38 | N_0_dbm = -174 + 10 * log10(FLAGS.BW) 39 | N_0 = np.power(10,(N_0_dbm - 30) / 10) 40 | # N_0 = 10 ** ((N_0_dbm - 30) / 10) 41 | # N_0 =0.00001 42 | # ue_bs_a = 3 43 | # ue_irs_a = 2.2 44 | # irs_bs_a = 2.2 45 | # ue_bs_a = 3.5 46 | # ue_irs_a = 2.5 47 | # irs_bs_a = 2.5 48 | # gfu_bs_a = 3.5#2.5 49 | 50 | 51 | def G_gain_cal(h_cue_bs, h_irs_bs, h_cue_irs, reflect): 52 | ''' 53 | 计算综合信道增益G 54 | h_cue_bs:用户到基站的信道增益 55 | h_irs_bs:IRS到基站的信道增益,是个一行K列的矩阵 56 | h_cue_irs:用户到IRS的信道增益,是一个K行一列的矩阵 57 | reflect:反射矩阵,是一个K行K列的矩阵 58 | :return:一个综合信道增益的值 59 | ''' 60 | # print("h_irs_bs",h_irs_bs) 61 | # print("reflect",reflect) 62 | h_irs_bs = np.array(h_irs_bs) 63 | h_cue_irs = np.array(h_cue_irs) 64 | # temp = np.dot(h_irs_bs.T.conjugate(), reflect) 65 | temp = h_irs_bs.T.conjugate() 66 | # print("temp",temp) 67 | h_cue_irs_bs = np.dot(temp, h_cue_irs) 68 | G = h_cue_bs + h_cue_irs_bs 69 | return G 70 | 71 | 72 | ''' 73 | 74 | ''' 75 | def gen_mec_store_limit( pbd, bs_num, 76 | computing_resources,max_power): 77 | limit = np.zeros_like(computing_resources) 78 | #计算一个FOV占用的功率 79 | for fov in range(bs_num): 80 | p = pbd * 1 * computing_resources[fov] 81 | limit[fov]=int(max_power)/int(p) 82 | return limit 83 | 84 | 85 | 86 | 87 | 88 | ''' 89 | 每次优先往空余最大的位置放置 90 | ''' 91 | def gen_epsilon_rule_larggest(epsilon,storage_limit_mec): 92 | # bs_num = epsilon.shape[0] 93 | fov_num = epsilon.shape[1] 94 | new_epsilon = np.zeros_like(epsilon) 95 | 96 | for fov in range(fov_num): 97 | selected_bs = epsilon[:,fov] 98 | storage_limit_mec_copy = storage_limit_mec.copy() 99 | for time in range(np.sum(selected_bs)): 100 | max_index = np.argmax(storage_limit_mec_copy) 101 | storage_limit_mec[max_index]-=1 102 | storage_limit_mec_copy[max_index] = -1 103 | if (storage_limit_mec[max_index] == 0): 104 | storage_limit_mec[max_index]=-1 105 | new_epsilon[max_index,fov]=1 106 | return new_epsilon 107 | # print("gen_epsilon_rule_larggest") 108 | 109 | def gen_epsilon_rule_larggest2(epsilon,storage_limit_mec,BW, G2, omegas, N_0): 110 | bs_num = epsilon.shape[0] 111 | fov_num = epsilon.shape[1] 112 | new_epsilon = np.zeros_like(epsilon) 113 | storage_limit_mec_back = storage_limit_mec.copy() 114 | storage_limit_mec_copy = [] 115 | for fov in range(fov_num): 116 | storage_limit_mec_copy = storage_limit_mec.copy() 117 | max_index = np.argmax(storage_limit_mec_copy) 118 | storage_limit_mec[max_index]-=1 119 | storage_limit_mec_copy[max_index] = -1 120 | if (storage_limit_mec[max_index] == 0): 121 | storage_limit_mec[max_index]=-1 122 | new_epsilon[max_index,fov]=1 123 | 124 | unqualified_fov = [] 125 | for fov in range(fov_num): 126 | rate = cal_transmit_rate(BW,G2, omegas, fov,new_epsilon,N_0) 127 | if (rate < 20): 128 | unqualified_fov.append(fov) 129 | # fov_con_num = np.ones([fov_num]) 130 | # new_epsilon = np.zeros_like(epsilon) 131 | detect_time = 0 132 | while (len(unqualified_fov) > 0 and detect_time<100): 133 | detect_time+=1 134 | for fov in unqualified_fov: 135 | storage_limit_mec_copy = storage_limit_mec.copy() 136 | for bs in range(bs_num): 137 | if(new_epsilon[bs,fov]==1): 138 | storage_limit_mec_copy[bs]=0 139 | if (np.sum(storage_limit_mec_copy) == 0): 140 | return gen_epsilon_rule_larggest(epsilon, 
storage_limit_mec_back) 141 | max_index = np.argmax(storage_limit_mec_copy) 142 | storage_limit_mec[max_index] -= 1 143 | storage_limit_mec_copy[max_index] = -1 144 | if (storage_limit_mec[max_index] == 0): 145 | storage_limit_mec[max_index] = -1 146 | new_epsilon[max_index, fov] = 1 147 | unqualified_fov.clear() 148 | for fov in range(fov_num): 149 | rate = cal_transmit_rate(BW, G2, omegas, fov, new_epsilon, N_0) 150 | if (rate < 20): 151 | unqualified_fov.append(fov) 152 | return new_epsilon 153 | # print("gen_epsilon_rule_larggest") 154 | 155 | from numba import jit 156 | 157 | def gen_epsilon_rule_smallest2(epsilon,storage_limit_mec,BW, G2, omegas, N_0): 158 | bs_num = epsilon.shape[0] 159 | fov_num = epsilon.shape[1] 160 | new_epsilon = np.zeros_like(epsilon) 161 | storage_limit_mec_back = storage_limit_mec.copy() 162 | storage_limit_mec_copy = [] 163 | for fov in range(fov_num): 164 | selected_bs = epsilon[:, fov] 165 | storage_limit_mec_copy = storage_limit_mec.copy() 166 | for time in range(np.sum(selected_bs)): 167 | min_index = np.argmin(storage_limit_mec_copy) 168 | storage_limit_mec[min_index] -= 1 169 | storage_limit_mec_copy[min_index] = 99 170 | if (storage_limit_mec[min_index] == 0): 171 | storage_limit_mec[min_index] = 99 172 | new_epsilon[min_index, fov] = 1 173 | 174 | unqualified_fov = [] 175 | for fov in range(fov_num): 176 | rate = cal_transmit_rate(BW,G2, omegas, fov,new_epsilon,N_0) 177 | if (rate < 20): 178 | unqualified_fov.append(fov) 179 | detect_time = 0 180 | # fov_con_num = np.ones([fov_num]) 181 | # new_epsilon = np.zeros_like(epsilon) 182 | while (len(unqualified_fov) > 0 and detect_time<100): 183 | detect_time +=1 184 | for fov in unqualified_fov: 185 | storage_limit_mec_copy = storage_limit_mec.copy() 186 | for bs in range(bs_num): 187 | if(new_epsilon[bs,fov]==1): 188 | storage_limit_mec_copy[bs]=1000 189 | if (np.sum(storage_limit_mec_copy) == 3000): 190 | return gen_epsilon_rule_smallest(epsilon, storage_limit_mec_back) 191 | min_index = np.argmin(storage_limit_mec_copy) 192 | storage_limit_mec[min_index] -= 1 193 | storage_limit_mec_copy[min_index] = 99 194 | if (storage_limit_mec[min_index] == 0): 195 | storage_limit_mec[min_index] = 99 196 | new_epsilon[min_index, fov] = 1 197 | unqualified_fov.clear() 198 | for fov in range(fov_num): 199 | rate = cal_transmit_rate(BW, G2, omegas, fov, new_epsilon, N_0) 200 | if (rate < 20): 201 | unqualified_fov.append(fov) 202 | return new_epsilon 203 | 204 | 205 | from joblib import Parallel, delayed 206 | @cc.export('gen_epsilon_rule_exhaustion', nb.int32(nb.int32[:])) 207 | def gen_epsilon_rule_exhaustion(epsilon,storage_limit_mec,BW, G2, omegas, N_0, fov_sizes,Kb,Ub,ub,cr,total_computing_resources,mec_p_max): 208 | bs_num = epsilon.shape[0] 209 | fov_num = epsilon.shape[1] 210 | new_epsilon = np.zeros_like(epsilon) 211 | upperlimit = np.sum(epsilon) 212 | storage_limit_mec_back = storage_limit_mec.copy() 213 | action_table = gen_action_table_v3(bs_num,fov_num) 214 | action_space = len(action_table[0]) 215 | min_power = 65536 216 | min_S = 65536 217 | res_r = 0 218 | avaliable_epsilons = [] 219 | if(fov_num==6): 220 | for a1 in range(action_space): 221 | for a2 in range(action_space): 222 | for a3 in range(action_space): 223 | for a4 in range(action_space): 224 | for a5 in range(action_space): 225 | for a6 in range(action_space): 226 | temp_epsilon = gen_epsilon(bs_num, fov_num,[a1,a2,a3,a4,a5,a6] , action_table) 227 | temp_r = 0 228 | if(np.sum(temp_epsilon)<=upperlimit): 229 | valid_mark = True 230 | for fov 
in range(fov_num): 231 | rate = cal_transmit_rate(BW, G2, omegas, fov, temp_epsilon, N_0) 232 | if (rate < 15): 233 | valid_mark=False 234 | break 235 | # temp_r +=(rate-20) 236 | if(valid_mark==True): 237 | total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=temp_epsilon, 238 | omega=omegas, bs_num=bs_num, 239 | fov_num=fov_num, 240 | fov_sizes=fov_sizes, Kb=Kb, Ub=Ub, 241 | ub=ub, cr=cr, 242 | computing_resources=total_computing_resources) 243 | # sum_total_powers = np.sum(total_powers) 244 | # mean_total_powers = np.mean(total_powers) 245 | # var_total_powers = np.var(total_powers) 246 | valid_mark = True 247 | for bs in range(bs_num): 248 | if (total_powers[bs] > mec_p_max): 249 | valid_mark=False 250 | break 251 | # if(min_power>sum_total_powers ): 252 | # min_power = sum_total_powers 253 | if(valid_mark): 254 | avaliable_epsilons.append(temp_epsilon) 255 | # new_epsilon = temp_epsilon 256 | # # min_S =var_total_powers 257 | # return new_epsilon 258 | # elif(min_power == sum_total_powers and min_S>var_total_powers): 259 | # new_epsilon = temp_epsilon 260 | # min_S =var_total_powers 261 | # if (np.sum(new_epsilon) == fov_num): 262 | # return new_epsilon 263 | 264 | elif(fov_num==8): 265 | for a1 in range(action_space): 266 | for a2 in range(action_space): 267 | for a3 in range(action_space): 268 | for a4 in range(action_space): 269 | for a5 in range(action_space): 270 | for a6 in range(action_space): 271 | for a7 in range(action_space): 272 | for a8 in range(action_space): 273 | # print([a1,a2,a3,a4,a5,a6,a7,a8]) 274 | temp_epsilon = gen_epsilon(bs_num, fov_num, [a1, a2, a3, a4, a5, a6,a7,a8], action_table) 275 | if (np.sum(temp_epsilon) <= upperlimit): 276 | valid_mark = True 277 | for fov in range(fov_num): 278 | rate = cal_transmit_rate(BW, G2, omegas, fov, temp_epsilon, N_0) 279 | if (rate < 15): 280 | valid_mark=False 281 | break 282 | if(valid_mark==True): 283 | total_powers =cal_total_power(static_power=10, pbd=5e-3, epsilon=temp_epsilon, 284 | omega=omegas, bs_num=bs_num, 285 | fov_num=fov_num, 286 | fov_sizes=fov_sizes, Kb=Kb, Ub=Ub, 287 | ub=ub, cr=cr, 288 | computing_resources=total_computing_resources) 289 | for bs in range(bs_num): 290 | if (total_powers[bs] > mec_p_max): 291 | valid_mark = False 292 | break 293 | # if(min_power>sum_total_powers ): 294 | # min_power = sum_total_powers 295 | if (valid_mark): 296 | # print("找到") 297 | avaliable_epsilons.append(temp_epsilon) 298 | # new_epsilon = temp_epsilon 299 | # min_S =var_total_powers 300 | # return new_epsilon 301 | # sum_total_powers = np.sum(total_powers) 302 | # mean_total_powers = np.mean(total_powers) 303 | # var_total_powers = np.var(total_powers) 304 | # print(total_powers,var_total_powers) 305 | # if (min_power > sum_total_powers and min_S > var_total_powers): 306 | # min_power = sum_total_powers 307 | # new_epsilon = temp_epsilon 308 | # min_S = var_total_powers 309 | 310 | # elif (min_power == sum_total_powers and min_S > var_total_powers): 311 | # new_epsilon = temp_epsilon 312 | # min_S = var_total_powers 313 | return avaliable_epsilons 314 | 315 | def gen_epsilon_rule_smallest(epsilon,storage_limit_mec): 316 | # bs_num = epsilon.shape[0] 317 | fov_num = epsilon.shape[1] 318 | new_epsilon = np.zeros_like(epsilon) 319 | 320 | for fov in range(fov_num): 321 | selected_bs = epsilon[:,fov] 322 | storage_limit_mec_copy = storage_limit_mec.copy() 323 | for time in range(np.sum(selected_bs)): 324 | min_index = np.argmin(storage_limit_mec_copy) 325 | storage_limit_mec[min_index]-=1 326 | 
storage_limit_mec_copy[min_index]=99 327 | if(storage_limit_mec[min_index]==0): 328 | storage_limit_mec[min_index]=99 329 | new_epsilon[min_index,fov]=1 330 | return new_epsilon 331 | # print("gen_epsilon_rule_smallest") 332 | 333 | def zeroforcing(H,max_power): 334 | # omega = ZF(H, pow) 335 | K,M= H.shape 336 | # [K, M] = size(H); 337 | H_= H.T.conjugate() 338 | pre = np.dot(H_,np.linalg.inv(np.dot(H,H_)))# pre = H'*inv(H*H'); 339 | pre_=pre.T.conjugate() 340 | omega = np.sqrt(max_power/np.trace(np.dot(pre,pre_)))*pre 341 | # omega = sqrt(pow / trace(pre * pre'))*pre; 342 | return omega 343 | def gen_omega_ZF(NumberOfFov,NumberOfBS,NumberofAntenna,G,max_power): 344 | omegas = np.zeros([NumberOfBS, NumberOfFov, NumberofAntenna],dtype=np.complex) 345 | for bs in range(NumberOfBS): 346 | H = G[bs,:].reshape([NumberOfFov,NumberofAntenna]) 347 | PZF = zeroforcing(H, max_power) 348 | omegas[bs,:] = PZF.T.conjugate() 349 | return omegas 350 | 351 | 352 | 353 | def rechoose_epsilon_noCoMP(epsilon,cue_coord,bs_coord,time): 354 | rechoose_epsilon = np.zeros_like(epsilon) 355 | 356 | for fov in range (rechoose_epsilon.shape[1]): 357 | # minDist=9999 358 | # bestBs = -1 359 | for bs in range(rechoose_epsilon.shape[0]): 360 | # dist = np.linalg.norm(cue_coord[time,fov,:]-bs_coord[bs,:]) 361 | # if(dist x_max: 635 | # print('edge_x') 636 | new_angle = np.pi + angle 637 | if user_state == 0: # random 638 | new_x = x + dist * math.cos(new_angle) 639 | else: 640 | new_x = x + dist * math.cos(new_angle) + 4/5 * max_speed * math.cos(angle_fix) 641 | return new_x 642 | 643 | def dist_calc_y(user_state,y,dist,angle,angle_fix,y_min,y_max,max_speed): 644 | ''' 645 | Use for calculating the distance of movement in each TS in terms of Y-axis 646 | :param user_state: 647 | :param x: 648 | :param dist: 649 | :param angle: 650 | :param angle_fix: 651 | :param x_min: 652 | :param x_max: 653 | :param max_speed: 654 | :return: 655 | ''' 656 | if user_state == 0: # random 657 | new_y = y + dist * math.sin(angle) 658 | else: 659 | new_y = y + dist * math.sin(angle) + 4/5 * max_speed * math.sin(angle_fix) 660 | while new_y< y_min or new_y > y_max: 661 | # print('edge_y') 662 | new_angle = - angle 663 | if user_state == 0: # random 664 | new_y = y + dist * math.sin(new_angle) 665 | else: 666 | new_y = y + dist * math.sin(new_angle) + 4/5 * max_speed * math.sin(angle_fix) 667 | return new_y 668 | 669 | 670 | def all_G_gain_cal(bs_num, irs_coord,ue_num,coord_a,coord_b,reflect,irs_m): 671 | # channel_space=np.array(channel_space).reshape(cuenum+chnum,chnum) 672 | G =np.zeros([bs_num,ue_num],dtype=np.complex) 673 | # G = np.zeros(bs*cuenum, dtype="complex") 674 | count = 0 675 | for ue in range(ue_num): 676 | for bs in range(bs_num): 677 | h_cue_bs = h_gain_cal(coord_a[ue], coord_b[bs], FLAGS.gfu_bs_a, "Rayleigh", irs_m) 678 | h_cue_irs = h_gain_cal(coord_a[ue], irs_coord, FLAGS.ue_irs_a, "Racian", irs_m) 679 | h_irs_bs = h_gain_cal(irs_coord, coord_b[bs], FLAGS.irs_bs_a, "Racian", irs_m) 680 | # test1=(cuenum_i + 1) * (bs_i + 1) * (chnum_i + 1) - 1 681 | if irs_m != 0: 682 | G[bs,ue] = G_gain_cal(h_cue_bs, h_irs_bs, h_cue_irs, reflect)[0][0] 683 | else: 684 | G[bs,ue] = G_gain_cal(h_cue_bs, h_irs_bs, h_cue_irs, reflect)[0] 685 | count +=1 686 | return G 687 | 688 | def r_min_judge(cue, ch_k, G, action_c_ch, action_c_p, r_min): 689 | for i in range(cue): 690 | if sum(action_c_p[i,:]) != 1: 691 | return -1, None 692 | for i in range(ch_k): 693 | if sum(action_c_p[:, i]) > 1: 694 | return -1, None 695 | r_arr=[] 696 | for i in 
range(cue): 697 | for j in range(ch_k): 698 | if action_c_ch[i][j] == 1: 699 | r = FLAGS.BW*log2(1+(pow(abs(G[i][j]),2)*action_c_p[i,j]/(N_0))) 700 | r_arr.append(r) 701 | for i in range(cue): 702 | if r_arr[i] < r_min: 703 | return -1, None 704 | return 1, r_arr 705 | 706 | def clean_G(G,cuenum,tau,channel_num): 707 | for i in range(cuenum): 708 | for j in range(channel_num): 709 | if pow(abs(G[i,j]),2) 000 001 010 011 100 101 110 111,笛卡尔积product(A,repeat=3)等价于product(A,A,A),product(A, B) 和 ((x,y) for x in A for y in B)一样. 745 | available_space_channel.append(item) 746 | return available_space_channel 747 | 748 | def reflect_calculate(reflect_action_arr,reflect_amp_arr,irs_m): 749 | ''' 750 | :param reflect_action_arr: 反射矩阵角度 751 | :return: 返回计算后的反射矩阵 752 | ''' 753 | reflect = np.zeros((irs_m,irs_m),dtype=np.complex) 754 | for i in range(irs_m): 755 | reflect[i,i] = reflect_amp_arr[i]*exp(0) * (cos(reflect_action_arr[i]) + sin(reflect_action_arr[i]) * 1j) 756 | return reflect 757 | def x_generate(G,ch_k,cue): 758 | ''' 759 | 可用的各种反射矩阵 760 | ''' 761 | G = np.array(G).reshape(cue, ch_k) 762 | action_x = [] 763 | # action_x=np.zeros((cue+ch_k,cue+ch_k,ch_k)) 764 | for i in range(ch_k): 765 | action_x_ch = np.zeros((cue, cue+ch_k)) 766 | temp_arr = np.array(G[:, i]) 767 | for j in range(cue): 768 | if temp_arr[j] == 0: 769 | action_x_ch[j,:]=0 770 | for k in range(cue): 771 | if temp_arr[j] != 0 and pow(abs(temp_arr[j]), 2) <= pow(abs(temp_arr[k]), 2): 772 | action_x_ch[j][k]=1 773 | if temp_arr[j] != 0 and temp_arr[k] == 0: 774 | action_x_ch[j][k] = 1 775 | if j == k: 776 | action_x_ch[j][k] = 0 777 | action_x.append(action_x_ch) 778 | return action_x 779 | def npyload(filename): 780 | """ 781 | :功能:读取npy文件 782 | :param filename: 文件名称 783 | :return: 784 | """ 785 | print('read file: %s' % (filename)) 786 | return np.load(filename, allow_pickle=True).item() 787 | def npysave(data, filename): 788 | """ 789 | :功能:保存npy文件 790 | :param data: 数据 791 | :param filename: 文件名 792 | :return: 793 | """ 794 | namearr = re.split(r'[\\/]', filename) 795 | 796 | # 判断操作系统 797 | sys = platform.system() 798 | if sys == "Windows": 799 | pathstr = '\\'.join(namearr[:-1]) 800 | elif sys == "Linux": 801 | pathstr = '/'.join(namearr[:-1]) 802 | filestr = namearr[-1] 803 | if not os.path.exists(pathstr): 804 | print('make dir:%s' % (pathstr)) 805 | os.makedirs(pathstr) 806 | print('write to: %s' % (filename)) 807 | np.save(filename, data) 808 | def excel_save(excel,irs_m,stat): 809 | wb=openpyxl.Workbook() 810 | ws=wb.create_sheet("sheet1") 811 | i=1 812 | for a in excel: 813 | for j in range(irs_m): 814 | ws.cell(row=i,column=j+1).value=a[0][j] 815 | i+=1 816 | wb.save('data'+stat+'.xlsx') 817 | 818 | def average_power(ue,bs, ch, ch_space, p_max): 819 | power_result = np.zeros(ue) 820 | bs_power = np.zeros((ue,2)) 821 | for i in range(ue): 822 | which_bs = int(ch_space[i] / ch) 823 | bs_power[ue][0] = ue 824 | bs_power[ue][1] = which_bs 825 | # for j in range(bs): 826 | k = 0 827 | for i in range(ch_space.shape[0]): 828 | for j in range(ch_space.shape[1]): 829 | if ch_space[i][j] != 0: 830 | k += 1 831 | average_p = p_max/k 832 | for i in range(ue): 833 | for j in range(ch): 834 | if ch_space[i][j] != 0: 835 | power_result[i][j] = average_p 836 | return power_result 837 | 838 | def gen_epsilon(bs_num,fov_patch_num): 839 | avg_count=int(fov_patch_num /bs_num) 840 | rest = fov_patch_num %bs_num 841 | if(rest==0): 842 | avg_count = avg_count+rest 843 | epsilon = np.zeros([bs_num, fov_patch_num]) 844 | begin_index=0 
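# Note: the loop that follows spreads the fov_patch_num FoV patches over the bs_num base
# stations as evenly as possible -- each BS is given avg_count consecutive patches and
# epsilon[bs][fov] = 1 marks that BS bs caches/renders FoV patch fov. This evenly-split
# matrix appears to serve as the default placement before the rule-based (max/min/
# exhaustion) or DRL-chosen placements refine it.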
845 | for bs in range(bs_num): 846 | for i in range(avg_count): 847 | epsilon[bs][begin_index]=1 848 | if(begin_index+10: 186 | # new_coord_lst = [] 187 | # random_count = self.point*15+1 188 | # for g0 in range(self.ue_num): 189 | # new_coord_lst.append(coord_cue_random[random_count,g0,:]) 190 | # for g1 in range(self.antenna_num): 191 | # new_coord_lst.append(coord_gbu_random[random_count,g1+self.ue_num,:]) 192 | # self.cue_coord = np.array(new_coord_lst).reshape(self.ue_num+self.antenna_num,3) 193 | # else: 194 | # self._gain_calculate() 195 | 196 | 197 | def plot_dynamic_movement(self): 198 | print('基站用户位置分布图') 199 | #将位置plot出来 200 | # plt.rcParams['font.sans-serif'] = ['SimHei'] 201 | # plt.rcParams['axes.unicode_minus'] = False 202 | # matplotlib画图中中文显示会有问题,需要这两行设置默认字体 203 | fig,ax = plt.subplots(1,1) 204 | # ax=Axes3D(fig) 205 | delta=0.125 206 | # ax.set_xlabel('X-axis (m)', fontdict={'family': 'Times New Roman', 'size': 15}) 207 | # ax.set_ylabel('Y-axis (m)', fontdict={'family': 'Times New Roman', 'size': 15}) 208 | # ax.set_zlabel('Z-axis (m)', fontdict={'family': 'Times New Roman', 'size': 15}) 209 | plt.xticks(fontproperties="Times New Roman", size=15) 210 | plt.yticks(fontproperties="Times New Roman", size=15) 211 | # ax.tick_params(axis='z', labelsize=15) 212 | # plt.xlim(-5,25) 213 | # plt.ylim(-5,25) 214 | # ax.set_zlim(0,30) 215 | colors1 = '#00CED1' # 点的颜色 216 | colors2 = '#DC143C' 217 | colors3 = '#7FFFD4' 218 | colors4 = '#A52A2A' 219 | colors5 = '#008000' 220 | area = np.pi ** 2 # 点面积 221 | for i in range(0,1000): 222 | plt.ion() 223 | plt.clf() 224 | 225 | # 画散点图 226 | plt.scatter(self.bs_coord[:, 0], self.bs_coord[:, 1], s=area * 2, marker='o', c=colors1, alpha=0.4, label='BS') 227 | plt.scatter(self.irs_coord[:, 0], self.irs_coord[:, 1], s=area * 4, marker='s', c=colors5, alpha=0.4, 228 | label='RIS') 229 | plt.scatter(self.cue_coord[i,:self.ue_num, 0], self.cue_coord[i,:self.ue_num, 1],s=area*2, marker='v', c=colors4, alpha=0.4, label='UE') 230 | plt.legend(loc=0,edgecolor='#000000',prop={'family': 'Times New Roman', 'size': 12}) 231 | plt.grid('-') 232 | # plt.savefig(dir_path + '/location.png', bbox_inches="tight") 233 | # plt.savefig(dir_path + '/location.pdf', bbox_inches="tight") 234 | plt.show() 235 | plt.pause(1) 236 | def plot_location(self): 237 | #将位置plot出来 238 | plt.rcParams['font.sans-serif'] = ['SimHei'] 239 | plt.rcParams['axes.unicode_minus'] = False 240 | # matplotlib画图中中文显示会有问题,需要这两行设置默认字体 241 | plt.xlabel('X') 242 | plt.ylabel('Y') 243 | print('位置分布图') 244 | colors1 = '#00CED1' # 点的颜色 245 | colors2 = '#DC143C' 246 | colors3 = '#7FFFD4' 247 | colors4 = '#A52A2A' 248 | colors5 = '#008000' 249 | area = np.pi ** 2 # 点面积 250 | # 画散点图 251 | plt.scatter(self.irs_coord[:, 0], self.irs_coord[:, 1], s=area * 2, marker='s', c=colors2, alpha=0.4, 252 | label='反射面') 253 | plt.scatter(self.cue_coord[:, 0], self.cue_coord[:, 1], s=area, marker='v', c=colors3, alpha=0.4, label='CUE用户') 254 | plt.legend() 255 | plt.savefig(dir_path + '/location.png', dpi=300) 256 | plt.show() 257 | 258 | def _gain_contact(self): 259 | #将计算出来的信道增益进行拼接作为state 260 | a=[] 261 | for ue_num in range(self.ue_num): 262 | for bs_i in range(self.bs_num): 263 | for ch_i in range(self.antenna_num): 264 | which_bs = self.ch_space[ue_num]/self.antenna_num 265 | which_ch = self.ch_space[ue_num]%self.antenna_num 266 | if bs_i==which_bs and ch_i == which_ch: 267 | a.append(self.G[ue_num]) 268 | else: 269 | a.append(0) 270 | return a 271 | def reset(self): 272 | # 重新设置环境 273 | # if stat=="all": 
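# Note: reset() either reloads pre-recorded channel realizations (the Hbn/Hrn/Hbr records,
# i.e. the BS-UE, RIS-UE and BS-RIS links) or regenerates them, combines the direct and
# RIS-reflected links through G_gain_cal into the composite gain G2, and returns the state
# as the flattened real/imaginary parts of G2 concatenated with the caching matrix epsilon
# and the current UE coordinates.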
274 | # self._coord_set() 275 | if(self.load_H): 276 | # self.G = self.H_record[0,:] 277 | # G2 = np.zeros([ue + 1, antenna + 1, unit + 1, bs, ue, antenna, 1], dtype=np.complex) 278 | 279 | self.g_bs_ue = self.Hbn_record[0,:] 280 | self.g_ue_ris = self.Hrn_record[0,:] 281 | self.g_bs_ris = self.Hbr_record[0,:] 282 | 283 | self.G2 = np.zeros([self.bs_num, self.ue_num, self.antenna_num, 1],dtype=complex) 284 | for b in range(self.bs_num): 285 | for u in range(self.ue_num): 286 | self.G2[b,u,:,:]=G_gain_cal(self.g_bs_ue[b,u,:,:], self.g_bs_ris[b,:,:], self.g_ue_ris[b,u,:,:], 1) 287 | self.G = np.concatenate([self.G2.imag, self.G2.real], axis=-1).flatten() 288 | # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 0.1) 289 | # self.G2 = self.g_bs_uere 290 | 291 | else: 292 | self.G,self.G2,self.g_ue_ris,self.g_bs_ris,self.g_bs_ue = all_G_gain_cal_MISO_splitI(self.time,self.bs_num, self.ue_num, self.antenna_num, self.irs_coord, self.cue_coord, 293 | self.bs_coord, self.reflect, self.irs_units_num) 294 | 295 | # self.G2 = self.g_bs_ue 296 | for b in range(self.bs_num): 297 | for u in range(self.ue_num): 298 | self.G2[b,u,:,:]=G_gain_cal(self.g_bs_ue[b,u,:,:], self.g_bs_ris[b,:,:], self.g_ue_ris[b,u,:,:], 1) 299 | # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 1) 300 | self.G = np.concatenate([self.G2.imag, self.G2.real], axis=-1).flatten() 301 | self.states = np.concatenate([np.array(self.G).flatten(),self.epsilon.flatten(),self.cue_coord[0, :self.ue_num, :].flatten()],axis=0) 302 | return self.states 303 | 304 | # def user_location_random(self): 305 | # #随机生成下一步的位置 306 | # limit = bs_dist_limit-100 307 | # limit_1 = bs_dist_limit-50 308 | # zeros_arr = np.array([0]).reshape(-1,1) 309 | # for i in range(self.ue_num): 310 | # cx = (-1 + 2*np.random.random())* limit 311 | # cy = (-1 + 2*np.random.random())* limit 312 | # cxy = np.array([cx,cy]).reshape(1,2) 313 | # while np.linalg.norm(cxy, axis=1, keepdims=True) > limit_1: 314 | # cx = (-1 + 2*np.random.random())* limit 315 | # cy = (-1 + 2*np.random.random())* limit 316 | # cxy = np.array([cx,cy]).reshape(1,2) 317 | # self.cue_coord[i,:] = np.hstack((cxy,zeros_arr)) 318 | # print('cue新位置随机成功') 319 | def cal_reward(self,actions,step,concern_all=True): 320 | ''' 321 | 考虑约束的适应度函数值计算 322 | :return: 323 | ''' 324 | # if step != 1: 325 | # T_old = T_old*(step-1) 326 | # else: 327 | # T_old = T_old 328 | bs = [] 329 | value = 0 330 | ''' 331 | 332 | 先满足所有用户需求的fov都能在MEC上找到缓存,且缓存内容所消耗的计算资源不超过MEC,缓存总大小不超过MEC的存储容量 333 | 动作为每一时刻MEC选择缓存的内容 334 | 首先要验证每个基站选择的缓存行动是否满足约束 335 | ''' 336 | sum_rendered_size = 0 337 | rho = 0.001 338 | sum_computing_resources = 0 339 | # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 1) 340 | self.epsilon = gen_epsilon(self.bs_num, self.fov_patch_num, actions, self.action_table) 341 | 342 | # if(self.ue_num==6): 343 | # self.storage_limit=np.ones(shape=self.bs_num,dtype=np.int) *4 344 | # elif(self.ue_num==8): 345 | # self.storage_limit =np.ones(shape=self.bs_num,dtype=np.int)*6 346 | 347 | if(self.mec_rule=="max"): 348 | # self.epsilon = gen_epsilon_rule_larggest(self.epsilon,self.storage_limit.copy()) 349 | epsilon = gen_epsilon_rule_larggest2(self.epsilon,self.storage_limit.copy(),self.BW, self.G2, self.omegas,self.N_0) 350 | print(np.sum(self.epsilon - epsilon)) 351 | self.epsilon = epsilon 352 | elif(self.mec_rule=="min"): 353 | # self.epsilon = gen_epsilon_rule_smallest(self.epsilon, self.storage_limit.copy()) 
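# Note on the "min" caching rule applied below: gen_epsilon_rule_smallest2 first assigns
# each requested FoV patch to the BS with the smallest remaining cache budget, then
# repeatedly adds a further BS for any patch whose achievable transmit rate
# (cal_transmit_rate) stays under the hard-coded threshold; once every BS already caches
# an unqualified patch it appears to fall back to the plain smallest-first placement
# (gen_epsilon_rule_smallest).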
354 | epsilon = gen_epsilon_rule_smallest2(self.epsilon, self.storage_limit.copy(), self.BW, self.G2, 355 | self.omegas, self.N_0) 356 | print(np.sum(self.epsilon - epsilon)) 357 | self.epsilon=epsilon 358 | # print(np.sum(self.epsilon_test - self.epsilon)) 359 | 360 | 361 | 362 | if(self.train == False and self.rand_omega==True and self.open_matlab==False): 363 | self.omegas = generate_omega_random(bs_num=self.bs_num,ue_num=self.ue_num,antenna_num=self.antenna_num,scale_factor=0.1) 364 | transmitting_power_randOmega = cal_transmit_power(self.epsilon,self.omegas,self.bs_num,self.ue_num) 365 | 366 | 367 | mec_powers = cal_render_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 368 | bs_num=self.bs_num, fov_num=self.fov_patch_num, 369 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, 370 | cr=self.cr, 371 | computing_resources=self.total_computing_resources) 372 | self.init_bs_power = np.array(self.init_bs_power, dtype=np.float).flatten() 373 | total_powers = (1 - rho) * self.init_bs_power + rho * mec_powers 374 | self.total_power_randOmega_record.append(np.mean(total_powers)) 375 | print("transmitting_power_randOmega=%.4f,total_power:%.4f", transmitting_power_randOmega,total_powers) 376 | # self.bs_power_record.append() 377 | self.bs_power_randOmega_record.append(np.mean(transmitting_power_randOmega)) 378 | return -50 379 | 380 | elif (self.train == False and self.rand_omega==False and self.open_matlab): 381 | rates = [] 382 | self.epsilon_fix = rechoose_epsilon_noCoMP(self.epsilon, self.cue_coord, self.bs_coord, 383 | step) 384 | 385 | scio.savemat(r'.\data.mat', 386 | {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(), 387 | 'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist(),'omegas':self.omegas.tolist()}) 388 | self.init_bs_power, self.init_bs_rates, self.opt_bs_power, self.opt_rates,self.opt_rates_noCoMP,self.opt_G, self.rs = self.engine.main_optmization( 389 | matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 390 | matlab.double([self.antenna_num]), 391 | matlab.double([self.irs_units_num]), 392 | matlab.double([self.N_0]), matlab.double([self.transmit_p_max]), 393 | matlab.double([self.r_min]), matlab.double(self.epsilon.tolist()), 394 | matlab.double(self.epsilon_fix.tolist()),matlab.double([self.BW]), 395 | nargout=7) 396 | 397 | if (self.mec_rule == "exhaustion"): 398 | start = time.time() 399 | self.available_epsilons = gen_epsilon_rule_exhaustion(self.epsilon, self.storage_limit.copy(), self.BW, 400 | self.G2, 401 | self.omegas, self.N_0, self.fov_sizes, self.Kb, 402 | self.Ub, self.ub, self.cr, 403 | self.total_computing_resources, self.mec_p_max) 404 | 405 | min_bs_power = 999 406 | min_ep = 0 407 | # self.init_bs_power, self.init_bs_rates, self.opt_bs_power, self.opt_rates, self.opt_rates_noCoMP, self.opt_G, self.rs = self.engine.main_optmization( 408 | # matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 409 | # matlab.double([self.antenna_num]), 410 | # matlab.double([self.irs_units_num]), 411 | # matlab.double([self.N_0]), matlab.double([self.transmit_p_max]), 412 | # matlab.double([self.r_min]), matlab.double(self.epsilon.tolist()), 413 | # matlab.double(self.epsilon.tolist()), matlab.double([self.BW]), 414 | # nargout=7) 415 | 416 | for ep in self.available_epsilons: 417 | init_bs_power, init_bs_rates, opt_bs_power, opt_rates, opt_rates_noCoMP, opt_G, rs = self.engine.main_optmization( 418 | matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 419 | 
matlab.double([self.antenna_num]), 420 | matlab.double([self.irs_units_num]), 421 | matlab.double([self.N_0]), matlab.double([self.transmit_p_max]), 422 | matlab.double([self.r_min]), matlab.double(ep.tolist()), 423 | matlab.double(self.epsilon.tolist()), matlab.double([self.BW]), 424 | nargout=7) 425 | if (rs == 1 and min_bs_power >= np.sum(opt_bs_power)): 426 | min_bs_power = np.sum(opt_bs_power) 427 | min_ep = ep 428 | # print(min_ep) 429 | # print(np.sum(self.epsilon - min_ep),time.time()-start) 430 | 431 | print(np.sum(self.epsilon) - np.sum(min_ep), time.time() - start) 432 | if (np.sum(min_ep) != 0): 433 | print('替换') 434 | self.epsilon_old = self.epsilon 435 | self.epsilon = min_ep 436 | 437 | print(np.sum(self.opt_bs_power) - min_bs_power, time.time() - start) 438 | 439 | self.epsilon_fix = rechoose_epsilon_noCoMP(self.epsilon, self.cue_coord, self.bs_coord,step) 440 | self.init_bs_power, self.init_bs_rates, self.opt_bs_power, self.opt_rates,self.opt_rates_noCoMP,self.opt_G, self.rs = self.engine.main_optmization( 441 | matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 442 | matlab.double([self.antenna_num]), 443 | matlab.double([self.irs_units_num]), 444 | matlab.double([self.N_0]), matlab.double([self.transmit_p_max]), 445 | matlab.double([self.r_min]), matlab.double(self.epsilon.tolist()), 446 | matlab.double(self.epsilon_fix.tolist()),matlab.double([self.BW]), 447 | nargout=7) 448 | 449 | 450 | if(self.rs == 0 ): 451 | if(self.mec_rule== "default" or self.mec_rule== "exhaustion"): 452 | self.ue_avg_rates_record.append(-1) 453 | self.ue_avg_rates_record_NoRIS.append(-1) 454 | self.total_power_record_NoRIS.append(-1) 455 | self.total_power_record.append(-1) 456 | self.bs_power_record.append(-1) 457 | self.bs_power_record_NoRIS.append(-1) 458 | self.init_bs_power_record.append(self.init_bs_power) 459 | self.total_init_power_record.append(-1) 460 | # self.opt_G_record.append(self.H_record[step,:]) 461 | return -50 462 | else: 463 | 464 | mec_powers = cal_render_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 465 | bs_num=self.bs_num, fov_num=self.fov_patch_num, 466 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, 467 | cr=self.cr, 468 | computing_resources=self.total_computing_resources) 469 | self.init_bs_power = np.array(self.init_bs_power, dtype=np.float).flatten() 470 | total_powers = (1 - rho) * self.init_bs_power+ rho * mec_powers 471 | self.ue_avg_rates_record.append(-1) 472 | self.ue_avg_rates_record_NoRIS.append(-1) 473 | self.total_power_record_NoRIS.append(np.mean(total_powers)) 474 | self.total_power_record.append(np.mean(total_powers)) 475 | self.bs_power_record.append(np.mean(self.init_bs_power)) 476 | self.bs_power_record_NoRIS.append(np.mean(self.init_bs_power)) 477 | self.init_bs_power_record.append(np.mean(self.init_bs_power)) 478 | self.total_init_power_record.append(-1) 479 | print("最大/最小策略失败,不优化", "total_power_record:", total_powers) 480 | return -50 481 | self.opt_bs_power_noRIS, self.opt_rates_noRIS, self.opt_rates_noCoMP_noRIS, self.opt_noRIS_rs = self.engine.main_optmization_NoRIS( 482 | matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 483 | matlab.double([self.antenna_num]), matlab.double([self.N_0]), 484 | matlab.double([self.transmit_p_max]), matlab.double([self.r_min]), 485 | matlab.double(self.epsilon.tolist()), matlab.double(self.epsilon_fix.tolist()), 486 | matlab.double([self.BW]), nargout=4) 487 | 488 | 489 | 490 | mec_powers = cal_render_power(static_power=10, pbd=5e-3, 
epsilon=self.epsilon, 491 | bs_num=self.bs_num, fov_num=self.fov_patch_num, 492 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr, 493 | computing_resources=self.total_computing_resources) 494 | print("mec power:",mec_powers) 495 | self.opt_bs_power = np.array(self.opt_bs_power,dtype=np.float).flatten() 496 | self.opt_bs_power_noRIS = np.array(self.opt_bs_power_noRIS,dtype=np.float).flatten() 497 | self.init_bs_power = np.array(self.init_bs_power,dtype=np.float).flatten() 498 | 499 | 500 | if (self.rs == 1 and self.opt_noRIS_rs and self.train==False): 501 | for bs in range(self.bs_num): 502 | if (self.opt_bs_power[bs] > self.transmit_p_max): 503 | return -50 504 | 505 | 506 | total_powers =(1-rho)*self.opt_bs_power+rho*mec_powers 507 | total_powers_NoRIS = (1-rho)*self.opt_bs_power_noRIS+rho*mec_powers 508 | total_init_powers = (1-rho)*self.init_bs_power+rho*mec_powers 509 | diff = np.sum(self.opt_bs_power-self.opt_bs_power_noRIS) 510 | if(diff<0 and self.max_diff0): 520 | # # print("求解不成功_diff") 521 | # self.ue_avg_rates_record.append(-1) 522 | # self.ue_avg_rates_record_NoRIS.append(-1) 523 | # self.total_power_record_NoRIS.append(-1) 524 | # self.total_power_record.append(-1) 525 | # self.bs_power_record.append(-1) 526 | # self.bs_power_record_NoRIS.append(-1) 527 | # self.init_bs_power_record.append(-1) 528 | # self.total_init_power_record.append(-1) 529 | # 530 | # self.opt_G_record.append(self.G2) 531 | # return -50 532 | self.total_diff+=diff 533 | # if(np.sum(self.epsilon)>self.ue_num): 534 | # print("CoMP") 535 | print("此次求解成功","total_power_record:",total_powers,"diff=%.6f"%(self.total_diff),"bs_power_record:",self.opt_bs_power,"bs_power_record_NoRIS",self.opt_bs_power_noRIS) 536 | 537 | 538 | self.ue_avg_rates_record.append(np.mean(self.opt_rates)) 539 | self.ue_avg_rates_record_NoRIS.append(np.mean(self.opt_rates_noRIS)) 540 | self.ue_avg_rates_record_noCoMP.append(np.mean(self.opt_rates_noCoMP)) 541 | self.ue_avg_rates_record_noCoMP_NoRIS.append(np.mean(self.opt_rates_noCoMP_noRIS)) 542 | self.total_power_record_NoRIS.append(np.mean(total_powers_NoRIS)) 543 | self.total_power_record.append(np.mean(total_powers)) 544 | self.bs_power_record.append(np.mean(self.opt_bs_power)) 545 | self.bs_power_record_NoRIS.append(np.mean(self.opt_bs_power_noRIS)) 546 | self.init_bs_power_record.append(np.mean(self.init_bs_power)) 547 | self.total_init_power_record.append(np.mean(total_init_powers)) 548 | self.opt_G_record.append(self.opt_G) 549 | else: 550 | print("求解不成功") 551 | self.ue_avg_rates_record_noCoMP.append(-1) 552 | self.ue_avg_rates_record_noCoMP_NoRIS.append(-1) 553 | self.ue_avg_rates_record .append(-1) 554 | self.ue_avg_rates_record_NoRIS.append(-1) 555 | self.total_power_record_NoRIS.append(-1) 556 | self.total_power_record.append(-1) 557 | self.bs_power_record.append(-1) 558 | self.bs_power_record_NoRIS.append(-1) 559 | self.init_bs_power_record.append(-1) 560 | self.total_init_power_record.append(-1) 561 | self.opt_G_record.append(self.G2) 562 | return -50 563 | # else: 564 | # total_powers = 0.8*np.array(opt_power).reshape(self.bs_num)+0.2*cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 565 | # omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num, 566 | # fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr, 567 | # computing_resources=self.total_computing_resources) 568 | reward=0 569 | # for bs in range(self.bs_num): 570 | # if (total_powers[bs] > self.trabsmit_p_max): 571 | # return -50 572 | # 
reward = (self.bs_num * self.trabsmit_p_max - np.sum(total_powers)) * 2 573 | 574 | elif (self.train == False and self.open_matlab==False): 575 | self.action_record.append(actions) 576 | return -50 577 | 578 | else: 579 | # scio.savemat(r'.\data.mat', 580 | # {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(), 581 | # 'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist()}) 582 | rates=[] 583 | for fov in range(self.fov_patch_num): 584 | rate = cal_transmit_rate(self.BW, self.G2, self.omegas, fov, self.epsilon, self.N_0) 585 | rates.append(rate) 586 | if (rate < self.r_min): 587 | return -50 588 | total_powers = 0 589 | if(concern_all==True): 590 | total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 591 | omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num, 592 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr, 593 | computing_resources=self.total_computing_resources) 594 | else: 595 | total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 596 | omega=generate_omega_fixed(self.bs_num, self.ue_num, self.antenna_num, scale=0.3), bs_num=self.bs_num, fov_num=self.fov_patch_num, 597 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr, 598 | computing_resources=self.total_computing_resources) 599 | for bs in range(self.bs_num): 600 | if(total_powers[bs]>self.mec_p_max): 601 | return -50 602 | reward = (self.bs_num*self.mec_p_max-np.sum(total_powers))*4 603 | return reward 604 | 605 | def cal_reward_3(self, actions, action_real,action_imag,step): 606 | ''' 607 | 考虑约束的适应度函数值计算 608 | :return: 609 | ''' 610 | # if step != 1: 611 | # T_old = T_old*(step-1) 612 | # else: 613 | # T_old = T_old 614 | bs = [] 615 | value = 0 616 | 617 | ''' 618 | 619 | 先满足所有用户需求的fov都能在MEC上找到缓存,且缓存内容所消耗的计算资源不超过MEC,缓存总大小不超过MEC的存储容量 620 | 动作为每一时刻MEC选择缓存的内容 621 | 首先要验证每个基站选择的缓存行动是否满足约束 622 | ''' 623 | sum_rendered_size = 0 624 | rho = 0.001 625 | sum_computing_resources = 0 626 | # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 1) 627 | self.epsilon = gen_epsilon(self.bs_num, self.fov_patch_num, actions, self.action_table) 628 | # if(self.ue_num==6): 629 | # self.storage_limit=np.ones(shape=self.bs_num,dtype=np.int) *4 630 | # elif(self.ue_num==8): 631 | # self.storage_limit =np.ones(shape=self.bs_num,dtype=np.int)*6 632 | 633 | if (self.mec_rule == "max"): 634 | self.epsilon = gen_epsilon_rule_larggest(self.epsilon, self.storage_limit.copy()) 635 | # self.epsilon_test = gen_epsilon_rule_larggest2(self.epsilon,self.storage_limit.copy(),self.BW, self.G2, self.omegas,self.N_0) 636 | # print(np.sum(self.epsilon_test-self.epsilon)) 637 | # return 10 638 | elif (self.mec_rule == "min"): 639 | self.epsilon = gen_epsilon_rule_smallest(self.epsilon, self.storage_limit.copy()) 640 | # self.epsilon_test = gen_epsilon_rule_smallest2(self.epsilon, self.storage_limit.copy(), self.BW, self.G2, 641 | # self.omegas, self.N_0) 642 | # print(np.sum(self.epsilon_test - self.epsilon)) 643 | # return 10 644 | 645 | if (self.train == False and self.rand_omega == True and self.open_matlab == False): 646 | self.omegas = generate_omega_random(bs_num=self.bs_num, ue_num=self.ue_num, antenna_num=self.antenna_num, 647 | scale_factor=0.1) 648 | transmitting_power_randOmega = cal_transmit_power(self.epsilon, self.omegas, self.bs_num, self.ue_num) 649 | 650 | mec_powers = cal_render_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 651 | bs_num=self.bs_num, 
fov_num=self.fov_patch_num, 652 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, 653 | cr=self.cr, 654 | computing_resources=self.total_computing_resources) 655 | self.init_bs_power = np.array(self.init_bs_power, dtype=np.float).flatten() 656 | total_powers = (1 - rho) * self.init_bs_power + rho * mec_powers 657 | self.total_power_randOmega_record.append(np.mean(total_powers)) 658 | print("transmitting_power_randOmega=%.4f,total_power:%.4f", transmitting_power_randOmega, total_powers) 659 | # self.bs_power_record.append() 660 | self.bs_power_randOmega_record.append(np.mean(transmitting_power_randOmega)) 661 | return -50 662 | 663 | elif (self.train == False and self.rand_omega == False and self.open_matlab): 664 | rates = [] 665 | self.epsilon_fix = rechoose_epsilon_noCoMP(self.epsilon, self.cue_coord, self.bs_coord, 666 | step) 667 | 668 | # for fov in range(self.fov_patch_num): 669 | # rate = cal_transmit_rate(self.BW, self.G2, self.omegas, fov, self.epsilon, self.N_0) 670 | # rates.append(rate) 671 | # if (rate < self.r_min): 672 | # print("错误,速率") 673 | # return -50 674 | scio.savemat(r'.\data.mat', 675 | {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(), 676 | 'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist(), 'omegas': self.omegas.tolist()}) 677 | # b=0 678 | # f=0 679 | # k = G_gain_cal(self.g_bs_ue[b, f, :,:], self.g_bs_ris[b, :,:], self.g_ue_ris[b, f, :,:], self.reflect) 680 | self.init_bs_power, self.init_bs_rates, self.opt_bs_power, self.opt_rates, self.opt_rates_noCoMP, self.opt_G, self.rs = self.engine.main_optmization( 681 | matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]), 682 | matlab.double([self.antenna_num]), 683 | matlab.double([self.irs_units_num]), 684 | matlab.double([self.N_0]), matlab.double([self.transmit_p_max]), 685 | matlab.double([self.r_min]), matlab.double(self.epsilon.tolist()), 686 | matlab.double(self.epsilon_fix.tolist()), matlab.double([self.BW]), 687 | nargout=7) 688 | 689 | if (self.rs == 0): 690 | # print("求解不成功",self.epsilon) 691 | if (self.mec_rule == "default"): 692 | print("求解不成功") 693 | self.ue_avg_rates_record.append(-1) 694 | self.ue_avg_rates_record_NoRIS.append(-1) 695 | self.total_power_record_NoRIS.append(-1) 696 | self.total_power_record.append(-1) 697 | self.bs_power_record.append(-1) 698 | self.bs_power_record_NoRIS.append(-1) 699 | self.init_bs_power_record.append(self.init_bs_power) 700 | self.total_init_power_record.append(-1) 701 | # self.opt_G_record.append(self.H_record[step,:]) 702 | return -50 703 | else: 704 | 705 | mec_powers = cal_render_power(static_power=10, pbd=5e-3, epsilon=self.epsilon, 706 | bs_num=self.bs_num, fov_num=self.fov_patch_num, 707 | fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, 708 | cr=self.cr, 709 | computing_resources=self.total_computing_resources) 710 | self.init_bs_power = np.array(self.init_bs_power, dtype=np.float).flatten() 711 | total_powers = (1 - rho) * self.init_bs_power + rho * mec_powers 712 | self.ue_avg_rates_record.append(-1) 713 | self.ue_avg_rates_record_NoRIS.append(-1) 714 | self.total_power_record_NoRIS.append(np.mean(total_powers)) 715 | self.total_power_record.append(np.mean(total_powers)) 716 | self.bs_power_record.append(np.mean(self.init_bs_power)) 717 | self.bs_power_record_NoRIS.append(np.mean(self.init_bs_power)) 718 | self.init_bs_power_record.append(np.mean(self.init_bs_power)) 719 | self.total_init_power_record.append(-1) 720 | print("最大/最小策略失败,不优化", "total_power_record:", total_powers) 
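# When the max/min placement heuristic leaves the CVX/Taylor beamforming problem
# infeasible (rs == 0), the un-optimized initial BS transmit power mixed with the
# rendering power via rho is still logged above, and the fixed -50 penalty is returned,
# presumably to steer the agent away from cache placements that cannot be served.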
721 |                     return -50
722 |             self.opt_bs_power_noRIS, self.opt_rates_noRIS, self.opt_rates_noCoMP_noRIS, self.opt_noRIS_rs = self.engine.main_optmization_NoRIS(
723 |                 matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]),
724 |                 matlab.double([self.antenna_num]), matlab.double([self.N_0]),
725 |                 matlab.double([self.transmit_p_max]), matlab.double([self.r_min]),
726 |                 matlab.double(self.epsilon.tolist()), matlab.double(self.epsilon_fix.tolist()),
727 |                 matlab.double([self.BW]), nargout=4)
728 |
729 |             mec_powers = cal_render_power(static_power=10, pbd=5e-3, epsilon=self.epsilon,
730 |                                           bs_num=self.bs_num, fov_num=self.fov_patch_num,
731 |                                           fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr,
732 |                                           computing_resources=self.total_computing_resources)
733 |             print("mec power:", mec_powers)
734 |             self.opt_bs_power = np.array(self.opt_bs_power, dtype=float).flatten()
735 |             self.opt_bs_power_noRIS = np.array(self.opt_bs_power_noRIS, dtype=float).flatten()
736 |             self.init_bs_power = np.array(self.init_bs_power, dtype=float).flatten()
737 |
738 |             if (self.rs == 1 and self.opt_noRIS_rs and self.train == False):
739 |                 for bs in range(self.bs_num):
740 |                     if (self.opt_bs_power[bs] > self.transmit_p_max):
741 |                         return -50
742 |
743 |                 total_powers = (1 - rho) * self.opt_bs_power + rho * mec_powers
744 |                 total_powers_NoRIS = (1 - rho) * self.opt_bs_power_noRIS + rho * mec_powers
745 |                 total_init_powers = (1 - rho) * self.init_bs_power + rho * mec_powers
746 |                 diff = np.sum(self.opt_bs_power - self.opt_bs_power_noRIS)  # sanity check: the RIS solution should not need more BS transmit power than the no-RIS solution
747 |                 if (self.mec_rule == "default" and diff > 0):
748 |                     print("optimization rejected: RIS power exceeds no-RIS power (diff > 0)")
749 |                     self.ue_avg_rates_record.append(-1)
750 |                     self.ue_avg_rates_record_NoRIS.append(-1)
751 |                     self.total_power_record_NoRIS.append(-1)
752 |                     self.total_power_record.append(-1)
753 |                     self.bs_power_record.append(-1)
754 |                     self.bs_power_record_NoRIS.append(-1)
755 |                     self.init_bs_power_record.append(-1)
756 |                     self.total_init_power_record.append(-1)
757 |
758 |                     self.opt_G_record.append(self.G2)
759 |                     return -50
760 |                 self.total_diff += diff
761 |                 # if(np.sum(self.epsilon)>self.ue_num):
762 |                 #     print("CoMP")
763 |                 print("optimization succeeded", "total_power_record:", total_powers, "diff=%.6f" % (self.total_diff),
764 |                       "bs_power_record:", self.opt_bs_power, "bs_power_record_NoRIS", self.opt_bs_power_noRIS)
765 |
766 |                 self.ue_avg_rates_record.append(np.mean(self.opt_rates))
767 |                 self.ue_avg_rates_record_NoRIS.append(np.mean(self.opt_rates_noRIS))
768 |                 self.ue_avg_rates_record_noCoMP.append(np.mean(self.opt_rates_noCoMP))
769 |                 self.ue_avg_rates_record_noCoMP_NoRIS.append(np.mean(self.opt_rates_noCoMP_noRIS))
770 |                 self.total_power_record_NoRIS.append(np.mean(total_powers_NoRIS))
771 |                 self.total_power_record.append(np.mean(total_powers))
772 |                 self.bs_power_record.append(np.mean(self.opt_bs_power))
773 |                 self.bs_power_record_NoRIS.append(np.mean(self.opt_bs_power_noRIS))
774 |                 self.init_bs_power_record.append(np.mean(self.init_bs_power))
775 |                 self.total_init_power_record.append(np.mean(total_init_powers))
776 |                 self.opt_G_record.append(self.opt_G)
777 |             else:
778 |                 print("optimization failed")
779 |                 self.ue_avg_rates_record_noCoMP.append(-1)
780 |                 self.ue_avg_rates_record_noCoMP_NoRIS.append(-1)
781 |                 self.ue_avg_rates_record.append(-1)
782 |                 self.ue_avg_rates_record_NoRIS.append(-1)
783 |                 self.total_power_record_NoRIS.append(-1)
784 |                 self.total_power_record.append(-1)
785 |                 self.bs_power_record.append(-1)
786 |                 self.bs_power_record_NoRIS.append(-1)
787 |                 self.init_bs_power_record.append(-1)
788 |                 self.total_init_power_record.append(-1)
789 |                 self.opt_G_record.append(self.G2)
790 |                 return -50
791 |         # else:
792 |         #     '''If the minimum rate is met, check whether the BS transmit power satisfies its constraint; test the rate part first'''
793 |         #     total_powers = 0.8*np.array(opt_power).reshape(self.bs_num)+0.2*cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon,
794 |         #                                  omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num,
795 |         #                                  fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr,
796 |         #                                  computing_resources=self.total_computing_resources)
797 |             reward = 0
798 |             # for bs in range(self.bs_num):
799 |             #     if (total_powers[bs] > self.transmit_p_max):
800 |             #         return -50
801 |             # reward = (self.bs_num * self.transmit_p_max - np.sum(total_powers)) * 2
802 |
803 |         elif (self.train == False and self.open_matlab == False):
804 |             self.action_record.append(actions)
805 |             return -50
806 |
807 |         else:
808 |             '''Without exceeding the storage and computing-resource limits, compute the UE rates and check the minimum-rate constraint'''
809 |             # scio.savemat(r'.\data.mat',
810 |             #              {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(),
811 |             #               'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist()})
812 |
813 |             self.omegas = (0.0001*action_real-0.5)+1j*(0.0001*action_imag-0.5)
814 |             rates = []
815 |             gap = 0
816 |             for fov in range(self.fov_patch_num):
817 |                 rate = cal_transmit_rate(self.BW, self.G2, self.omegas, fov, self.epsilon, self.N_0)
818 |                 rates.append(rate)
819 |                 if (rate < self.r_min):
820 |                     return -50
821 |                 # gap += (rate - self.r_min)
822 |                 # print("error: rate constraint violated")
823 |             # if (gap < 0):
824 |             #     return gap
825 |             # '''If the minimum rate is met, check whether the BS transmit power satisfies its constraint; test the rate part first'''
826 |             total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon,
827 |                                            omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num,
828 |                                            fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr,
829 |                                            computing_resources=self.total_computing_resources)
830 |
831 |             gap = 0
832 |             for bs in range(self.bs_num):
833 |                 if (total_powers[bs] > self.mec_p_max):
834 |                     return -50
835 |                 # gap += 2 * (self.mec_p_max - total_powers[bs])
836 |                 # print("error: power constraint violated")
837 |             # if (gap < 0):
838 |             #     return gap
839 |             reward = (self.bs_num * self.mec_p_max - np.sum(total_powers)) * 4
840 |             # print(reward)
841 |         return reward
842 |
843 |     def cal_reward_2(self, actions, step, concern_all=True):
844 |         self.epsilon = gen_epsilon(self.bs_num, self.fov_patch_num, actions, self.action_table)
845 |         rates = []
846 |         key = '%s%s%d' % (self.stored_dic_mainkey, str(actions), step)  # cache key: main key + caching action + time step
847 |         reward = self.stored_dic.get(key)
848 |
849 |         if (reward is None or reward == -50):
850 |             reward = 0
851 |             total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon,
852 |                                            omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num,
853 |                                            fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr,
854 |                                            computing_resources=self.total_computing_resources)
855 |             for bs in range(self.bs_num):
856 |                 if (total_powers[bs] > self.mec_p_max):
857 |                     return -50
858 |             self.epsilon_fix = rechoose_epsilon_noCoMP(self.epsilon, self.cue_coord, self.bs_coord, step)
859 |             scio.savemat(r'.\data.mat',
860 |                          {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(),
861 |                           'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist(), 'omegas': self.omegas.tolist()})
862 |             self.init_bs_power, self.init_bs_rates, self.opt_bs_power, self.opt_rates, self.opt_rates_noCoMP, self.opt_G, self.rs = self.engine.main_optmization(
863 |                 matlab.double([self.bs_num]), matlab.double([self.fov_patch_num]),
864 |                 matlab.double([self.antenna_num]),
865 |                 matlab.double([self.irs_units_num]),
866 |                 matlab.double([self.N_0]), matlab.double([self.transmit_p_max]),
867 |                 matlab.double([self.r_min]), matlab.double(self.epsilon.tolist()),
868 |                 matlab.double(self.epsilon_fix.tolist()), matlab.double([self.BW]),
869 |                 nargout=7)
870 |             if (self.rs == 0):
871 |                 print("optimization failed")
872 |                 return -50
873 |             else:
874 |
875 |                 reward = (self.bs_num * self.mec_p_max - np.sum(total_powers)) * 4
876 |                 self.stored_dic[key] = reward  # memoize the reward for this (action, step) pair
877 |                 print(key, self.r_min, reward)
878 |         return reward
879 |
880 |     def cal_reward_validate(self, actions, step):
881 |         '''
882 |         Fitness-value calculation that takes the constraints into account.
883 |         :return:
884 |         '''
885 |         '''
886 |         Every FoV requested by a user must first be cached on some MEC, the computing resources consumed by the cached content must not exceed the MEC's capacity, and the total cache size must not exceed the MEC's storage.
887 |         The action at each time slot is the set of contents each MEC chooses to cache.
888 |         First verify that the caching action chosen by each base station satisfies these constraints.
889 |         '''
890 |         self.epsilon = gen_epsilon(self.bs_num, self.fov_patch_num, actions, self.action_table)
891 |         '''Without exceeding the storage and computing-resource limits, compute the UE rates and check the minimum-rate constraint'''
892 |         # scio.savemat(r'.\data.mat',
893 |         #              {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(),
894 |         #               'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist()})
895 |         total_powers = cal_total_power(static_power=10, pbd=5e-3, epsilon=self.epsilon,
896 |                                        omega=self.omegas, bs_num=self.bs_num, fov_num=self.fov_patch_num,
897 |                                        fov_sizes=self.fov_sizes, Kb=self.Kb, Ub=self.Ub, ub=self.ub, cr=self.cr,
898 |                                        computing_resources=self.total_computing_resources)
899 |         for bs in range(self.bs_num):
900 |             if (total_powers[bs] > self.mec_p_max):
901 |                 return -50
902 |         # reward = (self.bs_num * self.mec_p_max - np.sum(total_powers)) * 4
903 |         return 1
904 |
905 |     def step2(self, step):
906 |
907 |
908 |         self.G, self.G2, self.g_ue_ris, self.g_bs_ris, self.g_bs_ue = all_G_gain_cal_MISO_splitI(step, self.bs_num,
909 |                                                                                                  self.ue_num,
910 |                                                                                                  self.antenna_num,
911 |                                                                                                  self.irs_coord,
912 |                                                                                                  self.cue_coord,
913 |                                                                                                  self.bs_coord,
914 |                                                                                                  self.reflect,
915 |                                                                                                  self.irs_units_num)
916 |
917 |         # scio.savemat(r'.\data.mat',
918 |         #              {'G': self.G2.tolist(), 'E': self.epsilon.tolist(), 'gnr': self.g_ue_ris.tolist(),
919 |         #               'gbr': self.g_bs_ris.tolist(), 'gbn': self.g_bs_ue.tolist(), 'omegas': self.omegas.tolist()})
920 |
921 |         return self.G, self.G2, self.g_ue_ris, self.g_bs_ris, self.g_bs_ue
922 |
923 |     # def step(self, actions, action_real, action_imag, step):
924 |     #
925 |     #     r = self.cal_reward(actions, action_real, action_imag, step)
926 |     #     new_coord_lst = []
927 |     #     if (self.load_H):
928 |     #         # self.G = self.H_record[step, :]
929 |     #         # G2 = np.zeros([ue + 1, antenna + 1, unit + 1, bs, ue, antenna, 1], dtype=complex)
930 |     #
931 |     #         # self.G2 = self.H2_record[step, self.ue_num, self.antenna_num, self.irs_units_num, :, :, :, :]
932 |     #         # self.G2 = self.H2_record[step, :]
933 |     #         self.g_ue_ris = self.Hrn_record[step, :]
934 |     #         self.g_bs_ris = self.Hbr_record[step, :]
935 |     #         self.g_bs_ue = self.Hbn_record[step, :]
936 |     #         for b in range(self.bs_num):
937 |     #             for u in range(self.ue_num):
938 |     #                 self.G2[b, u, :, :] = G_gain_cal(self.g_bs_ue[b, u, :, :], self.g_bs_ris[b, :, :],
939 |     #                                                  self.g_ue_ris[b, u, :, :], 1)
940 |     #
941 |     #         # self.G2 = self.g_bs_ue
942 |     #         self.G = np.concatenate([self.G2.imag, self.G2.real], axis=-1).flatten()
943 |     #         # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 0.1)
944 |     #     else:
945 |     #         self.G, self.G2, self.g_ue_ris, self.g_bs_ris, self.g_bs_ue = all_G_gain_cal_MISO_splitI(step, self.bs_num,
946 |     #                                                                                                  self.ue_num,
947 |     #                                                                                                  self.antenna_num,
948 |     #                                                                                                  self.irs_coord,
949 |     #                                                                                                  self.cue_coord,
950 |     #                                                                                                  self.bs_coord,
951 |     #                                                                                                  self.reflect,
952 |     #                                                                                                  self.irs_units_num)
953 |     #         for b in range(self.bs_num):
954 |     #             for u in range(self.ue_num):
955 |     #                 self.G2[b, u, :, :] = G_gain_cal(self.g_bs_ue[b, u, :, :], self.g_bs_ris[b, :, :],
956 |     #                                                  self.g_ue_ris[b, u, :, :], 1)
957 |     #         # self.G2 = G_gain_cal(self.g_bs_ue, self.g_bs_ris, self.g_ue_ris, 1)
958 |     #         self.G = np.concatenate([self.G2.imag, self.G2.real], axis=-1).flatten()
959 |     #         # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 0.1)
960 |     #         # self.G2 = self.g_bs_ue
961 |     #
962 |     #     states_ = self.states = np.concatenate(
963 |     #         [np.array(self.G).flatten(), self.epsilon.flatten(), self.cue_coord[step, :self.ue_num, :].flatten()],
964 |     #         axis=0)
965 |     #     return r, states_, self.epsilon
966 |     def step(self, actions, step, concern_all=True):
967 |         if (self.continue_cvx == False):
968 |             r = self.cal_reward(actions, step, concern_all)
969 |         else:
970 |             r = self.cal_reward_2(actions, step, concern_all)
971 |         new_coord_lst = []
972 |         if (self.load_H):
973 |             # self.G = self.H_record[step, :]
974 |             # G2 = np.zeros([ue + 1, antenna + 1, unit + 1, bs, ue, antenna, 1], dtype=complex)
975 |
976 |             # self.G2 = self.H2_record[step, self.ue_num, self.antenna_num, self.irs_units_num, :, :, :, :]
977 |             # self.G2 = self.H2_record[step, :]
978 |             self.g_ue_ris = self.Hrn_record[step, :]
979 |             self.g_bs_ris = self.Hbr_record[step, :]
980 |             self.g_bs_ue = self.Hbn_record[step, :]
981 |             for b in range(self.bs_num):
982 |                 for u in range(self.ue_num):
983 |                     self.G2[b, u, :, :] = G_gain_cal(self.g_bs_ue[b, u, :, :], self.g_bs_ris[b, :, :],
984 |                                                      self.g_ue_ris[b, u, :, :], 1)
985 |                     # self.G2[b, u, :, :] = self.g_bs_ue[b, u, :, :]
986 |
987 |             # self.G2 = self.g_bs_ue
988 |             self.G = np.concatenate([self.g_bs_ue.imag, self.g_bs_ue.real], axis=-1).flatten()
989 |             # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 0.1)
990 |         else:
991 |             self.G, self.G2, self.g_ue_ris, self.g_bs_ris, self.g_bs_ue = all_G_gain_cal_MISO_splitI(step, self.bs_num,
992 |                                                                                                      self.ue_num,
993 |                                                                                                      self.antenna_num,
994 |                                                                                                      self.irs_coord,
995 |                                                                                                      self.cue_coord,
996 |                                                                                                      self.bs_coord,
997 |                                                                                                      self.reflect,
998 |                                                                                                      self.irs_units_num)
999 |             for b in range(self.bs_num):
1000 |                 for u in range(self.ue_num):
1001 |                     self.G2[b, u, :, :] = G_gain_cal(self.g_bs_ue[b, u, :, :], self.g_bs_ris[b, :, :], self.g_ue_ris[b, u, :, :], 1)
1002 |             # self.G2 = G_gain_cal(self.g_bs_ue, self.g_bs_ris, self.g_ue_ris, 1)
1003 |             self.G = np.concatenate([self.G2.imag, self.G2.real], axis=-1).flatten()
1004 |             # self.omegas = gen_omega_ZF(self.fov_patch_num, self.bs_num, self.antenna_num, self.G2, 0.1)
1005 |             # self.G2 = self.g_bs_ue
1006 |
1007 |
1008 |         states_ = self.states = np.concatenate([np.array(self.G).flatten(), self.epsilon.flatten(), self.cue_coord[step, :self.ue_num, :].flatten()], axis=0)
1009 |         return r, states_, self.epsilon
1010 |     def action_states(self):
1011 |         p = []
1012 |         for i in range(self.bs_num):
1013 |             p.append(self.action[i])
1014 |         return p
1015 |     def reflect_amp_add_states(self):
1016 |         reflect_amp = []
1017 |         for i in range(self.irs_units_num):
1018 |             reflect_amp.append(self.reflect[i][i])
1019 |         return reflect_amp
1020 |     def G_tau__add_states(self):
1021 |         g_tau = []
1022 |         for i in range(self.ue_num):
1023 |             for j in range(self.antenna_num):
1024 |                 if self.G[i][j] ** 2 >= self.tau:
1025 |                     g_tau.append(1)
1026 |                 else:
1027 |                     g_tau.append(0)
1028 |         return g_tau
1029 |
1030 |
--------------------------------------------------------------------------------
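Usage note: the `step` method above returns `(reward, next_state, epsilon)` and encodes every constraint violation (minimum UE rate, BS transmit power, MEC power budget) as a flat penalty of -50. The snippet below is a minimal sketch of how this interface could be driven by a DQN-style agent; `env.reset`, `agent.choose_action`, `agent.store_transition`, and `agent.learn` are illustrative assumptions and not the repository's actual class or method names.

import numpy as np

def train_loop(env, agent, episodes=100, steps_per_episode=200):
    # Minimal sketch under assumed agent/environment APIs: drive the
    # caching/beamforming environment with a DQN-style agent.
    for ep in range(episodes):
        state = env.reset()                          # assumed: returns the initial state vector
        for t in range(steps_per_episode):
            action = agent.choose_action(state)      # index into the caching action table
            # env.step returns (reward, next_state, epsilon); the reward is -50 whenever a
            # rate or power constraint is violated, otherwise it grows with the power saved.
            reward, next_state, epsilon = env.step(action, t)
            agent.store_transition(state, action, reward, next_state)
            agent.learn()
            state = next_state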