├── README.md
├── utils.py
├── run.py
├── a2c.py
├── a2c_lstm.py
├── dqn.py
└── env.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LSTM-based A2C
This repository provides the code for the paper "**The LSTM-based Advantage Actor-Critic Learning for Resource Management in Network Slicing with User Mobility**", published in IEEE Communications Letters. Note that this is a research project and is, by definition, unstable. Please write to us if you find anything incorrect or strange.

*We share this code under the condition that any work reproducing it, in full or in part, must cite the paper:*

    @article{li2020lstm,
      title={The LSTM-based Advantage Actor-Critic Learning for Resource Management in Network Slicing with User Mobility},
      author={Li, Rongpeng and Wang, Chujie and Zhao, Zhifeng and Guo, Rongbin and Zhang, Honggang},
      journal={IEEE Communications Letters},
      year={2020},
      publisher={IEEE}
    }

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import itertools
import numpy as np


def action_space(total, ser_num):
    # Enumerate every split of `total` bandwidth units among `ser_num` services,
    # keeping only the splits that use the whole budget and starve no service.
    tmp = list(itertools.product(range(total + 1), repeat=ser_num))
    result = []
    for value in tmp:
        if sum(value) == total:
            result.append(list(value))
    result = np.array(result)
    [i, j] = np.where(result == 0)  # rows containing a zero allocation
    result = np.delete(result, i, axis=0)
    print(result.shape)
    return result


def gen_state_(pkt_nums, pos):
    # Normalized packet counts concatenated with the UE position feature.
    mean = np.array([218.8, 5338, 293])
    std = np.array([51, 847, 42.5])
    state = np.hstack(((pkt_nums - mean) / std, pos))
    return state


# Normalization statistics for the three services (volte : embb : urllc = 1:2:3).
def gen_state(pkt_nums):
    mean = np.array([218.8, 5338, 293])
    std = np.array([51, 847, 42.5])
    state = (pkt_nums - mean) / std
    return state


def calc__reward(qoe, se):
    qoe_weight = [1, 1, 1]
    se_weight = 0.01
    utility = sum([w * q for w, q in zip(qoe_weight, qoe.tolist())]) + se_weight * se
    if qoe[1] >= 0.98 and qoe[0] >= 0.98:
        if qoe[2] >= 0.95:
            if se < 280:
                reward = 4
            else:
                reward = 4 + (se - 280) * 0.1
        else:
            reward = (qoe[2] - 0.7) * 10
    else:
        reward = -5

    return utility, reward
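
# A quick sanity check of the helpers above (an illustrative sketch; the
# numbers are made up and not taken from the paper's configuration).
if __name__ == '__main__':
    # With 5 units over 3 services, action_space keeps the splits that sum
    # to 5 with no zeros: (1,1,3), (1,2,2), (1,3,1), (2,1,2), (2,2,1), (3,1,1).
    acts = action_space(5, 3)
    print(acts)
    # A hypothetical QoE vector meeting all thresholds earns the bonus reward.
    utility, reward = calc__reward(np.array([0.99, 0.99, 0.96]), 290)
    print(utility, reward)  # reward = 4 + (290 - 280) * 0.1 = 5.0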
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os
import numpy as np
import tensorflow as tf
from a2c_lstm import A2CLSTM
from env import EnvMove
import utils

# QoE_WEIGHT = [1, 1, 1]
# SE_WEIGHT = 0.01
UE_NUMS = 1200
SER_PROB = [1, 2, 3]
LEARNING_WINDOW = 2000
BAND_WHOLE = 10  # MHz
BAND_PER = 0.2  # MHz, granularity of one allocation unit
DL_MIMO = 64
SER_CAT = ['volte', 'embb_general', 'urllc']

LR_A = 0.002
LR_C = 0.01
GAMMA = 0
ENTROPY_BETA = 0.001
LSTM_LEN = 10
MAX_ITERATIONS = 10000

LOG_TRAIN = './logs/a2clstm.txt'
# LOG_TRAIN = './logs/a2c.txt'
os.makedirs(os.path.dirname(LOG_TRAIN), exist_ok=True)  # the log file is opened in append mode below

action_space = utils.action_space(int(BAND_WHOLE // BAND_PER), len(SER_CAT)) * BAND_PER * 10 ** 6
n_actions = len(action_space)
print(n_actions)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

model = A2CLSTM(sess, n_features=len(SER_CAT), n_actions=n_actions, lr_a=LR_A, lr_c=LR_C, entropy_beta=ENTROPY_BETA)

env = EnvMove(UE_max_no=UE_NUMS, ser_prob=np.array(SER_PROB, dtype=np.float32), learning_windows=LEARNING_WINDOW, dl_mimo=DL_MIMO)

qoe_lst, se_lst = [], []
reward_lst = []

buffer_ob = []

# Warm-up: fill the observation window with LSTM_LEN epochs under random actions.
for i in range(LSTM_LEN):
    env.countReset()
    env.user_move()
    env.activity()

    action = np.random.choice(n_actions)
    env.band_ser_cat = action_space[action]

    for i_subframe in range(LEARNING_WINDOW):
        env.scheduling()
        env.provisioning()
        if i_subframe < LEARNING_WINDOW - 1:
            env.activity()

    pkt, dis = env.get_state()
    observe = utils.gen_state(pkt)
    buffer_ob.append(observe)

for i_iter in range(MAX_ITERATIONS):
    env.countReset()
    env.user_move()
    env.activity()

    s = np.vstack(buffer_ob)
    action = model.choose_action(s)
    env.band_ser_cat = action_space[action]

    for i_subframe in range(LEARNING_WINDOW):
        env.scheduling()
        env.provisioning()
        if i_subframe < LEARNING_WINDOW - 1:
            env.activity()

    pkt, dis = env.get_state()
    observe = utils.gen_state(pkt)
    buffer_ob.pop(0)  # slide the observation window by one epoch
    buffer_ob.append(observe)
    s_ = np.vstack(buffer_ob)

    qoe, se = env.get_reward()
    qoe_lst.append(qoe.tolist())
    se_lst.append(se[0])

    utility, reward = utils.calc__reward(qoe, se[0])
    reward_lst.append(reward)

    print('\nStep-%d' % i_iter)
    print('qoe: ', qoe)
    print('se: ', se[0])
    print('reward: ', reward)

    v_s_ = model.target_v(s_)
    td_target = reward + GAMMA * v_s_

    feed_dict = {
        model.s: s,
        model.a: np.vstack([action]),
        model.td_target: np.vstack([td_target])
    }

    model.learn(feed_dict)

    if (i_iter + 1) % 500 == 0:
        with open(LOG_TRAIN, 'a+') as f:
            for i in range(len(se_lst)):
                print(
                    'Reward: %.4f, SE: %.4f, QoE_volte: %.4f, QoE_embb: %.4f, QoE_urllc: %.4f' % (
                        reward_lst[i], se_lst[i], qoe_lst[i][0], qoe_lst[i][1], qoe_lst[i][2]), file=f)
        qoe_lst, se_lst = [], []
        reward_lst = []
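
# With GAMMA = 0 the TD target above collapses to the immediate reward, so the
# critic simply regresses V(s) toward r. To try the memoryless A2C baseline
# instead (a sketch, not wired into the loop above -- it consumes one
# observation rather than an LSTM_LEN stack):
#
#   from a2c import A2C
#   model = A2C(sess, n_actions=n_actions, n_features=len(SER_CAT),
#               lr_a=LR_A, lr_c=LR_C, reward_decay=GAMMA)
#   action = model.choose_action(observe)               # observe: shape (3,)
#   model.learn(observe, action, reward, observe_next)  # observe_next: next epoch's state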
--------------------------------------------------------------------------------
/a2c.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf


class A2C(object):
    def __init__(
            self,
            sess,
            n_actions,
            n_features,
            lr_a=0.001,
            lr_c=0.01,
            reward_decay=0.9
    ):
        self.sess = sess
        self.n_actions = n_actions
        self.n_features = n_features
        self.lr_a = lr_a
        self.lr_c = lr_c
        self.gamma = reward_decay

        with tf.name_scope('inputs'):
            self.s = tf.placeholder(tf.float32, [None, self.n_features], "state")
            self.a = tf.placeholder(tf.int32, None, "action")
            self.r = tf.placeholder(tf.float32, None, 'reward')
            self.v_ = tf.placeholder(tf.float32, [None, 1], "v_next")

        self.acts_prob, self.v = self._build_net()

        with tf.name_scope('TD_error'):
            # TD_error = (r + gamma * V_next) - V_eval
            self.td_error = self.r + self.gamma * self.v_ - self.v

        with tf.name_scope('c_loss'):
            self.c_loss = tf.square(self.td_error)

        with tf.name_scope('a_loss'):
            # advantage (TD error) guided loss; the critic is held fixed here
            log_prob = tf.log(self.acts_prob[0, self.a])
            self.a_loss = -tf.reduce_mean(log_prob * tf.stop_gradient(self.td_error))

        with tf.name_scope('c_train'):
            self.c_train_op = tf.train.AdamOptimizer(self.lr_c).minimize(self.c_loss)

        with tf.name_scope('a_train'):
            self.a_train_op = tf.train.AdamOptimizer(self.lr_a).minimize(self.a_loss)

        self.sess.run(tf.global_variables_initializer())

    def _build_net(self):
        w_init = tf.random_normal_initializer(0., .1)
        b_init = tf.constant_initializer(0.1)

        with tf.variable_scope('Actor'):
            l_a = tf.layers.dense(
                inputs=self.s,
                units=32,  # number of hidden units
                activation=tf.nn.relu,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='l_a'
            )

            acts_prob = tf.layers.dense(
                inputs=l_a,
                units=self.n_actions,
                activation=tf.nn.softmax,  # action probabilities
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='acts_prob'
            )

        with tf.variable_scope('Critic'):
            l_c = tf.layers.dense(
                inputs=self.s,
                units=32,
                # A linear critic would favor actor convergence, but a linear
                # approximator hardly learns the correct value here, so ReLU is used.
                activation=tf.nn.relu,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='l_c'
            )

            v = tf.layers.dense(
                inputs=l_c,
                units=1,
                activation=None,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='V'  # state value
            )

        return acts_prob, v

    def choose_action(self, s):
        s = s[np.newaxis, :]
        probs = self.sess.run(self.acts_prob, {self.s: s})  # probabilities for all actions
        return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel())  # returns an int

    def learn(self, s, a, r, s_):
        s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
        v_ = self.sess.run(self.v, feed_dict={self.s: s_})
        feed_dict = {self.s: s, self.a: a, self.v_: v_, self.r: r}
        self.sess.run([self.a_train_op, self.c_train_op], feed_dict=feed_dict)
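
# A minimal smoke test for this class (an illustrative sketch with made-up
# shapes; run.py itself drives the LSTM variant, not this one).
if __name__ == '__main__':
    sess = tf.Session()
    agent = A2C(sess, n_actions=5, n_features=3)
    s = np.random.randn(3).astype(np.float32)
    s_next = np.random.randn(3).astype(np.float32)
    a = agent.choose_action(s)
    agent.learn(s, a, r=1.0, s_=s_next)  # one actor + one critic update
    print('sampled action:', a)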
--------------------------------------------------------------------------------
/a2c_lstm.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf


class A2CLSTM(object):
    def __init__(
            self,
            sess,
            n_actions,
            n_features,
            lr_a=0.001,
            lr_c=0.01,
            entropy_beta=0.01
    ):
        self.sess = sess
        self.n_actions = n_actions
        self.n_features = n_features
        self.lr_a = lr_a
        self.lr_c = lr_c
        self.entropy_beta = entropy_beta

        self.lstm_cell_size = 64

        OPT_A = tf.train.AdamOptimizer(self.lr_a)
        OPT_C = tf.train.AdamOptimizer(self.lr_c)

        with tf.name_scope('inputs'):
            self.s = tf.placeholder(tf.float32, [None, self.n_features], "state")
            self.a = tf.placeholder(tf.int32, [None, 1], "action")
            self.td_target = tf.placeholder(tf.float32, [None, 1], "td_target")

        self.acts_prob, self.v, self.a_params, self.c_params = self._build_net()

        with tf.name_scope('TD_error'):
            self.td_error = tf.subtract(self.td_target, self.v, name='TD_error')

        with tf.name_scope('c_loss'):
            self.c_loss = tf.reduce_mean(tf.square(self.td_error))

        with tf.name_scope('a_loss'):
            # squeeze [batch, 1] -> [batch] so that one_hot yields [batch, n_actions]
            a_one_hot = tf.one_hot(tf.squeeze(self.a, axis=1), self.n_actions, dtype=tf.float32)
            log_prob = tf.reduce_sum(tf.log(self.acts_prob + 1e-5) * a_one_hot,
                                     axis=1, keepdims=True)
            exp_v = log_prob * tf.stop_gradient(self.td_error)
            entropy = -tf.reduce_sum(self.acts_prob * tf.log(self.acts_prob + 1e-5), axis=1,
                                     keepdims=True)  # encourage exploration
            self.exp_v = self.entropy_beta * entropy + exp_v
            self.a_loss = tf.reduce_mean(-self.exp_v)

        with tf.name_scope('compute_grads'):
            self.a_grads = tf.gradients(self.a_loss, self.a_params)
            self.c_grads = tf.gradients(self.c_loss, self.c_params)

        with tf.name_scope('c_train'):
            self.c_train_op = OPT_C.apply_gradients(zip(self.c_grads, self.c_params))

        with tf.name_scope('a_train'):
            self.a_train_op = OPT_A.apply_gradients(zip(self.a_grads, self.a_params))

        self.sess.run(tf.global_variables_initializer())

    def _build_net(self):
        w_init = tf.random_normal_initializer(0., .1)
        b_init = tf.constant_initializer(0.1)

        with tf.variable_scope('Critic'):
            # [time_step, feature] => [time_step, batch, feature]
            s = tf.expand_dims(self.s, axis=1, name='timely_input')

            lstm_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_cell_size)
            self.lstm_state_init = lstm_cell.zero_state(batch_size=1, dtype=tf.float32)

            outputs, _ = tf.nn.dynamic_rnn(
                cell=lstm_cell,
                inputs=s,
                initial_state=self.lstm_state_init,
                time_major=True
            )
            # keep only the last time step as the joined state representation
            cell_out = tf.reshape(outputs[-1, :, :], [-1, self.lstm_cell_size],
                                  name='flatten_lstm_outputs')

            l_c1 = tf.layers.dense(
                inputs=cell_out,
                units=32,
                activation=tf.nn.tanh,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='l_c1'
            )

            v = tf.layers.dense(
                inputs=l_c1,
                units=1,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='V'
            )  # state value

        with tf.variable_scope('Actor'):
            l_a1 = tf.layers.dense(
                inputs=cell_out,
                units=32,  # number of hidden units
                activation=tf.nn.tanh,
                kernel_initializer=w_init,
                bias_initializer=b_init,
                name='l_a1'
            )

            acts_prob = tf.layers.dense(
                inputs=l_a1,
                units=self.n_actions,
                activation=tf.nn.softmax,  # action probabilities
                kernel_initializer=w_init,
                name='acts_prob'
            )
        # Note: the shared LSTM lives under the 'Critic' scope, so only the
        # critic optimizer updates its weights.
        a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor')
        c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Critic')

        return acts_prob, v, a_params, c_params

    def choose_action(self, s):
        probs = self.sess.run(self.acts_prob, feed_dict={self.s: s})  # probabilities for all actions
        a = np.random.choice(np.arange(probs.shape[1]), p=probs.ravel())
        return a

    def learn(self, feed_dict):
        self.sess.run([self.a_train_op, self.c_train_op], feed_dict=feed_dict)

    def target_v(self, s):
        v = self.sess.run(self.v, {self.s: s})
        return v
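
# A minimal smoke test (illustrative sketch): one "state" is a whole window of
# stacked observations, shape [LSTM_LEN, n_features], exactly as run.py builds it.
if __name__ == '__main__':
    sess = tf.Session()
    agent = A2CLSTM(sess, n_actions=5, n_features=3)
    s = np.random.randn(10, 3).astype(np.float32)  # 10 stacked observations
    a = agent.choose_action(s)
    td_target = 1.0 + 0.9 * agent.target_v(s)      # hypothetical reward and gamma
    agent.learn({agent.s: s,
                 agent.a: np.vstack([a]),
                 agent.td_target: np.vstack([td_target])})
    print('sampled action:', a)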
--------------------------------------------------------------------------------
/dqn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import math

import numpy as np
import tensorflow as tf


class DQN:
    def __init__(
            self,
            sess,
            n_actions,
            n_features,
            learning_rate=0.01,
            reward_decay=0.9,
            epsilon_max=0.9,
            epsilon_decay=3000,
            replace_target_iter=300,
            memory_size=2000,
            batch_size=32,
            summary_writer=None,
            summary_every=10
    ):
        self.sess = sess
        self.n_actions = n_actions
        self.n_features = n_features
        self.learning_rate = learning_rate
        self.gamma = reward_decay
        self.epsilon_max = epsilon_max
        self.epsilon_decay = epsilon_decay
        self.replace_target_iter = replace_target_iter
        self.memory_size = memory_size
        self.batch_size = batch_size
        self.epsilon = 0 if epsilon_decay is not None else self.epsilon_max

        # total learning steps taken so far
        self.learn_step_counter = 0

        # initialize zeroed replay memory; each row holds [s, a, r, s_]
        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))

        self._build_net()

        self.sess.run(tf.global_variables_initializer())

        if summary_writer is not None:
            # the graph was not yet available when the writer was requested
            self.summary_writer = tf.summary.FileWriter(summary_writer)
            self.summary_writer.add_graph(self.sess.graph)
            self.summary_every = summary_every

        self.cost_his = []

    def _build_net(self):
        # ------------------ all inputs ------------------------
        with tf.name_scope('model_inputs'):
            self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input state
            self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input next state
            self.r = tf.placeholder(tf.float32, [None, ], name='r')  # input reward
            self.a = tf.placeholder(tf.int32, [None, ], name='a')  # input action

        # --------------------- build net ---------------------
        with tf.variable_scope('eval_net'):
            self.q_eval = self.q_network(self.s)

        with tf.variable_scope('target_net'):
            self.q_next = self.q_network(self.s_)

        eval_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')
        target_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')

        # ----------------------- train -----------------------
        with tf.name_scope('q_target'):
            q_target = self.r + self.gamma * tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')  # shape=(None,)
            self.q_target = tf.stop_gradient(q_target)

        with tf.name_scope('q_eval'):
            a_indices = tf.stack([tf.range(tf.shape(self.a)[0], dtype=tf.int32), self.a], axis=1)
            self.q_eval_wrt_a = tf.gather_nd(params=self.q_eval, indices=a_indices)  # shape=(None,)

        with tf.name_scope('loss'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval_wrt_a, name='TD_error'))

        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)

        with tf.name_scope('target_update'):
            self.target_replace_op = [tf.assign(t, e) for t, e in zip(target_params, eval_params)]

        self.summarize = tf.summary.merge_all()

    def q_network(self, s):
        init_w, init_b = tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)
        layer1 = tf.layers.dense(
            inputs=s,
            units=20,
            activation=tf.nn.relu,
            kernel_initializer=init_w,
            bias_initializer=init_b,
            name='layer1'
        )

        q_val = tf.layers.dense(
            inputs=layer1,
            units=self.n_actions,
            kernel_initializer=init_w,
            bias_initializer=init_b,
            name='q_val'
        )
        return q_val

    def store_transition(self, s, a, r, s_):
        if not hasattr(self, 'memory_counter'):
            self.memory_counter = 0
        transition = np.hstack((s, [a, r], s_))
        # overwrite the oldest memory with the new transition
        index = self.memory_counter % self.memory_size
        self.memory[index, :] = transition
        self.memory_counter += 1

    def choose_action(self, observation):
        # add a batch dimension before feeding the tf placeholder
        observation = observation[np.newaxis, :]

        if np.random.uniform() < self.epsilon:
            # forward the observation and pick the action with the highest Q value
            actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
            action = np.argmax(actions_value)
        else:
            action = np.random.randint(0, self.n_actions)
        return action

    def learn(self):
        # periodically copy the eval-network parameters into the target network
        if self.learn_step_counter % self.replace_target_iter == 0:
            self.sess.run(self.target_replace_op)
            print('\ntarget_params_replaced\n')

        # sample a batch from the replay memory
        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
        batch_memory = self.memory[sample_index, :]

        _, cost = self.sess.run(
            [self.train_op, self.loss],
            feed_dict={
                self.s: batch_memory[:, :self.n_features],
                self.a: batch_memory[:, self.n_features],
                self.r: batch_memory[:, self.n_features + 1],
                self.s_: batch_memory[:, -self.n_features:],
            })

        self.cost_his.append(cost)

        # anneal epsilon upward from 0 toward epsilon_max
        self.epsilon = max(0, self.epsilon_max - math.exp(-1 * self.learn_step_counter / self.epsilon_decay))
        self.learn_step_counter += 1
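
# Worked example of the schedule above (epsilon_max=0.9, epsilon_decay=3000):
# epsilon = max(0, 0.9 - exp(-step / 3000)) *rises* over time, so the agent is
# almost fully random at first and becomes greedy as training proceeds.
#   step     0:  max(0, 0.9 - 1.000) = 0.000
#   step  3000:  max(0, 0.9 - 0.368) = 0.532
#   step  9000:  max(0, 0.9 - 0.050) = 0.850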
--------------------------------------------------------------------------------
/env.py:
--------------------------------------------------------------------------------
'''
For downlink simulations in a single-base-station environment.
'''

import numpy as np

np.random.seed(1)


class EnvMove(object):
    def __init__(self,
                 BS_pos=np.array([0, 0]),
                 BS_radius=40,
                 BS_tx_power=16,  # unit is dBW, i.e. 46 dBm
                 UE_max_no=1000,
                 Queue_max=5,
                 noise_PSD=-204,  # dBW/Hz, i.e. -174 dBm/Hz
                 chan_mod='36814',
                 carrier_freq=2 * 10 ** 9,  # 2 GHz
                 time_subframe=0.5 * 10 ** (-3),  # LTE subframe, 0.5 ms
                 ser_cat=['volte', 'embb_general', 'urllc'],
                 band_whole=10 * 10 ** 6,  # 10 MHz
                 schedu_method='round_robin',
                 ser_prob=np.array([6, 6, 1], dtype=np.float32),
                 dl_mimo=32,
                 rx_gain=20,  # dB
                 learning_windows=60000,
                 ):
        self.BS_pos = BS_pos
        self.BS_tx_power = BS_tx_power
        self.BS_radius = BS_radius
        self.band_whole = band_whole
        self.chan_mod = chan_mod
        self.carrier_freq = carrier_freq
        self.time_subframe = round(time_subframe, 4)
        self.noise_PSD = noise_PSD
        self.sys_clock = 0
        self.schedu_method = schedu_method
        self.dl_mimo = dl_mimo
        self.UE_rx_gain = rx_gain
        self.UE_max_no = UE_max_no
        self.UE_buffer = np.zeros([Queue_max, UE_max_no])
        self.UE_buffer_backup = np.zeros([Queue_max, UE_max_no])
        self.UE_latency = np.zeros([Queue_max, UE_max_no])
        self.UE_readtime = np.zeros(UE_max_no)
        self.UE_band = np.zeros(UE_max_no)
        self.learning_windows = round(learning_windows * self.time_subframe, 4)
        self.ser_cat = ser_cat
        if len(self.ser_cat) > 1:
            self.band_ser_cat = np.zeros(len(ser_cat))
            if len(ser_prob) == len(self.ser_cat):
                self.ser_prob = ser_prob / np.sum(ser_prob)
            else:
                self.ser_prob = np.ones(len(ser_cat)) / len(ser_cat)
        else:
            self.ser_prob = np.array([1])
            self.band_ser_cat = self.band_whole

        self.UE_cat = np.random.choice(self.ser_cat, self.UE_max_no, p=self.ser_prob)

        # UEs are dropped uniformly on a [-3R, 3R] x [-3R, 3R] square; only those
        # inside the disc of radius BS_radius are attached to the cell.
        self.UE_pos = np.random.uniform(-3 * self.BS_radius, 3 * self.BS_radius, [self.UE_max_no, 2])
        self.UE_cell = np.zeros(self.UE_max_no)
        self.UE_cell[np.where(np.sum(self.UE_pos ** 2, axis=1) <= self.BS_radius ** 2)] = 1
        self.UE_speed = np.zeros(UE_max_no)
        self.UE_speed[np.where(self.UE_cat == 'volte')] = 1
        self.UE_speed[np.where(self.UE_cat == 'embb_general')] = 4
        self.UE_speed[np.where(self.UE_cat == 'urllc')] = 8
        self.UE_direction = np.random.uniform(-180, 180, self.UE_max_no)

        self.tx_pkt_no = np.zeros(len(self.ser_cat))
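
    # Worked example of the service mix: run.py passes ser_prob=[1, 2, 3],
    # normalized above to [1/6, 2/6, 3/6], so with UE_max_no=1200 roughly
    # 200 VoLTE, 400 eMBB and 600 URLLC users are drawn, and only those with
    # UE_cell == 1 (inside the disc of radius BS_radius) are actually served.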
    def channel_model(self):
        # 3GPP TR 36.814-style macro channel model
        if self.chan_mod == '36814':
            shadowing_var = 8  # log-normal shadowing standard deviation, 8 dB

            dis = np.sqrt(np.sum((self.BS_pos - self.UE_pos) ** 2, axis=1)) / 1000  # unit changed to km

            self.path_loss = 145.4 + 37.5 * np.log10(dis).reshape(-1, 1)
            self.chan_loss = self.path_loss + np.random.normal(0, shadowing_var, self.UE_max_no).reshape(-1, 1)
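
    # Path-loss sanity check (worked example): at the nominal cell edge,
    # dis = 40 m = 0.04 km, so path_loss = 145.4 + 37.5 * log10(0.04)
    # = 145.4 - 52.4 ≈ 93 dB, before the random shadowing term is added.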
    def user_move(self):
        # Move every UE along its heading for one learning window; reflect at
        # the borders of the [-3R, 3R] square (mirror position, flip heading).
        l = self.UE_speed * self.learning_windows
        delta_x = l * np.cos(self.UE_direction * np.pi / 180)
        delta_y = l * np.sin(self.UE_direction * np.pi / 180)
        self.UE_pos[:, 0] = self.UE_pos[:, 0] + delta_x
        self.UE_pos[:, 1] = self.UE_pos[:, 1] + delta_y

        UE_index = np.where(self.UE_pos[:, 0] < -3 * self.BS_radius)
        self.UE_pos[UE_index, 0] = -6 * self.BS_radius - self.UE_pos[UE_index, 0]
        self.UE_direction[UE_index] = 180 - self.UE_direction[UE_index]
        UE_index = np.where(self.UE_direction >= 180)
        self.UE_direction[UE_index] -= 360

        UE_index = np.where(self.UE_pos[:, 0] >= 3 * self.BS_radius)
        self.UE_pos[UE_index, 0] = 6 * self.BS_radius - self.UE_pos[UE_index, 0]
        self.UE_direction[UE_index] = 180 - self.UE_direction[UE_index]
        UE_index = np.where(self.UE_direction >= 180)
        self.UE_direction[UE_index] -= 360

        UE_index = np.where(self.UE_pos[:, 1] < -3 * self.BS_radius)
        self.UE_pos[UE_index, 1] = -6 * self.BS_radius - self.UE_pos[UE_index, 1]
        self.UE_direction[UE_index] = -self.UE_direction[UE_index]

        UE_index = np.where(self.UE_pos[:, 1] >= 3 * self.BS_radius)
        self.UE_pos[UE_index, 1] = 6 * self.BS_radius - self.UE_pos[UE_index, 1]
        self.UE_direction[UE_index] = -self.UE_direction[UE_index]

        self.UE_cell = np.zeros(self.UE_max_no)
        self.UE_cell[np.where(np.sum(self.UE_pos ** 2, axis=1) <= self.BS_radius ** 2)] = 1

        # Per-service fraction of UEs currently inside the cell radius.
        tmp_u = self.UE_pos[np.where(self.UE_cat == 'volte')]
        tmp_dis = np.sum(tmp_u ** 2, axis=1)
        n1 = np.sum(tmp_dis <= self.BS_radius ** 2)
        self.volte_dis = n1 / tmp_u.shape[0]

        tmp_u = self.UE_pos[np.where(self.UE_cat == 'embb_general')]
        tmp_dis = np.sum(tmp_u ** 2, axis=1)
        n2 = np.sum(tmp_dis <= self.BS_radius ** 2)
        self.embb_dis = n2 / tmp_u.shape[0]

        tmp_u = self.UE_pos[np.where(self.UE_cat == 'urllc')]
        tmp_dis = np.sum(tmp_u ** 2, axis=1)
        n3 = np.sum(tmp_dis <= self.BS_radius ** 2)
        self.urllc_dis = n3 / tmp_u.shape[0]

    def scheduling(self):
        self.UE_band = np.zeros(self.UE_max_no)  # reset per-UE bandwidth for this subframe
        if self.schedu_method == 'round_robin':
            ser_cat = len(self.ser_cat)
            band_ser_cat = self.band_ser_cat
            if (self.sys_clock * 10000) % (self.learning_windows * 10000) == (self.time_subframe * 10000):
                self.ser_schedu_ind = [0] * ser_cat  # round-robin pointer per slice

            for i in range(ser_cat):
                # active in-cell UEs of this service with a non-empty head-of-line buffer
                UE_index = np.where((self.UE_cell == 1) &
                                    (self.UE_buffer[0, :] != 0) & (self.UE_cat == self.ser_cat[i]))[0]

                UE_Active_No = len(UE_index)
                if UE_Active_No != 0:
                    RB_No = band_ser_cat[i] // (180 * 10 ** 3)  # 180 kHz resource blocks
                    RB_round = RB_No // UE_Active_No
                    self.UE_band[UE_index] += 180 * 10 ** 3 * RB_round

                    # hand the leftover RBs out one-by-one, starting at the pointer
                    RB_rem_no = int(RB_No - RB_round * UE_Active_No)
                    left_no = np.where(UE_index > self.ser_schedu_ind[i])[0].size
                    if left_no >= RB_rem_no:
                        UE_act_index = UE_index[np.where(np.greater_equal(UE_index, self.ser_schedu_ind[i]))]
                        UE_act_index = UE_act_index[:RB_rem_no]
                        if UE_act_index.size != 0:
                            self.UE_band[UE_act_index] += 180 * 10 ** 3
                            self.ser_schedu_ind[i] = UE_act_index[-1] + 1
                    else:
                        UE_act_index_par1 = UE_index[np.where(UE_index > self.ser_schedu_ind[i])]
                        UE_act_index_par2 = UE_index[0:RB_rem_no - left_no]
                        self.UE_band[np.hstack((UE_act_index_par1, UE_act_index_par2))] += 180 * 10 ** 3
                        self.ser_schedu_ind[i] = UE_act_index_par2[-1] + 1
        elif self.schedu_method == 'round_robin_nons':
            # non-sliced baseline: round robin over the whole band
            band_whole = self.band_whole
            if self.sys_clock == self.time_subframe:
                self.ser_schedu_ind = 0

            UE_index = np.where((self.UE_buffer[0, :] != 0))[0]
            UE_Active_No = len(UE_index)
            if UE_Active_No != 0:
                RB_No = band_whole // (180 * 10 ** 3)
                RB_round = RB_No // UE_Active_No

                self.UE_band[UE_index] += 180 * 10 ** 3 * RB_round

                RB_rem_no = RB_No % UE_Active_No
                left_no = np.where(UE_index > self.ser_schedu_ind)[0].size
                if left_no >= RB_rem_no:
                    UE_act_index = UE_index[np.where(np.logical_and(np.greater_equal(UE_index, self.ser_schedu_ind),
                                                                    np.less(UE_index,
                                                                            RB_rem_no + self.ser_schedu_ind)))]
                    if UE_act_index.size != 0:
                        self.UE_band[UE_act_index] += 180 * 10 ** 3
                        self.ser_schedu_ind = UE_act_index[-1] + 1
                else:
                    UE_act_index_par1 = UE_index[np.where(UE_index > self.ser_schedu_ind)]
                    UE_act_index_par2 = UE_index[0:RB_rem_no - left_no]
                    self.UE_band[np.hstack((UE_act_index_par1, UE_act_index_par2))] += 180 * 10 ** 3
                    self.ser_schedu_ind = UE_act_index_par2[-1] + 1
            # track the bandwidth actually used per service, averaged over the window
            if (self.sys_clock * 10000) % (self.learning_windows * 10000) == (self.time_subframe * 10000):
                self.band_ser_cat = np.zeros(len(self.ser_cat))
            for i in range(len(self.ser_cat)):
                if (self.sys_clock * 10000) % (self.learning_windows * 10000) == (self.time_subframe * 10000):
                    self.band_ser_cat[i] = np.sum(self.UE_band[self.UE_cat == self.ser_cat[i]])
                else:
                    self.band_ser_cat[i] += np.sum(self.UE_band[self.UE_cat == self.ser_cat[i]])
                if (self.sys_clock * 10000) % (self.learning_windows * 10000) == 0:
                    lw = (self.learning_windows * 10000) / (self.time_subframe * 10000)
                    self.band_ser_cat[i] = self.band_ser_cat[i] / lw
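
    # Round-robin arithmetic (worked example): if a slice holds 3.6 MHz, it has
    # RB_No = 3.6e6 // 180e3 = 20 resource blocks. With 7 active UEs each gets
    # RB_round = 2 RBs, and the RB_rem_no = 6 leftover RBs go one-by-one to the
    # UEs following the slice's round-robin pointer ser_schedu_ind.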
    def provisioning(self):
        UE_index = np.where(self.UE_band != 0)
        self.channel_model()
        rx_power = 10 ** ((self.BS_tx_power - self.chan_loss + self.UE_rx_gain) / 10)
        rx_power = rx_power.reshape(1, -1)[0]
        rate = np.zeros(self.UE_max_no)
        # achievable rate per scheduled UE (base-10 logarithm, as released)
        rate[UE_index] = self.UE_band[UE_index] * np.log10(
            1 + rx_power[UE_index] / (10 ** (self.noise_PSD / 10) * self.UE_band[UE_index])) * self.dl_mimo

        self.UE_latency[np.where(self.UE_buffer != 0)] += self.time_subframe

        for ue_id in UE_index[0]:
            self.UE_buffer[:, ue_id] = bufferUpdate(self.UE_buffer[:, ue_id], rate[ue_id], self.time_subframe)

        self.store_reward(rate)

        self.bufferClear()
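
    # Rate sanity check (worked example with a hypothetical SNR): a UE holding
    # one 180 kHz RB with rx_power / (noise_PSD_lin * band) = 100 gets roughly
    # 180e3 * log10(1 + 100) * dl_mimo ≈ 180e3 * 2.0 * 64 ≈ 23 Mbit/s
    # under the dl_mimo = 64 configuration used in run.py.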
    def activity(self):
        # Traffic models follow the NGMN evaluation methodology:
        # https://www.ngmn.org/fileadmin/user_upload/NGMN_Radio_Access_Performance_Evaluation_Methodology.pdf
        # volte uses the VoIP model, embb_general the video-streaming model,
        # and urllc the FTP2 model.
        if self.sys_clock == 0:
            for ser_name in self.ser_cat:
                ue_index = np.where((self.UE_cat == ser_name) & (self.UE_cell == 1))
                ue_index_Size = ue_index[0].size
                if ser_name == 'volte':
                    # the silence period lasts at most 160 ms
                    self.UE_readtime[ue_index] = np.random.uniform(0, 160 * 10 ** (-3), [1, ue_index_Size])
                elif ser_name == 'embb_general':
                    tmp_readtime = np.random.pareto(1.2, [1, ue_index_Size]) * 6 * 10 ** -3
                    tmp_readtime[tmp_readtime > 12.5 * 10 ** -3] = 12.5 * 10 ** -3
                    self.UE_readtime[ue_index] = tmp_readtime
                elif ser_name == 'urllc':
                    # deliberately much shorter than the spec: the spec gives an
                    # average reading time of 180 s, but 180 ms is used here
                    self.UE_readtime[ue_index] = np.random.exponential(180 * 10 ** -3, [1, ue_index_Size])

        UE_index_readtime = np.where(self.UE_readtime <= 0)[0].tolist()

        for ue_id in UE_index_readtime:
            if self.UE_buffer[:, ue_id].size - np.count_nonzero(
                    self.UE_buffer[:, ue_id]) != 0:  # the buffer is not full
                buf_ind = np.where(self.UE_buffer[:, ue_id] == 0)[0][0]
                if self.UE_cat[ue_id] == 'volte':
                    self.UE_buffer[buf_ind, ue_id] = 40 * 8
                    self.UE_readtime[ue_id] = np.random.uniform(0, 160 * 10 ** (-3), 1)
                elif self.UE_cat[ue_id] == 'embb_general':
                    tmp_buffer_size = np.random.pareto(1.2, 1) * 800
                    if tmp_buffer_size > 2000:
                        tmp_buffer_size = 2000
                    self.UE_buffer[buf_ind, ue_id] = tmp_buffer_size
                    self.UE_readtime[ue_id] = np.random.pareto(1.2, [1, 1]) * 6 * 10 ** -3
                    if self.UE_readtime[ue_id] > 12.5 * 10 ** -3:
                        self.UE_readtime[ue_id] = 12.5 * 10 ** -3
                elif self.UE_cat[ue_id] == 'urllc':
                    tmp_buffer_size = np.random.choice([0.3 * 8 * 10 ** 6])
                    # alternative packet-size draws kept from earlier experiments:
                    # tmp_buffer_size = np.random.choice(
                    #     [0.3 * 8 * 10 ** 6, 0.4 * 8 * 10 ** 6, 0.5 * 8 * 10 ** 6,
                    #      0.6 * 8 * 10 ** 6, 0.7 * 8 * 10 ** 6])
                    self.UE_buffer[buf_ind, ue_id] = tmp_buffer_size
                    # again 180 ms instead of the spec's 180 s average
                    self.UE_readtime[ue_id] = np.random.exponential(180 * 10 ** -3, [1, 1])

                self.UE_buffer_backup[buf_ind, ue_id] = self.UE_buffer[buf_ind, ue_id]

                self.tx_pkt_no[self.ser_cat.index(self.UE_cat[ue_id])] += 1

            else:
                # buffer full: the new packet is dropped and only the read timer restarts
                if self.UE_cat[ue_id] == 'volte':
                    self.UE_readtime[ue_id] = np.random.uniform(0, 160 * 10 ** (-3), 1)
                elif self.UE_cat[ue_id] == 'embb_general':
                    self.UE_readtime[ue_id] = np.random.pareto(1.2, [1, 1]) * 6 * 10 ** -3
                else:
                    self.UE_readtime[ue_id] = np.random.exponential(180 * 10 ** -3, [1, 1])

                self.drop_pkt_no[self.ser_cat.index(self.UE_cat[ue_id])] += 1

        self.UE_readtime[np.where(self.UE_cell == 1)] -= self.time_subframe

        self.sys_clock += self.time_subframe
        self.sys_clock = round(self.sys_clock, 4)

    def get_state(self):
        pkt = self.tx_pkt_no + self.drop_pkt_no
        dis = np.array([self.volte_dis, self.embb_dis, self.urllc_dis])
        return pkt, dis

    def store_reward(self, rate):
        # Compute the per-slice spectral efficiency (SE) and energy efficiency (EE),
        # and count the packets that met their service's rate and latency targets.
        se = np.zeros(len(self.ser_cat))
        ee = np.zeros(len(self.ser_cat))
        sys_rate_frame = 0
        for ser_name in self.ser_cat:
            ser_index = self.ser_cat.index(ser_name)
            ue_index_ = np.where(self.UE_cat == ser_name)
            allo_band = np.sum(self.UE_band[ue_index_])
            sum_rate = np.sum(rate[ue_index_])
            if allo_band != 0:
                sys_rate_frame += sum_rate
                se[ser_index] = sum_rate / allo_band
                ee[ser_index] = se[ser_index] / 10 ** (self.BS_tx_power / 10)

        # system-wide SE accumulated over the learning window
        self.sys_se_per_frame += sys_rate_frame / self.band_whole
        if sys_rate_frame == 0:
            self.idle_frame += 1

        # handling_latency = 2 * 10 ** (-3)
        handling_latency = 0
        UE_index = np.where(self.UE_cell == 1)[0]
        for ue_id in UE_index:
            for i in range(self.UE_latency[:, ue_id].size):
                if (self.UE_buffer[i, ue_id] == 0) & (self.UE_latency[i, ue_id] != 0):
                    if self.UE_cat[ue_id] == 'volte':
                        cat_index = self.ser_cat.index('volte')
                        if (self.UE_latency[i, ue_id] == self.time_subframe):
                            if (rate[ue_id] >= 51 * 10 ** 3) & (
                                    self.UE_latency[i, ue_id] < 10 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
                        else:
                            if (self.UE_buffer_backup[i, ue_id] / self.UE_latency[i, ue_id] >= 51 * 10 ** 3) & (
                                    self.UE_latency[i, ue_id] < 10 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
                    elif self.UE_cat[ue_id] == 'embb_general':
                        cat_index = self.ser_cat.index('embb_general')
                        if (self.UE_latency[i, ue_id] == self.time_subframe):
                            if (rate[ue_id] >= 100 * 10 ** 6) & (
                                    self.UE_latency[i, ue_id] < 10 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
                        else:
                            if (self.UE_buffer_backup[i, ue_id] / self.UE_latency[i, ue_id] >= 100 * 10 ** 6) & (
                                    self.UE_latency[i, ue_id] < 10 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
                    elif self.UE_cat[ue_id] == 'urllc':
                        cat_index = self.ser_cat.index('urllc')
                        if (self.UE_latency[i, ue_id] == self.time_subframe):
                            if (rate[ue_id] >= 10 * 10 ** 6) & (
                                    self.UE_latency[i, ue_id] < 3 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
                        else:
                            if (self.UE_buffer_backup[i, ue_id] / self.UE_latency[i, ue_id] >= 10 * 10 ** 6) & (
                                    self.UE_latency[i, ue_id] < 3 * 10 ** (-3) - handling_latency):
                                self.succ_tx_pkt_no[cat_index] += 1
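
    # QoE targets used above (worked example): a packet counts as successful if
    # its average service rate and head-of-line delay meet the slice's target --
    #   volte:        >= 51 kbit/s  and < 10 ms
    #   embb_general: >= 100 Mbit/s and < 10 ms
    #   urllc:        >= 10 Mbit/s  and < 3 ms
    # e.g. a 0.3*8e6-bit urllc packet cleared in 2 ms averages 1.2 Gbit/s and
    # stays under 3 ms, so it is counted into succ_tx_pkt_no.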
    def get_reward(self):
        se_total = self.sys_se_per_frame / (self.learning_windows / self.time_subframe - self.idle_frame)
        # ee_total = se_total / 10 ** (self.BS_tx_power / 10)
        self.tx_pkt_no[np.where(self.tx_pkt_no == 0)] += 1  # avoid division by zero
        qoe = self.succ_tx_pkt_no / (self.tx_pkt_no + self.drop_pkt_no)
        return qoe, se_total

    def bufferClear(self):
        # Compact each UE's queue: drop finished packets (buffer drained but
        # latency recorded) and shift the remaining ones to the queue head.
        latency = np.sum(self.UE_latency, axis=0)
        UE_index = np.where(latency != 0)
        bufSize = self.UE_latency[:, 0].size
        for ue_id in UE_index[0]:

            buffer_ = self.UE_buffer[:, ue_id].copy()
            buffer_bk = self.UE_buffer_backup[:, ue_id].copy()
            latency_ = self.UE_latency[:, ue_id].copy()
            ind_1 = np.where(np.logical_and(buffer_ == 0, latency_ != 0))
            indSize_1 = ind_1[0].size
            if indSize_1 != 0:
                self.UE_latency[ind_1, ue_id] = np.zeros(indSize_1)
                self.UE_buffer_backup[ind_1, ue_id] = np.zeros(indSize_1)

            ind = np.where(np.logical_and(buffer_ != 0, latency_ != 0))
            ind = ind[0]
            indSize = ind.size
            if indSize != 0:
                self.UE_buffer[:, ue_id] = np.zeros(bufSize)
                self.UE_latency[:, ue_id] = np.zeros(bufSize)
                self.UE_buffer_backup[:, ue_id] = np.zeros(bufSize)
                self.UE_buffer[:indSize, ue_id] = buffer_[ind]
                self.UE_latency[:indSize, ue_id] = latency_[ind]
                self.UE_buffer_backup[:indSize, ue_id] = buffer_bk[ind]

    def countReset(self):
        # Reset the per-window counters and queues at the start of each epoch.
        self.sys_clock = 0
        self.UE_readtime = np.ones(self.UE_max_no)
        self.tx_pkt_no = np.zeros(len(self.ser_cat))
        self.drop_pkt_no = np.zeros(len(self.ser_cat))
        self.idle_frame = 0
        self.succ_tx_pkt_no = np.zeros(len(self.ser_cat))
        self.sys_se_per_frame = np.zeros(1)
        self.UE_buffer = np.zeros(self.UE_buffer.shape)
        self.UE_buffer_backup = np.zeros(self.UE_buffer.shape)
        self.UE_latency = np.zeros(self.UE_buffer.shape)

def bufferUpdate(buffer, rate, time_subframe):
    # Drain up to rate * time_subframe bits from the queue, head first.
    bSize = buffer.size
    for i in range(bSize):
        if buffer[i] >= rate * time_subframe:
            buffer[i] -= rate * time_subframe
            rate = 0
            break
        else:
            # the packet empties mid-subframe; carry the leftover rate onward
            rate -= buffer[i] / time_subframe
            buffer[i] = 0
    return buffer


def latencyUpdate(latency, buffer, time_subframe):
    # Age the latency counter of every queued (non-empty) packet.
    lSize = latency.size
    for i in range(lSize):
        if buffer[i] != 0:
            latency[i] += time_subframe
    return latency
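
# bufferUpdate worked example (illustrative, with a 0.5 ms subframe): a UE
# served at rate = 1.2 Mbit/s can drain 600 bits per subframe. For a queue
# [400, 500], the head packet (400 bits) empties first, leaving
# 600 - 400 = 200 bits of capacity, so the second packet drops to 300:
#   bufferUpdate(np.array([400., 500.]), 1.2e6, 0.5e-3)  ->  [0., 300.]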