├── wireless ├── agents │ ├── __init__.py │ ├── time_freq_resource_allocation_v0 │ │ ├── __init__.py │ │ ├── round_robin_agent.py │ │ └── proportional_fair.py │ ├── noma_ul_time_freq_resource_allocation_v0 │ │ ├── __init__.py │ │ └── noma_ul_proportional_fair.py │ ├── random_agent.py │ ├── bosch_agent.py │ └── q_learning.py ├── test │ ├── __init__.py │ ├── test_umts_olpc.py │ ├── test_tfrav0.py │ └── test_noma_ul_tfrav0.py ├── utils │ ├── __init__.py │ ├── misc.py │ └── prop_model.py ├── doc │ └── TimeFreqResourceAllocation-v0.pdf ├── envs │ ├── __init__.py │ ├── umts_olpc.py │ ├── noma_ul_time_freq_resource_allocation_v0.py │ └── time_freq_resource_allocation_v0.py ├── __init__.py └── scripts │ ├── launch_q_learn_umts_olpc.py │ └── launch_agent.py ├── config ├── config_agent.json ├── config_sacred.json └── config_environment.json ├── setup.py ├── LICENSE └── README.md /wireless/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wireless/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wireless/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wireless/agents/time_freq_resource_allocation_v0/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wireless/agents/noma_ul_time_freq_resource_allocation_v0/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wireless/doc/TimeFreqResourceAllocation-v0.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/wireless-suite/HEAD/wireless/doc/TimeFreqResourceAllocation-v0.pdf -------------------------------------------------------------------------------- /config/config_agent.json: -------------------------------------------------------------------------------- 1 | { 2 | "agent": { 3 | "agent_type": "proportional fair channel aware", 4 | "t_max": 65536, 5 | "n_episodes": 16 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /wireless/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from wireless.envs.time_freq_resource_allocation_v0 import TimeFreqResourceAllocationV0 2 | from wireless.envs.noma_ul_time_freq_resource_allocation_v0 import NomaULTimeFreqResourceAllocationV0 3 | from wireless.envs.umts_olpc import UlOpenLoopPowerControl 4 | -------------------------------------------------------------------------------- /config/config_sacred.json: -------------------------------------------------------------------------------- 1 | { 2 | "sacred": { 3 | "sacred_host": "127.0.0.1", 4 | "sacred_port": "27017", 5 | "sacred_user": "sacred_user", 6 | "sacred_pwd": "sacred_pwd", 7 | "sacred_db": "sacred0000", 8 | "n_metrics_points": 128, 9 | "experiment_name": "Random" 10 | }, 11 | "seed": 0 12 | } 13 | -------------------------------------------------------------------------------- /config/config_environment.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "env": "NomaULTimeFreqResourceAllocation-v0", 4 | "n_ues": 32, 5 | "n_prbs": 25, 6 | "n_ues_per_prb": 2, 7 | "buffer_max_size": 8, 8 | "eirp_dbm": 13, 9 | "f_carrier_mhz": 2655, 10 | "max_pkt_size_bits": 41250, 11 | "non_gbr_traffic_mean_interarrival_time_ttis": 10 12 | } 13 | } 14 | 
def calculate_thermal_noise(bw_mhz, t0_kelvin=290):
    """
    Return the thermal noise power k * T * B expressed in milliwatts.

    bw_mhz: Noise bandwidth in MHz (the factor 1E6 converts it to Hz).
    t0_kelvin: Noise temperature in Kelvin. Defaults to 290 K, the value
        that used to be hard-coded, so existing callers are unaffected.

    The product Boltzmann * T * B is a power in Watts; the trailing factor
    1000 converts it to mW.
    """
    # Operand order is kept as in the original expression so that results
    # stay bit-for-bit identical for the default temperature.
    return constants.Boltzmann * t0_kelvin * bw_mhz * 1E6 * 1000
class PropModel:
    """
    Propagation Model class
    It can be used to define more complex prop models in the future
    """

    def __init__(self, f_mhz, n=2):
        """
        f_mhz: Carrier frequency in MHz.
        n: Attenuation exponent. The path loss is n * 10 * log10(4 * pi * d * f / c).
        """
        self.f_mhz = f_mhz
        self.n = n  # Attenuation exponent

    def get_free_space_pl_db(self, d_m, shadowing_db=0):
        """
        Return the path loss in dB for the given distance(s).

        d_m: Distance(s) in meters. Accepts a Python number, a NumPy scalar,
            a sequence or an array.
        shadowing_db: Standard deviation of the zero-mean Gaussian term added
            to the loss. With the default 0 the added term is exactly zero.

        Returns an array with one entry per distance. A scalar input yields an
        array of shape (1,), so callers can keep indexing the result with [0].
        """
        # The original read d_m.size directly, which fails for plain Python
        # floats/ints and for lists. Normalising to an array with at least one
        # dimension keeps the (1,) result shape for scalar inputs.
        d_m = np.atleast_1d(np.asarray(d_m, dtype=float))
        noise = np.random.normal(scale=shadowing_db, size=d_m.shape)
        return self.n * 10 * np.log10(4 * constants.pi * d_m * self.f_mhz * 1E6 / constants.c) + noise

    def seed(self, seed=0):
        """
        Seed the random source used for the shadowing term.

        NOTE: this re-seeds NumPy's global generator (np.random.seed), so it
        also affects every other user of np.random in the process.
        """
        np.random.seed(seed)
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class BoschAgent(RandomAgent):
    """
    Scheduler that picks, on every call, the UE with the highest weighted sum
    of four terms: CQI, buffer occupancy, age of the oldest packet relative to
    its delay budget, and a fairness term based on past assignments.
    """

    def __init__(self, action_space, n_ues, buffer_max_size, max_pkt_size_bits):
        super().__init__(action_space)
        self.t = 0  # Current time step

        self.K = n_ues  # Number of UEs
        self.L = buffer_max_size  # Maximum number of packets per UE buffer
        self.n = np.zeros(n_ues)  # Number of past PRB assignments for each UE
        self.max_pkt_buffer = buffer_max_size*max_pkt_size_bits  # Normaliser for the buffer term

        # Weights of the four priority terms
        self.alpha = 2.07705283  # CQI
        self.beta = 7.74421717  # Buffer occupancy
        self.gamma = 0.0507541467  # Packet age
        self.mu = -0.00151544198  # Fairness

    def act(self, state, reward, done):
        """
        Return the index of the UE to schedule. reward and done are ignored.

        The state vector is read as: K CQI values, K*L packet sizes, K*L packet
        ages, then K one-hot groups of 4 entries encoding the QI of each UE.
        """
        n_ues, n_pkts = self.K, self.L
        sizes_start = n_ues
        ages_start = n_ues * (1 + n_pkts)
        qi_start = n_ues * (1 + 2 * n_pkts)

        cqi = state[0:sizes_start]

        # Sizes in bits of packets in UEs' buffers
        pkt_sizes = np.reshape(state[sizes_start:ages_start], (n_ues, n_pkts))
        bits_per_ue = np.sum(pkt_sizes, axis=1)

        # Packet ages in TTIs; only the oldest packet of each UE matters
        pkt_ages = np.reshape(state[ages_start:qi_start], (n_ues, n_pkts))
        oldest_age = np.max(pkt_ages, axis=1)

        # Decode One-Hot-Encoded QIs
        one_hot = np.reshape(state[n_ues + 2 * n_ues * n_pkts:5 * n_ues + 2 * n_ues * n_pkts], (n_ues, 4))
        qi = np.array([np.where(row == 1)[0][0] for row in one_hot])

        # Packet delay budget associated with each QI value
        delay_budget = np.zeros(qi.shape)
        for qi_value, budget in ((3, 100), (2, 150), (1, 30), (0, 300)):
            delay_budget[qi == qi_value] = budget

        cqi_term = self.alpha*cqi/15
        buffer_term = self.beta*bits_per_ue/self.max_pkt_buffer
        age_term = self.gamma*oldest_age/delay_budget
        fairness_term = self.mu * 1/(1+self.n)

        scores = cqi_term + buffer_term + age_term + fairness_term

        chosen = np.argmax(scores)
        self.n[chosen] += 1
        self.t += 1
        return chosen
class QLearningAgent:
    """
    Tabular epsilon-greedy Q-learning agent.

    The Q-table maps each (hashable) state to a vector holding one value per
    action. States that have never been seen start with all-zero values.
    """

    def __init__(self, seed=1,
                 learning_rate=1,
                 discount_factor=0.995,
                 exploration_rate=1.0,
                 exploration_decay_rate=0.9999,
                 num_actions=4):
        # NOTE(review): the original comment here stated "Episode 458 is the first episode for epsilon min".
        # That depends on how often exploration_rate_update() is called by the training loop - confirm.
        self.learning_rate = learning_rate  # alpha
        self.discount_factor = discount_factor  # gamma
        self.exploration_rate = exploration_rate  # epsilon
        self.exploration_rate_min = 0.010
        self.exploration_decay_rate = exploration_decay_rate  # d
        self.seed = seed
        # The seed used to be stored but never applied, so act() depended on the state of NumPy's global
        # generator. A private generator makes the sampled actions a function of the seed argument only.
        self._rng = np.random.RandomState(seed)
        self.num_actions = num_actions
        self.q_table = defaultdict(lambda: np.zeros(self.num_actions))

    def _policy(self, state):
        """
        Returns the probabilities for each action.

        Every action gets epsilon / num_actions; the greedy action (argmax of
        the Q-values of the state) additionally gets 1 - epsilon.
        """
        action_probs = np.ones(self.num_actions, dtype=float) * self.exploration_rate / self.num_actions
        best_action = np.argmax(self.q_table[state])
        action_probs[best_action] += (1.0 - self.exploration_rate)
        return action_probs

    def td_update(self, state, action, next_state, reward):
        """
        Apply one Q-learning update:
        Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        """
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action]
        td_delta = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.learning_rate * td_delta

    def exploration_rate_update(self):
        """
        Decay epsilon by the decay rate, never going below exploration_rate_min.
        """
        self.exploration_rate *= self.exploration_decay_rate
        self.exploration_rate = max(self.exploration_rate, self.exploration_rate_min)

    def act(self, state, *_):
        """
        Sample an action for the given state from the epsilon-greedy policy.
        Any additional positional arguments are accepted and ignored.
        """
        action_probs = self._policy(state)
        action = self._rng.choice(np.arange(len(action_probs)), p=action_probs)
        return action
def test_variability(self, env):
    """
    Check that a second environment driven with a different seed does not
    reproduce the trajectory recorded from the first one.
    """
    env.seed(seed=1234)
    random.seed(1234)
    states = []
    rewards = []
    dones = []
    for t in range(64):
        action = random.randint(0, 3)
        state, reward, done, _ = env.step(action)
        states.append(state)
        rewards.append(reward)
        dones.append(done)

    env2 = gym.make('UlOpenLoopPowerControl-v0')  # Init environment
    env2.seed(seed=12345)
    random.seed(12345)
    for t in range(64):
        action = random.randint(0, 3)
        # The original stepped `env` here, so the freshly created and seeded env2 was never exercised and
        # the comparison was made against the continuation of the first trajectory.
        state, reward, done, _ = env2.step(action)
        if not np.array_equal(state, states[t]):
            return
        if reward != rewards[t]:
            return
        if done != dones[t]:
            return

    pytest.fail("Different seeds produced the same results.")
def test_variability(self, env64):
    """
    Record 64 steps with seed 1234, replay the same actions on a second
    environment seeded with 12345, and fail if nothing ever differs.
    """
    env64.seed(seed=1234)
    recorded = []
    for ue in range(64):
        ref_state, ref_reward, ref_done, _ = env64.step(ue)
        recorded.append((ref_state, ref_reward, ref_done))

    other_env = gym.make('TimeFreqResourceAllocation-v0', n_ues=64)  # Init environment
    other_env.seed(seed=12345)
    for ue, (ref_state, ref_reward, ref_done) in enumerate(recorded):
        state, reward, done, _ = other_env.step(ue)
        # Any single difference is enough to prove that the seed matters
        if (not np.array_equal(state, ref_state)) or reward != ref_reward or done != ref_done:
            return

    pytest.fail("Different seeds produced the same results.")
def run_n_episodes(num_episodes, env, agent, seed=0, log_progress=True):
    """
    Run num_episodes episodes of the agent on the environment.

    Before each episode the environment is re-seeded with seed + e and the
    current exploration rate of the agent is stored in the module-level
    epsilon array. SNR traces are saved for the episodes listed in
    episodes_to_save.

    num_episodes: Number of episodes to run.
    env: Environment exposing seed(), reset() and step().
    agent: Agent exposing exploration_rate, act(), td_update() and exploration_rate_update().
    seed: Base seed; episode e uses seed + e.
    log_progress: If True, print a progress line roughly ten times over the run.
    """
    # round(num_episodes / 10) is 0 for num_episodes <= 5, which made the modulo below raise
    # ZeroDivisionError. Logging every episode is the sensible behaviour for such short runs.
    log_period = max(1, round(num_episodes / 10))

    for e in range(num_episodes):
        if log_progress and e % log_period == 0:
            print(f"\rEpisode {e}/{num_episodes}.")

        env.seed(seed=seed + e)
        epsilon[e] = agent.exploration_rate
        run_episode(e, env, agent, save_snr=e in episodes_to_save)
class ProportionalFairAgent(RandomAgent):
    """
    Proportional-fair scheduler: each UE is ranked by the urgency of its
    oldest packet, weighted by its buffered bits and divided by the number of
    allocations it has already received. The CQI is not used by this agent.
    """

    def __init__(self, action_space, n_ues, buffer_max_size):
        super().__init__(action_space)
        self.t = 0  # Current time step

        self.K = n_ues  # Number of UEs
        self.L = buffer_max_size  # Maximum number of packets per UE buffer
        self.n = np.zeros(n_ues)  # Number of past PRB assignments for each UE

    def _calculate_priorities(self, cqi, o, b, buffer_size_per_ue):
        # cqi is part of the signature so that subclasses can use it; it is ignored here
        urgency = (1 + o) / b
        demand = urgency * buffer_size_per_ue
        return demand / (1 + self.n)

    @staticmethod
    def parse_state(state, num_ues, max_pkts):
        """
        Split the flat state vector into per-UE quantities.

        Returns (o, cqi, b, buffer_size_per_ue): age of the oldest packet,
        CQI, packet delay budget and total buffered bits, each with one entry
        per UE.
        """
        sizes_start = num_ues
        ages_start = num_ues * (1 + max_pkts)
        ages_end = num_ues * (1 + 2 * max_pkts)

        cqi = state[0:sizes_start]

        # Sizes in bits of packets in UEs' buffers
        pkt_sizes = np.reshape(state[sizes_start:ages_start], (num_ues, max_pkts))
        buffer_size_per_ue = np.sum(pkt_sizes, axis=1)

        # Packet ages in TTIs; keep the age of the oldest packet of each UE
        pkt_ages = np.reshape(state[ages_start:ages_end], (num_ues, max_pkts))
        o = np.max(pkt_ages, axis=1)

        # Decode One-Hot-Encoded QIs (4 entries per UE)
        one_hot = np.reshape(state[num_ues + 2 * num_ues * max_pkts:5 * num_ues + 2 * num_ues * max_pkts],
                             (num_ues, 4))
        qi = np.array([np.where(row == 1)[0][0] for row in one_hot])

        # Packet delay budget associated with each QI value
        b = np.zeros(qi.shape)
        for qi_value, budget in ((3, 100), (2, 150), (1, 30), (0, 300)):
            b[qi == qi_value] = budget

        return o, cqi, b, buffer_size_per_ue

    def act(self, state, reward, done):
        """
        Return the index of the UE with the highest priority and count the
        allocation. reward and done are ignored.
        """
        o, cqi, b, buffer_size_per_ue = self.parse_state(state, self.K, self.L)
        scores = self._calculate_priorities(cqi, o, b, buffer_size_per_ue)

        chosen = np.argmax(scores)
        self.n[chosen] += 1
        self.t += 1
        return chosen
class UlOpenLoopPowerControl(Env):
    BTS_POS = [0, 0]  # Base Transceiver Station position
    P0_TX_UE_DBM = +3  # Initial uplink transmit power of User Equipment (UE).
    UE_V = 2  # UE speed in m/s
    DT_MS = 20  # Time equivalence of one step
    SNR_MIN = -20  # Minimum measurable SNR value in dB
    SNR_MAX = 20  # Maximum measurable SNR value in dB

    def __init__(self, x_max_m=10, y_max_m=10, f_carrier_mhz=2655, bw_mhz=10, snr_tgt_db=4, t_max=512, n=3):
        """
        This environment implements a scenario with a BTS at coordinates
        [0, 0] and one UE at a random location. Each step the UE moves
        linearly in a random direction with constant speed 2 m/s.
        The agent interacting with the environment is the BTS.
        On each time step the agent must select one of four possible Power
        Control (PC) commands to increase/decrease the UL transmit power. The
        objective of this power control is to measure an UL SNR as close as
        possible to the SNR target (4 dB by default). The PC commands (i.e.
        action space) are:
            Action 0 --> -1 dB
            Action 1 -->  0 dB
            Action 2 --> +1 dB
            Action 3 --> +3 dB

        As output of each step, the environment returns the following to the
        invoking agent:
            State:  Current UL SNR (single integer value between -20 and +20 with 1 dB step resolution)
            Reward:  0 if |SNR-SNR_target| <= 1 dB
                    -1 otherwise

        Parameters:
            x_max_m, y_max_m: Width and height of the area in which the UE is initially dropped.
            f_carrier_mhz: Carrier frequency in MHz, passed to the propagation model.
            bw_mhz: Bandwidth in MHz used to compute the thermal noise.
            snr_tgt_db: SNR target in dB used by the reward.
            t_max: Number of steps after which an episode is reported as done.
            n: Attenuation exponent of the propagation model.
        """
        self._seed = None
        self.x_max_m = x_max_m  # Width of 2D scenario
        self.y_max_m = y_max_m  # Height of 2D scenario
        self.bts_pos = list(self.BTS_POS)  # Copy, so that the class constant cannot be mutated through the instance
        self.f_carrier_mhz = f_carrier_mhz
        self.bw_mhz = bw_mhz
        self.snr_tgt_db = snr_tgt_db
        self.t_max = t_max
        self.propagation_model = PropModel(self.f_carrier_mhz, n=n)
        self.ue_pos = None  # To be initialized in reset
        self.v_x = None  # To be initialized in reset
        self.v_y = None  # To be initialized in reset
        self.p_tx_ue_dbm = None  # To be initialized in reset
        self.step_count = None  # To be initialized in reset
        self.state = None  # To be initialized in reset

        # The state is a single integer SNR value in [SNR_MIN, SNR_MAX]. The previous declaration (a 2-D
        # uint8 box spanning -10..+10) matched neither the dimensionality, the range nor the sign of the state.
        # NOTE: step() and reset() return the SNR as a plain scalar, not wrapped in an array.
        self.observation_space = spaces.Box(low=self.SNR_MIN, high=self.SNR_MAX, shape=(1,), dtype=np.int32)
        self.action_space = spaces.Discrete(4)

        self.seed()
        self.reset()

    def seed(self, seed=0):
        """
        Seed Python's random module, NumPy's global generator and the propagation model.
        """
        random.seed(seed)
        np.random.seed(seed)
        self.propagation_model.seed(seed=seed)
        self._seed = seed

    def _calculate_ul_snr(self):
        """
        Return the current UL SNR in dB, rounded to an integer and clipped to [SNR_MIN, SNR_MAX].
        """
        ue_bts_distance_m = la.norm(self.ue_pos - self.bts_pos)
        loss_db = self.propagation_model.get_free_space_pl_db(ue_bts_distance_m)[0]
        p_rx_dbm = self.p_tx_ue_dbm - loss_db
        n_mw = calculate_thermal_noise(self.bw_mhz)
        snr_db = p_rx_dbm - 10 * np.log10(n_mw)
        # int() guarantees a plain Python integer regardless of what round() returns for NumPy scalars
        snr_db = int(round(snr_db))
        return max(min(snr_db, self.SNR_MAX), self.SNR_MIN)

    def render(self, mode='human'):
        pass

    def reset(self):
        """
        Drop the UE at a random position, pick a random direction of movement, restore the initial
        transmit power and return the resulting SNR as the initial state.
        """
        self.ue_pos = np.random.rand(2) * np.array([self.x_max_m, self.y_max_m])

        theta = random.random() * 2 * constants.pi  # Random direction
        self.v_x = math.cos(theta) * self.UE_V
        self.v_y = math.sin(theta) * self.UE_V

        self.p_tx_ue_dbm = self.P0_TX_UE_DBM
        self.step_count = 0
        self.state = self._calculate_ul_snr()
        return self.state

    def _update_tx_pwr(self, action):
        """
        Apply the power control command to the UE transmit power. Action 1 leaves the power unchanged.
        """
        if action == 0:
            self.p_tx_ue_dbm -= 1
        elif action == 2:
            self.p_tx_ue_dbm += 1
        elif action == 3:
            self.p_tx_ue_dbm += 3

    def step(self, action):
        """
        Move the UE for one time step, apply the power control command and return
        (state, reward, done, info), where info is always an empty dict.
        """
        assert self.action_space.contains(action)
        self.ue_pos += np.array([self.v_x, self.v_y]) * self.DT_MS * 1E-3  # Move UE (DT_MS is in ms)
        self._update_tx_pwr(action)
        self.step_count += 1
        snr = self._calculate_ul_snr()
        self.state = snr  # Update state
        reward = 0 if np.abs(snr - self.snr_tgt_db) <= 1 else -1
        done = self.step_count >= self.t_max

        return self.state, reward, done, {}
RandomAgent.__init__(self, action_space) 19 | self.t = 0 # Current time step 20 | 21 | self.K = n_ues # Number of UEs 22 | self.L = buffer_max_size # Maximum number of packets per UE buffer 23 | self.M = n_ues_per_prb # Maximum number of users multiplexed on a PRB 24 | self.n = np.zeros(n_ues) # Number of past PRB assignments for each UE 25 | self.n_mw = n_mw # Thermal noise in mW 26 | self.sinr_coeff = sinr_coeff # Rho coefficient to map SINR to spectral efficient. 27 | 28 | interference_dbm = -105 # Constant interference level throughout the coverage area 29 | self._interference_mw = 10 ** (interference_dbm / 10) 30 | self._cqi2rx_pwr_mw = (np.power(2, self.CQI2SE)-1) * (self.n_mw+self._interference_mw) * self.sinr_coeff 31 | 32 | # All possible allocations: all permutations of self.K our of self.M UEs 33 | self._permutations = list(itertools.permutations(range(self.K), self.M)) 34 | # WSR of each permutation stored as a heapq 35 | # It is re-computed entirely when p == 0, and updated lazily at each other step 36 | self._permutations_wsr = None 37 | 38 | def _calculate_wsr(self, perm_idx, rx_pwr_mw, w): 39 | permutation = self._permutations[perm_idx] 40 | cumulated_rx_pwr_mw = 0 41 | wsr = 0 42 | for pos in range(self.M - 1, -1, -1): 43 | # ue decoded in pos-th order 44 | ue = permutation[pos] 45 | # SINR taking into account the interference from other UEs superposed on the same PRB 46 | sinr = rx_pwr_mw[ue] / (self.n_mw + self._interference_mw + cumulated_rx_pwr_mw) 47 | wsr += w[ue] * np.log2(1 + sinr / self.sinr_coeff) # DL spectral efficiency in bps/Hz 48 | # Store the current UE rx_pwr_mw as interference 49 | cumulated_rx_pwr_mw += rx_pwr_mw[ue] 50 | return wsr 51 | 52 | def act(self, state, reward, done): 53 | o, cqi, b, buffer_size_per_ue = ProportionalFairAgent.parse_state(state, self.K, self.L) 54 | 55 | p = state[-1] 56 | 57 | w = (1+o)/b * buffer_size_per_ue # Weight of each UE in the PF scheduler 58 | rx_pwr_mw = np.zeros(shape=(self.K,)) # Receive 
power of each UE 59 | for i in range(16): 60 | rx_pwr_mw[cqi == i] = self._cqi2rx_pwr_mw[i] 61 | 62 | # Weighted sum-rate maximization considering w and rx_pwr_mw: 63 | # Find the M UEs out of K that maximize sum w[i]*se[i] 64 | if p == 0 or self._permutations_wsr is None: # Re-compute entirely self._permutations_wsr 65 | self._permutations_wsr = [] 66 | heapq.heapify(self._permutations_wsr) 67 | for index in range(len(self._permutations)): 68 | wsr = self._calculate_wsr(index, rx_pwr_mw, w) 69 | heapq.heappush(self._permutations_wsr, (-wsr, index)) 70 | 71 | max_wsr, max_wsr_index = heapq.heappop(self._permutations_wsr) 72 | heapq.heappush(self._permutations_wsr, (max_wsr, max_wsr_index)) 73 | # When p!=0, perform lazy update since: 74 | # 1) The weights w has only changed (decreased) for up to self.M UEs 75 | # 2) In addition, the spectral efficiency have not changed 76 | else: 77 | while True: 78 | old_wsr, max_wsr_index = heapq.heappop(self._permutations_wsr) 79 | old_wsr = -old_wsr 80 | new_wsr = self._calculate_wsr(max_wsr_index, rx_pwr_mw, w) # Compute the new WSR 81 | assert old_wsr >= new_wsr, "The WSR should only decrease in the lazy updates" 82 | if old_wsr == new_wsr: 83 | # This WSR has not changed -> it is still the highest value 84 | # Push it back in the heapq and terminate the while loop 85 | heapq.heappush(self._permutations_wsr, (-old_wsr, max_wsr_index)) 86 | break 87 | else: 88 | # Otherwise, we update its WSR info and push it in the heapq 89 | # The while loop continues 90 | heapq.heappush(self._permutations_wsr, (-new_wsr, max_wsr_index)) 91 | 92 | action = list(self._permutations[max_wsr_index]) 93 | self.n[action] += 1 94 | self.t += 1 95 | return action 96 | -------------------------------------------------------------------------------- /wireless/envs/noma_ul_time_freq_resource_allocation_v0.py: -------------------------------------------------------------------------------- 1 | """ 2 | © 2020 Nokia 3 | Licensed under the BSD 3 Clause 
class NomaULTimeFreqResourceAllocationV0(TimeFreqResourceAllocationV0):
    """Uplink NOMA variant of ``TimeFreqResourceAllocationV0``.

    Up to ``M`` UEs can be multiplexed on each PRB. An action is a sequence of ``M`` entries, each
    one either a UE index (0..K-1) or the NOOP value ``K`` (no UE selected at that position).
    """

    def __init__(self, n_ues=32, n_prbs=25, n_ues_per_prb=2, buffer_max_size=32, eirp_dbm=13, f_carrier_mhz=2655,
                 max_pkt_size_bits=41250, it=10, t_max=65536):
        """Build the parent environment and widen the action space to ``n_ues_per_prb`` entries."""
        super().__init__(n_ues, n_prbs, buffer_max_size, eirp_dbm, f_carrier_mhz, max_pkt_size_bits, it, t_max)

        self.M = n_ues_per_prb  # Maximum number of users multiplexed on a PRB
        # Every entry takes K+1 values: the K UEs plus the NOOP value K
        self.action_space = spaces.MultiDiscrete([self.K + 1 for _ in range(self.M)])

    def reset(self):
        """Clear the per-UE received powers, then reset the parent environment and return its state."""
        self.rx_pwr_mw = np.zeros((self.K,))  # Received powers at the current time step
        initial_state = super().reset()
        return initial_state

    def step(self, action):
        """Serve the UEs selected for the current PRB and advance the environment by one time step.

        Args:
            action: Sequence of ``M`` entries (UE index or NOOP ``K``). When a UE appears more than
                once, only its first occurrence is kept; the other positions become NOOP.

        Returns:
            Tuple ``(state, reward, done, info)``. The reward is only computed on the last PRB of a
            TTI (PRB counter wrapping to 0) and is 0 otherwise.
        """
        assert self.action_space.contains(action), f"{action} ({type(action)}) invalid"

        # Noise plus the constant external interference: identical for every UE, so computed once
        interference_dbm = -105  # Constant interference level throughout the coverage area
        interference_mw = 10 ** (interference_dbm / 10)
        noise_floor_mw = self.n_mw + interference_mw

        # Convert action to a numpy array in case it is a list, and keep only the first occurrence of each UE
        requested = np.array(action)
        _, first_seen = np.unique(requested, return_index=True)
        schedule = np.ones(self.M,dtype=np.uint32)*self.K
        schedule[first_seen] = requested[first_seen]

        # Execute action from the last to the first decoded UE
        cumulated_rx_pwr_mw = 0
        for ue_action in schedule[::-1]:
            # NOOP is defined as := self.K, while the UEs are 0, ... ,self.K-1
            if ue_action == self.K:
                continue
            if np.sum(self.s[ue_action, :]) <= 0:  # Nothing to transmit for this UE
                continue

            # Find oldest packet among the non-empty slots of the UE's buffer
            has_data = self.s[ue_action, :] > 0
            l_old = np.flatnonzero(has_data)[np.argmax(self.e[ue_action, has_data])]

            assert self.s[ue_action, l_old] > 0, f"t={self.t}. Oldest packet has size {self.s[ue_action, l_old]} " +\
                                                 f"and age {self.e[ue_action, l_old]}. " +\
                                                 f"User has {np.sum(self.s[ue_action, :])} bits in buffer."  # Sanity check

            # SINR taking into account the interference from other UEs superposed on the same PRB
            sinr = self.rx_pwr_mw[ue_action] / (noise_floor_mw + cumulated_rx_pwr_mw)
            # Spectral efficiency in bps/Hz, with an upper bound of 9.6
            se = np.clip(np.log2(1 + sinr / self.SINR_COEFF), 0, 9.6)
            tx_data_bits = floor(se * self.bw_mhz / self.Nf * 1E3)  # Bits that can be transmitted
            # Store the current UE rx_pwr_mw as interference
            cumulated_rx_pwr_mw += self.rx_pwr_mw[ue_action]

            while tx_data_bits > 0 and self.s[ue_action, l_old] > 0:  # While there are packets & available capacity
                pkt_bits = self.s[ue_action, l_old]
                if tx_data_bits < pkt_bits:  # Partial packet transmission
                    self.s[ue_action, l_old] -= tx_data_bits
                    break
                # Full packet transmission
                tx_data_bits -= pkt_bits
                self.s[ue_action, l_old] = 0
                self.e[ue_action, l_old] = 0
                l_old = np.argmax(self.e[ue_action, :])  # Find oldest packet in UE's buffer

        self.t += 1  # Update time-step
        self.p = self.t % self.Nf  # Update PRB counter
        reward = 0
        if self.p == 0:  # All PRBs of the TTI have been allocated
            reward = self._calculate_reward()
            self.tti += 1  # Update TTI counter
            self.e[self.s > 0] += 1  # Age buffer packets
            self._generate_traffic()
            self._move_ues()
            self._recalculate_rf()

        self._update_state()
        done = bool(self.t >= self.t_max)
        return np.array(self.state), reward, done, {}

    def _calculate_spectral_efficiency(self, rx_pwr_dbm):
        """Store the received power (mW) and the interference-free spectral efficiency of every UE.

        Args:
            rx_pwr_dbm: Received power of each UE in dBm.
        """
        interference_dbm = -105  # Constant interference level throughout the coverage area

        rx_mw = (10 ** (rx_pwr_dbm / 10))  # Rx power in mW
        self.rx_pwr_mw = rx_mw  # Kept so that step() can derive the SINR of superposed UEs
        interference_mw = 10 ** (interference_dbm / 10)

        sinr = rx_mw / (self.n_mw + interference_mw)
        unbounded_se = np.log2(1 + sinr / self.SINR_COEFF)  # Spectral efficiency in bps/Hz

        # Define an upper bound for the spectral efficiency.
        self.spectral_efficiency = np.clip(unbounded_se, 0, 9.6)
def test_state_features(self):
    """Check the value ranges of the state features collected over a run with random actions."""
    n_ues = 64
    n_steps = 512
    env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=n_ues, eirp_dbm=7)  # Low power to have some CQI=0
    env.seed(seed=1234)
    M = env.M  # Read from the environment instead of duplicating its default value

    state, _, _, _ = env.step([0]*M)  # Get state to measure its length
    states = np.zeros((n_steps, len(state)), dtype=np.uint32)  # Memory pre-allocation
    for t in range(n_steps):
        action = np.random.randint(0,n_ues,size=M)
        state, _, _, _ = env.step(action)
        states[t, :] = state

    # Column layout of the state: CQIs, then packet sizes, then packet ages; the PRB counter is last
    n_buffer_slots = n_ues * env.L
    cqi = states[:, :n_ues]
    sizes = states[:, n_ues:n_ues + n_buffer_slots]
    ages = states[:, n_ues + n_buffer_slots:n_ues + 2 * n_buffer_slots]
    prb_counter = states[:, -1]

    # Check CQI range
    assert cqi.min() == 0
    assert cqi.max() == 15
    assert 0 < cqi.mean() < 15
    assert cqi.std() > 1

    # Check size (in bits) of packets in UEs' buffers
    assert sizes.min() == 0
    assert sizes.max() >= 41250
    assert sizes.mean() > 100

    # Check age (in ms) of packets in UEs' buffers
    assert ages.min() == 0
    assert ages.max() > 10  # Less n_prbs yield higher ages

    # TODO: Maybe also check QI

    # Check PRB counter
    assert prb_counter.min() == 0
    assert prb_counter.max() == env.Nf-1
**OPTIONAL**: Modify the script *scripts/launch_agent.py* to execute a problem of your choosing. 27 | 28 | 4. **OPTIONAL**: Modify the configuration of your problem at *config/config_environment.json*. 29 | 30 | 5. Simulate an agent-environment interaction: 31 | ``` 32 | cd wireless/scripts 33 | python launch_agent.py 34 | ``` 35 | 36 | ## Provided problems 37 | 38 | ### TimeFreqResourceAllocation-v0 39 | This environment simulates an OFDM resource allocation task, where a limited number of frequency resources are to be 40 | allocated to a large number of User Equipments (UEs) over time. 41 | An agent interacting with this environment plays the role of the MAC scheduler. On each time step, the agent must 42 | allocate one frequency resource to one of a large number of UEs. The agent gets rewarded for these resource allocation 43 | decisions. The reward increases with the number of UEs whose traffic requirements are satisfied. 44 | The traffic requirements for each UE are expressed in terms of their Guaranteed Bit Rate (if any) and their Packet 45 | Delay Budget (PDB). 46 | 47 | You are invited to develop a new agent that interacts with this environment and takes effective resource allocation 48 | decisions. 49 | Five sample agents are provided for reference in the *wireless/agents* folder. 50 | The performance obtained by the default agents on the default environment configuration is: 51 | * Random -69590 52 | * Round Robin -69638 53 | * Round Robin IfTraffic -3284 54 | * Proportional Fair -9595 55 | * Proportional Fair Channel Aware -1729 56 | 57 | Note that the above average rewards are negative values. The best performing agent is thus the Proportional Fair Channel Aware.
58 | 59 | Additional details about this problem are provided in the document *wireless/doc/TimeFreqResourceAllocation-v0.pdf* 60 | 61 | ### NomaULTimeFreqResourceAllocation-v0 62 | This environment is an extension of the above TimeFreqResourceAllocation-v0 environment, with the difference that it 63 | allows multiple UEs to be allocated on a time-frequency resource. It consists of an uplink power-domain NOMA system, 64 | wherein the base station receives superimposed signals from the multiplexed UEs and performs successive interference 65 | cancellation (SIC) to decode them. 66 | 67 | The default environment can be obtained by setting `"env": "NomaULTimeFreqResourceAllocation-v0"` and 68 | `"n_ues_per_prb": 2` in *config/config_environment.json*. 69 | Two sample agents are provided for reference in the *wireless/agents* folder. 70 | The performance obtained on the default environment configuration is: 71 | * Random -33499 72 | * NOMA UL Proportional Fair Channel Aware -1431 73 | 74 | ### UlOpenLoopPowerControl-v0 75 | This environment simulates a free-space scenario with a Base Station located at coordinates [0, 0] and one UE at a 76 | random location. On each time step, the UE moves linearly in a random direction with constant speed 2 m/s. The agent 77 | interacting with the environment plays the role of the Base Station. On each time step the agent must select one of 78 | four possible Power Control (PC) commands to increase/decrease the uplink transmit power. The objective is to measure an 79 | uplink SNR as close as possible to the SNR target (4 dB by default). 80 | 81 | See the file `envs/umts_olpc.py` for more details. 82 | 83 | ### Evaluation 84 | The simulated environment can be chosen by setting `"env": "TimeFreqResourceAllocation-v0"` or `"env": "NomaULTimeFreqResourceAllocation-v0"` in *config/config_environment.json*.
The script *wireless/scripts/launch_agent.py* runs 16 episodes with a maximum of 65536 time steps each, and collects the reward 85 | obtained by the agent on each time step. The result is calculated as the average reward obtained in all time steps on all episodes. 86 | 87 | ## How to contribute 88 | There are two main ways of contributing to Wireless Suite: 89 | 90 | 1. **Implementing new problems**: This version of Wireless Suite contains two problem implementations. New 91 | problems can be easily added as simple variations of the existing ones (e.g. by changing their parameters), or by introducing 92 | fully new problem implementations (e.g. Adaptive Modulation and Coding, Open Loop Power Control, Handover optimization, 93 | etc). 94 | 95 | 2. **Implementing new agents**: Ideally, new agent contributions shall perform better than the default ones. 96 | 97 | ## References 98 | 1. [Open AI Gym Documentation](http://gym.openai.com/docs/) 99 | 2. [How to create new environments for Gym](https://github.com/openai/gym/blob/master/docs/creating-environments.md) 100 | 3. [Sacred Documentation](https://sacred.readthedocs.io/en/stable/index.html) 101 | 102 | 103 | ## License 104 | 105 | This project is licensed under the BSD-3-Clause license - see the [LICENSE](https://github.com/nokia/wireless-suite/blob/master/LICENSE) file.
@ex.automain
def main(_run):
    """Run the configured agent against the configured environment and return its mean reward.

    The number of episodes and the maximum number of steps per episode are read from the "agent"
    section of the Sacred configuration; the environment parameters from its "env" section.
    KPIs of some UEs and the rewards are logged to Sacred periodically.

    Args:
        _run: Sacred run object, giving access to the configuration and to ``log_scalar``.

    Returns:
        Mean of the rewards over all time steps of all episodes. Steps that were not executed
        because an episode finished early count as 0.

    Raises:
        NotImplementedError: If the configured environment, or the configured agent type for that
            environment, is not supported.
    """
    n_eps = _run.config["agent"]["n_episodes"]
    t_max = _run.config['agent']['t_max']
    env_cfg = _run.config['env']
    env_name = env_cfg['env']
    agent_type = ac["agent"]["agent_type"]
    n_sf = t_max//env_cfg['n_prbs']  # Number of complete subframes to run per episode
    log_period_t = max(1, (n_sf//ns)*env_cfg['n_prbs'])  # Only log rwd on last step of each subframe

    def is_log_step(t):
        # Log every step of very short episodes, otherwise only the last step of every log period
        return t_max < ns or (t > 0 and (t+1) % log_period_t == 0)

    rwd = np.zeros((n_eps, t_max))  # Memory allocation

    # Simulate
    for ep in range(n_eps):  # Run episodes
        seed = _run.config['seed'] + ep  # One distinct, reproducible seed per episode

        if env_name == 'TimeFreqResourceAllocation-v0':
            env = gym.make('TimeFreqResourceAllocation-v0', n_ues=env_cfg['n_ues'],
                           n_prbs=env_cfg['n_prbs'], buffer_max_size=env_cfg['buffer_max_size'],
                           eirp_dbm=env_cfg['eirp_dbm'], f_carrier_mhz=env_cfg['f_carrier_mhz'],
                           max_pkt_size_bits=env_cfg['max_pkt_size_bits'],
                           it=env_cfg['non_gbr_traffic_mean_interarrival_time_ttis'])  # Init environment
            env.seed(seed=seed)

            # Init agent
            if agent_type == "random":
                agent = RandomAgent(env.action_space)
                agent.seed(seed=seed)
            elif agent_type == "round robin":
                agent = RoundRobinAgent(env.action_space, env.K, env.L)
            elif agent_type == "round robin iftraffic":
                agent = RoundRobinIfTrafficAgent(env.action_space, env.K, env.L)
            elif agent_type == "proportional fair":
                agent = ProportionalFairAgent(env.action_space, env.K, env.L)
            elif agent_type == "proportional fair channel aware":
                agent = ProportionalFairChannelAwareAgent(env.action_space, env.K, env.L)
            elif agent_type == "knapsack":
                agent = Knapsackagent(env.action_space, env.K, env.L, env.Nf)
            elif agent_type == "Bosch":
                agent = BoschAgent(env.action_space, env.K, env.L, env.max_pkt_size_bits)
            else:
                # NotImplemented is a comparison sentinel, not an exception: raising it fails with a TypeError
                raise NotImplementedError(f"Agent type '{agent_type}' is not supported by environment '{env_name}'")

        elif env_name == 'NomaULTimeFreqResourceAllocation-v0':
            env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=env_cfg['n_ues'],
                           n_prbs=env_cfg['n_prbs'], n_ues_per_prb=env_cfg['n_ues_per_prb'],
                           buffer_max_size=env_cfg['buffer_max_size'],
                           eirp_dbm=env_cfg['eirp_dbm'], f_carrier_mhz=env_cfg['f_carrier_mhz'],
                           max_pkt_size_bits=env_cfg['max_pkt_size_bits'],
                           it=env_cfg['non_gbr_traffic_mean_interarrival_time_ttis'])  # Init environment
            env.seed(seed=seed)

            # Init agent
            if agent_type == "random":
                agent = RandomAgent(env.action_space)
                agent.seed(seed=seed)
            elif agent_type == "proportional fair channel aware":
                agent = NomaULProportionalFairChannelAwareAgent(env.action_space, env.K, env.M, env.L, env.n_mw,
                                                                env.SINR_COEFF)
            else:
                raise NotImplementedError(f"Agent type '{agent_type}' is not supported by environment '{env_name}'")
        else:
            raise NotImplementedError(f"Environment '{env_name}' is not supported")

        reward = 0
        done = False
        state = env.reset()
        for t in range(t_max):  # Run one episode
            # Collect progress
            if is_log_step(t):
                s = np.reshape(state[env.K:env.K * (1 + env.L)], (env.K, env.L))
                qi_ohe = np.reshape(state[env.K+2*env.K*env.L:5*env.K + 2*env.K*env.L], (env.K, 4))
                qi = [np.where(r == 1)[0][0] for r in qi_ohe]  # Decode One-Hot-Encoded QIs
                for u in range(0, env.K, env.K//2):  # Log KPIs for some UEs
                    _run.log_scalar(f"Episode {ep}. UE {u}. CQI vs time step", state[u], t)
                    _run.log_scalar(f"Episode {ep}. UE {u}. Buffer occupancy [bits] vs time step", np.sum(s[u, :]), t)
                    _run.log_scalar(f"Episode {ep}. UE {u}. QoS Identifier vs time step", qi[u], t)

            action = agent.act(state, reward, done)
            state, reward, done, _ = env.step(action)

            # Collect progress
            if is_log_step(t):
                _run.log_scalar(f"Episode {ep}. Rwd vs time step", reward, t)

            rwd[ep, t] = reward
            if done:
                break
            if (ep*t_max + t) % log_period_t == 0:
                print(f"{(ep*t_max + t)*100/(n_eps*t_max):3.0f}% completed.")

        env.close()

    if n_eps > 1:
        rwd_avg = np.mean(rwd, axis=0)  # Reward of each time step averaged over the episodes
        for t in range(t_max):
            if is_log_step(t):
                _run.log_scalar("Mean rwd vs time step", rwd_avg[t], t)

    result = np.mean(rwd)  # Save experiment result
    print(f"Result: {result}")
    return result
def __init__(self, n_ues=32, n_prbs=25, buffer_max_size=32, eirp_dbm=13, f_carrier_mhz=2655,
             max_pkt_size_bits=41250, it=10, t_max=65536):
    """Configure the environment, then seed and reset it.

    Args:
        n_ues: Number of UEs (K). Must be a multiple of 4.
        n_prbs: Number of Physical Resource Blocks (Nf).
        buffer_max_size: Maximum number of packets per UE buffer (L).
        eirp_dbm: EIRP in dBm used to compute the received power.
        f_carrier_mhz: Carrier frequency in MHz.
        max_pkt_size_bits: Upper bound of the packet-size features of the observation space.
        it: Mean inter-packet arrival time for Non-GBR traffic.
        t_max: Number of time steps after which an episode is done.

    Raises:
        AssertionError: If ``n_ues`` is not a multiple of 4.
    """
    # Validate before anything is built: reset() creates the QoS table with K // 4 UEs per class, so
    # checking only after reset() (as done previously) let an inconsistent table be used first.
    assert n_ues % 4 == 0, "K must be a multiple of 4 in order to have the same number of UEs per QoS class."

    super().__init__()
    self._seed = None
    self.K = n_ues  # Number of UEs
    self.Nf = n_prbs  # Number of Physical Resource Blocks (PRBs)
    self.L = buffer_max_size  # Maximum number of packets per UE buffer
    self.it = it  # Mean inter-packet arrival time for Non-GBR traffic
    self.EIRP_DBM = eirp_dbm
    self.f_carrier_mhz = f_carrier_mhz  # Carrier frequency
    self.max_pkt_size_bits = max_pkt_size_bits
    self.t_max = t_max
    self.tti_max = ceil(t_max/n_prbs)  # Number of TTIs needed to run t_max time steps

    self.bts_pos = [self.x_max_m / 2, self.y_max_m / 2]  # Base station at the centre of the scenario
    self.propagation_model = PropModel(self.f_carrier_mhz)
    # NOTE(review): the bandwidth is scaled by 1E-6 before the call; the unit expected by
    # calculate_thermal_noise is not visible from here - confirm.
    self.n_mw = calculate_thermal_noise(self.bw_mhz * 1E-6)

    self.low = np.array([0] * self.K +  # CQI
                        [0] * self.K * self.L +  # Size (in bits) of packets in UEs' buffers
                        [0] * self.K * self.L +  # Age (in ms) of packets in UEs' buffers
                        [0, 0, 0, 0] * self.K +  # QoS Identifier classes (ohe) of all UEs
                        [0])  # Index of the current PRB being allocated
    self.high = np.array([15] * self.K +  # CQI
                         [self.max_pkt_size_bits] * self.K * self.L +  # Size (in bits) of packets in UEs' buffers
                         [self.tti_max] * self.K * self.L +  # Age (in ms) of packets in UEs' buffers
                         [1, 1, 1, 1] * self.K +  # QoS Identifier classes (ohe) of all UEs
                         [self.Nf - 1])  # Index of the current PRB being allocated
    self.observation_space = spaces.Box(self.low, self.high, dtype=np.uint32)

    self.action_space = spaces.Discrete(self.K)
    # NOTE(review): _calculate_reward() returns non-positive values, which does not match this range - confirm.
    self.reward_range = (0, 1)

    # Features of observation vector
    self.cqi = None
    self.s = None  # Sizes in bits of all packets in each UE's buffer
    self.e = None  # Ages in TTIs of all packets in each UE's buffer
    self.qi = None
    self.p = 0

    # Internal state features
    self.t = 0  # Time step
    self.tti = 0  # Transmission Time Interval (TTI) counter
    self.ue_pos = None  # UE positions in meters
    self.ue_v_mps = None  # UE speeds in meter/second
    self.ue_dir = None  # UE move direction in radians
    self.spectral_efficiency = None
    self.tti_next_pkt = None  # TTI of next incoming packet for each UE

    self.seed()
    self.reset()
def _calculate_reward(self):
    """Return the (non-positive) reward for the current content of the UE buffers.

    The reward is minus the number of penalized bits:
      * GBR classes (one-hot QoS identifiers [0, 0, 0, 1], [0, 0, 1, 0] and [0, 1, 0, 0]): bits of
        the packets whose age exceeds 100, 150 and 30 respectively.
      * Non-GBR class ([1, 0, 0, 0]): all the bits in the buffer, plus (again) the bits of the
        packets whose age exceeds 300.

    Returns:
        A Python int, 0 when nothing is penalized and negative otherwise. The sums are
        accumulated as Python ints because the buffers hold unsigned NumPy integers: negating
        an unsigned NumPy accumulator wraps around to a huge positive value.
    """
    gbr_age_budget = {(0, 0, 0, 1): 100, (0, 0, 1, 0): 150, (0, 1, 0, 0): 30}
    non_gbr_class = (1, 0, 0, 0)
    non_gbr_age_budget = 300

    r_gbr = 0
    r_non_gbr = 0
    for u, qi in enumerate(self.qi):
        qos_class = tuple(int(flag) for flag in qi)
        sizes = self.s[u, :]
        ages = self.e[u, :]
        if qos_class in gbr_age_budget:
            delayed = ages > gbr_age_budget[qos_class]
            r_gbr += int(np.sum(sizes[delayed], dtype=np.int64))
        elif qos_class == non_gbr_class:
            delayed = ages > non_gbr_age_budget
            r_non_gbr += int(np.sum(sizes[delayed], dtype=np.int64))  # Delayed bits are counted twice
            r_non_gbr += int(np.sum(sizes, dtype=np.int64))  # Every buffered bit is penalized

    return -r_gbr - r_non_gbr
se = np.log2(1 + sinr / self.SINR_COEFF) # DL spectral efficiency in bps/Hz 216 | 217 | self.spectral_efficiency = np.clip(se, 0, 9.6) # Define an upper bound for the spectral efficiency. 218 | 219 | def _spectral_efficiency_to_cqi(self): 220 | # As per Table 7.2.3-1 in TS 36.213 Rel-11 221 | self.cqi[np.where(self.spectral_efficiency <= 0.1523)] = 0 222 | self.cqi[np.where((0.1523 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.2344))] = 1 223 | self.cqi[np.where((0.2344 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.3770))] = 2 224 | self.cqi[np.where((0.3770 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.6016))] = 3 225 | self.cqi[np.where((0.6016 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.8770))] = 4 226 | self.cqi[np.where((0.8770 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.1758))] = 5 227 | self.cqi[np.where((1.1758 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.4766))] = 6 228 | self.cqi[np.where((1.4766 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.9141))] = 7 229 | self.cqi[np.where((1.9141 < self.spectral_efficiency) & (self.spectral_efficiency <= 2.4063))] = 8 230 | self.cqi[np.where((2.4063 < self.spectral_efficiency) & (self.spectral_efficiency <= 2.7305))] = 9 231 | self.cqi[np.where((2.7305 < self.spectral_efficiency) & (self.spectral_efficiency <= 3.3223))] = 10 232 | self.cqi[np.where((3.3223 < self.spectral_efficiency) & (self.spectral_efficiency <= 3.9023))] = 11 233 | self.cqi[np.where((3.9023 < self.spectral_efficiency) & (self.spectral_efficiency <= 4.5234))] = 12 234 | self.cqi[np.where((4.5234 < self.spectral_efficiency) & (self.spectral_efficiency <= 5.1152))] = 13 235 | self.cqi[np.where((5.1152 < self.spectral_efficiency) & (self.spectral_efficiency <= 5.5547))] = 14 236 | self.cqi[np.where(5.5547 < self.spectral_efficiency)] = 15 237 | 238 | def _generate_traffic(self): 239 | for u, qi in enumerate(self.qi): 240 | if self.tti == 
self.tti_next_pkt[u]: 241 | buffer_gaps = np.where(self.s[u, :] == 0)[0] # Find slots for packets in the queue. 242 | if buffer_gaps.size == 0: # Large negative rwd unnecessary b/c rwd is already max due to full buffer. 243 | print(f"Buffer overflow. Disregarding new GBR (Conversational Voice) packet for UE {u}.") 244 | g = None 245 | else: 246 | g = buffer_gaps[0] # First available slot in buffer 247 | self.e[u, g] = 0 # Set the age of this new packet to 0 248 | 249 | if np.array_equal(qi, [0, 0, 0, 1]): # 3: GBR (Conversational Voice) 250 | if buffer_gaps.size > 0: 251 | self.s[u, g] = 584 252 | self.tti_next_pkt[u] = self.tti + 20 253 | elif np.array_equal(qi, [0, 0, 1, 0]): # 2: GBR (Conversational Video) 254 | # TODO: Use perhaps a more complex video traffic model such as the Markov-modulated Gamma model. 255 | if buffer_gaps.size > 0: 256 | self.s[u, g] = 41250 257 | self.tti_next_pkt[u] = self.tti + 33 258 | elif np.array_equal(qi, [0, 1, 0, 0]): # 1: Delay Critical GBR 259 | if buffer_gaps.size > 0: 260 | self.s[u, g] = 200 261 | self.tti_next_pkt[u] = self.tti + 20 262 | elif np.array_equal(qi, [1, 0, 0, 0]): # 0: Non-GBR 263 | # Inspired by: https://www.nsnam.org/docs/models/html/applications.html?highlight=traffic%20model 264 | if buffer_gaps.size > 0: 265 | self.s[u, g] = min(max(1, np.random.geometric(1 / 20000)), self.max_pkt_size_bits) 266 | self.tti_next_pkt[u] = self.tti + np.random.geometric(1 / self.it) 267 | 268 | if buffer_gaps.size > 0: 269 | assert 1 <= self.s[u, g] <= self.max_pkt_size_bits, f"Packet size {self.s[u, g]} out of range." 270 | 271 | def _update_state(self): 272 | self.state = np.concatenate((self.cqi, self.s.flatten(), self.e.flatten(), self.qi.flatten(), [self.p])) 273 | --------------------------------------------------------------------------------