├── wireless
    ├── agents
    │   ├── __init__.py
    │   ├── time_freq_resource_allocation_v0
    │   │   ├── __init__.py
    │   │   ├── round_robin_agent.py
    │   │   └── proportional_fair.py
    │   ├── noma_ul_time_freq_resource_allocation_v0
    │   │   ├── __init__.py
    │   │   └── noma_ul_proportional_fair.py
    │   ├── random_agent.py
    │   ├── bosch_agent.py
    │   └── q_learning.py
    ├── test
    │   ├── __init__.py
    │   ├── test_umts_olpc.py
    │   ├── test_tfrav0.py
    │   └── test_noma_ul_tfrav0.py
    ├── utils
    │   ├── __init__.py
    │   ├── misc.py
    │   └── prop_model.py
    ├── doc
    │   └── TimeFreqResourceAllocation-v0.pdf
    ├── envs
    │   ├── __init__.py
    │   ├── umts_olpc.py
    │   ├── noma_ul_time_freq_resource_allocation_v0.py
    │   └── time_freq_resource_allocation_v0.py
    ├── __init__.py
    └── scripts
        ├── launch_q_learn_umts_olpc.py
        └── launch_agent.py
├── config
    ├── config_agent.json
    ├── config_sacred.json
    └── config_environment.json
├── setup.py
├── LICENSE
└── README.md


/wireless/agents/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/wireless/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/wireless/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/wireless/agents/time_freq_resource_allocation_v0/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/wireless/agents/noma_ul_time_freq_resource_allocation_v0/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/wireless/doc/TimeFreqResourceAllocation-v0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nokia/wireless-suite/HEAD/wireless/doc/TimeFreqResourceAllocation-v0.pdf


--------------------------------------------------------------------------------
/config/config_agent.json:
--------------------------------------------------------------------------------
1 | {
2 |   "agent": {
3 |     "agent_type": "proportional fair channel aware",
4 |     "t_max": 65536,
5 |     "n_episodes": 16
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/wireless/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from wireless.envs.time_freq_resource_allocation_v0 import TimeFreqResourceAllocationV0
2 | from wireless.envs.noma_ul_time_freq_resource_allocation_v0 import NomaULTimeFreqResourceAllocationV0
3 | from wireless.envs.umts_olpc import UlOpenLoopPowerControl
4 | 


--------------------------------------------------------------------------------
/config/config_sacred.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "sacred": {
 3 |     "sacred_host": "127.0.0.1",
 4 |     "sacred_port": "27017",
 5 |     "sacred_user": "sacred_user",
 6 |     "sacred_pwd": "sacred_pwd",
 7 |     "sacred_db": "sacred0000",
 8 |     "n_metrics_points": 128,
 9 |     "experiment_name": "Random"
10 |   },
11 |   "seed": 0
12 | }
13 | 


--------------------------------------------------------------------------------
/config/config_environment.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "env": {
 3 |     "env": "NomaULTimeFreqResourceAllocation-v0",
 4 |     "n_ues": 32,
 5 |     "n_prbs": 25,
 6 |     "n_ues_per_prb": 2,
 7 |     "buffer_max_size": 8,
 8 |     "eirp_dbm": 13,
 9 |     "f_carrier_mhz": 2655,
10 |     "max_pkt_size_bits": 41250,
11 |     "non_gbr_traffic_mean_interarrival_time_ttis": 10
12 |   }
13 | }
14 | 


--------------------------------------------------------------------------------
/wireless/utils/misc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | from scipy import constants
 7 | 
 8 | 
 9 | def clip(value, min_value, max_value):  # Saturate `value` to the closed interval [min_value, max_value]
10 |     return max(min(value, max_value), min_value)  # Cap at the upper bound first, then lift to the lower bound
11 | 
12 | 
13 | def calculate_thermal_noise(bw_mhz):  # Thermal noise power k * T0 * B for a bandwidth given in MHz
14 |     t0_kelvin = 290  # Noise temperature in Kelvin used for the calculation
15 |     return constants.Boltzmann * t0_kelvin * bw_mhz * 1E6 * 1000  # 1E6: MHz -> Hz; 1000: W -> mW (linear value, not dB)
16 | 


--------------------------------------------------------------------------------
/wireless/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | register(  # Runs at import time: importing `wireless` makes the IDs below available to gym.make()
 4 |     id='TimeFreqResourceAllocation-v0',
 5 |     entry_point='wireless.envs.time_freq_resource_allocation_v0:TimeFreqResourceAllocationV0',
 6 | )
 7 | 
 8 | register(
 9 |     id='NomaULTimeFreqResourceAllocation-v0',  # The ID selected in config/config_environment.json
10 |     entry_point='wireless.envs.noma_ul_time_freq_resource_allocation_v0:NomaULTimeFreqResourceAllocationV0',
11 | )
12 | 
13 | register(
14 |     id='UlOpenLoopPowerControl-v0',  # The ID used by test_umts_olpc.py and launch_q_learn_umts_olpc.py
15 |     entry_point='wireless.envs.umts_olpc:UlOpenLoopPowerControl',
16 | )
17 | 


--------------------------------------------------------------------------------
/wireless/agents/random_agent.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | 
 7 | 
 8 | class RandomAgent:
 9 |     """
10 |     The world's simplest agent!
11 |     Every action is sampled from the given action space; state, reward and done are ignored.
12 |     See: https://github.com/openai/gym/blob/master/examples/agents/random_agent.py
13 |     """
14 |     def __init__(self, action_space):
15 |         self.action_space = action_space  # Must offer sample() and seed(), both used below
16 | 
17 |     def act(self, state, reward, done):  # Arguments are accepted but not read
18 |         return self.action_space.sample()
19 | 
20 |     def seed(self, seed=0):
21 |         self.action_space.seed(seed)  # Seeds the sampling performed by act()
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | from setuptools import setup
 7 | 
 8 | setup(name='wireless-suite',
 9 |       version='1.1',
10 |       packages=['wireless', 'wireless.agents', 'wireless.agents.time_freq_resource_allocation_v0', 'wireless.agents.noma_ul_time_freq_resource_allocation_v0', 'wireless.envs', 'wireless.utils'],
11 |       license='„2020 Nokia. Licensed under the BSD 3 Clause license. SPDX-License-Identifier: BSD-3-Clause',
12 |       description='Modules for executing wireless communication problems as OpenAI Gym environments.',
13 |       install_requires=['gym', 'matplotlib', 'numpy', 'scipy', 'sacred', 'pytest']
14 |       )
15 | 


--------------------------------------------------------------------------------
/wireless/utils/prop_model.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | import numpy as np
 7 | from scipy import constants
 8 | 
 9 | 
10 | class PropModel:
11 |     """
12 |     Propagation Model class
13 |     It can be used to define more complex prop models in the future
14 |     """
15 | 
16 |     def __init__(self, f_mhz, n=2):
17 |         self.f_mhz = f_mhz
18 |         self.n = n  # Attenuation exponent
19 | 
20 |     def get_free_space_pl_db(self, d_m, shadowing_db=0):
21 |         noise = np.random.normal(scale=shadowing_db, size=d_m.size)
22 |         return self.n * 10 * np.log10(4 * constants.pi * d_m * self.f_mhz * 1E6 / constants.c) + noise
23 | 
24 |     def seed(self, seed=0):
25 |         np.random.seed(seed)
26 | 


--------------------------------------------------------------------------------
/wireless/agents/time_freq_resource_allocation_v0/round_robin_agent.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | from wireless.agents.random_agent import RandomAgent
 7 | import numpy as np
 8 | 
 9 | 
10 | class RoundRobinAgent(RandomAgent):
11 |     def __init__(self, action_space, n_ues, buffer_max_size):
12 |         RandomAgent.__init__(self, action_space)
13 |         self.t = 0      # Current time step
14 | 
15 |         self.K = n_ues              # Number of UEs
16 |         self.L = buffer_max_size    # Maximum number of packets per UE buffer
17 | 
18 |     def act(self, state, reward, done):
19 |         action = self.t % self.K
20 |         self.t += 1
21 |         return action
22 | 
23 | 
24 | class RoundRobinIfTrafficAgent(RoundRobinAgent):
25 |     def __init__(self, action_space, n_ues, buffer_max_size):
26 |         RoundRobinAgent.__init__(self, action_space, n_ues, buffer_max_size)
27 | 
28 |     def act(self, state, reward, done):
29 |         action0 = self.t % self.K
30 | 
31 |         s = np.reshape(state[self.K:self.K*(1 + self.L)], (self.K, self.L))
32 |         buffer_size_per_ue = np.sum(s, axis=1)
33 | 
34 |         action = action0
35 |         while buffer_size_per_ue[action] == 0:
36 |             action = (action + 1) % self.K
37 |             if action == action0:
38 |                 break
39 | 
40 |         self.t += 1
41 |         return action
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, Nokia
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/wireless/agents/bosch_agent.py:
--------------------------------------------------------------------------------
 1 | from .random_agent import RandomAgent
 2 | import numpy as np
 3 | 
 4 | 
 5 | class BoschAgent(RandomAgent):
 6 |     def __init__(self, action_space, n_ues, buffer_max_size, max_pkt_size_bits):
 7 |         RandomAgent.__init__(self, action_space)
 8 |         self.t = 0      # Current time step
 9 |         self.K = n_ues              # Number of UEs
10 |         self.L = buffer_max_size    # Maximum number of packets per UE buffer
11 |         self.n = np.zeros(n_ues)    # Number of past PRB assignments for each UE
12 |         self.max_pkt_buffer = buffer_max_size*max_pkt_size_bits
13 |         
14 |         self.alpha = 2.07705283
15 |         self.beta = 7.74421717
16 |         self.gamma = 0.0507541467
17 |         self.mu = -0.00151544198
18 | 
19 |     def act(self, state, reward, done):
20 |         cqi_data = state[0:self.K]
21 |         s = np.reshape(state[self.K:self.K*(1 + self.L)], (self.K, self.L))  # Sizes in bits of packets in UEs' buffers
22 |         buffer_size_per_ue = np.sum(s, axis=1)
23 | 
24 |         e = np.reshape(state[self.K*(1 + self.L):self.K*(1 + 2*self.L)], (self.K, self.L))  # Packet ages in TTIs
25 |         o = np.max(e, axis=1)  # Age of oldest packet for each UE
26 | 
27 |         qi_ohe = np.reshape(state[self.K + 2 * self.K * self.L:5 * self.K + 2 * self.K * self.L], (self.K, 4))
28 |         qi = np.array([np.where(r == 1)[0][0] for r in qi_ohe])  # Decode One-Hot-Encoded QIs
29 | 
30 |         # Extract packet delay budget for all UEs
31 |         b = np.zeros(qi.shape)
32 |         b[qi == 3] = 100
33 |         b[qi == 2] = 150
34 |         b[qi == 1] = 30
35 |         b[qi == 0] = 300
36 | 
37 |         p_cqi = (self.alpha*cqi_data/15)
38 |         p_buffer = (self.beta*buffer_size_per_ue/self.max_pkt_buffer)
39 |         p_age = (self.gamma*o/b)
40 |         p_fairness = (self.mu * 1/(1+self.n))
41 |         
42 |         priorities = p_cqi + p_buffer + p_age + p_fairness
43 |         
44 |         action = np.argmax(priorities)
45 |         self.n[action] += 1
46 | 
47 |         self.t += 1
48 |         return action
49 | 


--------------------------------------------------------------------------------
/wireless/agents/q_learning.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | from collections import defaultdict
 7 | import numpy as np
 8 | 
 9 | 
10 | class QLearningAgent:
11 |     def __init__(self, seed=1,
12 |                  learning_rate=1,
13 |                  discount_factor=0.995,
14 |                  exploration_rate=1.0,
15 |                  exploration_decay_rate=0.9999,
16 |                  num_actions=4):
17 |         # Episode 458 is the first episode for epsilon min
18 |         self.learning_rate = learning_rate  # alpha
19 |         self.discount_factor = discount_factor  # gamma
20 |         self.exploration_rate = exploration_rate  # epsilon
21 |         self.exploration_rate_min = 0.010
22 |         self.exploration_decay_rate = exploration_decay_rate  # d
23 |         self.seed = seed
24 |         self.num_actions = num_actions
25 |         self.q_table = defaultdict(lambda: np.zeros(self.num_actions))
26 | 
27 |     def _policy(self, state):
28 |         """
29 |         Returns the probabilities for each action.
30 |         """
31 |         action_probs = np.ones(self.num_actions, dtype=float) * self.exploration_rate / self.num_actions
32 |         best_action = np.argmax(self.q_table[state])
33 |         action_probs[best_action] += (1.0 - self.exploration_rate)
34 |         return action_probs
35 | 
36 |     def td_update(self, state, action, next_state, reward):
37 |         best_next_action = np.argmax(self.q_table[next_state])
38 |         td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action]
39 |         td_delta = td_target - self.q_table[state][action]
40 |         self.q_table[state][action] += self.learning_rate * td_delta
41 | 
42 |     def exploration_rate_update(self):
43 |         self.exploration_rate *= self.exploration_decay_rate
44 |         self.exploration_rate = max(self.exploration_rate, self.exploration_rate_min)
45 | 
46 |     def act(self, state, *_):
47 |         action_probs = self._policy(state)
48 |         action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
49 |         return action
50 | 


--------------------------------------------------------------------------------
/wireless/test/test_umts_olpc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | import pytest
 7 | import random
 8 | import gym
 9 | import numpy as np
10 | 
11 | 
12 | @pytest.fixture
13 | def env():
14 |     env = gym.make('UlOpenLoopPowerControl-v0')  # Init environment
15 |     yield env
16 | 
17 | 
18 | class TestTfraV0:
19 |     def test_reproducibility(self, env):
20 |         env.seed(seed=1234)
21 |         random.seed(1234)
22 |         states = []
23 |         rewards = []
24 |         dones = []
25 |         for t in range(64):
26 |             action = random.randint(0, 3)
27 |             state, reward, done, _ = env.step(action)
28 |             states.append(state)
29 |             rewards.append(reward)
30 |             dones.append(done)
31 | 
32 |         env2 = gym.make('UlOpenLoopPowerControl-v0')  # Init environment
33 |         env2.seed(seed=1234)
34 |         random.seed(1234)
35 |         for t in range(64):
36 |             action = random.randint(0, 3)
37 |             state, reward, done, _ = env2.step(action)
38 |             np.testing.assert_array_equal(state, states[t])
39 |             assert reward == rewards[action]
40 |             assert done == dones[action]
41 | 
42 |     def test_variability(self, env):
43 |         env.seed(seed=1234)
44 |         random.seed(1234)
45 |         states = []
46 |         rewards = []
47 |         dones = []
48 |         for t in range(64):
49 |             action = random.randint(0, 3)
50 |             state, reward, done, _ = env.step(action)
51 |             states.append(state)
52 |             rewards.append(reward)
53 |             dones.append(done)
54 | 
55 |         env2 = gym.make('UlOpenLoopPowerControl-v0')  # Init environment
56 |         env2.seed(seed=12345)
57 |         random.seed(12345)
58 |         for t in range(64):
59 |             action = random.randint(0, 3)
60 |             state, reward, done, _ = env.step(action)
61 |             if not np.array_equal(state, states[t]):
62 |                 return
63 |             if reward != rewards[t]:
64 |                 return
65 |             if done != dones[t]:
66 |                 return
67 | 
68 |         pytest.fail("Different seeds produced the same results.")
69 | 


--------------------------------------------------------------------------------
/wireless/test/test_tfrav0.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | import pytest
  7 | import random
  8 | import gym
  9 | import numpy as np
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def env():
 14 |     env = gym.make('TimeFreqResourceAllocation-v0')  # Init environment
 15 |     yield env
 16 | 
 17 | 
 18 | @pytest.fixture
 19 | def env64():
 20 |     env = gym.make('TimeFreqResourceAllocation-v0', n_ues=64)  # Init environment
 21 |     yield env
 22 | 
 23 | 
 24 | class TestTfraV0:
 25 |     def test_reproducibility(self, env64):
 26 |         env64.seed(seed=1234)
 27 |         states = []
 28 |         rewards = []
 29 |         dones = []
 30 |         for action in list(range(64)):
 31 |             state, reward, done, _ = env64.step(action)
 32 |             states.append(state)
 33 |             rewards.append(reward)
 34 |             dones.append(done)
 35 | 
 36 |         env = gym.make('TimeFreqResourceAllocation-v0', n_ues=64)  # Init environment
 37 |         env.seed(seed=1234)
 38 |         for action in list(range(64)):
 39 |             state, reward, done, _ = env.step(action)
 40 |             np.testing.assert_array_equal(state, states[action])
 41 |             assert reward == rewards[action]
 42 |             assert done == dones[action]
 43 | 
 44 |     def test_variability(self, env64):
 45 |         env64.seed(seed=1234)
 46 |         states = []
 47 |         rewards = []
 48 |         dones = []
 49 |         for action in list(range(64)):
 50 |             state, reward, done, _ = env64.step(action)
 51 |             states.append(state)
 52 |             rewards.append(reward)
 53 |             dones.append(done)
 54 | 
 55 |         env = gym.make('TimeFreqResourceAllocation-v0', n_ues=64)  # Init environment
 56 |         env.seed(seed=12345)
 57 |         for action in list(range(64)):
 58 |             state, reward, done, _ = env.step(action)
 59 |             if not np.array_equal(state, states[action]):
 60 |                 return
 61 |             if reward != rewards[action]:
 62 |                 return
 63 |             if done != dones[action]:
 64 |                 return
 65 | 
 66 |         pytest.fail("Different seeds produced the same results.")
 67 | 
 68 |     def test_state_features(self):
 69 |         n_ues = 64
 70 |         n_steps = 512
 71 |         env = gym.make('TimeFreqResourceAllocation-v0', n_ues=n_ues, eirp_dbm=7)  # Low power to have some CQI=0
 72 |         env.seed(seed=1234)
 73 | 
 74 |         state, _, _, _ = env.step(0)  # Get state to measure its length
 75 |         states = np.zeros((n_steps, len(state)), dtype=np.uint32)  # Memory pre-allocation
 76 |         for t in range(n_steps):
 77 |             action = random.randint(0, n_ues-1)
 78 |             state, _, _, _ = env.step(action)
 79 |             states[t, :] = state
 80 | 
 81 |         # Check CQI range
 82 |         assert states[:, :n_ues].min() == 0
 83 |         assert states[:, :n_ues].max() == 15
 84 |         assert 0 < states[:, :n_ues].mean() < 15
 85 |         assert states[:, :n_ues].std() > 1
 86 | 
 87 |         # Check size (in bits) of packets in UEs' buffers
 88 |         assert states[:, n_ues:n_ues + n_ues * env.L].min() == 0
 89 |         assert states[:, n_ues:n_ues + n_ues * env.L].max() >= 41250
 90 |         assert states[:, n_ues:n_ues + n_ues * env.L].mean() > 100
 91 | 
 92 |         # Check age (in ms) of packets in UEs' buffers
 93 |         assert states[:, n_ues + n_ues * env.L:n_ues + 2*n_ues * env.L].min() == 0
 94 |         assert states[:, n_ues + n_ues * env.L:n_ues + 2 * n_ues * env.L].max() > 10  # Less n_prbs  yield higher ages
 95 | 
 96 |         # TODO: Maybe also check QI
 97 | 
 98 |         # Check PRB counter
 99 |         assert states[:, -1].min() == 0
100 |         assert states[:, -1].max() == env.Nf-1
101 | 


--------------------------------------------------------------------------------
/wireless/scripts/launch_q_learn_umts_olpc.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import json
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from collections import defaultdict
  6 | from sacred import Experiment
  7 | 
  8 | from wireless.agents.q_learning import QLearningAgent
  9 | 
 10 | 
 11 | num_episodes = 512
 12 | max_steps_per_episode = 512
 13 | snr_tgt_db = 4
 14 | 
 15 | # Memory allocation
 16 | episode_rewards = np.zeros(num_episodes)
 17 | epsilon = np.zeros(num_episodes)          # To store exploration level
 18 | snr_in_some_episodes = defaultdict(lambda: np.zeros(max_steps_per_episode))  # To store Power Control dynamics
 19 | episodes_to_save = np.linspace(0, num_episodes, num=5, dtype=int)
 20 | 
 21 | 
 22 | def run_episode(e, env, agent, save_snr=False):
 23 |     state = env.reset()
 24 | 
 25 |     s = 0  # Step count
 26 |     while True:
 27 |         # Take a step
 28 |         action = agent.act(state)
 29 |         next_state, reward, done, _ = env.step(action)
 30 | 
 31 |         # Collect progress
 32 |         if save_snr:
 33 |             snr_in_some_episodes[e][s] = state
 34 |         episode_rewards[e] += reward
 35 |         agent.td_update(state, action, next_state, reward)
 36 |         agent.exploration_rate_update()
 37 |         s += 1
 38 |         if done:
 39 |             break
 40 |         state = next_state
 41 | 
 42 | 
 43 | def run_n_episodes(num_episodes, env, agent, seed=0, log_progress=True):
 44 |     log_period = round(num_episodes / 10)
 45 | 
 46 |     for e in range(num_episodes):
 47 |         if log_progress and e % log_period == 0:
 48 |             print(f"\rEpisode {e}/{num_episodes}.")
 49 | 
 50 |         env.seed(seed=seed + e)
 51 |         epsilon[e] = agent.exploration_rate
 52 |         run_episode(e, env, agent, save_snr=e in episodes_to_save)
 53 | 
 54 | 
 55 | # Load agent parameters
 56 | with open('../../config/config_agent.json') as f:
 57 |     ac = json.load(f)
 58 | 
 59 | # Configure experiment
 60 | with open('../../config/config_sacred.json') as f:
 61 |     sc = json.load(f)  # Sacred Configuration
 62 |     ns = sc["sacred"]["n_metrics_points"]  # Number of points per episode to log in Sacred
 63 |     ex = Experiment(ac["agent"]["agent_type"], save_git_info=False)
 64 |     ex.add_config(sc)
 65 |     ex.add_config(ac)
 66 | mongo_db_url = f'mongodb://{sc["sacred"]["sacred_user"]}:{sc["sacred"]["sacred_pwd"]}@' + \
 67 |                f'{sc["sacred"]["sacred_host"]}:{sc["sacred"]["sacred_port"]}/{sc["sacred"]["sacred_db"]}'
 68 | # ex.observers.append(MongoObserver(url=mongo_db_url, db_name=sc["sacred"]["sacred_db"]))  # Uncomment to save to DB
 69 | 
 70 | # Load environment parameters
 71 | with open('../../config/config_environment.json') as f:
 72 |     ec = json.load(f)
 73 |     ex.add_config(ec)
 74 | 
 75 | 
 76 | @ex.automain
 77 | def main(_run):
 78 |     env = gym.make('UlOpenLoopPowerControl-v0', f_carrier_mhz=_run.config['env']['f_carrier_mhz'],
 79 |                    t_max=max_steps_per_episode)  # Init environment
 80 | 
 81 |     agent = QLearningAgent(seed=_run.config['seed'], num_actions=env.action_space.n)
 82 | 
 83 |     run_n_episodes(num_episodes, env, agent, _run.config['seed'])
 84 | 
 85 |     # Plot results
 86 |     plt.figure()
 87 |     fig, ax1 = plt.subplots()
 88 |     ax2 = ax1.twinx()
 89 |     ax1.plot(range(num_episodes), episode_rewards, 'g-')
 90 |     ax2.plot(range(num_episodes), epsilon, 'b-')
 91 | 
 92 |     ax1.set_xlabel('Episodes')
 93 |     ax1.set_ylabel('Episode reward', color='g')
 94 |     ax2.set_ylabel('Exploration rate', color='b')
 95 |     plt.grid(True)
 96 | 
 97 |     plt.figure()
 98 |     for e, snr in snr_in_some_episodes.items():
 99 |         plt.plot(snr, label=f'Episode {e}')
100 |     plt.grid(True)
101 |     plt.xlabel('Steps')
102 |     plt.ylabel('SNR')
103 |     plt.legend(loc='upper right')
104 | 
105 |     plt.show()
106 | 


--------------------------------------------------------------------------------
/wireless/agents/time_freq_resource_allocation_v0/proportional_fair.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | from wireless.agents.random_agent import RandomAgent
  7 | import numpy as np
  8 | 
  9 | 
 10 | class ProportionalFairAgent(RandomAgent):
 11 |     def __init__(self, action_space, n_ues, buffer_max_size):
 12 |         RandomAgent.__init__(self, action_space)
 13 |         self.t = 0      # Current time step
 14 | 
 15 |         self.K = n_ues              # Number of UEs
 16 |         self.L = buffer_max_size    # Maximum number of packets per UE buffer
 17 |         self.n = np.zeros(n_ues)    # Number of past PRB assignments for each UE
 18 | 
 19 |     def _calculate_priorities(self, cqi, o, b, buffer_size_per_ue):
 20 |         priorities = (1 + o) / b * buffer_size_per_ue / (1 + self.n)
 21 |         return priorities
 22 | 
 23 |     @staticmethod
 24 |     def parse_state(state, num_ues, max_pkts):
 25 |         s = np.reshape(state[num_ues:num_ues * (1 + max_pkts)], (num_ues, max_pkts))  # Sizes in bits of packets in UEs' buffers
 26 |         buffer_size_per_ue = np.sum(s, axis=1)
 27 | 
 28 |         e = np.reshape(state[num_ues * (1 + max_pkts):num_ues * (1 + 2 * max_pkts)], (num_ues, max_pkts))  # Packet ages in TTIs
 29 |         o = np.max(e, axis=1)  # Age of oldest packet for each UE
 30 | 
 31 |         cqi = state[0:num_ues]
 32 | 
 33 |         qi_ohe = np.reshape(state[num_ues + 2 * num_ues * max_pkts:5 * num_ues + 2 * num_ues * max_pkts], (num_ues, 4))
 34 |         qi = np.array([np.where(r == 1)[0][0] for r in qi_ohe])  # Decode One-Hot-Encoded QIs
 35 | 
 36 |         # Extract packet delay budget for all UEs
 37 |         b = np.zeros(qi.shape)
 38 |         b[qi == 3] = 100
 39 |         b[qi == 2] = 150
 40 |         b[qi == 1] = 30
 41 |         b[qi == 0] = 300
 42 | 
 43 |         return o, cqi, b, buffer_size_per_ue
 44 | 
 45 |     def act(self, state, reward, done):
 46 |         o, cqi, b, buffer_size_per_ue = self.parse_state(state, self.K, self.L)
 47 | 
 48 |         priorities = self._calculate_priorities(cqi, o, b, buffer_size_per_ue)
 49 | 
 50 |         action = np.argmax(priorities)
 51 |         self.n[action] += 1
 52 | 
 53 |         self.t += 1
 54 |         return action
 55 | 
 56 | 
 57 | class ProportionalFairChannelAwareAgent(ProportionalFairAgent):
 58 |     CQI2SE = [0.1523, 0.2344, 0.3770, 0.6016, 0.8770, 1.1758, 1.4766, 1.9141, 2.4063, 2.7305, 3.3223, 3.9023, 4.5234,
 59 |               5.1152, 5.5547, 9.6]
 60 | 
 61 |     def __init__(self, action_space, n_ues, buffer_max_size):
 62 |         super().__init__(action_space, n_ues, buffer_max_size)
 63 | 
 64 |     def _calculate_priorities(self, cqi, o, b, buffer_size_per_ue):
 65 |         se = np.zeros(shape=(self.K,))
 66 |         for i in range(16):
 67 |             se[cqi == i] = self.CQI2SE[i]
 68 |         priorities = (1 + o) / b * buffer_size_per_ue * se
 69 |         return priorities
 70 | 
 71 | 
 72 | class Knapsackagent(ProportionalFairAgent):
 73 |     def __init__(self, action_space, n_ues, buffer_max_size, nprb):
 74 |         super().__init__(action_space, n_ues, buffer_max_size)
 75 |         self.r = None
 76 |         self.Nf = nprb
 77 |         self.window = self.Nf * 15
 78 | 
 79 |     def _calculate_priorities(self, cqi, o, b, buffer_size_per_ue):
 80 |         # Normalized values
 81 |         k_cqi = (cqi / 15)
 82 |         k_buffer = (buffer_size_per_ue / (self.r + 1))
 83 |         k_age = (o / b)
 84 |         k_fairness = (1 / (1 + self.n))
 85 |         # tanh as ranking function for values
 86 |         priorities = 1 * np.tanh(k_cqi) + 1 * np.tanh(k_buffer) + 1 * np.tanh(k_age) + 1 * np.tanh(k_fairness)
 87 |         return priorities
 88 | 
 89 |     def act(self, state, reward, done):
 90 |         # reset the self.r
 91 |         if self.t % self.window == 0:
 92 |             self.r = np.zeros(shape=(self.K,), dtype=np.float32)
 93 | 
 94 |         o, cqi, b, buffer_size_per_ue = self.parse_state(state, self.K, self.L)
 95 | 
 96 |         priorities = self._calculate_priorities(cqi, o, b, buffer_size_per_ue)
 97 | 
 98 |         self.buffer_size_moving_average(state)
 99 | 
100 |         action = np.argmax(priorities)
101 |         self.n[action] += 1
102 | 
103 |         self.t += 1
104 |         return action
105 | 
106 |     def buffer_size_moving_average(self, state):
107 |         s = np.reshape(state[self.K:self.K * (1 + self.L)], (self.K, self.L))  # Size in bits of packets in UEs' buffers
108 |         buffer_size_per_ue = np.sum(s, axis=1)
109 |         # Moving Average of buffer sizes
110 |         if self.t % self.Nf == 0 and self.t != 0:
111 |             self.r = (1 - self.Nf / self.window) * self.r + buffer_size_per_ue * self.Nf / self.window
112 | 


--------------------------------------------------------------------------------
/wireless/envs/umts_olpc.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | import math
  7 | import random
  8 | from gym import spaces, Env
  9 | import numpy as np
 10 | from numpy import linalg as la
 11 | from scipy import constants
 12 | 
 13 | from ..utils.misc import calculate_thermal_noise
 14 | from ..utils.prop_model import PropModel
 15 | 
 16 | 
 17 | class UlOpenLoopPowerControl(Env):
 18 |     BTS_POS = [0, 0]  # Base Transceiver Station position
 19 |     P0_TX_UE_DBM = +3  # Initial uplink transmit power of User Equipment (UE).
 20 |     UE_V = 2  # UE speed in m/s
 21 |     DT_MS = 20  # Time equivalence of one step
 22 |     SNR_MIN = -20  # Minimum measurable SNR value in dB
 23 |     SNR_MAX = 20  # Maximum measurable SNR value in dB
 24 | 
 25 |     def __init__(self, x_max_m=10, y_max_m=10, f_carrier_mhz=2655, bw_mhz=10, snr_tgt_db=4, t_max=512, n=3):
 26 |         """
 27 |          This environment implements a free-space scenario with a BTS at coordinates
 28 |          [0, 0] and one UE at a random location. Each step the UE moves
 29 |          linearly in a random direction with constant speed 2 m/s .
 30 |          The agent interacting with the environment is the BTS.
 31 |          On each time step the agent must select one of four possible Power
 32 |          Control (PC) commands to increase/decrease the UL transmit power. The
 33 |          objective of this power control is to measure an UL SNR as close as
 34 |          possible to the SNR target (4 dB by default). The PC commands (i.e.
 35 |          action space) are:
 36 |             Action 0 --> -1 dB
 37 |             Action 1 -->  0 dB
 38 |             Action 2 --> +1 dB
 39 |             Action 3 --> +3 dB
 40 | 
 41 |          As output of each step, the environment returns the following to the
 42 |          invoking agent:
 43 |             State:  Current UL SNR (single integer value between -20 and +20 with 1 dB step resolution)
 44 |             Reward:  0  if |SNR-SNR_target| <= 1 dB
 45 |                     -1  otherwise
 46 |         """
 47 |         self._seed = None
 48 |         self.x_max_m = x_max_m  # Width of 2D scenario
 49 |         self.y_max_m = y_max_m  # Height of 2D scenario
 50 |         self.bts_pos = [0, 0]
 51 |         self.f_carrier_mhz = f_carrier_mhz
 52 |         self.bw_mhz = bw_mhz
 53 |         self.snr_tgt_db = snr_tgt_db
 54 |         self.t_max = t_max
 55 |         self.propagation_model = PropModel(self.f_carrier_mhz, n=n)
 56 |         self.ue_pos = None          # To be initialized in reset
 57 |         self.v_x = None             # To be initialized in reset
 58 |         self.v_y = None             # To be initialized in reset
 59 |         self.p_tx_ue_dbm = None     # To be initialized in reset
 60 |         self.step_count = None      # To be initialized in reset
 61 |         self.state = None           # To be initialized in reset
 62 | 
 63 |         self.observation_space = spaces.Box(np.array([-10, -10]), np.array([+10, +10]), dtype=np.uint8)
 64 |         self.action_space = spaces.Discrete(4)
 65 | 
 66 |         self.seed()
 67 |         self.reset()
 68 | 
 69 |     def seed(self, seed=0):
 70 |         random.seed(seed)
 71 |         np.random.seed(seed)
 72 |         self.propagation_model.seed(seed=seed)
 73 |         self._seed = seed
 74 | 
 75 |     def _calculate_ul_snr(self):
 76 |         ue_bts_distance_m = la.norm(self.ue_pos - self.bts_pos)
 77 |         loss_db = self.propagation_model.get_free_space_pl_db(ue_bts_distance_m)[0]
 78 |         p_rx_dbm = self.p_tx_ue_dbm - loss_db
 79 |         n_mw = calculate_thermal_noise(self.bw_mhz)
 80 |         snr_db = p_rx_dbm - 10 * np.log10(n_mw)
 81 |         snr_db = round(snr_db)
 82 |         return max(min(snr_db, self.SNR_MAX), self.SNR_MIN)
 83 | 
 84 |     def render(self, mode='human'):
 85 |         pass
 86 | 
 87 |     def reset(self):
 88 |         self.ue_pos = np.random.rand(2) * np.array([self.x_max_m, self.y_max_m])
 89 | 
 90 |         theta = random.random() * 2 * constants.pi  # Random direction
 91 |         self.v_x = math.cos(theta) * self.UE_V
 92 |         self.v_y = math.sin(theta) * self.UE_V
 93 | 
 94 |         self.p_tx_ue_dbm = self.P0_TX_UE_DBM
 95 |         self.step_count = 0
 96 |         self.state = self._calculate_ul_snr()
 97 |         return self.state
 98 | 
 99 |     def _update_tx_pwr(self, action):
100 |         if action == 0:
101 |             self.p_tx_ue_dbm -= 1
102 |         elif action == 2:
103 |             self.p_tx_ue_dbm += 1
104 |         elif action == 3:
105 |             self.p_tx_ue_dbm += 3
106 | 
107 |     def step(self, action):
108 |         assert self.action_space.contains(action)
109 |         self.ue_pos += np.array([self.v_x, self.v_y]) * self.DT_MS * 1E-3  # Move UE
110 |         self._update_tx_pwr(action)
111 |         self.step_count += 1
112 |         snr = self._calculate_ul_snr()
113 |         self.state = snr  # Update state
114 |         reward = 0 if np.abs(snr - self.snr_tgt_db) <= 1 else -1
115 |         done = True if self.step_count >= self.t_max else False
116 | 
117 |         return self.state, reward, done, {}
118 | 


--------------------------------------------------------------------------------
/wireless/agents/noma_ul_time_freq_resource_allocation_v0/noma_ul_proportional_fair.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | from wireless.agents.random_agent import RandomAgent
 7 | from wireless.agents.time_freq_resource_allocation_v0.proportional_fair import ProportionalFairAgent
 8 | import itertools
 9 | import heapq 
10 | import numpy as np
11 | 
12 | 
class NomaULProportionalFairChannelAwareAgent(RandomAgent):
    """Proportional-fair, channel-aware scheduler for the uplink NOMA environment.

    On every step the agent selects the ordered group of ``M`` UEs whose
    weighted sum-rate (WSR) is highest. The WSR of every candidate group is kept
    in a heap that is rebuilt from scratch when the state's last entry ``p`` is 0
    and refreshed lazily on all other steps.
    """
    CQI2SE = [0.1523, 0.2344, 0.3770, 0.6016, 0.8770, 1.1758, 1.4766, 1.9141, 2.4063, 2.7305, 3.3223, 3.9023, 4.5234,
              5.1152, 5.5547, 9.6]

    def __init__(self, action_space, n_ues, n_ues_per_prb, buffer_max_size, n_mw, sinr_coeff):
        """Store the scenario parameters and pre-compute all candidate allocations.

        :param action_space: Forwarded to ``RandomAgent``.
        :param n_ues: Number of UEs.
        :param n_ues_per_prb: Maximum number of users multiplexed on a PRB.
        :param buffer_max_size: Maximum number of packets per UE buffer.
        :param n_mw: Thermal noise in mW.
        :param sinr_coeff: Coefficient dividing the SINR in the spectral-efficiency formula.
        """
        RandomAgent.__init__(self, action_space)
        self.t = 0      # Current time step

        self.K = n_ues              # Number of UEs
        self.L = buffer_max_size    # Maximum number of packets per UE buffer
        self.M = n_ues_per_prb      # Maximum number of users multiplexed on a PRB
        self.n = np.zeros(n_ues)    # Number of past PRB assignments for each UE
        self.n_mw = n_mw            # Thermal noise in mW
        self.sinr_coeff = sinr_coeff  # Rho coefficient to map SINR to spectral efficiency.

        interference_dbm = -105  # Constant interference level throughout the coverage area
        self._interference_mw = 10 ** (interference_dbm / 10)
        # Receive power that yields each CQI's spectral efficiency when no other UE is superposed
        self._cqi2rx_pwr_mw = (np.power(2, self.CQI2SE)-1) * (self.n_mw+self._interference_mw) * self.sinr_coeff

        # All possible allocations: every ordered selection (permutation) of self.M UEs out of self.K
        self._permutations = list(itertools.permutations(range(self.K), self.M))
        # Heap of (-WSR, permutation index) tuples, so that the largest WSR sits at the root.
        # It is re-computed entirely when p == 0, and updated lazily at each other step
        self._permutations_wsr = None

    def _calculate_wsr(self, perm_idx, rx_pwr_mw, w):
        """Return the weighted sum-rate of the allocation ``self._permutations[perm_idx]``.

        UEs are processed from the last to the first position of the permutation;
        each processed UE's receive power is added to the interference seen by
        the UEs processed after it.

        :param perm_idx: Index into ``self._permutations``.
        :param rx_pwr_mw: Receive power of each UE in mW.
        :param w: Proportional-fair weight of each UE.
        """
        permutation = self._permutations[perm_idx]
        cumulated_rx_pwr_mw = 0
        wsr = 0
        for pos in range(self.M - 1, -1, -1):
            # ue decoded in pos-th order
            ue = permutation[pos]
            # SINR taking into account the interference from other UEs superposed on the same PRB
            sinr = rx_pwr_mw[ue] / (self.n_mw + self._interference_mw + cumulated_rx_pwr_mw)
            wsr += w[ue] * np.log2(1 + sinr / self.sinr_coeff)  # Spectral efficiency in bps/Hz
            # Store the current UE rx_pwr_mw as interference
            cumulated_rx_pwr_mw += rx_pwr_mw[ue]
        return wsr

    def act(self, state, reward, done):
        """Return the list of ``M`` UEs that maximises the weighted sum-rate.

        :param state: Environment observation; its last entry ``p`` triggers a full re-computation when 0.
        :param reward: Unused.
        :param done: Unused.
        :return: List with the indices of the selected UEs, in the order stored in the permutation.
        """
        o, cqi, b, buffer_size_per_ue = ProportionalFairAgent.parse_state(state, self.K, self.L)

        p = state[-1]

        w = (1+o)/b * buffer_size_per_ue        # Weight of each UE in the PF scheduler
        rx_pwr_mw = np.zeros(shape=(self.K,))   # Receive power of each UE
        for cqi_value, pwr_mw in enumerate(self._cqi2rx_pwr_mw):
            rx_pwr_mw[cqi == cqi_value] = pwr_mw

        # Weighted sum-rate maximization considering w and rx_pwr_mw:
        # Find the M UEs out of K that maximize sum w[i]*se[i]
        if p == 0 or self._permutations_wsr is None:  # Re-compute entirely self._permutations_wsr
            # Build the full list first and heapify once (linear time) instead of pushing items one by one
            self._permutations_wsr = [(-self._calculate_wsr(index, rx_pwr_mw, w), index)
                                      for index in range(len(self._permutations))]
            heapq.heapify(self._permutations_wsr)
            # The root of the heap holds the largest WSR; peek at it without modifying the heap
            _, max_wsr_index = self._permutations_wsr[0]
        # When p!=0, perform lazy update since:
        # 1) The weights w have only changed (decreased) for up to self.M UEs
        # 2) In addition, the spectral efficiencies have not changed
        else:
            while True:
                old_wsr, max_wsr_index = heapq.heappop(self._permutations_wsr)
                old_wsr = -old_wsr
                new_wsr = self._calculate_wsr(max_wsr_index, rx_pwr_mw, w)  # Compute the new WSR
                assert old_wsr >= new_wsr, "The WSR should only decrease in the lazy updates"
                if old_wsr == new_wsr:
                    # This WSR has not changed -> it is still the highest value
                    # Push it back in the heapq and terminate the while loop
                    heapq.heappush(self._permutations_wsr, (-old_wsr, max_wsr_index))
                    break
                else:
                    # Otherwise, we update its WSR info and push it in the heapq
                    # The while loop continues
                    heapq.heappush(self._permutations_wsr, (-new_wsr, max_wsr_index))

        action = list(self._permutations[max_wsr_index])
        self.n[action] += 1
        self.t += 1
        return action


--------------------------------------------------------------------------------
/wireless/envs/noma_ul_time_freq_resource_allocation_v0.py:
--------------------------------------------------------------------------------
 1 | """
 2 | © 2020 Nokia
 3 | Licensed under the BSD 3 Clause license
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | """
 6 | 
 7 | from .time_freq_resource_allocation_v0 import *
 8 | 
 9 | 
class NomaULTimeFreqResourceAllocationV0(TimeFreqResourceAllocationV0):
    """Uplink NOMA variant of the time-frequency resource allocation environment.

    Up to ``M`` UEs can be allocated on the same resource in each step. The
    entries of an action are served from the last to the first one, and every
    served UE adds its receive power to the interference seen by the UEs served
    after it.
    """

    def __init__(self, n_ues=32, n_prbs=25, n_ues_per_prb=2, buffer_max_size=32, eirp_dbm=13, f_carrier_mhz=2655,
                 max_pkt_size_bits=41250, it=10, t_max=65536):
        """Build the parent environment and replace its action space by a multi-UE one.

        All arguments except ``n_ues_per_prb`` are forwarded to the parent constructor.
        """
        super().__init__(n_ues, n_prbs, buffer_max_size, eirp_dbm, f_carrier_mhz, max_pkt_size_bits, it, t_max)

        self.M = n_ues_per_prb  # Maximum number of users multiplexed on a PRB
        # One entry per multiplexing position; the value self.K stands for "no UE" (NOOP)
        self.action_space = spaces.MultiDiscrete([self.K + 1] * self.M)

    def reset(self):
        """Clear the stored receive powers and reset the parent environment."""
        self.rx_pwr_mw = np.zeros(shape=(self.K,))  # Received powers at the current time step
        return super().reset()

    def step(self, action):
        """Serve the selected UEs and advance the environment by one time step.

        :param action: Sequence of ``M`` entries, each a UE index or ``self.K`` for NOOP.
        :return: Tuple ``(state, reward, done, info)``; the reward is non-zero only when the PRB counter
                 wraps around to 0.
        """
        assert self.action_space.contains(action), f"{action} ({type(action)}) invalid"

        # Work on an array copy, in case the action is a list
        requested = np.array(action)
        # A UE can be served only once per step: the positions returned by np.unique are kept
        # and every other entry is turned into NOOP
        _, kept_positions = np.unique(requested, return_index=True)
        schedule = np.ones(self.M, dtype=np.uint32) * self.K
        schedule[kept_positions] = requested[kept_positions]

        interference_dbm = -105  # Constant interference level throughout the coverage area
        interference_mw = 10 ** (interference_dbm / 10)

        # Execute the action from the last to the first decoded UE
        superposed_rx_pwr_mw = 0
        for ue in schedule[::-1]:
            # NOOP is defined as := self.K, while the UEs are 0, ..., self.K-1
            if ue == self.K:
                continue
            if not np.sum(self.s[ue, :]) > 0:  # Nothing to transmit in this UE's buffer
                continue

            # Find the oldest packet among the non-empty slots of the UE's buffer
            non_empty = (self.s[ue, :] > 0)
            oldest = np.arange(self.L)[non_empty][np.argmax(self.e[ue, non_empty])]

            assert self.s[ue, oldest] > 0, f"t={self.t}. Oldest packet has size {self.s[ue, oldest]} " +\
                                           f"and age {self.e[ue, oldest]}. " +\
                                           f"User has {np.sum(self.s[ue, :])} bits in buffer."  # Sanity check

            # SINR taking into account the interference from other UEs superposed on the same PRB
            sinr = self.rx_pwr_mw[ue] / (self.n_mw + interference_mw + superposed_rx_pwr_mw)
            # Spectral efficiency in bps/Hz, with an upper bound of 9.6
            se = np.clip(np.log2(1 + sinr / self.SINR_COEFF), 0, 9.6)
            capacity_bits = floor(se * self.bw_mhz / self.Nf * 1E3)  # Bits that can be transmitted
            # Store the current UE rx_pwr_mw as interference
            superposed_rx_pwr_mw += self.rx_pwr_mw[ue]

            while capacity_bits > 0 and self.s[ue, oldest] > 0:  # While there are packets & available capacity
                if capacity_bits < self.s[ue, oldest]:  # Partial packet transmission
                    self.s[ue, oldest] -= capacity_bits
                    break
                # Full packet transmission
                capacity_bits -= self.s[ue, oldest]
                self.s[ue, oldest] = 0
                self.e[ue, oldest] = 0
                # NOTE(review): unlike the masked search above, this argmax also considers empty slots, so
                # it can land on an empty slot when all remaining packets have age 0 -- confirm this is
                # intended (it may mirror the parent environment).
                oldest = np.argmax(self.e[ue, :])  # Find oldest packet in UE's buffer

        self.t += 1  # Update time-step
        self.p = self.t % self.Nf  # Update PRB counter
        reward = 0
        if self.p == 0:
            reward = self._calculate_reward()
            self.tti += 1  # Update TTI counter
            self.e[self.s > 0] += 1  # Age buffer packets
            self._generate_traffic()
            self._move_ues()
            self._recalculate_rf()

        self._update_state()
        done = bool(self.t >= self.t_max)
        return np.array(self.state), reward, done, {}

    def _calculate_spectral_efficiency(self, rx_pwr_dbm):
        """Store the receive powers in mW and the interference-free, clipped spectral efficiency."""
        interference_dbm = -105  # Constant interference level throughout the coverage area
        interference_mw = 10 ** (interference_dbm / 10)

        self.rx_pwr_mw = (10 ** (rx_pwr_dbm / 10))  # Rx power in mW, kept for the SINR calculation in step()

        sinr = self.rx_pwr_mw / (self.n_mw + interference_mw)
        se = np.log2(1 + sinr / self.SINR_COEFF)  # Spectral efficiency in bps/Hz
        self.spectral_efficiency = np.clip(se, 0, 9.6)  # Define an upper bound for the spectral efficiency.


--------------------------------------------------------------------------------
/wireless/test/test_noma_ul_tfrav0.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | import pytest
  7 | import gym
  8 | import numpy as np
  9 | 
 10 | 
 11 | @pytest.fixture(params=[2, 3])
 12 | def env(request):
 13 |     env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues_per_prb=request.param)  # Init environment
 14 |     yield env
 15 | 
 16 | 
 17 | @pytest.fixture(params=[2, 3])
 18 | def env64(request):
 19 |     env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=64, n_ues_per_prb=request.param)  # Init environment
 20 |     yield env
 21 | 
 22 | 
 23 | class TestNomaULTfraV0:
 24 |     def test_reproducibility(self, env64):
 25 |         M = env64.M
 26 |         np.random.seed(1234)
 27 |         actions = np.random.randint(0,64,size=(100,M))
 28 |         env64.seed(seed=1234)
 29 |         states = []
 30 |         rewards = []
 31 |         dones = []
 32 |         for action in actions:
 33 |             state, reward, done, _ = env64.step(action)
 34 |             states.append(state)
 35 |             rewards.append(reward)
 36 |             dones.append(done)
 37 | 
 38 |         env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=64, n_ues_per_prb=M)  # Init environment
 39 |         env.seed(seed=1234)
 40 |         pt = 0
 41 |         for action in actions:
 42 |             state, reward, done, _ = env.step(action)
 43 |             np.testing.assert_array_equal(state, states[pt])
 44 |             assert reward == rewards[pt]
 45 |             assert done == dones[pt]
 46 |             pt += 1
 47 | 
 48 |     def test_variability(self, env64):
 49 |         M = env64.M
 50 |         np.random.seed(1234)
 51 |         actions = np.random.randint(0,64,size=(100,M))
 52 |         env64.seed(seed=1234)
 53 |         states = []
 54 |         rewards = []
 55 |         dones = []
 56 |         for action in actions:
 57 |             state, reward, done, _ = env64.step(action)
 58 |             states.append(state)
 59 |             rewards.append(reward)
 60 |             dones.append(done)
 61 | 
 62 |         env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=64, n_ues_per_prb=M)  # Init environment
 63 |         env.seed(seed=12345)
 64 |         pt = 0
 65 |         for action in actions:
 66 |             state, reward, done, _ = env.step(action)
 67 |             if not np.array_equal(state, states[pt]):
 68 |                 return
 69 |             if reward != rewards[pt]:
 70 |                 return
 71 |             if done != dones[pt]:
 72 |                 return
 73 |             pt += 1
 74 | 
 75 |         pytest.fail("Different seeds produced the same results.")
 76 | 
 77 |     def test_empty_action(self, env, env64):
 78 |         env.step([32]*env.M)
 79 |         env64.step([64]*env64.M)
 80 | 
 81 |     def test_state_features(self):
 82 |         n_ues = 64
 83 |         n_steps = 512
 84 |         M = 2
 85 |         env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=n_ues, eirp_dbm=7)  # Low power to have some CQI=0
 86 |         env.seed(seed=1234)
 87 | 
 88 |         state, _, _, _ = env.step([0]*M)  # Get state to measure its length
 89 |         states = np.zeros((n_steps, len(state)), dtype=np.uint32)  # Memory pre-allocation
 90 |         for t in range(n_steps):
 91 |             action = np.random.randint(0,n_ues,size=M)
 92 |             state, _, _, _ = env.step(action)
 93 |             states[t, :] = state
 94 | 
 95 |         # Check CQI range
 96 |         assert states[:, :n_ues].min() == 0
 97 |         assert states[:, :n_ues].max() == 15
 98 |         assert 0 < states[:, :n_ues].mean() < 15
 99 |         assert states[:, :n_ues].std() > 1
100 | 
101 |         # Check size (in bits) of packets in UEs' buffers
102 |         assert states[:, n_ues:n_ues + n_ues * env.L].min() == 0
103 |         assert states[:, n_ues:n_ues + n_ues * env.L].max() >= 41250
104 |         assert states[:, n_ues:n_ues + n_ues * env.L].mean() > 100
105 | 
106 |         # Check age (in ms) of packets in UEs' buffers
107 |         assert states[:, n_ues + n_ues * env.L:n_ues + 2*n_ues * env.L].min() == 0
108 |         assert states[:, n_ues + n_ues * env.L:n_ues + 2 * n_ues * env.L].max() > 10  # Less n_prbs  yield higher ages
109 | 
110 |         # TODO: Maybe also check QI
111 | 
112 |         # Check PRB counter
113 |         assert states[:, -1].min() == 0
114 |         assert states[:, -1].max() == env.Nf-1
115 | 
116 |     def test_consistency_superclass(self, env64):
117 |         M = env64.M
118 |         np.random.seed(1234)
119 |         ofdm_actions = np.random.randint(0,64,size=100)
120 |         env64.seed(seed=1234)
121 |         states = []
122 |         rewards = []
123 |         dones = []
124 |         for action in ofdm_actions:
125 |             noma_action = [action]+[64]*(M-1)
126 |             state, reward, done, _ = env64.step(noma_action)
127 |             states.append(state)
128 |             rewards.append(reward)
129 |             dones.append(done)
130 | 
131 |         env = gym.make('TimeFreqResourceAllocation-v0', n_ues=64)  # Init TimeFreqResourceAllocation-V0 environment
132 |         env.seed(seed=1234)
133 |         pt = 0
134 |         for action in ofdm_actions:
135 |             state, reward, done, _ = env.step(action)
136 |             np.testing.assert_array_equal(state, states[pt])
137 |             assert reward == rewards[pt]
138 |             assert done == dones[pt]
139 |             pt += 1


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Wireless Suite
  2 | 
  3 | ## Overview
  4 | Wireless Suite is a collection of problems in wireless telecommunications.
  5 | 
  6 | Comparing research results in telecoms remains a challenge due to the lack of standard problem implementations against
  7 | which to benchmark.
  8 | To solve this, Wireless Suite implements some well-known problems, built as Open-AI Gym compatible classes.
  9 | These are intended to establish performance benchmarks, stimulate reproducible research and foster quantitative
 10 | comparison of algorithms for telecommunication problems.
 11 | 
 12 | ## Getting started
 13 | The code has been tested to work on Python 3.7 under Windows 10.
 14 | 
 15 | 1. Get the code:
 16 |     ```
 17 |     git clone https://github.com/nokia/wireless-suite.git
 18 |     ```
 19 | 
 20 | 2. Use `pip3` to install the package:
 21 |    ```
 22 |    cd wireless-suite
 23 |    pip3 install .
 24 |    ```
 25 | 
 26 | 3. **OPTIONAL**: Modify the script *scripts/launch_agent.py* to execute a problem of your choosing.
 27 | 
 28 | 4. **OPTIONAL**: Modify the configuration of your problem at *config/config_environment.json*.
 29 | 
 30 | 5. Simulate an agent-environment interaction:
 31 |     ```
 32 |    cd wireless/scripts
 33 |    python launch_agent.py
 34 |    ```
 35 | 
 36 | ## Provided problems 
 37 | 
 38 | ### TimeFreqResourceAllocation-v0
 39 | This environment simulates an OFDM resource allocation task, where a limited number of frequency resources are to be
 40 | allocated to a large number of User Equipments (UEs) over time.
 41 | An agent interacting with this environment plays the role of the MAC scheduler. On each time step, the agent must
 42 | allocate one frequency resource to one of a large number of UEs. The agent gets rewarded for these resource allocation
 43 | decisions. The reward increases with the number of UEs whose traffic requirements are satisfied.
 44 | The traffic requirements for each UE are expressed in terms of their Guaranteed Bit Rate (if any) and their Packet
 45 | Delay Budget (PDB).
 46 | 
 47 | You are invited to develop a new agent that interacts with this environment and takes effective resource allocation
 48 | decisions.
 49 | Five sample agents are provided for reference in the *wireless/agents* folder.
 50 | The performance obtained by the default agents on the default environment configuration is:
 51 | * Random                          -69590
 52 | * Round Robin                     -69638
 53 | * Round Robin IfTraffic           -3284
 54 | * Proportional Fair               -9595
 55 | * Proportional Fair Channel Aware -1729
 56 | 
 57 | Note that the above average rewards are negative values. The best performing agent is thus the Proportional Fair Channel Aware.
 58 | 
 59 | Additional details about this problem are provided in document *wireless/doc/TimeFreqResourceAllocation-v0.pdf*
 60 | 
 61 | ### NomaULTimeFreqResourceAllocation-v0
 62 | This environment is an extension of the above TimeFreqResourceAllocation-v0 environment, with the difference that it
 63 | allows multiple UEs to be allocated on a time-frequency resource. It consists of an uplink power-domain NOMA system,
 64 | wherein the base station receives superimposed signals from the multiplexed UEs and performs successive interference
 65 | cancellation (SIC) to decode them. 
 66 | 
 67 | The default environment can be obtained by setting `"env": "NomaULTimeFreqResourceAllocation-v0"` and
 68 | `"n_ues_per_prb": 2` in *config/config_environment.json*. 
 69 | Two sample agents are provided for reference in the *wireless/agents* folder. 
 70 | The performance obtained on the default environment configuration is:
 71 | * Random                          -33499
 72 | * NOMA UL Proportional Fair Channel Aware -1431
 73 | 
 74 | ### UlOpenLoopPowerControl-v0
 75 | This environment simulates a free-space scenario with a Base Station located at coordinates [0, 0] and one UE at a
 76 | random location. On each time step, the UE moves linearly in a random direction with constant speed 2 m/s. The agent
 77 | interacting with the environment plays the role of the Base Station. On each time step the agent must select one of
 78 | four possible Power Control (PC) commands to increase/decrease the uplink transmit power. The objective is to measure an
 79 | uplink SNR as close as possible to the SNR target (4 dB by default).
 80 | 
 81 | See the file `envs/umts_olpc.py` for more details.
 82 | 
 83 | ### Evaluation
 84 | The simulated environment can be chosen by setting `"env": "TimeFreqResourceAllocation-v0"` or `"env": "NomaULTimeFreqResourceAllocation-v0"` in *config/config_environment.json*. The script *wireless/scripts/launch_agent.py* runs 16 episodes with a maximum of 65536 time steps each, and collects the reward
 85 | obtained by the agent on each time step. The result is calculated as the average reward obtained in all time steps on all episodes.
 86 | 
 87 | ## How to contribute
 88 | There are two main ways of contributing to Wireless Suite:
 89 | 
 90 | 1. **Implementing new problems**: This version of Wireless Suite contains three problem implementations. New
 91 | problems can be easily added as simple variations of the existing ones (e.g. by changing their parameters), or by introducing
 92 | fully new problem implementations (e.g. Adaptive Modulation and Coding, Open Loop Power Control, Handover optimization,
 93 | etc).
 94 | 
 95 | 2. **Implementing new agents**: Ideally, new agent contributions shall perform better than the default ones.
 96 | 
 97 | ## References
 98 | 1. [Open AI Gym Documentation](http://gym.openai.com/docs/)
 99 | 2. [How to create new environments for Gym](https://github.com/openai/gym/blob/master/docs/creating-environments.md)
100 | 3. [Sacred Documentation](https://sacred.readthedocs.io/en/stable/index.html)
101 | 
102 | 
103 | ## License
104 | 
105 | This project is licensed under the BSD-3-Clause license - see the [LICENSE](https://github.com/nokia/wireless-suite/blob/master/LICENSE).


--------------------------------------------------------------------------------
/wireless/scripts/launch_agent.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | import gym
  7 | import json
  8 | 
  9 | from sacred import Experiment
 10 | 
 11 | from wireless.agents.bosch_agent import BoschAgent
 12 | from wireless.agents.time_freq_resource_allocation_v0.round_robin_agent import *
 13 | from wireless.agents.time_freq_resource_allocation_v0.proportional_fair import *
 14 | from wireless.agents.noma_ul_time_freq_resource_allocation_v0.noma_ul_proportional_fair import *
 15 | 
 16 | # Load agent parameters
 17 | with open('../../config/config_agent.json') as f:
 18 |     ac = json.load(f)
 19 | 
 20 | # Configure experiment
 21 | with open('../../config/config_sacred.json') as f:
 22 |     sc = json.load(f)   # Sacred Configuration
 23 |     ns = sc["sacred"]["n_metrics_points"]  # Number of points per episode to log in Sacred
 24 |     ex = Experiment(ac["agent"]["agent_type"], save_git_info=False)
 25 |     ex.add_config(sc)
 26 |     ex.add_config(ac)
 27 | mongo_db_url = f'mongodb://{sc["sacred"]["sacred_user"]}:{sc["sacred"]["sacred_pwd"]}@' +\
 28 |                f'{sc["sacred"]["sacred_host"]}:{sc["sacred"]["sacred_port"]}/{sc["sacred"]["sacred_db"]}'
 29 | # ex.observers.append(MongoObserver(url=mongo_db_url, db_name=sc["sacred"]["sacred_db"]))  # Uncomment to save to DB
 30 | 
 31 | # Load environment parameters
 32 | with open('../../config/config_environment.json') as f:
 33 |     ec = json.load(f)
 34 |     ex.add_config(ec)
 35 | 
 36 | 
 37 | @ex.automain
 38 | def main(_run):
 39 |     n_eps = _run.config["agent"]["n_episodes"]
 40 |     t_max = _run.config['agent']['t_max']
 41 |     n_sf = t_max//_run.config['env']['n_prbs']  # Number of complete subframes to run per episode
 42 |     log_period_t = max(1, (n_sf//ns)*_run.config['env']['n_prbs'])  # Only log rwd on last step of each subframe
 43 | 
 44 |     rwd = np.zeros((n_eps, t_max))  # Memory allocation
 45 | 
 46 |     # Simulate
 47 |     for ep in range(n_eps):  # Run episodes
 48 |         if _run.config['env']['env'] == 'TimeFreqResourceAllocation-v0':
 49 |             env = gym.make('TimeFreqResourceAllocation-v0', n_ues=_run.config['env']['n_ues'],
 50 |                            n_prbs=_run.config['env']['n_prbs'], buffer_max_size=_run.config['env']['buffer_max_size'],
 51 |                            eirp_dbm=_run.config['env']['eirp_dbm'], f_carrier_mhz=_run.config['env']['f_carrier_mhz'],
 52 |                            max_pkt_size_bits=_run.config['env']['max_pkt_size_bits'],
 53 |                            it=_run.config['env']['non_gbr_traffic_mean_interarrival_time_ttis'])  # Init environment
 54 |             env.seed(seed=_run.config['seed'] + ep)
 55 | 
 56 |             # Init agent
 57 |             if ac["agent"]["agent_type"] == "random":
 58 |                 agent = RandomAgent(env.action_space)
 59 |                 agent.seed(seed=_run.config['seed'] + ep)
 60 |             elif ac["agent"]["agent_type"] == "round robin":
 61 |                 agent = RoundRobinAgent(env.action_space, env.K, env.L)
 62 |             elif ac["agent"]["agent_type"] == "round robin iftraffic":
 63 |                 agent = RoundRobinIfTrafficAgent(env.action_space, env.K, env.L)
 64 |             elif ac["agent"]["agent_type"] == "proportional fair":
 65 |                 agent = ProportionalFairAgent(env.action_space, env.K, env.L)
 66 |             elif ac["agent"]["agent_type"] == "proportional fair channel aware":
 67 |                 agent = ProportionalFairChannelAwareAgent(env.action_space, env.K, env.L)
 68 |             elif ac["agent"]["agent_type"] == "knapsack":
 69 |                 agent = Knapsackagent(env.action_space, env.K, env.L, env.Nf)
 70 |             elif ac["agent"]["agent_type"] == "Bosch":
 71 |                 agent = BoschAgent(env.action_space, env.K, env.L, env.max_pkt_size_bits)
 72 |             else:
 73 |                 raise NotImplemented
 74 |                 
 75 |         elif _run.config['env']['env'] == 'NomaULTimeFreqResourceAllocation-v0':
 76 |             env = gym.make('NomaULTimeFreqResourceAllocation-v0', n_ues=_run.config['env']['n_ues'],
 77 |                            n_prbs=_run.config['env']['n_prbs'], n_ues_per_prb=_run.config['env']['n_ues_per_prb'], buffer_max_size=_run.config['env']['buffer_max_size'],
 78 |                            eirp_dbm=_run.config['env']['eirp_dbm'], f_carrier_mhz=_run.config['env']['f_carrier_mhz'],
 79 |                            max_pkt_size_bits=_run.config['env']['max_pkt_size_bits'],
 80 |                            it=_run.config['env']['non_gbr_traffic_mean_interarrival_time_ttis'])  # Init environment
 81 |             env.seed(seed=_run.config['seed'] + ep)
 82 |             
 83 |             # Init agent
 84 |             if ac["agent"]["agent_type"] == "random":
 85 |                 agent = RandomAgent(env.action_space)
 86 |                 agent.seed(seed=_run.config['seed'] + ep)
 87 |             elif ac["agent"]["agent_type"] == "proportional fair channel aware":
 88 |                 agent = NomaULProportionalFairChannelAwareAgent(env.action_space, env.K, env.M, env.L, env.n_mw, env.SINR_COEFF)
 89 |             else:
 90 |                 raise NotImplemented
 91 |         else:
 92 |             raise NotImplemented
 93 | 
 94 |         reward = 0
 95 |         done = False
 96 |         state = env.reset()
 97 |         for t in range(t_max):  # Run one episode
 98 |             # Collect progress
 99 |             if t_max < ns or (t > 0 and (t+1) % log_period_t == 0):  # If it's time to log
100 |                 s = np.reshape(state[env.K:env.K * (1 + env.L)], (env.K, env.L))
101 |                 qi_ohe = np.reshape(state[env.K+2*env.K*env.L:5*env.K + 2*env.K*env.L], (env.K, 4))
102 |                 qi = [np.where(r == 1)[0][0] for r in qi_ohe]  # Decode One-Hot-Encoded QIs
103 |                 for u in range(0, env.K, env.K//2):  # Log KPIs for some UEs
104 |                     _run.log_scalar(f"Episode {ep}. UE {u}. CQI vs time step", state[u], t)
105 |                     _run.log_scalar(f"Episode {ep}. UE {u}. Buffer occupancy [bits] vs time step", np.sum(s[u, :]), t)
106 |                     _run.log_scalar(f"Episode {ep}. UE {u}. QoS Identifier vs time step", qi[u], t)
107 | 
108 |             action = agent.act(state, reward, done)
109 |             state, reward, done, _ = env.step(action)
110 | 
111 |             # Collect progress
112 |             if t_max < ns or (t > 0 and (t+1) % log_period_t == 0):
113 |                 _run.log_scalar(f"Episode {ep}. Rwd vs time step", reward, t)
114 | 
115 |             rwd[ep, t] = reward
116 |             if done:
117 |                 break
118 |             if (ep*t_max + t) % log_period_t == 0:
119 |                 print(f"{(ep*t_max + t)*100/(n_eps*t_max):3.0f}% completed.")
120 | 
121 |         env.close()
122 | 
123 |     if n_eps > 1:
124 |         rwd_avg = np.mean(rwd, axis=0)
125 |         for t in range(t_max):
126 |             if t_max < ns or (t > 0 and (t+1) % log_period_t == 0):  # If it's time to log
127 |                 _run.log_scalar(f"Mean rwd vs time step", rwd_avg[t], t)
128 | 
129 |     result = np.mean(rwd)  # Save experiment result
130 |     print(f"Result: {result}")
131 |     return result
132 | 


--------------------------------------------------------------------------------
/wireless/envs/time_freq_resource_allocation_v0.py:
--------------------------------------------------------------------------------
  1 | """
  2 | © 2020 Nokia
  3 | Licensed under the BSD 3 Clause license
  4 | SPDX-License-Identifier: BSD-3-Clause
  5 | """
  6 | import random
  7 | from math import floor, ceil
  8 | 
  9 | import numpy as np
 10 | from gym import spaces, Env
 11 | from scipy import constants
 12 | 
 13 | from ..utils.misc import calculate_thermal_noise
 14 | from ..utils.prop_model import PropModel
 15 | 
 16 | 
 17 | class TimeFreqResourceAllocationV0(Env):
 18 |     metadata = {
 19 |         'render.modes': ['human', 'rgb_array']
 20 |     }
 21 | 
 22 |     bw_mhz = 5  # System bandwidth
 23 |     max_pkt_size_bits = 5096
 24 |     x_max_m = 1000
 25 |     y_max_m = 1000
 26 | 
 27 |     SINR_COEFF = 8  # Rho coefficient to map SINR to spectral efficient. See G. Piro 2011 paper.
 28 | 
 29 |     def __init__(self, n_ues=32, n_prbs=25, buffer_max_size=32, eirp_dbm=13, f_carrier_mhz=2655,
 30 |                  max_pkt_size_bits=41250, it=10, t_max=65536):
 31 |         super().__init__()
 32 |         self._seed = None
 33 |         self.K = n_ues  # Number of UEs
 34 |         self.Nf = n_prbs  # Number of Physical Resource Blocks (PRBs)
 35 |         self.L = buffer_max_size  # Maximum number of packets per UE buffer
 36 |         self.it = it  # Mean inter-packet arrival time for Non-GBR traffic
 37 |         self.EIRP_DBM = eirp_dbm
 38 |         self.f_carrier_mhz = f_carrier_mhz  # Carrier frequency
 39 |         self.max_pkt_size_bits = max_pkt_size_bits
 40 |         self.t_max = t_max
 41 |         self.tti_max = ceil(t_max/n_prbs)
 42 | 
 43 |         self.bts_pos = [self.x_max_m / 2, self.y_max_m / 2]
 44 |         self.propagation_model = PropModel(self.f_carrier_mhz)
 45 |         self.n_mw = calculate_thermal_noise(self.bw_mhz * 1E-6)
 46 | 
 47 |         self.low = np.array([0] * self.K +  # CQI
 48 |                             [0] * self.K * self.L +  # Size (in bits) of packets in UEs' buffers
 49 |                             [0] * self.K * self.L +  # Age (in ms) of packets in UEs' buffers
 50 |                             [0, 0, 0, 0] * self.K +  # QoS Identifier classes (ohe) of all UEs
 51 |                             [0])  # Index of the current PRB being allocated
 52 |         self.high = np.array([15] * self.K +  # CQI
 53 |                              [self.max_pkt_size_bits] * self.K * self.L +  # Size (in bits) of packets in UEs' buffers
 54 |                              [self.tti_max] * self.K * self.L +  # Age (in ms) of packets in UEs' buffers
 55 |                              [1, 1, 1, 1] * self.K +  # QoS Identifier classes (ohe) of all UEs
 56 |                              [self.Nf - 1])  # Index of the current PRB being allocated
 57 |         self.observation_space = spaces.Box(self.low, self.high, dtype=np.uint32)
 58 | 
 59 |         self.action_space = spaces.Discrete(self.K)
 60 |         self.reward_range = (0, 1)
 61 | 
 62 |         # Features of observation vector
 63 |         self.cqi = None
 64 |         self.s = None  # Sizes in bits of all packets in each UE's buffer
 65 |         self.e = None  # Ages in TTIs of all packets in each UE's buffer
 66 |         self.qi = None
 67 |         self.p = 0
 68 | 
 69 |         # Internal state features
 70 |         self.t = 0  # Time step
 71 |         self.tti = 0  # Transmission Time Interval (TTI) counter
 72 |         self.ue_pos = None  # UE positions in meters
 73 |         self.ue_v_mps = None  # UE speeds in meter/second
 74 |         self.ue_dir = None  # UE move direction in radians
 75 |         self.spectral_efficiency = None
 76 |         self.tti_next_pkt = None  # TTI of next incoming packet for each UE
 77 | 
 78 |         self.seed()
 79 |         self.reset()
 80 | 
 81 |         assert self.K % 4 == 0, "K must be a multiple of 4 in order to have the same number of UEs per QoS class."
 82 | 
 83 |     def reset(self):
 84 |         self.cqi = np.zeros(shape=(self.K,), dtype=np.uint8)
 85 |         self.s = np.zeros(shape=(self.K, self.L), dtype=np.uint32)
 86 |         self.e = np.zeros(shape=(self.K, self.L), dtype=np.uint32)
 87 |         self.qi = np.concatenate((np.repeat(np.array([[0, 0, 0, 1]]), self.K // 4, axis=0),
 88 |                                   np.repeat(np.array([[0, 0, 1, 0]]), self.K // 4, axis=0),
 89 |                                   np.repeat(np.array([[0, 1, 0, 0]]), self.K // 4, axis=0),
 90 |                                   np.repeat(np.array([[1, 0, 0, 0]]), self.K // 4, axis=0))
 91 |                                  )
 92 |         np.random.shuffle(self.qi)
 93 |         self.p = 0
 94 | 
 95 |         self.t = 0
 96 |         self.tti = 0
 97 |         self.ue_pos = np.random.uniform([0, 0], [self.x_max_m, self.y_max_m], size=(self.K, 2))  # Place UEs
 98 |         self.ue_v_mps = np.random.normal(1.36, scale=0.19, size=(self.K,))  # UE walking speeds in m/s
 99 |         self.ue_dir = np.random.uniform(0, 2 * constants.pi, size=(self.K,))  # UE move direction in radians
100 |         self.spectral_efficiency = np.zeros(shape=(self.K,))
101 |         self.tti_next_pkt = np.random.randint(8, size=(self.K,))  # TTI of first transmission for each UE
102 |         self._recalculate_rf()
103 |         self._generate_traffic()
104 |         self._update_state()
105 | 
106 |         return np.array(self.state)
107 | 
108 |     def seed(self, seed=0):
109 |         random.seed(seed)
110 |         np.random.seed(seed)
111 |         self.propagation_model.seed(seed=seed)
112 |         self._seed = seed
113 | 
114 |     def step(self, action):
115 |         assert self.action_space.contains(action), f"{action} ({type(action)}) invalid"
116 | 
117 |         # Execute action
118 |         if np.sum(self.s[action, :]) > 0:  # If packets exist in UE's buffer
119 |             # Find oldest packet in UE's buffer
120 |             mask = (self.s[action, :] > 0)
121 |             subset_idx = np.argmax(self.e[action, mask])
122 |             l_old = np.arange(self.L)[mask][subset_idx]
123 | 
124 |             assert self.s[action, l_old] > 0, f"t={self.t}. Oldest packet has size {self.s[action, l_old]} " +\
125 |                                               f"and age {self.e[action, l_old]}. " +\
126 |                                               f"User has {np.sum(self.s[action, :])} bits in buffer."  # Sanity check
127 |             tx_data_bits = floor(
128 |                 self.spectral_efficiency[action] * self.bw_mhz / self.Nf * 1E3)  # Bits that can be transmitted
129 |             while tx_data_bits > 0 and self.s[action, l_old] > 0:  # While there are packets & available capacity
130 |                 if tx_data_bits >= self.s[action, l_old]:  # Full packet transmission
131 |                     tx_data_bits -= self.s[action, l_old]
132 |                     self.s[action, l_old] = 0
133 |                     self.e[action, l_old] = 0
134 |                     l_old = np.argmax(self.e[action, :])  # Find oldest packet in UE's buffer
135 |                 else:  # Partial packet transmission
136 |                     self.s[action, l_old] -= tx_data_bits
137 |                     break
138 | 
139 |         reward = 0
140 |         self.t += 1  # Update time-step
141 |         self.p = self.t % self.Nf  # Update PRB counter
142 |         if self.p == 0:
143 |             reward = self._calculate_reward()
144 |             self.tti += 1  # Update TTI counter
145 |             self.e[self.s > 0] += 1  # Age buffer packets
146 |             self._generate_traffic()
147 |             self._move_ues()
148 |             self._recalculate_rf()
149 | 
150 |         self._update_state()
151 |         done = bool(self.t >= self.t_max)
152 |         return np.array(self.state), reward, done, {}
153 | 
154 |     def render(self, mode='human', close=False):
155 |         pass
156 | 
157 |     def _calculate_reward(self):
158 |         r_gbr = 0
159 |         r_non_gbr = 0
160 | 
161 |         for u, qi in enumerate(self.qi):
162 |             gbr_delayed_pkts = np.array([])
163 |             non_gbr_pkts = np.array([])
164 |             non_gbr_delayed_pkts = np.array([])
165 |             if np.array_equal(qi, [0, 0, 0, 1]):
166 |                 gbr_delayed_pkts = np.where(self.e[u, :] > 100)[0]
167 |             elif np.array_equal(qi, [0, 0, 1, 0]):
168 |                 gbr_delayed_pkts = np.where(self.e[u, :] > 150)[0]
169 |             elif np.array_equal(qi, [0, 1, 0, 0]):
170 |                 gbr_delayed_pkts = np.where(self.e[u, :] > 30)[0]
171 |             elif np.array_equal(qi, [1, 0, 0, 0]):
172 |                 non_gbr_delayed_pkts = np.where(self.e[u, :] > 300)[0]
173 |                 non_gbr_pkts = np.where(self.s[u, :] > 0)[0]
174 | 
175 |             if gbr_delayed_pkts.size > 0:
176 |                 r_gbr += np.sum(self.s[u, gbr_delayed_pkts])
177 | 
178 |             if non_gbr_delayed_pkts.size > 0:
179 |                 r_non_gbr += np.sum(self.s[u, non_gbr_delayed_pkts])
180 |             if non_gbr_pkts.size > 0:
181 |                 r_non_gbr += np.sum(self.s[u, non_gbr_pkts])
182 | 
183 |         return -r_gbr - r_non_gbr
184 | 
185 |     def _move_ues(self):
186 |         d_m = self.ue_v_mps * 1E-3  # Moved distance in meters
187 |         delta_x = d_m * np.cos(self.ue_dir)
188 |         delta_y = d_m * np.sin(self.ue_dir)
189 | 
190 |         for u, pos in enumerate(self.ue_pos):
191 |             if pos[0] + delta_x[u] > self.x_max_m or pos[0] + delta_x[u] < 0:
192 |                 delta_x[u] = -delta_x[u]
193 |                 self.ue_dir[u] = np.random.uniform(0, 2 * constants.pi)  # UE move direction in radians
194 |             if pos[1] + delta_y[u] > self.y_max_m or pos[1] + delta_y[u] < 0:
195 |                 delta_y[u] = -delta_y[u]
196 |                 self.ue_dir[u] = np.random.uniform(0, 2 * constants.pi)  # UE move direction in radians
197 | 
198 |         self.ue_pos[:, 0] += delta_x
199 |         self.ue_pos[:, 1] += delta_y
200 | 
201 |     def _recalculate_rf(self):
202 |         distances_m = np.linalg.norm(self.ue_pos - self.bts_pos, axis=1)
203 |         pathloss_db = self.propagation_model.get_free_space_pl_db(distances_m, shadowing_db=6)
204 |         rx_pwr_dbm = self.EIRP_DBM - pathloss_db  # Received power
205 |         self._calculate_spectral_efficiency(rx_pwr_dbm)
206 |         self._spectral_efficiency_to_cqi()
207 | 
208 |     def _calculate_spectral_efficiency(self, rx_pwr_dbm):
209 |         interference_dbm = -105  # Constant interference level throughout the coverage area
210 | 
211 |         p_mw = (10 ** (rx_pwr_dbm / 10))  # Rx power in mw
212 |         interference_mw = 10 ** (interference_dbm / 10)
213 | 
214 |         sinr = p_mw / (self.n_mw + interference_mw)
215 |         se = np.log2(1 + sinr / self.SINR_COEFF)  # DL spectral efficiency in bps/Hz
216 | 
217 |         self.spectral_efficiency = np.clip(se, 0, 9.6)  # Define an upper bound for the spectral efficiency.
218 | 
219 |     def _spectral_efficiency_to_cqi(self):
220 |         # As per Table 7.2.3-1 in TS 36.213 Rel-11
221 |         self.cqi[np.where(self.spectral_efficiency <= 0.1523)] = 0
222 |         self.cqi[np.where((0.1523 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.2344))] = 1
223 |         self.cqi[np.where((0.2344 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.3770))] = 2
224 |         self.cqi[np.where((0.3770 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.6016))] = 3
225 |         self.cqi[np.where((0.6016 < self.spectral_efficiency) & (self.spectral_efficiency <= 0.8770))] = 4
226 |         self.cqi[np.where((0.8770 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.1758))] = 5
227 |         self.cqi[np.where((1.1758 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.4766))] = 6
228 |         self.cqi[np.where((1.4766 < self.spectral_efficiency) & (self.spectral_efficiency <= 1.9141))] = 7
229 |         self.cqi[np.where((1.9141 < self.spectral_efficiency) & (self.spectral_efficiency <= 2.4063))] = 8
230 |         self.cqi[np.where((2.4063 < self.spectral_efficiency) & (self.spectral_efficiency <= 2.7305))] = 9
231 |         self.cqi[np.where((2.7305 < self.spectral_efficiency) & (self.spectral_efficiency <= 3.3223))] = 10
232 |         self.cqi[np.where((3.3223 < self.spectral_efficiency) & (self.spectral_efficiency <= 3.9023))] = 11
233 |         self.cqi[np.where((3.9023 < self.spectral_efficiency) & (self.spectral_efficiency <= 4.5234))] = 12
234 |         self.cqi[np.where((4.5234 < self.spectral_efficiency) & (self.spectral_efficiency <= 5.1152))] = 13
235 |         self.cqi[np.where((5.1152 < self.spectral_efficiency) & (self.spectral_efficiency <= 5.5547))] = 14
236 |         self.cqi[np.where(5.5547 < self.spectral_efficiency)] = 15
237 | 
238 |     def _generate_traffic(self):
239 |         for u, qi in enumerate(self.qi):
240 |             if self.tti == self.tti_next_pkt[u]:
241 |                 buffer_gaps = np.where(self.s[u, :] == 0)[0]  # Find slots for packets in the queue.
242 |                 if buffer_gaps.size == 0:  # Large negative rwd unnecessary b/c rwd is already max due to full buffer.
243 |                     print(f"Buffer overflow. Disregarding new GBR (Conversational Voice) packet for UE {u}.")
244 |                     g = None
245 |                 else:
246 |                     g = buffer_gaps[0]  # First available slot in buffer
247 |                     self.e[u, g] = 0  # Set the age of this new packet to 0
248 | 
249 |                 if np.array_equal(qi, [0, 0, 0, 1]):  # 3: GBR (Conversational Voice)
250 |                     if buffer_gaps.size > 0:
251 |                         self.s[u, g] = 584
252 |                     self.tti_next_pkt[u] = self.tti + 20
253 |                 elif np.array_equal(qi, [0, 0, 1, 0]):  # 2: GBR (Conversational Video)
254 |                     # TODO: Use perhaps a more complex video traffic model such as the Markov-modulated Gamma model.
255 |                     if buffer_gaps.size > 0:
256 |                         self.s[u, g] = 41250
257 |                     self.tti_next_pkt[u] = self.tti + 33
258 |                 elif np.array_equal(qi, [0, 1, 0, 0]):  # 1: Delay Critical GBR
259 |                     if buffer_gaps.size > 0:
260 |                         self.s[u, g] = 200
261 |                     self.tti_next_pkt[u] = self.tti + 20
262 |                 elif np.array_equal(qi, [1, 0, 0, 0]):  # 0: Non-GBR
263 |                     # Inspired by: https://www.nsnam.org/docs/models/html/applications.html?highlight=traffic%20model
264 |                     if buffer_gaps.size > 0:
265 |                         self.s[u, g] = min(max(1, np.random.geometric(1 / 20000)), self.max_pkt_size_bits)
266 |                     self.tti_next_pkt[u] = self.tti + np.random.geometric(1 / self.it)
267 | 
268 |                 if buffer_gaps.size > 0:
269 |                     assert 1 <= self.s[u, g] <= self.max_pkt_size_bits, f"Packet size {self.s[u, g]} out of range."
270 | 
271 |     def _update_state(self):
272 |         self.state = np.concatenate((self.cqi, self.s.flatten(), self.e.flatten(), self.qi.flatten(), [self.p]))
273 | 


--------------------------------------------------------------------------------