├── .gitignore ├── NSPRs ├── box.graphml ├── dummy_NSPR_1.graphml └── dummy_NSPR_2.graphml ├── PSNs ├── hadrl_1-16_5-10_15-4.graphml ├── hadrl_psn.graphml ├── hadrl_psn_1-10_1-6_1-4.graphml ├── heenso_1-16_5-10_15-4.graphml ├── new_hadrl_1-16_5-10_15-4.graphml ├── simple_hadrl_psn.graphml ├── waxman_100_servers.graphml ├── waxman_20_servers.graphml └── waxman_50_servers.graphml ├── README.md ├── requirements.txt └── src ├── callbacks ├── __init__.py ├── acceptance_ratio_callbacks.py ├── hparam_callback.py ├── psn_load_callback.py └── seen_nsprs_callback.py ├── demo.py ├── eval_script.py ├── heuristic_layers.py ├── network_simulator.py ├── policies ├── __init__.py ├── features_extractors │ ├── __init__.py │ └── hadrl_features_extractor.py ├── hadrl_policy.py └── mlp_extractors │ ├── __init__.py │ └── hadrl_mlp_extractor.py ├── reader.py ├── spaces ├── __init__.py └── discrete_with_negatives.py ├── trainer.py ├── utils.py └── wrappers ├── __init__.py ├── dynamic_connectivity.py ├── hadrl_nsprs_generator.py ├── no_placement_state.py └── reset_with_load.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Intellij stuff 10 | .idea/ 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # project-specific stuff 135 | tb_logs*/ 136 | models*/ 137 | wandb/ 138 | .vscode/ 139 | -------------------------------------------------------------------------------- /NSPRs/box.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Box 20 | 10000 21 | 0 22 | 4 23 | 24 | 25 | 4 26 | 50 27 | 28 | 29 | 10 30 | 5 31 | 32 | 33 | 1 34 | 5 35 | 36 | 37 | 8 38 | 72 39 | 40 | 41 | 42 | 10 43 | 10 44 | 45 | 46 | 10 47 | 10 48 | 49 | 50 | 10 51 | 10 52 | 53 | 54 | 10 55 | 10 56 | 57 | 58 | -------------------------------------------------------------------------------- /NSPRs/dummy_NSPR_1.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Triangle 20 | 10000 21 | 1 22 | 5 23 | 24 | 25 | 10 26 | 5 27 | 28 | 29 | 50 30 | 10 31 | 32 | 33 | 5 34 | 1 35 | 36 | 37 | 38 | 10 39 | 10 40 | 41 | 42 | 10 43 | 10 44 | 45 | 46 | 10 47 | 10 48 | 49 | 50 | -------------------------------------------------------------------------------- /NSPRs/dummy_NSPR_2.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Triangle 20 | 10000 21 | 2 
22 | 20 23 | 24 | 25 | 1 26 | 5 27 | 28 | 29 | 5 30 | 1 31 | 32 | 33 | 10 34 | 1 35 | 36 | 37 | 38 | 1 39 | 10 40 | 41 | 42 | 1 43 | 10 44 | 45 | 46 | 1 47 | 10 48 | 49 | 50 | -------------------------------------------------------------------------------- /PSNs/hadrl_psn_1-10_1-6_1-4.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | server 11 | 50 12 | 300 13 | 14 | 15 | server 16 | 50 17 | 300 18 | 19 | 20 | server 21 | 50 22 | 300 23 | 24 | 25 | server 26 | 50 27 | 300 28 | 29 | 30 | server 31 | 50 32 | 300 33 | 34 | 35 | server 36 | 50 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 300 43 | 44 | 45 | server 46 | 50 47 | 300 48 | 49 | 50 | server 51 | 50 52 | 300 53 | 54 | 55 | server 56 | 50 57 | 300 58 | 59 | 60 | server 61 | 50 62 | 300 63 | 64 | 65 | server 66 | 50 67 | 300 68 | 69 | 70 | server 71 | 50 72 | 300 73 | 74 | 75 | server 76 | 50 77 | 300 78 | 79 | 80 | server 81 | 50 82 | 300 83 | 84 | 85 | server 86 | 50 87 | 300 88 | 89 | 90 | server 91 | 50 92 | 300 93 | 94 | 95 | server 96 | 50 97 | 300 98 | 99 | 100 | server 101 | 50 102 | 300 103 | 104 | 105 | server 106 | 50 107 | 300 108 | 109 | 110 | switch 111 | 112 | 113 | switch 114 | 115 | 116 | switch 117 | 118 | 119 | router 120 | 121 | 122 | router 123 | 124 | 125 | router 126 | 127 | 128 | 100000 129 | 130 | 131 | 100000 132 | 133 | 134 | 100000 135 | 136 | 137 | 100000 138 | 139 | 140 | 100000 141 | 142 | 143 | 100000 144 | 145 | 146 | 100000 147 | 148 | 149 | 100000 150 | 151 | 152 | 100000 153 | 154 | 155 | 100000 156 | 157 | 158 | 100000 159 | 160 | 161 | 100000 162 | 163 | 164 | 100000 165 | 166 | 167 | 100000 168 | 169 | 170 | 100000 171 | 172 | 173 | 100000 174 | 175 | 176 | 10000 177 | 178 | 179 | 10000 180 | 181 | 182 | 10000 183 | 184 | 185 | 10000 186 | 187 | 188 | 100000 189 | 190 | 191 | 100000 192 | 193 | 194 | 10000 195 | 196 | 197 | 100000 198 | 199 | 200 | 100000 201 | 202 | HA-DRL PSN 203 | 204 | 205 
| -------------------------------------------------------------------------------- /PSNs/simple_hadrl_psn.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | server 11 | 50 12 | 300 13 | 14 | 15 | server 16 | 50 17 | 300 18 | 19 | 20 | server 21 | 50 22 | 300 23 | 24 | 25 | server 26 | 50 27 | 300 28 | 29 | 30 | server 31 | 50 32 | 300 33 | 34 | 35 | server 36 | 50 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 300 43 | 44 | 45 | server 46 | 50 47 | 300 48 | 49 | 50 | server 51 | 50 52 | 300 53 | 54 | 55 | server 56 | 50 57 | 300 58 | 59 | 60 | server 61 | 50 62 | 300 63 | 64 | 65 | server 66 | 50 67 | 300 68 | 69 | 70 | server 71 | 50 72 | 300 73 | 74 | 75 | switch 76 | 77 | 78 | switch 79 | 80 | 81 | switch 82 | 83 | 84 | switch 85 | 86 | 87 | router 88 | 89 | 90 | router 91 | 92 | 93 | router 94 | 95 | 96 | router 97 | 98 | 99 | 100000 100 | 101 | 102 | 100000 103 | 104 | 105 | 100000 106 | 107 | 108 | 100000 109 | 110 | 111 | 100000 112 | 113 | 114 | 100000 115 | 116 | 117 | 100000 118 | 119 | 120 | 100000 121 | 122 | 123 | 100000 124 | 125 | 126 | 100000 127 | 128 | 129 | 100000 130 | 131 | 132 | 10000 133 | 134 | 135 | 10000 136 | 137 | 138 | 100000 139 | 140 | 141 | 100000 142 | 143 | 144 | 100000 145 | 146 | 147 | 10000 148 | 149 | 150 | 100000 151 | 152 | 153 | 100000 154 | 155 | 156 | 100000 157 | 158 | 159 | 100000 160 | 161 | 162 | 100000 163 | 164 | HA-DRL PSN 165 | 166 | 167 | -------------------------------------------------------------------------------- /PSNs/waxman_20_servers.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | server 13 | 50 14 | 50 15 | 300 16 | 300 17 | 18 | 19 | server 20 | 50 21 | 50 22 | 300 23 | 300 24 | 25 | 26 | server 27 | 50 28 | 50 29 | 300 30 | 300 31 | 32 | 33 | server 34 | 50 35 | 50 36 | 300 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 50 43 | 300 44 | 
300 45 | 46 | 47 | server 48 | 50 49 | 50 50 | 300 51 | 300 52 | 53 | 54 | server 55 | 50 56 | 50 57 | 300 58 | 300 59 | 60 | 61 | server 62 | 50 63 | 50 64 | 300 65 | 300 66 | 67 | 68 | server 69 | 50 70 | 50 71 | 300 72 | 300 73 | 74 | 75 | server 76 | 50 77 | 50 78 | 300 79 | 300 80 | 81 | 82 | server 83 | 50 84 | 50 85 | 300 86 | 300 87 | 88 | 89 | server 90 | 50 91 | 50 92 | 300 93 | 300 94 | 95 | 96 | server 97 | 50 98 | 50 99 | 300 100 | 300 101 | 102 | 103 | server 104 | 50 105 | 50 106 | 300 107 | 300 108 | 109 | 110 | server 111 | 50 112 | 50 113 | 300 114 | 300 115 | 116 | 117 | server 118 | 50 119 | 50 120 | 300 121 | 300 122 | 123 | 124 | server 125 | 50 126 | 50 127 | 300 128 | 300 129 | 130 | 131 | server 132 | 50 133 | 50 134 | 300 135 | 300 136 | 137 | 138 | server 139 | 50 140 | 50 141 | 300 142 | 300 143 | 144 | 145 | server 146 | 50 147 | 50 148 | 300 149 | 300 150 | 151 | 152 | 10000 153 | 10000 154 | 155 | 156 | 10000 157 | 10000 158 | 159 | 160 | 10000 161 | 10000 162 | 163 | 164 | 10000 165 | 10000 166 | 167 | 168 | 10000 169 | 10000 170 | 171 | 172 | 10000 173 | 10000 174 | 175 | 176 | 10000 177 | 10000 178 | 179 | 180 | 10000 181 | 10000 182 | 183 | 184 | 10000 185 | 10000 186 | 187 | 188 | 10000 189 | 10000 190 | 191 | 192 | 10000 193 | 10000 194 | 195 | 196 | 10000 197 | 10000 198 | 199 | 200 | 10000 201 | 10000 202 | 203 | 204 | 10000 205 | 10000 206 | 207 | 208 | 10000 209 | 10000 210 | 211 | 212 | 10000 213 | 10000 214 | 215 | 216 | 10000 217 | 10000 218 | 219 | 220 | 10000 221 | 10000 222 | 223 | 224 | 10000 225 | 10000 226 | 227 | 228 | 10000 229 | 10000 230 | 231 | 232 | 10000 233 | 10000 234 | 235 | 236 | 10000 237 | 10000 238 | 239 | 240 | 10000 241 | 10000 242 | 243 | 244 | 10000 245 | 10000 246 | 247 | 248 | 10000 249 | 10000 250 | 251 | 252 | 10000 253 | 10000 254 | 255 | 256 | 10000 257 | 10000 258 | 259 | 260 | 10000 261 | 10000 262 | 263 | 264 | 10000 265 | 10000 266 | 267 | 268 | 10000 269 | 10000 270 | 271 | 272 | 10000 
273 | 10000 274 | 275 | 276 | 10000 277 | 10000 278 | 279 | 280 | 10000 281 | 10000 282 | 283 | 284 | 10000 285 | 10000 286 | 287 | 288 | 10000 289 | 10000 290 | 291 | 292 | 10000 293 | 10000 294 | 295 | 296 | 10000 297 | 10000 298 | 299 | 300 | 10000 301 | 10000 302 | 303 | 304 | 10000 305 | 10000 306 | 307 | 308 | 309 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepNetSlice 2 | ### _A Deep Reinforcement Learning Open-Source Toolkit for Network Slice Placement_ 3 | 4 | ## Demo 5 | ```bash 6 | cd src 7 | python demo.py 8 | ``` 9 | 10 | ## General training script structure 11 | ```python 12 | # create trainer object 13 | # It creates the model and the training and evaluation environments 14 | trainer = Trainer( ... ) # parameters description on trainer.py docstring 15 | 16 | # create list of training callbacks. 17 | callbacks = [ ... ] # see 'src/callbacks/' or Stable Baselines3 docs 18 | 19 | # train the model 20 | trainer.train( 21 | tot_steps=<...>, # number of overall training steps 22 | callbacks=callbacks, 23 | log_interval=<...>, # number of steps between each log 24 | wandb=<...>, # (bool) whether to use wandb logging 25 | ) 26 | ``` 27 | 28 | ## Directories structure 29 | - `NSPRs`: contains graphml files containing the definition of some Network Slice Placement Requests (NSPRs). 30 | These can also be created on the fly during training, with no need to read files. 31 | 32 | - `PSNs`: contains graphml files containing the definition of some Physical Substrate Networks (PSNs) architectures. 33 | 34 | - `src`: contains the source code of the toolkit. 35 | 36 | - `callbacks`: contains some training callbacks. 37 | All callbacks in the library [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can be used as well. 38 | 39 | - `policies`: contains the implmentation of policy networks. 
40 | It follows the nomenclature of [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) policies, where the policy nets are composed of a features extractor followed by a MlpExtractor. 41 | - `features_extractors`: contains the implementation of features extractors modules. 42 | - `mlp_extractors`: contains the implementation of mlp extractors modules. 43 | 44 | - `spaces`: contains the implementation of custom [Gym](https://github.com/openai/gym) / [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) spaces. 45 | 46 | - `wrappers`: contains the implementation of custom environment wrappers. 47 | Wrappers from [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can also be used. 48 | 49 | - `network_simulator.py`: contains the implementation of the environment. 50 | 51 | - `trainer.py`: contains the implementation of the trainer object (see demo). 52 | 53 | - `demo.py`: contains a demo script. 54 | 55 | 56 | ## Contributing 57 | Constributions are welcome! :rocket: 58 | 59 | To contribute: 60 | - If you want to **work on an open issue**, comment on that issue before opening a PR. 61 | - If you want to implement a **new feature** or an **improvement**, write about it in the Discussions tab. 
62 | 63 | ## Reference 64 | ``` 65 | Alex Pasquali, Vincenzo Lomonaco, Davide Bacciu and Federica Paganelli, 66 | Deep Reinforcement Learning for Network Slice Placement and the DeepNetSlice Toolkit, 67 | IEEE International Conference on Machine Learning for Communication and Networking, ICMLCN 2024, 5-8 May 2024, Stockholm, Sweden 68 | ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wheel<=0.38.4 2 | setuptools~=64.0 3 | gym~=0.21.0 4 | numpy~=1.23.4 5 | networkx~=2.8.7 6 | torch~=1.12.1 7 | stable-baselines3~=1.7.0 8 | sb3-contrib~=1.7.0 9 | torch-geometric~=2.1.0 10 | tensorboard~=2.10.0 11 | wandb~=0.13.4 12 | -------------------------------------------------------------------------------- /src/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .acceptance_ratio_callbacks import AcceptanceRatioByStepsCallback, AcceptanceRatioByNSPRsCallback 2 | from .hparam_callback import HParamCallback 3 | from .psn_load_callback import PSNLoadCallback 4 | from .seen_nsprs_callback import SeenNSPRsCallback 5 | -------------------------------------------------------------------------------- /src/callbacks/acceptance_ratio_callbacks.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | import gym 3 | import numpy as np 4 | from stable_baselines3.common.callbacks import BaseCallback 5 | from stable_baselines3.common.vec_env import VecEnv 6 | 7 | 8 | class AcceptanceRatioByStepsCallback(BaseCallback): 9 | """ 10 | A custom callback that derives from ``BaseCallback``. 11 | It logs the acceptance ratio on Tensorboard. 12 | 13 | :param env: environment 14 | :param name: name of the metric to log 15 | :param steps_per_tr_phase: number of steps that define a training phase. 
16 | The acceptance ratio is logged once per training phase. 17 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 18 | """ 19 | def __init__( 20 | self, 21 | env: gym.Env, 22 | name: str = "Acceptance ratio", 23 | steps_per_tr_phase: int = 1, 24 | verbose=0 25 | ): 26 | super(AcceptanceRatioByStepsCallback, self).__init__(verbose) 27 | self.env = env 28 | self.name = name 29 | self.steps_per_tr_phase = steps_per_tr_phase 30 | self.tot_to_subtract = None 31 | self.accepted_to_subtract = None 32 | # Those variables will be accessible in the callback 33 | # (they are defined in the base class) 34 | # The RL model 35 | # self.model = None # type: BaseAlgorithm 36 | # An alias for self.model.get_env(), the environment used for training 37 | # self.training_env = None # type: Union[gym.Env, VecEnv, None] 38 | # Number of time the callback was called 39 | # self.n_calls = 0 # type: int 40 | # self.num_timesteps = 0 # type: int 41 | # local and global variables 42 | # self.locals = None # type: Dict[str, Any] 43 | # self.globals = None # type: Dict[str, Any] 44 | # The logger object, used to report things in the terminal 45 | # self.logger = None # stable_baselines3.common.logger 46 | # # Sometimes, for event callback, it is useful 47 | # # to have access to the parent object 48 | # self.parent = None # type: Optional[BaseCallback] 49 | 50 | def _on_step(self) -> bool: 51 | """ 52 | This method will be called by the model after each call to `env.step()`. 53 | 54 | For child callback (of an `EventCallback`), this will be called 55 | when the event is triggered. 56 | 57 | :return: (bool) If the callback returns False, training is aborted early. 
58 | """ 59 | if self.n_calls % self.steps_per_tr_phase == 0: 60 | accepted_nsprs_per_env = np.array(self.env.get_attr("accepted_nsprs"), dtype=np.float32) 61 | tot_nsprs_per_env = np.array(self.env.get_attr("tot_seen_nsprs"), dtype=np.float32) 62 | if self.tot_to_subtract is None: # or self.accepted_to_subtract is None, either way 63 | self.tot_to_subtract = np.zeros_like(tot_nsprs_per_env) 64 | self.accepted_to_subtract = np.zeros_like(accepted_nsprs_per_env) 65 | accepted_nsprs_per_env -= self.accepted_to_subtract 66 | tot_nsprs_per_env -= self.tot_to_subtract 67 | accept_ratio_per_env = np.divide(accepted_nsprs_per_env, 68 | tot_nsprs_per_env, 69 | out=np.zeros_like(tot_nsprs_per_env), 70 | where=tot_nsprs_per_env != 0) 71 | overall_accept_ratio = np.mean(accept_ratio_per_env) 72 | self.logger.record(self.name, overall_accept_ratio) 73 | self.tot_to_subtract = tot_nsprs_per_env 74 | self.accepted_to_subtract = accepted_nsprs_per_env 75 | return True 76 | 77 | 78 | class AcceptanceRatioByNSPRsCallback(BaseCallback): 79 | """ 80 | A custom callback that derives from ``BaseCallback``. 81 | It logs the acceptance ratio on Tensorboard. 82 | 83 | :param env: environment 84 | :param name: name of the metric to log 85 | :param nsprs_per_tr_phase: number of NSPRs that define a training phase. 86 | The acceptance ratio is logged once per training phase. 
87 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 88 | """ 89 | def __init__( 90 | self, 91 | env: gym.Env, 92 | name: str = "Acceptance ratio", 93 | nsprs_per_tr_phase: int = 1000, 94 | verbose=0 95 | ): 96 | super().__init__(verbose) 97 | self.env = env 98 | self.name = name 99 | self.nsprs_per_tr_phase = nsprs_per_tr_phase 100 | # num of seen NSPRs to subtract form the total number of seen NSPRs (per env) 101 | self.seen_to_subtract = [0] * env.num_envs 102 | # num of accepted NSPRs to subtract form the total number of accepted NSPRs (per env) 103 | self.accepted_to_subtract = [0] * env.num_envs 104 | # num of seen NSPRs last considered for logging (per env), 105 | # used to ensure it loggs once per training phase 106 | self.last_seen = [0] * env.num_envs 107 | # num of accepted NSPRs during this training phase (per env) 108 | self.accepted_this_training_phase = [0] * env.num_envs 109 | # num of NSPRs seen during this training phase (per env) 110 | self.seen_this_training_phase = [0] * env.num_envs 111 | # acceptance ratio of each env 112 | self.acceptance_ratios = [Queue() for _ in range(env.num_envs)] 113 | # once an env is ready for logging, its cell is increased by 1, 114 | # and it is decreased by 1 when the acceptance ratio is logged 115 | self.ready_envs = np.zeros(shape=env.num_envs, dtype=int) 116 | 117 | def _on_step(self) -> bool: 118 | if isinstance(self.env, VecEnv): 119 | seen_nsprs = self.env.get_attr('tot_seen_nsprs') 120 | accepted_nsprs = self.env.get_attr('accepted_nsprs') 121 | else: 122 | seen_nsprs = [self.env.tot_seen_nsprs] 123 | accepted_nsprs = [self.env.accepted_nsprs] 124 | 125 | for env_idx in range(self.env.num_envs): 126 | if seen_nsprs[env_idx] > self.last_seen[env_idx] and seen_nsprs[env_idx] % self.nsprs_per_tr_phase == 0: 127 | self.ready_envs[env_idx] += 1 128 | self.last_seen[env_idx] = seen_nsprs[env_idx] 129 | # NSPRs seen and accepted in this training phase 130 | 
seen_this_tr_phase = seen_nsprs[env_idx] - self.seen_to_subtract[env_idx] 131 | accepted_this_tr_phase = accepted_nsprs[env_idx] - self.accepted_to_subtract[env_idx] 132 | # update how much to subtract to get the quantities for next tr phase 133 | self.seen_to_subtract[env_idx] = seen_nsprs[env_idx] 134 | self.accepted_to_subtract[env_idx] = accepted_nsprs[env_idx] 135 | # compute acceptance ratio 136 | try: 137 | self.acceptance_ratios[env_idx].put(accepted_this_tr_phase / seen_this_tr_phase) 138 | except ZeroDivisionError: 139 | self.acceptance_ratios[env_idx].put(0.) 140 | 141 | if all(self.ready_envs): 142 | ratios = [self.acceptance_ratios[env_idx].get() for env_idx in range(self.env.num_envs)] 143 | self.logger.record(self.name, np.mean(ratios)) 144 | self.ready_envs -= 1 145 | 146 | return True 147 | -------------------------------------------------------------------------------- /src/callbacks/hparam_callback.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.callbacks import BaseCallback 2 | from stable_baselines3.common.logger import HParam 3 | 4 | 5 | class HParamCallback(BaseCallback): 6 | def __init__( 7 | self, 8 | n_tr_envs: int = None, 9 | n_eval_envs: int = None, 10 | tr_nsprs_per_ep: int = None, 11 | tr_psn_load: float = None, 12 | tr_max_ep_steps: int = None, 13 | eval_nsprs_per_ep: int = None, 14 | eval_psn_load: float = None, 15 | eval_max_ep_steps: int = None, 16 | vnfs_per_nsprs: int = None, 17 | use_placement_state: bool = None, 18 | use_heuristic: bool = False, 19 | heu_kwargs: dict = None, 20 | ): 21 | """ 22 | Saves the hyperparameters and metrics at the start of the training, 23 | and logs them to TensorBoard. 
24 | 25 | :param n_tr_envs: number of training environments 26 | """ 27 | super().__init__() 28 | self.n_tr_envs = n_tr_envs 29 | self.n_eval_envs = n_eval_envs 30 | self.tr_nsprs_per_ep = tr_nsprs_per_ep 31 | self.tr_psn_load = tr_psn_load 32 | self.tr_max_ep_steps = tr_max_ep_steps 33 | self.eval_nsprs_per_ep = eval_nsprs_per_ep 34 | self.eval_psn_load = eval_psn_load 35 | self.eval_max_ep_steps = eval_max_ep_steps 36 | self.vnfs_per_nspr = vnfs_per_nsprs 37 | self.use_placement_state = use_placement_state, 38 | self.use_heuristic = use_heuristic 39 | self.heu_kwargs = heu_kwargs if heu_kwargs is not None else {} 40 | if 'heu_class' in self.heu_kwargs: 41 | self.heu_class = self.heu_kwargs['heu_class'](None, None, None).__class__.__name__ 42 | else: 43 | self.heu_class = None 44 | 45 | def _on_training_start(self) -> None: 46 | try: 47 | gcn_layers_dims = str(self.model.policy.features_extractor.gcn_layers_dims) 48 | except AttributeError: 49 | gcn_layers_dims = str(self.model.policy.gcn_layers_dims) 50 | 51 | hparam_dict = { 52 | "algorithm": self.model.__class__.__name__, 53 | "n training envs": self.n_tr_envs, 54 | "n eval envs": self.n_eval_envs, 55 | "n steps before update": self.model.n_steps, 56 | "learning rate": self.model.learning_rate, 57 | "gamma": self.model.gamma, 58 | "entropy coefficient": self.model.ent_coef, 59 | "NSPRs per training episode": self.tr_nsprs_per_ep, 60 | "max steps per training episode": self.tr_max_ep_steps, 61 | "PSN load (training)": self.tr_psn_load, 62 | "NSPRs per eval episode": self.eval_nsprs_per_ep, 63 | "PSN load (eval)": self.eval_psn_load, 64 | "max steps per eval episode": self.eval_max_ep_steps, 65 | "VNFs/NSPR": self.vnfs_per_nspr, 66 | "GCN layers dimensions": gcn_layers_dims, 67 | "Use placement state": str(self.use_placement_state), 68 | "Use heuristic": self.use_heuristic, 69 | "Heuristic class": self.heu_class, 70 | "heu's num sampled servers": self.heu_kwargs.get("n_servers_to_sample", None), 71 | "heu's eta": 
self.heu_kwargs.get("eta", None), 72 | "heu's xi": self.heu_kwargs.get("xi", None), 73 | "heu's beta": self.heu_kwargs.get("beta", None), 74 | } 75 | # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag 76 | # Tensorboard will find & display metrics from the `SCALARS` tab 77 | metric_dict = { 78 | "Acceptance ratio": 0, 79 | "Eval acceptance ratio": 0, 80 | "eval/mean_reward": 0, 81 | "rollout/ep_rew_mean": 0, 82 | "train/entropy_loss": 0, 83 | "train/policy_loss": 0, 84 | "train/value_loss": 0, 85 | } 86 | self.logger.record( 87 | "hparams", 88 | HParam(hparam_dict, metric_dict), 89 | exclude=("stdout", "log", "json", "csv"), 90 | ) 91 | 92 | def _on_step(self) -> bool: 93 | return True 94 | -------------------------------------------------------------------------------- /src/callbacks/psn_load_callback.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import gym 4 | import numpy as np 5 | from stable_baselines3.common.callbacks import BaseCallback 6 | 7 | 8 | class PSNLoadCallback(BaseCallback): 9 | """ 10 | Class for logging the load of the PSN. 
11 | 12 | :param env: environment 13 | :param freq: logging frequency (in number of steps) 14 | :param cpu: if True, track CPU load 15 | :param ram: if True, track RAM load 16 | :param bw: if True, track BW load 17 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 18 | """ 19 | def __init__( 20 | self, 21 | env: gym.Env, 22 | freq: int, 23 | cpu: bool = True, 24 | ram: bool = True, 25 | bw: bool = True, 26 | verbose: int = 0 27 | ): 28 | super(PSNLoadCallback, self).__init__(verbose) 29 | self.env = env 30 | self.freq = freq 31 | self.cpu, self.ram, self.bw = cpu, ram, bw 32 | 33 | def _on_step(self) -> bool: 34 | if self.n_calls % self.freq == 0: 35 | cpu_loads, ram_loads, bw_loads = [], [], [] 36 | observations = self.env.get_attr('obs_dict') 37 | for e, obs in enumerate(observations): 38 | # get the available CPU and RAM for each server 39 | serv_cpu_avails, serv_ram_avails = [], [] 40 | for idx in self.env.get_attr('servers_map_idx_id')[e].keys(): 41 | serv_cpu_avails.append(obs['cpu_avails'][idx]) 42 | serv_ram_avails.append(obs['ram_avails'][idx]) 43 | avail_cpu_perc = np.sum(serv_cpu_avails) * self.env.get_attr('max_cpu')[e] / self.env.get_attr('tot_cpu_cap')[e] 44 | avail_ram_perc = np.sum(serv_ram_avails) * self.env.get_attr('max_ram')[e] / self.env.get_attr('tot_ram_cap')[e] 45 | cpu_loads.append(1. - avail_cpu_perc) 46 | ram_loads.append(1. - avail_ram_perc) 47 | # get the available BW for each link 48 | link_bw_avails_perc = [] 49 | for link in self.env.get_attr('psn')[e].edges.values(): 50 | link_bw_avails_perc.append(link['availBW'] / link['BWcap']) 51 | bw_loads.append(1. 
- np.mean(link_bw_avails_perc)) 52 | try: 53 | if self.cpu: 54 | avg_cpu_load = np.mean(cpu_loads) 55 | self.logger.record("Average CPU load of training envs", avg_cpu_load) 56 | if self.ram: 57 | avg_ram_load = np.mean(ram_loads) 58 | self.logger.record("Average RAM load of training envs", avg_ram_load) 59 | if self.bw: 60 | avg_bw_load = np.mean(bw_loads) 61 | self.logger.record("Average BW load of training envs", avg_bw_load) 62 | if self.verbose > 0: 63 | try: 64 | print(f"Average CPU load of training envs: {avg_cpu_load}") 65 | print(f"Average RAM load of training envs: {avg_ram_load}") 66 | print(f"Average BW load of training envs: {avg_bw_load}") 67 | except NameError: 68 | # in case some variables are not defined. It means we're not tracking that load 69 | pass 70 | except AttributeError: 71 | warnings.warn("No logger for resources load callback, data not being logged") 72 | 73 | return True 74 | -------------------------------------------------------------------------------- /src/callbacks/seen_nsprs_callback.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.callbacks import BaseCallback 2 | import gym 3 | import numpy as np 4 | 5 | 6 | class SeenNSPRsCallback(BaseCallback): 7 | """ 8 | Class for logging the number of seen NSPRs so far. 9 | 10 | It logs the average number of seen NSPRs for each environment. 11 | The average is chosen, instead of the sum, because the loss is based on the 12 | average of the "values" in the various steps: 13 | - policy_loss = -(advantages * log_prob).mean() 14 | - value_loss = F.mse_loss(rollout_data.returns, values) 15 | - entropy_loss = -th.mean(entropy) 16 | If there are multiple parallel envs, the "values" of each env are flattened, 17 | and again the average is computed for the loss. 18 | Therefore, we don't have more updates if we have more envs, just more precise. 
19 | If 2 envs have seen 10 NSPRs, it's not like an env has seen 20 (in terms of updates and steps). 20 | 21 | :param env: environment 22 | :param freq: logging frequency (in number of steps) 23 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 24 | """ 25 | def __init__( 26 | self, 27 | env: gym.Env, 28 | freq: int = 1, 29 | verbose: int = 0 30 | ): 31 | super().__init__(verbose) 32 | self.env = env 33 | self.freq = freq 34 | 35 | def _on_step(self) -> bool: 36 | if self.n_calls % self.freq == 0: 37 | # log the number of seen NSPRs 38 | seen_nsprs_per_env = self.env.get_attr('tot_seen_nsprs') 39 | # why the mean and not the sum, you ask? Read the docstring of the class 40 | avg_seen_nsprs = int(round(np.mean(seen_nsprs_per_env))) 41 | self.logger.record("Avg seen NSPRs per env", avg_seen_nsprs) 42 | if self.verbose > 0: 43 | print(f"Average seen NSPRs per env: {avg_seen_nsprs}") 44 | return True -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback 2 | from callbacks.hparam_callback import HParamCallback 3 | from callbacks.psn_load_callback import PSNLoadCallback 4 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback 5 | from trainer import Trainer 6 | from wrappers.reset_with_load import ResetWithRealisticLoad 7 | from stable_baselines3.common.callbacks import EvalCallback 8 | from wandb.integration.sb3 import WandbCallback 9 | 10 | 11 | if __name__ == '__main__': 12 | # create trainer object. 13 | # It creates the model and the training and evaluation environments. 
import numpy as np
from tqdm import tqdm
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

from callbacks import PSNLoadCallback
from utils import make_env
from wrappers.reset_with_load import ResetWithRealisticLoad

if __name__ == '__main__':
    # load the trained model (no env attached: it is re-created below)
    model = A2C.load(
        path="/root/NSPR-simulator/wandb/run-20230103_155854-3o0vtz6x/files/model.zip",
        env=None,
        device='cpu',
        print_system_info=True,
        force_reset=True,  # True as default -> avoids unexpected behavior
    )

    # re-create the evaluation env with the same wrappers used at training time
    env = make_vec_env(
        env_id=make_env,
        n_envs=1,
        env_kwargs=dict(
            psn_path="../PSNs/waxman_20_servers.graphml",
            base_env_kwargs=dict(accumulate_reward=True),
            time_limit=True,
            time_limit_kwargs=dict(max_episode_steps=1000),
            hadrl_nsprs=True,
            hadrl_nsprs_kwargs=dict(
                nsprs_per_ep=1,
                vnfs_per_nspr=5,
                always_one=True
            ),
            reset_load_class=ResetWithRealisticLoad,
            reset_load_kwargs=dict(cpu_load=0.5),
            placement_state=True,
            dynamic_connectivity=True,
            dynamic_connectivity_kwargs=dict(link_bw=10_000),
        ),
        seed=12,
    )

    # evaluate the model: run until 'tot_nsprs' NSPRs have been fully evaluated
    obs = env.reset()
    accepted = seen = 0
    tot_nsprs = 10000
    pbar = tqdm(total=tot_nsprs)  # progress bar
    while seen < tot_nsprs:
        action, _ = model.predict(obs, deterministic=True)
        obs, rewards, done, info = env.step(action)
        # with accumulated rewards, a non-zero reward marks the end of a NSPR:
        # positive -> accepted, negative -> rejected
        if rewards[0] != 0.0:
            seen += 1
            pbar.update(1)
            if rewards[0] > 0.0:
                accepted += 1
        if done:
            obs = env.reset()
    pbar.close()

    # FIX: guard against ZeroDivisionError when no NSPR produced a terminal reward
    if seen > 0:
        print(f"Acceptance ratio: {accepted / seen}")
    else:
        print("Acceptance ratio: undefined (no NSPRs seen)")
class P2CLoadBalanceHeuristic(nn.Module):
    """ Layer executing the P2C (power-of-two-choices) load-balancing heuristic.

    It samples ``n_servers_to_sample`` candidate servers uniformly from the
    action space, scores each by the load balance it would have after hosting
    the current VNF, and biases the agent's scores towards the best candidate.
    """
    name = "P2C load balance heuristic"

    def __init__(
            self,
            action_space: "gym.spaces.Space",
            servers_map_idx_id: Dict[int, int],
            psn: "nx.Graph",
            n_servers_to_sample: int = 2,
            eta: float = 0.,
            xi: float = 1.,
            beta: float = 1.,  # TODO: when not 1, could cause NaNs
            **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param n_servers_to_sample: number of candidate servers sampled per decision
        :param eta: hyperparameter of the P2C heuristic (bonus added to the boosted score)
        :param xi: hyperparameter of the P2C heuristic (scales the boost)
        :param beta: hyperparameter of the P2C heuristic (exponent of the boost)
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta

    def forward(self, x: th.Tensor, obs: Dict[str, th.Tensor]) -> th.Tensor:
        """ Boost the score of the heuristic's pick so it at least matches the
        current max score (plus eta); other entries are left unchanged.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: batched observation dict from the environment
        :return: the (possibly) biased scores, same shape as x
        """
        n_envs = x.shape[0]
        max_values, max_idxs = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            return H  # it means no selected action by the heuristic
        for e in range(n_envs):
            heu_action = heu_selected_servers[e, :].item()
            # lift the heuristic's pick to the row max (+ eta)
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: Dict[str, th.Tensor], n_servers_to_sample: int) -> th.Tensor:
        """ P2C heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).

        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers, shape (n_envs, 1)
        """
        n_envs = obs['bw_avails'].shape[0]
        indexes = th.empty(n_envs, n_servers_to_sample, dtype=th.int)
        req_cpu = obs['cur_vnf_cpu_req']
        req_ram = obs['cur_vnf_ram_req']
        load_balances = th.empty(n_envs, n_servers_to_sample)
        for e in range(n_envs):
            for s in range(n_servers_to_sample):
                # actions (indexes of the servers in the servers list)
                indexes[e, s] = self.action_space.sample()
                # servers ids
                node_id = self.servers_map_idx_id[indexes[e, s].item()]
                # actual servers (nodes in the graph)
                node = self.psn.nodes[node_id]
                # compute the load balance of each server when placing the VNF
                cpu_load_balance = (node['availCPU'] - req_cpu[e]) / node['CPUcap']
                ram_load_balance = (node['availRAM'] - req_ram[e]) / node['RAMcap']
                load_balances[e, s] = cpu_load_balance + ram_load_balance

        # return the best server for each environment (the indexes)
        winners = th.argmax(load_balances, dim=1, keepdim=True)
        # FIX: 'winners' holds per-row COLUMN indices (argmax over dim=1,
        # keepdim=True), so the gather must run along dim 1, not dim 0
        # (dim 0 was out-of-bounds for n_envs < n_servers_to_sample and wrong
        # otherwise; HADRLHeuristic.HEU already gathers along dim 1).
        return th.gather(indexes, 1, winners)
class HADRLHeuristic(nn.Module):
    """ Heuristic layer from HA-DRL: samples feasible candidate servers and
    biases the agent's scores towards the candidate closest (in hops, over
    links with enough bandwidth) to the server hosting the previous VNF. """

    def __init__(
            self,
            action_space: "gym.spaces.Space",
            servers_map_idx_id: Dict[int, int],
            psn: "nx.Graph",
            bw_req_per_vl: int = 2000,
            n_servers_to_sample: int = 2,
            eta: float = 0.,
            xi: float = 1.,
            beta: float = 1.,  # TODO: when not 1, could cause NaNs
            **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param bw_req_per_vl: bandwidth a physical link must offer to count as usable
        :param n_servers_to_sample: number of candidate servers sampled per decision
        :param eta: hyperparameter of the heuristic (bonus added to the boosted score)
        :param xi: hyperparameter of the heuristic (scales the boost)
        :param beta: hyperparameter of the heuristic (exponent of the boost)
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.bw_req_per_vl = bw_req_per_vl
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta
        # per-env server chosen for the previous VNF (-1 = no previous VNF)
        self.prev_selected_servers = None
        self.n_envs = None

    def forward(self, x: th.Tensor, obs: Dict[str, th.Tensor]) -> th.Tensor:
        """ Boost the score of the heuristic's pick so it at least matches the
        current max score (plus eta); other entries are left unchanged.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: batched observation dict from the environment
        :return: the (possibly) biased scores, same shape as x
        """
        self.n_envs = x.shape[0]
        # (re-)initialize the per-env memory when the batch size changes
        if self.prev_selected_servers is None or self.n_envs != self.prev_selected_servers.shape[0]:
            self.prev_selected_servers = -th.ones(self.n_envs, dtype=th.int)
        max_values, max_idxs = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            # it means no selected action by the heuristic
            return H
        for e in range(self.n_envs):
            heu_action = heu_selected_servers[e, :].item()
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: Dict[str, th.Tensor], n_servers_to_sample: int) -> th.Tensor:
        """ HA-DRL heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).

        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers, shape (n_envs, 1), or a
            tensor of -1's if no feasible server exists
        """
        indexes = th.empty(self.n_envs, n_servers_to_sample, dtype=th.int)
        path_lengths = th.zeros(self.n_envs, n_servers_to_sample)
        all_actions = list(range(self.action_space.n))
        for e in range(self.n_envs):
            # random permutation of the actions
            all_actions = np.random.permutation(all_actions)
            for s in range(n_servers_to_sample):
                # instead of selecting first all the feasible servers and then
                # sampling on them, we go through a randomly ordered list of
                # all the actions and pick the first one which is feasible.
                # This way we don't run through all the servers every time.
                for i in range(s, len(all_actions)):
                    a = all_actions[i]
                    if self.action_is_feasible(a, obs, e):
                        indexes[e, s] = a
                        break
                else:
                    # FIX: the loop completed without finding a feasible action.
                    # The previous check `if i == len(all_actions) - 1` also
                    # fired when the LAST candidate was feasible (break on the
                    # last index), wrongly reporting "no choice"; for/else only
                    # runs when no break happened.
                    return -th.ones(self.n_envs, 1)

                # server ID
                server_id = self.servers_map_idx_id[indexes[e, s].item()]

                if self.prev_selected_servers[e] == -1:
                    # no previous VNF: every candidate is equally good
                    path_lengths[e, s] = -math.inf
                else:
                    # if the server was the one selected for the prev VNF, choose it
                    if self.prev_selected_servers[e] == server_id:
                        path_lengths[e, s] = -math.inf
                    else:
                        # evaluate bandwidth consumption when placing the current VNF on this server
                        path = nx.shortest_path(G=self.psn,
                                                source=self.prev_selected_servers[e].item(),
                                                target=server_id,
                                                weight=self.compute_link_weight,
                                                method='dijkstra')
                        path_lengths[e, s] = len(path)

        # return the best (closest) server for each environment (the indexes)
        winners = th.argmin(path_lengths, dim=1, keepdim=True)
        selected_servers = th.gather(indexes, 1, winners)
        self.prev_selected_servers = selected_servers.squeeze(dim=1)
        return selected_servers

    @staticmethod
    def action_is_feasible(a: int, obs: Dict[str, th.Tensor], env_idx: int):
        """ Check if it's feasible to place the current VNF on a specific server

        1. if a server has enough CPU and RAM to host this VNF and the next one
        (all VNFs are assumed to have identical requirements, if this is not the
        case, then you can see this as "if a server has enough CPU and RAM to
        host double the requirements of this VNF", like a greedy safety margin),
        then it is eligible.

        2. if a server has enough CPU and RAM to host only this VNF, then if it
        has enough bandwidth in its outgoing links to host the connection with
        the neighboring VNFs, then it is eligible.

        3. if a server does not have enough CPU or RAM to host the current VNF,
        then it is NOT eligible.

        :param a: action, i.e. a server index
        :param obs: instance of an observation from the environment
        :param env_idx: index of the environment (in case of vectorized envs)
        :return: true if the action is feasible, false otherwise
        """
        req_cpu = obs['cur_vnf_cpu_req'][env_idx].item()
        req_ram = obs['cur_vnf_ram_req'][env_idx].item()
        req_bw = obs['cur_vnf_bw_req'][env_idx].item()
        avail_cpu = obs['cpu_avails'][env_idx][a].item()
        avail_ram = obs['ram_avails'][env_idx][a].item()
        # FIX: .item() added for consistency with the other lookups (the
        # comparison below now yields a plain bool, not a 0-dim tensor)
        avail_bw = obs['bw_avails'][env_idx][a].item()

        if (avail_cpu >= 2 * req_cpu and avail_ram >= 2 * req_ram) or \
                (avail_cpu >= req_cpu and avail_ram >= req_ram and avail_bw >= req_bw):
            return True

        return False

    def compute_link_weight(self, source: int, target: int, link: dict):
        # usable links cost 1 hop; links without enough bandwidth are excluded
        # (infinite weight). Signature fixed by networkx's weight-callable API.
        return 1 if link['availBW'] >= self.bw_req_per_vl else math.inf
    def __init__(
            self,
            psn_file: str,
            nsprs_path: str = "../NSPRs/",
            nsprs_per_episode: Optional[int] = None,
            nsprs_max_duration: int = 100,
            accumulate_reward: bool = True,
            discount_acc_rew: bool = True,
            perc_avail_nodes: Optional[float] = 1.
    ):
        """ Constructor

        :param psn_file: GraphML file containing the definition of the PSN
        :param nsprs_path: either directory with the GraphML files defining the NSPRs or path to a single GraphML file
        :param nsprs_per_episode: max number of NSPRs to be evaluated in each episode. If None, there is no limit.
        :param nsprs_max_duration: (optional) max duration of the NSPRs.
        :param accumulate_reward: if true, the reward is accumulated and given to the agent only after each NSPRs
        :param discount_acc_rew: if true, an increasing discount factor is applied to the acceptance reward during each NSPR.
            It starts from the inverse of the number of VNFs in the NSPR and grows to 1.
        :param perc_avail_nodes: in case some action masking is implemented (i.e., env wrapped in ActionMasker
            wrapper from sb3-contrib), it specifies the percentage of available nodes w.r.t. the total.
        """
        super(NetworkSimulator, self).__init__()

        self.psn_file = psn_file
        self.psn = reader.read_psn(graphml_file=psn_file)  # physical substrate network
        self.nsprs_path = nsprs_path
        self.nsprs_per_episode = nsprs_per_episode
        self.accumulate_reward = accumulate_reward
        self.nsprs_seen_in_cur_ep = 0
        self.nsprs_max_duration = nsprs_max_duration
        self.done = False
        self.nsprs = None  # will be initialized in the reset method
        self.waiting_nsprs = []  # list of NSPRs that arrived already and are waiting to be evaluated
        self.cur_nspr = None  # used to keep track of the current NSPR being evaluated
        self.cur_nspr_unplaced_vnfs_ids = []  # used to keep track of the VNFs' IDs of the current NSPR that haven't been placed yet
        self.cur_vnf_id = None  # used to keep track of the current VNF being evaluated
        self._cur_vl_reqBW = 0  # auxiliary attribute needed in method 'self.compute_link_weight'
        self.time_step = 0  # keep track of current time step
        self.ep_number = 0  # keep track of current episode number
        self.tot_seen_nsprs = 0  # keep track of the number of NSPRs seen so far
        self.accepted_nsprs = 0  # for the overall acceptance ratio
        self.discount_acc_rew = discount_acc_rew  # whether or not to discount the acceptance reward
        self.acc_rew_disc_fact = 1.  # current discount factor for the acceptance reward
        self.base_acc_rew_disc_fact = 1.  # base discount factor for the acceptance reward

        # map (dict) between IDs of PSN's nodes and their respective index (see self._init_map_id_idx's docstring)
        nodes_ids = list(self.psn.nodes.keys())
        self.map_id_idx = {nodes_ids[idx]: idx for idx in range(len(nodes_ids))}

        # map (dict) between an index of a list (incrementing int) and the ID of a server
        # (agent actions are indexes into this map)
        servers_ids = [node_id for node_id, node in self.psn.nodes.items()
                       if node['NodeType'] == 'server']
        self.servers_map_idx_id = {idx: servers_ids[idx] for idx in range(len(servers_ids))}

        # partial rewards to be accumulated across the steps of evaluation of a single NSPR
        self._acceptance_rewards = []
        self._resource_consumption_rewards = []
        self._cur_resource_consumption_rewards = []
        self._load_balance_rewards = []

        # reward values for specific outcomes
        self.rval_accepted_vnf = 100
        self.rval_rejected_vnf = -100

        # Action space and observation space (gym.Env required attributes)
        ONE_BILLION = 1_000_000_000  # constant for readability
        n_nodes = len(self.psn.nodes)
        # action space = number of servers
        self.action_space = Discrete(len(servers_ids))
        self.observation_space = Dict({
            # PSN STATE (availabilities are normalized to [0, 1] by _init_obs_dict)
            'cpu_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            'ram_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, number of VNFs of the current NSPR placed on it
            'placement_state': Box(low=0, high=ONE_BILLION, shape=(n_nodes,), dtype=int),

            # NSPR STATE
            # note: apparently it's not possible to pass "math.inf" or "sys.maxsize" as a gym.spaces.Box's high value
            'cur_vnf_cpu_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'cur_vnf_ram_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'vnfs_still_to_place': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=int),
        })
        self._empty_psn_obs_dict = None  # used to store the observation resulting from an empty PSN
        self.obs_dict = self._init_obs_dict()  # used to store the current observation

        # action mask determining available actions. Init with all actions are available (it will be update in 'reset')
        self._action_mask = np.ones(shape=(len(servers_ids),), dtype=bool)
        # NOTE: 'assert' is stripped under 'python -O'; input validation only in debug runs
        assert 0. <= perc_avail_nodes <= 1.
        self.perc_avail_nodes = perc_avail_nodes

    @property
    def cur_vnf(self):
        # VNF attribute-dict of the current NSPR under evaluation (None between NSPRs)
        return self.cur_nspr.nodes[self.cur_vnf_id] if self.cur_nspr is not None else None

    def get_action_mask(self, env):
        """ Return the current per-server action mask.

        'action_mask' needs to be callable to be passed ActionMasker wrapper.
        note: env needs to be an argument for compatibility, but in this case it's useless
        """
        return self._action_mask

    def reset_partial_rewards(self):
        """ Resets the partial rewards (used in case a NSPR cannot be placed) """
        # NOTE(review): self._cur_resource_consumption_rewards is NOT cleared
        # here — confirm this is intentional (it is flushed in 'step')
        self._acceptance_rewards = []
        self._resource_consumption_rewards = []
        self._load_balance_rewards = []

    def enough_avail_resources(self, physical_node_id: int, vnf: dict) -> bool:
        """ Check that the physical node has enough resources to satisfy the VNF's requirements

        :param physical_node_id: ID of the physical node to check
        :param vnf: VNF to check
        :return: True if the physical node has enough resources to satisfy the VNF's requirements, False otherwise
        """
        idx = self.map_id_idx[physical_node_id]
        # availabilities in obs_dict are normalized, so the raw requirements
        # are normalized by the same maxima before comparing
        enough_cpu = self.obs_dict['cpu_avails'][idx] >= vnf['reqCPU'] / self.max_cpu
        enough_ram = self.obs_dict['ram_avails'][idx] >= vnf['reqRAM'] / self.max_ram
        return enough_cpu and enough_ram
    def restore_avail_resources(self, nspr: nx.Graph):
        """ Method called in case a NSPR is not accepted, or it has reached
        its departure time.
        Restores the PSN resources occupied by that NSPR.

        :param nspr: the rejected NSPR
        """
        if nspr is not None:
            # mark as departed so 'check_for_departed_nsprs' won't restore twice
            nspr.graph['departed'] = True
            for vnf_id, vnf in nspr.nodes.items():
                # restore nodes' resources availabilities
                # (vnf['placed'] holds the hosting node's ID, or a negative value if unplaced)
                if vnf['placed'] >= 0:
                    idx = self.map_id_idx[vnf['placed']]
                    self.obs_dict['cpu_avails'][idx] += vnf['reqCPU'] / self.max_cpu
                    self.obs_dict['ram_avails'][idx] += vnf['reqRAM'] / self.max_ram
                    self.obs_dict['placement_state'][idx] -= 1
            for _, vl in nspr.edges.items():
                # restore links' resources availabilities
                if vl['placed']:
                    # vl['placed'] is the list of the physical nodes traversed by the link
                    rewBW_normalized = vl['reqBW'] / self.max_bw
                    for i in range(len(vl['placed']) - 1):
                        id_1 = vl['placed'][i]
                        id_2 = vl['placed'][i + 1]
                        physical_link = self.psn.edges[id_1, id_2]
                        # recall that BW in physical links is actually updated
                        # (un-normalized), unlike the per-node obs entries
                        physical_link['availBW'] += vl['reqBW']
                        idx_1 = self.map_id_idx[id_1]
                        idx_2 = self.map_id_idx[id_2]
                        self.obs_dict['bw_avails'][idx_1] += rewBW_normalized
                        self.obs_dict['bw_avails'][idx_2] += rewBW_normalized

    def pick_next_nspr(self):
        """ Pick the next NSPR to be evaluated and updates the attribute 'self.cur_nspr' """
        if self.cur_nspr is None and self.waiting_nsprs:
            self.cur_nspr = self.waiting_nsprs.pop(0)
            self.cur_nspr.graph['DepartureTime'] = self.time_step + self.cur_nspr.graph['duration']
            self.cur_nspr_unplaced_vnfs_ids = list(self.cur_nspr.nodes.keys())
            # first VNF to place is evaluated immediately
            self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0)
            # reset acceptance reward discount factor:
            # grows by 1/n_vnfs at every placed VNF, reaching 1 at the last one
            self.base_acc_rew_disc_fact = 1 / len(self.cur_nspr.nodes)
            self.acc_rew_disc_fact = 0.
            # self.tot_seen_nsprs += 1
            _ = self.update_nspr_state()  # obs_dict updated within method

    def check_for_departed_nsprs(self):
        """ Checks it some NSPRs have reached their departure time and in case
        it frees the PSN resources occupied by them. """
        all_arrival_times = list(self.nsprs.keys())
        all_arrival_times.sort()
        for arrival_time in all_arrival_times:
            # arrival times are sorted: nothing at/after the current step can have departed
            if arrival_time >= self.time_step:
                break
            cur_nsprs = self.nsprs[arrival_time]
            for nspr in cur_nsprs:
                departed = nspr.graph.get('departed', False)
                # default DepartureTime = current step, so NSPRs never picked up are skipped
                if nspr.graph.get('DepartureTime', self.time_step) < self.time_step and not departed:
                    self.restore_avail_resources(nspr=nspr)

            # This should be useless now
            # if nspr == self.cur_nspr:
            #     # haven't finished placing this NSPR, but its departure time has come.
            #     # remove NSPR, no reward, neither positive nor negative
            #     # (not agent's fault, too many requests at the same time)
            #     self.cur_nspr = None
            #     self.reset_partial_rewards()
209 | - Restore the PSN resources occupied by VNFs and VLs of the current NSPR 210 | - Reset the partial rewards 211 | - Set the reward as the one for an unsuccessful action 212 | - Pick the next NSPR to be evaluated (if exists) 213 | - get an observation from the environment 214 | 215 | :return: the reward for the unsuccessful action 216 | """ 217 | self.restore_avail_resources(nspr=self.cur_nspr) 218 | self.reset_partial_rewards() 219 | self.cur_nspr = None 220 | self.nsprs_seen_in_cur_ep += 1 221 | 222 | self.tot_seen_nsprs += 1 223 | if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode: 224 | self.done = True 225 | self.waiting_nsprs += self.nsprs.get(self.time_step, []) 226 | self.pick_next_nspr() 227 | obs = self.update_nspr_state() 228 | reward = self.rval_rejected_vnf 229 | self.time_step += 1 230 | return obs, reward 231 | 232 | def _normalize_reward_0_10(self, reward): 233 | """ Normalize the reward to be in [0, 10] (as in HA-DRL) """ 234 | # since the global reward is given by the sum for each time step of the 235 | # current NSPR (i.e. for each VNF in the NSPR) of the product of the 3 236 | # partial rewards at time t, 237 | # the maximum possible reward for the given NSPR is given by: 238 | # the number of VNF in the NSPR times 239 | # the maximum acceptance reward value (i.e. every VNF is accepted) times 240 | # the maximum resource consumption reward value (i.e. 1) times 241 | # the maximum tr_load balancing reward value (i.e. 
1+1=2) 242 | max_reward = len(self.cur_nspr.nodes) * self.rval_accepted_vnf * 1 * 2 243 | return reward / max_reward * 10 244 | 245 | @staticmethod 246 | def get_cur_vnf_vls(vnf_id: int, nspr: nx.Graph) -> dict: 247 | """ Get all the virtual links connected to a specific VNF 248 | 249 | :param vnf_id: ID of a VNF whose VLs have to be returned 250 | :param nspr: the NSPR to which the VNF belongs 251 | :return: dict of the VLs connected to the specified VNF 252 | """ 253 | vnf_links = {} 254 | for extremes, vl in nspr.edges.items(): 255 | if vnf_id in extremes: 256 | vnf_links[extremes] = vl 257 | return vnf_links 258 | 259 | def compute_link_weight(self, source: int, target: int, link: dict): 260 | """ Compute the weight of an edge between two nodes. 261 | If the edge satisfies the bandwidth requirement, the weight is 1, else infinity. 262 | 263 | This method is passed to networkx's shortest_path function as a weight function, and it's subject to networkx's API. 264 | It must take exactly 3 arguments: the two endpoints of an edge and the dictionary of edge attributes for that edge. 265 | We need the required bandwidth to compute an edge's weight, so we save it into an attribute of the simulator (self._cur_vl_reqBW). 266 | 267 | :param source: source node in the PSN 268 | :param target: target node in the PSN 269 | :param link: dict of the link's (source - target) attributes 270 | :return: the weight of that link 271 | """ 272 | return 1 if link['availBW'] >= self._cur_vl_reqBW else math.inf 273 | 274 | def _init_obs_dict(self) -> dict: 275 | """ 276 | Initialize the observation dict. 277 | 278 | To be called after reading a PSN and before placing any VNF/VL on it. 
    def _init_obs_dict(self) -> dict:
        """
        Initialize the observation dict.

        To be called after reading a PSN and before placing any VNF/VL on it.
        Also (re)computes self.max_cpu / self.max_ram / self.max_bw and the
        total capacities, and caches the empty-PSN observation for fast reset.
        """
        # check that the env has a PSN
        try:
            if self.psn is None:
                raise ValueError("self.psn is None")
        except AttributeError:
            raise AttributeError("self.psn is not defined")

        # initialize lists
        cpu_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        ram_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        bw_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        placement_state = np.zeros(len(self.psn.nodes), dtype=int)

        # scan all nodes and save data in lists
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = 0
        for node_id, node in self.psn.nodes.items():
            # routers/switches have no CPU/RAM attributes, hence .get(..., 0)
            self.tot_cpu_cap += node.get('CPUcap', 0)
            self.tot_ram_cap += node.get('RAMcap', 0)
            cpu_avails[self.map_id_idx[node_id]] = node.get('availCPU', 0)
            ram_avails[self.map_id_idx[node_id]] = node.get('availRAM', 0)
        # scan all links and save data in list
        for extremes, link in self.psn.edges.items():
            self.tot_bw_cap += link['BWcap']
            # per-node BW availability = sum of the BW of the incident links
            bw_avails[self.map_id_idx[extremes[0]]] += link['availBW']
            bw_avails[self.map_id_idx[extremes[1]]] += link['availBW']

        # save max CPU/RAM/BW capacities (= availabilities in empty PSN) of all nodes
        self.max_cpu = np.max(cpu_avails)
        self.max_ram = np.max(ram_avails)
        self.max_bw = np.max(bw_avails)

        # normalize the quantities
        cpu_avails /= self.max_cpu
        ram_avails /= self.max_ram
        bw_avails /= self.max_bw

        obs = {
            # PSN state
            'cpu_avails': cpu_avails,
            'ram_avails': ram_avails,
            'bw_avails': bw_avails,
            'placement_state': placement_state,
            # NSPR state
            # NOTE(review): dtype=int here vs np.float32 in observation_space —
            # benign since update_nspr_state overwrites these keys with float32
            # arrays before any obs is returned, but confirm and align.
            'cur_vnf_cpu_req': np.array([0], dtype=int),
            'cur_vnf_ram_req': np.array([0], dtype=int),
            'cur_vnf_bw_req': np.array([0], dtype=int),
            'vnfs_still_to_place': np.array([0], dtype=int)
        }

        # store the obs for an empty PSN
        del self._empty_psn_obs_dict
        self._empty_psn_obs_dict = copy.deepcopy(obs)

        return obs

    def update_nspr_state(self) -> GymObs:
        """ Get an observation from the environment.

        The PSN state is already dynamically kept updated, so this method
        will only collect data about the NSPR state and complete the observation
        dict, that will be returned.

        :return: an instance of an observation from the environment
        """
        # state regarding the NSPR
        if self.cur_vnf is not None:
            cur_vnf_vls = self.get_cur_vnf_vls(vnf_id=self.cur_vnf_id,
                                               nspr=self.cur_nspr)
            # requirements are normalized by the PSN-wide maxima, like the availabilities
            cur_vnf_cpu_req = np.array(
                [self.cur_vnf['reqCPU'] / self.max_cpu], dtype=np.float32)

            cur_vnf_ram_req = np.array(
                [self.cur_vnf['reqRAM'] / self.max_ram], dtype=np.float32)

            cur_vnf_bw_req = np.array(
                [sum([vl['reqBW'] for vl in cur_vnf_vls.values()]) / self.max_bw],
                dtype=np.float32)

            # +1 because the current VNF has already been popped from the unplaced list
            vnfs_still_to_place = np.array(
                [len(self.cur_nspr_unplaced_vnfs_ids) + 1], dtype=int)
        else:
            # no NSPR under evaluation: zero-out the NSPR part of the obs
            cur_vnf_cpu_req = np.array([0], dtype=np.float32)
            cur_vnf_ram_req = np.array([0], dtype=np.float32)
            cur_vnf_bw_req = np.array([0], dtype=np.float32)
            vnfs_still_to_place = np.array([0], dtype=int)

        self.obs_dict['cur_vnf_cpu_req'] = cur_vnf_cpu_req
        self.obs_dict['cur_vnf_ram_req'] = cur_vnf_ram_req
        self.obs_dict['cur_vnf_bw_req'] = cur_vnf_bw_req
        self.obs_dict['vnfs_still_to_place'] = vnfs_still_to_place
        return self.obs_dict
    def reset(self, **kwargs) -> GymObs:
        """ Method used to reset the environment

        :return: the starting/initial observation of the environment
        """
        self.done = False  # re-set 'done' attribute

        # if last NSPR has not been placed completely, remove it, this is a new episode
        self.cur_nspr = None

        # reset network status (simply re-read the PSN file)
        # (needed because the available BW of the links gets actually modified)
        self.psn = reader.read_psn(graphml_file=self.psn_file)

        self.ep_number += 1
        self.nsprs_seen_in_cur_ep = 0

        # read the NSPRs to be evaluated
        # note: 'self.time_step' is NOT reset, so arrival times keep growing across episodes
        # self.nsprs = reader.read_nsprs(nsprs_path=self.nsprs_path)
        self.nsprs = reader.sample_nsprs(nsprs_path=self.nsprs_path,
                                         n=self.nsprs_per_episode,
                                         min_arrival_time=self.time_step,
                                         max_duration=self.nsprs_max_duration)

        # reset partial rewards to be accumulated across the episodes' steps
        self.reset_partial_rewards()

        # return the obs corresponding to an empty PSN:
        # ALTERNATIVE 1: slower, but runs through the network and works with changing PSNs
        # self._obs_dict = self._init_obs_dict()

        # ALTERNATIVE 2: slightly faster on paper, but does not work with changing PSNs
        del self.obs_dict
        self.obs_dict = copy.deepcopy(self._empty_psn_obs_dict)

        # get arrived NSPRs
        self.waiting_nsprs += self.nsprs.get(self.time_step, [])
        self.pick_next_nspr()

        # update action mask (if no action masking is implemented, it has no effect)
        self._action_mask[:] = True
        # verison one: more randomic
        # indexes = np.random.rand(*self._action_mask.shape) < self.perc_avail_nodes
        # version two: less randomic
        # disable a fixed number of randomly chosen servers
        size = round((1. - self.perc_avail_nodes) * self.action_space.n)
        indexes = np.random.choice(self.action_space.n, size=size, replace=False)
        self._action_mask[indexes] = False

        # new observation
        obs = self.update_nspr_state()

        return obs
the obs dict 461 | self.cur_vnf['placed'] = physical_node_id 462 | self.obs_dict['cpu_avails'][idx] -= self.cur_vnf['reqCPU'] / self.max_cpu 463 | self.obs_dict['ram_avails'][idx] -= self.cur_vnf['reqRAM'] / self.max_ram 464 | self.obs_dict['placement_state'][idx] += 1 465 | 466 | # connect the placed VNF to the other VNFs it's supposed to be connected to 467 | cur_vnf_VLs = self.get_cur_vnf_vls(self.cur_vnf_id, self.cur_nspr) 468 | if not cur_vnf_VLs: 469 | # if the VNF is detached from all others, R.C. reward is 1, 470 | # so it's the neutral when aggregating the rewards into the global one 471 | self._resource_consumption_rewards.append(1) 472 | else: 473 | for (source_vnf, target_vnf), vl in cur_vnf_VLs.items(): 474 | # get the physical nodes where the source and target VNFs are placed 475 | source_node = self.cur_nspr.nodes[source_vnf]['placed'] 476 | target_node = self.cur_nspr.nodes[target_vnf]['placed'] 477 | 478 | # if the VL isn't placed yet and both the source and target VNFs are placed, place the VL 479 | if not vl['placed'] and source_node >= 0 and target_node >= 0: 480 | self._cur_vl_reqBW = vl['reqBW'] 481 | psn_path = nx.shortest_path(G=self.psn, 482 | source=source_node, 483 | target=target_node, 484 | weight=self.compute_link_weight, 485 | method='dijkstra') 486 | 487 | """ if NO path is available, 'nx.shortest_path' will 488 | return an invalid path. Only after the whole VL has been 489 | placed, it is possible to restore the resources 490 | availabilities, so we use this variable to save that the 491 | resources have been exceeded as soon as we find this to 492 | happen, and only after the VL placement, if this var is 493 | True, we restore the resources availabilities. 
""" 494 | exceeded_bw = False 495 | # place VL onto the PSN 496 | # and update the resources availabilities of physical links involved 497 | for i in range(len(psn_path) - 1): 498 | physical_link = self.psn.edges[psn_path[i], psn_path[i + 1]] 499 | extreme1_idx = self.map_id_idx[psn_path[i]] 500 | extreme2_idx = self.map_id_idx[psn_path[i + 1]] 501 | self.obs_dict['bw_avails'][extreme1_idx] -= vl['reqBW'] / self.max_bw 502 | self.obs_dict['bw_avails'][extreme2_idx] -= vl['reqBW'] / self.max_bw 503 | # note: here the PSN is actually modified: the available 504 | # BW of the link is decreased. Needed for shortest path computation 505 | physical_link['availBW'] -= vl['reqBW'] 506 | if physical_link['availBW'] < 0: 507 | exceeded_bw = True 508 | vl['placed'] = psn_path 509 | 510 | if exceeded_bw: 511 | obs, reward = self.manage_unsuccessful_action() 512 | return obs, reward, self.done, info 513 | 514 | # update the resource consumption reward 515 | path_length = len(psn_path) - 1 516 | self._cur_resource_consumption_rewards.append( 517 | 1 / path_length if path_length > 0 else 1) 518 | 519 | # aggregate the resource consumption rewards into a single value for this action 520 | n_VLs_placed_now = len(self._cur_resource_consumption_rewards) 521 | if n_VLs_placed_now == 0: 522 | self._resource_consumption_rewards.append(1.) 523 | else: 524 | self._resource_consumption_rewards.append( 525 | sum(self._cur_resource_consumption_rewards) / n_VLs_placed_now) 526 | self._cur_resource_consumption_rewards = [] 527 | 528 | # save the ID of the next VNF 529 | if self.cur_nspr_unplaced_vnfs_ids: 530 | self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0) 531 | if self.accumulate_reward: 532 | reward = 0 # global reward is non-zero only after the whole NSPR is placed (as HADRL) 533 | else: 534 | # eventual discount factor of the acceptance reward 535 | if self.discount_acc_rew: 536 | self.acc_rew_disc_fact += self.base_acc_rew_disc_fact 537 | else: 538 | self.acc_rew_disc_fact = 1. 
539 | # reward always givent to the agent 540 | reward = self._acceptance_rewards[-1] * self.acc_rew_disc_fact * \ 541 | self._load_balance_rewards[-1] * \ 542 | self._resource_consumption_rewards[-1] / len(self.cur_nspr.nodes) / \ 543 | 10. # scaling factor 544 | reward = self._normalize_reward_0_10(reward) 545 | else: 546 | # it means we finished the VNFs of the current NSPR 547 | self.nsprs_seen_in_cur_ep += 1 548 | self.tot_seen_nsprs += 1 549 | if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode: 550 | self.done = True 551 | # reset placement state 552 | self.obs_dict['placement_state'] = np.zeros(len(self.psn.nodes), dtype=int) 553 | # update global reward because the NSPR is fully placed 554 | reward = np.stack((self._acceptance_rewards, 555 | self._resource_consumption_rewards, 556 | self._load_balance_rewards)).prod(axis=0).sum() 557 | # normalize the reward to be in [0, 10] (as they do in HA-DRL) 558 | reward = self._normalize_reward_0_10(reward) * \ 559 | 2 # TODO: per dargli più peso (non da HADRL) 560 | self.reset_partial_rewards() 561 | self.cur_nspr = None # marked as None so a new one can be picked 562 | # update the acceptance ratio 563 | self.accepted_nsprs += 1 564 | 565 | # increase time step 566 | self.time_step += 1 567 | 568 | # check for new and departing NSPRs 569 | if self.nsprs is not None: 570 | self.check_for_departed_nsprs() 571 | self.waiting_nsprs += self.nsprs.get(self.time_step, []) 572 | self.pick_next_nspr() 573 | 574 | # new observation 575 | obs = self.update_nspr_state() 576 | 577 | return obs, reward, self.done, info 578 | 579 | def render(self, mode="human"): 580 | raise NotImplementedError 581 | -------------------------------------------------------------------------------- /src/policies/__init__.py: -------------------------------------------------------------------------------- 1 | from .hadrl_policy import HADRLPolicy 2 | -------------------------------------------------------------------------------- 
class GCNsFeaturesExtractor(BaseFeaturesExtractor):
    """ Features extractor of the HA-DRL policy.

    The PSN state (per-node CPU/RAM/BW availabilities, plus optionally the
    placement state) is passed through a stack of GCN layers, while the NSPR
    state (current VNF requirements) goes through a fully-connected layer;
    the two resulting vectors are concatenated into the final features vector.
    """

    def __init__(
            self,
            observation_space: gym.Space,
            psn: nx.Graph,
            activation_fn: Type[nn.Module],
            gcn_layers_dims: Tuple[int],
            nspr_out_features: int = 4
    ):
        """ Constructor

        :param observation_space: the observation space of the agent using this feature extractor
        :param psn: the PSN graph of the environment which the agent acts upon
        :param activation_fn: activation function class to be used (e.g. nn.Tanh)
        :param gcn_layers_dims: dimensions of the features vector of each node in each GCN layer
            - number of layers = length of the tuple
        :param nspr_out_features: dimension of the features vector of the NSPR state
        """
        # NOTE: these attributes are set before super().__init__ because
        # features_dim (required by the superclass) is derived from them
        self.activation = activation_fn
        self.n_nodes = len(psn.nodes)
        self.gcn_layers_dims = gcn_layers_dims  # saved in an attribute for logging purposes
        gcn_out_channels = gcn_layers_dims[-1]
        # one gcn_out_channels-dim vector per PSN node + the NSPR embedding
        features_dim = gcn_out_channels * self.n_nodes + nspr_out_features
        super().__init__(observation_space, features_dim=features_dim)

        # 4 per-node input features when the placement state is part of the
        # observation, 3 otherwise (CPU, RAM, BW availabilities)
        self.psn_state_features = 4 if 'placement_state' in observation_space.spaces else 3
        self.nspr_state_features = 4

        # build the edge index for the GCN layers: each undirected PSN edge is
        # duplicated in both directions (GCNConv expects directed edge pairs)
        edges = th.tensor(np.array(psn.edges).reshape((len(psn.edges), 2)),
                          dtype=th.long)
        double_edges = th.cat((edges, th.flip(edges, dims=(1,))))
        self.edge_index = double_edges.t().contiguous()

        # GCN layers (local renamed from 'gcn_layers_dims' to avoid shadowing
        # the constructor parameter / saved attribute)
        dims = [self.psn_state_features] + list(gcn_layers_dims)
        self.gcn_layers = nn.ModuleList()
        for i in range(len(dims) - 1):
            self.gcn_layers.append(GCNConv(dims[i], dims[i + 1]))

        # fully-connected layer for the NSPR state
        self.nspr_fc = Linear(in_features=self.nspr_state_features,
                              out_features=nspr_out_features)

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """ Extract features from a batch of observations

        :param observations: dict of batched observation tensors
        :return: tensor of shape (batch, features_dim)
        """
        # save device (the one where the weights and observations are)
        device = observations['cpu_avails'].device

        # move edge_index to the correct device (no-op when already there)
        self.edge_index = self.edge_index.to(device)

        # FIX: instantiate the (stateless) activation module once per forward
        # pass instead of constructing a new instance at every layer application
        activation = self.activation()

        # batch size (length of the rollout buffer)
        batch_size = len(observations['cpu_avails'])

        # features extraction of the PSN state
        psn_state = th.empty(
            size=(batch_size, self.n_nodes, self.psn_state_features),
            dtype=th.float, device=device)
        psn_state[:, :, 0] = observations['cpu_avails']
        psn_state[:, :, 1] = observations['ram_avails']
        psn_state[:, :, 2] = observations['bw_avails']
        if 'placement_state' in observations:
            psn_state[:, :, 3] = observations['placement_state']

        # pass the psn_state through the GCN layers
        gcn_out = psn_state
        for gcn_layer in self.gcn_layers:
            gcn_out = activation(gcn_layer(gcn_out, self.edge_index))
        gcn_out = gcn_out.flatten(start_dim=1)

        # features extraction of the NSPR state
        nspr_state = th.empty(size=(batch_size, 1, self.nspr_state_features),
                              dtype=th.float, device=device)
        nspr_state[:, :, 0] = observations['cur_vnf_cpu_req']
        nspr_state[:, :, 1] = observations['cur_vnf_ram_req']
        nspr_state[:, :, 2] = observations['cur_vnf_bw_req']
        nspr_state[:, :, 3] = observations['vnfs_still_to_place']
        nspr_fc_out = activation(self.nspr_fc(nspr_state.flatten(start_dim=1)))

        # concatenation of the two features vectors
        return th.cat((gcn_out, nspr_fc_out), dim=1)
class HADRLPolicy(MultiInputActorCriticPolicy):
    """ Policy network from the paper HA-DRL [1]

    Uses two separate (non-shared) GCN-based features extractors for the
    actor and the critic, followed by the HADRL actor-critic MLP extractor.

    [1] https://ieeexplore.ieee.org/document/9632824
    """
    # human-readable identifier of this policy (e.g. for logging)
    name = 'HADRL Policy'

    def __init__(
            self,
            observation_space: gym.spaces.Space,
            action_space: gym.spaces.Space,
            lr_schedule: Callable[[float], float],
            psn: nx.Graph,
            servers_map_idx_id: Dict[int, int],
            net_arch: Optional[Union[List[int], Dict[str, List[int]]]] = None,
            activation_fn: Type[nn.Module] = nn.Tanh,
            gcn_layers_dims: Tuple[int] = (60,),
            nspr_out_features: int = 4,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
            *args,
            **kwargs,
    ):
        """
        :param observation_space: Observation space of the agent
        :param action_space: Action space of the agent
        :param lr_schedule: Learning rate schedule
        :param psn: Physical Substrate Network the environment acts upon
        :param servers_map_idx_id: Mapping between servers' indexes and their IDs
        :param net_arch: architecture of the policy and value networks after the feature extractor
        :param activation_fn: Activation function
        :param gcn_layers_dims: Dimensions of the GCN layers
        :param nspr_out_features: Number of output features of the NSPR state
        :param use_heuristic: Whether to use the heuristic or not
        :param heu_kwargs: Keyword arguments for the heuristic
        """

        # assert len(net_arch) == 1 and isinstance(net_arch[0], dict), \
        #     "This policy allows net_arch to be a list with only one dict"

        self.psn = psn
        self.gcn_layers_dims = gcn_layers_dims  # saved in an attribute for logging purposes
        self.servers_map_idx_id = servers_map_idx_id
        self.use_heuristic = use_heuristic
        self.heu_kwargs = heu_kwargs

        super(HADRLPolicy, self).__init__(
            observation_space,
            action_space,
            lr_schedule,
            net_arch,
            activation_fn,
            # Pass remaining arguments to base class
            *args,
            **kwargs,
        )
        # non-shared features extractors for the actor and the critic
        # (note: tanh activation for the actor's extractor, ReLU for the critic's)
        self.policy_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.Tanh, gcn_layers_dims,
            nspr_out_features
        )
        self.value_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.ReLU, gcn_layers_dims,
            nspr_out_features
        )
        self.features_dim = {'pi': self.policy_features_extractor.features_dim,
                             'vf': self.value_features_extractor.features_dim}
        delattr(self, "features_extractor")  # remove the shared features extractor

        # TODO: check what this step actually does
        # Disable orthogonal initialization
        # self.ortho_init = False

        # Workaround alert!
        # This method is called in the super-constructor. It creates the optimizer,
        # but using also the params of the features extractor before creating
        # our own 2 separate ones ('policy_features_extractor' and
        # 'value_features_extractor'). Therefore we need to re-create the optimizer
        # using the params of the correct new features extractor.
        # (it will also re-do a bunch of things like re-creating the mlp_extractor,
        # which was fine, but it's not a problem).
        self._rebuild(lr_schedule)

    def _rebuild(self, lr_schedule: Schedule) -> None:
        """
        Like method _build, but needed to be re-called to re-create the
        optimizer, since it was created using obsolete parameters, i.e. params
        including the ones of the default shared features extractor and NOT
        including the ones of the new features extractors.
        The mlp_extractor is recreated too, since it was created with incorrect features_dim.

        :param lr_schedule: Learning rate schedule
            lr_schedule(1) is the initial learning rate
        """
        self._build_mlp_extractor()

        # action_net and value_net as created in the '_build' method are OK,
        # no need to recreate them.

        # Init weights: use orthogonal initialization
        # with small initial weight for the output
        if self.ortho_init:
            # TODO: check for features_extractor
            # Values from stable-baselines.
            # features_extractor/mlp values are
            # originally from openai/baselines (default gains/init_scales).
            module_gains = {
                self.policy_features_extractor: np.sqrt(2),
                self.value_features_extractor: np.sqrt(2),
                self.mlp_extractor: np.sqrt(2),
                self.action_net: 0.01,
                self.value_net: 1,
            }
            for module, gain in module_gains.items():
                module.apply(partial(self.init_weights, gain=gain))

        # Setup optimizer with initial learning rate
        # (now that all sub-modules exist, self.parameters() is complete)
        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)

    def _build_mlp_extractor(self) -> None:
        # create the HADRL-specific actor-critic network that consumes the
        # features produced by the two separate extractors
        self.mlp_extractor = HADRLActorCriticNet(
            action_space=self.action_space,
            psn=self.psn,
            net_arch=self.net_arch,
            servers_map_idx_id=self.servers_map_idx_id,
            features_dim=self.features_dim,
            use_heuristic=self.use_heuristic,
            heu_kwargs=self.heu_kwargs
        )

    def extract_features(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """
        Preprocess the observation if needed and extract features.

        :param obs: Observation
        :return: the output of the feature extractor(s), as a
            (policy_features, value_features) pair
        """
        assert self.policy_features_extractor is not None and \
               self.value_features_extractor is not None
        preprocessed_obs = preprocess_obs(obs, self.observation_space,
                                          normalize_images=self.normalize_images)
        policy_features = self.policy_features_extractor(preprocessed_obs)
        value_features = self.value_features_extractor(preprocessed_obs)
        return policy_features, value_features

    def forward(self, obs: th.Tensor, deterministic: bool = False) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Forward pass in all the networks (actor and critic)

        :param obs: Observation
        :param deterministic: Whether to sample or use deterministic actions
        :return: action, value and log probability of the action
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        # the actor also receives the raw obs (needed by the heuristic layer)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)

        # Evaluate the values for the given observations
        values = self.value_net(latent_vf)
        distribution = self._get_action_dist_from_latent(latent_pi)
        actions = distribution.get_actions(deterministic=deterministic)
        log_prob = distribution.log_prob(actions)
        return actions, values, log_prob

    def evaluate_actions(self, obs: th.Tensor, actions: th.Tensor) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Evaluate actions according to the current policy,
        given the observations.

        :param obs: Observation
        :param actions: Actions
        :return: estimated value, log likelihood of taking those actions
            and entropy of the action distribution.
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        distribution = self._get_action_dist_from_latent(latent_pi)
        log_prob = distribution.log_prob(actions)
        values = self.value_net(latent_vf)
        return values, log_prob, distribution.entropy()

    def get_distribution(self, obs: th.Tensor) -> Distribution:
        """
        Get the current policy distribution given the observations.

        :param obs: Observation
        :return: the action distribution.
        """
        policy_features, _ = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        return self._get_action_dist_from_latent(latent_pi)

    def predict_values(self, obs: th.Tensor) -> th.Tensor:
        """
        Get the estimated values according to the current policy given the observations.

        :param obs: Observation
        :return: the estimated values.
        """
        _, value_features = self.extract_features(obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        return self.value_net(latent_vf)
221 | """ 222 | _, value_features = self.extract_features(obs) 223 | latent_vf = self.mlp_extractor.forward_critic(value_features) 224 | return self.value_net(latent_vf) 225 | -------------------------------------------------------------------------------- /src/policies/mlp_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .hadrl_mlp_extractor import HADRLActor, HADRLCritic, HADRLActorCriticNet 2 | -------------------------------------------------------------------------------- /src/policies/mlp_extractors/hadrl_mlp_extractor.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Union, List 2 | 3 | import gym 4 | import networkx as nx 5 | import torch as th 6 | from torch import nn 7 | 8 | from heuristic_layers import P2CLoadBalanceHeuristic, HADRLHeuristic 9 | 10 | 11 | class HADRLActor(nn.Module): 12 | """ Actor network for the HA-DRL [1] algorithm 13 | 14 | [1] https://ieeexplore.ieee.org/document/9632824 15 | """ 16 | 17 | def __init__( 18 | self, 19 | action_space: gym.Space, 20 | psn: nx.Graph, 21 | net_arch: Union[List[int], Dict[str, List[int]]], 22 | servers_map_idx_id: Dict[int, int], 23 | in_features: int, 24 | use_heuristic: bool = False, 25 | heu_kwargs: dict = None, 26 | ): 27 | """ Constructor 28 | 29 | :param action_space: action space 30 | :param psn: env's physical substrate network 31 | :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids 32 | :param use_heuristic: if True, actor will use P2C heuristic 33 | """ 34 | super().__init__() 35 | self.use_heuristic = use_heuristic 36 | heu_class = heu_kwargs.get('heu_class', HADRLHeuristic) 37 | 38 | # layers 39 | dims = [in_features] + net_arch['pi'] 40 | modules = nn.ModuleList() 41 | for i in range(len(dims) - 1): 42 | modules.append(nn.Linear(dims[i], dims[i + 1])) 43 | modules.append(nn.Tanh()) 44 | 45 | if self.use_heuristic: 46 
| self.heu_layer = heu_class(action_space, servers_map_idx_id, psn, 47 | **heu_kwargs).requires_grad_(False) 48 | 49 | self.layers = nn.Sequential(*modules) 50 | 51 | def forward(self, x: th.Tensor, obs: th.Tensor) -> th.Tensor: 52 | x = self.layers(x) 53 | if self.use_heuristic: 54 | x = self.heu_layer(x, obs) 55 | return x 56 | 57 | 58 | class HADRLCritic(nn.Module): 59 | """ Critic network for the HA-DRL [1] algorithm 60 | 61 | [1] https://ieeexplore.ieee.org/document/9632824 62 | """ 63 | 64 | def __init__( 65 | self, 66 | in_features: int, 67 | net_arch: List[Union[int, Dict[str, List[int]]]] 68 | ): 69 | """ Constructor 70 | 71 | :param in_features: number of features extracted by the features extractor, 72 | i.e., input dim of the first layer of the network 73 | """ 74 | super().__init__() 75 | dims = [in_features] + net_arch['vf'] 76 | modules = nn.ModuleList() 77 | for i in range(len(dims) - 1): 78 | modules.append(nn.Linear(dims[i], dims[i + 1])) 79 | modules.append(nn.ReLU()) 80 | self.layers = nn.Sequential(*modules) 81 | 82 | def forward(self, x: th.Tensor) -> th.Tensor: 83 | return self.layers(x) 84 | 85 | 86 | class HADRLActorCriticNet(nn.Module): 87 | """ 88 | Actor-Critic network for the HA-DRL [1] algorithm 89 | 90 | [1] https://ieeexplore.ieee.org/document/9632824 91 | """ 92 | 93 | def __init__( 94 | self, 95 | action_space: gym.Space, 96 | psn: nx.Graph, 97 | net_arch: List[Union[int, Dict[str, List[int]]]], 98 | servers_map_idx_id: Dict[int, int], 99 | features_dim: Union[int, Dict[str, int]], 100 | gcn_out_channels: int = 60, 101 | nspr_out_features: int = 4, 102 | use_heuristic: bool = False, 103 | heu_kwargs: dict = None, 104 | ): 105 | """ Constructor 106 | 107 | :param action_space: action space 108 | :param psn: env's physical substrate network 109 | :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids 110 | :param policy_features_dim: 111 | :param value_features_dim: 112 | :param 
gcn_out_channels: number of output channels of the GCN layer 113 | :param nspr_out_features: output dim of the layer that receives the NSPR state 114 | :param use_heuristic: if True, actor will use P2C heuristic 115 | """ 116 | super(HADRLActorCriticNet, self).__init__() 117 | 118 | # IMPORTANT: 119 | # Save output dimensions, used to create the distributions 120 | self.latent_dim_pi = net_arch['pi'][-1] 121 | self.latent_dim_vf = net_arch['vf'][-1] 122 | 123 | if isinstance(features_dim, int): 124 | policy_features_dim = value_features_dim = features_dim 125 | else: 126 | policy_features_dim = features_dim['pi'] 127 | value_features_dim = features_dim['vf'] 128 | 129 | # policy network 130 | self.policy_net = HADRLActor(action_space, psn, net_arch, 131 | servers_map_idx_id, policy_features_dim, 132 | use_heuristic, heu_kwargs) 133 | # value network 134 | self.value_net = HADRLCritic(value_features_dim, net_arch) 135 | 136 | def forward(self, features: th.Tensor, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: 137 | """ 138 | :return: (th.Tensor, th.Tensor) latent_policy, latent_value of the specified network. 
139 | If all layers are shared, then ``latent_policy == latent_value`` 140 | """ 141 | return self.policy_net(features, obs), self.value_net(features) 142 | 143 | def forward_actor(self, features: th.Tensor, obs: th.Tensor) -> th.Tensor: 144 | return self.policy_net(features, obs) 145 | 146 | def forward_critic(self, features: th.Tensor) -> th.Tensor: 147 | return self.value_net(features) 148 | -------------------------------------------------------------------------------- /src/reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import Tuple, List, Dict 4 | 5 | import networkx as nx 6 | 7 | 8 | def check_if_graphml(file: str): 9 | """ Checks if a file is a GraphML file (checking the extension) 10 | 11 | :param file: path to the file to be checked 12 | :raise ValueError: is case the file is not a GraphML file 13 | """ 14 | if not file.endswith(".graphml"): 15 | raise ValueError("{} is not a GraphML file".format(file)) 16 | 17 | 18 | def _check_graph(network: nx.Graph): 19 | """ Checks that the graph is correct 20 | 21 | :param network: network that needs to be checked 22 | 23 | :raise AssertionError: if some graph's attributes are not correct 24 | """ 25 | if "E2ELatency" in network.graph.keys(): 26 | assert network.graph['E2ELatency'] > 0 27 | # if E2ELatency is present, it means the network is a NSPR 28 | if "ArrivalTime" in network.graph.keys(): 29 | assert network.graph['ArrivalTime'] >= 0 30 | else: 31 | network.graph['ArrivalTime'] = 0 32 | if "DepartureTime" in network.graph.keys(): 33 | assert network.graph['DepartureTime'] >= \ 34 | network.graph['ArrivalTime'] + len(network.nodes.keys()) 35 | 36 | 37 | def _check_nodes(network: nx.Graph, required_node_attributes: Tuple[str, ...], 38 | **admissible_values: tuple): 39 | """ Checks that the nodes of the network are correct 40 | 41 | :param network: network whose nodes have to be checked 42 | :param 
required_node_attributes: tuple with all required attributes for the nodes 43 | :param admissible_values: (optional) extra arguments where the name is an 44 | attribute name and the value is a tuple with the admissible values 45 | 46 | :raise AssertionError: 47 | - in case some nodes don't contain all the required parameters 48 | - in case some non-admissible values are used for some arguments 49 | """ 50 | for node_id, node in network.nodes.items(): 51 | # if the admissible values for a certain attribute are passed, 52 | # check that the value of each attribute is admissible 53 | for attrib, value in node.items(): 54 | assert value in admissible_values.get(attrib, (value,)) 55 | if attrib in ("CPUcap", "RAMcap", "availCPU", "availRAM", "reqCPU", "reqRAM"): 56 | assert value >= 0 57 | # the following checks are for servers or VNFs only, in case skip 58 | if node.get("NodeType", "server") != "server": 59 | # if node hasn't attrib "NodeType", it's a VNF, so don't skip iteration 60 | continue 61 | if "reqCPU" in node.keys(): 62 | # 'reqCPU' is a mandatory argument for NSPR, so if it's present, the node is a VNF 63 | # add an attribute to specify if a VNF has been placed onto the PSN 64 | node['placed'] = -1 65 | else: 66 | # it means the node belongs to a PSN and not to a NSPR 67 | node['availCPU'] = node['CPUcap'] 68 | node['availRAM'] = node['RAMcap'] 69 | # check that all required attributes are present in the current node 70 | assert all(req_attrib in node.keys() for req_attrib in required_node_attributes) 71 | 72 | 73 | def _check_edges(network: nx.Graph, required_link_attributes: Tuple[str, ...], **admissible_values: tuple): 74 | """ Checks that the edges of the network are correct 75 | 76 | :param network: network whose edges have to be checked 77 | :param required_link_attributes: tuple with all required attributes for the links 78 | :param admissible_values: (optional) extra arguments where the name is an 79 | attribute name and the value is a tuple with the 
admissible values 80 | 81 | :raise AssertionError: 82 | - in case some links don't contain all the required parameters 83 | - in case some non-admissible values are used for some arguments 84 | """ 85 | for node_A, node_B in list(network.edges): 86 | cur_link_attribs = network.edges[node_A, node_B].keys() 87 | cur_link_values = network.edges[node_A, node_B].values() 88 | cur_link_attribs_values = zip(cur_link_attribs, cur_link_values) 89 | # check that all required attributes are present in the current link 90 | assert all(attrib in cur_link_attribs for attrib in required_link_attributes) 91 | # if the admissible values for a certain attribute are passed, 92 | # check that the value of each attribute is admissible 93 | for attrib, value in cur_link_attribs_values: 94 | assert value in admissible_values.get(attrib, (value,)) 95 | if attrib in ("BWcap", "reqBW", "Latency", "reqLatency"): 96 | assert value >= 0 97 | # initialize resources availabilities if PSN 98 | if "reqBW" in cur_link_attribs: 99 | # 'reqBW' is a mandatory argument for NSPR, so if it's present, the link is a VL 100 | network.edges[node_A, node_B]['placed'] = [] 101 | else: 102 | # it means the link is physical and belongs to a PSN (and not to a NSPR) 103 | network.edges[node_A, node_B]['availBW'] = network.edges[node_A, node_B]['BWcap'] 104 | 105 | 106 | def check_required_attributes(network: nx.Graph, required_node_attributes: Tuple[str, ...], 107 | required_link_attributes: Tuple[str, ...], **admissible_values: tuple): 108 | """ Checks whether all the required attributes are present in the nodes and link of the network passed as argument 109 | 110 | :param network: network whose nodes and links have to be checked 111 | :param required_node_attributes: tuple with all required attributes for the nodes 112 | :param required_link_attributes: tuple with all required attributes for the links 113 | :param admissible_values: (optional) extra arguments where the name is an 114 | attribute name and the 
value is a tuple with the admissible values 115 | 116 | :raise AssertionError: 117 | - in case some nodes/links don't contain all the required parameters 118 | - in case some non-admissible values are used for some arguments 119 | """ 120 | _check_graph(network) 121 | _check_nodes(network, required_node_attributes, **admissible_values) 122 | _check_edges(network, required_link_attributes, **admissible_values) 123 | 124 | 125 | def read_psn(graphml_file: str) -> nx.Graph: 126 | """ Reads a GraphML file containing the definition of a PSN 127 | 128 | :param graphml_file: GraphML file containing the definition of the PSN 129 | :return: a networkx.Graph representing the PSN 130 | 131 | :raise ValueError: if "graphml_file" is not a GraphML file 132 | :raise AssertionError: if some required attributes of nodes and links are missing 133 | """ 134 | check_if_graphml(graphml_file) # check if the file passed is a GraphML file 135 | 136 | # read the GraphML file and create a nx.Graph object 137 | psn = nx.read_graphml(path=graphml_file, node_type=int) 138 | 139 | # check that the attributes of the graph are correct 140 | check_required_attributes(network=psn, 141 | required_node_attributes=("NodeType", "CPUcap", "RAMcap"), 142 | required_link_attributes=("BWcap",), 143 | NodeType=("UAP", "router", "switch", "server")) 144 | return psn 145 | 146 | 147 | def read_single_nspr(graphml_file: str) -> nx.Graph: 148 | """ Reads a single NSPR (network slice placement request) 149 | 150 | :param graphml_file: GraphML file with the definition of the NSPR 151 | :return: the NSPR as a networkx.Graph object 152 | 153 | :raise ValueError: if "graphml_file" is not a GraphML file 154 | :raise AssertionError: if some required attributes of nodes and links are missing 155 | """ 156 | check_if_graphml(graphml_file) # check if the file passed is a GraphML file 157 | 158 | # read the GraphML file and create a nx.Graph object 159 | nspr = nx.read_graphml(path=graphml_file, node_type=int) 160 | 161 | 
# check that the attributes of the graph are correct 162 | check_required_attributes(network=nspr, 163 | required_node_attributes=("reqCPU", "reqRAM"), 164 | required_link_attributes=("reqBW",)) 165 | return nspr 166 | 167 | 168 | def read_nsprs(nsprs_path: str) -> Dict[int, List[nx.Graph]]: 169 | """ Reads all the NSPRs (network slice placement requests) in a directory 170 | 171 | :param nsprs_path: either path to the directory with the files defining a 172 | NSPR each or the path to a single NSPR 173 | :return: a dict having as keys the arrival times of the NSPRs and as 174 | values the NSPRs themselves 175 | :raise ValueError: if nsprs_path is neither a directory nor a file 176 | """ 177 | if not os.path.isdir(nsprs_path) and not os.path.isfile(nsprs_path): 178 | raise ValueError(f"{nsprs_path} is neither a directory nor a file") 179 | 180 | nspr_dict = {} # save the NSPRs in a dict with the arrival times as keys 181 | if os.path.isfile(nsprs_path): 182 | nspr = read_single_nspr(nsprs_path) 183 | if nspr.graph['ArrivalTime'] not in nspr_dict.keys(): 184 | nspr_dict[nspr.graph['ArrivalTime']] = [nspr] 185 | else: 186 | nspr_dict[nspr.graph['ArrivalTime']].append(nspr) 187 | return nspr_dict 188 | 189 | dir_path = nsprs_path 190 | for graphml_file in os.listdir(dir_path): 191 | nspr = read_single_nspr(os.path.join(dir_path, graphml_file)) 192 | nspr_dict[nspr.graph['ArrivalTime']] = nspr_dict.get(nspr.graph['ArrivalTime'], []) + [nspr] 193 | return nspr_dict 194 | 195 | 196 | def sample_nsprs(nsprs_path: str, n: int, min_arrival_time: int = 0, 197 | max_duration: int = 100) -> Dict[int, List[nx.Graph]]: 198 | """ Samples a subset of NSPRs from a directory containing multiple NSPRs. 199 | It assigns random arrival and departure time to those NSPRs. 
200 | 201 | :param nsprs_path: path to the directory containing the NSPRs 202 | :param n: number of NSPRs to sample 203 | :param min_arrival_time: minimum arrival time to assign to the sampled NSPRs 204 | :param max_duration: maximum duration (dep. time - arr. time) to assign to the sampled NSPRs 205 | :return: a dict having as keys the arrival times of the NSPRs and as 206 | values the NSPRs themselves 207 | :raise ValueError: if nsprs_path is not a directory 208 | """ 209 | if not os.path.isdir(nsprs_path): 210 | raise ValueError(f"{nsprs_path} is not a directory") 211 | 212 | all_nsprs_files = os.listdir(nsprs_path) 213 | n = min(n, len(all_nsprs_files)) if n is not None else len(all_nsprs_files) 214 | sampled_nsprs_files = random.sample(all_nsprs_files, n) 215 | arrival_times = random.sample(range(min_arrival_time, min_arrival_time + max_duration), n) 216 | nspr_dict = {} 217 | for i, arr_time in enumerate(arrival_times): 218 | nspr = read_single_nspr(os.path.join(nsprs_path, sampled_nsprs_files[i])) 219 | nspr.graph['ArrivalTime'] = arr_time 220 | nspr.graph['duration'] = random.randint(len(nspr.nodes), max_duration) 221 | nspr_dict[arr_time] = nspr_dict.get(arr_time, []) + [nspr] 222 | return nspr_dict 223 | -------------------------------------------------------------------------------- /src/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .discrete_with_negatives import DiscreteWithNegatives 2 | 3 | __all__ = [ 4 | "DiscreteWithNegatives", 5 | ] 6 | -------------------------------------------------------------------------------- /src/spaces/discrete_with_negatives.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of a space consisting of finitely many elements. 3 | 4 | DISCLAIMER: 5 | This file is taken and slightly modified from the Discrete space of OpenAI gym release 0.25.1. 

stable-baselines3-1.5.0 requires gym==0.21, since they introduced breaking changes in 0.22.
In this project, it is required to have a discrete space with the 'start' attribute, which
was introduced only in later versions of gym, therefore a custom space
(similar to later versions of the Discrete space in gym) is needed.
"""

from typing import Optional, Union

import numpy as np

from gym.spaces.space import Space
from gym.utils import seeding


class DiscreteWithNegatives(Space):
    r"""A space consisting of finitely many elements.

    This class represents a finite subset of integers, more specifically a set
    of the form :math:`\{ a, a+1, \dots, a+n-1 \}` (the offset ``a`` may be
    negative, which gym 0.21's ``Discrete`` does not support).

    Example::

        >>> DiscreteWithNegatives(2)            # {0, 1}
        >>> DiscreteWithNegatives(3, start=-1)  # {-1, 0, 1}
    """

    def __init__(
        self,
        n: int,
        seed: Optional[int] = None,
        start: int = 0,
    ):
        r"""Constructor of :class:`DiscreteWithNegatives` space.

        This will construct the space :math:`\{\text{start}, ..., \text{start} + n - 1\}`.

        Args:
            n (int): The number of elements of this space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
            start (int): The smallest element of this space.
        """
        assert isinstance(n, (int, np.integer))
        assert n > 0, "n (counts) have to be positive"
        assert isinstance(start, (int, np.integer))
        # cast to plain ints so numpy scalar inputs don't leak into attributes
        self.n = int(n)
        self.start = int(start)
        # shape () and dtype int64, matching gym's Discrete space
        super().__init__((), np.int64, seed)

    def sample(self, mask: Optional[np.ndarray] = None) -> int:
        """Generates a single random sample from this space.

        A sample will be chosen uniformly at random with the mask if provided

        Args:
            mask: An optional mask for if an action can be selected.
                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.

        Returns:
            A sampled integer from the space
        """
        if mask is not None:
            # validate the mask strictly (type, dtype, shape, 0/1 values)
            assert isinstance(
                mask, np.ndarray
            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
            assert (
                mask.dtype == np.int8
            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
            assert mask.shape == (
                self.n,
            ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
            valid_action_mask = mask == 1
            assert np.all(
                np.logical_or(mask == 0, valid_action_mask)
            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
            if np.any(valid_action_mask):
                # uniform choice among the indexes flagged valid, shifted by start
                return int(
                    self.start + self.np_random.choice(np.where(valid_action_mask)[0])
                )
            else:
                # fully-masked space: fall back to the smallest element
                return self.start

        # no mask: uniform over [start, start + n)
        # (np_random is a RandomState under gym 0.21, hence randint)
        return int(self.start + self.np_random.randint(self.n))

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            # accept 0-d integer numpy scalars/arrays as members
            as_int = int(x)  # type: ignore
        else:
            return False
        return self.start <= as_int < self.start + self.n

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        if self.start != 0:
            return "DiscreteWithNegatives(%d, start=%d)" % (self.n, self.start)
        return "DiscreteWithNegatives(%d)" % self.n

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        return (
            isinstance(other, DiscreteWithNegatives)
            and self.n == other.n
            and self.start ==
other.start 114 | ) 115 | 116 | def __setstate__(self, state): 117 | """Used when loading a pickled space. 118 | 119 | This method has to be implemented explicitly to allow for loading of legacy states. 120 | 121 | Args: 122 | state: The new state 123 | """ 124 | super().__setstate__(state) 125 | 126 | # Don't mutate the original state 127 | state = dict(state) 128 | 129 | # Allow for loading of legacy states. 130 | # See https://github.com/openai/gym/pull/2470 131 | if "start" not in state: 132 | state["start"] = 0 133 | 134 | # Update our state 135 | self.__dict__.update(state) 136 | -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List, Optional, Type 3 | 4 | import gym 5 | import wandb 6 | from stable_baselines3 import A2C 7 | from stable_baselines3.common.callbacks import BaseCallback, EvalCallback 8 | from stable_baselines3.common.env_util import make_vec_env 9 | from torch import nn 10 | from wandb.integration.sb3 import WandbCallback 11 | 12 | import reader 13 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback 14 | from callbacks.hparam_callback import HParamCallback 15 | from callbacks.psn_load_callback import PSNLoadCallback 16 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback 17 | from policies.features_extractors.hadrl_features_extractor import \ 18 | GCNsFeaturesExtractor 19 | from utils import make_env 20 | 21 | 22 | class Trainer: 23 | def __init__( 24 | self, 25 | psn_path: str, 26 | n_tr_envs: int, 27 | load_perc: float, 28 | time_limit: bool, 29 | max_ep_steps: int, 30 | tensorboard_log: str, 31 | create_eval_env: bool = False, 32 | reset_load_class: Optional[gym.Wrapper] = None, 33 | reset_load_kwargs: dict = dict(cpu_load=0.8), 34 | # reset_load_kwargs: dict = dict(rand_load=True, rand_range=(0., 1.)), 35 | placement_state: bool = True, 
36 | accumulate_rew: bool = True, 37 | discount_acc_rew: bool = True, 38 | dynamic_connectivity: int = False, 39 | dynamic_connectivity_kwargs: dict = dict(link_bw=10_000), 40 | generate_nsprs: bool = True, 41 | nsprs_per_ep: int = 1, 42 | vnfs_per_nspr: int = 5, 43 | always_one: bool = True, 44 | seed: Optional[int] = None, 45 | net_arch: dict = dict(pi=[256, 128], vf=[256, 128, 32]), 46 | activation_fn: Type[nn.Module] = nn.Tanh, 47 | gcn_layers_dims: tuple = (20, 20, 20), 48 | device: str = 'cuda:0', 49 | lr: float = 0.0002, 50 | n_steps: int = 1, 51 | gamma: float = 0.99, 52 | ent_coef: float = 0.01, 53 | gae_lambda: float = 0.92, 54 | # eval_load: Optional[float] = None, 55 | ): 56 | # checks on argumetns 57 | assert n_tr_envs > 0 58 | assert 0. <= load_perc < 1., "Training load must be a percentage between 0 and 1" 59 | 60 | # save some attributes 61 | self.nsprs_per_ep = nsprs_per_ep 62 | self.max_ep_steps = max_ep_steps 63 | self.time_limit = time_limit 64 | self.placement_state = placement_state 65 | 66 | # read PSN file 67 | psn = reader.read_psn(psn_path) 68 | 69 | # create trainin environment 70 | self.tr_env = make_vec_env( 71 | env_id=make_env, 72 | n_envs=n_tr_envs, 73 | env_kwargs=dict( 74 | psn_path=psn_path, 75 | base_env_kwargs=dict( 76 | accumulate_reward=accumulate_rew, 77 | discount_acc_rew=discount_acc_rew, 78 | ), 79 | time_limit=time_limit, 80 | time_limit_kwargs=dict(max_episode_steps=max_ep_steps), 81 | generate_nsprs=generate_nsprs, 82 | nsprs_gen_kwargs=dict( 83 | nsprs_per_ep=nsprs_per_ep, 84 | vnfs_per_nspr=vnfs_per_nspr, 85 | load=load_perc, 86 | always_one=always_one 87 | ), 88 | reset_load_class=reset_load_class, 89 | reset_load_kwargs=reset_load_kwargs, 90 | placement_state=placement_state, 91 | dynamic_connectivity=dynamic_connectivity, 92 | dynamic_connectivity_kwargs=dynamic_connectivity_kwargs 93 | ), 94 | seed=seed, 95 | ) 96 | 97 | # create evaluation environment 98 | if create_eval_env: 99 | self.eval_env = 
copy.deepcopy(self.tr_env) 100 | 101 | # create the model 102 | self.model = A2C(policy='MultiInputPolicy', env=self.tr_env, verbose=2, device=device, 103 | learning_rate=lr, 104 | n_steps=n_steps, 105 | gamma=gamma, 106 | ent_coef=ent_coef, 107 | gae_lambda=gae_lambda, 108 | seed=seed, 109 | use_rms_prop=True, 110 | tensorboard_log=tensorboard_log, 111 | policy_kwargs=dict( 112 | activation_fn=activation_fn, 113 | net_arch=net_arch, 114 | features_extractor_class=GCNsFeaturesExtractor, 115 | share_features_extractor=False, 116 | features_extractor_kwargs=dict( 117 | psn=psn, 118 | activation_fn=nn.ReLU, 119 | gcn_layers_dims=gcn_layers_dims, 120 | ) 121 | )) 122 | print(self.model.policy) 123 | 124 | # wandb config 125 | if reset_load_kwargs.get('rand_load', False): 126 | load_range = reset_load_kwargs.get('rand_range', (0., 1.)) 127 | self.tr_load = 'random ' + str(load_range) 128 | else: 129 | self.tr_load = reset_load_kwargs.get('cpu_load', 0.8) 130 | # eval_load = eval_load if eval_load is not None else self.tr_load 131 | self.wandb_config = { 132 | "n tr envs": n_tr_envs, 133 | "NSPRs per training ep": nsprs_per_ep, 134 | "max steps per tr ep": max_ep_steps if time_limit else None, 135 | "PSN load (tr)": self.tr_load, 136 | # "PSN load (eval)": eval_load, 137 | "GCNs layers dims": gcn_layers_dims, 138 | "mpl_extractor arch": net_arch, 139 | "use placement state": placement_state, 140 | "accumulate reward": accumulate_rew, 141 | "discount acceptance reward": discount_acc_rew, 142 | "dynamic connectivity": dynamic_connectivity, 143 | "dynamic load range": "0-0.9", 144 | } 145 | 146 | def train( 147 | self, 148 | tot_steps: int, 149 | log_interval: int = 10, 150 | wandb: bool = False, 151 | callbacks: List[BaseCallback] = [], 152 | ): 153 | # wandb things 154 | self.wandb_config["total training steps"] = tot_steps 155 | if wandb: 156 | # init wandb run 157 | wandb_run = wandb.init( 158 | project="Same or different activations", 159 | dir="../", 160 | name="SAME 
(ReLU) (non-shared f.e.) (wax50, load 0.8, small GCNs)", 161 | config=self.wandb_config, 162 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 163 | save_code=True, # optional 164 | ) 165 | # add wandb callback 166 | callbacks.append( 167 | WandbCallback( 168 | model_save_path=f"../models/{wandb_run.id}", 169 | verbose=2, 170 | model_save_freq=10_000 171 | ) 172 | ) 173 | 174 | # add callback for hyperparameters logging 175 | callbacks.append( 176 | HParamCallback( 177 | self.tr_env.num_envs, 178 | self.eval_env.num_envs, 179 | self.nsprs_per_ep, 180 | self.tr_load, 181 | tr_max_ep_steps=self.max_ep_steps if self.time_limit else None, 182 | use_placement_state=self.placement_state, 183 | ), 184 | ) 185 | 186 | # model training 187 | self.model.learn( 188 | total_timesteps=tot_steps, 189 | log_interval=log_interval, 190 | callback=callbacks 191 | ) 192 | 193 | if wandb: 194 | wandb_run.finish() 195 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Union, List, Optional, Type 2 | 3 | import gym 4 | import networkx as nx 5 | import numpy as np 6 | 7 | from gym.utils.env_checker import check_env 8 | from network_simulator import NetworkSimulator 9 | from wrappers import NSPRsGeneratorHADRL, RemovePlacementState, DynamicConnectivity 10 | from sb3_contrib.common.wrappers import ActionMasker 11 | 12 | 13 | def make_env( 14 | psn_path: str, 15 | base_env_kwargs: Optional[dict] = None, 16 | time_limit: bool = False, 17 | time_limit_kwargs: Optional[dict] = None, 18 | reset_load_class: Type[gym.Wrapper] = None, 19 | reset_load_kwargs: Optional[dict] = None, 20 | generate_nsprs: bool = False, 21 | nsprs_gen_kwargs: Optional[dict] = None, 22 | placement_state: bool = True, 23 | dynamic_connectivity: bool = False, 24 | dynamic_connectivity_kwargs: Optional[dict] = dict(link_bw=10_000), 25 | 
dynamic_topology: bool = False, 26 | ): 27 | """ Create the environment. 28 | It can be wrapped with different wrappers, all with their own arguments. 29 | They wrappers are namely: TimeLimit, ResetWithRandLoad, NSPRsGeneratorHADRL. 30 | 31 | :param psn_path: path to the PSN file 32 | :param base_env_kwargs: kwargs of the base environment 33 | :param time_limit: if True, the env is wrapped with TimeLimit wrapper 34 | :param time_limit_kwargs: kwargs of the TimeLimit wrapper 35 | :param reset_load_class: class of the wrapper to reset the PSN with load 36 | :param reset_load_kwargs: kwargs for the reset-with-load wrapper 37 | :param hadrl_nsprs: if True, the env is wrapped with NSPRsGeneratorHADRL wrapper 38 | :param hadrl_nsprs_kwargs: kwargs for the NSPRsGeneratorHADRL wrapper 39 | :param placement_state: if False, adds a wrapper that removes the placement state from the observations 40 | :param dynamic_connectivity: if True, the connectivity of the PSN changes in every episode 41 | :param dynamic_connectivity_kwargs: kwargs for the DynamicConnectivity wrapper 42 | :param dynamic_topology: if True, the topology of the PSN changes in every episode. 43 | Note: it True, 'dynamic_connectivity' will be forced to True as well, 44 | as there's no way to change the nodes and not the connectivity. 
45 | """ 46 | base_env_kwargs = {} if base_env_kwargs is None else base_env_kwargs 47 | time_limit_kwargs = {} if time_limit_kwargs is None else time_limit_kwargs 48 | reset_load_kwargs = {} if reset_load_kwargs is None else reset_load_kwargs 49 | dynamic_connectivity_kwargs = {} if dynamic_connectivity_kwargs is None else dynamic_connectivity_kwargs 50 | 51 | # base env 52 | env = NetworkSimulator(psn_path, **base_env_kwargs) 53 | 54 | # apply wrappers 55 | if time_limit: 56 | env = gym.wrappers.TimeLimit(env, **time_limit_kwargs) 57 | if generate_nsprs: 58 | env = NSPRsGeneratorHADRL(env, **nsprs_gen_kwargs) 59 | if dynamic_topology: 60 | env = ActionMasker(env, action_mask_fn=env.get_action_mask) 61 | env = DynamicConnectivity(env, nodes_mask=env.get_action_mask, **dynamic_connectivity_kwargs) 62 | dynamic_connectivity = False 63 | if dynamic_connectivity: 64 | env = DynamicConnectivity(env, **dynamic_connectivity_kwargs) 65 | if reset_load_class is not None: 66 | env = reset_load_class(env, **reset_load_kwargs) 67 | if not placement_state: 68 | env = RemovePlacementState(env) 69 | # check_env(env) # could make the code crash with masked actions 70 | return env 71 | 72 | 73 | def create_HADRL_PSN_file( 74 | path: str, 75 | n_CCPs: int = 1, 76 | n_CDCs: int = 5, 77 | n_EDCs: int = 15, 78 | n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4), 79 | cpu_cap: int = 50, 80 | ram_cap: int = 300, 81 | intra_CCP_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 82 | intra_CDC_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 83 | intra_EDC_bw_cap: int = 10000, # 10000 Mbps = 10 Gbps 84 | outer_DC_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 85 | n_EDCs_per_CDC: int = 3, 86 | ): 87 | """ Initialize the PSN as in the HA-DRL paper 88 | 89 | :param path: path where to save the file defining the PSN 90 | :param n_CCPs: number of CCPs 91 | :param n_CDCs: number of CDCs 92 | :param n_EDCs: number of EDCs 93 | :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, 
        EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within an EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    # number of servers per DC category
    n_servers_per_CCP, n_servers_per_CDC, n_servers_per_EDC = n_servers_per_DC
    # total number of server ids needed per DC category
    n_ids_CCPs = n_CCPs * n_servers_per_CCP
    n_ids_CDCs = n_CDCs * n_servers_per_CDC
    n_ids_EDCs = n_EDCs * n_servers_per_EDC

    # ids of servers in various DCs: consecutive integer ranges (CCPs first,
    # then CDCs, then EDCs), each reshaped to one row per DC
    CCP_ids = np.arange(n_ids_CCPs).reshape(n_CCPs, n_servers_per_CCP)
    CDC_ids = np.arange(
        n_ids_CCPs,
        n_ids_CCPs + n_ids_CDCs).reshape(n_CDCs, n_servers_per_CDC)
    EDC_ids = np.arange(
        CDC_ids[-1, -1] + 1,
        CDC_ids[-1, -1] + 1 + n_ids_EDCs).reshape(n_EDCs, n_servers_per_EDC)

    # one switch per DC (based on Fig. 1 in HA-DRL paper); switch ids continue
    # right after the last server id
    n_switches = n_CCPs + n_CDCs + n_EDCs
    switches_ids = list(range(EDC_ids[-1, -1] + 1,
                              EDC_ids[-1, -1] + 1 + n_switches))

    # one router per DC (based on Fig. 1 in HA-DRL paper)
    n_routers = n_CCPs + n_CDCs + n_EDCs
    routers_ids = list(range(switches_ids[-1] + 1, switches_ids[-1] + 1 + n_routers))

    # create graph
    g = nx.Graph(Label="HA-DRL PSN")

    # add nodes
    _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
                  cpu_cap, ram_cap)

    # add links
    _create_HADRL_links(
        g, n_CCPs, n_CDCs, n_EDCs, n_servers_per_CCP, n_servers_per_CDC,
        n_servers_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
        intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap,
        n_EDCs_per_CDC)

    # save graph
    nx.write_graphml(g, path)


def create_HEENSO_PSN_file(
        path: str,
        n_CCPs: int = 1,
        n_CDCs: int = 5,
        n_EDCs: int = 15,
        n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4),
        cpu_cap: int = 50,
        ram_cap: int = 300,
        intra_CCP_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_CDC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_EDC_bw_cap: int = 10000,  # 10000 Mbps = 10 Gbps
        outer_DC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        n_EDCs_per_CDC: int = 3,
):
    """ Initialize the PSN as in the paper "Heuristic for Edge-enable Network Slice Optimization
    using the Power of Two Choices"

    Disclaimer: the topology is slightly different, the ring of nodes in Fig. 4
    of the paper is brought one step closer to the CCP and nodes 26 to 30 are
    removed, since they don't increase the number of possible paths across the PSN
    (they would only make some paths 1 step longer, reducing the reward).

    :param path: path where to save the file defining the PSN
    :param n_CCPs: number of CCPs
    :param n_CDCs: number of CDCs
    :param n_EDCs: number of EDCs
    :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within an EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    # number of servers per DC category
    n_servers_per_CCP, n_servers_per_CDC, n_servers_per_EDC = n_servers_per_DC
    n_ids_CCPs = n_CCPs * n_servers_per_CCP
    n_ids_CDCs = n_CDCs * n_servers_per_CDC
    n_ids_EDCs = n_EDCs * n_servers_per_EDC

    # ids of servers in various DCs (same consecutive-range scheme as in
    # create_HADRL_PSN_file above)
    CCP_ids = np.arange(n_ids_CCPs).reshape(n_CCPs, n_servers_per_CCP)
    CDC_ids = np.arange(
        n_ids_CCPs,
        n_ids_CCPs + n_ids_CDCs).reshape(n_CDCs, n_servers_per_CDC)
    EDC_ids = np.arange(
        CDC_ids[-1, -1] + 1,
        CDC_ids[-1, -1] + 1 + n_ids_EDCs).reshape(n_EDCs, n_servers_per_EDC)

    # one switch per DC (based on Fig. 4 in HEENSO paper)
    n_switches = n_CCPs + n_CDCs + n_EDCs
    switches_ids = list(range(EDC_ids[-1, -1] + 1,
                              EDC_ids[-1, -1] + 1 + n_switches))

    # one router per DC (based on Fig.
4 in HEENSO paper) 200 | n_routers = n_CDCs + n_EDCs 201 | routers_ids = list( 202 | range(switches_ids[-1] + 1, switches_ids[-1] + 1 + n_routers)) 203 | 204 | # create graph 205 | g = nx.Graph(Label="HEENSO PSN") 206 | 207 | # add nodes 208 | _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids, 209 | cpu_cap, ram_cap) 210 | 211 | # add links 212 | _create_HEENSO_links( 213 | g, n_CCPs, n_CDCs, n_EDCs, n_servers_per_CCP, n_servers_per_CDC, 214 | n_servers_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids, 215 | intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap, 216 | n_EDCs_per_CDC) 217 | 218 | # save graph 219 | nx.write_graphml(g, path) 220 | 221 | 222 | def _create_nodes( 223 | g: nx.Graph, 224 | CCP_ids: Union[np.ndarray, List[int]], 225 | CDC_ids: Union[np.ndarray, List[int]], 226 | EDC_ids: Union[np.ndarray, List[int]], 227 | switches_ids: Union[np.ndarray, List[int]], 228 | routers_ids: Union[np.ndarray, List[int]], 229 | cpu_cap: int, 230 | ram_cap: int, 231 | ): 232 | all_server_ids = np.concatenate((CCP_ids.flatten(), 233 | CDC_ids.flatten(), 234 | EDC_ids.flatten())) 235 | for server_id in all_server_ids: 236 | g.add_node(server_id, NodeType="server", CPUcap=cpu_cap, RAMcap=ram_cap) 237 | for switch_id in switches_ids: 238 | g.add_node(switch_id, NodeType="switch") 239 | for router_id in routers_ids: 240 | g.add_node(router_id, NodeType="router") 241 | 242 | 243 | def _create_HADRL_links( 244 | g: nx.Graph, 245 | n_CCPs: int, 246 | n_CDCs: int, 247 | n_EDCs: int, 248 | n_servers_per_CCP: int, 249 | n_servers_per_CDC: int, 250 | n_servers_per_EDC: int, 251 | CCP_ids: Union[np.ndarray, List[int]], 252 | CDC_ids: Union[np.ndarray, List[int]], 253 | EDC_ids: Union[np.ndarray, List[int]], 254 | switches_ids: Union[np.ndarray, List[int]], 255 | routers_ids: Union[np.ndarray, List[int]], 256 | intra_CCP_bw_cap: int, 257 | intra_CDC_bw_cap: int, 258 | intra_EDC_bw_cap: int, 259 | outer_DC_bw_cap: int, 260 | 
n_EDCs_per_CDC: int 261 | ): 262 | connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True 263 | CCPs_switches = switches_ids[:n_CCPs] 264 | CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs] 265 | EDCs_switches = switches_ids[n_CCPs + n_CDCs:] 266 | CCPs_routers = routers_ids[:n_CCPs] 267 | CDCs_routers = routers_ids[n_CCPs:n_CCPs + n_CDCs] 268 | EDCs_routers = routers_ids[n_CCPs + n_CDCs:] 269 | 270 | # connect CCPs' servers to their switches 271 | for i in range(n_CCPs): 272 | for j in range(n_servers_per_CCP): 273 | g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap) 274 | 275 | # connect CDCs' servers to their switches 276 | for i in range(n_CDCs): 277 | for j in range(n_servers_per_CDC): 278 | g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap) 279 | 280 | # connect EDCs' servers to their switches 281 | for i in range(n_EDCs): 282 | for j in range(n_servers_per_EDC): 283 | g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap) 284 | 285 | # connect CCPs' switches to their routers 286 | for i in range(len(CCPs_switches)): 287 | g.add_edge(CCPs_switches[i], CCPs_routers[i], BWcap=intra_CCP_bw_cap) 288 | 289 | # connect CDCs' switches to their routers 290 | for i in range(len(CDCs_switches)): 291 | g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap) 292 | 293 | # connect EDCs' switches to their routers 294 | for i in range(len(EDCs_switches)): 295 | g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap) 296 | 297 | # connect CDCs' routers to CCPs' routers 298 | for i in range(n_CDCs): 299 | # each CDC is connected to one CCP 300 | corresp_CCP = np.random.randint(0, n_CCPs) 301 | g.add_edge(CDCs_routers[i], CCPs_routers[corresp_CCP], BWcap=outer_DC_bw_cap) 302 | 303 | # connect each CDCs' router to n EDCs' routers 304 | for i in range(n_CDCs): 305 | if connect_CDCs_EDCs_randomly: 306 | corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False) 307 
| else: 308 | corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs * i + n_EDCs_per_CDC)) 309 | for j in range(n_EDCs_per_CDC): 310 | g.add_edge(CDCs_routers[i], EDCs_routers[corresp_EDCs[j]], 311 | BWcap=outer_DC_bw_cap) 312 | 313 | # connect CDCs and EDCs' routers in a circular way (like in Fig. 1 in HA-DRL paper) 314 | CDCs_and_EDCs_routers = np.concatenate((CDCs_routers, EDCs_routers)) 315 | for i in range(len(CDCs_and_EDCs_routers)): 316 | g.add_edge(CDCs_and_EDCs_routers[i], 317 | CDCs_and_EDCs_routers[(i + 1) % len(CDCs_and_EDCs_routers)], 318 | BWcap=outer_DC_bw_cap) 319 | 320 | 321 | def _create_HEENSO_links( 322 | g: nx.Graph, 323 | n_CCPs: int, 324 | n_CDCs: int, 325 | n_EDCs: int, 326 | n_servers_per_CCP: int, 327 | n_servers_per_CDC: int, 328 | n_servers_per_EDC: int, 329 | CCP_ids: Union[np.ndarray, List[int]], 330 | CDC_ids: Union[np.ndarray, List[int]], 331 | EDC_ids: Union[np.ndarray, List[int]], 332 | switches_ids: Union[np.ndarray, List[int]], 333 | routers_ids: Union[np.ndarray, List[int]], 334 | intra_CCP_bw_cap: int, 335 | intra_CDC_bw_cap: int, 336 | intra_EDC_bw_cap: int, 337 | outer_DC_bw_cap: int, 338 | n_EDCs_per_CDC: int 339 | ): 340 | connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True 341 | CCPs_switches = switches_ids[:n_CCPs] 342 | CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs] 343 | EDCs_switches = switches_ids[n_CCPs + n_CDCs:] 344 | CDCs_routers = routers_ids[:n_CDCs] 345 | EDCs_routers = routers_ids[n_CDCs:] 346 | 347 | # connect CCPs' servers to their switches 348 | for i in range(n_CCPs): 349 | for j in range(n_servers_per_CCP): 350 | g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap) 351 | 352 | # connect CDCs' servers to their switches 353 | for i in range(n_CDCs): 354 | for j in range(n_servers_per_CDC): 355 | g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap) 356 | 357 | # connect EDCs' servers to their switches 358 | for i in range(n_EDCs): 359 | for j 
in range(n_servers_per_EDC): 360 | g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap) 361 | 362 | # connect CDCs' switches to their routers 363 | for i in range(len(CDCs_switches)): 364 | g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap) 365 | 366 | # connect EDCs' switches to their routers 367 | for i in range(len(EDCs_switches)): 368 | g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap) 369 | 370 | # connect CDCs' routers to CCPs' switches 371 | for i in range(n_CDCs): 372 | # each CDC is connected to one CCP 373 | corresp_CCP = np.random.randint(0, n_CCPs) 374 | g.add_edge(CDCs_routers[i], CCPs_switches[corresp_CCP], BWcap=outer_DC_bw_cap) 375 | 376 | # connect each CDCs' switch to n EDCs' routers 377 | for i in range(n_CDCs): 378 | if connect_CDCs_EDCs_randomly: 379 | corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False) 380 | else: 381 | corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs_per_CDC * i + n_EDCs_per_CDC)) 382 | for j in range(n_EDCs_per_CDC): 383 | g.add_edge(CDCs_switches[i], EDCs_routers[corresp_EDCs[j]], 384 | BWcap=outer_DC_bw_cap) 385 | 386 | # connect CDCs routers in a circular way (like in Fig. 
class DynamicConnectivity(gym.Wrapper):
    """Re-wires the PSN's connectivity at the beginning of every episode.

    On reset, all physical links are removed (and optionally some nodes,
    according to ``nodes_mask``), then random links of capacity ``link_bw``
    are added until no node is isolated and the PSN's original total
    bandwidth capacity (scaled by the fraction of available nodes) is
    restored. The ``bw_avails`` entry of the env's obs dict is rebuilt and
    normalized accordingly.
    """

    def __init__(
            self,
            env: gym.Env,
            link_bw: int = 10_000,
            nodes_mask: Optional[Callable[[gym.Env], np.ndarray]] = None
    ):
        """
        :param env: gym environment
        :param link_bw: total bandwidth capacity of each link
        :param nodes_mask: if not None, a callable returning a boolean mask
            over the servers; nodes whose entry is False are removed from
            the PSN graph at each reset
        """
        super().__init__(env)
        self.nodes_mask = nodes_mask
        self.link_bw = link_bw
        # target bandwidth: total BW capacity of the PSN as it was when wrapped
        self.tot_bw_cap = sum(edge['BWcap'] for edge in self.env.psn.edges.values())
        self.placed_bw = 0

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        # remove all edges from the PSN
        self.remove_all_edges()
        # eventually remove masked nodes
        if self.nodes_mask is not None:
            self.remove_masked_nodes()
        # initialize the bandwidth placed in the PSN
        self.placed_bw = 0
        # add edges in the PSN until the target bandwidth capacity is reached
        self.add_edges()
        return self.env.obs_dict  # updated in self.add_edges()

    def remove_all_edges(self):
        """Remove every physical link from the PSN."""
        # BUGFIX: materialize the edge list first; removing edges while
        # iterating the live edge view mutates the view being iterated
        for u, v in list(self.env.psn.edges.keys()):
            self.env.psn.remove_edge(u, v)

    def remove_masked_nodes(self):
        """Remove from the PSN the nodes whose mask entry is False."""
        nodes_mask = self.nodes_mask(self.env)
        # indexes where the mask is False
        indexes_to_remove = np.where(np.logical_not(nodes_mask))[0]
        for idx in indexes_to_remove:
            node_id = self.env.servers_map_idx_id[idx]
            self.env.psn.remove_node(node_id)

    def add_edges(self):
        """Add edges to the PSN.

        Repeatedly connects a random node with a yet-unvisited node until no
        node is isolated; then, if the target BW hasn't been reached, keeps
        adding further random links until it is.
        """
        # zero the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] = np.zeros_like(self.env.obs_dict['bw_avails'])
        # set of unvisited nodes
        unvisited = set(self.env.psn.nodes)
        while unvisited:
            # sample a node from the PSN
            u = random.choice(list(self.env.psn.nodes))
            # sample an unvisited node to connect to it
            v = random.choice(list(unvisited))
            if u != v:
                # connect the 2 nodes
                self.env.psn.add_edge(u, v, BWcap=self.link_bw, availBW=self.link_bw)
                # save the amount of bandwidth introduced in the PSN
                self.placed_bw += self.link_bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                self.env.obs_dict['bw_avails'][u_idx] += self.link_bw
                self.env.obs_dict['bw_avails'][v_idx] += self.link_bw
                # remove the nodes from the set of unvisited nodes
                unvisited.remove(v)
                if u in unvisited:
                    unvisited.remove(u)

        # if the total bandwidth of the PSN hasn't been reached, reach it by adding random links
        perc_avail_nodes = self.env.perc_avail_nodes
        # cut tot bw proportionally to number of nodes
        tot_bw = self.tot_bw_cap * perc_avail_nodes
        while self.placed_bw < tot_bw:
            # BUGFIX: random.sample requires a sequence (raises TypeError for a
            # NodeView on Python >= 3.11), so convert the nodes to a list
            u, v = random.sample(list(self.env.psn.nodes), 2)
            # check that the 2 nodes aren't connected already
            if (u, v) not in self.env.psn.edges:
                bw = min(self.link_bw, tot_bw - self.placed_bw)
                self.env.psn.add_edge(u, v, BWcap=bw, availBW=bw)
                self.placed_bw += bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict.
                # BUGFIX: credit the bandwidth actually placed on this link (bw),
                # which may be smaller than self.link_bw for the last link added
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                self.env.obs_dict['bw_avails'][u_idx] += bw
                self.env.obs_dict['bw_avails'][v_idx] += bw

        # normalize the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] /= np.max(self.env.obs_dict['bw_avails'])
class NSPRsGeneratorHADRL(gym.Wrapper):
    """
    Wrapper to make the simulator generate data the same way as in the
    paper HA-DRL[1].

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            env: gym.Env,
            nsprs_per_ep: int = 5,
            vnfs_per_nspr: int = 5,
            cpu_req_per_vnf: int = 25,
            ram_req_per_vnf: int = 150,
            bw_req_per_vl: int = 2000,
            load: float = 0.5,
            always_one: bool = False
    ):
        """
        :param env: environment to wrap
        :param nsprs_per_ep: max NSPRs per episode (None = unlimited)
        :param vnfs_per_nspr: number of VNFs in each NSPR (linear chain)
        :param cpu_req_per_vnf: CPU requirement of each VNF
        :param ram_req_per_vnf: RAM requirement of each VNF
        :param bw_req_per_vl: bandwidth requirement of each virtual link
        :param load: target load of the PSN, drives the NSPR arrival rate
        :param always_one: if True, always generate exactly one NSPR at reset
        """
        super().__init__(env)
        if self.env.nsprs_per_episode is not None:
            # BUGFIX: the two string halves used to concatenate without a
            # separating space ("...NSPRsper episode...")
            warnings.warn("The environment already has a fixed number of NSPRs "
                          "per episode. The wrapper will override this value.")
        if nsprs_per_ep is None:
            # no limit, just use max steps (if not None), otherwise infinite episode
            nsprs_per_ep = math.inf
        self.unwrapped.nsprs_per_episode = nsprs_per_ep
        self.nsprs_per_ep = nsprs_per_ep
        self.vnfs_per_nspr = vnfs_per_nspr
        self.cpu_req_per_vnf = cpu_req_per_vnf
        self.ram_req_per_vnf = ram_req_per_vnf
        self.bw_req_per_vl = bw_req_per_vl
        self.load = load
        self.always_one = always_one
        self.tot_cpu_cap = self._get_tot_cpu_cap()
        self.nspr_model = self._get_nspr_model()
        self.max_steps = None
        try:
            # if env is wrapped in TimeLimit, max arrival time of NSPRs is max episode length
            self.max_steps = self.env._max_episode_steps
            self.nsprs_duration = min(self.max_steps, 100)
        except (AttributeError, TypeError):
            # BUGFIX: was "except AttributeError or TypeError:", which evaluates
            # to "except AttributeError:" (the "or" short-circuits) and never
            # caught the TypeError raised by min() when _max_episode_steps is None
            self.nsprs_duration = 100
        # computed according to Sec. VII.C of HA-DRL paper
        self.arr_rate = self.load * self.tot_cpu_cap / self.nsprs_duration / \
            self.cpu_req_per_vnf / self.vnfs_per_nspr

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self.unwrapped.nsprs = self._generate_nsprs()
        self.unwrapped.waiting_nsprs += self.unwrapped.nsprs.get(self.unwrapped.time_step, [])
        self.unwrapped.pick_next_nspr()
        obs = self.unwrapped.update_nspr_state()
        return obs

    def _get_nspr_model(self):
        """ Build the template NSPR: a linear chain of identical VNFs. """
        nspr_model = nx.DiGraph()
        nspr_model.add_node(0, reqCPU=self.cpu_req_per_vnf,
                            reqRAM=self.ram_req_per_vnf, placed=-1)
        for i in range(1, self.vnfs_per_nspr):
            nspr_model.add_edge(i - 1, i, reqBW=self.bw_req_per_vl, placed=[])
            nspr_model.add_node(i, reqCPU=self.cpu_req_per_vnf,
                                reqRAM=self.ram_req_per_vnf, placed=-1)
        return nspr_model

    def _generate_nsprs(self):
        """ Dispatch to the generation strategy matching the arrival rate. """
        if self.always_one:
            nsprs_dict = self._generate_one_nspr()
        elif self.arr_rate >= 0.3:
            nsprs_dict = self._generate_nsprs_poisson()
        else:
            nsprs_dict = self._generate_nsprs_deterministic()
        return nsprs_dict

    def _generate_one_nspr(self):
        """ Generate a single NSPR arriving at the current time step. """
        nspr = self._get_nspr_model()
        nspr.graph['ArrivalTime'] = self.env.time_step
        nspr.graph['duration'] = 100
        return {self.env.time_step: [nspr]}

    def _generate_nsprs_poisson(self):
        """ Generate NSPRs with Poisson-distributed arrivals (rate = self.arr_rate). """
        cur_arr_time = self.env.time_step
        created_nsprs = 0
        nsprs_dict = {}
        while True:
            # NOTE: if self.max_steps is None, and the poisson sampling keeps
            # generating 0, this will loop forever, but since this is executed
            # only for a sufficiently high arrival rate, this is extremely unlikely to happen
            poisson_sample = np.random.poisson(lam=self.arr_rate)
            if poisson_sample > 0:
                nsprs_to_create = min(poisson_sample, self.nsprs_per_ep - created_nsprs)
                if nsprs_to_create <= 0:
                    break
                # deep-copy the template once per NSPR actually created
                # (previously one extra copy was made even when breaking out)
                batch = []
                for _ in range(nsprs_to_create):
                    cur_nspr = copy.deepcopy(self.nspr_model)
                    cur_nspr.graph['ArrivalTime'] = cur_arr_time
                    cur_nspr.graph['duration'] = self.nsprs_duration
                    batch.append(cur_nspr)
                nsprs_dict[cur_arr_time] = batch
                created_nsprs += nsprs_to_create
            cur_arr_time += 1
            if self.max_steps is not None and cur_arr_time - self.env.time_step > self.max_steps:
                break
        return nsprs_dict

    def _generate_nsprs_deterministic(self):
        """ Generate one NSPR every round(1 / arr_rate) steps (low-rate regime). """
        if self.arr_rate >= 1:
            # this function is called only for low arrival rates
            raise NotImplementedError
        one_every_how_many_steps = round(1 / self.arr_rate)
        nsprs_dict = {}
        step = self.env.time_step
        created_nsprs = 0
        while True:
            if step % one_every_how_many_steps == 0:
                cur_nspr = copy.deepcopy(self.nspr_model)
                cur_nspr.graph['ArrivalTime'] = step
                cur_nspr.graph['duration'] = self.nsprs_duration
                nsprs_dict[step] = [cur_nspr]
                created_nsprs += 1
            step += 1
            if created_nsprs >= self.nsprs_per_ep or \
                    (self.max_steps is not None and step - self.env.time_step > self.max_steps):
                break
        return nsprs_dict

    def _get_tot_cpu_cap(self):
        """ Sum the CPU capacity of every server in the PSN. """
        tot_cpu_cap = 0
        for node_id in self.env.psn.nodes:
            node = self.env.psn.nodes[node_id]
            if node['NodeType'] == 'server':
                tot_cpu_cap += node['CPUcap']
        return tot_cpu_cap
class RemovePlacementState(gym.ObservationWrapper):
    """ Strips the placement state from the observations, keeping only the
    PSN resource availabilities and the current NSPR's requirements. """

    def __init__(self, env):
        super().__init__(env)
        # stand-in for infinity: gym.spaces.Box apparently rejects
        # "math.inf" / "sys.maxsize" as a high value
        big = 1_000_000_000
        n_nodes = len(self.unwrapped.psn.nodes)

        def avail_box():
            # per-node normalized resource availability
            return Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32)

        def req_box(dtype=np.float32):
            # scalar requirement of the VNF currently being placed
            return Box(low=0, high=big, shape=(1,), dtype=dtype)

        self.observation_space = Dict({
            # PSN STATE
            'cpu_avails': avail_box(),
            'ram_avails': avail_box(),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': avail_box(),

            # NSPR STATE
            'cur_vnf_cpu_req': req_box(),
            'cur_vnf_ram_req': req_box(),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': req_box(),
            'vnfs_still_to_place': req_box(dtype=int),
        })

    def observation(self, obs):
        """ Return the observation restricted to the keys above (i.e. without
        the placement state). """
        kept = ('cpu_avails', 'ram_avails', 'bw_avails', 'cur_vnf_cpu_req',
                'cur_vnf_ram_req', 'cur_vnf_bw_req', 'vnfs_still_to_place')
        return {key: obs[key] for key in kept}
class ResetWithLoad(gym.Wrapper, ABC):
    """ Abstract class. Wrapper to reset the PSN with a certain tr_load """

    def __init__(self, env: gym.Env, **kwargs):
        super().__init__(env)
        # per-resource load fractions; subclasses overwrite these before reset
        self.cpu_load = self.ram_load = self.bw_load = 0.

    def reset(self, **kwargs):
        # doesn't work anymore, needs to be adapted
        raise NotImplementedError
        self.env.reset(**kwargs)
        self._init_psn_load()
        # the obs returned by env.reset is outdated, fetch a fresh one
        obs = self.env.update_nspr_state()
        return obs

    def _init_psn_load(self):
        """ Initialize the PSN's load with the specified values """
        for _, node_attrs in self.env.psn.nodes.items():
            if node_attrs['NodeType'] != "server":
                continue
            node_attrs['availCPU'] = int(node_attrs['CPUcap'] * (1 - self.cpu_load))
            node_attrs['availRAM'] = int(node_attrs['RAMcap'] * (1 - self.ram_load))
        for _, link_attrs in self.env.psn.edges.items():
            link_attrs['availBW'] = int(link_attrs['BWcap'] * (1 - self.bw_load))
class ResetWithFixedLoad(ResetWithLoad):
    """ Reset the PSN with a certain - fixed - amount of tr_load """

    def __init__(self, env: gym.Env, reset_load_perc: Union[float, dict] = 0.,
                 **kwargs):
        """ Constructor

        :param env: the environment to wrap
        :param reset_load_perc: init percentage of tr_load of the PSN's resources at each reset:
            if float, that value applies to all the resources for all nodes and links;
            if dict, it can specify the tr_load for each type of resource.
        """
        super().__init__(env)
        assert isinstance(reset_load_perc, (float, dict))
        # define the tr_load percentages of each resource
        if isinstance(reset_load_perc, dict):
            self.cpu_load = reset_load_perc.get('availCPU', 0)
            self.ram_load = reset_load_perc.get('availRAM', 0)
            self.bw_load = reset_load_perc.get('availBW', 0)
        else:
            assert 0 <= reset_load_perc <= 1
            self.cpu_load = self.ram_load = self.bw_load = reset_load_perc
        # every load fraction must be a valid percentage
        for load in (self.cpu_load, self.ram_load, self.bw_load):
            assert 0 <= load <= 1
class ResetWithRandLoad(ResetWithLoad):
    """ Reset the PSN with a random uniform amount of load """

    def __init__(self, env: gym.Env, min_perc: Union[float, dict],
                 max_perc: Union[float, dict], same_for_all: bool = True,
                 **kwargs):
        """ Constructor

        :param env: the environment to wrap
        :param min_perc: minimum percentage of tr_load of the PSN's resources at each reset
        :param max_perc: maximum percentage of tr_load of the PSN's resources at each reset
        :param same_for_all: if True, the same random value is used for all the nodes / links
        """
        super().__init__(env)
        self.same_for_all = same_for_all

        # min_perc and max_perc must either both be floats or both be dicts
        assert (isinstance(min_perc, float) and isinstance(max_perc, float)) or \
               (isinstance(min_perc, dict) and isinstance(max_perc, dict))

        # save the min and max percentages of tr_load
        if isinstance(min_perc, dict):
            self.min_cpu = min_perc.get('availCPU', 0)
            self.min_ram = min_perc.get('availRAM', 0)
            self.min_bw = min_perc.get('availBW', 0)
            self.max_cpu = max_perc.get('availCPU', 0)
            self.max_ram = max_perc.get('availRAM', 0)
            self.max_bw = max_perc.get('availBW', 0)
            for lo, hi in ((self.min_cpu, self.max_cpu),
                           (self.min_ram, self.max_ram),
                           (self.min_bw, self.max_bw)):
                assert 0 <= lo <= 1 and 0 <= hi <= 1 and lo <= hi
        else:
            assert 0 <= min_perc <= 1 and 0 <= max_perc <= 1 and min_perc <= max_perc
            self.min_cpu = self.min_ram = self.min_bw = min_perc
            self.max_cpu = self.max_ram = self.max_bw = max_perc

    def reset(self, **kwargs):
        # when one value is shared by the whole PSN, draw it here; otherwise
        # _init_psn_load draws a value per node / link
        if self.same_for_all:
            self.cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
            self.ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
            self.bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
        return super().reset(**kwargs)

    def _init_psn_load(self):
        if self.same_for_all:
            super()._init_psn_load()
            return
        for _, node in self.env.psn.nodes.items():
            if node['NodeType'] != "server":
                continue
            cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
            ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
            node['availCPU'] = int(node['CPUcap'] * (1 - cpu_load))
            node['availRAM'] = int(node['RAMcap'] * (1 - ram_load))
        for _, link in self.env.psn.edges.items():
            bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
            link['availBW'] = int(link['BWcap'] * (1 - bw_load))
class ResetWithLoadMixed(gym.Wrapper):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It selects a load percentage for each node/link such that the overall load of
    the PSN is the specified one. It means certain nodes will be free, others
    completely occupied and others will be partially occupied, so that the overall
    CPU/RAM capacity is the specified one. (Same thing for links with their bandwidth).
    """
    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super(ResetWithLoadMixed, self).__init__(env)
        self.random = rand_load
        # total capacities are lazily resolved on first _init_psn_load call
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = None
        if not rand_load:
            assert isinstance(load, (float, dict)), "Param 'load' is of an incorrect type"
            if isinstance(load, float):
                assert 0. <= load <= 1.
                self.cpu_load = self.ram_load = self.bw_load = load
            elif isinstance(load, dict):
                self.cpu_load = load.get('cpu', 0)
                self.ram_load = load.get('ram', 0)
                self.bw_load = load.get('bw', 0)
                assert 0. <= self.cpu_load <= 1. and 0. <= self.ram_load <= 1. and \
                       0. <= self.bw_load <= 1.
        else:
            assert len(rand_range) == 2 and 0. <= rand_range[0] <= 1. and \
                   0. <= rand_range[1] <= 1.
            # discretized pool of load values sampled at each reset (0.1 steps)
            self.rand_vals = np.arange(min(rand_range), max(rand_range), 0.1)

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self._init_psn_load()
        obs = self.env.update_nspr_state()  # the obs in the env.reset method is outdated
        return obs

    def compute_link_weight(self, source, target, link):
        # shortest-path weight callback: links lacking the required BW get
        # infinite weight so they are never chosen
        return 1 if link['availBW'] >= self.vl_req_bw else math.inf

    def _init_psn_load(self):
        """ Initialize the PSN's load """
        if self.random:
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = load
            # BW load is kept lower than node load (clamped at 0)
            self.bw_load = max(0.0, load - 0.4)

        # TODO: note that 'reset' is called here by every env in the VecEnv individually...
        # TODO: ...so here self.env is not a VecEnv, but just a NetworkSimulator
        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        max_bws = self.env.get_attr('max_bw') if isinstance(self.env, VecEnv) else [self.env.max_bw]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None or self.tot_bw_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap
            self.tot_bw_cap = self.env.tot_bw_cap

        # BW requirement used by compute_link_weight (hard-coded VL request size)
        self.vl_req_bw = 2000
        for i, psn in enumerate(psns):
            max_cpu, max_ram, max_bw = max_cpus[i], max_rams[i], max_bws[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # amounts to remove, expressed in normalized units (fraction of max)
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            tot_bw_to_remove = self.bw_load * self.tot_bw_cap / max_bw
            # iterate over nodes in a random order and reduce the CPU/RAM availabilities
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                    perc_to_remove = random.choice([0.5])
                    # CPU to remove
                    # x% of the node capacity (normalized)
                    cur_cpu_to_remove = perc_to_remove * node['CPUcap'] / max_cpu
                    # never remove more than what is left to remove or available
                    cur_cpu_to_remove = min([round(cur_cpu_to_remove, 3),
                                             tot_cpu_to_remove,
                                             obs_dict['cpu_avails'][idx]])
                    # RAM to remove
                    cur_ram_to_remove = perc_to_remove * node['RAMcap'] / max_ram
                    cur_ram_to_remove = min([round(cur_ram_to_remove, 3),
                                             tot_ram_to_remove,
                                             obs_dict['ram_avails'][idx]])
                    # remove resources
                    obs_dict['cpu_avails'][idx] -= cur_cpu_to_remove
                    obs_dict['ram_avails'][idx] -= cur_ram_to_remove
                    tot_cpu_to_remove -= cur_cpu_to_remove
                    tot_ram_to_remove -= cur_ram_to_remove

            # iterate over links in random order and reduce the BW availability
            links = list(psn.edges.items())
            while tot_bw_to_remove > 0:
                extremes, link = random.sample(links, 1)[0]
                # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                perc_to_remove = random.choice([0.5])
                cur_bw_to_remove = perc_to_remove * link['BWcap']
                idx_0, idx_1 = map_id_idx[extremes[0]], map_id_idx[extremes[1]]
                # cap at the remaining amount (de-normalized) and the link's availability
                cur_bw_to_remove = min([round(cur_bw_to_remove, 6),
                                        tot_bw_to_remove * max_bw,
                                        link['availBW']])
                cur_bw_to_remove_normal = cur_bw_to_remove / max_bw
                # links' BW actually reduced because needed for shortest path calculation
                link['availBW'] -= cur_bw_to_remove
                obs_dict['bw_avails'][idx_0] -= cur_bw_to_remove_normal
                obs_dict['bw_avails'][idx_1] -= cur_bw_to_remove_normal
                tot_bw_to_remove -= cur_bw_to_remove_normal
class ResetWithLoadBinary(ResetWithLoadMixed):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It puts a certain amount of nodes with zero available resources, so that
    the overall load of the PSN is the one specified.

    Note: only the CPU and RAM are modified, not the bandwidth
    """

    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super().__init__(env, load, rand_load, rand_range)

    def _init_psn_load(self):
        """ Initialize the PSN's load by zeroing whole servers until the
        target amount of CPU/RAM has been removed (BW is left untouched). """
        if self.random:
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = self.bw_load = load

        # see the note in ResetWithLoadMixed: when inside a VecEnv, reset is
        # called per sub-env, so self.env may be a plain simulator
        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap

        for i, psn in enumerate(psns):
            max_cpu, max_ram = max_cpus[i], max_rams[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # amounts to remove, in normalized units (fraction of max capacity);
            # BW is deliberately not touched (see class docstring)
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            # iterate over nodes in a random order and zero the CPU/RAM availabilities
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    cur_removed_cpu = obs_dict['cpu_avails'][idx]
                    # BUGFIX: the RAM removed from this node was previously
                    # accounted with the CPU amount (tot_ram_to_remove -=
                    # cur_removed_cpu), skewing the resulting RAM load
                    cur_removed_ram = obs_dict['ram_avails'][idx]
                    obs_dict['cpu_avails'][idx] = 0.
                    obs_dict['ram_avails'][idx] = 0.
                    tot_cpu_to_remove -= cur_removed_cpu
                    tot_ram_to_remove -= cur_removed_ram
class ResetWithRealisticLoad(gym.Wrapper):
    """ Wrapper that resets the PSN with a certain amount of load already.
    It does so in a way that resembles how the state of the PSN might be in
    case an agent has been actually placing NSPRs.

    It samples NSPRs from the ones that should arrive during the current episode
    and places their VNFs in random nodes, connecting them via shortest path.
    This way the CPU/RAM and even the BW allocation should be realistic.
    """

    def __init__(self, env: gym.Env, cpu_load: float, **kwargs):
        """
        :param env: environment
        :param cpu_load: target percentage of CPU load of the PSN
        """
        super().__init__(env)
        assert 0. <= cpu_load <= 1.
        self.cpu_load = cpu_load

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self.init_psn_load()
        obs = self.env.update_nspr_state()  # the obs in the env.reset method is outdated
        return obs

    def init_psn_load(self):
        """ Initialize the PSN with the target load.

        Repeatedly samples an NSPR, places its VNFs on random suitable servers
        and its VLs along shortest paths, until the target (normalized) amount
        of CPU has been consumed.
        """
        # target CPU to remove, normalized by the max CPU capacity
        cpu_to_remove_normal = self.env.tot_cpu_cap * self.cpu_load / self.env.max_cpu
        removed_cpu_normal = 0
        while removed_cpu_normal < cpu_to_remove_normal:
            nspr = self.sample_nspr()
            placement_map = {}  # VNF id -> id of the physical node hosting it
            # place all VNFs
            for vnf_id, vnf in nspr.nodes.items():
                node_id, node_idx = self.sample_suitable_node(vnf)
                placement_map[vnf_id] = node_id
                self.env.obs_dict['cpu_avails'][node_idx] -= vnf['reqCPU'] / self.env.max_cpu
                self.env.obs_dict['ram_avails'][node_idx] -= vnf['reqRAM'] / self.env.max_ram
                removed_cpu_normal += vnf['reqCPU'] / self.env.max_cpu
                if removed_cpu_normal >= cpu_to_remove_normal:
                    break
            # place all VLs
            for (src_vnf_id, dst_vnf_id), vl in nspr.edges.items():
                # stored on self so that compute_links_weights (the nx weight
                # callback) can read the current VL's BW requirement
                self.req_bw = vl['reqBW']
                try:
                    src_node_id = placement_map[src_vnf_id]
                    dst_node_id = placement_map[dst_vnf_id]
                except KeyError:
                    # it means either src_vnf_id, dst_vnf_id or both haven't been placed -> skip link placement
                    continue
                try:
                    path = nx.shortest_path(G=self.env.psn, source=src_node_id,
                                            target=dst_node_id, weight=self.compute_links_weights,
                                            method='dijkstra')
                    for i in range(len(path) - 1):
                        # if this VL exceeds the bandwidth available on this hop,
                        # don't place it there, it's ok: best-effort placement,
                        # it can happen when there is no available path
                        if self.env.psn.edges[path[i], path[i+1]]['availBW'] - vl['reqBW'] < 0:
                            continue
                        self.env.psn.edges[path[i], path[i+1]]['availBW'] -= vl['reqBW']
                        idx1 = self.env.map_id_idx[path[i]]
                        idx2 = self.env.map_id_idx[path[i+1]]
                        self.env.obs_dict['bw_avails'][idx1] -= vl['reqBW'] / self.env.max_bw
                        self.env.obs_dict['bw_avails'][idx2] -= vl['reqBW'] / self.env.max_bw
                except nx.NetworkXNoPath:
                    # no path at all between the two servers -> skip this VL
                    pass

    def compute_links_weights(self, source, target, link):
        """ Method called automatically by nx.shortest_path() """
        # links lacking the required BW get infinite weight so Dijkstra avoids them
        return 1 if link['availBW'] >= self.req_bw else math.inf

    def sample_suitable_node(self, vnf: dict):
        """ Sample a random node with enough resources to host the VNF.

        NOTE(review): rejection-samples until a suitable server is found —
        loops forever if no server can host the VNF; presumably this cannot
        happen at the supported cpu_load values, confirm for high loads.
        """
        server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
        server_id = self.env.servers_map_idx_id[server_idx]
        while not self.env.enough_avail_resources(server_id, vnf):
            server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
            server_id = self.env.servers_map_idx_id[server_idx]
        return server_id, server_idx

    def sample_nspr(self):
        """ Sample a NSPR among the ones that will arrive in this episode """
        arr_time = random.choice(list(self.env.nsprs.keys()))
        idx = np.random.choice(len(self.env.nsprs[arr_time]))
        nspr = self.env.nsprs[arr_time][idx]
        return nspr