├── .gitignore
├── NSPRs
├── box.graphml
├── dummy_NSPR_1.graphml
└── dummy_NSPR_2.graphml
├── PSNs
├── hadrl_1-16_5-10_15-4.graphml
├── hadrl_psn.graphml
├── hadrl_psn_1-10_1-6_1-4.graphml
├── heenso_1-16_5-10_15-4.graphml
├── new_hadrl_1-16_5-10_15-4.graphml
├── simple_hadrl_psn.graphml
├── waxman_100_servers.graphml
├── waxman_20_servers.graphml
└── waxman_50_servers.graphml
├── README.md
├── requirements.txt
└── src
├── callbacks
├── __init__.py
├── acceptance_ratio_callbacks.py
├── hparam_callback.py
├── psn_load_callback.py
└── seen_nsprs_callback.py
├── demo.py
├── eval_script.py
├── heuristic_layers.py
├── network_simulator.py
├── policies
├── __init__.py
├── features_extractors
│ ├── __init__.py
│ └── hadrl_features_extractor.py
├── hadrl_policy.py
└── mlp_extractors
│ ├── __init__.py
│ └── hadrl_mlp_extractor.py
├── reader.py
├── spaces
├── __init__.py
└── discrete_with_negatives.py
├── trainer.py
├── utils.py
└── wrappers
├── __init__.py
├── dynamic_connectivity.py
├── hadrl_nsprs_generator.py
├── no_placement_state.py
└── reset_with_load.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Intellij stuff
10 | .idea/
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
134 | # project-specific stuff
135 | tb_logs*/
136 | models*/
137 | wandb/
138 | .vscode/
139 |
--------------------------------------------------------------------------------
/NSPRs/box.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | Box
20 | 10000
21 | 0
22 | 4
23 |
24 |
25 | 4
26 | 50
27 |
28 |
29 | 10
30 | 5
31 |
32 |
33 | 1
34 | 5
35 |
36 |
37 | 8
38 | 72
39 |
40 |
41 |
42 | 10
43 | 10
44 |
45 |
46 | 10
47 | 10
48 |
49 |
50 | 10
51 | 10
52 |
53 |
54 | 10
55 | 10
56 |
57 |
58 |
--------------------------------------------------------------------------------
/NSPRs/dummy_NSPR_1.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | Triangle
20 | 10000
21 | 1
22 | 5
23 |
24 |
25 | 10
26 | 5
27 |
28 |
29 | 50
30 | 10
31 |
32 |
33 | 5
34 | 1
35 |
36 |
37 |
38 | 10
39 | 10
40 |
41 |
42 | 10
43 | 10
44 |
45 |
46 | 10
47 | 10
48 |
49 |
50 |
--------------------------------------------------------------------------------
/NSPRs/dummy_NSPR_2.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | Triangle
20 | 10000
21 | 2
22 | 20
23 |
24 |
25 | 1
26 | 5
27 |
28 |
29 | 5
30 | 1
31 |
32 |
33 | 10
34 | 1
35 |
36 |
37 |
38 | 1
39 | 10
40 |
41 |
42 | 1
43 | 10
44 |
45 |
46 | 1
47 | 10
48 |
49 |
50 |
--------------------------------------------------------------------------------
/PSNs/hadrl_psn_1-10_1-6_1-4.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | server
11 | 50
12 | 300
13 |
14 |
15 | server
16 | 50
17 | 300
18 |
19 |
20 | server
21 | 50
22 | 300
23 |
24 |
25 | server
26 | 50
27 | 300
28 |
29 |
30 | server
31 | 50
32 | 300
33 |
34 |
35 | server
36 | 50
37 | 300
38 |
39 |
40 | server
41 | 50
42 | 300
43 |
44 |
45 | server
46 | 50
47 | 300
48 |
49 |
50 | server
51 | 50
52 | 300
53 |
54 |
55 | server
56 | 50
57 | 300
58 |
59 |
60 | server
61 | 50
62 | 300
63 |
64 |
65 | server
66 | 50
67 | 300
68 |
69 |
70 | server
71 | 50
72 | 300
73 |
74 |
75 | server
76 | 50
77 | 300
78 |
79 |
80 | server
81 | 50
82 | 300
83 |
84 |
85 | server
86 | 50
87 | 300
88 |
89 |
90 | server
91 | 50
92 | 300
93 |
94 |
95 | server
96 | 50
97 | 300
98 |
99 |
100 | server
101 | 50
102 | 300
103 |
104 |
105 | server
106 | 50
107 | 300
108 |
109 |
110 | switch
111 |
112 |
113 | switch
114 |
115 |
116 | switch
117 |
118 |
119 | router
120 |
121 |
122 | router
123 |
124 |
125 | router
126 |
127 |
128 | 100000
129 |
130 |
131 | 100000
132 |
133 |
134 | 100000
135 |
136 |
137 | 100000
138 |
139 |
140 | 100000
141 |
142 |
143 | 100000
144 |
145 |
146 | 100000
147 |
148 |
149 | 100000
150 |
151 |
152 | 100000
153 |
154 |
155 | 100000
156 |
157 |
158 | 100000
159 |
160 |
161 | 100000
162 |
163 |
164 | 100000
165 |
166 |
167 | 100000
168 |
169 |
170 | 100000
171 |
172 |
173 | 100000
174 |
175 |
176 | 10000
177 |
178 |
179 | 10000
180 |
181 |
182 | 10000
183 |
184 |
185 | 10000
186 |
187 |
188 | 100000
189 |
190 |
191 | 100000
192 |
193 |
194 | 10000
195 |
196 |
197 | 100000
198 |
199 |
200 | 100000
201 |
202 | HA-DRL PSN
203 |
204 |
205 |
--------------------------------------------------------------------------------
/PSNs/simple_hadrl_psn.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | server
11 | 50
12 | 300
13 |
14 |
15 | server
16 | 50
17 | 300
18 |
19 |
20 | server
21 | 50
22 | 300
23 |
24 |
25 | server
26 | 50
27 | 300
28 |
29 |
30 | server
31 | 50
32 | 300
33 |
34 |
35 | server
36 | 50
37 | 300
38 |
39 |
40 | server
41 | 50
42 | 300
43 |
44 |
45 | server
46 | 50
47 | 300
48 |
49 |
50 | server
51 | 50
52 | 300
53 |
54 |
55 | server
56 | 50
57 | 300
58 |
59 |
60 | server
61 | 50
62 | 300
63 |
64 |
65 | server
66 | 50
67 | 300
68 |
69 |
70 | server
71 | 50
72 | 300
73 |
74 |
75 | switch
76 |
77 |
78 | switch
79 |
80 |
81 | switch
82 |
83 |
84 | switch
85 |
86 |
87 | router
88 |
89 |
90 | router
91 |
92 |
93 | router
94 |
95 |
96 | router
97 |
98 |
99 | 100000
100 |
101 |
102 | 100000
103 |
104 |
105 | 100000
106 |
107 |
108 | 100000
109 |
110 |
111 | 100000
112 |
113 |
114 | 100000
115 |
116 |
117 | 100000
118 |
119 |
120 | 100000
121 |
122 |
123 | 100000
124 |
125 |
126 | 100000
127 |
128 |
129 | 100000
130 |
131 |
132 | 10000
133 |
134 |
135 | 10000
136 |
137 |
138 | 100000
139 |
140 |
141 | 100000
142 |
143 |
144 | 100000
145 |
146 |
147 | 10000
148 |
149 |
150 | 100000
151 |
152 |
153 | 100000
154 |
155 |
156 | 100000
157 |
158 |
159 | 100000
160 |
161 |
162 | 100000
163 |
164 | HA-DRL PSN
165 |
166 |
167 |
--------------------------------------------------------------------------------
/PSNs/waxman_20_servers.graphml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | server
13 | 50
14 | 50
15 | 300
16 | 300
17 |
18 |
19 | server
20 | 50
21 | 50
22 | 300
23 | 300
24 |
25 |
26 | server
27 | 50
28 | 50
29 | 300
30 | 300
31 |
32 |
33 | server
34 | 50
35 | 50
36 | 300
37 | 300
38 |
39 |
40 | server
41 | 50
42 | 50
43 | 300
44 | 300
45 |
46 |
47 | server
48 | 50
49 | 50
50 | 300
51 | 300
52 |
53 |
54 | server
55 | 50
56 | 50
57 | 300
58 | 300
59 |
60 |
61 | server
62 | 50
63 | 50
64 | 300
65 | 300
66 |
67 |
68 | server
69 | 50
70 | 50
71 | 300
72 | 300
73 |
74 |
75 | server
76 | 50
77 | 50
78 | 300
79 | 300
80 |
81 |
82 | server
83 | 50
84 | 50
85 | 300
86 | 300
87 |
88 |
89 | server
90 | 50
91 | 50
92 | 300
93 | 300
94 |
95 |
96 | server
97 | 50
98 | 50
99 | 300
100 | 300
101 |
102 |
103 | server
104 | 50
105 | 50
106 | 300
107 | 300
108 |
109 |
110 | server
111 | 50
112 | 50
113 | 300
114 | 300
115 |
116 |
117 | server
118 | 50
119 | 50
120 | 300
121 | 300
122 |
123 |
124 | server
125 | 50
126 | 50
127 | 300
128 | 300
129 |
130 |
131 | server
132 | 50
133 | 50
134 | 300
135 | 300
136 |
137 |
138 | server
139 | 50
140 | 50
141 | 300
142 | 300
143 |
144 |
145 | server
146 | 50
147 | 50
148 | 300
149 | 300
150 |
151 |
152 | 10000
153 | 10000
154 |
155 |
156 | 10000
157 | 10000
158 |
159 |
160 | 10000
161 | 10000
162 |
163 |
164 | 10000
165 | 10000
166 |
167 |
168 | 10000
169 | 10000
170 |
171 |
172 | 10000
173 | 10000
174 |
175 |
176 | 10000
177 | 10000
178 |
179 |
180 | 10000
181 | 10000
182 |
183 |
184 | 10000
185 | 10000
186 |
187 |
188 | 10000
189 | 10000
190 |
191 |
192 | 10000
193 | 10000
194 |
195 |
196 | 10000
197 | 10000
198 |
199 |
200 | 10000
201 | 10000
202 |
203 |
204 | 10000
205 | 10000
206 |
207 |
208 | 10000
209 | 10000
210 |
211 |
212 | 10000
213 | 10000
214 |
215 |
216 | 10000
217 | 10000
218 |
219 |
220 | 10000
221 | 10000
222 |
223 |
224 | 10000
225 | 10000
226 |
227 |
228 | 10000
229 | 10000
230 |
231 |
232 | 10000
233 | 10000
234 |
235 |
236 | 10000
237 | 10000
238 |
239 |
240 | 10000
241 | 10000
242 |
243 |
244 | 10000
245 | 10000
246 |
247 |
248 | 10000
249 | 10000
250 |
251 |
252 | 10000
253 | 10000
254 |
255 |
256 | 10000
257 | 10000
258 |
259 |
260 | 10000
261 | 10000
262 |
263 |
264 | 10000
265 | 10000
266 |
267 |
268 | 10000
269 | 10000
270 |
271 |
272 | 10000
273 | 10000
274 |
275 |
276 | 10000
277 | 10000
278 |
279 |
280 | 10000
281 | 10000
282 |
283 |
284 | 10000
285 | 10000
286 |
287 |
288 | 10000
289 | 10000
290 |
291 |
292 | 10000
293 | 10000
294 |
295 |
296 | 10000
297 | 10000
298 |
299 |
300 | 10000
301 | 10000
302 |
303 |
304 | 10000
305 | 10000
306 |
307 |
308 |
309 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepNetSlice
2 | ### _A Deep Reinforcement Learning Open-Source Toolkit for Network Slice Placement_
3 |
4 | ## Demo
5 | ```bash
6 | cd src
7 | python demo.py
8 | ```
9 |
10 | ## General training script structure
11 | ```python
12 | # create trainer object
13 | # It creates the model and the training and evaluation environments
14 | trainer = Trainer( ... ) # parameters description on trainer.py docstring
15 |
16 | # create list of training callbacks.
17 | callbacks = [ ... ] # see 'src/callbacks/' or Stable Baselines3 docs
18 |
19 | # train the model
20 | trainer.train(
21 | tot_steps=<...>, # number of overall training steps
22 | callbacks=callbacks,
23 | log_interval=<...>, # number of steps between each log
24 | wandb=<...>, # (bool) whether to use wandb logging
25 | )
26 | ```
27 |
28 | ## Directories structure
29 | - `NSPRs`: contains graphml files containing the definition of some Network Slice Placement Requests (NSPRs).
30 | These can also be created on the fly during training, with no need to read files.
31 |
32 | - `PSNs`: contains graphml files containing the definition of some Physical Substrate Networks (PSNs) architectures.
33 |
34 | - `src`: contains the source code of the toolkit.
35 |
36 | - `callbacks`: contains some training callbacks.
37 | All callbacks in the library [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can be used as well.
38 |
39 | - `policies`: contains the implementation of policy networks.
40 | It follows the nomenclature of [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) policies, where the policy nets are composed of a features extractor followed by a MlpExtractor.
41 | - `features_extractors`: contains the implementation of features extractors modules.
42 | - `mlp_extractors`: contains the implementation of mlp extractors modules.
43 |
44 | - `spaces`: contains the implementation of custom [Gym](https://github.com/openai/gym) / [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) spaces.
45 |
46 | - `wrappers`: contains the implementation of custom environment wrappers.
47 | Wrappers from [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can also be used.
48 |
49 | - `network_simulator.py`: contains the implementation of the environment.
50 |
51 | - `trainer.py`: contains the implementation of the trainer object (see demo).
52 |
53 | - `demo.py`: contains a demo script.
54 |
55 |
56 | ## Contributing
57 | Contributions are welcome! :rocket:
58 |
59 | To contribute:
60 | - If you want to **work on an open issue**, comment on that issue before opening a PR.
61 | - If you want to implement a **new feature** or an **improvement**, write about it in the Discussions tab.
62 |
63 | ## Reference
64 | ```
65 | Alex Pasquali, Vincenzo Lomonaco, Davide Bacciu and Federica Paganelli,
66 | Deep Reinforcement Learning for Network Slice Placement and the DeepNetSlice Toolkit,
67 | IEEE International Conference on Machine Learning for Communication and Networking, ICMLCN 2024, 5-8 May 2024, Stockholm, Sweden
68 | ```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | wheel<=0.38.4
2 | setuptools~=64.0
3 | gym~=0.21.0
4 | numpy~=1.23.4
5 | networkx~=2.8.7
6 | torch~=1.12.1
7 | stable-baselines3~=1.7.0
8 | sb3-contrib~=1.7.0
9 | torch-geometric~=2.1.0
10 | tensorboard~=2.10.0
11 | wandb~=0.13.4
12 |
--------------------------------------------------------------------------------
/src/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | from .acceptance_ratio_callbacks import AcceptanceRatioByStepsCallback, AcceptanceRatioByNSPRsCallback
2 | from .hparam_callback import HParamCallback
3 | from .psn_load_callback import PSNLoadCallback
4 | from .seen_nsprs_callback import SeenNSPRsCallback
5 |
--------------------------------------------------------------------------------
/src/callbacks/acceptance_ratio_callbacks.py:
--------------------------------------------------------------------------------
1 | from queue import Queue
2 | import gym
3 | import numpy as np
4 | from stable_baselines3.common.callbacks import BaseCallback
5 | from stable_baselines3.common.vec_env import VecEnv
6 |
7 |
class AcceptanceRatioByStepsCallback(BaseCallback):
    """
    A custom callback that derives from ``BaseCallback``.
    It logs the acceptance ratio on Tensorboard once every
    ``steps_per_tr_phase`` steps, computed over the NSPRs seen and accepted
    during that phase only (not cumulatively since the start of training).

    :param env: (vectorized) environment; must expose the attributes
        ``accepted_nsprs`` and ``tot_seen_nsprs`` via ``get_attr``
    :param name: name of the metric to log
    :param steps_per_tr_phase: number of steps that define a training phase.
        The acceptance ratio is logged once per training phase.
    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    def __init__(
            self,
            env: gym.Env,
            name: str = "Acceptance ratio",
            steps_per_tr_phase: int = 1,
            verbose=0
    ):
        super(AcceptanceRatioByStepsCallback, self).__init__(verbose)
        self.env = env
        self.name = name
        self.steps_per_tr_phase = steps_per_tr_phase
        # cumulative per-env counters as of the end of the previous phase;
        # lazily initialized to zeros on the first logging step
        self.tot_to_subtract = None
        self.accepted_to_subtract = None

    def _on_step(self) -> bool:
        """
        This method will be called by the model after each call to `env.step()`.

        For child callback (of an `EventCallback`), this will be called
        when the event is triggered.

        :return: (bool) If the callback returns False, training is aborted early.
        """
        if self.n_calls % self.steps_per_tr_phase == 0:
            # cumulative counts since the beginning of training (per env)
            cum_accepted = np.array(self.env.get_attr("accepted_nsprs"),
                                    dtype=np.float32)
            cum_tot = np.array(self.env.get_attr("tot_seen_nsprs"),
                               dtype=np.float32)
            if self.tot_to_subtract is None:  # or self.accepted_to_subtract is None, either way
                self.tot_to_subtract = np.zeros_like(cum_tot)
                self.accepted_to_subtract = np.zeros_like(cum_accepted)
            # counts relative to the current training phase only
            accepted_this_phase = cum_accepted - self.accepted_to_subtract
            tot_this_phase = cum_tot - self.tot_to_subtract
            # element-wise ratio; 0 for envs that saw no NSPR this phase
            accept_ratio_per_env = np.divide(accepted_this_phase,
                                             tot_this_phase,
                                             out=np.zeros_like(tot_this_phase),
                                             where=tot_this_phase != 0)
            overall_accept_ratio = np.mean(accept_ratio_per_env)
            self.logger.record(self.name, overall_accept_ratio)
            # BUGFIX: store the *cumulative* counters as the next baseline.
            # The previous code subtracted in place and then stored the
            # per-phase deltas, so from the third phase onward the baseline
            # being subtracted was wrong.
            self.tot_to_subtract = cum_tot
            self.accepted_to_subtract = cum_accepted
        return True
76 |
77 |
class AcceptanceRatioByNSPRsCallback(BaseCallback):
    """
    A custom callback that derives from ``BaseCallback``.
    It logs the acceptance ratio on Tensorboard.

    :param env: environment (a ``VecEnv`` or a plain ``gym.Env``)
    :param name: name of the metric to log
    :param nsprs_per_tr_phase: number of NSPRs that define a training phase.
        The acceptance ratio is logged once per training phase.
    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    def __init__(
            self,
            env: gym.Env,
            name: str = "Acceptance ratio",
            nsprs_per_tr_phase: int = 1000,
            verbose=0
    ):
        super().__init__(verbose)
        self.env = env
        self.name = name
        self.nsprs_per_tr_phase = nsprs_per_tr_phase
        # BUGFIX: a plain (non-vectorized) gym.Env has no ``num_envs``
        # attribute, yet the non-VecEnv branch of ``_on_step`` is meant to
        # support it; fall back to a single env in that case.
        n_envs = getattr(env, 'num_envs', 1)
        # num of seen NSPRs to subtract from the total number of seen NSPRs (per env)
        self.seen_to_subtract = [0] * n_envs
        # num of accepted NSPRs to subtract from the total number of accepted NSPRs (per env)
        self.accepted_to_subtract = [0] * n_envs
        # num of seen NSPRs last considered for logging (per env),
        # used to ensure it logs once per training phase
        self.last_seen = [0] * n_envs
        # num of accepted NSPRs during this training phase (per env)
        self.accepted_this_training_phase = [0] * n_envs
        # num of NSPRs seen during this training phase (per env)
        self.seen_this_training_phase = [0] * n_envs
        # acceptance ratio of each env
        self.acceptance_ratios = [Queue() for _ in range(n_envs)]
        # once an env is ready for logging, its cell is increased by 1,
        # and it is decreased by 1 when the acceptance ratio is logged
        self.ready_envs = np.zeros(shape=n_envs, dtype=int)

    def _on_step(self) -> bool:
        if isinstance(self.env, VecEnv):
            seen_nsprs = self.env.get_attr('tot_seen_nsprs')
            accepted_nsprs = self.env.get_attr('accepted_nsprs')
        else:
            seen_nsprs = [self.env.tot_seen_nsprs]
            accepted_nsprs = [self.env.accepted_nsprs]

        # BUGFIX: iterate over the number of collected values instead of
        # ``self.env.num_envs``, which does not exist on a plain gym.Env
        n_envs = len(seen_nsprs)
        for env_idx in range(n_envs):
            if seen_nsprs[env_idx] > self.last_seen[env_idx] and seen_nsprs[env_idx] % self.nsprs_per_tr_phase == 0:
                self.ready_envs[env_idx] += 1
                self.last_seen[env_idx] = seen_nsprs[env_idx]
                # NSPRs seen and accepted in this training phase
                seen_this_tr_phase = seen_nsprs[env_idx] - self.seen_to_subtract[env_idx]
                accepted_this_tr_phase = accepted_nsprs[env_idx] - self.accepted_to_subtract[env_idx]
                # update how much to subtract to get the quantities for next tr phase
                self.seen_to_subtract[env_idx] = seen_nsprs[env_idx]
                self.accepted_to_subtract[env_idx] = accepted_nsprs[env_idx]
                # compute acceptance ratio (0 if no NSPR was seen this phase)
                try:
                    self.acceptance_ratios[env_idx].put(accepted_this_tr_phase / seen_this_tr_phase)
                except ZeroDivisionError:
                    self.acceptance_ratios[env_idx].put(0.)

        # log only when every env has a ratio queued, so the mean is
        # always taken over one value per env
        if all(self.ready_envs):
            ratios = [self.acceptance_ratios[env_idx].get() for env_idx in range(n_envs)]
            self.logger.record(self.name, np.mean(ratios))
            self.ready_envs -= 1

        return True
147 |
--------------------------------------------------------------------------------
/src/callbacks/hparam_callback.py:
--------------------------------------------------------------------------------
1 | from stable_baselines3.common.callbacks import BaseCallback
2 | from stable_baselines3.common.logger import HParam
3 |
4 |
class HParamCallback(BaseCallback):
    def __init__(
            self,
            n_tr_envs: int = None,
            n_eval_envs: int = None,
            tr_nsprs_per_ep: int = None,
            tr_psn_load: float = None,
            tr_max_ep_steps: int = None,
            eval_nsprs_per_ep: int = None,
            eval_psn_load: float = None,
            eval_max_ep_steps: int = None,
            vnfs_per_nsprs: int = None,
            use_placement_state: bool = None,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
    ):
        """
        Saves the hyperparameters and metrics at the start of the training,
        and logs them to TensorBoard.

        :param n_tr_envs: number of training environments
        :param n_eval_envs: number of evaluation environments
        :param tr_nsprs_per_ep: number of NSPRs per training episode
        :param tr_psn_load: PSN load used in training
        :param tr_max_ep_steps: max steps per training episode
        :param eval_nsprs_per_ep: number of NSPRs per eval episode
        :param eval_psn_load: PSN load used in evaluation
        :param eval_max_ep_steps: max steps per eval episode
        :param vnfs_per_nsprs: number of VNFs per NSPR
        :param use_placement_state: whether the placement state is used
        :param use_heuristic: whether a heuristic is used
        :param heu_kwargs: heuristic's kwargs (may contain 'heu_class')
        """
        super().__init__()
        self.n_tr_envs = n_tr_envs
        self.n_eval_envs = n_eval_envs
        self.tr_nsprs_per_ep = tr_nsprs_per_ep
        self.tr_psn_load = tr_psn_load
        self.tr_max_ep_steps = tr_max_ep_steps
        self.eval_nsprs_per_ep = eval_nsprs_per_ep
        self.eval_psn_load = eval_psn_load
        self.eval_max_ep_steps = eval_max_ep_steps
        self.vnfs_per_nspr = vnfs_per_nsprs
        # BUGFIX: removed the trailing comma, which turned this attribute
        # into a 1-element tuple, so the logged hparam read "(True,)"
        self.use_placement_state = use_placement_state
        self.use_heuristic = use_heuristic
        self.heu_kwargs = heu_kwargs if heu_kwargs is not None else {}
        if 'heu_class' in self.heu_kwargs:
            # NOTE(review): instantiates the heuristic with dummy (None)
            # args just to read its class name — assumes the constructor
            # tolerates Nones; kept as-is to preserve behavior
            self.heu_class = self.heu_kwargs['heu_class'](None, None, None).__class__.__name__
        else:
            self.heu_class = None

    def _on_training_start(self) -> None:
        # the GCN dims live on the features extractor when present,
        # otherwise directly on the policy
        try:
            gcn_layers_dims = str(self.model.policy.features_extractor.gcn_layers_dims)
        except AttributeError:
            gcn_layers_dims = str(self.model.policy.gcn_layers_dims)

        hparam_dict = {
            "algorithm": self.model.__class__.__name__,
            "n training envs": self.n_tr_envs,
            "n eval envs": self.n_eval_envs,
            "n steps before update": self.model.n_steps,
            "learning rate": self.model.learning_rate,
            "gamma": self.model.gamma,
            "entropy coefficient": self.model.ent_coef,
            "NSPRs per training episode": self.tr_nsprs_per_ep,
            "max steps per training episode": self.tr_max_ep_steps,
            "PSN load (training)": self.tr_psn_load,
            "NSPRs per eval episode": self.eval_nsprs_per_ep,
            "PSN load (eval)": self.eval_psn_load,
            "max steps per eval episode": self.eval_max_ep_steps,
            "VNFs/NSPR": self.vnfs_per_nspr,
            "GCN layers dimensions": gcn_layers_dims,
            "Use placement state": str(self.use_placement_state),
            "Use heuristic": self.use_heuristic,
            "Heuristic class": self.heu_class,
            "heu's num sampled servers": self.heu_kwargs.get("n_servers_to_sample", None),
            "heu's eta": self.heu_kwargs.get("eta", None),
            "heu's xi": self.heu_kwargs.get("xi", None),
            "heu's beta": self.heu_kwargs.get("beta", None),
        }
        # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag
        # Tensorboard will find & display metrics from the `SCALARS` tab
        metric_dict = {
            "Acceptance ratio": 0,
            "Eval acceptance ratio": 0,
            "eval/mean_reward": 0,
            "rollout/ep_rew_mean": 0,
            "train/entropy_loss": 0,
            "train/policy_loss": 0,
            "train/value_loss": 0,
        }
        self.logger.record(
            "hparams",
            HParam(hparam_dict, metric_dict),
            exclude=("stdout", "log", "json", "csv"),
        )

    def _on_step(self) -> bool:
        return True
94 |
--------------------------------------------------------------------------------
/src/callbacks/psn_load_callback.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import gym
4 | import numpy as np
5 | from stable_baselines3.common.callbacks import BaseCallback
6 |
7 |
class PSNLoadCallback(BaseCallback):
    """
    Class for logging the load of the PSN.

    :param env: (vectorized) environment
    :param freq: logging frequency (in number of steps)
    :param cpu: if True, track CPU load
    :param ram: if True, track RAM load
    :param bw: if True, track BW load
    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    def __init__(
            self,
            env: gym.Env,
            freq: int,
            cpu: bool = True,
            ram: bool = True,
            bw: bool = True,
            verbose: int = 0
    ):
        super(PSNLoadCallback, self).__init__(verbose)
        self.env = env
        self.freq = freq
        self.cpu, self.ram, self.bw = cpu, ram, bw

    def _on_step(self) -> bool:
        if self.n_calls % self.freq == 0:
            cpu_loads, ram_loads, bw_loads = [], [], []
            observations = self.env.get_attr('obs_dict')
            # PERF: fetch each per-env attribute once instead of once per
            # env inside the loop — every get_attr call crosses the VecEnv
            # boundary and already returns the values for all envs
            servers_maps = self.env.get_attr('servers_map_idx_id')
            max_cpus = self.env.get_attr('max_cpu')
            max_rams = self.env.get_attr('max_ram')
            tot_cpu_caps = self.env.get_attr('tot_cpu_cap')
            tot_ram_caps = self.env.get_attr('tot_ram_cap')
            psns = self.env.get_attr('psn')
            for e, obs in enumerate(observations):
                # get the available CPU and RAM for each server
                serv_cpu_avails, serv_ram_avails = [], []
                for idx in servers_maps[e].keys():
                    serv_cpu_avails.append(obs['cpu_avails'][idx])
                    serv_ram_avails.append(obs['ram_avails'][idx])
                # available fraction = (normalized avails * max) / total capacity
                avail_cpu_perc = np.sum(serv_cpu_avails) * max_cpus[e] / tot_cpu_caps[e]
                avail_ram_perc = np.sum(serv_ram_avails) * max_rams[e] / tot_ram_caps[e]
                cpu_loads.append(1. - avail_cpu_perc)
                ram_loads.append(1. - avail_ram_perc)
                # get the available BW for each link
                link_bw_avails_perc = []
                for link in psns[e].edges.values():
                    link_bw_avails_perc.append(link['availBW'] / link['BWcap'])
                bw_loads.append(1. - np.mean(link_bw_avails_perc))
            try:
                if self.cpu:
                    avg_cpu_load = np.mean(cpu_loads)
                    self.logger.record("Average CPU load of training envs", avg_cpu_load)
                if self.ram:
                    avg_ram_load = np.mean(ram_loads)
                    self.logger.record("Average RAM load of training envs", avg_ram_load)
                if self.bw:
                    avg_bw_load = np.mean(bw_loads)
                    self.logger.record("Average BW load of training envs", avg_bw_load)
                if self.verbose > 0:
                    try:
                        print(f"Average CPU load of training envs: {avg_cpu_load}")
                        print(f"Average RAM load of training envs: {avg_ram_load}")
                        print(f"Average BW load of training envs: {avg_bw_load}")
                    except NameError:
                        # in case some variables are not defined. It means we're not tracking that load
                        pass
            except AttributeError:
                warnings.warn("No logger for resources load callback, data not being logged")

        return True
74 |
--------------------------------------------------------------------------------
/src/callbacks/seen_nsprs_callback.py:
--------------------------------------------------------------------------------
1 | from stable_baselines3.common.callbacks import BaseCallback
2 | import gym
3 | import numpy as np
4 |
5 |
class SeenNSPRsCallback(BaseCallback):
    """
    Callback that logs the number of NSPRs seen so far.

    What gets logged is the *average* number of seen NSPRs across the
    environments, not the sum. The rationale: the losses are averages over
    the collected steps
    - policy_loss = -(advantages * log_prob).mean()
    - value_loss = F.mse_loss(rollout_data.returns, values)
    - entropy_loss = -th.mean(entropy)
    and with multiple parallel envs the per-env values are flattened and
    averaged again. More envs therefore means more precise updates, not
    more of them: 2 envs that saw 10 NSPRs each are not equivalent to a
    single env that saw 20 (in terms of updates and steps).

    :param env: environment
    :param freq: logging frequency (in number of steps)
    :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
    """
    def __init__(
            self,
            env: gym.Env,
            freq: int = 1,
            verbose: int = 0
    ):
        super().__init__(verbose)
        self.env = env
        self.freq = freq

    def _on_step(self) -> bool:
        # only log every `freq` calls
        if self.n_calls % self.freq != 0:
            return True
        # per-env counters of seen NSPRs, averaged (see class docstring
        # for why the mean and not the sum) and rounded to an int
        per_env_counts = self.env.get_attr('tot_seen_nsprs')
        avg_seen_nsprs = int(round(np.mean(per_env_counts)))
        self.logger.record("Avg seen NSPRs per env", avg_seen_nsprs)
        if self.verbose > 0:
            print(f"Average seen NSPRs per env: {avg_seen_nsprs}")
        return True
--------------------------------------------------------------------------------
/src/demo.py:
--------------------------------------------------------------------------------
1 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback
2 | from callbacks.hparam_callback import HParamCallback
3 | from callbacks.psn_load_callback import PSNLoadCallback
4 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback
5 | from trainer import Trainer
6 | from wrappers.reset_with_load import ResetWithRealisticLoad
7 | from stable_baselines3.common.callbacks import EvalCallback
8 | from wandb.integration.sb3 import WandbCallback
9 |
10 |
if __name__ == '__main__':
    # create trainer object.
    # It creates the model and the training and evaluation environments.
    # (parameters are described in the Trainer docstring in trainer.py)
    trainer = Trainer(
        psn_path="../PSNs/hadrl_1-16_5-10_15-4.graphml",
        n_tr_envs=20,
        load_perc=0.8,
        time_limit=False,
        max_ep_steps=1000,
        reset_load_class=ResetWithRealisticLoad,
        generate_nsprs=True,
        nsprs_per_ep=1,
        vnfs_per_nspr=5,
        always_one=True,
        seed=12,
        tensorboard_log="../tensorboard",
        create_eval_env=True
    )
    tr_env = trainer.tr_env
    eval_env = trainer.eval_env

    # training callbacks
    list_of_callbacks = [
        # logs the training acceptance ratio once every 1000 seen NSPRs
        AcceptanceRatioByNSPRsCallback(
            env=tr_env,
            name="Train acceptance ratio (by NSPRs)",
            nsprs_per_tr_phase=1000,
            verbose=2
        ),

        # periodically evaluates the model on the eval environment
        EvalCallback(
            eval_env=eval_env,
            n_eval_episodes=1000,
            warn=True,
            eval_freq=5_000,
            deterministic=True,
            verbose=2,
            # logs the eval acceptance ratio after each evaluation round
            callback_after_eval=AcceptanceRatioByNSPRsCallback(
                env=eval_env,
                name="Eval acceptance ratio (by NSPRs)",
                nsprs_per_tr_phase=1,  # must be 1 for eval (default value)
                verbose=2
            )
        ),

        # logs the CPU/RAM/BW load of the PSN every 500 steps
        PSNLoadCallback(env=tr_env, freq=500, verbose=1),

        # SeenNSPRsCallback(env=tr_env, freq=100, verbose=1),
    ]

    # train the model
    trainer.train(
        tot_steps=10_000_000,
        callbacks=list_of_callbacks,
    )
65 |
--------------------------------------------------------------------------------
/src/eval_script.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tqdm import tqdm
3 | from stable_baselines3 import A2C
4 | from stable_baselines3.common.env_util import make_vec_env
5 |
6 | from callbacks import PSNLoadCallback
7 | from utils import make_env
8 | from wrappers.reset_with_load import ResetWithRealisticLoad
9 |
if __name__ == '__main__':
    # load the trained model (the env is attached after being re-created below)
    model = A2C.load(
        path="/root/NSPR-simulator/wandb/run-20230103_155854-3o0vtz6x/files/model.zip",
        env=None,
        device='cpu',
        print_system_info=True,
        force_reset=True,  # True as default -> avoids unexpected behavior
    )

    # re-create the evaluation env
    env = make_vec_env(
        env_id=make_env,
        n_envs=1,
        env_kwargs=dict(
            psn_path="../PSNs/waxman_20_servers.graphml",
            base_env_kwargs=dict(accumulate_reward=True),
            time_limit=True,
            time_limit_kwargs=dict(max_episode_steps=1000),
            hadrl_nsprs=True,
            hadrl_nsprs_kwargs=dict(
                nsprs_per_ep=1,
                vnfs_per_nspr=5,
                always_one=True
            ),
            reset_load_class=ResetWithRealisticLoad,
            reset_load_kwargs=dict(cpu_load=0.5),
            placement_state=True,
            dynamic_connectivity=True,
            dynamic_connectivity_kwargs=dict(link_bw=10_000),
        ),
        seed=12,
    )

    # evaluate the model: run until 'tot_nsprs' NSPRs have been seen,
    # counting how many of them were accepted
    obs = env.reset()
    accepted = seen = 0
    tot_nsprs = 10000
    pbar = tqdm(total=tot_nsprs)  # progress bar
    while seen < tot_nsprs:
        action, _ = model.predict(obs, deterministic=True)
        obs, rewards, done, info = env.step(action)
        # a non-zero reward marks the end of an NSPR's evaluation:
        # positive -> accepted, negative -> rejected
        if rewards[0] != 0.0:
            seen += 1
            pbar.update(1)
            if rewards[0] > 0.0:
                accepted += 1
        if done:
            obs = env.reset()
    # close the progress bar so its output is flushed correctly
    pbar.close()

    # guard against a zero division in case the loop is left with no NSPR seen
    if seen > 0:
        print(f"Acceptance ratio: {accepted / seen}")
    else:
        print("Acceptance ratio: undefined (no NSPRs seen)")
77 |
--------------------------------------------------------------------------------
/src/heuristic_layers.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import Dict
3 | import math
4 |
5 | import gym
6 | import networkx as nx
7 | import numpy as np
8 | import torch as th
9 | from torch import nn
10 |
11 |
class P2CLoadBalanceHeuristic(nn.Module):
    """ Layer executing the P2C heuristic """
    name = "P2C load balance heuristic"

    def __init__(
        self,
        action_space: gym.spaces.Space,
        servers_map_idx_id: Dict[int, int],
        psn: nx.Graph,
        n_servers_to_sample: int = 2,
        eta: float = 0.,
        xi: float = 1.,
        beta: float = 1.,  # TODO: when not 1, could cause NaNs
        **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param n_servers_to_sample: number of candidate servers sampled per environment
        :param eta: hyperparameter of the P2C heuristic
        :param xi: hyperparameter of the P2C heuristic
        :param beta: hyperparameter of the P2C heuristic
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta

    def forward(self, x: th.Tensor, obs: th.Tensor) -> th.Tensor:
        """ Bias the raw scores 'x' towards the server chosen by the heuristic.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: current observation (dict of tensors)
        :return: scores with the heuristic bonus added (all-zeros if the
            heuristic made no selection)
        """
        n_envs = x.shape[0]
        max_values, _ = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            return H  # it means no selected action by the heuristic
        for e in range(n_envs):
            heu_action = heu_selected_servers[e, :].item()
            # raise the selected action's score up to the per-env max (+ eta)
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: th.Tensor, n_servers_to_sample: int) -> th.Tensor:
        """ P2C heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).
        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers, shape (n_envs, 1)
        """
        n_envs = obs['bw_avails'].shape[0]
        indexes = th.empty(n_envs, n_servers_to_sample, dtype=th.int)
        req_cpu = obs['cur_vnf_cpu_req']
        req_ram = obs['cur_vnf_ram_req']
        load_balances = th.empty(n_envs, n_servers_to_sample)
        for e in range(n_envs):
            for s in range(n_servers_to_sample):
                # actions (indexes of the servers in the servers list)
                indexes[e, s] = self.action_space.sample()
                # servers ids
                node_id = self.servers_map_idx_id[indexes[e, s].item()]
                # actual servers (nodes in the graph)
                node = self.psn.nodes[node_id]
                # compute the load balance of each server when placing the VNF
                cpu_load_balance = (node['availCPU'] - req_cpu[e]) / node['CPUcap']
                ram_load_balance = (node['availRAM'] - req_ram[e]) / node['RAMcap']
                load_balances[e, s] = cpu_load_balance + ram_load_balance

        # return the best server for each environment (the indexes).
        # BUGFIX: gather must run along dim=1 (the candidate dimension);
        # gathering along dim=0 used the winner index as an ENV index and
        # returned servers sampled for the wrong environments
        # (the sibling HADRLHeuristic.HEU already gathers along dim=1).
        winners = th.argmax(load_balances, dim=1, keepdim=True)
        return th.gather(indexes, 1, winners)
84 |
85 |
class HADRLHeuristic(nn.Module):
    """ Heuristic layer in the style of HA-DRL: samples random feasible
    servers and biases the scores towards the one closest - in hops over
    links with enough bandwidth - to the server chosen for the previous VNF. """

    def __init__(
        self,
        action_space: gym.spaces.Space,
        servers_map_idx_id: Dict[int, int],
        psn: nx.Graph,
        bw_req_per_vl: int = 2000,
        n_servers_to_sample: int = 2,
        eta: float = 0.,
        xi: float = 1.,
        beta: float = 1.,  # TODO: when not 1, could cause NaNs
        **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param bw_req_per_vl: BW-requirement threshold used to weigh physical links
        :param n_servers_to_sample: number of candidate servers sampled per environment
        :param eta: hyperparameter of the P2C heuristic
        :param xi: hyperparameter of the P2C heuristic
        :param beta: hyperparameter of the P2C heuristic
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.bw_req_per_vl = bw_req_per_vl
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta
        self.prev_selected_servers = None  # one server id per env (-1 = none yet)
        self.n_envs = None

    def forward(self, x: th.Tensor, obs: th.Tensor) -> th.Tensor:
        """ Bias the raw scores 'x' towards the server chosen by the heuristic.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: current observation (dict of tensors)
        :return: scores with the heuristic bonus added (all-zeros if the
            heuristic made no selection)
        """
        self.n_envs = x.shape[0]
        # (re)initialize the per-env memory of previously selected servers
        if self.prev_selected_servers is None or self.n_envs != self.prev_selected_servers.shape[0]:
            self.prev_selected_servers = -th.ones(self.n_envs, dtype=th.int)
        max_values, _ = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            # it means no selected action by the heuristic
            return H
        for e in range(self.n_envs):
            heu_action = heu_selected_servers[e, :].item()
            # raise the selected action's score up to the per-env max (+ eta)
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: th.Tensor, n_servers_to_sample: int) -> th.Tensor:
        """ P2C heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).
        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers (all -1's if no feasible server exists)
        """
        indexes = th.empty(self.n_envs, n_servers_to_sample, dtype=th.int)
        path_lengths = th.zeros(self.n_envs, n_servers_to_sample)
        all_actions = list(range(self.action_space.n))
        for e in range(self.n_envs):
            # random permutation of the actions
            all_actions = np.random.permutation(all_actions)
            for s in range(n_servers_to_sample):
                # instead of selecting first all the feasible servers and then
                # sampling on them, we first create a list of all the actions
                # (i.e. servers) in random order, then we start going through
                # the list and pick the first action which is feasible.
                # This way we don't run through all the servers every time
                for i in range(s, len(all_actions)):
                    a = all_actions[i]
                    if self.action_is_feasible(a, obs, e):
                        indexes[e, s] = a
                        break
                else:
                    # BUGFIX: this 'else' runs only when the loop exhausted
                    # WITHOUT finding a feasible action. The previous check
                    # ('if i == len(all_actions) - 1') also fired when the
                    # LAST action examined WAS feasible, wrongly discarding
                    # a valid selection.
                    return -th.ones(self.n_envs, 1)

                # server ID
                server_id = self.servers_map_idx_id[indexes[e, s].item()]

                if self.prev_selected_servers[e] == -1:
                    # no previous placement: every candidate is equally good
                    path_lengths[e, s] = -math.inf
                else:
                    # if the server was the one selected for the prev VNF, choose it
                    if self.prev_selected_servers[e] == server_id:
                        path_lengths[e, s] = -math.inf
                    else:
                        # evaluate bandwidth consumption when placing the current VNF on this server
                        path = nx.shortest_path(G=self.psn,
                                                source=self.prev_selected_servers[e].item(),
                                                target=server_id,
                                                weight=self.compute_link_weight,
                                                method='dijkstra')
                        path_lengths[e, s] = len(path)

        # return the best server for each environment (the indexes)
        winners = th.argmin(path_lengths, dim=1, keepdim=True)
        selected_servers = th.gather(indexes, 1, winners)
        self.prev_selected_servers = selected_servers.squeeze(dim=1)
        return selected_servers

    @staticmethod
    def action_is_feasible(a: int, obs: th.Tensor, env_idx: int):
        """ Check if it's feasible to place the current VNF on a specific server

        1. if a server has enough CPU and RAM to host this VNF and the next one
        (all VNFs are assumed to have identical requirements, if this is not the
        case, then you can see this as "if a server has enough CPU and RAM to
        host double the requirements of this VNF", like a greedy safety margin),
        then it is eligible.

        2. if a server has enough CPU and RAM to host only this VNF, then if it
        has enough bandwidth in its outgoing links to host the connection with
        the neighboring VNFs, then it is eligible.

        3. if a server does not have enough CPU or RAM to host the current VNF,
        then it is NOT eligible.

        :param a: action, i.e. a server index
        :param obs: instance of an observation from the environment
        :param env_idx: index of the environment (in case of vectorized envs)
        :return: true if the action is feasible, false otherwise
        """
        req_cpu = obs['cur_vnf_cpu_req'][env_idx].item()
        req_ram = obs['cur_vnf_ram_req'][env_idx].item()
        req_bw = obs['cur_vnf_bw_req'][env_idx].item()
        avail_cpu = obs['cpu_avails'][env_idx][a].item()
        avail_ram = obs['ram_avails'][env_idx][a].item()
        avail_bw = obs['bw_avails'][env_idx][a]

        if (avail_cpu >= 2 * req_cpu and avail_ram >= 2 * req_ram) or \
                (avail_cpu >= req_cpu and avail_ram >= req_ram and avail_bw >= req_bw):
            return True

        return False

    def compute_link_weight(self, source: int, target: int, link: dict):
        """ Weight function for networkx's shortest_path: 1 if the link has
        enough available bandwidth for a virtual link, infinity otherwise. """
        return 1 if link['availBW'] >= self.bw_req_per_vl else math.inf
225 |
--------------------------------------------------------------------------------
/src/network_simulator.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import math
3 | from typing import Optional, Union, Tuple
4 |
5 | import gym
6 | import networkx as nx
7 | import numpy as np
8 |
9 | from gym.spaces import Dict, Box, Discrete
10 |
11 | import reader
12 |
13 | GymObs = Union[Tuple, dict, np.ndarray, int]
14 |
15 |
16 | class NetworkSimulator(gym.Env):
17 | """ Class implementing the network simulator (RL environment) """
18 |
    def __init__(
        self,
        psn_file: str,
        nsprs_path: str = "../NSPRs/",
        nsprs_per_episode: Optional[int] = None,
        nsprs_max_duration: int = 100,
        accumulate_reward: bool = True,
        discount_acc_rew: bool = True,
        perc_avail_nodes: Optional[float] = 1.
    ):
        """ Constructor

        :param psn_file: GraphML file containing the definition of the PSN
        :param nsprs_path: either directory with the GraphML files defining the NSPRs or path to a single GraphML file
        :param nsprs_per_episode: max number of NSPRs to be evaluated in each episode. If None, there is no limit.
        :param nsprs_max_duration: (optional) max duration of the NSPRs.
        :param accumulate_reward: if true, the reward is accumulated and given to the agent only after each NSPR
        :param discount_acc_rew: if true, an increasing discount factor is applied to the acceptance reward during each NSPR.
            It starts from the inverse of the number of VNFs in the NSPR and grows to 1.
        :param perc_avail_nodes: in case some action masking is implemented (i.e., env wrapped in ActionMasker
            wrapper from sb3-contrib), it specifies the percentage of available nodes w.r.t. the total.
        """
        super(NetworkSimulator, self).__init__()

        self.psn_file = psn_file
        self.psn = reader.read_psn(graphml_file=psn_file)  # physical substrate network
        self.nsprs_path = nsprs_path
        self.nsprs_per_episode = nsprs_per_episode
        self.accumulate_reward = accumulate_reward
        self.nsprs_seen_in_cur_ep = 0
        self.nsprs_max_duration = nsprs_max_duration
        self.done = False
        self.nsprs = None  # will be initialized in the reset method
        self.waiting_nsprs = []  # list of NSPRs that arrived already and are waiting to be evaluated
        self.cur_nspr = None  # used to keep track of the current NSPR being evaluated
        self.cur_nspr_unplaced_vnfs_ids = []  # used to keep track of the VNFs' IDs of the current NSPR that haven't been placed yet
        self.cur_vnf_id = None  # used to keep track of the current VNF being evaluated
        self._cur_vl_reqBW = 0  # auxiliary attribute needed in method 'self.compute_link_weight'
        self.time_step = 0  # keep track of current time step
        self.ep_number = 0  # keep track of current episode number
        self.tot_seen_nsprs = 0  # keep track of the number of NSPRs seen so far
        self.accepted_nsprs = 0  # for the overall acceptance ratio
        self.discount_acc_rew = discount_acc_rew  # whether or not to discount the acceptance reward
        self.acc_rew_disc_fact = 1.  # current discount factor for the acceptance reward
        self.base_acc_rew_disc_fact = 1.  # base discount factor for the acceptance reward

        # map (dict) between IDs of PSN's nodes and their respective index (see self._init_map_id_idx's docstring)
        nodes_ids = list(self.psn.nodes.keys())
        self.map_id_idx = {nodes_ids[idx]: idx for idx in range(len(nodes_ids))}

        # map (dict) between an index of a list (incrementing int) and the ID of a server
        servers_ids = [node_id for node_id, node in self.psn.nodes.items()
                       if node['NodeType'] == 'server']
        self.servers_map_idx_id = {idx: servers_ids[idx] for idx in range(len(servers_ids))}

        # partial rewards to be accumulated across the steps of evaluation of a single NSPR
        self._acceptance_rewards = []
        self._resource_consumption_rewards = []
        self._cur_resource_consumption_rewards = []
        self._load_balance_rewards = []

        # reward values for specific outcomes
        self.rval_accepted_vnf = 100
        self.rval_rejected_vnf = -100

        # Action space and observation space (gym.Env required attributes)
        ONE_BILLION = 1_000_000_000  # constant for readability
        n_nodes = len(self.psn.nodes)
        # action space = number of servers
        self.action_space = Discrete(len(servers_ids))
        # all resource-related entries are normalized to [0, 1] by the max capacities
        self.observation_space = Dict({
            # PSN STATE
            'cpu_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            'ram_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, number of VNFs of the current NSPR placed on it
            'placement_state': Box(low=0, high=ONE_BILLION, shape=(n_nodes,), dtype=int),

            # NSPR STATE
            # note: apparently it's not possible to pass "math.inf" or "sys.maxsize" as a gym.spaces.Box's high value
            'cur_vnf_cpu_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'cur_vnf_ram_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'vnfs_still_to_place': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=int),
        })
        self._empty_psn_obs_dict = None  # used to store the observation resulting from an empty PSN
        self.obs_dict = self._init_obs_dict()  # used to store the current observation

        # action mask determining available actions. Init with all actions available (it will be updated in 'reset')
        self._action_mask = np.ones(shape=(len(servers_ids),), dtype=bool)
        assert 0. <= perc_avail_nodes <= 1.
        self.perc_avail_nodes = perc_avail_nodes
112 |
113 | @property
114 | def cur_vnf(self):
115 | return self.cur_nspr.nodes[self.cur_vnf_id] if self.cur_nspr is not None else None
116 |
117 | def get_action_mask(self, env):
118 | # 'action_mask' needs to be callable to be passed ActionMasker wrapper
119 | # note: env needs to be an argument for compatibility, but in this case it's useless
120 | return self._action_mask
121 |
122 | def reset_partial_rewards(self):
123 | """ Resets the partial rewards (used in case a NSPR cannot be placed) """
124 | self._acceptance_rewards = []
125 | self._resource_consumption_rewards = []
126 | self._load_balance_rewards = []
127 |
128 | def enough_avail_resources(self, physical_node_id: int, vnf: dict) -> bool:
129 | """ Check that the physical node has enough resources to satisfy the VNF's requirements
130 |
131 | :param physical_node_id: ID of the physical node to check
132 | :param vnf: VNF to check
133 | :return: True if the physical node has enough resources to satisfy the VNF's requirements, False otherwise
134 | """
135 | idx = self.map_id_idx[physical_node_id]
136 | enough_cpu = self.obs_dict['cpu_avails'][idx] >= vnf['reqCPU'] / self.max_cpu
137 | enough_ram = self.obs_dict['ram_avails'][idx] >= vnf['reqRAM'] / self.max_ram
138 | return enough_cpu and enough_ram
139 |
140 | def restore_avail_resources(self, nspr: nx.Graph):
141 | """ Method called in case a NSPR is not accepted, or it has reached
142 | its departure time.
143 | Restores the PSN resources occupied by that NSPR.
144 |
145 | :param nspr: the rejected NSPR
146 | """
147 | if nspr is not None:
148 | nspr.graph['departed'] = True
149 | for vnf_id, vnf in nspr.nodes.items():
150 | # restore nodes' resources availabilities
151 | if vnf['placed'] >= 0:
152 | idx = self.map_id_idx[vnf['placed']]
153 | self.obs_dict['cpu_avails'][idx] += vnf['reqCPU'] / self.max_cpu
154 | self.obs_dict['ram_avails'][idx] += vnf['reqRAM'] / self.max_ram
155 | self.obs_dict['placement_state'][idx] -= 1
156 | for _, vl in nspr.edges.items():
157 | # restore links' resources availabilities
158 | if vl['placed']:
159 | # vl['placed'] is the list of the physical nodes traversed by the link
160 | rewBW_normalized = vl['reqBW'] / self.max_bw
161 | for i in range(len(vl['placed']) - 1):
162 | id_1 = vl['placed'][i]
163 | id_2 = vl['placed'][i + 1]
164 | physical_link = self.psn.edges[id_1, id_2]
165 | # recall that BW in physical links is actually updated
166 | physical_link['availBW'] += vl['reqBW']
167 | idx_1 = self.map_id_idx[id_1]
168 | idx_2 = self.map_id_idx[id_2]
169 | self.obs_dict['bw_avails'][idx_1] += rewBW_normalized
170 | self.obs_dict['bw_avails'][idx_2] += rewBW_normalized
171 |
172 | def pick_next_nspr(self):
173 | """ Pick the next NSPR to be evaluated and updates the attribute 'self.cur_nspr' """
174 | if self.cur_nspr is None and self.waiting_nsprs:
175 | self.cur_nspr = self.waiting_nsprs.pop(0)
176 | self.cur_nspr.graph['DepartureTime'] = self.time_step + self.cur_nspr.graph['duration']
177 | self.cur_nspr_unplaced_vnfs_ids = list(self.cur_nspr.nodes.keys())
178 | self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0)
179 | # reset acceptance reward discount factor
180 | self.base_acc_rew_disc_fact = 1 / len(self.cur_nspr.nodes)
181 | self.acc_rew_disc_fact = 0.
182 | # self.tot_seen_nsprs += 1
183 | _ = self.update_nspr_state() # obs_dict updated within method
184 |
185 | def check_for_departed_nsprs(self):
186 | """ Checks it some NSPRs have reached their departure time and in case
187 | it frees the PSN resources occupied by them. """
188 | all_arrival_times = list(self.nsprs.keys())
189 | all_arrival_times.sort()
190 | for arrival_time in all_arrival_times:
191 | if arrival_time >= self.time_step:
192 | break
193 | cur_nsprs = self.nsprs[arrival_time]
194 | for nspr in cur_nsprs:
195 | departed = nspr.graph.get('departed', False)
196 | if nspr.graph.get('DepartureTime', self.time_step) < self.time_step and not departed:
197 | self.restore_avail_resources(nspr=nspr)
198 |
199 | # This should be useless now
200 | # if nspr == self.cur_nspr:
201 | # # haven't finished placing this NSPR, but its departure time has come.
202 | # # remove NSPR, no reward, neither positive nor negative
203 | # # (not agent's fault, too many requests at the same time)
204 | # self.cur_nspr = None
205 | # self.reset_partial_rewards()
206 |
207 | def manage_unsuccessful_action(self) -> Tuple[GymObs, int]:
208 | """ Method to manage an unsuccessful action, executed when a VNF/VL cannot be placed onto the PSN.
209 | - Restore the PSN resources occupied by VNFs and VLs of the current NSPR
210 | - Reset the partial rewards
211 | - Set the reward as the one for an unsuccessful action
212 | - Pick the next NSPR to be evaluated (if exists)
213 | - get an observation from the environment
214 |
215 | :return: the reward for the unsuccessful action
216 | """
217 | self.restore_avail_resources(nspr=self.cur_nspr)
218 | self.reset_partial_rewards()
219 | self.cur_nspr = None
220 | self.nsprs_seen_in_cur_ep += 1
221 |
222 | self.tot_seen_nsprs += 1
223 | if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode:
224 | self.done = True
225 | self.waiting_nsprs += self.nsprs.get(self.time_step, [])
226 | self.pick_next_nspr()
227 | obs = self.update_nspr_state()
228 | reward = self.rval_rejected_vnf
229 | self.time_step += 1
230 | return obs, reward
231 |
232 | def _normalize_reward_0_10(self, reward):
233 | """ Normalize the reward to be in [0, 10] (as in HA-DRL) """
234 | # since the global reward is given by the sum for each time step of the
235 | # current NSPR (i.e. for each VNF in the NSPR) of the product of the 3
236 | # partial rewards at time t,
237 | # the maximum possible reward for the given NSPR is given by:
238 | # the number of VNF in the NSPR times
239 | # the maximum acceptance reward value (i.e. every VNF is accepted) times
240 | # the maximum resource consumption reward value (i.e. 1) times
241 | # the maximum tr_load balancing reward value (i.e. 1+1=2)
242 | max_reward = len(self.cur_nspr.nodes) * self.rval_accepted_vnf * 1 * 2
243 | return reward / max_reward * 10
244 |
245 | @staticmethod
246 | def get_cur_vnf_vls(vnf_id: int, nspr: nx.Graph) -> dict:
247 | """ Get all the virtual links connected to a specific VNF
248 |
249 | :param vnf_id: ID of a VNF whose VLs have to be returned
250 | :param nspr: the NSPR to which the VNF belongs
251 | :return: dict of the VLs connected to the specified VNF
252 | """
253 | vnf_links = {}
254 | for extremes, vl in nspr.edges.items():
255 | if vnf_id in extremes:
256 | vnf_links[extremes] = vl
257 | return vnf_links
258 |
259 | def compute_link_weight(self, source: int, target: int, link: dict):
260 | """ Compute the weight of an edge between two nodes.
261 | If the edge satisfies the bandwidth requirement, the weight is 1, else infinity.
262 |
263 | This method is passed to networkx's shortest_path function as a weight function, and it's subject to networkx's API.
264 | It must take exactly 3 arguments: the two endpoints of an edge and the dictionary of edge attributes for that edge.
265 | We need the required bandwidth to compute an edge's weight, so we save it into an attribute of the simulator (self._cur_vl_reqBW).
266 |
267 | :param source: source node in the PSN
268 | :param target: target node in the PSN
269 | :param link: dict of the link's (source - target) attributes
270 | :return: the weight of that link
271 | """
272 | return 1 if link['availBW'] >= self._cur_vl_reqBW else math.inf
273 |
    def _init_obs_dict(self) -> dict:
        """
        Initialize the observation dict.

        To be called after reading a PSN and before placing any VNF/VL on it.
        Side effects: sets 'self.tot_cpu_cap'/'tot_ram_cap'/'tot_bw_cap' and
        the normalization maxima 'self.max_cpu'/'max_ram'/'max_bw', and caches
        a deep copy of the empty-PSN observation in 'self._empty_psn_obs_dict'.
        """
        # check that the env has a PSN
        try:
            if self.psn is None:
                raise ValueError("self.psn is None")
        except AttributeError:
            raise AttributeError("self.psn is not defined")

        # initialize lists
        cpu_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        ram_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        bw_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        placement_state = np.zeros(len(self.psn.nodes), dtype=int)

        # scan all nodes and save data in lists
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = 0
        for node_id, node in self.psn.nodes.items():
            self.tot_cpu_cap += node.get('CPUcap', 0)
            self.tot_ram_cap += node.get('RAMcap', 0)
            cpu_avails[self.map_id_idx[node_id]] = node.get('availCPU', 0)
            ram_avails[self.map_id_idx[node_id]] = node.get('availRAM', 0)
        # scan all links and save data in list
        # (each link's available BW contributes to BOTH of its endpoints)
        for extremes, link in self.psn.edges.items():
            self.tot_bw_cap += link['BWcap']
            bw_avails[self.map_id_idx[extremes[0]]] += link['availBW']
            bw_avails[self.map_id_idx[extremes[1]]] += link['availBW']

        # save max CPU/RAM/BW capacities (= availabilities in empty PSN) of all nodes
        self.max_cpu = np.max(cpu_avails)
        self.max_ram = np.max(ram_avails)
        self.max_bw = np.max(bw_avails)

        # normalize the quantities (in-place, so the arrays stored in the obs are normalized)
        cpu_avails /= self.max_cpu
        ram_avails /= self.max_ram
        bw_avails /= self.max_bw

        obs = {
            # PSN state
            'cpu_avails': cpu_avails,
            'ram_avails': ram_avails,
            'bw_avails': bw_avails,
            'placement_state': placement_state,
            # NSPR state
            'cur_vnf_cpu_req': np.array([0], dtype=int),
            'cur_vnf_ram_req': np.array([0], dtype=int),
            'cur_vnf_bw_req': np.array([0], dtype=int),
            'vnfs_still_to_place': np.array([0], dtype=int)
        }

        # store the obs for an empty PSN
        # (deep copy so later in-place updates of the current obs don't corrupt this cache)
        del self._empty_psn_obs_dict
        self._empty_psn_obs_dict = copy.deepcopy(obs)

        return obs
334 |
335 | def update_nspr_state(self) -> GymObs:
336 | """ Get an observation from the environment.
337 |
338 | The PSN state is already dynamically kept updated, so this method
339 | will only collect data about the NSPR state and complete the observation
340 | dict, that will be returned.
341 |
342 | :return: an instance of an observation from the environment
343 | """
344 | # state regarding the NSPR
345 | if self.cur_vnf is not None:
346 | cur_vnf_vls = self.get_cur_vnf_vls(vnf_id=self.cur_vnf_id,
347 | nspr=self.cur_nspr)
348 | cur_vnf_cpu_req = np.array(
349 | [self.cur_vnf['reqCPU'] / self.max_cpu], dtype=np.float32)
350 |
351 | cur_vnf_ram_req = np.array(
352 | [self.cur_vnf['reqRAM'] / self.max_ram], dtype=np.float32)
353 |
354 | cur_vnf_bw_req = np.array(
355 | [sum([vl['reqBW'] for vl in cur_vnf_vls.values()]) / self.max_bw],
356 | dtype=np.float32)
357 |
358 | vnfs_still_to_place = np.array(
359 | [len(self.cur_nspr_unplaced_vnfs_ids) + 1], dtype=int)
360 | else:
361 | cur_vnf_cpu_req = np.array([0], dtype=np.float32)
362 | cur_vnf_ram_req = np.array([0], dtype=np.float32)
363 | cur_vnf_bw_req = np.array([0], dtype=np.float32)
364 | vnfs_still_to_place = np.array([0], dtype=int)
365 |
366 | self.obs_dict['cur_vnf_cpu_req'] = cur_vnf_cpu_req
367 | self.obs_dict['cur_vnf_ram_req'] = cur_vnf_ram_req
368 | self.obs_dict['cur_vnf_bw_req'] = cur_vnf_bw_req
369 | self.obs_dict['vnfs_still_to_place'] = vnfs_still_to_place
370 | return self.obs_dict
371 |
    def reset(self, **kwargs) -> GymObs:
        """ Method used to reset the environment

        Re-reads the PSN file, samples a fresh batch of NSPRs, restores the
        empty-PSN observation and re-draws the action mask.

        :return: the starting/initial observation of the environment
        """
        self.done = False  # re-set 'done' attribute

        # if last NSPR has not been placed completely, remove it, this is a new episode
        self.cur_nspr = None

        # reset network status (simply re-read the PSN file)
        # (needed because the available BW of the links gets actually modified)
        self.psn = reader.read_psn(graphml_file=self.psn_file)

        self.ep_number += 1
        self.nsprs_seen_in_cur_ep = 0

        # read the NSPRs to be evaluated
        # self.nsprs = reader.read_nsprs(nsprs_path=self.nsprs_path)
        self.nsprs = reader.sample_nsprs(nsprs_path=self.nsprs_path,
                                         n=self.nsprs_per_episode,
                                         min_arrival_time=self.time_step,
                                         max_duration=self.nsprs_max_duration)

        # reset partial rewards to be accumulated across the episodes' steps
        self.reset_partial_rewards()

        # return the obs corresponding to an empty PSN:
        # ALTERNATIVE 1: slower, but runs through the network and works with changing PSNs
        # self._obs_dict = self._init_obs_dict()

        # ALTERNATIVE 2: slightly faster on paper, but does not work with changing PSNs
        del self.obs_dict
        self.obs_dict = copy.deepcopy(self._empty_psn_obs_dict)

        # get arrived NSPRs
        self.waiting_nsprs += self.nsprs.get(self.time_step, [])
        self.pick_next_nspr()

        # update action mask (if no action masking is implemented, it has no effect)
        self._action_mask[:] = True
        # version one: more random (per-action independent draws)
        # indexes = np.random.rand(*self._action_mask.shape) < self.perc_avail_nodes
        # version two: less random — disable a fixed-size random subset of actions
        size = round((1. - self.perc_avail_nodes) * self.action_space.n)
        indexes = np.random.choice(self.action_space.n, size=size, replace=False)
        self._action_mask[indexes] = False

        # new observation
        obs = self.update_nspr_state()

        return obs
424 |
def step(self, action: int) -> Tuple[GymObs, float, bool, dict]:
    """ Perform an action in the environment

    Places the current VNF onto the selected server, routes the virtual
    links (VLs) towards the already-placed VNFs, computes the reward and
    advances the simulation clock by one time step.

    :param action: the action to be performed
        more in detail, it's the index in the list of server corresponding
        to a certain server ID, the mapping between this index and the
        server ID is done in the self.servers_map_idx_id dictionary
    :return: next observation, reward, done (True if the episode is over), info
    """
    reward, info = 0, {}

    # this happens only when the agent is prevented from choosing nodes that don't have enough resources,
    # i.e., when the environment is wrapped with PreventInfeasibleActions
    # if action < 0:
    #     obs, reward = self.manage_unsuccessful_action()
    #     return obs, reward, done, info

    # place the VNF and update the resources availabilities of the physical node
    if self.cur_nspr is not None:
        physical_node_id = self.servers_map_idx_id[action]
        physical_node = self.psn.nodes[physical_node_id]

        if not self.enough_avail_resources(physical_node_id, self.cur_vnf):
            # the VNF cannot be placed on the physical node
            obs, reward = self.manage_unsuccessful_action()
            return obs, reward, self.done, info

        # update acceptance reward and load balancing reward
        # (load balance reward grows with the CPU/RAM still available on the
        # chosen node relative to its capacity)
        idx = self.map_id_idx[physical_node_id]
        self._acceptance_rewards.append(self.rval_accepted_vnf)
        self._load_balance_rewards.append(
            self.obs_dict['cpu_avails'][idx] * self.max_cpu / physical_node['CPUcap'] +
            self.obs_dict['ram_avails'][idx] * self.max_ram / physical_node['RAMcap']
        )

        # update the resources availabilities of the physical node in the obs dict
        # (obs values are normalized by self.max_cpu / self.max_ram)
        self.cur_vnf['placed'] = physical_node_id
        self.obs_dict['cpu_avails'][idx] -= self.cur_vnf['reqCPU'] / self.max_cpu
        self.obs_dict['ram_avails'][idx] -= self.cur_vnf['reqRAM'] / self.max_ram
        self.obs_dict['placement_state'][idx] += 1

        # connect the placed VNF to the other VNFs it's supposed to be connected to
        cur_vnf_VLs = self.get_cur_vnf_vls(self.cur_vnf_id, self.cur_nspr)
        if not cur_vnf_VLs:
            # if the VNF is detached from all others, R.C. reward is 1,
            # so it's the neutral when aggregating the rewards into the global one
            self._resource_consumption_rewards.append(1)
        else:
            for (source_vnf, target_vnf), vl in cur_vnf_VLs.items():
                # get the physical nodes where the source and target VNFs are placed
                # (a 'placed' value of -1 means "not placed yet")
                source_node = self.cur_nspr.nodes[source_vnf]['placed']
                target_node = self.cur_nspr.nodes[target_vnf]['placed']

                # if the VL isn't placed yet and both the source and target VNFs are placed, place the VL
                if not vl['placed'] and source_node >= 0 and target_node >= 0:
                    self._cur_vl_reqBW = vl['reqBW']
                    psn_path = nx.shortest_path(G=self.psn,
                                                source=source_node,
                                                target=target_node,
                                                weight=self.compute_link_weight,
                                                method='dijkstra')

                    """ if NO path is available, 'nx.shortest_path' will
                    return an invalid path. Only after the whole VL has been
                    placed, it is possible to restore the resources
                    availabilities, so we use this variable to save that the
                    resources have been exceeded as soon as we find this to
                    happen, and only after the VL placement, if this var is
                    True, we restore the resources availabilities. """
                    exceeded_bw = False
                    # place VL onto the PSN
                    # and update the resources availabilities of physical links involved
                    for i in range(len(psn_path) - 1):
                        physical_link = self.psn.edges[psn_path[i], psn_path[i + 1]]
                        extreme1_idx = self.map_id_idx[psn_path[i]]
                        extreme2_idx = self.map_id_idx[psn_path[i + 1]]
                        self.obs_dict['bw_avails'][extreme1_idx] -= vl['reqBW'] / self.max_bw
                        self.obs_dict['bw_avails'][extreme2_idx] -= vl['reqBW'] / self.max_bw
                        # note: here the PSN is actually modified: the available
                        # BW of the link is decreased. Needed for shortest path computation
                        physical_link['availBW'] -= vl['reqBW']
                        if physical_link['availBW'] < 0:
                            exceeded_bw = True
                    vl['placed'] = psn_path

                    if exceeded_bw:
                        # some physical link along the path lacked enough BW:
                        # roll back via the standard failure handler
                        obs, reward = self.manage_unsuccessful_action()
                        return obs, reward, self.done, info

                    # update the resource consumption reward
                    # (shorter physical paths consume less BW -> higher reward)
                    path_length = len(psn_path) - 1
                    self._cur_resource_consumption_rewards.append(
                        1 / path_length if path_length > 0 else 1)

            # aggregate the resource consumption rewards into a single value for this action
            n_VLs_placed_now = len(self._cur_resource_consumption_rewards)
            if n_VLs_placed_now == 0:
                self._resource_consumption_rewards.append(1.)
            else:
                self._resource_consumption_rewards.append(
                    sum(self._cur_resource_consumption_rewards) / n_VLs_placed_now)
            self._cur_resource_consumption_rewards = []

        # save the ID of the next VNF
        if self.cur_nspr_unplaced_vnfs_ids:
            self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0)
            if self.accumulate_reward:
                reward = 0  # global reward is non-zero only after the whole NSPR is placed (as HADRL)
            else:
                # eventual discount factor of the acceptance reward
                if self.discount_acc_rew:
                    self.acc_rew_disc_fact += self.base_acc_rew_disc_fact
                else:
                    self.acc_rew_disc_fact = 1.
                # reward always given to the agent
                reward = self._acceptance_rewards[-1] * self.acc_rew_disc_fact * \
                    self._load_balance_rewards[-1] * \
                    self._resource_consumption_rewards[-1] / len(self.cur_nspr.nodes) / \
                    10.  # scaling factor
                reward = self._normalize_reward_0_10(reward)
        else:
            # it means we finished the VNFs of the current NSPR
            # (full placement == acceptance, hence the counter update below)
            self.nsprs_seen_in_cur_ep += 1
            self.tot_seen_nsprs += 1
            if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode:
                self.done = True
            # reset placement state
            self.obs_dict['placement_state'] = np.zeros(len(self.psn.nodes), dtype=int)
            # update global reward because the NSPR is fully placed
            # (element-wise product of the three partial rewards, summed over steps)
            reward = np.stack((self._acceptance_rewards,
                               self._resource_consumption_rewards,
                               self._load_balance_rewards)).prod(axis=0).sum()
            # normalize the reward to be in [0, 10] (as they do in HA-DRL)
            reward = self._normalize_reward_0_10(reward) * \
                2  # TODO: to give it more weight (not from HADRL)
            self.reset_partial_rewards()
            self.cur_nspr = None  # marked as None so a new one can be picked
            # update the acceptance ratio
            self.accepted_nsprs += 1

    # increase time step
    self.time_step += 1

    # check for new and departing NSPRs
    if self.nsprs is not None:
        self.check_for_departed_nsprs()
        self.waiting_nsprs += self.nsprs.get(self.time_step, [])
        self.pick_next_nspr()

    # new observation
    obs = self.update_nspr_state()

    return obs, reward, self.done, info
578 |
def render(self, mode="human"):
    """ Rendering is not supported by this environment. """
    raise NotImplementedError
581 |
--------------------------------------------------------------------------------
/src/policies/__init__.py:
--------------------------------------------------------------------------------
1 | from .hadrl_policy import HADRLPolicy
2 |
--------------------------------------------------------------------------------
/src/policies/features_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .hadrl_features_extractor import GCNsFeaturesExtractor
2 |
--------------------------------------------------------------------------------
/src/policies/features_extractors/hadrl_features_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Type
2 |
3 | import gym
4 | import networkx as nx
5 | import numpy as np
6 | import torch as th
7 | from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
8 | from torch import nn
9 | from torch.nn import Linear
10 | from torch_geometric.nn import GCNConv
11 |
12 |
class GCNsFeaturesExtractor(BaseFeaturesExtractor):
    """ GCN-based features extractor for the HA-DRL policy.

    The PSN part of the observation goes through a stack of GCN layers and
    the NSPR part through a single fully-connected layer; the two resulting
    feature vectors are concatenated into the final features vector.
    """

    def __init__(
            self,
            observation_space: gym.Space,
            psn: nx.Graph,
            activation_fn: Type[nn.Module],
            gcn_layers_dims: Tuple[int],
            nspr_out_features: int = 4
    ):
        """ Constructor

        :param observation_space: the observation space of the agent using this feature extractor
        :param psn: the PSN graph of the environment which the agent acts upon
        :param activation_fn: activation module class (e.g. nn.ReLU); a fresh
            instance is created at every use in 'forward'
        :param gcn_layers_dims: dimensions of the features vector of each node
            in each GCN layer (number of layers = length of the tuple)
        :param nspr_out_features: dimension of the features vector of the NSPR state
        """
        # these attributes are needed to compute features_dim, hence they are
        # set before the super-constructor call
        self.activation = activation_fn
        self.n_nodes = len(psn.nodes)
        self.gcn_layers_dims = gcn_layers_dims
        out_channels = gcn_layers_dims[-1]
        super().__init__(observation_space,
                         features_dim=out_channels * self.n_nodes + nspr_out_features)

        # 4 per-node features when the placement state is part of the obs, else 3
        self.psn_state_features = 4 if 'placement_state' in observation_space.spaces else 3
        self.nspr_state_features = 4

        # build an undirected edge index: each edge appears in both directions
        one_way = th.tensor(np.array(psn.edges).reshape((len(psn.edges), 2)),
                            dtype=th.long)
        both_ways = th.cat((one_way, th.flip(one_way, dims=(1,))))
        self.edge_index = both_ways.t().contiguous()

        # GCN stack: raw node features in, then the requested dims
        dims = [self.psn_state_features, *gcn_layers_dims]
        self.gcn_layers = nn.ModuleList(
            GCNConv(dims[i], dims[i + 1]) for i in range(len(dims) - 1))

        self.nspr_fc = Linear(in_features=self.nspr_state_features,
                              out_features=nspr_out_features)

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """ Extract features from a (batched) observation dict. """
        # device where the observations (and the weights) live
        device = observations['cpu_avails'].device
        # keep the edge index on the same device as the observations
        self.edge_index = self.edge_index.to(device)

        # number of stacked observations (rollout buffer length)
        batch_size = len(observations['cpu_avails'])

        # assemble the per-node PSN state tensor
        psn_state = th.empty(
            size=(batch_size, self.n_nodes, self.psn_state_features),
            dtype=th.float, device=device)
        psn_state[:, :, 0] = observations['cpu_avails']
        psn_state[:, :, 1] = observations['ram_avails']
        psn_state[:, :, 2] = observations['bw_avails']
        if 'placement_state' in observations:
            psn_state[:, :, 3] = observations['placement_state']

        # run the PSN state through the GCN stack, activating after each layer
        x = psn_state
        for gcn in self.gcn_layers:
            x = self.activation()(gcn(x, self.edge_index))
        gcn_out = x.flatten(start_dim=1)

        # assemble the NSPR state tensor and run it through the FC layer
        nspr_state = th.empty(size=(batch_size, 1, self.nspr_state_features),
                              dtype=th.float, device=device)
        nspr_state[:, :, 0] = observations['cur_vnf_cpu_req']
        nspr_state[:, :, 1] = observations['cur_vnf_ram_req']
        nspr_state[:, :, 2] = observations['cur_vnf_bw_req']
        nspr_state[:, :, 3] = observations['vnfs_still_to_place']
        nspr_out = self.activation()(self.nspr_fc(nspr_state.flatten(start_dim=1)))

        # final features vector: GCN features followed by NSPR features
        return th.cat((gcn_out, nspr_out), dim=1)
97 |
--------------------------------------------------------------------------------
/src/policies/hadrl_policy.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from typing import Callable, Dict, List, Optional, Type, Union, Tuple
3 |
4 | import gym
5 | import networkx as nx
6 | import numpy as np
7 | import torch as th
8 | from stable_baselines3.common.distributions import Distribution
9 | from stable_baselines3.common.policies import MultiInputActorCriticPolicy
10 | from stable_baselines3.common.preprocessing import preprocess_obs
11 | from stable_baselines3.common.type_aliases import Schedule
12 | from torch import nn
13 |
14 | from .features_extractors import GCNsFeaturesExtractor
15 | from .mlp_extractors.hadrl_mlp_extractor import HADRLActorCriticNet
16 |
17 |
class HADRLPolicy(MultiInputActorCriticPolicy):
    """ Policy network from the paper HA-DRL [1]

    Actor-critic policy with two separate (non-shared) GCN-based features
    extractors, one for the actor and one for the critic, and a custom
    mlp_extractor (HADRLActorCriticNet) that can optionally apply a
    placement heuristic.

    [1] https://ieeexplore.ieee.org/document/9632824
    """
    name = 'HADRL Policy'

    def __init__(
            self,
            observation_space: gym.spaces.Space,
            action_space: gym.spaces.Space,
            lr_schedule: Callable[[float], float],
            psn: nx.Graph,
            servers_map_idx_id: Dict[int, int],
            net_arch: Optional[Union[List[int], Dict[str, List[int]]]] = None,
            activation_fn: Type[nn.Module] = nn.Tanh,
            gcn_layers_dims: Tuple[int] = (60,),
            nspr_out_features: int = 4,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
            *args,
            **kwargs,
    ):
        """
        :param observation_space: Observation space of the agent
        :param action_space: Action space of the agent
        :param lr_schedule: Learning rate schedule
        :param psn: Physical Service Network
        :param servers_map_idx_id: Mapping between servers' indexes and their IDs
        :param net_arch: architecture of the policy and value networks after the feature extractor
        :param activation_fn: Activation function
        :param gcn_layers_dims: Dimensions of the GCN layers
        :param nspr_out_features: Number of output features of the NSPR state
        :param use_heuristic: Whether to use the heuristic or not
        :param heu_kwargs: Keyword arguments for the heuristic
        """

        # assert len(net_arch) == 1 and isinstance(net_arch[0], dict), \
        #     "This policy allows net_arch to be a list with only one dict"

        # set before the super-constructor because they are read by
        # _build_mlp_extractor, which the super-constructor invokes
        # (see the workaround note below)
        self.psn = psn
        self.gcn_layers_dims = gcn_layers_dims  # saved in an attribute for logging purposes
        self.servers_map_idx_id = servers_map_idx_id
        self.use_heuristic = use_heuristic
        self.heu_kwargs = heu_kwargs

        super(HADRLPolicy, self).__init__(
            observation_space,
            action_space,
            lr_schedule,
            net_arch,
            activation_fn,
            # Pass remaining arguments to base class
            *args,
            **kwargs,
        )
        # non-shared features extractors for the actor and the critic
        # NOTE(review): activations are hard-coded here (Tanh for the actor,
        # ReLU for the critic); the 'activation_fn' argument is not forwarded
        # to the extractors — confirm this is intended
        self.policy_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.Tanh, gcn_layers_dims,
            nspr_out_features
        )
        self.value_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.ReLU, gcn_layers_dims,
            nspr_out_features
        )
        # separate features dims for the actor ('pi') and the critic ('vf')
        self.features_dim = {'pi': self.policy_features_extractor.features_dim,
                             'vf': self.value_features_extractor.features_dim}
        delattr(self, "features_extractor")  # remove the shared features extractor

        # TODO: check what this step actually does
        # Disable orthogonal initialization
        # self.ortho_init = False

        # Workaround alert!
        # This method is called in the super-constructor. It creates the optimizer,
        # but using also the params of the features extractor before creating
        # our own 2 separate ones ('policy_features_extractor' and
        # 'value_features_extractor'). Therefore we need to re-create the optimizer
        # using the params of the correct new features extractor.
        # (it will also re-do a bunch of things like re-creating the mlp_extractor,
        # which was fine, but it's not a problem).
        self._rebuild(lr_schedule)

    def _rebuild(self, lr_schedule: Schedule) -> None:
        """
        Like method _build, but needed to be re-called to re-create the
        optimizer, since it was created using obsolete parameters, i.e. params
        including the ones of the default shared features extractor and NOT
        including the ones of the new features extractors.
        The mlp_extractor is recreated too, since it was created with incorrect features_dim.

        :param lr_schedule: Learning rate schedule
            lr_schedule(1) is the initial learning rate
        """
        self._build_mlp_extractor()

        # action_net and value_net as created in the '_build' method are OK,
        # no need to recreate them.

        # Init weights: use orthogonal initialization
        # with small initial weight for the output
        if self.ortho_init:
            # TODO: check for features_extractor
            # Values from stable-baselines.
            # features_extractor/mlp values are
            # originally from openai/baselines (default gains/init_scales).
            module_gains = {
                self.policy_features_extractor: np.sqrt(2),
                self.value_features_extractor: np.sqrt(2),
                self.mlp_extractor: np.sqrt(2),
                self.action_net: 0.01,
                self.value_net: 1,
            }
            for module, gain in module_gains.items():
                module.apply(partial(self.init_weights, gain=gain))

        # Setup optimizer with initial learning rate
        # (now includes the params of both new features extractors)
        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)

    def _build_mlp_extractor(self) -> None:
        """ Create the custom HA-DRL actor-critic network. """
        self.mlp_extractor = HADRLActorCriticNet(
            action_space=self.action_space,
            psn=self.psn,
            net_arch=self.net_arch,
            servers_map_idx_id=self.servers_map_idx_id,
            features_dim=self.features_dim,
            use_heuristic=self.use_heuristic,
            heu_kwargs=self.heu_kwargs
        )

    def extract_features(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """
        Preprocess the observation if needed and extract features.

        :param obs: Observation
        :return: tuple with the actor's and the critic's features
        """
        assert self.policy_features_extractor is not None and \
            self.value_features_extractor is not None
        preprocessed_obs = preprocess_obs(obs, self.observation_space,
                                          normalize_images=self.normalize_images)
        policy_features = self.policy_features_extractor(preprocessed_obs)
        value_features = self.value_features_extractor(preprocessed_obs)
        return policy_features, value_features

    def forward(self, obs: th.Tensor, deterministic: bool = False) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Forward pass in all the networks (actor and critic)

        :param obs: Observation
        :param deterministic: Whether to sample or use deterministic actions
        :return: action, value and log probability of the action
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        # the raw obs is forwarded too: the (optional) heuristic layer needs it
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)

        # Evaluate the values for the given observations
        values = self.value_net(latent_vf)
        distribution = self._get_action_dist_from_latent(latent_pi)
        actions = distribution.get_actions(deterministic=deterministic)
        log_prob = distribution.log_prob(actions)
        return actions, values, log_prob

    def evaluate_actions(self, obs: th.Tensor, actions: th.Tensor) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Evaluate actions according to the current policy,
        given the observations.

        :param obs: Observation
        :param actions: Actions
        :return: estimated value, log likelihood of taking those actions
            and entropy of the action distribution.
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        distribution = self._get_action_dist_from_latent(latent_pi)
        log_prob = distribution.log_prob(actions)
        values = self.value_net(latent_vf)
        return values, log_prob, distribution.entropy()

    def get_distribution(self, obs: th.Tensor) -> Distribution:
        """
        Get the current policy distribution given the observations.

        :param obs: Observation
        :return: the action distribution.
        """
        policy_features, _ = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        return self._get_action_dist_from_latent(latent_pi)

    def predict_values(self, obs: th.Tensor) -> th.Tensor:
        """
        Get the estimated values according to the current policy given the observations.

        :param obs: Observation
        :return: the estimated values.
        """
        _, value_features = self.extract_features(obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        return self.value_net(latent_vf)
225 |
--------------------------------------------------------------------------------
/src/policies/mlp_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .hadrl_mlp_extractor import HADRLActor, HADRLCritic, HADRLActorCriticNet
2 |
--------------------------------------------------------------------------------
/src/policies/mlp_extractors/hadrl_mlp_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Dict, Union, List
2 |
3 | import gym
4 | import networkx as nx
5 | import torch as th
6 | from torch import nn
7 |
8 | from heuristic_layers import P2CLoadBalanceHeuristic, HADRLHeuristic
9 |
10 |
class HADRLActor(nn.Module):
    """ Actor network for the HA-DRL [1] algorithm

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            action_space: 'gym.Space',
            psn: 'nx.Graph',
            net_arch: Union[List[int], Dict[str, List[int]]],
            servers_map_idx_id: Dict[int, int],
            in_features: int,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
    ):
        """ Constructor

        :param action_space: action space
        :param psn: env's physical substrate network
        :param net_arch: dict whose 'pi' entry lists the actor's hidden layer sizes
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param in_features: input dim of the first layer (features extractor's output dim)
        :param use_heuristic: if True, actor will use P2C heuristic
        :param heu_kwargs: kwargs for the heuristic layer; may contain
            'heu_class' to select the heuristic class (default: HADRLHeuristic)
        """
        super().__init__()
        self.use_heuristic = use_heuristic
        # bug fix: 'heu_kwargs' defaults to None, so the previous
        # unconditional 'heu_kwargs.get(...)' crashed whenever the caller
        # did not pass it explicitly
        heu_kwargs = {} if heu_kwargs is None else heu_kwargs

        # fully-connected, Tanh-activated layers
        dims = [in_features] + net_arch['pi']
        modules = nn.ModuleList()
        for i in range(len(dims) - 1):
            modules.append(nn.Linear(dims[i], dims[i + 1]))
            modules.append(nn.Tanh())

        if self.use_heuristic:
            # resolve the heuristic class lazily, only when it is actually needed
            # NOTE(review): 'heu_class' itself is forwarded in **heu_kwargs, as
            # in the original call — heuristic constructors must tolerate it
            heu_class = heu_kwargs.get('heu_class', HADRLHeuristic)
            self.heu_layer = heu_class(action_space, servers_map_idx_id, psn,
                                       **heu_kwargs).requires_grad_(False)

        self.layers = nn.Sequential(*modules)

    def forward(self, x: th.Tensor, obs: th.Tensor) -> th.Tensor:
        """ Forward pass: MLP, then (optionally) the frozen heuristic layer.

        :param x: features extracted from the observation
        :param obs: raw observation (only needed by the heuristic layer)
        :return: latent policy tensor
        """
        x = self.layers(x)
        if self.use_heuristic:
            x = self.heu_layer(x, obs)
        return x
56 |
57 |
class HADRLCritic(nn.Module):
    """ Critic network for the HA-DRL [1] algorithm

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            in_features: int,
            net_arch: List[Union[int, Dict[str, List[int]]]]
    ):
        """ Constructor

        :param in_features: number of features extracted by the features extractor,
            i.e., input dim of the first layer of the network
        :param net_arch: dict whose 'vf' entry lists the critic's hidden layer sizes
        """
        super().__init__()
        # fully-connected, ReLU-activated layers
        layer_dims = [in_features, *net_arch['vf']]
        stack = []
        for d_in, d_out in zip(layer_dims[:-1], layer_dims[1:]):
            stack += [nn.Linear(d_in, d_out), nn.ReLU()]
        self.layers = nn.Sequential(*stack)

    def forward(self, x: th.Tensor) -> th.Tensor:
        """ Forward pass through the value-network MLP. """
        return self.layers(x)
84 |
85 |
class HADRLActorCriticNet(nn.Module):
    """
    Actor-Critic network for the HA-DRL [1] algorithm

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            action_space: gym.Space,
            psn: nx.Graph,
            net_arch: List[Union[int, Dict[str, List[int]]]],
            servers_map_idx_id: Dict[int, int],
            features_dim: Union[int, Dict[str, int]],
            gcn_out_channels: int = 60,
            nspr_out_features: int = 4,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
    ):
        """ Constructor

        :param action_space: action space
        :param psn: env's physical substrate network
        :param net_arch: dict with the 'pi' and 'vf' hidden-layer sizes
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param features_dim: features extractor's output dim; a single int
            (shared by actor and critic) or a dict with 'pi' and 'vf' entries
        :param gcn_out_channels: number of output channels of the GCN layer
        :param nspr_out_features: output dim of the layer that receives the NSPR state
        :param use_heuristic: if True, actor will use P2C heuristic
        :param heu_kwargs: keyword arguments for the heuristic layer
        """
        super().__init__()

        # IMPORTANT:
        # these latent dims are read by the policy when it creates the
        # action/value distributions
        self.latent_dim_pi = net_arch['pi'][-1]
        self.latent_dim_vf = net_arch['vf'][-1]

        # actor and critic may receive differently-sized feature vectors
        if isinstance(features_dim, int):
            pi_features_dim = vf_features_dim = features_dim
        else:
            pi_features_dim = features_dim['pi']
            vf_features_dim = features_dim['vf']

        # policy (actor) network
        self.policy_net = HADRLActor(action_space, psn, net_arch,
                                     servers_map_idx_id, pi_features_dim,
                                     use_heuristic, heu_kwargs)
        # value (critic) network
        self.value_net = HADRLCritic(vf_features_dim, net_arch)

    def forward(self, features: th.Tensor, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """ Run both sub-networks on the same features.

        :return: (th.Tensor, th.Tensor) latent_policy, latent_value of the specified network.
            If all layers are shared, then ``latent_policy == latent_value``
        """
        return self.policy_net(features, obs), self.value_net(features)

    def forward_actor(self, features: th.Tensor, obs: th.Tensor) -> th.Tensor:
        """ Latent policy tensor for the given features and raw observation. """
        return self.policy_net(features, obs)

    def forward_critic(self, features: th.Tensor) -> th.Tensor:
        """ Latent value tensor for the given features. """
        return self.value_net(features)
148 |
--------------------------------------------------------------------------------
/src/reader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | from typing import Tuple, List, Dict
4 |
5 | import networkx as nx
6 |
7 |
def check_if_graphml(file: str):
    """ Checks if a file is a GraphML file (checking the extension)

    :param file: path to the file to be checked
    :raise ValueError: in case the file is not a GraphML file
    """
    if file.endswith(".graphml"):
        return
    raise ValueError("{} is not a GraphML file".format(file))
16 |
17 |
def _check_graph(network: nx.Graph):
    """ Validates the graph-level attributes of a network

    :param network: network that needs to be checked

    :raise AssertionError: if some graph's attributes are not correct
    """
    attrs = network.graph
    # only NSPRs carry an 'E2ELatency' attribute, so all the NSPR-specific
    # checks live inside this branch
    if "E2ELatency" in attrs:
        assert attrs['E2ELatency'] > 0
        # default the arrival time to 0 when absent, otherwise validate it
        if "ArrivalTime" not in attrs:
            attrs['ArrivalTime'] = 0
        else:
            assert attrs['ArrivalTime'] >= 0
        # the NSPR must stay in the system at least one step per VNF
        if "DepartureTime" in attrs:
            assert attrs['DepartureTime'] >= \
                attrs['ArrivalTime'] + len(network.nodes)
35 |
36 |
def _check_nodes(network: nx.Graph, required_node_attributes: Tuple[str, ...],
                 **admissible_values: tuple):
    """ Checks that the nodes of the network are correct

    Besides validating, it also initializes some per-node attributes in
    place: 'placed' for VNFs, 'availCPU'/'availRAM' for PSN servers.

    :param network: network whose nodes have to be checked
    :param required_node_attributes: tuple with all required attributes for the nodes
    :param admissible_values: (optional) extra arguments where the name is an
        attribute name and the value is a tuple with the admissible values

    :raise AssertionError:
        - in case some nodes don't contain all the required parameters
        - in case some non-admissible values are used for some arguments
    """
    nonneg_attribs = ("CPUcap", "RAMcap", "availCPU", "availRAM", "reqCPU", "reqRAM")
    for _, node in network.nodes.items():
        # when the admissible values for an attribute are given, check that
        # its value is one of them; resource amounts must be non-negative
        for attrib, value in node.items():
            assert value in admissible_values.get(attrib, (value,))
            if attrib in nonneg_attribs:
                assert value >= 0
        # non-server physical nodes (routers/switches/...) need no resource
        # bookkeeping nor required attributes; VNFs have no "NodeType" at
        # all, so they fall through to the checks below
        if node.get("NodeType", "server") != "server":
            continue
        if "reqCPU" in node:
            # 'reqCPU' is mandatory for NSPRs, so the node is a VNF:
            # track the ID of the physical node it gets placed onto (-1 = none)
            node['placed'] = -1
        else:
            # the node belongs to a PSN: initialize availabilities to capacity
            node['availCPU'] = node['CPUcap']
            node['availRAM'] = node['RAMcap']
        # check that all required attributes are present in the current node
        assert all(attr in node for attr in required_node_attributes)
71 |
72 |
def _check_edges(network: nx.Graph, required_link_attributes: Tuple[str, ...], **admissible_values: tuple):
    """ Checks that the edges of the network are correct

    Besides validating, it also initializes some per-link attributes in
    place: 'placed' for VLs, 'availBW' for physical links.

    :param network: network whose edges have to be checked
    :param required_link_attributes: tuple with all required attributes for the links
    :param admissible_values: (optional) extra arguments where the name is an
        attribute name and the value is a tuple with the admissible values

    :raise AssertionError:
        - in case some links don't contain all the required parameters
        - in case some non-admissible values are used for some arguments
    """
    nonneg_attribs = ("BWcap", "reqBW", "Latency", "reqLatency")
    for u, v in list(network.edges):
        link = network.edges[u, v]
        # check that all required attributes are present in the current link
        assert all(attrib in link for attrib in required_link_attributes)
        # when the admissible values for an attribute are given, check that
        # its value is one of them; BW/latency amounts must be non-negative
        for attrib, value in link.items():
            assert value in admissible_values.get(attrib, (value,))
            if attrib in nonneg_attribs:
                assert value >= 0
        # initialize resources availabilities if PSN
        if "reqBW" in link:
            # 'reqBW' is mandatory for NSPRs, so the link is a VL:
            # it will store the physical path it gets mapped onto
            link['placed'] = []
        else:
            # physical PSN link: initialize available BW to full capacity
            link['availBW'] = link['BWcap']
104 |
105 |
def check_required_attributes(network: nx.Graph, required_node_attributes: Tuple[str, ...],
                              required_link_attributes: Tuple[str, ...], **admissible_values: tuple):
    """ Checks whether all the required attributes are present in the nodes and link of the network passed as argument

    Delegates to the graph-, node- and link-level checks; besides checking,
    those helpers also initialize some attributes of the network in place
    (resource availabilities for PSNs, 'placed' markers for NSPRs).

    :param network: network whose nodes and links have to be checked
    :param required_node_attributes: tuple with all required attributes for the nodes
    :param required_link_attributes: tuple with all required attributes for the links
    :param admissible_values: (optional) extra arguments where the name is an
        attribute name and the value is a tuple with the admissible values

    :raise AssertionError:
        - in case some nodes/links don't contain all the required parameters
        - in case some non-admissible values are used for some arguments
    """
    _check_graph(network)
    _check_nodes(network, required_node_attributes, **admissible_values)
    _check_edges(network, required_link_attributes, **admissible_values)
123 |
124 |
def read_psn(graphml_file: str) -> nx.Graph:
    """ Load a PSN (physical substrate network) from a GraphML file.

    :param graphml_file: GraphML file containing the definition of the PSN
    :return: a networkx.Graph representing the PSN

    :raise ValueError: if "graphml_file" is not a GraphML file
    :raise AssertionError: if some required attributes of nodes and links are missing
    """
    # refuse anything that is not a GraphML file
    check_if_graphml(graphml_file)

    # parse the file into a graph whose node identifiers are integers
    psn = nx.read_graphml(path=graphml_file, node_type=int)

    # validate the graph: every node needs a type and CPU/RAM capacities,
    # every link a bandwidth capacity; NodeType is restricted to the four
    # known device kinds
    check_required_attributes(
        network=psn,
        required_node_attributes=("NodeType", "CPUcap", "RAMcap"),
        required_link_attributes=("BWcap",),
        NodeType=("UAP", "router", "switch", "server"),
    )
    return psn
145 |
146 |
def read_single_nspr(graphml_file: str) -> nx.Graph:
    """ Load a single NSPR (network slice placement request) from a GraphML file.

    :param graphml_file: GraphML file with the definition of the NSPR
    :return: the NSPR as a networkx.Graph object

    :raise ValueError: if "graphml_file" is not a GraphML file
    :raise AssertionError: if some required attributes of nodes and links are missing
    """
    # refuse anything that is not a GraphML file
    check_if_graphml(graphml_file)

    # parse the file into a graph whose node identifiers are integers
    nspr = nx.read_graphml(path=graphml_file, node_type=int)

    # every VNF must declare its CPU/RAM demand, every VL its bandwidth demand
    check_required_attributes(
        network=nspr,
        required_node_attributes=("reqCPU", "reqRAM"),
        required_link_attributes=("reqBW",),
    )
    return nspr
166 |
167 |
def read_nsprs(nsprs_path: str) -> Dict[int, List[nx.Graph]]:
    """ Reads all the NSPRs (network slice placement requests) in a directory

    :param nsprs_path: either path to the directory with the files defining a
        NSPR each or the path to a single NSPR
    :return: a dict having as keys the arrival times of the NSPRs and as
        values the NSPRs themselves
    :raise ValueError: if nsprs_path is neither a directory nor a file
    """
    # collect the files to read: a single file, or every file in the directory
    if os.path.isfile(nsprs_path):
        nspr_files = [nsprs_path]
    elif os.path.isdir(nsprs_path):
        nspr_files = [os.path.join(nsprs_path, f) for f in os.listdir(nsprs_path)]
    else:
        raise ValueError(f"{nsprs_path} is neither a directory nor a file")

    # group the NSPRs by arrival time (the original single-file branch had an
    # unreachable 'else': membership in a freshly-created dict is always False)
    nspr_dict: Dict[int, List[nx.Graph]] = {}
    for nspr_file in nspr_files:
        nspr = read_single_nspr(nspr_file)
        nspr_dict.setdefault(nspr.graph['ArrivalTime'], []).append(nspr)
    return nspr_dict
194 |
195 |
def sample_nsprs(nsprs_path: str, n: int, min_arrival_time: int = 0,
                 max_duration: int = 100) -> Dict[int, List[nx.Graph]]:
    """ Samples a subset of NSPRs from a directory containing multiple NSPRs.
    It assigns random arrival and departure time to those NSPRs.

    :param nsprs_path: path to the directory containing the NSPRs
    :param n: number of NSPRs to sample (if None, all the NSPRs are used)
    :param min_arrival_time: minimum arrival time to assign to the sampled NSPRs
    :param max_duration: maximum duration (dep. time - arr. time) to assign to the sampled NSPRs
    :return: a dict having as keys the arrival times of the NSPRs and as
        values the NSPRs themselves
    :raise ValueError: if nsprs_path is not a directory
    """
    if not os.path.isdir(nsprs_path):
        raise ValueError(f"{nsprs_path} is not a directory")

    all_nsprs_files = os.listdir(nsprs_path)
    # n=None means "use every NSPR in the directory"; otherwise never ask for
    # more files than are available
    n = min(n, len(all_nsprs_files)) if n is not None else len(all_nsprs_files)
    sampled_nsprs_files = random.sample(all_nsprs_files, n)
    # arrival times are drawn without replacement, so they are all distinct.
    # NOTE(review): random.sample raises ValueError if n > max_duration —
    # confirm callers keep n within that bound
    arrival_times = random.sample(range(min_arrival_time, min_arrival_time + max_duration), n)
    nspr_dict = {}
    for i, arr_time in enumerate(arrival_times):
        nspr = read_single_nspr(os.path.join(nsprs_path, sampled_nsprs_files[i]))
        nspr.graph['ArrivalTime'] = arr_time
        # the duration is at least the number of VNFs in the NSPR.
        # NOTE(review): random.randint raises ValueError if
        # len(nspr.nodes) > max_duration — confirm NSPR sizes vs max_duration
        nspr.graph['duration'] = random.randint(len(nspr.nodes), max_duration)
        # keep the same dict-of-lists shape returned by read_nsprs (arrival
        # times are distinct here, so each list holds one NSPR)
        nspr_dict[arr_time] = nspr_dict.get(arr_time, []) + [nspr]
    return nspr_dict
223 |
--------------------------------------------------------------------------------
/src/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from .discrete_with_negatives import DiscreteWithNegatives
2 |
3 | __all__ = [
4 | "DiscreteWithNegatives",
5 | ]
6 |
--------------------------------------------------------------------------------
/src/spaces/discrete_with_negatives.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of a space consisting of finitely many elements.
3 |
4 | DISCLAIMER:
5 | This file is taken and slightly modified from the Discrete space of OpenAI gym release 0.25.1.
6 |
7 | stable-baselines3-1.5.0 requires gym==0.21, since they introduced breaking changes in 0.22.
8 | In this project, it is required to have a discrete space with the 'start' attribute, which
9 | was introduced only in later versions of gym, therefore a custom space
10 | (similar to later versions of the Discrete space in gym) is needed.
11 | """
12 |
13 | from typing import Optional, Union
14 |
15 | import numpy as np
16 |
17 | from gym.spaces.space import Space
18 | from gym.utils import seeding
19 |
20 |
class DiscreteWithNegatives(Space):
    r"""A space consisting of finitely many elements.

    This class represents a finite subset of integers, more specifically a set of the form :math:`\{ a, a+1, \dots, a+n-1 \}`.

    Example::

        >>> DiscreteWithNegatives(2) # {0, 1}
        >>> DiscreteWithNegatives(3, start=-1) # {-1, 0, 1}
    """

    def __init__(
        self,
        n: int,
        seed: Optional[int] = None,
        start: int = 0,
    ):
        r"""Constructor of :class:`DiscreteWithNegatives` space.

        This will construct the space :math:`\{\text{start}, ..., \text{start} + n - 1\}`.

        Args:
            n (int): The number of elements of this space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
            start (int): The smallest element of this space.
        """
        assert isinstance(n, (int, np.integer))
        assert n > 0, "n (counts) have to be positive"
        assert isinstance(start, (int, np.integer))
        # normalize to plain Python ints (callers may pass numpy integers)
        self.n = int(n)
        self.start = int(start)
        # scalar space: shape is (), samples are np.int64
        super().__init__((), np.int64, seed)

    def sample(self, mask: Optional[np.ndarray] = None) -> int:
        """Generates a single random sample from this space.

        A sample will be chosen uniformly at random with the mask if provided

        Args:
            mask: An optional mask for if an action can be selected.
                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.

        Returns:
            A sampled integer from the space
        """
        if mask is not None:
            # validate the mask: right type, dtype, shape and 0/1 values only
            assert isinstance(
                mask, np.ndarray
            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
            assert (
                mask.dtype == np.int8
            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
            assert mask.shape == (
                self.n,
            ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
            valid_action_mask = mask == 1
            assert np.all(
                np.logical_or(mask == 0, valid_action_mask)
            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
            if np.any(valid_action_mask):
                # uniform choice among the indices the mask allows, shifted by start
                return int(
                    self.start + self.np_random.choice(np.where(valid_action_mask)[0])
                )
            else:
                # fully-masked space: fall back to the smallest element
                return self.start

        # no mask: uniform sample over the whole range [start, start + n)
        return int(self.start + self.np_random.randint(self.n))

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            # 0-dimensional numpy integer scalars/arrays count as members too
            as_int = int(x)  # type: ignore
        else:
            return False
        return self.start <= as_int < self.start + self.n

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        # omit 'start' when it is the default, mirroring gym's Discrete repr
        if self.start != 0:
            return "DiscreteWithNegatives(%d, start=%d)" % (self.n, self.start)
        return "DiscreteWithNegatives(%d)" % self.n

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable — confirm spaces are never used as dict keys/set members
        return (
            isinstance(other, DiscreteWithNegatives)
            and self.n == other.n
            and self.start == other.start
        )

    def __setstate__(self, state):
        """Used when loading a pickled space.

        This method has to be implemented explicitly to allow for loading of legacy states.

        Args:
            state: The new state
        """
        super().__setstate__(state)

        # Don't mutate the original state
        state = dict(state)

        # Allow for loading of legacy states.
        # See https://github.com/openai/gym/pull/2470
        if "start" not in state:
            state["start"] = 0

        # Update our state
        self.__dict__.update(state)
136 |
--------------------------------------------------------------------------------
/src/trainer.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from typing import List, Optional, Type
3 |
4 | import gym
5 | import wandb
6 | from stable_baselines3 import A2C
7 | from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
8 | from stable_baselines3.common.env_util import make_vec_env
9 | from torch import nn
10 | from wandb.integration.sb3 import WandbCallback
11 |
12 | import reader
13 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback
14 | from callbacks.hparam_callback import HParamCallback
15 | from callbacks.psn_load_callback import PSNLoadCallback
16 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback
17 | from policies.features_extractors.hadrl_features_extractor import \
18 | GCNsFeaturesExtractor
19 | from utils import make_env
20 |
21 |
class Trainer:
    """ Builds the training (and optionally evaluation) environment(s) and an
    A2C model with the GCN-based features extractor, and runs the training.
    """

    def __init__(
        self,
        psn_path: str,
        n_tr_envs: int,
        load_perc: float,
        time_limit: bool,
        max_ep_steps: int,
        tensorboard_log: str,
        create_eval_env: bool = False,
        reset_load_class: Optional[Type[gym.Wrapper]] = None,
        reset_load_kwargs: Optional[dict] = None,
        placement_state: bool = True,
        accumulate_rew: bool = True,
        discount_acc_rew: bool = True,
        dynamic_connectivity: bool = False,
        dynamic_connectivity_kwargs: Optional[dict] = None,
        generate_nsprs: bool = True,
        nsprs_per_ep: int = 1,
        vnfs_per_nspr: int = 5,
        always_one: bool = True,
        seed: Optional[int] = None,
        net_arch: Optional[dict] = None,
        activation_fn: Type[nn.Module] = nn.Tanh,
        gcn_layers_dims: tuple = (20, 20, 20),
        device: str = 'cuda:0',
        lr: float = 0.0002,
        n_steps: int = 1,
        gamma: float = 0.99,
        ent_coef: float = 0.01,
        gae_lambda: float = 0.92,
    ):
        """ Create the training environment(s) and the A2C model.

        :param psn_path: path to the GraphML file defining the PSN
        :param n_tr_envs: number of vectorized training environments
        :param load_perc: CPU load used by the NSPRs generator, in [0, 1)
        :param time_limit: if True, episodes are truncated after 'max_ep_steps' steps
        :param max_ep_steps: max number of steps per episode (used when 'time_limit')
        :param tensorboard_log: directory for the tensorboard logs
        :param create_eval_env: if True, an evaluation env (deep copy of the training one) is created
        :param reset_load_class: wrapper class used to reset the PSN with some load (None: no wrapper)
        :param reset_load_kwargs: kwargs of the reset-with-load wrapper (default: dict(cpu_load=0.8))
        :param placement_state: if False, the placement state is removed from the observations
        :param accumulate_rew: if True, the reward is accumulated along the placement of a NSPR
        :param discount_acc_rew: if True, the accumulated acceptance reward is discounted
        :param dynamic_connectivity: if True, the PSN connectivity changes every episode
        :param dynamic_connectivity_kwargs: kwargs of the DynamicConnectivity wrapper (default: dict(link_bw=10_000))
        :param generate_nsprs: if True, NSPRs are generated on the fly (NSPRsGeneratorHADRL wrapper)
        :param nsprs_per_ep: number of NSPRs per training episode
        :param vnfs_per_nspr: number of VNFs per generated NSPR
        :param always_one: forwarded to the NSPRs generator
        :param seed: random seed for the envs and the model
        :param net_arch: architecture of the policy/value MLP heads
            (default: dict(pi=[256, 128], vf=[256, 128, 32]))
        :param activation_fn: activation function of the MLP extractor
        :param gcn_layers_dims: dimensions of the GCN layers of the features extractor
        :param device: torch device the model runs on
        :param lr: learning rate
        :param n_steps: number of steps per A2C update
        :param gamma: discount factor
        :param ent_coef: entropy coefficient
        :param gae_lambda: GAE lambda
        """
        # materialize the default dicts here instead of in the signature, so
        # no mutable default argument is ever shared across instances
        if reset_load_kwargs is None:
            reset_load_kwargs = dict(cpu_load=0.8)
        if dynamic_connectivity_kwargs is None:
            dynamic_connectivity_kwargs = dict(link_bw=10_000)
        if net_arch is None:
            net_arch = dict(pi=[256, 128], vf=[256, 128, 32])

        # checks on arguments
        assert n_tr_envs > 0
        assert 0. <= load_perc < 1., "Training load must be a percentage between 0 and 1"

        # save some attributes
        self.nsprs_per_ep = nsprs_per_ep
        self.max_ep_steps = max_ep_steps
        self.time_limit = time_limit
        self.placement_state = placement_state

        # read PSN file
        psn = reader.read_psn(psn_path)

        # create training environment
        self.tr_env = make_vec_env(
            env_id=make_env,
            n_envs=n_tr_envs,
            env_kwargs=dict(
                psn_path=psn_path,
                base_env_kwargs=dict(
                    accumulate_reward=accumulate_rew,
                    discount_acc_rew=discount_acc_rew,
                ),
                time_limit=time_limit,
                time_limit_kwargs=dict(max_episode_steps=max_ep_steps),
                generate_nsprs=generate_nsprs,
                nsprs_gen_kwargs=dict(
                    nsprs_per_ep=nsprs_per_ep,
                    vnfs_per_nspr=vnfs_per_nspr,
                    load=load_perc,
                    always_one=always_one
                ),
                reset_load_class=reset_load_class,
                reset_load_kwargs=reset_load_kwargs,
                placement_state=placement_state,
                dynamic_connectivity=dynamic_connectivity,
                dynamic_connectivity_kwargs=dynamic_connectivity_kwargs
            ),
            seed=seed,
        )

        # create evaluation environment (kept as None when not requested, so
        # 'train' can detect its absence instead of raising AttributeError)
        self.eval_env = copy.deepcopy(self.tr_env) if create_eval_env else None

        # create the model
        self.model = A2C(policy='MultiInputPolicy', env=self.tr_env, verbose=2, device=device,
                         learning_rate=lr,
                         n_steps=n_steps,
                         gamma=gamma,
                         ent_coef=ent_coef,
                         gae_lambda=gae_lambda,
                         seed=seed,
                         use_rms_prop=True,
                         tensorboard_log=tensorboard_log,
                         policy_kwargs=dict(
                             activation_fn=activation_fn,
                             net_arch=net_arch,
                             features_extractor_class=GCNsFeaturesExtractor,
                             share_features_extractor=False,
                             features_extractor_kwargs=dict(
                                 psn=psn,
                                 activation_fn=nn.ReLU,
                                 gcn_layers_dims=gcn_layers_dims,
                             )
                         ))
        print(self.model.policy)

        # wandb config
        if reset_load_kwargs.get('rand_load', False):
            load_range = reset_load_kwargs.get('rand_range', (0., 1.))
            self.tr_load = 'random ' + str(load_range)
        else:
            self.tr_load = reset_load_kwargs.get('cpu_load', 0.8)
        self.wandb_config = {
            "n tr envs": n_tr_envs,
            "NSPRs per training ep": nsprs_per_ep,
            "max steps per tr ep": max_ep_steps if time_limit else None,
            "PSN load (tr)": self.tr_load,
            "GCNs layers dims": gcn_layers_dims,
            "mpl_extractor arch": net_arch,
            "use placement state": placement_state,
            "accumulate reward": accumulate_rew,
            "discount acceptance reward": discount_acc_rew,
            "dynamic connectivity": dynamic_connectivity,
            "dynamic load range": "0-0.9",
        }

    def train(
        self,
        tot_steps: int,
        log_interval: int = 10,
        wandb: bool = False,
        callbacks: Optional[List[BaseCallback]] = None,
    ):
        """ Train the model.

        :param tot_steps: total number of training timesteps
        :param log_interval: number of updates between two logs
        :param wandb: if True, log the run to Weights & Biases
            (the name shadows the 'wandb' module — kept for backward compatibility)
        :param callbacks: extra callbacks to run during training
        """
        # copy the caller's list: a mutable default (the old '= []') plus
        # in-place 'append' accumulated callbacks across successive calls
        callbacks = list(callbacks) if callbacks is not None else []

        # wandb things
        self.wandb_config["total training steps"] = tot_steps
        wandb_run = None
        if wandb:
            # the boolean parameter shadows the module imported at file level
            # (calling wandb.init on a bool raised AttributeError), so re-import
            # the module under a local alias
            import wandb as wandb_module

            # init wandb run
            wandb_run = wandb_module.init(
                project="Same or different activations",
                dir="../",
                name="SAME (ReLU) (non-shared f.e.) (wax50, load 0.8, small GCNs)",
                config=self.wandb_config,
                sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
                save_code=True,  # optional
            )
            # add wandb callback
            callbacks.append(
                WandbCallback(
                    model_save_path=f"../models/{wandb_run.id}",
                    verbose=2,
                    model_save_freq=10_000
                )
            )

        # add callback for hyperparameters logging
        # (0 eval envs when no evaluation environment was created)
        n_eval_envs = self.eval_env.num_envs if self.eval_env is not None else 0
        callbacks.append(
            HParamCallback(
                self.tr_env.num_envs,
                n_eval_envs,
                self.nsprs_per_ep,
                self.tr_load,
                tr_max_ep_steps=self.max_ep_steps if self.time_limit else None,
                use_placement_state=self.placement_state,
            ),
        )

        # model training
        self.model.learn(
            total_timesteps=tot_steps,
            log_interval=log_interval,
            callback=callbacks
        )

        if wandb_run is not None:
            wandb_run.finish()
195 |
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Union, List, Optional, Type
2 |
3 | import gym
4 | import networkx as nx
5 | import numpy as np
6 |
7 | from gym.utils.env_checker import check_env
8 | from network_simulator import NetworkSimulator
9 | from wrappers import NSPRsGeneratorHADRL, RemovePlacementState, DynamicConnectivity
10 | from sb3_contrib.common.wrappers import ActionMasker
11 |
12 |
def make_env(
    psn_path: str,
    base_env_kwargs: Optional[dict] = None,
    time_limit: bool = False,
    time_limit_kwargs: Optional[dict] = None,
    reset_load_class: Optional[Type[gym.Wrapper]] = None,
    reset_load_kwargs: Optional[dict] = None,
    generate_nsprs: bool = False,
    nsprs_gen_kwargs: Optional[dict] = None,
    placement_state: bool = True,
    dynamic_connectivity: bool = False,
    dynamic_connectivity_kwargs: Optional[dict] = None,
    dynamic_topology: bool = False,
):
    """ Create the environment.

    It can be wrapped with different wrappers, all with their own arguments,
    namely: TimeLimit, a reset-with-load wrapper, NSPRsGeneratorHADRL,
    DynamicConnectivity, ActionMasker and RemovePlacementState.

    :param psn_path: path to the PSN file
    :param base_env_kwargs: kwargs of the base environment
    :param time_limit: if True, the env is wrapped with TimeLimit wrapper
    :param time_limit_kwargs: kwargs of the TimeLimit wrapper
    :param reset_load_class: class of the wrapper to reset the PSN with load
    :param reset_load_kwargs: kwargs for the reset-with-load wrapper
    :param generate_nsprs: if True, the env is wrapped with NSPRsGeneratorHADRL wrapper
    :param nsprs_gen_kwargs: kwargs for the NSPRsGeneratorHADRL wrapper
    :param placement_state: if False, adds a wrapper that removes the placement state from the observations
    :param dynamic_connectivity: if True, the connectivity of the PSN changes in every episode
    :param dynamic_connectivity_kwargs: kwargs for the DynamicConnectivity
        wrapper (when None/empty, DynamicConnectivity's own defaults apply)
    :param dynamic_topology: if True, the topology of the PSN changes in every episode.
        Note: if True, 'dynamic_connectivity' will be forced to True as well,
        as there's no way to change the nodes and not the connectivity.
    """
    # normalize every optional kwargs dict (no mutable default arguments, and
    # the old code crashed on '**None' when generate_nsprs was True with no
    # nsprs_gen_kwargs — guard it like all the others)
    base_env_kwargs = {} if base_env_kwargs is None else base_env_kwargs
    time_limit_kwargs = {} if time_limit_kwargs is None else time_limit_kwargs
    reset_load_kwargs = {} if reset_load_kwargs is None else reset_load_kwargs
    nsprs_gen_kwargs = {} if nsprs_gen_kwargs is None else nsprs_gen_kwargs
    dynamic_connectivity_kwargs = {} if dynamic_connectivity_kwargs is None else dynamic_connectivity_kwargs

    # base env
    env = NetworkSimulator(psn_path, **base_env_kwargs)

    # apply wrappers
    if time_limit:
        env = gym.wrappers.TimeLimit(env, **time_limit_kwargs)
    if generate_nsprs:
        env = NSPRsGeneratorHADRL(env, **nsprs_gen_kwargs)
    if dynamic_topology:
        env = ActionMasker(env, action_mask_fn=env.get_action_mask)
        env = DynamicConnectivity(env, nodes_mask=env.get_action_mask, **dynamic_connectivity_kwargs)
        # DynamicConnectivity is already applied, don't wrap twice below
        dynamic_connectivity = False
    if dynamic_connectivity:
        env = DynamicConnectivity(env, **dynamic_connectivity_kwargs)
    if reset_load_class is not None:
        env = reset_load_class(env, **reset_load_kwargs)
    if not placement_state:
        env = RemovePlacementState(env)
    # check_env(env)    # could make the code crash with masked actions
    return env
71 |
72 |
def create_HADRL_PSN_file(
        path: str,
        n_CCPs: int = 1,
        n_CDCs: int = 5,
        n_EDCs: int = 15,
        n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4),
        cpu_cap: int = 50,
        ram_cap: int = 300,
        intra_CCP_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_CDC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_EDC_bw_cap: int = 10000,  # 10000 Mbps = 10 Gbps
        outer_DC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        n_EDCs_per_CDC: int = 3,
):
    """ Initialize the PSN as in the HA-DRL paper

    :param path: path where to save the file defining the PSN
    :param n_CCPs: number of CCPs
    :param n_CDCs: number of CDCs
    :param n_EDCs: number of EDCs
    :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within a EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    srv_per_CCP, srv_per_CDC, srv_per_EDC = n_servers_per_DC

    # consecutive integer ids: first all CCP servers, then CDC servers,
    # then EDC servers, then one switch per DC, then one router per DC
    n_CCP_servers = n_CCPs * srv_per_CCP
    n_CDC_servers = n_CDCs * srv_per_CDC
    n_EDC_servers = n_EDCs * srv_per_EDC

    CCP_ids = np.arange(n_CCP_servers).reshape(n_CCPs, srv_per_CCP)
    CDC_ids = np.arange(
        n_CCP_servers,
        n_CCP_servers + n_CDC_servers).reshape(n_CDCs, srv_per_CDC)
    first_EDC_id = n_CCP_servers + n_CDC_servers
    EDC_ids = np.arange(
        first_EDC_id,
        first_EDC_id + n_EDC_servers).reshape(n_EDCs, srv_per_EDC)

    # one switch and one router per DC (based on Fig. 1 in HA-DRL paper)
    n_DCs = n_CCPs + n_CDCs + n_EDCs
    first_switch_id = first_EDC_id + n_EDC_servers
    switches_ids = list(range(first_switch_id, first_switch_id + n_DCs))
    first_router_id = first_switch_id + n_DCs
    routers_ids = list(range(first_router_id, first_router_id + n_DCs))

    # build the graph: nodes first, then the HA-DRL wiring, then save it
    g = nx.Graph(Label="HA-DRL PSN")
    _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
                  cpu_cap, ram_cap)
    _create_HADRL_links(
        g, n_CCPs, n_CDCs, n_EDCs, srv_per_CCP, srv_per_CDC,
        srv_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
        intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap,
        n_EDCs_per_CDC)
    nx.write_graphml(g, path)
142 |
143 |
def create_HEENSO_PSN_file(
        path: str,
        n_CCPs: int = 1,
        n_CDCs: int = 5,
        n_EDCs: int = 15,
        n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4),
        cpu_cap: int = 50,
        ram_cap: int = 300,
        intra_CCP_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_CDC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_EDC_bw_cap: int = 10000,  # 10000 Mbps = 10 Gbps
        outer_DC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        n_EDCs_per_CDC: int = 3,
):
    """ Initialize the PSN as in the paper "Heuristic for Edge-enable Network Slice Optimization
    using the Power of Two Choices"

    Disclaimer: the topology is slightly different, the ring of nodes in Fig. 4
    of the paper is brought one step closer to the CCP and nodes 26 to 30 are
    removed, since they don't increase the number of possible paths across the PSN
    (they would only make some paths 1 step longer, reducing the reward).

    :param path: path where to save the file defining the PSN
    :param n_CCPs: number of CCPs
    :param n_CDCs: number of CDCs
    :param n_EDCs: number of EDCs
    :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within a EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    srv_per_CCP, srv_per_CDC, srv_per_EDC = n_servers_per_DC

    # consecutive integer ids: CCP servers, CDC servers, EDC servers,
    # one switch per DC, then one router per CDC/EDC (CCPs have no routers)
    n_CCP_servers = n_CCPs * srv_per_CCP
    n_CDC_servers = n_CDCs * srv_per_CDC
    n_EDC_servers = n_EDCs * srv_per_EDC

    CCP_ids = np.arange(n_CCP_servers).reshape(n_CCPs, srv_per_CCP)
    CDC_ids = np.arange(
        n_CCP_servers,
        n_CCP_servers + n_CDC_servers).reshape(n_CDCs, srv_per_CDC)
    first_EDC_id = n_CCP_servers + n_CDC_servers
    EDC_ids = np.arange(
        first_EDC_id,
        first_EDC_id + n_EDC_servers).reshape(n_EDCs, srv_per_EDC)

    # one switch per DC (based on Fig. 4 in HEENSO paper)
    n_switches = n_CCPs + n_CDCs + n_EDCs
    first_switch_id = first_EDC_id + n_EDC_servers
    switches_ids = list(range(first_switch_id, first_switch_id + n_switches))

    # one router per CDC and EDC (based on Fig. 4 in HEENSO paper)
    n_routers = n_CDCs + n_EDCs
    first_router_id = first_switch_id + n_switches
    routers_ids = list(range(first_router_id, first_router_id + n_routers))

    # build the graph: nodes first, then the HEENSO wiring, then save it
    g = nx.Graph(Label="HEENSO PSN")
    _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
                  cpu_cap, ram_cap)
    _create_HEENSO_links(
        g, n_CCPs, n_CDCs, n_EDCs, srv_per_CCP, srv_per_CDC,
        srv_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
        intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap,
        n_EDCs_per_CDC)
    nx.write_graphml(g, path)
220 |
221 |
def _create_nodes(
        g: nx.Graph,
        CCP_ids: Union[np.ndarray, List[int]],
        CDC_ids: Union[np.ndarray, List[int]],
        EDC_ids: Union[np.ndarray, List[int]],
        switches_ids: Union[np.ndarray, List[int]],
        routers_ids: Union[np.ndarray, List[int]],
        cpu_cap: int,
        ram_cap: int,
):
    """ Add all PSN nodes to the graph: servers carry CPU/RAM capacities,
    switches and routers only a NodeType. """
    # servers, in CCP -> CDC -> EDC order
    for dc_ids in (CCP_ids, CDC_ids, EDC_ids):
        for server_id in np.asarray(dc_ids).flatten():
            g.add_node(server_id, NodeType="server",
                       CPUcap=cpu_cap, RAMcap=ram_cap)
    # one switch / router node per id, no capacities attached
    for switch_id in switches_ids:
        g.add_node(switch_id, NodeType="switch")
    for router_id in routers_ids:
        g.add_node(router_id, NodeType="router")
241 |
242 |
def _create_HADRL_links(
        g: nx.Graph,
        n_CCPs: int,
        n_CDCs: int,
        n_EDCs: int,
        n_servers_per_CCP: int,
        n_servers_per_CDC: int,
        n_servers_per_EDC: int,
        CCP_ids: Union[np.ndarray, List[int]],
        CDC_ids: Union[np.ndarray, List[int]],
        EDC_ids: Union[np.ndarray, List[int]],
        switches_ids: Union[np.ndarray, List[int]],
        routers_ids: Union[np.ndarray, List[int]],
        intra_CCP_bw_cap: int,
        intra_CDC_bw_cap: int,
        intra_EDC_bw_cap: int,
        outer_DC_bw_cap: int,
        n_EDCs_per_CDC: int
):
    """ Wire the PSN like Fig. 1 of the HA-DRL paper: servers -> DC switch ->
    DC router, CDC routers up to CCP routers and down to EDC routers, plus a
    ring over all CDC/EDC routers. """
    # if the EDCs can be partitioned evenly among the CDCs, assign them
    # deterministically; otherwise pick each CDC's EDCs at random
    connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True
    CCPs_switches = switches_ids[:n_CCPs]
    CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs]
    EDCs_switches = switches_ids[n_CCPs + n_CDCs:]
    CCPs_routers = routers_ids[:n_CCPs]
    CDCs_routers = routers_ids[n_CCPs:n_CCPs + n_CDCs]
    EDCs_routers = routers_ids[n_CCPs + n_CDCs:]

    # connect CCPs' servers to their switches
    for i in range(n_CCPs):
        for j in range(n_servers_per_CCP):
            g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap)

    # connect CDCs' servers to their switches
    for i in range(n_CDCs):
        for j in range(n_servers_per_CDC):
            g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap)

    # connect EDCs' servers to their switches
    for i in range(n_EDCs):
        for j in range(n_servers_per_EDC):
            g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap)

    # connect CCPs' switches to their routers
    for i in range(len(CCPs_switches)):
        g.add_edge(CCPs_switches[i], CCPs_routers[i], BWcap=intra_CCP_bw_cap)

    # connect CDCs' switches to their routers
    for i in range(len(CDCs_switches)):
        g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap)

    # connect EDCs' switches to their routers
    for i in range(len(EDCs_switches)):
        g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap)

    # connect CDCs' routers to CCPs' routers
    for i in range(n_CDCs):
        # each CDC is connected to one CCP
        corresp_CCP = np.random.randint(0, n_CCPs)
        g.add_edge(CDCs_routers[i], CCPs_routers[corresp_CCP], BWcap=outer_DC_bw_cap)

    # connect each CDCs' router to n EDCs' routers
    for i in range(n_CDCs):
        if connect_CDCs_EDCs_randomly:
            corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False)
        else:
            # FIX: the range end used 'n_EDCs * i' instead of 'n_EDCs_per_CDC * i'
            # (harmless only because just the first n_EDCs_per_CDC entries are
            # indexed below); now consistent with _create_HEENSO_links
            corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs_per_CDC * i + n_EDCs_per_CDC))
        for j in range(n_EDCs_per_CDC):
            g.add_edge(CDCs_routers[i], EDCs_routers[corresp_EDCs[j]],
                       BWcap=outer_DC_bw_cap)

    # connect CDCs and EDCs' routers in a circular way (like in Fig. 1 in HA-DRL paper)
    CDCs_and_EDCs_routers = np.concatenate((CDCs_routers, EDCs_routers))
    for i in range(len(CDCs_and_EDCs_routers)):
        g.add_edge(CDCs_and_EDCs_routers[i],
                   CDCs_and_EDCs_routers[(i + 1) % len(CDCs_and_EDCs_routers)],
                   BWcap=outer_DC_bw_cap)
319 |
320 |
def _create_HEENSO_links(
        g: nx.Graph,
        n_CCPs: int,
        n_CDCs: int,
        n_EDCs: int,
        n_servers_per_CCP: int,
        n_servers_per_CDC: int,
        n_servers_per_EDC: int,
        CCP_ids: Union[np.ndarray, List[int]],
        CDC_ids: Union[np.ndarray, List[int]],
        EDC_ids: Union[np.ndarray, List[int]],
        switches_ids: Union[np.ndarray, List[int]],
        routers_ids: Union[np.ndarray, List[int]],
        intra_CCP_bw_cap: int,
        intra_CDC_bw_cap: int,
        intra_EDC_bw_cap: int,
        outer_DC_bw_cap: int,
        n_EDCs_per_CDC: int
):
    """ Wire the PSN like Fig. 4 of the HEENSO paper: servers -> DC switch,
    CDC/EDC switches -> their routers, CDC routers up to CCP switches and CDC
    switches down to EDC routers, plus a ring over the CDC routers. """
    # if the EDCs can be partitioned evenly among the CDCs, assign them
    # deterministically; otherwise pick each CDC's EDCs at random
    connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True
    CCPs_switches = switches_ids[:n_CCPs]
    CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs]
    EDCs_switches = switches_ids[n_CCPs + n_CDCs:]
    # unlike the HA-DRL topology, CCPs have no routers here: routers_ids
    # holds only CDC routers followed by EDC routers
    CDCs_routers = routers_ids[:n_CDCs]
    EDCs_routers = routers_ids[n_CDCs:]

    # connect CCPs' servers to their switches
    for i in range(n_CCPs):
        for j in range(n_servers_per_CCP):
            g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap)

    # connect CDCs' servers to their switches
    for i in range(n_CDCs):
        for j in range(n_servers_per_CDC):
            g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap)

    # connect EDCs' servers to their switches
    for i in range(n_EDCs):
        for j in range(n_servers_per_EDC):
            g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap)

    # connect CDCs' switches to their routers
    for i in range(len(CDCs_switches)):
        g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap)

    # connect EDCs' switches to their routers
    for i in range(len(EDCs_switches)):
        g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap)

    # connect CDCs' routers to CCPs' switches
    for i in range(n_CDCs):
        # each CDC is connected to one CCP
        corresp_CCP = np.random.randint(0, n_CCPs)
        g.add_edge(CDCs_routers[i], CCPs_switches[corresp_CCP], BWcap=outer_DC_bw_cap)

    # connect each CDCs' switch to n EDCs' routers
    for i in range(n_CDCs):
        if connect_CDCs_EDCs_randomly:
            corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False)
        else:
            # deterministic case: CDC i gets EDCs [3i, 3i+1, 3i+2] (for n_EDCs_per_CDC=3)
            corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs_per_CDC * i + n_EDCs_per_CDC))
        for j in range(n_EDCs_per_CDC):
            g.add_edge(CDCs_switches[i], EDCs_routers[corresp_EDCs[j]],
                       BWcap=outer_DC_bw_cap)

    # connect CDCs routers in a circular way (like in Fig. 4 in HEENSO paper)
    for i in range(len(CDCs_routers)):
        g.add_edge(CDCs_routers[i],
                   CDCs_routers[(i + 1) % len(CDCs_routers)],
                   BWcap=outer_DC_bw_cap)
391 |
--------------------------------------------------------------------------------
/src/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from .reset_with_load import ResetWithFixedLoad, ResetWithRandLoad, ResetWithLoadMixed, ResetWithRealisticLoad
2 | from .hadrl_nsprs_generator import NSPRsGeneratorHADRL
3 | from .no_placement_state import RemovePlacementState
4 | from .dynamic_connectivity import DynamicConnectivity
5 |
--------------------------------------------------------------------------------
/src/wrappers/dynamic_connectivity.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import Callable, Optional
3 | import gym
4 | import networkx as nx
5 | import numpy as np
6 |
7 |
class DynamicConnectivity(gym.Wrapper):
    """ Changes the connectivity of the PSN episode by episode.

    At every reset all the edges are removed from the PSN and a new random
    topology is created, whose total bandwidth matches the capacity of the
    original PSN (scaled by the fraction of available nodes).
    """

    def __init__(
            self,
            env: gym.Env,
            link_bw: int = 10_000,
            nodes_mask: Optional[Callable[[gym.Env], np.ndarray]] = None
    ):
        """
        :param env: gym environment
        :param link_bw: total bandwidth capacity of each link
        :param nodes_mask: if not None, a callable returning a boolean mask of
            the PSN's nodes; nodes where the mask is False are removed from the
            PSN graph at every reset
        """
        super().__init__(env)
        self.nodes_mask = nodes_mask
        self.link_bw = link_bw
        # BW capacity of the original (fully wired) PSN, used as the target
        # amount of bandwidth to re-create at every reset
        self.tot_bw_cap = sum(edge['BWcap'] for edge in self.env.psn.edges.values())
        # bandwidth added to the PSN so far (reset at every episode)
        self.placed_bw = 0

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        # remove all edges from the PSN
        self.remove_all_edges()
        # eventually remove masked nodes
        if self.nodes_mask is not None:
            self.remove_masked_nodes()
        # initialize the bandwidth placed in the PSN
        self.placed_bw = 0
        # add edges in the PSN until the target bandwidth capacity is reached
        self.add_edges()
        return self.env.obs_dict  # updated in self.add_edges()

    def remove_all_edges(self):
        """ Remove every edge from the PSN """
        # materialize the edge list first: removing edges while iterating over
        # the live networkx edge view raises a RuntimeError
        for u, v in list(self.env.psn.edges.keys()):
            self.env.psn.remove_edge(u, v)

    def remove_masked_nodes(self):
        """ Remove from the PSN the nodes where the mask is False """
        nodes_mask = self.nodes_mask(self.env)
        # indexes where the mask is False
        indexes_to_remove = np.where(np.logical_not(nodes_mask))[0]
        for idx in indexes_to_remove:
            node_id = self.env.servers_map_idx_id[idx]
            self.env.psn.remove_node(node_id)

    def add_edges(self):
        """Add edges to the PSN

        Chooses every time a random node and an unvisited node and connects them.
        When no nodes are isolated, if the target BW hasn't been reached, it does so
        by adding further random links in the PSN.
        """
        # zero the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] = np.zeros_like(self.env.obs_dict['bw_avails'])
        # node set doesn't change in this method -> hoist the list out of the loops
        all_nodes = list(self.env.psn.nodes)
        # set of unvisited nodes
        unvisited = set(all_nodes)
        while unvisited:
            # sample a node from the PSN
            u = random.choice(all_nodes)
            # sample an unvisited node to connect it to
            v = random.choice(list(unvisited))
            if u != v:
                # connect the 2 nodes
                self.env.psn.add_edge(u, v, BWcap=self.link_bw, availBW=self.link_bw)
                # save the amount of bandwidth introduced in the PSN
                self.placed_bw += self.link_bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                self.env.obs_dict['bw_avails'][u_idx] += self.link_bw
                self.env.obs_dict['bw_avails'][v_idx] += self.link_bw
                # remove the nodes from the set of unvisited nodes
                unvisited.remove(v)
                if u in unvisited:
                    unvisited.remove(u)

        # if the total bandwidth of the PSN hasn't been reached, reach it by adding random links
        perc_avail_nodes = self.env.perc_avail_nodes
        tot_bw = self.tot_bw_cap * perc_avail_nodes  # cut tot bw proportionally to number of nodes
        while self.placed_bw < tot_bw:
            # NOTE: random.sample needs a sequence (a networkx NodeView is
            # rejected since Python 3.11), hence the hoisted list above
            u, v = random.sample(all_nodes, 2)
            # check that the 2 nodes aren't connected already
            if (u, v) not in self.env.psn.edges:
                # last link may carry less than link_bw to hit the target exactly
                bw = min(self.link_bw, tot_bw - self.placed_bw)
                self.env.psn.add_edge(u, v, BWcap=bw, availBW=bw)
                self.placed_bw += bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                # credit the bandwidth actually added ('bw', not self.link_bw,
                # which may exceed it on the last link)
                self.env.obs_dict['bw_avails'][u_idx] += bw
                self.env.obs_dict['bw_avails'][v_idx] += bw

        # normalize the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] /= np.max(self.env.obs_dict['bw_avails'])
--------------------------------------------------------------------------------
/src/wrappers/hadrl_nsprs_generator.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import math
3 | import warnings
4 |
5 | import gym
6 | import networkx as nx
7 | import numpy as np
8 |
9 |
class NSPRsGeneratorHADRL(gym.Wrapper):
    """
    Wrapper to make the simulator generate data the same way as in the
    paper HA-DRL[1].

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            env: gym.Env,
            nsprs_per_ep: int = 5,
            vnfs_per_nspr: int = 5,
            cpu_req_per_vnf: int = 25,
            ram_req_per_vnf: int = 150,
            bw_req_per_vl: int = 2000,
            load: float = 0.5,
            always_one: bool = False
    ):
        """
        :param env: environment to wrap
        :param nsprs_per_ep: max number of NSPRs per episode (None -> no limit)
        :param vnfs_per_nspr: number of VNFs in each generated NSPR (chain)
        :param cpu_req_per_vnf: CPU requirement of each VNF
        :param ram_req_per_vnf: RAM requirement of each VNF
        :param bw_req_per_vl: BW requirement of each virtual link
        :param load: target load of the PSN, used to derive the arrival rate
        :param always_one: if True, generate exactly one NSPR per episode
        """
        super().__init__(env)
        if self.env.nsprs_per_episode is not None:
            warnings.warn("The environment already has a fixed number of NSPRs "
                          "per episode. The wrapper will override this value.")
        if nsprs_per_ep is None:
            # no limit, just use max steps (if not None), otherwise infinite episode
            nsprs_per_ep = math.inf
        self.unwrapped.nsprs_per_episode = nsprs_per_ep
        self.nsprs_per_ep = nsprs_per_ep
        self.vnfs_per_nspr = vnfs_per_nspr
        self.cpu_req_per_vnf = cpu_req_per_vnf
        self.ram_req_per_vnf = ram_req_per_vnf
        self.bw_req_per_vl = bw_req_per_vl
        self.load = load
        self.always_one = always_one
        self.tot_cpu_cap = self._get_tot_cpu_cap()
        self.nspr_model = self._get_nspr_model()
        self.max_steps = None
        try:
            # if env is wrapped in TimeLimit, max arrival time of NSPRs is max episode length
            self.max_steps = self.env._max_episode_steps
            self.nsprs_duration = min(self.max_steps, 100)
        except (AttributeError, TypeError):
            # AttributeError: env not wrapped in TimeLimit;
            # TypeError: _max_episode_steps exists but is None.
            # (the former `except AttributeError or TypeError` only caught AttributeError)
            self.nsprs_duration = 100
        # computed according to Sec. VII.C of HA-DRL paper
        self.arr_rate = self.load * self.tot_cpu_cap / self.nsprs_duration / self.cpu_req_per_vnf / self.vnfs_per_nspr

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        # replace the env's NSPRs with freshly generated ones
        self.unwrapped.nsprs = self._generate_nsprs()
        self.unwrapped.waiting_nsprs += self.unwrapped.nsprs.get(self.unwrapped.time_step, [])
        self.unwrapped.pick_next_nspr()
        obs = self.unwrapped.update_nspr_state()
        return obs

    def _get_nspr_model(self):
        """ Build the template NSPR: a chain of identical VNFs """
        nspr_model = nx.DiGraph()
        nspr_model.add_node(0, reqCPU=self.cpu_req_per_vnf,
                            reqRAM=self.ram_req_per_vnf, placed=-1)
        for i in range(1, self.vnfs_per_nspr):
            nspr_model.add_edge(i - 1, i, reqBW=self.bw_req_per_vl, placed=[])
            nspr_model.add_node(i, reqCPU=self.cpu_req_per_vnf,
                                reqRAM=self.ram_req_per_vnf, placed=-1)
        return nspr_model

    def _generate_nsprs(self):
        """ Generate the episode's NSPRs, as a dict {arrival time: list of NSPRs} """
        if self.always_one:
            nsprs_dict = self._generate_one_nspr()
        elif self.arr_rate >= 0.3:
            nsprs_dict = self._generate_nsprs_poisson()
        else:
            # Poisson sampling would almost always yield 0 at low rates
            nsprs_dict = self._generate_nsprs_deterministic()
        return nsprs_dict

    def _generate_one_nspr(self):
        """ Generate a single NSPR arriving at the current time step """
        nspr = self._get_nspr_model()
        nspr.graph['ArrivalTime'] = self.env.time_step
        nspr.graph['duration'] = 100
        return {self.env.time_step: [nspr]}

    def _generate_nsprs_poisson(self):
        """ Generate NSPRs with Poisson-distributed arrivals (rate = arr_rate) """
        cur_arr_time = self.env.time_step
        created_nsprs = 0
        nsprs_dict = {}
        while True:
            # NOTE: if self.max_steps is None, and the poisson sampling keeps
            # generating 0, this will loop forever, but since this is executed
            # only for a sufficiently high arrival rate, this is extremely unlikely to happen
            poisson_sample = np.random.poisson(lam=self.arr_rate)
            if poisson_sample > 0:
                # don't exceed the per-episode budget
                nsprs_to_create = min(poisson_sample, self.nsprs_per_ep - created_nsprs)
                if nsprs_to_create <= 0:
                    break
                cur_nspr = copy.deepcopy(self.nspr_model)
                cur_nspr.graph['ArrivalTime'] = cur_arr_time
                cur_nspr.graph['duration'] = self.nsprs_duration
                nsprs_dict[cur_arr_time] = [copy.deepcopy(cur_nspr) for _ in range(nsprs_to_create)]
                created_nsprs += nsprs_to_create
            cur_arr_time += 1
            if self.max_steps is not None and cur_arr_time - self.env.time_step > self.max_steps:
                break
        return nsprs_dict

    def _generate_nsprs_deterministic(self):
        """ Generate one NSPR every round(1 / arr_rate) steps.

        Used for low arrival rates, where Poisson sampling would almost
        always return 0 and the generation loop could run for too long.
        """
        if self.arr_rate >= 1:
            # this function is called only for low arrival rates
            raise NotImplementedError
        one_every_how_many_steps = round(1 / self.arr_rate)
        nsprs_dict = {}
        step = self.env.time_step
        created_nsprs = 0
        while True:
            if step % one_every_how_many_steps == 0:
                cur_nspr = copy.deepcopy(self.nspr_model)
                cur_nspr.graph['ArrivalTime'] = step
                cur_nspr.graph['duration'] = self.nsprs_duration
                nsprs_dict[step] = [cur_nspr]
                created_nsprs += 1
            step += 1
            if created_nsprs >= self.nsprs_per_ep or \
                    (self.max_steps is not None and step - self.env.time_step > self.max_steps):
                break
        return nsprs_dict

    def _get_tot_cpu_cap(self):
        """ Total CPU capacity of the PSN's servers """
        tot_cpu_cap = 0
        for node_id in self.env.psn.nodes:
            node = self.env.psn.nodes[node_id]
            if node['NodeType'] == 'server':
                tot_cpu_cap += node['CPUcap']
        return tot_cpu_cap
152 |
--------------------------------------------------------------------------------
/src/wrappers/no_placement_state.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from gym.spaces import Dict, Box
5 |
6 |
class RemovePlacementState(gym.ObservationWrapper):
    """ Observation wrapper that drops the placement state from the env's
    observations, keeping only the PSN availabilities and the NSPR state. """

    def __init__(self, env):
        super().__init__(env)
        # upper bound used where gym.spaces.Box cannot take math.inf / sys.maxsize
        ONE_BILLION = 1_000_000_000
        num_nodes = len(self.unwrapped.psn.nodes)

        def avail_box():
            # per-node availability, normalized to [0, 1]
            return Box(low=0., high=1., shape=(num_nodes,), dtype=np.float32)

        def req_box(dtype=np.float32):
            # scalar requirement of the current VNF
            return Box(low=0, high=ONE_BILLION, shape=(1,), dtype=dtype)

        self.observation_space = Dict({
            # PSN STATE
            'cpu_avails': avail_box(),
            'ram_avails': avail_box(),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': avail_box(),

            # NSPR STATE
            'cur_vnf_cpu_req': req_box(),
            'cur_vnf_ram_req': req_box(),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': req_box(),
            'vnfs_still_to_place': req_box(dtype=int),
        })

    def observation(self, obs):
        """returns the observation without the placement state """
        kept_keys = ('cpu_avails', 'ram_avails', 'bw_avails',
                     'cur_vnf_cpu_req', 'cur_vnf_ram_req',
                     'cur_vnf_bw_req', 'vnfs_still_to_place')
        return {key: obs[key] for key in kept_keys}
--------------------------------------------------------------------------------
/src/wrappers/reset_with_load.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | from abc import ABC, abstractmethod
4 | from typing import Union, Dict, Tuple
5 |
6 | import gym
7 | import networkx as nx
8 | import numpy as np
9 | from stable_baselines3.common.vec_env import VecEnv
10 |
11 |
class ResetWithLoad(gym.Wrapper, ABC):
    """ Abstract class. Wrapper to reset the PSN with a certain tr_load """

    def __init__(self, env: gym.Env, **kwargs):
        super().__init__(env)
        # load percentages applied at reset; overwritten by the subclasses
        self.cpu_load = self.ram_load = self.bw_load = 0.

    def reset(self, **kwargs):
        # NOTE(review): deliberately disabled — the legacy implementation below
        # is unreachable and kept for reference only
        raise NotImplementedError  # doesn't work anymore, needs to be adapted
        self.env.reset(**kwargs)
        self._init_psn_load()
        obs = self.env.update_nspr_state() # the obs in the env.reset method is outdated
        return obs

    def _init_psn_load(self):
        """ Initialize the PSN's load with the specified values """
        # scale every server's / link's availability by (1 - load percentage)
        for _, node in self.env.psn.nodes.items():
            if node['NodeType'] == "server":
                node['availCPU'] = int(node['CPUcap'] * (1 - self.cpu_load))
                node['availRAM'] = int(node['RAMcap'] * (1 - self.ram_load))
        for _, link in self.env.psn.edges.items():
            link['availBW'] = int(link['BWcap'] * (1 - self.bw_load))
34 |
35 |
class ResetWithFixedLoad(ResetWithLoad):
    """ Reset the PSN with a certain - fixed - amount of tr_load """

    def __init__(self, env: gym.Env, reset_load_perc: Union[float, dict] = 0.,
                 **kwargs):
        """ Constructor

        :param env: :param env: the environment to wrap
        :param reset_load_perc: init percentage of tr_load of the PSN's resources at each reset:
            if float, that value applies to all the resources for all nodes and links;
            if dict, it can specify the tr_load for each type of resource.
        """
        super().__init__(env)
        assert isinstance(reset_load_perc, (float, dict))
        # define the tr_load percentages of each resource
        if isinstance(reset_load_perc, dict):
            # resource-specific load values (missing keys default to 0)
            self.cpu_load = reset_load_perc.get('availCPU', 0)
            self.ram_load = reset_load_perc.get('availRAM', 0)
            self.bw_load = reset_load_perc.get('availBW', 0)
            assert 0 <= self.cpu_load <= 1 and 0 <= self.ram_load <= 1 and 0 <= self.bw_load <= 1
        else:
            # one shared load value for every resource
            assert 0 <= reset_load_perc <= 1
            self.cpu_load = self.ram_load = self.bw_load = reset_load_perc
59 |
60 |
class ResetWithRandLoad(ResetWithLoad):
    """ Reset the PSN with a random uniform amount of load """

    def __init__(self, env: gym.Env, min_perc: Union[float, dict],
                 max_perc: Union[float, dict], same_for_all: bool = True,
                 **kwargs):
        """ Constructor

        :param env: the environment to wrap
        :param min_perc: minimum percentage of tr_load of the PSN's resources at each reset
        :param max_perc: maximum percentage of tr_load of the PSN's resources at each reset
        :param same_for_all: if True, the same random value is used for all the nodes / links
        """
        super().__init__(env)
        self.same_for_all = same_for_all

        # min_perc and max_perc must either both be floats or both be dicts
        both_floats = isinstance(min_perc, float) and isinstance(max_perc, float)
        both_dicts = isinstance(min_perc, dict) and isinstance(max_perc, dict)
        assert both_floats or both_dicts

        if both_floats:
            # one [min, max] range shared by every resource
            assert 0 <= min_perc <= 1 and 0 <= max_perc <= 1 and min_perc <= max_perc
            self.min_cpu = self.min_ram = self.min_bw = min_perc
            self.max_cpu = self.max_ram = self.max_bw = max_perc
        else:
            # resource-specific [min, max] ranges (missing keys default to 0)
            self.min_cpu = min_perc.get('availCPU', 0)
            self.min_ram = min_perc.get('availRAM', 0)
            self.min_bw = min_perc.get('availBW', 0)
            self.max_cpu = max_perc.get('availCPU', 0)
            self.max_ram = max_perc.get('availRAM', 0)
            self.max_bw = max_perc.get('availBW', 0)
            for lo, hi in ((self.min_cpu, self.max_cpu),
                           (self.min_ram, self.max_ram),
                           (self.min_bw, self.max_bw)):
                assert 0 <= lo <= 1 and 0 <= hi <= 1 and lo <= hi

    def reset(self, **kwargs):
        if self.same_for_all:
            # one draw per resource, shared by every node / link
            self.cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
            self.ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
            self.bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
        return super().reset(**kwargs)

    def _init_psn_load(self):
        if self.same_for_all:
            super()._init_psn_load()
            return
        # independent random load for every server and every link
        for _, node in self.env.psn.nodes.items():
            if node['NodeType'] == "server":
                cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
                ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
                node['availCPU'] = int(node['CPUcap'] * (1 - cpu_load))
                node['availRAM'] = int(node['RAMcap'] * (1 - ram_load))
        for _, link in self.env.psn.edges.items():
            bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
            link['availBW'] = int(link['BWcap'] * (1 - bw_load))
117 |
118 |
class ResetWithLoadMixed(gym.Wrapper):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It selects a load percentage for each node/link such that the overall load of
    the PSN is the specified one. It means certain nodes will be free, others
    completely occupied and others will be partially occupied, so that the overall
    CPU/RAM capacity is the specified one. (Same thing for links with their bandwidth).
    """
    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super(ResetWithLoadMixed, self).__init__(env)
        self.random = rand_load
        # lazily initialized at the first _init_psn_load() call
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = None
        if not rand_load:
            assert isinstance(load, (float, dict)), "Param 'load' is of an incorrect type"
            if isinstance(load, float):
                assert 0. <= load <= 1.
                self.cpu_load = self.ram_load = self.bw_load = load
            elif isinstance(load, dict):
                self.cpu_load = load.get('cpu', 0)
                self.ram_load = load.get('ram', 0)
                self.bw_load = load.get('bw', 0)
                assert 0. <= self.cpu_load <= 1. and 0. <= self.ram_load <= 1. and \
                       0. <= self.bw_load <= 1.
        else:
            assert len(rand_range) == 2 and 0. <= rand_range[0] <= 1. and \
                   0. <= rand_range[1] <= 1.
            # candidate load values drawn at every reset (step of 0.1)
            self.rand_vals = np.arange(min(rand_range), max(rand_range), 0.1)

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self._init_psn_load()
        obs = self.env.update_nspr_state() # the obs in the env.reset method is outdated
        return obs

    def compute_link_weight(self, source, target, link):
        # weight function for shortest-path searches: links without enough BW
        # for the current VL (self.vl_req_bw, set in _init_psn_load) cost infinity
        return 1 if link['availBW'] >= self.vl_req_bw else math.inf

    def _init_psn_load(self):
        """ Initialize the PSN's load """
        if self.random:
            # random CPU/RAM load; BW load is 0.4 lower (clamped at 0)
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = load
            self.bw_load = max(0.0, load - 0.4)

        # TODO: careful: 'reset' here is called by each env in the VecEnv individually...
        # TODO: ...so, here, self.env is not the VecEnv but just a NetworkSimulator
        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        max_bws = self.env.get_attr('max_bw') if isinstance(self.env, VecEnv) else [self.env.max_bw]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None or self.tot_bw_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap
            self.tot_bw_cap = self.env.tot_bw_cap

        self.vl_req_bw = 2000
        for i, psn in enumerate(psns):
            max_cpu, max_ram, max_bw = max_cpus[i], max_rams[i], max_bws[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # amounts to remove, normalized by the max capacities
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            tot_bw_to_remove = self.bw_load * self.tot_bw_cap / max_bw
            # iterate over nodes in a random order and reduce the CPU/RAM availabilities
            # NOTE(review): assumes enough free server capacity exists to absorb the
            # target load, otherwise this loop would not terminate — confirm
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                    perc_to_remove = random.choice([0.5])
                    # CPU to remove
                    # x% of the node capacity (normalized)
                    cur_cpu_to_remove = perc_to_remove * node['CPUcap'] / max_cpu
                    cur_cpu_to_remove = min([round(cur_cpu_to_remove, 3),
                                             tot_cpu_to_remove,
                                             obs_dict['cpu_avails'][idx]])
                    # RAM to remove
                    cur_ram_to_remove = perc_to_remove * node['RAMcap'] / max_ram
                    cur_ram_to_remove = min([round(cur_ram_to_remove, 3),
                                             tot_ram_to_remove,
                                             obs_dict['ram_avails'][idx]])
                    # remove resources
                    obs_dict['cpu_avails'][idx] -= cur_cpu_to_remove
                    obs_dict['ram_avails'][idx] -= cur_ram_to_remove
                    tot_cpu_to_remove -= cur_cpu_to_remove
                    tot_ram_to_remove -= cur_ram_to_remove

            # iterate over links in random order and reduce the BW availability
            links = list(psn.edges.items())
            while tot_bw_to_remove > 0:
                extremes, link = random.sample(links, 1)[0]
                # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                perc_to_remove = random.choice([0.5])
                # cur_bw_to_remove = np.random.randint(0, link['availBW'] + 1, 1)[0]
                cur_bw_to_remove = perc_to_remove * link['BWcap']
                # cur_bw_to_remove = min(cur_bw_to_remove, tot_bw_to_remove * max_bw)
                idx_0, idx_1 = map_id_idx[extremes[0]], map_id_idx[extremes[1]]
                cur_bw_to_remove = min([round(cur_bw_to_remove, 6),
                                        tot_bw_to_remove * max_bw,
                                        link['availBW']])
                cur_bw_to_remove_normal = cur_bw_to_remove / max_bw
                # links' BW actually reduced because needed for shortest path calculation
                link['availBW'] -= cur_bw_to_remove
                obs_dict['bw_avails'][idx_0] -= cur_bw_to_remove_normal
                obs_dict['bw_avails'][idx_1] -= cur_bw_to_remove_normal
                tot_bw_to_remove -= cur_bw_to_remove_normal
246 |
247 |
class ResetWithLoadBinary(ResetWithLoadMixed):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It puts a certain amount of nodes with zero available resources, so that
    the overall load of the PSN is the one specified.

    Note: only the CPU and RAM are modified, not the bandwidth
    """

    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super().__init__(env, load, rand_load, rand_range)

    def _init_psn_load(self):
        """ Initialize the PSN's load by zeroing random servers' CPU/RAM """
        if self.random:
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = self.bw_load = load

        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None or self.tot_bw_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap
            self.tot_bw_cap = self.env.tot_bw_cap

        for i, psn in enumerate(psns):
            max_cpu, max_ram = max_cpus[i], max_rams[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # normalized amounts of CPU / RAM still to remove (BW is untouched)
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            # iterate over nodes in a random order and zero the CPU/RAM availabilities
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    cur_removed_cpu = obs_dict['cpu_avails'][idx]
                    # BUGFIX: track the RAM actually removed, instead of reusing
                    # the removed-CPU amount for the RAM counter as well
                    cur_removed_ram = obs_dict['ram_avails'][idx]
                    obs_dict['cpu_avails'][idx] = 0.
                    obs_dict['ram_avails'][idx] = 0.
                    tot_cpu_to_remove -= cur_removed_cpu
                    tot_ram_to_remove -= cur_removed_ram
312 |
313 |
class ResetWithRealisticLoad(gym.Wrapper):
    """ Wrapper that resets the PSN with a certain amount of load already.
    It does so in a way that resembles how the state of the PSN might be in
    case an agent has been actually placing NSPRs.

    It samples NSPRs from the ones that should arrive during the current episode
    and places their VNFs in random nodes and connects them via shortest path.
    This way the CPU/RAM and even the BW allocation should be realistic.
    """

    def __init__(self, env: gym.Env, cpu_load: float, **kwargs):
        """
        :param env: environment
        :param cpu_load: target percentage of CPU load of the PSN
        """
        super().__init__(env)
        assert 0. <= cpu_load <= 1.
        self.cpu_load = cpu_load

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self.init_psn_load()
        obs = self.env.update_nspr_state() # the obs in the env.reset method is outdated
        return obs

    def init_psn_load(self):
        """ Initialize the PSN with the target load """
        # target amount of CPU to remove, normalized by the max CPU capacity
        cpu_to_remove_normal = self.env.tot_cpu_cap * self.cpu_load / self.env.max_cpu
        removed_cpu_normal = 0
        while removed_cpu_normal < cpu_to_remove_normal:
            nspr = self.sample_nspr()
            # maps each placed VNF id to the physical node hosting it
            placement_map = {}
            # place all VNFs
            for vnf_id, vnf in nspr.nodes.items():
                node_id, node_idx = self.sample_suitable_node(vnf)
                placement_map[vnf_id] = node_id
                self.env.obs_dict['cpu_avails'][node_idx] -= vnf['reqCPU'] / self.env.max_cpu
                self.env.obs_dict['ram_avails'][node_idx] -= vnf['reqRAM'] / self.env.max_ram
                removed_cpu_normal += vnf['reqCPU'] / self.env.max_cpu
                if removed_cpu_normal >= cpu_to_remove_normal:
                    # target load reached: the current NSPR may stay partially placed
                    break
            # place all VLs
            for (src_vnf_id, dst_vnf_id), vl in nspr.edges.items():
                # read by compute_links_weights() during the shortest-path search
                self.req_bw = vl['reqBW']
                try:
                    src_node_id = placement_map[src_vnf_id]
                    dst_node_id = placement_map[dst_vnf_id]
                except KeyError:
                    # it means either src_vnf_id, dst_vnf_id or both hasn't been placed -> skip link placement
                    continue
                try:
                    path = nx.shortest_path(G=self.env.psn, source=src_node_id,
                                            target=dst_node_id, weight=self.compute_links_weights,
                                            method='dijkstra')
                    for i in range(len(path) - 1):
                        # if this VL exceeds the bandwidth available, don't place it, it's ok:
                        # it can happen when there is no available path.
                        # NOTE(review): this skips only the saturated hop, so the rest of the
                        # path is still allocated — presumably acceptable since the goal is
                        # just generating load, not a consistent placement; confirm
                        if self.env.psn.edges[path[i], path[i+1]]['availBW'] - vl['reqBW'] < 0:
                            continue
                        self.env.psn.edges[path[i], path[i+1]]['availBW'] -= vl['reqBW']
                        idx1 = self.env.map_id_idx[path[i]]
                        idx2 = self.env.map_id_idx[path[i+1]]
                        self.env.obs_dict['bw_avails'][idx1] -= vl['reqBW'] / self.env.max_bw
                        self.env.obs_dict['bw_avails'][idx2] -= vl['reqBW'] / self.env.max_bw
                except nx.NetworkXNoPath:
                    pass

    def compute_links_weights(self, source, target, link):
        """ Method called automatically by nx.shortest_path() """
        # links without enough BW for the current VL get an infinite weight
        return 1 if link['availBW'] >= self.req_bw else math.inf

    def sample_suitable_node(self, vnf: dict):
        """ Sample a random node with enough resources to host the VNF """
        # NOTE(review): loops forever if no server can host the VNF — presumably
        # the target load always leaves some suitable server; confirm
        server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
        server_id = self.env.servers_map_idx_id[server_idx]
        while not self.env.enough_avail_resources(server_id, vnf):
            server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
            server_id = self.env.servers_map_idx_id[server_idx]
        return server_id, server_idx

    def sample_nspr(self):
        """ Sample a NSPR among the ones that will arrive in this episode """
        arr_time = random.choice(list(self.env.nsprs.keys()))
        idx = np.random.choice(len(self.env.nsprs[arr_time]))
        nspr = self.env.nsprs[arr_time][idx]
        return nspr
400 |
--------------------------------------------------------------------------------