├── .gitignore ├── NSPRs ├── box.graphml ├── dummy_NSPR_1.graphml └── dummy_NSPR_2.graphml ├── PSNs ├── hadrl_1-16_5-10_15-4.graphml ├── hadrl_psn.graphml ├── hadrl_psn_1-10_1-6_1-4.graphml ├── heenso_1-16_5-10_15-4.graphml ├── new_hadrl_1-16_5-10_15-4.graphml ├── simple_hadrl_psn.graphml ├── waxman_100_servers.graphml ├── waxman_20_servers.graphml └── waxman_50_servers.graphml ├── README.md ├── requirements.txt └── src ├── callbacks ├── __init__.py ├── acceptance_ratio_callbacks.py ├── hparam_callback.py ├── psn_load_callback.py └── seen_nsprs_callback.py ├── demo.py ├── eval_script.py ├── heuristic_layers.py ├── network_simulator.py ├── policies ├── __init__.py ├── features_extractors │ ├── __init__.py │ └── hadrl_features_extractor.py ├── hadrl_policy.py └── mlp_extractors │ ├── __init__.py │ └── hadrl_mlp_extractor.py ├── reader.py ├── spaces ├── __init__.py └── discrete_with_negatives.py ├── trainer.py ├── utils.py └── wrappers ├── __init__.py ├── dynamic_connectivity.py ├── hadrl_nsprs_generator.py ├── no_placement_state.py └── reset_with_load.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Intellij stuff 10 | .idea/ 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # project-specific stuff 135 | tb_logs*/ 136 | models*/ 137 | wandb/ 138 | .vscode/ 139 | -------------------------------------------------------------------------------- /NSPRs/box.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Box 20 | 10000 21 | 0 22 | 4 23 | 24 | 25 | 4 26 | 50 27 | 28 | 29 | 10 30 | 5 31 | 32 | 33 | 1 34 | 5 35 | 36 | 37 | 8 38 | 72 39 | 40 | 41 | 42 | 10 43 | 10 44 | 45 | 46 | 10 47 | 10 48 | 49 | 50 | 10 51 | 10 52 | 53 | 54 | 10 55 | 10 56 | 57 | 58 | -------------------------------------------------------------------------------- /NSPRs/dummy_NSPR_1.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Triangle 20 | 10000 21 | 1 22 | 5 23 | 24 | 25 | 10 26 | 5 27 | 28 | 29 | 50 30 | 10 31 | 32 | 33 | 5 34 | 1 35 | 36 | 37 | 38 | 10 39 | 10 40 | 41 | 42 | 10 43 | 10 44 | 45 | 46 | 10 47 | 10 48 | 49 | 50 | -------------------------------------------------------------------------------- /NSPRs/dummy_NSPR_2.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Triangle 20 | 10000 21 | 2 
22 | 20 23 | 24 | 25 | 1 26 | 5 27 | 28 | 29 | 5 30 | 1 31 | 32 | 33 | 10 34 | 1 35 | 36 | 37 | 38 | 1 39 | 10 40 | 41 | 42 | 1 43 | 10 44 | 45 | 46 | 1 47 | 10 48 | 49 | 50 | -------------------------------------------------------------------------------- /PSNs/hadrl_psn_1-10_1-6_1-4.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | server 11 | 50 12 | 300 13 | 14 | 15 | server 16 | 50 17 | 300 18 | 19 | 20 | server 21 | 50 22 | 300 23 | 24 | 25 | server 26 | 50 27 | 300 28 | 29 | 30 | server 31 | 50 32 | 300 33 | 34 | 35 | server 36 | 50 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 300 43 | 44 | 45 | server 46 | 50 47 | 300 48 | 49 | 50 | server 51 | 50 52 | 300 53 | 54 | 55 | server 56 | 50 57 | 300 58 | 59 | 60 | server 61 | 50 62 | 300 63 | 64 | 65 | server 66 | 50 67 | 300 68 | 69 | 70 | server 71 | 50 72 | 300 73 | 74 | 75 | server 76 | 50 77 | 300 78 | 79 | 80 | server 81 | 50 82 | 300 83 | 84 | 85 | server 86 | 50 87 | 300 88 | 89 | 90 | server 91 | 50 92 | 300 93 | 94 | 95 | server 96 | 50 97 | 300 98 | 99 | 100 | server 101 | 50 102 | 300 103 | 104 | 105 | server 106 | 50 107 | 300 108 | 109 | 110 | switch 111 | 112 | 113 | switch 114 | 115 | 116 | switch 117 | 118 | 119 | router 120 | 121 | 122 | router 123 | 124 | 125 | router 126 | 127 | 128 | 100000 129 | 130 | 131 | 100000 132 | 133 | 134 | 100000 135 | 136 | 137 | 100000 138 | 139 | 140 | 100000 141 | 142 | 143 | 100000 144 | 145 | 146 | 100000 147 | 148 | 149 | 100000 150 | 151 | 152 | 100000 153 | 154 | 155 | 100000 156 | 157 | 158 | 100000 159 | 160 | 161 | 100000 162 | 163 | 164 | 100000 165 | 166 | 167 | 100000 168 | 169 | 170 | 100000 171 | 172 | 173 | 100000 174 | 175 | 176 | 10000 177 | 178 | 179 | 10000 180 | 181 | 182 | 10000 183 | 184 | 185 | 10000 186 | 187 | 188 | 100000 189 | 190 | 191 | 100000 192 | 193 | 194 | 10000 195 | 196 | 197 | 100000 198 | 199 | 200 | 100000 201 | 202 | HA-DRL PSN 203 | 204 | 205 
| -------------------------------------------------------------------------------- /PSNs/simple_hadrl_psn.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | server 11 | 50 12 | 300 13 | 14 | 15 | server 16 | 50 17 | 300 18 | 19 | 20 | server 21 | 50 22 | 300 23 | 24 | 25 | server 26 | 50 27 | 300 28 | 29 | 30 | server 31 | 50 32 | 300 33 | 34 | 35 | server 36 | 50 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 300 43 | 44 | 45 | server 46 | 50 47 | 300 48 | 49 | 50 | server 51 | 50 52 | 300 53 | 54 | 55 | server 56 | 50 57 | 300 58 | 59 | 60 | server 61 | 50 62 | 300 63 | 64 | 65 | server 66 | 50 67 | 300 68 | 69 | 70 | server 71 | 50 72 | 300 73 | 74 | 75 | switch 76 | 77 | 78 | switch 79 | 80 | 81 | switch 82 | 83 | 84 | switch 85 | 86 | 87 | router 88 | 89 | 90 | router 91 | 92 | 93 | router 94 | 95 | 96 | router 97 | 98 | 99 | 100000 100 | 101 | 102 | 100000 103 | 104 | 105 | 100000 106 | 107 | 108 | 100000 109 | 110 | 111 | 100000 112 | 113 | 114 | 100000 115 | 116 | 117 | 100000 118 | 119 | 120 | 100000 121 | 122 | 123 | 100000 124 | 125 | 126 | 100000 127 | 128 | 129 | 100000 130 | 131 | 132 | 10000 133 | 134 | 135 | 10000 136 | 137 | 138 | 100000 139 | 140 | 141 | 100000 142 | 143 | 144 | 100000 145 | 146 | 147 | 10000 148 | 149 | 150 | 100000 151 | 152 | 153 | 100000 154 | 155 | 156 | 100000 157 | 158 | 159 | 100000 160 | 161 | 162 | 100000 163 | 164 | HA-DRL PSN 165 | 166 | 167 | -------------------------------------------------------------------------------- /PSNs/waxman_20_servers.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | server 13 | 50 14 | 50 15 | 300 16 | 300 17 | 18 | 19 | server 20 | 50 21 | 50 22 | 300 23 | 300 24 | 25 | 26 | server 27 | 50 28 | 50 29 | 300 30 | 300 31 | 32 | 33 | server 34 | 50 35 | 50 36 | 300 37 | 300 38 | 39 | 40 | server 41 | 50 42 | 50 43 | 300 44 | 
300 45 | 46 | 47 | server 48 | 50 49 | 50 50 | 300 51 | 300 52 | 53 | 54 | server 55 | 50 56 | 50 57 | 300 58 | 300 59 | 60 | 61 | server 62 | 50 63 | 50 64 | 300 65 | 300 66 | 67 | 68 | server 69 | 50 70 | 50 71 | 300 72 | 300 73 | 74 | 75 | server 76 | 50 77 | 50 78 | 300 79 | 300 80 | 81 | 82 | server 83 | 50 84 | 50 85 | 300 86 | 300 87 | 88 | 89 | server 90 | 50 91 | 50 92 | 300 93 | 300 94 | 95 | 96 | server 97 | 50 98 | 50 99 | 300 100 | 300 101 | 102 | 103 | server 104 | 50 105 | 50 106 | 300 107 | 300 108 | 109 | 110 | server 111 | 50 112 | 50 113 | 300 114 | 300 115 | 116 | 117 | server 118 | 50 119 | 50 120 | 300 121 | 300 122 | 123 | 124 | server 125 | 50 126 | 50 127 | 300 128 | 300 129 | 130 | 131 | server 132 | 50 133 | 50 134 | 300 135 | 300 136 | 137 | 138 | server 139 | 50 140 | 50 141 | 300 142 | 300 143 | 144 | 145 | server 146 | 50 147 | 50 148 | 300 149 | 300 150 | 151 | 152 | 10000 153 | 10000 154 | 155 | 156 | 10000 157 | 10000 158 | 159 | 160 | 10000 161 | 10000 162 | 163 | 164 | 10000 165 | 10000 166 | 167 | 168 | 10000 169 | 10000 170 | 171 | 172 | 10000 173 | 10000 174 | 175 | 176 | 10000 177 | 10000 178 | 179 | 180 | 10000 181 | 10000 182 | 183 | 184 | 10000 185 | 10000 186 | 187 | 188 | 10000 189 | 10000 190 | 191 | 192 | 10000 193 | 10000 194 | 195 | 196 | 10000 197 | 10000 198 | 199 | 200 | 10000 201 | 10000 202 | 203 | 204 | 10000 205 | 10000 206 | 207 | 208 | 10000 209 | 10000 210 | 211 | 212 | 10000 213 | 10000 214 | 215 | 216 | 10000 217 | 10000 218 | 219 | 220 | 10000 221 | 10000 222 | 223 | 224 | 10000 225 | 10000 226 | 227 | 228 | 10000 229 | 10000 230 | 231 | 232 | 10000 233 | 10000 234 | 235 | 236 | 10000 237 | 10000 238 | 239 | 240 | 10000 241 | 10000 242 | 243 | 244 | 10000 245 | 10000 246 | 247 | 248 | 10000 249 | 10000 250 | 251 | 252 | 10000 253 | 10000 254 | 255 | 256 | 10000 257 | 10000 258 | 259 | 260 | 10000 261 | 10000 262 | 263 | 264 | 10000 265 | 10000 266 | 267 | 268 | 10000 269 | 10000 270 | 271 | 272 | 10000 
273 | 10000 274 | 275 | 276 | 10000 277 | 10000 278 | 279 | 280 | 10000 281 | 10000 282 | 283 | 284 | 10000 285 | 10000 286 | 287 | 288 | 10000 289 | 10000 290 | 291 | 292 | 10000 293 | 10000 294 | 295 | 296 | 10000 297 | 10000 298 | 299 | 300 | 10000 301 | 10000 302 | 303 | 304 | 10000 305 | 10000 306 | 307 | 308 | 309 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepNetSlice 2 | ### _A Deep Reinforcement Learning Open-Source Toolkit for Network Slice Placement_ 3 | 4 | ## Demo 5 | ```bash 6 | cd src 7 | python demo.py 8 | ``` 9 | 10 | ## General training script structure 11 | ```python 12 | # create trainer object 13 | # It creates the model and the training and evaluation environments 14 | trainer = Trainer( ... ) # parameters description on trainer.py docstring 15 | 16 | # create list of training callbacks. 17 | callbacks = [ ... ] # see 'src/callbacks/' or Stable Baselines3 docs 18 | 19 | # train the model 20 | trainer.train( 21 | tot_steps=<...>, # number of overall training steps 22 | callbacks=callbacks, 23 | log_interval=<...>, # number of steps between each log 24 | wandb=<...>, # (bool) whether to use wandb logging 25 | ) 26 | ``` 27 | 28 | ## Directories structure 29 | - `NSPRs`: contains graphml files containing the definition of some Network Slice Placement Requests (NSPRs). 30 | These can also be created on the fly during training, with no need to read files. 31 | 32 | - `PSNs`: contains graphml files containing the definition of some Physical Substrate Networks (PSNs) architectures. 33 | 34 | - `src`: contains the source code of the toolkit. 35 | 36 | - `callbacks`: contains some training callbacks. 37 | All callbacks in the library [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can be used as well. 38 | 39 | - `policies`: contains the implmentation of policy networks. 
40 | It follows the nomenclature of [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) policies, where the policy nets are composed of a features extractor followed by a MlpExtractor. 41 | - `features_extractors`: contains the implementation of features extractors modules. 42 | - `mlp_extractors`: contains the implementation of mlp extractors modules. 43 | 44 | - `spaces`: contains the implementation of custom [Gym](https://github.com/openai/gym) / [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) spaces. 45 | 46 | - `wrappers`: contains the implementation of custom environment wrappers. 47 | Wrappers from [Stable Baselines3](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) can also be used. 48 | 49 | - `network_simulator.py`: contains the implementation of the environment. 50 | 51 | - `trainer.py`: contains the implementation of the trainer object (see demo). 52 | 53 | - `demo.py`: contains a demo script. 54 | 55 | 56 | ## Contributing 57 | Constributions are welcome! :rocket: 58 | 59 | To contribute: 60 | - If you want to **work on an open issue**, comment on that issue before opening a PR. 61 | - If you want to implement a **new feature** or an **improvement**, write about it in the Discussions tab. 
62 | 63 | ## Reference 64 | ``` 65 | Alex Pasquali, Vincenzo Lomonaco, Davide Bacciu and Federica Paganelli, 66 | Deep Reinforcement Learning for Network Slice Placement and the DeepNetSlice Toolkit, 67 | IEEE International Conference on Machine Learning for Communication and Networking, ICMLCN 2024, 5-8 May 2024, Stockholm, Sweden 68 | ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wheel<=0.38.4 2 | setuptools~=64.0 3 | gym~=0.21.0 4 | numpy~=1.23.4 5 | networkx~=2.8.7 6 | torch~=1.12.1 7 | stable-baselines3~=1.7.0 8 | sb3-contrib~=1.7.0 9 | torch-geometric~=2.1.0 10 | tensorboard~=2.10.0 11 | wandb~=0.13.4 12 | -------------------------------------------------------------------------------- /src/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .acceptance_ratio_callbacks import AcceptanceRatioByStepsCallback, AcceptanceRatioByNSPRsCallback 2 | from .hparam_callback import HParamCallback 3 | from .psn_load_callback import PSNLoadCallback 4 | from .seen_nsprs_callback import SeenNSPRsCallback 5 | -------------------------------------------------------------------------------- /src/callbacks/acceptance_ratio_callbacks.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | import gym 3 | import numpy as np 4 | from stable_baselines3.common.callbacks import BaseCallback 5 | from stable_baselines3.common.vec_env import VecEnv 6 | 7 | 8 | class AcceptanceRatioByStepsCallback(BaseCallback): 9 | """ 10 | A custom callback that derives from ``BaseCallback``. 11 | It logs the acceptance ratio on Tensorboard. 12 | 13 | :param env: environment 14 | :param name: name of the metric to log 15 | :param steps_per_tr_phase: number of steps that define a training phase. 
16 | The acceptance ratio is logged once per training phase. 17 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 18 | """ 19 | def __init__( 20 | self, 21 | env: gym.Env, 22 | name: str = "Acceptance ratio", 23 | steps_per_tr_phase: int = 1, 24 | verbose=0 25 | ): 26 | super(AcceptanceRatioByStepsCallback, self).__init__(verbose) 27 | self.env = env 28 | self.name = name 29 | self.steps_per_tr_phase = steps_per_tr_phase 30 | self.tot_to_subtract = None 31 | self.accepted_to_subtract = None 32 | # Those variables will be accessible in the callback 33 | # (they are defined in the base class) 34 | # The RL model 35 | # self.model = None # type: BaseAlgorithm 36 | # An alias for self.model.get_env(), the environment used for training 37 | # self.training_env = None # type: Union[gym.Env, VecEnv, None] 38 | # Number of time the callback was called 39 | # self.n_calls = 0 # type: int 40 | # self.num_timesteps = 0 # type: int 41 | # local and global variables 42 | # self.locals = None # type: Dict[str, Any] 43 | # self.globals = None # type: Dict[str, Any] 44 | # The logger object, used to report things in the terminal 45 | # self.logger = None # stable_baselines3.common.logger 46 | # # Sometimes, for event callback, it is useful 47 | # # to have access to the parent object 48 | # self.parent = None # type: Optional[BaseCallback] 49 | 50 | def _on_step(self) -> bool: 51 | """ 52 | This method will be called by the model after each call to `env.step()`. 53 | 54 | For child callback (of an `EventCallback`), this will be called 55 | when the event is triggered. 56 | 57 | :return: (bool) If the callback returns False, training is aborted early. 
58 | """ 59 | if self.n_calls % self.steps_per_tr_phase == 0: 60 | accepted_nsprs_per_env = np.array(self.env.get_attr("accepted_nsprs"), dtype=np.float32) 61 | tot_nsprs_per_env = np.array(self.env.get_attr("tot_seen_nsprs"), dtype=np.float32) 62 | if self.tot_to_subtract is None: # or self.accepted_to_subtract is None, either way 63 | self.tot_to_subtract = np.zeros_like(tot_nsprs_per_env) 64 | self.accepted_to_subtract = np.zeros_like(accepted_nsprs_per_env) 65 | accepted_nsprs_per_env -= self.accepted_to_subtract 66 | tot_nsprs_per_env -= self.tot_to_subtract 67 | accept_ratio_per_env = np.divide(accepted_nsprs_per_env, 68 | tot_nsprs_per_env, 69 | out=np.zeros_like(tot_nsprs_per_env), 70 | where=tot_nsprs_per_env != 0) 71 | overall_accept_ratio = np.mean(accept_ratio_per_env) 72 | self.logger.record(self.name, overall_accept_ratio) 73 | self.tot_to_subtract = tot_nsprs_per_env 74 | self.accepted_to_subtract = accepted_nsprs_per_env 75 | return True 76 | 77 | 78 | class AcceptanceRatioByNSPRsCallback(BaseCallback): 79 | """ 80 | A custom callback that derives from ``BaseCallback``. 81 | It logs the acceptance ratio on Tensorboard. 82 | 83 | :param env: environment 84 | :param name: name of the metric to log 85 | :param nsprs_per_tr_phase: number of NSPRs that define a training phase. 86 | The acceptance ratio is logged once per training phase. 
87 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 88 | """ 89 | def __init__( 90 | self, 91 | env: gym.Env, 92 | name: str = "Acceptance ratio", 93 | nsprs_per_tr_phase: int = 1000, 94 | verbose=0 95 | ): 96 | super().__init__(verbose) 97 | self.env = env 98 | self.name = name 99 | self.nsprs_per_tr_phase = nsprs_per_tr_phase 100 | # num of seen NSPRs to subtract form the total number of seen NSPRs (per env) 101 | self.seen_to_subtract = [0] * env.num_envs 102 | # num of accepted NSPRs to subtract form the total number of accepted NSPRs (per env) 103 | self.accepted_to_subtract = [0] * env.num_envs 104 | # num of seen NSPRs last considered for logging (per env), 105 | # used to ensure it loggs once per training phase 106 | self.last_seen = [0] * env.num_envs 107 | # num of accepted NSPRs during this training phase (per env) 108 | self.accepted_this_training_phase = [0] * env.num_envs 109 | # num of NSPRs seen during this training phase (per env) 110 | self.seen_this_training_phase = [0] * env.num_envs 111 | # acceptance ratio of each env 112 | self.acceptance_ratios = [Queue() for _ in range(env.num_envs)] 113 | # once an env is ready for logging, its cell is increased by 1, 114 | # and it is decreased by 1 when the acceptance ratio is logged 115 | self.ready_envs = np.zeros(shape=env.num_envs, dtype=int) 116 | 117 | def _on_step(self) -> bool: 118 | if isinstance(self.env, VecEnv): 119 | seen_nsprs = self.env.get_attr('tot_seen_nsprs') 120 | accepted_nsprs = self.env.get_attr('accepted_nsprs') 121 | else: 122 | seen_nsprs = [self.env.tot_seen_nsprs] 123 | accepted_nsprs = [self.env.accepted_nsprs] 124 | 125 | for env_idx in range(self.env.num_envs): 126 | if seen_nsprs[env_idx] > self.last_seen[env_idx] and seen_nsprs[env_idx] % self.nsprs_per_tr_phase == 0: 127 | self.ready_envs[env_idx] += 1 128 | self.last_seen[env_idx] = seen_nsprs[env_idx] 129 | # NSPRs seen and accepted in this training phase 130 | 
seen_this_tr_phase = seen_nsprs[env_idx] - self.seen_to_subtract[env_idx] 131 | accepted_this_tr_phase = accepted_nsprs[env_idx] - self.accepted_to_subtract[env_idx] 132 | # update how much to subtract to get the quantities for next tr phase 133 | self.seen_to_subtract[env_idx] = seen_nsprs[env_idx] 134 | self.accepted_to_subtract[env_idx] = accepted_nsprs[env_idx] 135 | # compute acceptance ratio 136 | try: 137 | self.acceptance_ratios[env_idx].put(accepted_this_tr_phase / seen_this_tr_phase) 138 | except ZeroDivisionError: 139 | self.acceptance_ratios[env_idx].put(0.) 140 | 141 | if all(self.ready_envs): 142 | ratios = [self.acceptance_ratios[env_idx].get() for env_idx in range(self.env.num_envs)] 143 | self.logger.record(self.name, np.mean(ratios)) 144 | self.ready_envs -= 1 145 | 146 | return True 147 | -------------------------------------------------------------------------------- /src/callbacks/hparam_callback.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.callbacks import BaseCallback 2 | from stable_baselines3.common.logger import HParam 3 | 4 | 5 | class HParamCallback(BaseCallback): 6 | def __init__( 7 | self, 8 | n_tr_envs: int = None, 9 | n_eval_envs: int = None, 10 | tr_nsprs_per_ep: int = None, 11 | tr_psn_load: float = None, 12 | tr_max_ep_steps: int = None, 13 | eval_nsprs_per_ep: int = None, 14 | eval_psn_load: float = None, 15 | eval_max_ep_steps: int = None, 16 | vnfs_per_nsprs: int = None, 17 | use_placement_state: bool = None, 18 | use_heuristic: bool = False, 19 | heu_kwargs: dict = None, 20 | ): 21 | """ 22 | Saves the hyperparameters and metrics at the start of the training, 23 | and logs them to TensorBoard. 
24 | 25 | :param n_tr_envs: number of training environments 26 | """ 27 | super().__init__() 28 | self.n_tr_envs = n_tr_envs 29 | self.n_eval_envs = n_eval_envs 30 | self.tr_nsprs_per_ep = tr_nsprs_per_ep 31 | self.tr_psn_load = tr_psn_load 32 | self.tr_max_ep_steps = tr_max_ep_steps 33 | self.eval_nsprs_per_ep = eval_nsprs_per_ep 34 | self.eval_psn_load = eval_psn_load 35 | self.eval_max_ep_steps = eval_max_ep_steps 36 | self.vnfs_per_nspr = vnfs_per_nsprs 37 | self.use_placement_state = use_placement_state, 38 | self.use_heuristic = use_heuristic 39 | self.heu_kwargs = heu_kwargs if heu_kwargs is not None else {} 40 | if 'heu_class' in self.heu_kwargs: 41 | self.heu_class = self.heu_kwargs['heu_class'](None, None, None).__class__.__name__ 42 | else: 43 | self.heu_class = None 44 | 45 | def _on_training_start(self) -> None: 46 | try: 47 | gcn_layers_dims = str(self.model.policy.features_extractor.gcn_layers_dims) 48 | except AttributeError: 49 | gcn_layers_dims = str(self.model.policy.gcn_layers_dims) 50 | 51 | hparam_dict = { 52 | "algorithm": self.model.__class__.__name__, 53 | "n training envs": self.n_tr_envs, 54 | "n eval envs": self.n_eval_envs, 55 | "n steps before update": self.model.n_steps, 56 | "learning rate": self.model.learning_rate, 57 | "gamma": self.model.gamma, 58 | "entropy coefficient": self.model.ent_coef, 59 | "NSPRs per training episode": self.tr_nsprs_per_ep, 60 | "max steps per training episode": self.tr_max_ep_steps, 61 | "PSN load (training)": self.tr_psn_load, 62 | "NSPRs per eval episode": self.eval_nsprs_per_ep, 63 | "PSN load (eval)": self.eval_psn_load, 64 | "max steps per eval episode": self.eval_max_ep_steps, 65 | "VNFs/NSPR": self.vnfs_per_nspr, 66 | "GCN layers dimensions": gcn_layers_dims, 67 | "Use placement state": str(self.use_placement_state), 68 | "Use heuristic": self.use_heuristic, 69 | "Heuristic class": self.heu_class, 70 | "heu's num sampled servers": self.heu_kwargs.get("n_servers_to_sample", None), 71 | "heu's eta": 
self.heu_kwargs.get("eta", None), 72 | "heu's xi": self.heu_kwargs.get("xi", None), 73 | "heu's beta": self.heu_kwargs.get("beta", None), 74 | } 75 | # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag 76 | # Tensorboard will find & display metrics from the `SCALARS` tab 77 | metric_dict = { 78 | "Acceptance ratio": 0, 79 | "Eval acceptance ratio": 0, 80 | "eval/mean_reward": 0, 81 | "rollout/ep_rew_mean": 0, 82 | "train/entropy_loss": 0, 83 | "train/policy_loss": 0, 84 | "train/value_loss": 0, 85 | } 86 | self.logger.record( 87 | "hparams", 88 | HParam(hparam_dict, metric_dict), 89 | exclude=("stdout", "log", "json", "csv"), 90 | ) 91 | 92 | def _on_step(self) -> bool: 93 | return True 94 | -------------------------------------------------------------------------------- /src/callbacks/psn_load_callback.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import gym 4 | import numpy as np 5 | from stable_baselines3.common.callbacks import BaseCallback 6 | 7 | 8 | class PSNLoadCallback(BaseCallback): 9 | """ 10 | Class for logging the load of the PSN. 
11 | 12 | :param env: environment 13 | :param freq: logging frequency (in number of steps) 14 | :param cpu: if True, track CPU load 15 | :param ram: if True, track RAM load 16 | :param bw: if True, track BW load 17 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 18 | """ 19 | def __init__( 20 | self, 21 | env: gym.Env, 22 | freq: int, 23 | cpu: bool = True, 24 | ram: bool = True, 25 | bw: bool = True, 26 | verbose: int = 0 27 | ): 28 | super(PSNLoadCallback, self).__init__(verbose) 29 | self.env = env 30 | self.freq = freq 31 | self.cpu, self.ram, self.bw = cpu, ram, bw 32 | 33 | def _on_step(self) -> bool: 34 | if self.n_calls % self.freq == 0: 35 | cpu_loads, ram_loads, bw_loads = [], [], [] 36 | observations = self.env.get_attr('obs_dict') 37 | for e, obs in enumerate(observations): 38 | # get the available CPU and RAM for each server 39 | serv_cpu_avails, serv_ram_avails = [], [] 40 | for idx in self.env.get_attr('servers_map_idx_id')[e].keys(): 41 | serv_cpu_avails.append(obs['cpu_avails'][idx]) 42 | serv_ram_avails.append(obs['ram_avails'][idx]) 43 | avail_cpu_perc = np.sum(serv_cpu_avails) * self.env.get_attr('max_cpu')[e] / self.env.get_attr('tot_cpu_cap')[e] 44 | avail_ram_perc = np.sum(serv_ram_avails) * self.env.get_attr('max_ram')[e] / self.env.get_attr('tot_ram_cap')[e] 45 | cpu_loads.append(1. - avail_cpu_perc) 46 | ram_loads.append(1. - avail_ram_perc) 47 | # get the available BW for each link 48 | link_bw_avails_perc = [] 49 | for link in self.env.get_attr('psn')[e].edges.values(): 50 | link_bw_avails_perc.append(link['availBW'] / link['BWcap']) 51 | bw_loads.append(1. 
- np.mean(link_bw_avails_perc)) 52 | try: 53 | if self.cpu: 54 | avg_cpu_load = np.mean(cpu_loads) 55 | self.logger.record("Average CPU load of training envs", avg_cpu_load) 56 | if self.ram: 57 | avg_ram_load = np.mean(ram_loads) 58 | self.logger.record("Average RAM load of training envs", avg_ram_load) 59 | if self.bw: 60 | avg_bw_load = np.mean(bw_loads) 61 | self.logger.record("Average BW load of training envs", avg_bw_load) 62 | if self.verbose > 0: 63 | try: 64 | print(f"Average CPU load of training envs: {avg_cpu_load}") 65 | print(f"Average RAM load of training envs: {avg_ram_load}") 66 | print(f"Average BW load of training envs: {avg_bw_load}") 67 | except NameError: 68 | # in case some variables are not defined. It means we're not tracking that load 69 | pass 70 | except AttributeError: 71 | warnings.warn("No logger for resources load callback, data not being logged") 72 | 73 | return True 74 | -------------------------------------------------------------------------------- /src/callbacks/seen_nsprs_callback.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.callbacks import BaseCallback 2 | import gym 3 | import numpy as np 4 | 5 | 6 | class SeenNSPRsCallback(BaseCallback): 7 | """ 8 | Class for logging the number of seen NSPRs so far. 9 | 10 | It logs the average number of seen NSPRs for each environment. 11 | The average is chosen, instead of the sum, because the loss is based on the 12 | average of the "values" in the various steps: 13 | - policy_loss = -(advantages * log_prob).mean() 14 | - value_loss = F.mse_loss(rollout_data.returns, values) 15 | - entropy_loss = -th.mean(entropy) 16 | If there are multiple parallel envs, the "values" of each env are flattened, 17 | and again the average is computed for the loss. 18 | Therefore, we don't have more updates if we have more envs, just more precise. 
19 | If 2 envs have seen 10 NSPRs, it's not like an env has seen 20 (in terms of updates and steps). 20 | 21 | :param env: environment 22 | :param freq: logging frequency (in number of steps) 23 | :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages 24 | """ 25 | def __init__( 26 | self, 27 | env: gym.Env, 28 | freq: int = 1, 29 | verbose: int = 0 30 | ): 31 | super().__init__(verbose) 32 | self.env = env 33 | self.freq = freq 34 | 35 | def _on_step(self) -> bool: 36 | if self.n_calls % self.freq == 0: 37 | # log the number of seen NSPRs 38 | seen_nsprs_per_env = self.env.get_attr('tot_seen_nsprs') 39 | # why the mean and not the sum, you ask? Read the docstring of the class 40 | avg_seen_nsprs = int(round(np.mean(seen_nsprs_per_env))) 41 | self.logger.record("Avg seen NSPRs per env", avg_seen_nsprs) 42 | if self.verbose > 0: 43 | print(f"Average seen NSPRs per env: {avg_seen_nsprs}") 44 | return True -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback 2 | from callbacks.hparam_callback import HParamCallback 3 | from callbacks.psn_load_callback import PSNLoadCallback 4 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback 5 | from trainer import Trainer 6 | from wrappers.reset_with_load import ResetWithRealisticLoad 7 | from stable_baselines3.common.callbacks import EvalCallback 8 | from wandb.integration.sb3 import WandbCallback 9 | 10 | 11 | if __name__ == '__main__': 12 | # create trainer object. 13 | # It creates the model and the training and evaluation environments. 
import numpy as np
from tqdm import tqdm
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

from callbacks import PSNLoadCallback
from utils import make_env
from wrappers.reset_with_load import ResetWithRealisticLoad

if __name__ == '__main__':
    # load the trained model (no env attached: it is re-created below)
    model = A2C.load(
        path="/root/NSPR-simulator/wandb/run-20230103_155854-3o0vtz6x/files/model.zip",
        env=None,
        device='cpu',
        print_system_info=True,
        force_reset=True,  # True as default -> avoids unexpected behavior
    )

    # re-create the evaluation env with the same wrappers used at training time
    env = make_vec_env(
        env_id=make_env,
        n_envs=1,
        env_kwargs=dict(
            psn_path="../PSNs/waxman_20_servers.graphml",
            base_env_kwargs=dict(accumulate_reward=True),
            time_limit=True,
            time_limit_kwargs=dict(max_episode_steps=1000),
            hadrl_nsprs=True,
            hadrl_nsprs_kwargs=dict(
                nsprs_per_ep=1,
                vnfs_per_nspr=5,
                always_one=True
            ),
            reset_load_class=ResetWithRealisticLoad,
            reset_load_kwargs=dict(cpu_load=0.5),
            placement_state=True,
            dynamic_connectivity=True,
            dynamic_connectivity_kwargs=dict(link_bw=10_000),
        ),
        seed=12,
    )

    # evaluate the model: run until 'tot_nsprs' NSPRs have been fully evaluated
    obs = env.reset()
    accepted = seen = 0
    tot_nsprs = 10000
    pbar = tqdm(total=tot_nsprs)  # progress bar
    while seen < tot_nsprs:
        action, _ = model.predict(obs, deterministic=True)
        obs, rewards, done, info = env.step(action)
        # with accumulated rewards, a non-zero reward marks the end of a NSPR:
        # positive -> accepted, negative -> rejected
        if rewards[0] != 0.0:
            seen += 1
            pbar.update(1)
            if rewards[0] > 0.0:
                accepted += 1
        if done:
            obs = env.reset()
    pbar.close()

    # FIX: guard against ZeroDivisionError when no NSPR produced a terminal reward
    if seen > 0:
        print(f"Acceptance ratio: {accepted / seen}")
    else:
        print("Acceptance ratio: undefined (no NSPRs seen)")
class P2CLoadBalanceHeuristic(nn.Module):
    """ Layer executing the P2C (power-of-two-choices) load-balancing heuristic.

    It samples ``n_servers_to_sample`` candidate servers uniformly from the
    action space, scores each by the load balance it would have after hosting
    the current VNF, and biases the agent's scores towards the best candidate.
    """
    name = "P2C load balance heuristic"

    def __init__(
            self,
            action_space: "gym.spaces.Space",
            servers_map_idx_id: Dict[int, int],
            psn: "nx.Graph",
            n_servers_to_sample: int = 2,
            eta: float = 0.,
            xi: float = 1.,
            beta: float = 1.,  # TODO: when not 1, could cause NaNs
            **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param n_servers_to_sample: number of candidate servers sampled per decision
        :param eta: hyperparameter of the P2C heuristic (bonus added to the boosted score)
        :param xi: hyperparameter of the P2C heuristic (scales the boost)
        :param beta: hyperparameter of the P2C heuristic (exponent of the boost)
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta

    def forward(self, x: th.Tensor, obs: Dict[str, th.Tensor]) -> th.Tensor:
        """ Boost the score of the heuristic's pick so it at least matches the
        current max score (plus eta); other entries are left unchanged.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: batched observation dict from the environment
        :return: the (possibly) biased scores, same shape as x
        """
        n_envs = x.shape[0]
        max_values, max_idxs = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            return H  # it means no selected action by the heuristic
        for e in range(n_envs):
            heu_action = heu_selected_servers[e, :].item()
            # lift the heuristic's pick to the row max (+ eta)
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: Dict[str, th.Tensor], n_servers_to_sample: int) -> th.Tensor:
        """ P2C heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).

        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers, shape (n_envs, 1)
        """
        n_envs = obs['bw_avails'].shape[0]
        indexes = th.empty(n_envs, n_servers_to_sample, dtype=th.int)
        req_cpu = obs['cur_vnf_cpu_req']
        req_ram = obs['cur_vnf_ram_req']
        load_balances = th.empty(n_envs, n_servers_to_sample)
        for e in range(n_envs):
            for s in range(n_servers_to_sample):
                # actions (indexes of the servers in the servers list)
                indexes[e, s] = self.action_space.sample()
                # servers ids
                node_id = self.servers_map_idx_id[indexes[e, s].item()]
                # actual servers (nodes in the graph)
                node = self.psn.nodes[node_id]
                # compute the load balance of each server when placing the VNF
                cpu_load_balance = (node['availCPU'] - req_cpu[e]) / node['CPUcap']
                ram_load_balance = (node['availRAM'] - req_ram[e]) / node['RAMcap']
                load_balances[e, s] = cpu_load_balance + ram_load_balance

        # return the best server for each environment (the indexes)
        winners = th.argmax(load_balances, dim=1, keepdim=True)
        # FIX: 'winners' holds per-row COLUMN indices (argmax over dim=1,
        # keepdim=True), so the gather must run along dim 1, not dim 0
        # (dim 0 was out-of-bounds for n_envs < n_servers_to_sample and wrong
        # otherwise; HADRLHeuristic.HEU already gathers along dim 1).
        return th.gather(indexes, 1, winners)
class HADRLHeuristic(nn.Module):
    """ Heuristic layer from HA-DRL: samples feasible candidate servers and
    biases the agent's scores towards the candidate closest (in hops, over
    links with enough bandwidth) to the server hosting the previous VNF. """

    def __init__(
            self,
            action_space: "gym.spaces.Space",
            servers_map_idx_id: Dict[int, int],
            psn: "nx.Graph",
            bw_req_per_vl: int = 2000,
            n_servers_to_sample: int = 2,
            eta: float = 0.,
            xi: float = 1.,
            beta: float = 1.,  # TODO: when not 1, could cause NaNs
            **kwargs
    ):
        """ Constructor

        :param action_space: Action space
        :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids
        :param psn: the env's physical substrate network
        :param bw_req_per_vl: bandwidth a physical link must offer to count as usable
        :param n_servers_to_sample: number of candidate servers sampled per decision
        :param eta: hyperparameter of the heuristic (bonus added to the boosted score)
        :param xi: hyperparameter of the heuristic (scales the boost)
        :param beta: hyperparameter of the heuristic (exponent of the boost)
        """
        super().__init__()
        self.action_space = action_space
        self.servers_map_idx_id = servers_map_idx_id
        self.psn = psn
        self.bw_req_per_vl = bw_req_per_vl
        self.n_servers_to_sample = n_servers_to_sample
        self.eta, self.xi, self.beta = eta, xi, beta
        # per-env server chosen for the previous VNF (-1 = no previous VNF)
        self.prev_selected_servers = None
        self.n_envs = None

    def forward(self, x: th.Tensor, obs: Dict[str, th.Tensor]) -> th.Tensor:
        """ Boost the score of the heuristic's pick so it at least matches the
        current max score (plus eta); other entries are left unchanged.

        :param x: raw per-server scores, shape (n_envs, n_servers)
        :param obs: batched observation dict from the environment
        :return: the (possibly) biased scores, same shape as x
        """
        self.n_envs = x.shape[0]
        # (re-)initialize the per-env memory when the batch size changes
        if self.prev_selected_servers is None or self.n_envs != self.prev_selected_servers.shape[0]:
            self.prev_selected_servers = -th.ones(self.n_envs, dtype=th.int)
        max_values, max_idxs = th.max(x, dim=1)
        H = th.zeros_like(x)
        heu_selected_servers = self.HEU(obs, self.n_servers_to_sample)
        if th.all(heu_selected_servers == -1):
            # it means no selected action by the heuristic
            return H
        for e in range(self.n_envs):
            heu_action = heu_selected_servers[e, :].item()
            H[e, heu_action] = max_values[e] - x[e, heu_action] + self.eta
        out = x + self.xi * th.pow(H, self.beta)
        return out

    def HEU(self, obs: Dict[str, th.Tensor], n_servers_to_sample: int) -> th.Tensor:
        """ HA-DRL heuristic to select the servers where to place the current VNFs.
        Selects one server for each environment (in case of vectorized envs).

        :param obs: Observation
        :param n_servers_to_sample: number of servers to sample
        :return: indexes of the selected servers, shape (n_envs, 1), or a
            tensor of -1's if no feasible server exists
        """
        indexes = th.empty(self.n_envs, n_servers_to_sample, dtype=th.int)
        path_lengths = th.zeros(self.n_envs, n_servers_to_sample)
        all_actions = list(range(self.action_space.n))
        for e in range(self.n_envs):
            # random permutation of the actions
            all_actions = np.random.permutation(all_actions)
            for s in range(n_servers_to_sample):
                # instead of selecting first all the feasible servers and then
                # sampling on them, we go through a randomly ordered list of
                # all the actions and pick the first one which is feasible.
                # This way we don't run through all the servers every time.
                for i in range(s, len(all_actions)):
                    a = all_actions[i]
                    if self.action_is_feasible(a, obs, e):
                        indexes[e, s] = a
                        break
                else:
                    # FIX: the loop completed without finding a feasible action.
                    # The previous check `if i == len(all_actions) - 1` also
                    # fired when the LAST candidate was feasible (break on the
                    # last index), wrongly reporting "no choice"; for/else only
                    # runs when no break happened.
                    return -th.ones(self.n_envs, 1)

                # server ID
                server_id = self.servers_map_idx_id[indexes[e, s].item()]

                if self.prev_selected_servers[e] == -1:
                    # no previous VNF: every candidate is equally good
                    path_lengths[e, s] = -math.inf
                else:
                    # if the server was the one selected for the prev VNF, choose it
                    if self.prev_selected_servers[e] == server_id:
                        path_lengths[e, s] = -math.inf
                    else:
                        # evaluate bandwidth consumption when placing the current VNF on this server
                        path = nx.shortest_path(G=self.psn,
                                                source=self.prev_selected_servers[e].item(),
                                                target=server_id,
                                                weight=self.compute_link_weight,
                                                method='dijkstra')
                        path_lengths[e, s] = len(path)

        # return the best (closest) server for each environment (the indexes)
        winners = th.argmin(path_lengths, dim=1, keepdim=True)
        selected_servers = th.gather(indexes, 1, winners)
        self.prev_selected_servers = selected_servers.squeeze(dim=1)
        return selected_servers

    @staticmethod
    def action_is_feasible(a: int, obs: Dict[str, th.Tensor], env_idx: int):
        """ Check if it's feasible to place the current VNF on a specific server

        1. if a server has enough CPU and RAM to host this VNF and the next one
        (all VNFs are assumed to have identical requirements, if this is not the
        case, then you can see this as "if a server has enough CPU and RAM to
        host double the requirements of this VNF", like a greedy safety margin),
        then it is eligible.

        2. if a server has enough CPU and RAM to host only this VNF, then if it
        has enough bandwidth in its outgoing links to host the connection with
        the neighboring VNFs, then it is eligible.

        3. if a server does not have enough CPU or RAM to host the current VNF,
        then it is NOT eligible.

        :param a: action, i.e. a server index
        :param obs: instance of an observation from the environment
        :param env_idx: index of the environment (in case of vectorized envs)
        :return: true if the action is feasible, false otherwise
        """
        req_cpu = obs['cur_vnf_cpu_req'][env_idx].item()
        req_ram = obs['cur_vnf_ram_req'][env_idx].item()
        req_bw = obs['cur_vnf_bw_req'][env_idx].item()
        avail_cpu = obs['cpu_avails'][env_idx][a].item()
        avail_ram = obs['ram_avails'][env_idx][a].item()
        # FIX: .item() added for consistency with the other lookups (the
        # comparison below now yields a plain bool, not a 0-dim tensor)
        avail_bw = obs['bw_avails'][env_idx][a].item()

        if (avail_cpu >= 2 * req_cpu and avail_ram >= 2 * req_ram) or \
                (avail_cpu >= req_cpu and avail_ram >= req_ram and avail_bw >= req_bw):
            return True

        return False

    def compute_link_weight(self, source: int, target: int, link: dict):
        # usable links cost 1 hop; links without enough bandwidth are excluded
        # (infinite weight). Signature fixed by networkx's weight-callable API.
        return 1 if link['availBW'] >= self.bw_req_per_vl else math.inf
    def __init__(
            self,
            psn_file: str,
            nsprs_path: str = "../NSPRs/",
            nsprs_per_episode: Optional[int] = None,
            nsprs_max_duration: int = 100,
            accumulate_reward: bool = True,
            discount_acc_rew: bool = True,
            perc_avail_nodes: Optional[float] = 1.
    ):
        """ Constructor

        :param psn_file: GraphML file containing the definition of the PSN
        :param nsprs_path: either directory with the GraphML files defining the NSPRs or path to a single GraphML file
        :param nsprs_per_episode: max number of NSPRs to be evaluated in each episode. If None, there is no limit.
        :param nsprs_max_duration: (optional) max duration of the NSPRs.
        :param accumulate_reward: if true, the reward is accumulated and given to the agent only after each NSPRs
        :param discount_acc_rew: if true, an increasing discount factor is applied to the acceptance reward during each NSPR.
            It starts from the inverse of the number of VNFs in the NSPR and grows to 1.
        :param perc_avail_nodes: in case some action masking is implemented (i.e., env wrapped in ActionMasker
            wrapper from sb3-contrib), it specifies the percentage of available nodes w.r.t. the total.
        """
        super(NetworkSimulator, self).__init__()

        self.psn_file = psn_file
        self.psn = reader.read_psn(graphml_file=psn_file)  # physical substrate network
        self.nsprs_path = nsprs_path
        self.nsprs_per_episode = nsprs_per_episode
        self.accumulate_reward = accumulate_reward
        self.nsprs_seen_in_cur_ep = 0
        self.nsprs_max_duration = nsprs_max_duration
        self.done = False
        self.nsprs = None  # will be initialized in the reset method
        self.waiting_nsprs = []  # list of NSPRs that arrived already and are waiting to be evaluated
        self.cur_nspr = None  # used to keep track of the current NSPR being evaluated
        self.cur_nspr_unplaced_vnfs_ids = []  # used to keep track of the VNFs' IDs of the current NSPR that haven't been placed yet
        self.cur_vnf_id = None  # used to keep track of the current VNF being evaluated
        self._cur_vl_reqBW = 0  # auxiliary attribute needed in method 'self.compute_link_weight'
        self.time_step = 0  # keep track of current time step
        self.ep_number = 0  # keep track of current episode number
        self.tot_seen_nsprs = 0  # keep track of the number of NSPRs seen so far
        self.accepted_nsprs = 0  # for the overall acceptance ratio
        self.discount_acc_rew = discount_acc_rew  # whether or not to discount the acceptance reward
        self.acc_rew_disc_fact = 1.  # current discount factor for the acceptance reward
        self.base_acc_rew_disc_fact = 1.  # base discount factor for the acceptance reward

        # map (dict) between IDs of PSN's nodes and their respective index (see self._init_map_id_idx's docstring)
        nodes_ids = list(self.psn.nodes.keys())
        self.map_id_idx = {nodes_ids[idx]: idx for idx in range(len(nodes_ids))}

        # map (dict) between an index of a list (incrementing int) and the ID of a server
        # (agent actions are indexes into this map)
        servers_ids = [node_id for node_id, node in self.psn.nodes.items()
                       if node['NodeType'] == 'server']
        self.servers_map_idx_id = {idx: servers_ids[idx] for idx in range(len(servers_ids))}

        # partial rewards to be accumulated across the steps of evaluation of a single NSPR
        self._acceptance_rewards = []
        self._resource_consumption_rewards = []
        self._cur_resource_consumption_rewards = []
        self._load_balance_rewards = []

        # reward values for specific outcomes
        self.rval_accepted_vnf = 100
        self.rval_rejected_vnf = -100

        # Action space and observation space (gym.Env required attributes)
        ONE_BILLION = 1_000_000_000  # constant for readability
        n_nodes = len(self.psn.nodes)
        # action space = number of servers
        self.action_space = Discrete(len(servers_ids))
        self.observation_space = Dict({
            # PSN STATE (availabilities are normalized to [0, 1] by _init_obs_dict)
            'cpu_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            'ram_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32),
            # for each physical node, number of VNFs of the current NSPR placed on it
            'placement_state': Box(low=0, high=ONE_BILLION, shape=(n_nodes,), dtype=int),

            # NSPR STATE
            # note: apparently it's not possible to pass "math.inf" or "sys.maxsize" as a gym.spaces.Box's high value
            'cur_vnf_cpu_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'cur_vnf_ram_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=np.float32),
            'vnfs_still_to_place': Box(low=0, high=ONE_BILLION, shape=(1,), dtype=int),
        })
        self._empty_psn_obs_dict = None  # used to store the observation resulting from an empty PSN
        self.obs_dict = self._init_obs_dict()  # used to store the current observation

        # action mask determining available actions. Init with all actions are available (it will be update in 'reset')
        self._action_mask = np.ones(shape=(len(servers_ids),), dtype=bool)
        # NOTE: 'assert' is stripped under 'python -O'; input validation only in debug runs
        assert 0. <= perc_avail_nodes <= 1.
        self.perc_avail_nodes = perc_avail_nodes

    @property
    def cur_vnf(self):
        # VNF attribute-dict of the current NSPR under evaluation (None between NSPRs)
        return self.cur_nspr.nodes[self.cur_vnf_id] if self.cur_nspr is not None else None

    def get_action_mask(self, env):
        """ Return the current per-server action mask.

        'action_mask' needs to be callable to be passed ActionMasker wrapper.
        note: env needs to be an argument for compatibility, but in this case it's useless
        """
        return self._action_mask

    def reset_partial_rewards(self):
        """ Resets the partial rewards (used in case a NSPR cannot be placed) """
        # NOTE(review): self._cur_resource_consumption_rewards is NOT cleared
        # here — confirm this is intentional (it is flushed in 'step')
        self._acceptance_rewards = []
        self._resource_consumption_rewards = []
        self._load_balance_rewards = []

    def enough_avail_resources(self, physical_node_id: int, vnf: dict) -> bool:
        """ Check that the physical node has enough resources to satisfy the VNF's requirements

        :param physical_node_id: ID of the physical node to check
        :param vnf: VNF to check
        :return: True if the physical node has enough resources to satisfy the VNF's requirements, False otherwise
        """
        idx = self.map_id_idx[physical_node_id]
        # availabilities in obs_dict are normalized, so the raw requirements
        # are normalized by the same maxima before comparing
        enough_cpu = self.obs_dict['cpu_avails'][idx] >= vnf['reqCPU'] / self.max_cpu
        enough_ram = self.obs_dict['ram_avails'][idx] >= vnf['reqRAM'] / self.max_ram
        return enough_cpu and enough_ram
    def restore_avail_resources(self, nspr: nx.Graph):
        """ Method called in case a NSPR is not accepted, or it has reached
        its departure time.
        Restores the PSN resources occupied by that NSPR.

        :param nspr: the rejected NSPR
        """
        if nspr is not None:
            # mark as departed so 'check_for_departed_nsprs' won't restore twice
            nspr.graph['departed'] = True
            for vnf_id, vnf in nspr.nodes.items():
                # restore nodes' resources availabilities
                # (vnf['placed'] holds the hosting node's ID, or a negative value if unplaced)
                if vnf['placed'] >= 0:
                    idx = self.map_id_idx[vnf['placed']]
                    self.obs_dict['cpu_avails'][idx] += vnf['reqCPU'] / self.max_cpu
                    self.obs_dict['ram_avails'][idx] += vnf['reqRAM'] / self.max_ram
                    self.obs_dict['placement_state'][idx] -= 1
            for _, vl in nspr.edges.items():
                # restore links' resources availabilities
                if vl['placed']:
                    # vl['placed'] is the list of the physical nodes traversed by the link
                    rewBW_normalized = vl['reqBW'] / self.max_bw
                    for i in range(len(vl['placed']) - 1):
                        id_1 = vl['placed'][i]
                        id_2 = vl['placed'][i + 1]
                        physical_link = self.psn.edges[id_1, id_2]
                        # recall that BW in physical links is actually updated
                        # (un-normalized), unlike the per-node obs entries
                        physical_link['availBW'] += vl['reqBW']
                        idx_1 = self.map_id_idx[id_1]
                        idx_2 = self.map_id_idx[id_2]
                        self.obs_dict['bw_avails'][idx_1] += rewBW_normalized
                        self.obs_dict['bw_avails'][idx_2] += rewBW_normalized

    def pick_next_nspr(self):
        """ Pick the next NSPR to be evaluated and updates the attribute 'self.cur_nspr' """
        if self.cur_nspr is None and self.waiting_nsprs:
            self.cur_nspr = self.waiting_nsprs.pop(0)
            self.cur_nspr.graph['DepartureTime'] = self.time_step + self.cur_nspr.graph['duration']
            self.cur_nspr_unplaced_vnfs_ids = list(self.cur_nspr.nodes.keys())
            # first VNF to place is evaluated immediately
            self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0)
            # reset acceptance reward discount factor:
            # grows by 1/n_vnfs at every placed VNF, reaching 1 at the last one
            self.base_acc_rew_disc_fact = 1 / len(self.cur_nspr.nodes)
            self.acc_rew_disc_fact = 0.
            # self.tot_seen_nsprs += 1
            _ = self.update_nspr_state()  # obs_dict updated within method

    def check_for_departed_nsprs(self):
        """ Checks it some NSPRs have reached their departure time and in case
        it frees the PSN resources occupied by them. """
        all_arrival_times = list(self.nsprs.keys())
        all_arrival_times.sort()
        for arrival_time in all_arrival_times:
            # arrival times are sorted: nothing at/after the current step can have departed
            if arrival_time >= self.time_step:
                break
            cur_nsprs = self.nsprs[arrival_time]
            for nspr in cur_nsprs:
                departed = nspr.graph.get('departed', False)
                # default DepartureTime = current step, so NSPRs never picked up are skipped
                if nspr.graph.get('DepartureTime', self.time_step) < self.time_step and not departed:
                    self.restore_avail_resources(nspr=nspr)

            # This should be useless now
            # if nspr == self.cur_nspr:
            #     # haven't finished placing this NSPR, but its departure time has come.
            #     # remove NSPR, no reward, neither positive nor negative
            #     # (not agent's fault, too many requests at the same time)
            #     self.cur_nspr = None
            #     self.reset_partial_rewards()
209 | - Restore the PSN resources occupied by VNFs and VLs of the current NSPR 210 | - Reset the partial rewards 211 | - Set the reward as the one for an unsuccessful action 212 | - Pick the next NSPR to be evaluated (if exists) 213 | - get an observation from the environment 214 | 215 | :return: the reward for the unsuccessful action 216 | """ 217 | self.restore_avail_resources(nspr=self.cur_nspr) 218 | self.reset_partial_rewards() 219 | self.cur_nspr = None 220 | self.nsprs_seen_in_cur_ep += 1 221 | 222 | self.tot_seen_nsprs += 1 223 | if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode: 224 | self.done = True 225 | self.waiting_nsprs += self.nsprs.get(self.time_step, []) 226 | self.pick_next_nspr() 227 | obs = self.update_nspr_state() 228 | reward = self.rval_rejected_vnf 229 | self.time_step += 1 230 | return obs, reward 231 | 232 | def _normalize_reward_0_10(self, reward): 233 | """ Normalize the reward to be in [0, 10] (as in HA-DRL) """ 234 | # since the global reward is given by the sum for each time step of the 235 | # current NSPR (i.e. for each VNF in the NSPR) of the product of the 3 236 | # partial rewards at time t, 237 | # the maximum possible reward for the given NSPR is given by: 238 | # the number of VNF in the NSPR times 239 | # the maximum acceptance reward value (i.e. every VNF is accepted) times 240 | # the maximum resource consumption reward value (i.e. 1) times 241 | # the maximum tr_load balancing reward value (i.e. 
1+1=2) 242 | max_reward = len(self.cur_nspr.nodes) * self.rval_accepted_vnf * 1 * 2 243 | return reward / max_reward * 10 244 | 245 | @staticmethod 246 | def get_cur_vnf_vls(vnf_id: int, nspr: nx.Graph) -> dict: 247 | """ Get all the virtual links connected to a specific VNF 248 | 249 | :param vnf_id: ID of a VNF whose VLs have to be returned 250 | :param nspr: the NSPR to which the VNF belongs 251 | :return: dict of the VLs connected to the specified VNF 252 | """ 253 | vnf_links = {} 254 | for extremes, vl in nspr.edges.items(): 255 | if vnf_id in extremes: 256 | vnf_links[extremes] = vl 257 | return vnf_links 258 | 259 | def compute_link_weight(self, source: int, target: int, link: dict): 260 | """ Compute the weight of an edge between two nodes. 261 | If the edge satisfies the bandwidth requirement, the weight is 1, else infinity. 262 | 263 | This method is passed to networkx's shortest_path function as a weight function, and it's subject to networkx's API. 264 | It must take exactly 3 arguments: the two endpoints of an edge and the dictionary of edge attributes for that edge. 265 | We need the required bandwidth to compute an edge's weight, so we save it into an attribute of the simulator (self._cur_vl_reqBW). 266 | 267 | :param source: source node in the PSN 268 | :param target: target node in the PSN 269 | :param link: dict of the link's (source - target) attributes 270 | :return: the weight of that link 271 | """ 272 | return 1 if link['availBW'] >= self._cur_vl_reqBW else math.inf 273 | 274 | def _init_obs_dict(self) -> dict: 275 | """ 276 | Initialize the observation dict. 277 | 278 | To be called after reading a PSN and before placing any VNF/VL on it. 
    def _init_obs_dict(self) -> dict:
        """
        Initialize the observation dict.

        To be called after reading a PSN and before placing any VNF/VL on it.
        Also (re)computes self.max_cpu / self.max_ram / self.max_bw and the
        total capacities, and caches the empty-PSN observation for fast reset.
        """
        # check that the env has a PSN
        try:
            if self.psn is None:
                raise ValueError("self.psn is None")
        except AttributeError:
            raise AttributeError("self.psn is not defined")

        # initialize lists
        cpu_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        ram_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        bw_avails = np.zeros(len(self.psn.nodes), dtype=np.float32)
        placement_state = np.zeros(len(self.psn.nodes), dtype=int)

        # scan all nodes and save data in lists
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = 0
        for node_id, node in self.psn.nodes.items():
            # routers/switches have no CPU/RAM attributes, hence .get(..., 0)
            self.tot_cpu_cap += node.get('CPUcap', 0)
            self.tot_ram_cap += node.get('RAMcap', 0)
            cpu_avails[self.map_id_idx[node_id]] = node.get('availCPU', 0)
            ram_avails[self.map_id_idx[node_id]] = node.get('availRAM', 0)
        # scan all links and save data in list
        for extremes, link in self.psn.edges.items():
            self.tot_bw_cap += link['BWcap']
            # per-node BW availability = sum of the BW of the incident links
            bw_avails[self.map_id_idx[extremes[0]]] += link['availBW']
            bw_avails[self.map_id_idx[extremes[1]]] += link['availBW']

        # save max CPU/RAM/BW capacities (= availabilities in empty PSN) of all nodes
        self.max_cpu = np.max(cpu_avails)
        self.max_ram = np.max(ram_avails)
        self.max_bw = np.max(bw_avails)

        # normalize the quantities
        cpu_avails /= self.max_cpu
        ram_avails /= self.max_ram
        bw_avails /= self.max_bw

        obs = {
            # PSN state
            'cpu_avails': cpu_avails,
            'ram_avails': ram_avails,
            'bw_avails': bw_avails,
            'placement_state': placement_state,
            # NSPR state
            # NOTE(review): dtype=int here vs np.float32 in observation_space —
            # benign since update_nspr_state overwrites these keys with float32
            # arrays before any obs is returned, but confirm and align.
            'cur_vnf_cpu_req': np.array([0], dtype=int),
            'cur_vnf_ram_req': np.array([0], dtype=int),
            'cur_vnf_bw_req': np.array([0], dtype=int),
            'vnfs_still_to_place': np.array([0], dtype=int)
        }

        # store the obs for an empty PSN
        del self._empty_psn_obs_dict
        self._empty_psn_obs_dict = copy.deepcopy(obs)

        return obs

    def update_nspr_state(self) -> GymObs:
        """ Get an observation from the environment.

        The PSN state is already dynamically kept updated, so this method
        will only collect data about the NSPR state and complete the observation
        dict, that will be returned.

        :return: an instance of an observation from the environment
        """
        # state regarding the NSPR
        if self.cur_vnf is not None:
            cur_vnf_vls = self.get_cur_vnf_vls(vnf_id=self.cur_vnf_id,
                                               nspr=self.cur_nspr)
            # requirements are normalized by the PSN-wide maxima, like the availabilities
            cur_vnf_cpu_req = np.array(
                [self.cur_vnf['reqCPU'] / self.max_cpu], dtype=np.float32)

            cur_vnf_ram_req = np.array(
                [self.cur_vnf['reqRAM'] / self.max_ram], dtype=np.float32)

            cur_vnf_bw_req = np.array(
                [sum([vl['reqBW'] for vl in cur_vnf_vls.values()]) / self.max_bw],
                dtype=np.float32)

            # +1 because the current VNF has already been popped from the unplaced list
            vnfs_still_to_place = np.array(
                [len(self.cur_nspr_unplaced_vnfs_ids) + 1], dtype=int)
        else:
            # no NSPR under evaluation: zero-out the NSPR part of the obs
            cur_vnf_cpu_req = np.array([0], dtype=np.float32)
            cur_vnf_ram_req = np.array([0], dtype=np.float32)
            cur_vnf_bw_req = np.array([0], dtype=np.float32)
            vnfs_still_to_place = np.array([0], dtype=int)

        self.obs_dict['cur_vnf_cpu_req'] = cur_vnf_cpu_req
        self.obs_dict['cur_vnf_ram_req'] = cur_vnf_ram_req
        self.obs_dict['cur_vnf_bw_req'] = cur_vnf_bw_req
        self.obs_dict['vnfs_still_to_place'] = vnfs_still_to_place
        return self.obs_dict
    def reset(self, **kwargs) -> GymObs:
        """ Method used to reset the environment

        :return: the starting/initial observation of the environment
        """
        self.done = False  # re-set 'done' attribute

        # if last NSPR has not been placed completely, remove it, this is a new episode
        self.cur_nspr = None

        # reset network status (simply re-read the PSN file)
        # (needed because the available BW of the links gets actually modified)
        self.psn = reader.read_psn(graphml_file=self.psn_file)

        self.ep_number += 1
        self.nsprs_seen_in_cur_ep = 0

        # read the NSPRs to be evaluated
        # note: 'self.time_step' is NOT reset, so arrival times keep growing across episodes
        # self.nsprs = reader.read_nsprs(nsprs_path=self.nsprs_path)
        self.nsprs = reader.sample_nsprs(nsprs_path=self.nsprs_path,
                                         n=self.nsprs_per_episode,
                                         min_arrival_time=self.time_step,
                                         max_duration=self.nsprs_max_duration)

        # reset partial rewards to be accumulated across the episodes' steps
        self.reset_partial_rewards()

        # return the obs corresponding to an empty PSN:
        # ALTERNATIVE 1: slower, but runs through the network and works with changing PSNs
        # self._obs_dict = self._init_obs_dict()

        # ALTERNATIVE 2: slightly faster on paper, but does not work with changing PSNs
        del self.obs_dict
        self.obs_dict = copy.deepcopy(self._empty_psn_obs_dict)

        # get arrived NSPRs
        self.waiting_nsprs += self.nsprs.get(self.time_step, [])
        self.pick_next_nspr()

        # update action mask (if no action masking is implemented, it has no effect)
        self._action_mask[:] = True
        # verison one: more randomic
        # indexes = np.random.rand(*self._action_mask.shape) < self.perc_avail_nodes
        # version two: less randomic
        # disable a fixed number of randomly chosen servers
        size = round((1. - self.perc_avail_nodes) * self.action_space.n)
        indexes = np.random.choice(self.action_space.n, size=size, replace=False)
        self._action_mask[indexes] = False

        # new observation
        obs = self.update_nspr_state()

        return obs
the obs dict 461 | self.cur_vnf['placed'] = physical_node_id 462 | self.obs_dict['cpu_avails'][idx] -= self.cur_vnf['reqCPU'] / self.max_cpu 463 | self.obs_dict['ram_avails'][idx] -= self.cur_vnf['reqRAM'] / self.max_ram 464 | self.obs_dict['placement_state'][idx] += 1 465 | 466 | # connect the placed VNF to the other VNFs it's supposed to be connected to 467 | cur_vnf_VLs = self.get_cur_vnf_vls(self.cur_vnf_id, self.cur_nspr) 468 | if not cur_vnf_VLs: 469 | # if the VNF is detached from all others, R.C. reward is 1, 470 | # so it's the neutral when aggregating the rewards into the global one 471 | self._resource_consumption_rewards.append(1) 472 | else: 473 | for (source_vnf, target_vnf), vl in cur_vnf_VLs.items(): 474 | # get the physical nodes where the source and target VNFs are placed 475 | source_node = self.cur_nspr.nodes[source_vnf]['placed'] 476 | target_node = self.cur_nspr.nodes[target_vnf]['placed'] 477 | 478 | # if the VL isn't placed yet and both the source and target VNFs are placed, place the VL 479 | if not vl['placed'] and source_node >= 0 and target_node >= 0: 480 | self._cur_vl_reqBW = vl['reqBW'] 481 | psn_path = nx.shortest_path(G=self.psn, 482 | source=source_node, 483 | target=target_node, 484 | weight=self.compute_link_weight, 485 | method='dijkstra') 486 | 487 | """ if NO path is available, 'nx.shortest_path' will 488 | return an invalid path. Only after the whole VL has been 489 | placed, it is possible to restore the resources 490 | availabilities, so we use this variable to save that the 491 | resources have been exceeded as soon as we find this to 492 | happen, and only after the VL placement, if this var is 493 | True, we restore the resources availabilities. 
""" 494 | exceeded_bw = False 495 | # place VL onto the PSN 496 | # and update the resources availabilities of physical links involved 497 | for i in range(len(psn_path) - 1): 498 | physical_link = self.psn.edges[psn_path[i], psn_path[i + 1]] 499 | extreme1_idx = self.map_id_idx[psn_path[i]] 500 | extreme2_idx = self.map_id_idx[psn_path[i + 1]] 501 | self.obs_dict['bw_avails'][extreme1_idx] -= vl['reqBW'] / self.max_bw 502 | self.obs_dict['bw_avails'][extreme2_idx] -= vl['reqBW'] / self.max_bw 503 | # note: here the PSN is actually modified: the available 504 | # BW of the link is decreased. Needed for shortest path computation 505 | physical_link['availBW'] -= vl['reqBW'] 506 | if physical_link['availBW'] < 0: 507 | exceeded_bw = True 508 | vl['placed'] = psn_path 509 | 510 | if exceeded_bw: 511 | obs, reward = self.manage_unsuccessful_action() 512 | return obs, reward, self.done, info 513 | 514 | # update the resource consumption reward 515 | path_length = len(psn_path) - 1 516 | self._cur_resource_consumption_rewards.append( 517 | 1 / path_length if path_length > 0 else 1) 518 | 519 | # aggregate the resource consumption rewards into a single value for this action 520 | n_VLs_placed_now = len(self._cur_resource_consumption_rewards) 521 | if n_VLs_placed_now == 0: 522 | self._resource_consumption_rewards.append(1.) 523 | else: 524 | self._resource_consumption_rewards.append( 525 | sum(self._cur_resource_consumption_rewards) / n_VLs_placed_now) 526 | self._cur_resource_consumption_rewards = [] 527 | 528 | # save the ID of the next VNF 529 | if self.cur_nspr_unplaced_vnfs_ids: 530 | self.cur_vnf_id = self.cur_nspr_unplaced_vnfs_ids.pop(0) 531 | if self.accumulate_reward: 532 | reward = 0 # global reward is non-zero only after the whole NSPR is placed (as HADRL) 533 | else: 534 | # eventual discount factor of the acceptance reward 535 | if self.discount_acc_rew: 536 | self.acc_rew_disc_fact += self.base_acc_rew_disc_fact 537 | else: 538 | self.acc_rew_disc_fact = 1. 
539 | # reward always givent to the agent 540 | reward = self._acceptance_rewards[-1] * self.acc_rew_disc_fact * \ 541 | self._load_balance_rewards[-1] * \ 542 | self._resource_consumption_rewards[-1] / len(self.cur_nspr.nodes) / \ 543 | 10. # scaling factor 544 | reward = self._normalize_reward_0_10(reward) 545 | else: 546 | # it means we finished the VNFs of the current NSPR 547 | self.nsprs_seen_in_cur_ep += 1 548 | self.tot_seen_nsprs += 1 549 | if self.nsprs_seen_in_cur_ep >= self.nsprs_per_episode: 550 | self.done = True 551 | # reset placement state 552 | self.obs_dict['placement_state'] = np.zeros(len(self.psn.nodes), dtype=int) 553 | # update global reward because the NSPR is fully placed 554 | reward = np.stack((self._acceptance_rewards, 555 | self._resource_consumption_rewards, 556 | self._load_balance_rewards)).prod(axis=0).sum() 557 | # normalize the reward to be in [0, 10] (as they do in HA-DRL) 558 | reward = self._normalize_reward_0_10(reward) * \ 559 | 2 # TODO: per dargli più peso (non da HADRL) 560 | self.reset_partial_rewards() 561 | self.cur_nspr = None # marked as None so a new one can be picked 562 | # update the acceptance ratio 563 | self.accepted_nsprs += 1 564 | 565 | # increase time step 566 | self.time_step += 1 567 | 568 | # check for new and departing NSPRs 569 | if self.nsprs is not None: 570 | self.check_for_departed_nsprs() 571 | self.waiting_nsprs += self.nsprs.get(self.time_step, []) 572 | self.pick_next_nspr() 573 | 574 | # new observation 575 | obs = self.update_nspr_state() 576 | 577 | return obs, reward, self.done, info 578 | 579 | def render(self, mode="human"): 580 | raise NotImplementedError 581 | -------------------------------------------------------------------------------- /src/policies/__init__.py: -------------------------------------------------------------------------------- 1 | from .hadrl_policy import HADRLPolicy 2 | -------------------------------------------------------------------------------- 
class GCNsFeaturesExtractor(BaseFeaturesExtractor):
    """ Features extractor of the HA-DRL policy.

    The PSN state (per-node CPU/RAM/BW availabilities, plus optionally the
    placement state) is passed through a stack of GCN layers, while the NSPR
    state (current VNF requirements) goes through a fully-connected layer;
    the two resulting vectors are concatenated into the final features vector.
    """

    def __init__(
            self,
            observation_space: gym.Space,
            psn: nx.Graph,
            activation_fn: Type[nn.Module],
            gcn_layers_dims: Tuple[int],
            nspr_out_features: int = 4
    ):
        """ Constructor

        :param observation_space: the observation space of the agent using this feature extractor
        :param psn: the PSN graph of the environment which the agent acts upon
        :param activation_fn: activation function class to be used (e.g. nn.Tanh)
        :param gcn_layers_dims: dimensions of the features vector of each node in each GCN layer
            - number of layers = length of the tuple
        :param nspr_out_features: dimension of the features vector of the NSPR state
        """
        # NOTE: these attributes are set before super().__init__ because
        # features_dim (required by the superclass) is derived from them
        self.activation = activation_fn
        self.n_nodes = len(psn.nodes)
        self.gcn_layers_dims = gcn_layers_dims  # saved in an attribute for logging purposes
        gcn_out_channels = gcn_layers_dims[-1]
        # one gcn_out_channels-dim vector per PSN node + the NSPR embedding
        features_dim = gcn_out_channels * self.n_nodes + nspr_out_features
        super().__init__(observation_space, features_dim=features_dim)

        # 4 per-node input features when the placement state is part of the
        # observation, 3 otherwise (CPU, RAM, BW availabilities)
        self.psn_state_features = 4 if 'placement_state' in observation_space.spaces else 3
        self.nspr_state_features = 4

        # build the edge index for the GCN layers: each undirected PSN edge is
        # duplicated in both directions (GCNConv expects directed edge pairs)
        edges = th.tensor(np.array(psn.edges).reshape((len(psn.edges), 2)),
                          dtype=th.long)
        double_edges = th.cat((edges, th.flip(edges, dims=(1,))))
        self.edge_index = double_edges.t().contiguous()

        # GCN layers (local renamed from 'gcn_layers_dims' to avoid shadowing
        # the constructor parameter / saved attribute)
        dims = [self.psn_state_features] + list(gcn_layers_dims)
        self.gcn_layers = nn.ModuleList()
        for i in range(len(dims) - 1):
            self.gcn_layers.append(GCNConv(dims[i], dims[i + 1]))

        # fully-connected layer for the NSPR state
        self.nspr_fc = Linear(in_features=self.nspr_state_features,
                              out_features=nspr_out_features)

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """ Extract features from a batch of observations

        :param observations: dict of batched observation tensors
        :return: tensor of shape (batch, features_dim)
        """
        # save device (the one where the weights and observations are)
        device = observations['cpu_avails'].device

        # move edge_index to the correct device (no-op when already there)
        self.edge_index = self.edge_index.to(device)

        # FIX: instantiate the (stateless) activation module once per forward
        # pass instead of constructing a new instance at every layer application
        activation = self.activation()

        # batch size (length of the rollout buffer)
        batch_size = len(observations['cpu_avails'])

        # features extraction of the PSN state
        psn_state = th.empty(
            size=(batch_size, self.n_nodes, self.psn_state_features),
            dtype=th.float, device=device)
        psn_state[:, :, 0] = observations['cpu_avails']
        psn_state[:, :, 1] = observations['ram_avails']
        psn_state[:, :, 2] = observations['bw_avails']
        if 'placement_state' in observations:
            psn_state[:, :, 3] = observations['placement_state']

        # pass the psn_state through the GCN layers
        gcn_out = psn_state
        for gcn_layer in self.gcn_layers:
            gcn_out = activation(gcn_layer(gcn_out, self.edge_index))
        gcn_out = gcn_out.flatten(start_dim=1)

        # features extraction of the NSPR state
        nspr_state = th.empty(size=(batch_size, 1, self.nspr_state_features),
                              dtype=th.float, device=device)
        nspr_state[:, :, 0] = observations['cur_vnf_cpu_req']
        nspr_state[:, :, 1] = observations['cur_vnf_ram_req']
        nspr_state[:, :, 2] = observations['cur_vnf_bw_req']
        nspr_state[:, :, 3] = observations['vnfs_still_to_place']
        nspr_fc_out = activation(self.nspr_fc(nspr_state.flatten(start_dim=1)))

        # concatenation of the two features vectors
        return th.cat((gcn_out, nspr_fc_out), dim=1)
class HADRLPolicy(MultiInputActorCriticPolicy):
    """ Policy network from the paper HA-DRL [1]

    Uses two separate (non-shared) GCN-based features extractors for the
    actor and the critic, followed by the HADRL actor-critic MLP extractor.

    [1] https://ieeexplore.ieee.org/document/9632824
    """
    # human-readable identifier of this policy (e.g. for logging)
    name = 'HADRL Policy'

    def __init__(
            self,
            observation_space: gym.spaces.Space,
            action_space: gym.spaces.Space,
            lr_schedule: Callable[[float], float],
            psn: nx.Graph,
            servers_map_idx_id: Dict[int, int],
            net_arch: Optional[Union[List[int], Dict[str, List[int]]]] = None,
            activation_fn: Type[nn.Module] = nn.Tanh,
            gcn_layers_dims: Tuple[int] = (60,),
            nspr_out_features: int = 4,
            use_heuristic: bool = False,
            heu_kwargs: dict = None,
            *args,
            **kwargs,
    ):
        """
        :param observation_space: Observation space of the agent
        :param action_space: Action space of the agent
        :param lr_schedule: Learning rate schedule
        :param psn: Physical Substrate Network the environment acts upon
        :param servers_map_idx_id: Mapping between servers' indexes and their IDs
        :param net_arch: architecture of the policy and value networks after the feature extractor
        :param activation_fn: Activation function
        :param gcn_layers_dims: Dimensions of the GCN layers
        :param nspr_out_features: Number of output features of the NSPR state
        :param use_heuristic: Whether to use the heuristic or not
        :param heu_kwargs: Keyword arguments for the heuristic
        """

        # assert len(net_arch) == 1 and isinstance(net_arch[0], dict), \
        #     "This policy allows net_arch to be a list with only one dict"

        self.psn = psn
        self.gcn_layers_dims = gcn_layers_dims  # saved in an attribute for logging purposes
        self.servers_map_idx_id = servers_map_idx_id
        self.use_heuristic = use_heuristic
        self.heu_kwargs = heu_kwargs

        super(HADRLPolicy, self).__init__(
            observation_space,
            action_space,
            lr_schedule,
            net_arch,
            activation_fn,
            # Pass remaining arguments to base class
            *args,
            **kwargs,
        )
        # non-shared features extractors for the actor and the critic
        # (note: tanh activation for the actor's extractor, ReLU for the critic's)
        self.policy_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.Tanh, gcn_layers_dims,
            nspr_out_features
        )
        self.value_features_extractor = GCNsFeaturesExtractor(
            observation_space, psn, nn.ReLU, gcn_layers_dims,
            nspr_out_features
        )
        self.features_dim = {'pi': self.policy_features_extractor.features_dim,
                             'vf': self.value_features_extractor.features_dim}
        delattr(self, "features_extractor")  # remove the shared features extractor

        # TODO: check what this step actually does
        # Disable orthogonal initialization
        # self.ortho_init = False

        # Workaround alert!
        # This method is called in the super-constructor. It creates the optimizer,
        # but using also the params of the features extractor before creating
        # our own 2 separate ones ('policy_features_extractor' and
        # 'value_features_extractor'). Therefore we need to re-create the optimizer
        # using the params of the correct new features extractor.
        # (it will also re-do a bunch of things like re-creating the mlp_extractor,
        # which was fine, but it's not a problem).
        self._rebuild(lr_schedule)

    def _rebuild(self, lr_schedule: Schedule) -> None:
        """
        Like method _build, but needed to be re-called to re-create the
        optimizer, since it was created using obsolete parameters, i.e. params
        including the ones of the default shared features extractor and NOT
        including the ones of the new features extractors.
        The mlp_extractor is recreated too, since it was created with incorrect features_dim.

        :param lr_schedule: Learning rate schedule
            lr_schedule(1) is the initial learning rate
        """
        self._build_mlp_extractor()

        # action_net and value_net as created in the '_build' method are OK,
        # no need to recreate them.

        # Init weights: use orthogonal initialization
        # with small initial weight for the output
        if self.ortho_init:
            # TODO: check for features_extractor
            # Values from stable-baselines.
            # features_extractor/mlp values are
            # originally from openai/baselines (default gains/init_scales).
            module_gains = {
                self.policy_features_extractor: np.sqrt(2),
                self.value_features_extractor: np.sqrt(2),
                self.mlp_extractor: np.sqrt(2),
                self.action_net: 0.01,
                self.value_net: 1,
            }
            for module, gain in module_gains.items():
                module.apply(partial(self.init_weights, gain=gain))

        # Setup optimizer with initial learning rate
        # (now that all sub-modules exist, self.parameters() is complete)
        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)

    def _build_mlp_extractor(self) -> None:
        # create the HADRL-specific actor-critic network that consumes the
        # features produced by the two separate extractors
        self.mlp_extractor = HADRLActorCriticNet(
            action_space=self.action_space,
            psn=self.psn,
            net_arch=self.net_arch,
            servers_map_idx_id=self.servers_map_idx_id,
            features_dim=self.features_dim,
            use_heuristic=self.use_heuristic,
            heu_kwargs=self.heu_kwargs
        )

    def extract_features(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """
        Preprocess the observation if needed and extract features.

        :param obs: Observation
        :return: the output of the feature extractor(s), as a
            (policy_features, value_features) pair
        """
        assert self.policy_features_extractor is not None and \
               self.value_features_extractor is not None
        preprocessed_obs = preprocess_obs(obs, self.observation_space,
                                          normalize_images=self.normalize_images)
        policy_features = self.policy_features_extractor(preprocessed_obs)
        value_features = self.value_features_extractor(preprocessed_obs)
        return policy_features, value_features

    def forward(self, obs: th.Tensor, deterministic: bool = False) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Forward pass in all the networks (actor and critic)

        :param obs: Observation
        :param deterministic: Whether to sample or use deterministic actions
        :return: action, value and log probability of the action
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        # the actor also receives the raw obs (needed by the heuristic layer)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)

        # Evaluate the values for the given observations
        values = self.value_net(latent_vf)
        distribution = self._get_action_dist_from_latent(latent_pi)
        actions = distribution.get_actions(deterministic=deterministic)
        log_prob = distribution.log_prob(actions)
        return actions, values, log_prob

    def evaluate_actions(self, obs: th.Tensor, actions: th.Tensor) -> \
            Tuple[th.Tensor, th.Tensor, th.Tensor]:
        """
        Evaluate actions according to the current policy,
        given the observations.

        :param obs: Observation
        :param actions: Actions
        :return: estimated value, log likelihood of taking those actions
            and entropy of the action distribution.
        """
        # Preprocess the observation if needed
        policy_features, value_features = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        distribution = self._get_action_dist_from_latent(latent_pi)
        log_prob = distribution.log_prob(actions)
        values = self.value_net(latent_vf)
        return values, log_prob, distribution.entropy()

    def get_distribution(self, obs: th.Tensor) -> Distribution:
        """
        Get the current policy distribution given the observations.

        :param obs: Observation
        :return: the action distribution.
        """
        policy_features, _ = self.extract_features(obs)
        latent_pi = self.mlp_extractor.forward_actor(policy_features, obs)
        return self._get_action_dist_from_latent(latent_pi)

    def predict_values(self, obs: th.Tensor) -> th.Tensor:
        """
        Get the estimated values according to the current policy given the observations.

        :param obs: Observation
        :return: the estimated values.
        """
        _, value_features = self.extract_features(obs)
        latent_vf = self.mlp_extractor.forward_critic(value_features)
        return self.value_net(latent_vf)
221 | """ 222 | _, value_features = self.extract_features(obs) 223 | latent_vf = self.mlp_extractor.forward_critic(value_features) 224 | return self.value_net(latent_vf) 225 | -------------------------------------------------------------------------------- /src/policies/mlp_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .hadrl_mlp_extractor import HADRLActor, HADRLCritic, HADRLActorCriticNet 2 | -------------------------------------------------------------------------------- /src/policies/mlp_extractors/hadrl_mlp_extractor.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Union, List 2 | 3 | import gym 4 | import networkx as nx 5 | import torch as th 6 | from torch import nn 7 | 8 | from heuristic_layers import P2CLoadBalanceHeuristic, HADRLHeuristic 9 | 10 | 11 | class HADRLActor(nn.Module): 12 | """ Actor network for the HA-DRL [1] algorithm 13 | 14 | [1] https://ieeexplore.ieee.org/document/9632824 15 | """ 16 | 17 | def __init__( 18 | self, 19 | action_space: gym.Space, 20 | psn: nx.Graph, 21 | net_arch: Union[List[int], Dict[str, List[int]]], 22 | servers_map_idx_id: Dict[int, int], 23 | in_features: int, 24 | use_heuristic: bool = False, 25 | heu_kwargs: dict = None, 26 | ): 27 | """ Constructor 28 | 29 | :param action_space: action space 30 | :param psn: env's physical substrate network 31 | :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids 32 | :param use_heuristic: if True, actor will use P2C heuristic 33 | """ 34 | super().__init__() 35 | self.use_heuristic = use_heuristic 36 | heu_class = heu_kwargs.get('heu_class', HADRLHeuristic) 37 | 38 | # layers 39 | dims = [in_features] + net_arch['pi'] 40 | modules = nn.ModuleList() 41 | for i in range(len(dims) - 1): 42 | modules.append(nn.Linear(dims[i], dims[i + 1])) 43 | modules.append(nn.Tanh()) 44 | 45 | if self.use_heuristic: 46 
| self.heu_layer = heu_class(action_space, servers_map_idx_id, psn, 47 | **heu_kwargs).requires_grad_(False) 48 | 49 | self.layers = nn.Sequential(*modules) 50 | 51 | def forward(self, x: th.Tensor, obs: th.Tensor) -> th.Tensor: 52 | x = self.layers(x) 53 | if self.use_heuristic: 54 | x = self.heu_layer(x, obs) 55 | return x 56 | 57 | 58 | class HADRLCritic(nn.Module): 59 | """ Critic network for the HA-DRL [1] algorithm 60 | 61 | [1] https://ieeexplore.ieee.org/document/9632824 62 | """ 63 | 64 | def __init__( 65 | self, 66 | in_features: int, 67 | net_arch: List[Union[int, Dict[str, List[int]]]] 68 | ): 69 | """ Constructor 70 | 71 | :param in_features: number of features extracted by the features extractor, 72 | i.e., input dim of the first layer of the network 73 | """ 74 | super().__init__() 75 | dims = [in_features] + net_arch['vf'] 76 | modules = nn.ModuleList() 77 | for i in range(len(dims) - 1): 78 | modules.append(nn.Linear(dims[i], dims[i + 1])) 79 | modules.append(nn.ReLU()) 80 | self.layers = nn.Sequential(*modules) 81 | 82 | def forward(self, x: th.Tensor) -> th.Tensor: 83 | return self.layers(x) 84 | 85 | 86 | class HADRLActorCriticNet(nn.Module): 87 | """ 88 | Actor-Critic network for the HA-DRL [1] algorithm 89 | 90 | [1] https://ieeexplore.ieee.org/document/9632824 91 | """ 92 | 93 | def __init__( 94 | self, 95 | action_space: gym.Space, 96 | psn: nx.Graph, 97 | net_arch: List[Union[int, Dict[str, List[int]]]], 98 | servers_map_idx_id: Dict[int, int], 99 | features_dim: Union[int, Dict[str, int]], 100 | gcn_out_channels: int = 60, 101 | nspr_out_features: int = 4, 102 | use_heuristic: bool = False, 103 | heu_kwargs: dict = None, 104 | ): 105 | """ Constructor 106 | 107 | :param action_space: action space 108 | :param psn: env's physical substrate network 109 | :param servers_map_idx_id: map (dict) between servers indexes (agent's actions) and their ids 110 | :param policy_features_dim: 111 | :param value_features_dim: 112 | :param 
gcn_out_channels: number of output channels of the GCN layer 113 | :param nspr_out_features: output dim of the layer that receives the NSPR state 114 | :param use_heuristic: if True, actor will use P2C heuristic 115 | """ 116 | super(HADRLActorCriticNet, self).__init__() 117 | 118 | # IMPORTANT: 119 | # Save output dimensions, used to create the distributions 120 | self.latent_dim_pi = net_arch['pi'][-1] 121 | self.latent_dim_vf = net_arch['vf'][-1] 122 | 123 | if isinstance(features_dim, int): 124 | policy_features_dim = value_features_dim = features_dim 125 | else: 126 | policy_features_dim = features_dim['pi'] 127 | value_features_dim = features_dim['vf'] 128 | 129 | # policy network 130 | self.policy_net = HADRLActor(action_space, psn, net_arch, 131 | servers_map_idx_id, policy_features_dim, 132 | use_heuristic, heu_kwargs) 133 | # value network 134 | self.value_net = HADRLCritic(value_features_dim, net_arch) 135 | 136 | def forward(self, features: th.Tensor, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: 137 | """ 138 | :return: (th.Tensor, th.Tensor) latent_policy, latent_value of the specified network. 
139 | If all layers are shared, then ``latent_policy == latent_value`` 140 | """ 141 | return self.policy_net(features, obs), self.value_net(features) 142 | 143 | def forward_actor(self, features: th.Tensor, obs: th.Tensor) -> th.Tensor: 144 | return self.policy_net(features, obs) 145 | 146 | def forward_critic(self, features: th.Tensor) -> th.Tensor: 147 | return self.value_net(features) 148 | -------------------------------------------------------------------------------- /src/reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import Tuple, List, Dict 4 | 5 | import networkx as nx 6 | 7 | 8 | def check_if_graphml(file: str): 9 | """ Checks if a file is a GraphML file (checking the extension) 10 | 11 | :param file: path to the file to be checked 12 | :raise ValueError: is case the file is not a GraphML file 13 | """ 14 | if not file.endswith(".graphml"): 15 | raise ValueError("{} is not a GraphML file".format(file)) 16 | 17 | 18 | def _check_graph(network: nx.Graph): 19 | """ Checks that the graph is correct 20 | 21 | :param network: network that needs to be checked 22 | 23 | :raise AssertionError: if some graph's attributes are not correct 24 | """ 25 | if "E2ELatency" in network.graph.keys(): 26 | assert network.graph['E2ELatency'] > 0 27 | # if E2ELatency is present, it means the network is a NSPR 28 | if "ArrivalTime" in network.graph.keys(): 29 | assert network.graph['ArrivalTime'] >= 0 30 | else: 31 | network.graph['ArrivalTime'] = 0 32 | if "DepartureTime" in network.graph.keys(): 33 | assert network.graph['DepartureTime'] >= \ 34 | network.graph['ArrivalTime'] + len(network.nodes.keys()) 35 | 36 | 37 | def _check_nodes(network: nx.Graph, required_node_attributes: Tuple[str, ...], 38 | **admissible_values: tuple): 39 | """ Checks that the nodes of the network are correct 40 | 41 | :param network: network whose nodes have to be checked 42 | :param 
required_node_attributes: tuple with all required attributes for the nodes 43 | :param admissible_values: (optional) extra arguments where the name is an 44 | attribute name and the value is a tuple with the admissible values 45 | 46 | :raise AssertionError: 47 | - in case some nodes don't contain all the required parameters 48 | - in case some non-admissible values are used for some arguments 49 | """ 50 | for node_id, node in network.nodes.items(): 51 | # if the admissible values for a certain attribute are passed, 52 | # check that the value of each attribute is admissible 53 | for attrib, value in node.items(): 54 | assert value in admissible_values.get(attrib, (value,)) 55 | if attrib in ("CPUcap", "RAMcap", "availCPU", "availRAM", "reqCPU", "reqRAM"): 56 | assert value >= 0 57 | # the following checks are for servers or VNFs only, in case skip 58 | if node.get("NodeType", "server") != "server": 59 | # if node hasn't attrib "NodeType", it's a VNF, so don't skip iteration 60 | continue 61 | if "reqCPU" in node.keys(): 62 | # 'reqCPU' is a mandatory argument for NSPR, so if it's present, the node is a VNF 63 | # add an attribute to specify if a VNF has been placed onto the PSN 64 | node['placed'] = -1 65 | else: 66 | # it means the node belongs to a PSN and not to a NSPR 67 | node['availCPU'] = node['CPUcap'] 68 | node['availRAM'] = node['RAMcap'] 69 | # check that all required attributes are present in the current node 70 | assert all(req_attrib in node.keys() for req_attrib in required_node_attributes) 71 | 72 | 73 | def _check_edges(network: nx.Graph, required_link_attributes: Tuple[str, ...], **admissible_values: tuple): 74 | """ Checks that the edges of the network are correct 75 | 76 | :param network: network whose edges have to be checked 77 | :param required_link_attributes: tuple with all required attributes for the links 78 | :param admissible_values: (optional) extra arguments where the name is an 79 | attribute name and the value is a tuple with the 
admissible values 80 | 81 | :raise AssertionError: 82 | - in case some links don't contain all the required parameters 83 | - in case some non-admissible values are used for some arguments 84 | """ 85 | for node_A, node_B in list(network.edges): 86 | cur_link_attribs = network.edges[node_A, node_B].keys() 87 | cur_link_values = network.edges[node_A, node_B].values() 88 | cur_link_attribs_values = zip(cur_link_attribs, cur_link_values) 89 | # check that all required attributes are present in the current link 90 | assert all(attrib in cur_link_attribs for attrib in required_link_attributes) 91 | # if the admissible values for a certain attribute are passed, 92 | # check that the value of each attribute is admissible 93 | for attrib, value in cur_link_attribs_values: 94 | assert value in admissible_values.get(attrib, (value,)) 95 | if attrib in ("BWcap", "reqBW", "Latency", "reqLatency"): 96 | assert value >= 0 97 | # initialize resources availabilities if PSN 98 | if "reqBW" in cur_link_attribs: 99 | # 'reqBW' is a mandatory argument for NSPR, so if it's present, the link is a VL 100 | network.edges[node_A, node_B]['placed'] = [] 101 | else: 102 | # it means the link is physical and belongs to a PSN (and not to a NSPR) 103 | network.edges[node_A, node_B]['availBW'] = network.edges[node_A, node_B]['BWcap'] 104 | 105 | 106 | def check_required_attributes(network: nx.Graph, required_node_attributes: Tuple[str, ...], 107 | required_link_attributes: Tuple[str, ...], **admissible_values: tuple): 108 | """ Checks whether all the required attributes are present in the nodes and link of the network passed as argument 109 | 110 | :param network: network whose nodes and links have to be checked 111 | :param required_node_attributes: tuple with all required attributes for the nodes 112 | :param required_link_attributes: tuple with all required attributes for the links 113 | :param admissible_values: (optional) extra arguments where the name is an 114 | attribute name and the 
value is a tuple with the admissible values 115 | 116 | :raise AssertionError: 117 | - in case some nodes/links don't contain all the required parameters 118 | - in case some non-admissible values are used for some arguments 119 | """ 120 | _check_graph(network) 121 | _check_nodes(network, required_node_attributes, **admissible_values) 122 | _check_edges(network, required_link_attributes, **admissible_values) 123 | 124 | 125 | def read_psn(graphml_file: str) -> nx.Graph: 126 | """ Reads a GraphML file containing the definition of a PSN 127 | 128 | :param graphml_file: GraphML file containing the definition of the PSN 129 | :return: a networkx.Graph representing the PSN 130 | 131 | :raise ValueError: if "graphml_file" is not a GraphML file 132 | :raise AssertionError: if some required attributes of nodes and links are missing 133 | """ 134 | check_if_graphml(graphml_file) # check if the file passed is a GraphML file 135 | 136 | # read the GraphML file and create a nx.Graph object 137 | psn = nx.read_graphml(path=graphml_file, node_type=int) 138 | 139 | # check that the attributes of the graph are correct 140 | check_required_attributes(network=psn, 141 | required_node_attributes=("NodeType", "CPUcap", "RAMcap"), 142 | required_link_attributes=("BWcap",), 143 | NodeType=("UAP", "router", "switch", "server")) 144 | return psn 145 | 146 | 147 | def read_single_nspr(graphml_file: str) -> nx.Graph: 148 | """ Reads a single NSPR (network slice placement request) 149 | 150 | :param graphml_file: GraphML file with the definition of the NSPR 151 | :return: the NSPR as a networkx.Graph object 152 | 153 | :raise ValueError: if "graphml_file" is not a GraphML file 154 | :raise AssertionError: if some required attributes of nodes and links are missing 155 | """ 156 | check_if_graphml(graphml_file) # check if the file passed is a GraphML file 157 | 158 | # read the GraphML file and create a nx.Graph object 159 | nspr = nx.read_graphml(path=graphml_file, node_type=int) 160 | 161 | 
# check that the attributes of the graph are correct 162 | check_required_attributes(network=nspr, 163 | required_node_attributes=("reqCPU", "reqRAM"), 164 | required_link_attributes=("reqBW",)) 165 | return nspr 166 | 167 | 168 | def read_nsprs(nsprs_path: str) -> Dict[int, List[nx.Graph]]: 169 | """ Reads all the NSPRs (network slice placement requests) in a directory 170 | 171 | :param nsprs_path: either path to the directory with the files defining a 172 | NSPR each or the path to a single NSPR 173 | :return: a dict having as keys the arrival times of the NSPRs and as 174 | values the NSPRs themselves 175 | :raise ValueError: if nsprs_path is neither a directory nor a file 176 | """ 177 | if not os.path.isdir(nsprs_path) and not os.path.isfile(nsprs_path): 178 | raise ValueError(f"{nsprs_path} is neither a directory nor a file") 179 | 180 | nspr_dict = {} # save the NSPRs in a dict with the arrival times as keys 181 | if os.path.isfile(nsprs_path): 182 | nspr = read_single_nspr(nsprs_path) 183 | if nspr.graph['ArrivalTime'] not in nspr_dict.keys(): 184 | nspr_dict[nspr.graph['ArrivalTime']] = [nspr] 185 | else: 186 | nspr_dict[nspr.graph['ArrivalTime']].append(nspr) 187 | return nspr_dict 188 | 189 | dir_path = nsprs_path 190 | for graphml_file in os.listdir(dir_path): 191 | nspr = read_single_nspr(os.path.join(dir_path, graphml_file)) 192 | nspr_dict[nspr.graph['ArrivalTime']] = nspr_dict.get(nspr.graph['ArrivalTime'], []) + [nspr] 193 | return nspr_dict 194 | 195 | 196 | def sample_nsprs(nsprs_path: str, n: int, min_arrival_time: int = 0, 197 | max_duration: int = 100) -> Dict[int, List[nx.Graph]]: 198 | """ Samples a subset of NSPRs from a directory containing multiple NSPRs. 199 | It assigns random arrival and departure time to those NSPRs. 
200 | 201 | :param nsprs_path: path to the directory containing the NSPRs 202 | :param n: number of NSPRs to sample 203 | :param min_arrival_time: minimum arrival time to assign to the sampled NSPRs 204 | :param max_duration: maximum duration (dep. time - arr. time) to assign to the sampled NSPRs 205 | :return: a dict having as keys the arrival times of the NSPRs and as 206 | values the NSPRs themselves 207 | :raise ValueError: if nsprs_path is not a directory 208 | """ 209 | if not os.path.isdir(nsprs_path): 210 | raise ValueError(f"{nsprs_path} is not a directory") 211 | 212 | all_nsprs_files = os.listdir(nsprs_path) 213 | n = min(n, len(all_nsprs_files)) if n is not None else len(all_nsprs_files) 214 | sampled_nsprs_files = random.sample(all_nsprs_files, n) 215 | arrival_times = random.sample(range(min_arrival_time, min_arrival_time + max_duration), n) 216 | nspr_dict = {} 217 | for i, arr_time in enumerate(arrival_times): 218 | nspr = read_single_nspr(os.path.join(nsprs_path, sampled_nsprs_files[i])) 219 | nspr.graph['ArrivalTime'] = arr_time 220 | nspr.graph['duration'] = random.randint(len(nspr.nodes), max_duration) 221 | nspr_dict[arr_time] = nspr_dict.get(arr_time, []) + [nspr] 222 | return nspr_dict 223 | -------------------------------------------------------------------------------- /src/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .discrete_with_negatives import DiscreteWithNegatives 2 | 3 | __all__ = [ 4 | "DiscreteWithNegatives", 5 | ] 6 | -------------------------------------------------------------------------------- /src/spaces/discrete_with_negatives.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of a space consisting of finitely many elements. 3 | 4 | DISCLAIMER: 5 | This file is taken and slightly modified from the Discrete space of OpenAI gym release 0.25.1. 

stable-baselines3-1.5.0 requires gym==0.21, since they introduced breaking changes in 0.22.
In this project, it is required to have a discrete space with the 'start' attribute, which
was introduced only in later versions of gym, therefore a custom space
(similar to later versions of the Discrete space in gym) is needed.
"""

from typing import Optional, Union

import numpy as np

from gym.spaces.space import Space
from gym.utils import seeding


class DiscreteWithNegatives(Space):
    r"""A space consisting of finitely many elements.

    This class represents a finite subset of integers, more specifically a set
    of the form :math:`\{ a, a+1, \dots, a+n-1 \}` (the offset ``a`` may be
    negative, which gym 0.21's ``Discrete`` does not support).

    Example::

        >>> DiscreteWithNegatives(2)            # {0, 1}
        >>> DiscreteWithNegatives(3, start=-1)  # {-1, 0, 1}
    """

    def __init__(
        self,
        n: int,
        seed: Optional[int] = None,
        start: int = 0,
    ):
        r"""Constructor of :class:`DiscreteWithNegatives` space.

        This will construct the space :math:`\{\text{start}, ..., \text{start} + n - 1\}`.

        Args:
            n (int): The number of elements of this space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
            start (int): The smallest element of this space.
        """
        assert isinstance(n, (int, np.integer))
        assert n > 0, "n (counts) have to be positive"
        assert isinstance(start, (int, np.integer))
        # cast to plain ints so numpy scalar inputs don't leak into attributes
        self.n = int(n)
        self.start = int(start)
        # shape () and dtype int64, matching gym's Discrete space
        super().__init__((), np.int64, seed)

    def sample(self, mask: Optional[np.ndarray] = None) -> int:
        """Generates a single random sample from this space.

        A sample will be chosen uniformly at random with the mask if provided

        Args:
            mask: An optional mask for if an action can be selected.
                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.

        Returns:
            A sampled integer from the space
        """
        if mask is not None:
            # validate the mask strictly (type, dtype, shape, 0/1 values)
            assert isinstance(
                mask, np.ndarray
            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
            assert (
                mask.dtype == np.int8
            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
            assert mask.shape == (
                self.n,
            ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
            valid_action_mask = mask == 1
            assert np.all(
                np.logical_or(mask == 0, valid_action_mask)
            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
            if np.any(valid_action_mask):
                # uniform choice among the indexes flagged valid, shifted by start
                return int(
                    self.start + self.np_random.choice(np.where(valid_action_mask)[0])
                )
            else:
                # fully-masked space: fall back to the smallest element
                return self.start

        # no mask: uniform over [start, start + n)
        # (np_random is a RandomState under gym 0.21, hence randint)
        return int(self.start + self.np_random.randint(self.n))

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            # accept 0-d integer numpy scalars/arrays as members
            as_int = int(x)  # type: ignore
        else:
            return False
        return self.start <= as_int < self.start + self.n

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        if self.start != 0:
            return "DiscreteWithNegatives(%d, start=%d)" % (self.n, self.start)
        return "DiscreteWithNegatives(%d)" % self.n

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        return (
            isinstance(other, DiscreteWithNegatives)
            and self.n == other.n
            and self.start ==
other.start 114 | ) 115 | 116 | def __setstate__(self, state): 117 | """Used when loading a pickled space. 118 | 119 | This method has to be implemented explicitly to allow for loading of legacy states. 120 | 121 | Args: 122 | state: The new state 123 | """ 124 | super().__setstate__(state) 125 | 126 | # Don't mutate the original state 127 | state = dict(state) 128 | 129 | # Allow for loading of legacy states. 130 | # See https://github.com/openai/gym/pull/2470 131 | if "start" not in state: 132 | state["start"] = 0 133 | 134 | # Update our state 135 | self.__dict__.update(state) 136 | -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List, Optional, Type 3 | 4 | import gym 5 | import wandb 6 | from stable_baselines3 import A2C 7 | from stable_baselines3.common.callbacks import BaseCallback, EvalCallback 8 | from stable_baselines3.common.env_util import make_vec_env 9 | from torch import nn 10 | from wandb.integration.sb3 import WandbCallback 11 | 12 | import reader 13 | from callbacks.acceptance_ratio_callbacks import AcceptanceRatioByNSPRsCallback 14 | from callbacks.hparam_callback import HParamCallback 15 | from callbacks.psn_load_callback import PSNLoadCallback 16 | from callbacks.seen_nsprs_callback import SeenNSPRsCallback 17 | from policies.features_extractors.hadrl_features_extractor import \ 18 | GCNsFeaturesExtractor 19 | from utils import make_env 20 | 21 | 22 | class Trainer: 23 | def __init__( 24 | self, 25 | psn_path: str, 26 | n_tr_envs: int, 27 | load_perc: float, 28 | time_limit: bool, 29 | max_ep_steps: int, 30 | tensorboard_log: str, 31 | create_eval_env: bool = False, 32 | reset_load_class: Optional[gym.Wrapper] = None, 33 | reset_load_kwargs: dict = dict(cpu_load=0.8), 34 | # reset_load_kwargs: dict = dict(rand_load=True, rand_range=(0., 1.)), 35 | placement_state: bool = True, 
36 | accumulate_rew: bool = True, 37 | discount_acc_rew: bool = True, 38 | dynamic_connectivity: int = False, 39 | dynamic_connectivity_kwargs: dict = dict(link_bw=10_000), 40 | generate_nsprs: bool = True, 41 | nsprs_per_ep: int = 1, 42 | vnfs_per_nspr: int = 5, 43 | always_one: bool = True, 44 | seed: Optional[int] = None, 45 | net_arch: dict = dict(pi=[256, 128], vf=[256, 128, 32]), 46 | activation_fn: Type[nn.Module] = nn.Tanh, 47 | gcn_layers_dims: tuple = (20, 20, 20), 48 | device: str = 'cuda:0', 49 | lr: float = 0.0002, 50 | n_steps: int = 1, 51 | gamma: float = 0.99, 52 | ent_coef: float = 0.01, 53 | gae_lambda: float = 0.92, 54 | # eval_load: Optional[float] = None, 55 | ): 56 | # checks on argumetns 57 | assert n_tr_envs > 0 58 | assert 0. <= load_perc < 1., "Training load must be a percentage between 0 and 1" 59 | 60 | # save some attributes 61 | self.nsprs_per_ep = nsprs_per_ep 62 | self.max_ep_steps = max_ep_steps 63 | self.time_limit = time_limit 64 | self.placement_state = placement_state 65 | 66 | # read PSN file 67 | psn = reader.read_psn(psn_path) 68 | 69 | # create trainin environment 70 | self.tr_env = make_vec_env( 71 | env_id=make_env, 72 | n_envs=n_tr_envs, 73 | env_kwargs=dict( 74 | psn_path=psn_path, 75 | base_env_kwargs=dict( 76 | accumulate_reward=accumulate_rew, 77 | discount_acc_rew=discount_acc_rew, 78 | ), 79 | time_limit=time_limit, 80 | time_limit_kwargs=dict(max_episode_steps=max_ep_steps), 81 | generate_nsprs=generate_nsprs, 82 | nsprs_gen_kwargs=dict( 83 | nsprs_per_ep=nsprs_per_ep, 84 | vnfs_per_nspr=vnfs_per_nspr, 85 | load=load_perc, 86 | always_one=always_one 87 | ), 88 | reset_load_class=reset_load_class, 89 | reset_load_kwargs=reset_load_kwargs, 90 | placement_state=placement_state, 91 | dynamic_connectivity=dynamic_connectivity, 92 | dynamic_connectivity_kwargs=dynamic_connectivity_kwargs 93 | ), 94 | seed=seed, 95 | ) 96 | 97 | # create evaluation environment 98 | if create_eval_env: 99 | self.eval_env = 
copy.deepcopy(self.tr_env) 100 | 101 | # create the model 102 | self.model = A2C(policy='MultiInputPolicy', env=self.tr_env, verbose=2, device=device, 103 | learning_rate=lr, 104 | n_steps=n_steps, 105 | gamma=gamma, 106 | ent_coef=ent_coef, 107 | gae_lambda=gae_lambda, 108 | seed=seed, 109 | use_rms_prop=True, 110 | tensorboard_log=tensorboard_log, 111 | policy_kwargs=dict( 112 | activation_fn=activation_fn, 113 | net_arch=net_arch, 114 | features_extractor_class=GCNsFeaturesExtractor, 115 | share_features_extractor=False, 116 | features_extractor_kwargs=dict( 117 | psn=psn, 118 | activation_fn=nn.ReLU, 119 | gcn_layers_dims=gcn_layers_dims, 120 | ) 121 | )) 122 | print(self.model.policy) 123 | 124 | # wandb config 125 | if reset_load_kwargs.get('rand_load', False): 126 | load_range = reset_load_kwargs.get('rand_range', (0., 1.)) 127 | self.tr_load = 'random ' + str(load_range) 128 | else: 129 | self.tr_load = reset_load_kwargs.get('cpu_load', 0.8) 130 | # eval_load = eval_load if eval_load is not None else self.tr_load 131 | self.wandb_config = { 132 | "n tr envs": n_tr_envs, 133 | "NSPRs per training ep": nsprs_per_ep, 134 | "max steps per tr ep": max_ep_steps if time_limit else None, 135 | "PSN load (tr)": self.tr_load, 136 | # "PSN load (eval)": eval_load, 137 | "GCNs layers dims": gcn_layers_dims, 138 | "mpl_extractor arch": net_arch, 139 | "use placement state": placement_state, 140 | "accumulate reward": accumulate_rew, 141 | "discount acceptance reward": discount_acc_rew, 142 | "dynamic connectivity": dynamic_connectivity, 143 | "dynamic load range": "0-0.9", 144 | } 145 | 146 | def train( 147 | self, 148 | tot_steps: int, 149 | log_interval: int = 10, 150 | wandb: bool = False, 151 | callbacks: List[BaseCallback] = [], 152 | ): 153 | # wandb things 154 | self.wandb_config["total training steps"] = tot_steps 155 | if wandb: 156 | # init wandb run 157 | wandb_run = wandb.init( 158 | project="Same or different activations", 159 | dir="../", 160 | name="SAME 
(ReLU) (non-shared f.e.) (wax50, load 0.8, small GCNs)", 161 | config=self.wandb_config, 162 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 163 | save_code=True, # optional 164 | ) 165 | # add wandb callback 166 | callbacks.append( 167 | WandbCallback( 168 | model_save_path=f"../models/{wandb_run.id}", 169 | verbose=2, 170 | model_save_freq=10_000 171 | ) 172 | ) 173 | 174 | # add callback for hyperparameters logging 175 | callbacks.append( 176 | HParamCallback( 177 | self.tr_env.num_envs, 178 | self.eval_env.num_envs, 179 | self.nsprs_per_ep, 180 | self.tr_load, 181 | tr_max_ep_steps=self.max_ep_steps if self.time_limit else None, 182 | use_placement_state=self.placement_state, 183 | ), 184 | ) 185 | 186 | # model training 187 | self.model.learn( 188 | total_timesteps=tot_steps, 189 | log_interval=log_interval, 190 | callback=callbacks 191 | ) 192 | 193 | if wandb: 194 | wandb_run.finish() 195 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Union, List, Optional, Type 2 | 3 | import gym 4 | import networkx as nx 5 | import numpy as np 6 | 7 | from gym.utils.env_checker import check_env 8 | from network_simulator import NetworkSimulator 9 | from wrappers import NSPRsGeneratorHADRL, RemovePlacementState, DynamicConnectivity 10 | from sb3_contrib.common.wrappers import ActionMasker 11 | 12 | 13 | def make_env( 14 | psn_path: str, 15 | base_env_kwargs: Optional[dict] = None, 16 | time_limit: bool = False, 17 | time_limit_kwargs: Optional[dict] = None, 18 | reset_load_class: Type[gym.Wrapper] = None, 19 | reset_load_kwargs: Optional[dict] = None, 20 | generate_nsprs: bool = False, 21 | nsprs_gen_kwargs: Optional[dict] = None, 22 | placement_state: bool = True, 23 | dynamic_connectivity: bool = False, 24 | dynamic_connectivity_kwargs: Optional[dict] = dict(link_bw=10_000), 25 | 
dynamic_topology: bool = False, 26 | ): 27 | """ Create the environment. 28 | It can be wrapped with different wrappers, all with their own arguments. 29 | They wrappers are namely: TimeLimit, ResetWithRandLoad, NSPRsGeneratorHADRL. 30 | 31 | :param psn_path: path to the PSN file 32 | :param base_env_kwargs: kwargs of the base environment 33 | :param time_limit: if True, the env is wrapped with TimeLimit wrapper 34 | :param time_limit_kwargs: kwargs of the TimeLimit wrapper 35 | :param reset_load_class: class of the wrapper to reset the PSN with load 36 | :param reset_load_kwargs: kwargs for the reset-with-load wrapper 37 | :param hadrl_nsprs: if True, the env is wrapped with NSPRsGeneratorHADRL wrapper 38 | :param hadrl_nsprs_kwargs: kwargs for the NSPRsGeneratorHADRL wrapper 39 | :param placement_state: if False, adds a wrapper that removes the placement state from the observations 40 | :param dynamic_connectivity: if True, the connectivity of the PSN changes in every episode 41 | :param dynamic_connectivity_kwargs: kwargs for the DynamicConnectivity wrapper 42 | :param dynamic_topology: if True, the topology of the PSN changes in every episode. 43 | Note: it True, 'dynamic_connectivity' will be forced to True as well, 44 | as there's no way to change the nodes and not the connectivity. 
45 | """ 46 | base_env_kwargs = {} if base_env_kwargs is None else base_env_kwargs 47 | time_limit_kwargs = {} if time_limit_kwargs is None else time_limit_kwargs 48 | reset_load_kwargs = {} if reset_load_kwargs is None else reset_load_kwargs 49 | dynamic_connectivity_kwargs = {} if dynamic_connectivity_kwargs is None else dynamic_connectivity_kwargs 50 | 51 | # base env 52 | env = NetworkSimulator(psn_path, **base_env_kwargs) 53 | 54 | # apply wrappers 55 | if time_limit: 56 | env = gym.wrappers.TimeLimit(env, **time_limit_kwargs) 57 | if generate_nsprs: 58 | env = NSPRsGeneratorHADRL(env, **nsprs_gen_kwargs) 59 | if dynamic_topology: 60 | env = ActionMasker(env, action_mask_fn=env.get_action_mask) 61 | env = DynamicConnectivity(env, nodes_mask=env.get_action_mask, **dynamic_connectivity_kwargs) 62 | dynamic_connectivity = False 63 | if dynamic_connectivity: 64 | env = DynamicConnectivity(env, **dynamic_connectivity_kwargs) 65 | if reset_load_class is not None: 66 | env = reset_load_class(env, **reset_load_kwargs) 67 | if not placement_state: 68 | env = RemovePlacementState(env) 69 | # check_env(env) # could make the code crash with masked actions 70 | return env 71 | 72 | 73 | def create_HADRL_PSN_file( 74 | path: str, 75 | n_CCPs: int = 1, 76 | n_CDCs: int = 5, 77 | n_EDCs: int = 15, 78 | n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4), 79 | cpu_cap: int = 50, 80 | ram_cap: int = 300, 81 | intra_CCP_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 82 | intra_CDC_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 83 | intra_EDC_bw_cap: int = 10000, # 10000 Mbps = 10 Gbps 84 | outer_DC_bw_cap: int = 100000, # 100000 Mbps = 100 Gbps 85 | n_EDCs_per_CDC: int = 3, 86 | ): 87 | """ Initialize the PSN as in the HA-DRL paper 88 | 89 | :param path: path where to save the file defining the PSN 90 | :param n_CCPs: number of CCPs 91 | :param n_CDCs: number of CDCs 92 | :param n_EDCs: number of EDCs 93 | :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, 
        EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within an EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    # number of servers per DC category
    n_servers_per_CCP, n_servers_per_CDC, n_servers_per_EDC = n_servers_per_DC
    # total number of server ids needed per DC category
    n_ids_CCPs = n_CCPs * n_servers_per_CCP
    n_ids_CDCs = n_CDCs * n_servers_per_CDC
    n_ids_EDCs = n_EDCs * n_servers_per_EDC

    # ids of servers in various DCs: consecutive integer ranges (CCPs first,
    # then CDCs, then EDCs), each reshaped to one row per DC
    CCP_ids = np.arange(n_ids_CCPs).reshape(n_CCPs, n_servers_per_CCP)
    CDC_ids = np.arange(
        n_ids_CCPs,
        n_ids_CCPs + n_ids_CDCs).reshape(n_CDCs, n_servers_per_CDC)
    EDC_ids = np.arange(
        CDC_ids[-1, -1] + 1,
        CDC_ids[-1, -1] + 1 + n_ids_EDCs).reshape(n_EDCs, n_servers_per_EDC)

    # one switch per DC (based on Fig. 1 in HA-DRL paper); switch ids continue
    # right after the last server id
    n_switches = n_CCPs + n_CDCs + n_EDCs
    switches_ids = list(range(EDC_ids[-1, -1] + 1,
                              EDC_ids[-1, -1] + 1 + n_switches))

    # one router per DC (based on Fig. 1 in HA-DRL paper)
    n_routers = n_CCPs + n_CDCs + n_EDCs
    routers_ids = list(range(switches_ids[-1] + 1, switches_ids[-1] + 1 + n_routers))

    # create graph
    g = nx.Graph(Label="HA-DRL PSN")

    # add nodes
    _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
                  cpu_cap, ram_cap)

    # add links
    _create_HADRL_links(
        g, n_CCPs, n_CDCs, n_EDCs, n_servers_per_CCP, n_servers_per_CDC,
        n_servers_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids,
        intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap,
        n_EDCs_per_CDC)

    # save graph
    nx.write_graphml(g, path)


def create_HEENSO_PSN_file(
        path: str,
        n_CCPs: int = 1,
        n_CDCs: int = 5,
        n_EDCs: int = 15,
        n_servers_per_DC: Tuple[int, int, int] = (16, 10, 4),
        cpu_cap: int = 50,
        ram_cap: int = 300,
        intra_CCP_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_CDC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        intra_EDC_bw_cap: int = 10000,  # 10000 Mbps = 10 Gbps
        outer_DC_bw_cap: int = 100000,  # 100000 Mbps = 100 Gbps
        n_EDCs_per_CDC: int = 3,
):
    """ Initialize the PSN as in the paper "Heuristic for Edge-enable Network Slice Optimization
    using the Power of Two Choices"

    Disclaimer: the topology is slightly different, the ring of nodes in Fig. 4
    of the paper is brought one step closer to the CCP and nodes 26 to 30 are
    removed, since they don't increase the number of possible paths across the PSN
    (they would only make some paths 1 step longer, reducing the reward).

    :param path: path where to save the file defining the PSN
    :param n_CCPs: number of CCPs
    :param n_CDCs: number of CDCs
    :param n_EDCs: number of EDCs
    :param n_servers_per_DC: tuple with the number of servers per (CCP, CDC, EDC)
    :param cpu_cap: CPU capacity per server
    :param ram_cap: RAM capacity per server
    :param intra_CCP_bw_cap: bandwidth of links within a CCP
    :param intra_CDC_bw_cap: bandwidth of links within a CDC
    :param intra_EDC_bw_cap: bandwidth of links within an EDC
    :param outer_DC_bw_cap: bandwidth of links between DCs
    :param n_EDCs_per_CDC: number of EDCs connected to each CDC
    """
    # number of servers per DC category
    n_servers_per_CCP, n_servers_per_CDC, n_servers_per_EDC = n_servers_per_DC
    n_ids_CCPs = n_CCPs * n_servers_per_CCP
    n_ids_CDCs = n_CDCs * n_servers_per_CDC
    n_ids_EDCs = n_EDCs * n_servers_per_EDC

    # ids of servers in various DCs (same consecutive-range scheme as in
    # create_HADRL_PSN_file above)
    CCP_ids = np.arange(n_ids_CCPs).reshape(n_CCPs, n_servers_per_CCP)
    CDC_ids = np.arange(
        n_ids_CCPs,
        n_ids_CCPs + n_ids_CDCs).reshape(n_CDCs, n_servers_per_CDC)
    EDC_ids = np.arange(
        CDC_ids[-1, -1] + 1,
        CDC_ids[-1, -1] + 1 + n_ids_EDCs).reshape(n_EDCs, n_servers_per_EDC)

    # one switch per DC (based on Fig. 4 in HEENSO paper)
    n_switches = n_CCPs + n_CDCs + n_EDCs
    switches_ids = list(range(EDC_ids[-1, -1] + 1,
                              EDC_ids[-1, -1] + 1 + n_switches))

    # one router per DC (based on Fig.
4 in HEENSO paper) 200 | n_routers = n_CDCs + n_EDCs 201 | routers_ids = list( 202 | range(switches_ids[-1] + 1, switches_ids[-1] + 1 + n_routers)) 203 | 204 | # create graph 205 | g = nx.Graph(Label="HEENSO PSN") 206 | 207 | # add nodes 208 | _create_nodes(g, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids, 209 | cpu_cap, ram_cap) 210 | 211 | # add links 212 | _create_HEENSO_links( 213 | g, n_CCPs, n_CDCs, n_EDCs, n_servers_per_CCP, n_servers_per_CDC, 214 | n_servers_per_EDC, CCP_ids, CDC_ids, EDC_ids, switches_ids, routers_ids, 215 | intra_CCP_bw_cap, intra_CDC_bw_cap, intra_EDC_bw_cap, outer_DC_bw_cap, 216 | n_EDCs_per_CDC) 217 | 218 | # save graph 219 | nx.write_graphml(g, path) 220 | 221 | 222 | def _create_nodes( 223 | g: nx.Graph, 224 | CCP_ids: Union[np.ndarray, List[int]], 225 | CDC_ids: Union[np.ndarray, List[int]], 226 | EDC_ids: Union[np.ndarray, List[int]], 227 | switches_ids: Union[np.ndarray, List[int]], 228 | routers_ids: Union[np.ndarray, List[int]], 229 | cpu_cap: int, 230 | ram_cap: int, 231 | ): 232 | all_server_ids = np.concatenate((CCP_ids.flatten(), 233 | CDC_ids.flatten(), 234 | EDC_ids.flatten())) 235 | for server_id in all_server_ids: 236 | g.add_node(server_id, NodeType="server", CPUcap=cpu_cap, RAMcap=ram_cap) 237 | for switch_id in switches_ids: 238 | g.add_node(switch_id, NodeType="switch") 239 | for router_id in routers_ids: 240 | g.add_node(router_id, NodeType="router") 241 | 242 | 243 | def _create_HADRL_links( 244 | g: nx.Graph, 245 | n_CCPs: int, 246 | n_CDCs: int, 247 | n_EDCs: int, 248 | n_servers_per_CCP: int, 249 | n_servers_per_CDC: int, 250 | n_servers_per_EDC: int, 251 | CCP_ids: Union[np.ndarray, List[int]], 252 | CDC_ids: Union[np.ndarray, List[int]], 253 | EDC_ids: Union[np.ndarray, List[int]], 254 | switches_ids: Union[np.ndarray, List[int]], 255 | routers_ids: Union[np.ndarray, List[int]], 256 | intra_CCP_bw_cap: int, 257 | intra_CDC_bw_cap: int, 258 | intra_EDC_bw_cap: int, 259 | outer_DC_bw_cap: int, 260 | 
n_EDCs_per_CDC: int 261 | ): 262 | connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True 263 | CCPs_switches = switches_ids[:n_CCPs] 264 | CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs] 265 | EDCs_switches = switches_ids[n_CCPs + n_CDCs:] 266 | CCPs_routers = routers_ids[:n_CCPs] 267 | CDCs_routers = routers_ids[n_CCPs:n_CCPs + n_CDCs] 268 | EDCs_routers = routers_ids[n_CCPs + n_CDCs:] 269 | 270 | # connect CCPs' servers to their switches 271 | for i in range(n_CCPs): 272 | for j in range(n_servers_per_CCP): 273 | g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap) 274 | 275 | # connect CDCs' servers to their switches 276 | for i in range(n_CDCs): 277 | for j in range(n_servers_per_CDC): 278 | g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap) 279 | 280 | # connect EDCs' servers to their switches 281 | for i in range(n_EDCs): 282 | for j in range(n_servers_per_EDC): 283 | g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap) 284 | 285 | # connect CCPs' switches to their routers 286 | for i in range(len(CCPs_switches)): 287 | g.add_edge(CCPs_switches[i], CCPs_routers[i], BWcap=intra_CCP_bw_cap) 288 | 289 | # connect CDCs' switches to their routers 290 | for i in range(len(CDCs_switches)): 291 | g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap) 292 | 293 | # connect EDCs' switches to their routers 294 | for i in range(len(EDCs_switches)): 295 | g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap) 296 | 297 | # connect CDCs' routers to CCPs' routers 298 | for i in range(n_CDCs): 299 | # each CDC is connected to one CCP 300 | corresp_CCP = np.random.randint(0, n_CCPs) 301 | g.add_edge(CDCs_routers[i], CCPs_routers[corresp_CCP], BWcap=outer_DC_bw_cap) 302 | 303 | # connect each CDCs' router to n EDCs' routers 304 | for i in range(n_CDCs): 305 | if connect_CDCs_EDCs_randomly: 306 | corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False) 307 
| else: 308 | corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs * i + n_EDCs_per_CDC)) 309 | for j in range(n_EDCs_per_CDC): 310 | g.add_edge(CDCs_routers[i], EDCs_routers[corresp_EDCs[j]], 311 | BWcap=outer_DC_bw_cap) 312 | 313 | # connect CDCs and EDCs' routers in a circular way (like in Fig. 1 in HA-DRL paper) 314 | CDCs_and_EDCs_routers = np.concatenate((CDCs_routers, EDCs_routers)) 315 | for i in range(len(CDCs_and_EDCs_routers)): 316 | g.add_edge(CDCs_and_EDCs_routers[i], 317 | CDCs_and_EDCs_routers[(i + 1) % len(CDCs_and_EDCs_routers)], 318 | BWcap=outer_DC_bw_cap) 319 | 320 | 321 | def _create_HEENSO_links( 322 | g: nx.Graph, 323 | n_CCPs: int, 324 | n_CDCs: int, 325 | n_EDCs: int, 326 | n_servers_per_CCP: int, 327 | n_servers_per_CDC: int, 328 | n_servers_per_EDC: int, 329 | CCP_ids: Union[np.ndarray, List[int]], 330 | CDC_ids: Union[np.ndarray, List[int]], 331 | EDC_ids: Union[np.ndarray, List[int]], 332 | switches_ids: Union[np.ndarray, List[int]], 333 | routers_ids: Union[np.ndarray, List[int]], 334 | intra_CCP_bw_cap: int, 335 | intra_CDC_bw_cap: int, 336 | intra_EDC_bw_cap: int, 337 | outer_DC_bw_cap: int, 338 | n_EDCs_per_CDC: int 339 | ): 340 | connect_CDCs_EDCs_randomly = False if n_EDCs / n_CDCs == n_EDCs_per_CDC else True 341 | CCPs_switches = switches_ids[:n_CCPs] 342 | CDCs_switches = switches_ids[n_CCPs:n_CCPs + n_CDCs] 343 | EDCs_switches = switches_ids[n_CCPs + n_CDCs:] 344 | CDCs_routers = routers_ids[:n_CDCs] 345 | EDCs_routers = routers_ids[n_CDCs:] 346 | 347 | # connect CCPs' servers to their switches 348 | for i in range(n_CCPs): 349 | for j in range(n_servers_per_CCP): 350 | g.add_edge(CCP_ids[i, j], CCPs_switches[i], BWcap=intra_CCP_bw_cap) 351 | 352 | # connect CDCs' servers to their switches 353 | for i in range(n_CDCs): 354 | for j in range(n_servers_per_CDC): 355 | g.add_edge(CDC_ids[i, j], CDCs_switches[i], BWcap=intra_CDC_bw_cap) 356 | 357 | # connect EDCs' servers to their switches 358 | for i in range(n_EDCs): 359 | for j 
in range(n_servers_per_EDC): 360 | g.add_edge(EDC_ids[i, j], EDCs_switches[i], BWcap=intra_EDC_bw_cap) 361 | 362 | # connect CDCs' switches to their routers 363 | for i in range(len(CDCs_switches)): 364 | g.add_edge(CDCs_switches[i], CDCs_routers[i], BWcap=intra_CDC_bw_cap) 365 | 366 | # connect EDCs' switches to their routers 367 | for i in range(len(EDCs_switches)): 368 | g.add_edge(EDCs_switches[i], EDCs_routers[i], BWcap=intra_EDC_bw_cap) 369 | 370 | # connect CDCs' routers to CCPs' switches 371 | for i in range(n_CDCs): 372 | # each CDC is connected to one CCP 373 | corresp_CCP = np.random.randint(0, n_CCPs) 374 | g.add_edge(CDCs_routers[i], CCPs_switches[corresp_CCP], BWcap=outer_DC_bw_cap) 375 | 376 | # connect each CDCs' switch to n EDCs' routers 377 | for i in range(n_CDCs): 378 | if connect_CDCs_EDCs_randomly: 379 | corresp_EDCs = np.random.choice(n_EDCs, n_EDCs_per_CDC, replace=False) 380 | else: 381 | corresp_EDCs = list(range(n_EDCs_per_CDC * i, n_EDCs_per_CDC * i + n_EDCs_per_CDC)) 382 | for j in range(n_EDCs_per_CDC): 383 | g.add_edge(CDCs_switches[i], EDCs_routers[corresp_EDCs[j]], 384 | BWcap=outer_DC_bw_cap) 385 | 386 | # connect CDCs routers in a circular way (like in Fig. 
class DynamicConnectivity(gym.Wrapper):
    """Re-wires the PSN's connectivity at the beginning of every episode.

    On reset, all physical links are removed (and optionally some nodes,
    according to ``nodes_mask``), then random links of capacity ``link_bw``
    are added until no node is isolated and the PSN's original total
    bandwidth capacity (scaled by the fraction of available nodes) is
    restored. The ``bw_avails`` entry of the env's obs dict is rebuilt and
    normalized accordingly.
    """

    def __init__(
            self,
            env: gym.Env,
            link_bw: int = 10_000,
            nodes_mask: Optional[Callable[[gym.Env], np.ndarray]] = None
    ):
        """
        :param env: gym environment
        :param link_bw: total bandwidth capacity of each link
        :param nodes_mask: if not None, a callable returning a boolean mask
            over the servers; nodes whose entry is False are removed from
            the PSN graph at each reset
        """
        super().__init__(env)
        self.nodes_mask = nodes_mask
        self.link_bw = link_bw
        # target bandwidth: total BW capacity of the PSN as it was when wrapped
        self.tot_bw_cap = sum(edge['BWcap'] for edge in self.env.psn.edges.values())
        self.placed_bw = 0

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        # remove all edges from the PSN
        self.remove_all_edges()
        # eventually remove masked nodes
        if self.nodes_mask is not None:
            self.remove_masked_nodes()
        # initialize the bandwidth placed in the PSN
        self.placed_bw = 0
        # add edges in the PSN until the target bandwidth capacity is reached
        self.add_edges()
        return self.env.obs_dict  # updated in self.add_edges()

    def remove_all_edges(self):
        """Remove every physical link from the PSN."""
        # BUGFIX: materialize the edge list first; removing edges while
        # iterating the live edge view mutates the view being iterated
        for u, v in list(self.env.psn.edges.keys()):
            self.env.psn.remove_edge(u, v)

    def remove_masked_nodes(self):
        """Remove from the PSN the nodes whose mask entry is False."""
        nodes_mask = self.nodes_mask(self.env)
        # indexes where the mask is False
        indexes_to_remove = np.where(np.logical_not(nodes_mask))[0]
        for idx in indexes_to_remove:
            node_id = self.env.servers_map_idx_id[idx]
            self.env.psn.remove_node(node_id)

    def add_edges(self):
        """Add edges to the PSN.

        Repeatedly connects a random node with a yet-unvisited node until no
        node is isolated; then, if the target BW hasn't been reached, keeps
        adding further random links until it is.
        """
        # zero the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] = np.zeros_like(self.env.obs_dict['bw_avails'])
        # set of unvisited nodes
        unvisited = set(self.env.psn.nodes)
        while unvisited:
            # sample a node from the PSN
            u = random.choice(list(self.env.psn.nodes))
            # sample an unvisited node to connect to it
            v = random.choice(list(unvisited))
            if u != v:
                # connect the 2 nodes
                self.env.psn.add_edge(u, v, BWcap=self.link_bw, availBW=self.link_bw)
                # save the amount of bandwidth introduced in the PSN
                self.placed_bw += self.link_bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                self.env.obs_dict['bw_avails'][u_idx] += self.link_bw
                self.env.obs_dict['bw_avails'][v_idx] += self.link_bw
                # remove the nodes from the set of unvisited nodes
                unvisited.remove(v)
                if u in unvisited:
                    unvisited.remove(u)

        # if the total bandwidth of the PSN hasn't been reached, reach it by adding random links
        perc_avail_nodes = self.env.perc_avail_nodes
        # cut tot bw proportionally to number of nodes
        tot_bw = self.tot_bw_cap * perc_avail_nodes
        while self.placed_bw < tot_bw:
            # BUGFIX: random.sample requires a sequence (raises TypeError for a
            # NodeView on Python >= 3.11), so convert the nodes to a list
            u, v = random.sample(list(self.env.psn.nodes), 2)
            # check that the 2 nodes aren't connected already
            if (u, v) not in self.env.psn.edges:
                bw = min(self.link_bw, tot_bw - self.placed_bw)
                self.env.psn.add_edge(u, v, BWcap=bw, availBW=bw)
                self.placed_bw += bw
                # get the 2 nodes' indexes in the obs dict and update the obs dict.
                # BUGFIX: credit the bandwidth actually placed on this link (bw),
                # which may be smaller than self.link_bw for the last link added
                u_idx = self.env.map_id_idx[u]
                v_idx = self.env.map_id_idx[v]
                self.env.obs_dict['bw_avails'][u_idx] += bw
                self.env.obs_dict['bw_avails'][v_idx] += bw

        # normalize the BW availabilities in the obs dict
        self.env.obs_dict['bw_avails'] /= np.max(self.env.obs_dict['bw_avails'])
class NSPRsGeneratorHADRL(gym.Wrapper):
    """
    Wrapper to make the simulator generate data the same way as in the
    paper HA-DRL[1].

    [1] https://ieeexplore.ieee.org/document/9632824
    """

    def __init__(
            self,
            env: gym.Env,
            nsprs_per_ep: int = 5,
            vnfs_per_nspr: int = 5,
            cpu_req_per_vnf: int = 25,
            ram_req_per_vnf: int = 150,
            bw_req_per_vl: int = 2000,
            load: float = 0.5,
            always_one: bool = False
    ):
        """
        :param env: environment to wrap
        :param nsprs_per_ep: max NSPRs per episode (None = unlimited)
        :param vnfs_per_nspr: number of VNFs in each NSPR (linear chain)
        :param cpu_req_per_vnf: CPU requirement of each VNF
        :param ram_req_per_vnf: RAM requirement of each VNF
        :param bw_req_per_vl: bandwidth requirement of each virtual link
        :param load: target load of the PSN, drives the NSPR arrival rate
        :param always_one: if True, always generate exactly one NSPR at reset
        """
        super().__init__(env)
        if self.env.nsprs_per_episode is not None:
            # BUGFIX: the two string halves used to concatenate without a
            # separating space ("...NSPRsper episode...")
            warnings.warn("The environment already has a fixed number of NSPRs "
                          "per episode. The wrapper will override this value.")
        if nsprs_per_ep is None:
            # no limit, just use max steps (if not None), otherwise infinite episode
            nsprs_per_ep = math.inf
        self.unwrapped.nsprs_per_episode = nsprs_per_ep
        self.nsprs_per_ep = nsprs_per_ep
        self.vnfs_per_nspr = vnfs_per_nspr
        self.cpu_req_per_vnf = cpu_req_per_vnf
        self.ram_req_per_vnf = ram_req_per_vnf
        self.bw_req_per_vl = bw_req_per_vl
        self.load = load
        self.always_one = always_one
        self.tot_cpu_cap = self._get_tot_cpu_cap()
        self.nspr_model = self._get_nspr_model()
        self.max_steps = None
        try:
            # if env is wrapped in TimeLimit, max arrival time of NSPRs is max episode length
            self.max_steps = self.env._max_episode_steps
            self.nsprs_duration = min(self.max_steps, 100)
        except (AttributeError, TypeError):
            # BUGFIX: was "except AttributeError or TypeError:", which evaluates
            # to "except AttributeError:" (the "or" short-circuits) and never
            # caught the TypeError raised by min() when _max_episode_steps is None
            self.nsprs_duration = 100
        # computed according to Sec. VII.C of HA-DRL paper
        self.arr_rate = self.load * self.tot_cpu_cap / self.nsprs_duration / \
            self.cpu_req_per_vnf / self.vnfs_per_nspr

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self.unwrapped.nsprs = self._generate_nsprs()
        self.unwrapped.waiting_nsprs += self.unwrapped.nsprs.get(self.unwrapped.time_step, [])
        self.unwrapped.pick_next_nspr()
        obs = self.unwrapped.update_nspr_state()
        return obs

    def _get_nspr_model(self):
        """ Build the template NSPR: a linear chain of identical VNFs. """
        nspr_model = nx.DiGraph()
        nspr_model.add_node(0, reqCPU=self.cpu_req_per_vnf,
                            reqRAM=self.ram_req_per_vnf, placed=-1)
        for i in range(1, self.vnfs_per_nspr):
            nspr_model.add_edge(i - 1, i, reqBW=self.bw_req_per_vl, placed=[])
            nspr_model.add_node(i, reqCPU=self.cpu_req_per_vnf,
                                reqRAM=self.ram_req_per_vnf, placed=-1)
        return nspr_model

    def _generate_nsprs(self):
        """ Dispatch to the generation strategy matching the arrival rate. """
        if self.always_one:
            nsprs_dict = self._generate_one_nspr()
        elif self.arr_rate >= 0.3:
            nsprs_dict = self._generate_nsprs_poisson()
        else:
            nsprs_dict = self._generate_nsprs_deterministic()
        return nsprs_dict

    def _generate_one_nspr(self):
        """ Generate a single NSPR arriving at the current time step. """
        nspr = self._get_nspr_model()
        nspr.graph['ArrivalTime'] = self.env.time_step
        nspr.graph['duration'] = 100
        return {self.env.time_step: [nspr]}

    def _generate_nsprs_poisson(self):
        """ Generate NSPRs with Poisson-distributed arrivals (rate = self.arr_rate). """
        cur_arr_time = self.env.time_step
        created_nsprs = 0
        nsprs_dict = {}
        while True:
            # NOTE: if self.max_steps is None, and the poisson sampling keeps
            # generating 0, this will loop forever, but since this is executed
            # only for a sufficiently high arrival rate, this is extremely unlikely to happen
            poisson_sample = np.random.poisson(lam=self.arr_rate)
            if poisson_sample > 0:
                nsprs_to_create = min(poisson_sample, self.nsprs_per_ep - created_nsprs)
                if nsprs_to_create <= 0:
                    break
                # deep-copy the template once per NSPR actually created
                # (previously one extra copy was made even when breaking out)
                batch = []
                for _ in range(nsprs_to_create):
                    cur_nspr = copy.deepcopy(self.nspr_model)
                    cur_nspr.graph['ArrivalTime'] = cur_arr_time
                    cur_nspr.graph['duration'] = self.nsprs_duration
                    batch.append(cur_nspr)
                nsprs_dict[cur_arr_time] = batch
                created_nsprs += nsprs_to_create
            cur_arr_time += 1
            if self.max_steps is not None and cur_arr_time - self.env.time_step > self.max_steps:
                break
        return nsprs_dict

    def _generate_nsprs_deterministic(self):
        """ Generate one NSPR every round(1 / arr_rate) steps (low-rate regime). """
        if self.arr_rate >= 1:
            # this function is called only for low arrival rates
            raise NotImplementedError
        one_every_how_many_steps = round(1 / self.arr_rate)
        nsprs_dict = {}
        step = self.env.time_step
        created_nsprs = 0
        while True:
            if step % one_every_how_many_steps == 0:
                cur_nspr = copy.deepcopy(self.nspr_model)
                cur_nspr.graph['ArrivalTime'] = step
                cur_nspr.graph['duration'] = self.nsprs_duration
                nsprs_dict[step] = [cur_nspr]
                created_nsprs += 1
            step += 1
            if created_nsprs >= self.nsprs_per_ep or \
                    (self.max_steps is not None and step - self.env.time_step > self.max_steps):
                break
        return nsprs_dict

    def _get_tot_cpu_cap(self):
        """ Sum the CPU capacity of every server in the PSN. """
        tot_cpu_cap = 0
        for node_id in self.env.psn.nodes:
            node = self.env.psn.nodes[node_id]
            if node['NodeType'] == 'server':
                tot_cpu_cap += node['CPUcap']
        return tot_cpu_cap
class RemovePlacementState(gym.ObservationWrapper):
    """ Strips the placement state from the observations, keeping only the
    PSN resource availabilities and the current NSPR's requirements. """

    def __init__(self, env):
        super().__init__(env)
        # stand-in for infinity: gym.spaces.Box apparently rejects
        # "math.inf" / "sys.maxsize" as a high value
        big = 1_000_000_000
        n_nodes = len(self.unwrapped.psn.nodes)

        def avail_box():
            # per-node normalized resource availability
            return Box(low=0., high=1., shape=(n_nodes,), dtype=np.float32)

        def req_box(dtype=np.float32):
            # scalar requirement of the VNF currently being placed
            return Box(low=0, high=big, shape=(1,), dtype=dtype)

        self.observation_space = Dict({
            # PSN STATE
            'cpu_avails': avail_box(),
            'ram_avails': avail_box(),
            # for each physical node, sum of the BW of the physical links connected to it
            'bw_avails': avail_box(),

            # NSPR STATE
            'cur_vnf_cpu_req': req_box(),
            'cur_vnf_ram_req': req_box(),
            # sum of the required BW of each VL connected to the current VNF
            'cur_vnf_bw_req': req_box(),
            'vnfs_still_to_place': req_box(dtype=int),
        })

    def observation(self, obs):
        """ Return the observation restricted to the keys above (i.e. without
        the placement state). """
        kept = ('cpu_avails', 'ram_avails', 'bw_avails', 'cur_vnf_cpu_req',
                'cur_vnf_ram_req', 'cur_vnf_bw_req', 'vnfs_still_to_place')
        return {key: obs[key] for key in kept}
class ResetWithLoad(gym.Wrapper, ABC):
    """ Abstract class. Wrapper to reset the PSN with a certain tr_load """

    def __init__(self, env: gym.Env, **kwargs):
        super().__init__(env)
        # per-resource load fractions; subclasses overwrite these before reset
        self.cpu_load = self.ram_load = self.bw_load = 0.

    def reset(self, **kwargs):
        # doesn't work anymore, needs to be adapted
        raise NotImplementedError
        self.env.reset(**kwargs)
        self._init_psn_load()
        # the obs returned by env.reset is outdated, fetch a fresh one
        obs = self.env.update_nspr_state()
        return obs

    def _init_psn_load(self):
        """ Initialize the PSN's load with the specified values """
        for _, node_attrs in self.env.psn.nodes.items():
            if node_attrs['NodeType'] != "server":
                continue
            node_attrs['availCPU'] = int(node_attrs['CPUcap'] * (1 - self.cpu_load))
            node_attrs['availRAM'] = int(node_attrs['RAMcap'] * (1 - self.ram_load))
        for _, link_attrs in self.env.psn.edges.items():
            link_attrs['availBW'] = int(link_attrs['BWcap'] * (1 - self.bw_load))
class ResetWithFixedLoad(ResetWithLoad):
    """ Reset the PSN with a certain - fixed - amount of tr_load """

    def __init__(self, env: gym.Env, reset_load_perc: Union[float, dict] = 0.,
                 **kwargs):
        """ Constructor

        :param env: the environment to wrap
        :param reset_load_perc: init percentage of tr_load of the PSN's resources at each reset:
            if float, that value applies to all the resources for all nodes and links;
            if dict, it can specify the tr_load for each type of resource.
        """
        super().__init__(env)
        assert isinstance(reset_load_perc, (float, dict))
        # define the tr_load percentages of each resource
        if isinstance(reset_load_perc, dict):
            self.cpu_load = reset_load_perc.get('availCPU', 0)
            self.ram_load = reset_load_perc.get('availRAM', 0)
            self.bw_load = reset_load_perc.get('availBW', 0)
        else:
            assert 0 <= reset_load_perc <= 1
            self.cpu_load = self.ram_load = self.bw_load = reset_load_perc
        # every load fraction must be a valid percentage
        for load in (self.cpu_load, self.ram_load, self.bw_load):
            assert 0 <= load <= 1
class ResetWithRandLoad(ResetWithLoad):
    """ Reset the PSN with a random uniform amount of load """

    def __init__(self, env: gym.Env, min_perc: Union[float, dict],
                 max_perc: Union[float, dict], same_for_all: bool = True,
                 **kwargs):
        """ Constructor

        :param env: the environment to wrap
        :param min_perc: minimum percentage of tr_load of the PSN's resources at each reset
        :param max_perc: maximum percentage of tr_load of the PSN's resources at each reset
        :param same_for_all: if True, the same random value is used for all the nodes / links
        """
        super().__init__(env)
        self.same_for_all = same_for_all

        # min_perc and max_perc must either both be floats or both be dicts
        assert (isinstance(min_perc, float) and isinstance(max_perc, float)) or \
               (isinstance(min_perc, dict) and isinstance(max_perc, dict))

        # save the min and max percentages of tr_load
        if isinstance(min_perc, dict):
            self.min_cpu = min_perc.get('availCPU', 0)
            self.min_ram = min_perc.get('availRAM', 0)
            self.min_bw = min_perc.get('availBW', 0)
            self.max_cpu = max_perc.get('availCPU', 0)
            self.max_ram = max_perc.get('availRAM', 0)
            self.max_bw = max_perc.get('availBW', 0)
            for lo, hi in ((self.min_cpu, self.max_cpu),
                           (self.min_ram, self.max_ram),
                           (self.min_bw, self.max_bw)):
                assert 0 <= lo <= 1 and 0 <= hi <= 1 and lo <= hi
        else:
            assert 0 <= min_perc <= 1 and 0 <= max_perc <= 1 and min_perc <= max_perc
            self.min_cpu = self.min_ram = self.min_bw = min_perc
            self.max_cpu = self.max_ram = self.max_bw = max_perc

    def reset(self, **kwargs):
        # when one value is shared by the whole PSN, draw it here; otherwise
        # _init_psn_load draws a value per node / link
        if self.same_for_all:
            self.cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
            self.ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
            self.bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
        return super().reset(**kwargs)

    def _init_psn_load(self):
        if self.same_for_all:
            super()._init_psn_load()
            return
        for _, node in self.env.psn.nodes.items():
            if node['NodeType'] != "server":
                continue
            cpu_load = np.random.uniform(self.min_cpu, self.max_cpu, size=1).item()
            ram_load = np.random.uniform(self.min_ram, self.max_ram, size=1).item()
            node['availCPU'] = int(node['CPUcap'] * (1 - cpu_load))
            node['availRAM'] = int(node['RAMcap'] * (1 - ram_load))
        for _, link in self.env.psn.edges.items():
            bw_load = np.random.uniform(self.min_bw, self.max_bw, size=1).item()
            link['availBW'] = int(link['BWcap'] * (1 - bw_load))
class ResetWithLoadMixed(gym.Wrapper):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It selects a load percentage for each node/link such that the overall load of
    the PSN is the specified one. It means certain nodes will be free, others
    completely occupied and others will be partially occupied, so that the overall
    CPU/RAM capacity is the specified one. (Same thing for links with their bandwidth).
    """
    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super(ResetWithLoadMixed, self).__init__(env)
        self.random = rand_load
        # total capacities are lazily resolved on first _init_psn_load call
        self.tot_cpu_cap = self.tot_ram_cap = self.tot_bw_cap = None
        if not rand_load:
            assert isinstance(load, (float, dict)), "Param 'load' is of an incorrect type"
            if isinstance(load, float):
                assert 0. <= load <= 1.
                self.cpu_load = self.ram_load = self.bw_load = load
            elif isinstance(load, dict):
                self.cpu_load = load.get('cpu', 0)
                self.ram_load = load.get('ram', 0)
                self.bw_load = load.get('bw', 0)
                assert 0. <= self.cpu_load <= 1. and 0. <= self.ram_load <= 1. and \
                       0. <= self.bw_load <= 1.
        else:
            assert len(rand_range) == 2 and 0. <= rand_range[0] <= 1. and \
                   0. <= rand_range[1] <= 1.
            # discretized pool of load values sampled at each reset (0.1 steps)
            self.rand_vals = np.arange(min(rand_range), max(rand_range), 0.1)

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self._init_psn_load()
        obs = self.env.update_nspr_state()  # the obs in the env.reset method is outdated
        return obs

    def compute_link_weight(self, source, target, link):
        # shortest-path weight callback: links lacking the required BW get
        # infinite weight so they are never chosen
        return 1 if link['availBW'] >= self.vl_req_bw else math.inf

    def _init_psn_load(self):
        """ Initialize the PSN's load """
        if self.random:
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = load
            # BW load is kept lower than node load (clamped at 0)
            self.bw_load = max(0.0, load - 0.4)

        # TODO: note that 'reset' is called here by every env in the VecEnv individually...
        # TODO: ...so here self.env is not a VecEnv, but just a NetworkSimulator
        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        max_bws = self.env.get_attr('max_bw') if isinstance(self.env, VecEnv) else [self.env.max_bw]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None or self.tot_bw_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap
            self.tot_bw_cap = self.env.tot_bw_cap

        # BW requirement used by compute_link_weight (hard-coded VL request size)
        self.vl_req_bw = 2000
        for i, psn in enumerate(psns):
            max_cpu, max_ram, max_bw = max_cpus[i], max_rams[i], max_bws[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # amounts to remove, expressed in normalized units (fraction of max)
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            tot_bw_to_remove = self.bw_load * self.tot_bw_cap / max_bw
            # iterate over nodes in a random order and reduce the CPU/RAM availabilities
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                    perc_to_remove = random.choice([0.5])
                    # CPU to remove
                    # x% of the node capacity (normalized)
                    cur_cpu_to_remove = perc_to_remove * node['CPUcap'] / max_cpu
                    # never remove more than what is left to remove or available
                    cur_cpu_to_remove = min([round(cur_cpu_to_remove, 3),
                                             tot_cpu_to_remove,
                                             obs_dict['cpu_avails'][idx]])
                    # RAM to remove
                    cur_ram_to_remove = perc_to_remove * node['RAMcap'] / max_ram
                    cur_ram_to_remove = min([round(cur_ram_to_remove, 3),
                                             tot_ram_to_remove,
                                             obs_dict['ram_avails'][idx]])
                    # remove resources
                    obs_dict['cpu_avails'][idx] -= cur_cpu_to_remove
                    obs_dict['ram_avails'][idx] -= cur_ram_to_remove
                    tot_cpu_to_remove -= cur_cpu_to_remove
                    tot_ram_to_remove -= cur_ram_to_remove

            # iterate over links in random order and reduce the BW availability
            links = list(psn.edges.items())
            while tot_bw_to_remove > 0:
                extremes, link = random.sample(links, 1)[0]
                # TODO: consider to extend as [0.25, 0.5, 0.75, 1.]
                perc_to_remove = random.choice([0.5])
                cur_bw_to_remove = perc_to_remove * link['BWcap']
                idx_0, idx_1 = map_id_idx[extremes[0]], map_id_idx[extremes[1]]
                # cap at the remaining amount (de-normalized) and the link's availability
                cur_bw_to_remove = min([round(cur_bw_to_remove, 6),
                                        tot_bw_to_remove * max_bw,
                                        link['availBW']])
                cur_bw_to_remove_normal = cur_bw_to_remove / max_bw
                # links' BW actually reduced because needed for shortest path calculation
                link['availBW'] -= cur_bw_to_remove
                obs_dict['bw_avails'][idx_0] -= cur_bw_to_remove_normal
                obs_dict['bw_avails'][idx_1] -= cur_bw_to_remove_normal
                tot_bw_to_remove -= cur_bw_to_remove_normal
class ResetWithLoadBinary(ResetWithLoadMixed):
    """ Wrapper to reset the PSN with a certain load.
    The load is expressed in percentage and can be resource-specific or general
    (each resource reset with the same load).
    It puts a certain amount of nodes with zero available resources, so that
    the overall load of the PSN is the one specified.

    Note: only the CPU and RAM are modified, not the bandwidth
    """

    def __init__(
            self,
            env: Union[gym.Env, VecEnv],
            load: Union[float, Dict[str, float]] = 0.5,
            rand_load: bool = False,
            rand_range: Tuple[float, float] = (0., 1.),
            **kwargs
    ):
        """
        :param env: environment
        :param load: the target load of the PSN, it can be:
            float: single fixed value for all the resources;
            Dict[resource: load]: fixed value but specific for each resource (CPU, RAM, BW)
        :param rand_load: if True, at every 'reset' the PSN's load will be random (same value for all resources);
            note: if 'rand_load' is true, 'load' will be ignored.
        :param rand_range: min and max (included) load values to consider when 'rand_load' is true
        """
        super().__init__(env, load, rand_load, rand_range)

    def _init_psn_load(self):
        """ Initialize the PSN's load by zeroing whole servers until the
        target amount of CPU/RAM has been removed (BW is left untouched). """
        if self.random:
            load = random.choice(self.rand_vals)
            self.cpu_load = self.ram_load = self.bw_load = load

        # see the note in ResetWithLoadMixed: when inside a VecEnv, reset is
        # called per sub-env, so self.env may be a plain simulator
        psns = self.env.get_attr('psn') if isinstance(self.env, VecEnv) else [self.env.psn]
        max_cpus = self.env.get_attr('max_cpu') if isinstance(self.env, VecEnv) else [self.env.max_cpu]
        max_rams = self.env.get_attr('max_ram') if isinstance(self.env, VecEnv) else [self.env.max_ram]
        obs_dicts = self.env.get_attr('obs_dict') if isinstance(self.env, VecEnv) else [self.env.obs_dict]
        maps_id_idx = self.env.get_attr('map_id_idx') if isinstance(self.env, VecEnv) else [self.env.map_id_idx]

        # NOTE: only works if all the envs in the VecEnv use the same PSN
        if self.tot_cpu_cap is None or self.tot_ram_cap is None:
            self.tot_cpu_cap = self.env.tot_cpu_cap
            self.tot_ram_cap = self.env.tot_ram_cap

        for i, psn in enumerate(psns):
            max_cpu, max_ram = max_cpus[i], max_rams[i]
            obs_dict, map_id_idx = obs_dicts[i], maps_id_idx[i]
            # amounts to remove, in normalized units (fraction of max capacity);
            # BW is deliberately not touched (see class docstring)
            tot_cpu_to_remove = self.cpu_load * self.tot_cpu_cap / max_cpu
            tot_ram_to_remove = self.ram_load * self.tot_ram_cap / max_ram
            # iterate over nodes in a random order and zero the CPU/RAM availabilities
            nodes = list(psn.nodes.items())
            while tot_cpu_to_remove > 0 or tot_ram_to_remove > 0:
                node_id, node = random.sample(nodes, 1)[0]
                if node['NodeType'] == 'server':
                    idx = map_id_idx[node_id]
                    cur_removed_cpu = obs_dict['cpu_avails'][idx]
                    # BUGFIX: the RAM removed from this node was previously
                    # accounted with the CPU amount (tot_ram_to_remove -=
                    # cur_removed_cpu), skewing the resulting RAM load
                    cur_removed_ram = obs_dict['ram_avails'][idx]
                    obs_dict['cpu_avails'][idx] = 0.
                    obs_dict['ram_avails'][idx] = 0.
                    tot_cpu_to_remove -= cur_removed_cpu
                    tot_ram_to_remove -= cur_removed_ram
class ResetWithRealisticLoad(gym.Wrapper):
    """ Wrapper that resets the PSN with a certain amount of load already.
    It does so in a way that resembles how the state of the PSN might be in
    case an agent has been actually placing NSPRs.

    It samples NSPRs from the ones that should arrive during the current episode
    and places their VNFs in random nodes, connecting them via shortest path.
    This way the CPU/RAM and even the BW allocation should be realistic.
    """

    def __init__(self, env: gym.Env, cpu_load: float, **kwargs):
        """
        :param env: environment
        :param cpu_load: target percentage of CPU load of the PSN
        """
        super().__init__(env)
        assert 0. <= cpu_load <= 1.
        self.cpu_load = cpu_load

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        self.init_psn_load()
        obs = self.env.update_nspr_state()  # the obs in the env.reset method is outdated
        return obs

    def init_psn_load(self):
        """ Initialize the PSN with the target load.

        Repeatedly samples an NSPR, places its VNFs on random suitable servers
        and its VLs along shortest paths, until the target (normalized) amount
        of CPU has been consumed.
        """
        # target CPU to remove, normalized by the max CPU capacity
        cpu_to_remove_normal = self.env.tot_cpu_cap * self.cpu_load / self.env.max_cpu
        removed_cpu_normal = 0
        while removed_cpu_normal < cpu_to_remove_normal:
            nspr = self.sample_nspr()
            placement_map = {}  # VNF id -> id of the physical node hosting it
            # place all VNFs
            for vnf_id, vnf in nspr.nodes.items():
                node_id, node_idx = self.sample_suitable_node(vnf)
                placement_map[vnf_id] = node_id
                self.env.obs_dict['cpu_avails'][node_idx] -= vnf['reqCPU'] / self.env.max_cpu
                self.env.obs_dict['ram_avails'][node_idx] -= vnf['reqRAM'] / self.env.max_ram
                removed_cpu_normal += vnf['reqCPU'] / self.env.max_cpu
                if removed_cpu_normal >= cpu_to_remove_normal:
                    break
            # place all VLs
            for (src_vnf_id, dst_vnf_id), vl in nspr.edges.items():
                # stored on self so that compute_links_weights (the nx weight
                # callback) can read the current VL's BW requirement
                self.req_bw = vl['reqBW']
                try:
                    src_node_id = placement_map[src_vnf_id]
                    dst_node_id = placement_map[dst_vnf_id]
                except KeyError:
                    # it means either src_vnf_id, dst_vnf_id or both haven't been placed -> skip link placement
                    continue
                try:
                    path = nx.shortest_path(G=self.env.psn, source=src_node_id,
                                            target=dst_node_id, weight=self.compute_links_weights,
                                            method='dijkstra')
                    for i in range(len(path) - 1):
                        # if this VL exceeds the bandwidth available on this hop,
                        # don't place it there, it's ok: best-effort placement,
                        # it can happen when there is no available path
                        if self.env.psn.edges[path[i], path[i+1]]['availBW'] - vl['reqBW'] < 0:
                            continue
                        self.env.psn.edges[path[i], path[i+1]]['availBW'] -= vl['reqBW']
                        idx1 = self.env.map_id_idx[path[i]]
                        idx2 = self.env.map_id_idx[path[i+1]]
                        self.env.obs_dict['bw_avails'][idx1] -= vl['reqBW'] / self.env.max_bw
                        self.env.obs_dict['bw_avails'][idx2] -= vl['reqBW'] / self.env.max_bw
                except nx.NetworkXNoPath:
                    # no path at all between the two servers -> skip this VL
                    pass

    def compute_links_weights(self, source, target, link):
        """ Method called automatically by nx.shortest_path() """
        # links lacking the required BW get infinite weight so Dijkstra avoids them
        return 1 if link['availBW'] >= self.req_bw else math.inf

    def sample_suitable_node(self, vnf: dict):
        """ Sample a random node with enough resources to host the VNF.

        NOTE(review): rejection-samples until a suitable server is found —
        loops forever if no server can host the VNF; presumably this cannot
        happen at the supported cpu_load values, confirm for high loads.
        """
        server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
        server_id = self.env.servers_map_idx_id[server_idx]
        while not self.env.enough_avail_resources(server_id, vnf):
            server_idx = random.choice(list(self.env.servers_map_idx_id.keys()))
            server_id = self.env.servers_map_idx_id[server_idx]
        return server_id, server_idx

    def sample_nspr(self):
        """ Sample a NSPR among the ones that will arrive in this episode """
        arr_time = random.choice(list(self.env.nsprs.keys()))
        idx = np.random.choice(len(self.env.nsprs[arr_time]))
        nspr = self.env.nsprs[arr_time][idx]
        return nspr