├── docs
│ ├── CNAME
│ ├── pdfs
│ │ └── marine_v3.pdf
│ └── web
│ │ ├── pct
│ │ │ ├── apple-icon.png
│ │ │ ├── IMG_20190821_123143.jpg
│ │ │ ├── schema at 20.38.17.png
│ │ │ ├── schema1 at 20.44.48.png
│ │ │ ├── photo_2021-12-24 13.09.10 (1).jpeg
│ │ │ └── Screenshot 2023-11-03 at 20.27.43.png
│ │ └── css-template.css
├── gigala
│ ├── propulsion
│ │ └── lazy_rocketeer
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── logic.py
│ │ │ └── app.py
│ └── topology
│ │ ├── topology_optimiz
│ │ │ ├── hierarchical_rl
│ │ │ │ ├── HRL
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── log.txt
│ │ │ │ │ ├── asset
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── topology_optimization.py
│ │ │ │ │ ├── preTrained
│ │ │ │ │ │ └── T0-h-v1
│ │ │ │ │ │ │ ├── 1level
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ ├── 2level
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ └── 3level
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ │ │ ├── LICENSE
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── test.py
│ │ │ │ │ ├── train.py
│ │ │ │ │ ├── DDPG.py
│ │ │ │ │ └── hpo.py
│ │ │ │ ├── HRL_without_dropout
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── asset
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── topology_optimization.py
│ │ │ │ │ ├── preTrained
│ │ │ │ │ │ └── T0-h-v1
│ │ │ │ │ │ │ ├── 1level
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ └── 3level
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ │ │ ├── log.txt
│ │ │ │ │ ├── LICENSE
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── test.py
│ │ │ │ │ ├── train.py
│ │ │ │ │ ├── DDPG.py
│ │ │ │ │ └── hpo.py
│ │ │ │ └── hrl_draft
│ │ │ │ │ ├── HRL_mps
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ ├── log.txt
│ │ │ │ │ │ ├── asset
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ └── topology_optimization.py
│ │ │ │ │ │ ├── preTrained
│ │ │ │ │ │ │ └── T0-h-v1
│ │ │ │ │ │ │ │ ├── 1level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── 2level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ └── 3level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ │ │ │ ├── LICENSE
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── test.py
│ │ │ │ │ │ ├── torchfem
│ │ │ │ │ │ │ ├── materials.py
│ │ │ │ │ │ │ └── sparse.py
│ │ │ │ │ │ ├── train.py
│ │ │ │ │ │ ├── DDPG.py
│ │ │ │ │ │ └── hpo.py
│ │ │ │ │ ├── HRL_jax_mps
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ ├── asset
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ └── topology_optimization.py
│ │ │ │ │ │ ├── preTrained
│ │ │ │ │ │ │ └── T0-h-v1
│ │ │ │ │ │ │ │ ├── 1level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── 2level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ └── 3level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ │ │ │ ├── LICENSE
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── log.txt
│ │ │ │ │ │ ├── test.py
│ │ │ │ │ │ ├── train.py
│ │ │ │ │ │ └── DDPG.py
│ │ │ │ │ └── HRL_without_the_trick
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ ├── asset
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ └── topology_optimization.py
│ │ │ │ │ │ ├── preTrained
│ │ │ │ │ │ │ └── T0-h-v1
│ │ │ │ │ │ │ │ ├── 1level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ ├── 2level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ └── 3level
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ │ │ │ ├── LICENSE
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── test.py
│ │ │ │ │ │ ├── train.py
│ │ │ │ │ │ ├── DDPG.py
│ │ │ │ │ │ └── hpo.py
│ │ │ └── rl_beam
│ │ │ │ └── sota
│ │ │ │ │ └── my_animation.gif
│ │ └── sizing_optimiz
│ │ │ └── genetic
│ │ │ │ └── gen.png
├── .github
│ └── FUNDING.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
└── README.md
/docs/CNAME:
--------------------------------------------------------------------------------
1 | gigala.io
--------------------------------------------------------------------------------
/gigala/propulsion/lazy_rocketeer/.gitignore:
--------------------------------------------------------------------------------
1 | config.py
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: ['https://www.paypal.me/gigatskhondia']
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .idea/
3 | .ipynb_checkpoints
4 | .DS_Store
--------------------------------------------------------------------------------
/docs/pdfs/marine_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/pdfs/marine_v3.pdf
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/docs/web/pct/apple-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/apple-icon.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/docs/web/pct/IMG_20190821_123143.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/IMG_20190821_123143.jpg
--------------------------------------------------------------------------------
/docs/web/pct/schema at 20.38.17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/schema at 20.38.17.png
--------------------------------------------------------------------------------
/docs/web/pct/schema1 at 20.44.48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/schema1 at 20.44.48.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/log.txt:
--------------------------------------------------------------------------------
1 | 1,2163.2684115955485
2 | 2,2576.2679841110885
3 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/sizing_optimiz/genetic/gen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/sizing_optimiz/genetic/gen.png
--------------------------------------------------------------------------------
/docs/web/pct/photo_2021-12-24 13.09.10 (1).jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/photo_2021-12-24 13.09.10 (1).jpeg
--------------------------------------------------------------------------------
/docs/web/pct/Screenshot 2023-11-03 at 20.27.43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/Screenshot 2023-11-03 at 20.27.43.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/log.txt:
--------------------------------------------------------------------------------
1 | 1,1.1094133879049637
2 | 2,1.0495237001048725
3 | 3,0.4074200268520841
4 | 4,0.5236868067443036
5 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/rl_beam/sota/my_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/rl_beam/sota/my_animation.gif
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 | id="T0-h-v1",
7 | entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
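Note: asset/__init__.py above imports CantileverEnv and registers it with Gym under the id "T0-h-v1". The snippet below is a minimal, hypothetical usage sketch (it is not a file in this repository); it only assumes the gym package listed in requirements.txt and illustrates how a registered id is normally instantiated:

    import gym
    import asset  # side effect: runs the register() call in asset/__init__.py

    env = gym.make("T0-h-v1")  # builds CantileverEnv via the registered entry point
    state = env.reset()        # exact reset()/step() return values depend on the installed gym version
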
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 | id="T0-h-v1",
7 | entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 | id="T0-h-v1",
7 | entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 | id="T0-h-v1",
7 | entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 | id="T0-h-v1",
7 | entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth
--------------------------------------------------------------------------------
/gigala/propulsion/lazy_rocketeer/README.md:
--------------------------------------------------------------------------------
1 | ### Lazy Rocketeer - an LLM agent for designing rocket engines
2 |
3 | You can communicate with the agent in natural language. To run the app: `streamlit run app.py`
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/docs/web/css-template.css:
--------------------------------------------------------------------------------
1 | .icon-list li::before {
2 | display: block;
3 | flex-shrink: 0;
4 | width: 1.5em;
5 | height: 1.5em;
6 | margin-right: .5rem;
7 | content: "";
8 | background: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='%23212529' viewBox='0 0 16 16'%3E%3Cpath d='M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0zM4.5 7.5a.5.5 0 0 0 0 1h5.793l-2.147 2.146a.5.5 0 0 0 .708.708l3-3a.5.5 0 0 0 0-.708l-3-3a.5.5 0 1 0-.708.708L10.293 7.5H4.5z'/%3E%3C/svg%3E") no-repeat center center / 100% auto;
9 | }
10 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/log.txt:
--------------------------------------------------------------------------------
1 | 1,0.7831910645984259
2 | 2,0.7831910645984259
3 | 3,0.7831910645984259
4 | 4,0.7831910645984259
5 | 5,0.7831910645984259
6 | 6,0.7831910645984259
7 | 7,0.7831910645984259
8 | 8,0.7831910645984259
9 | 9,0.7831910645984259
10 | 10,0.7831910645984259
11 | 11,0.7831910645984259
12 | 12,0.7831910645984259
13 | 13,0.7831910645984259
14 | 14,0.7831910645984259
15 | 15,0.7831910645984259
16 | 16,0.7831910645984259
17 | 17,0.7831910645984259
18 | 18,0.7831910645984259
19 | 19,0.7831910645984259
20 | 20,0.7831910645984259
21 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: Gigala
3 | message: >-
4 | If you use this software, please cite it using the
5 | metadata from this file.
6 | type: software
7 | authors:
8 | - given-names: Giorgi
9 | family-names: Tskhondia
10 | email: gigatskhondia@gmail.com
11 | affiliation: Independent researcher
12 | identifiers:
13 | - type: url
14 | value: 'https://gigatskhondia.medium.com/'
15 | description: Medium blog about my project's developments.
16 | - type: url
17 | value: 'https://www.researchgate.net/profile/Giorgi-Tskhondia'
18 | description: My ResearchGate profile.
19 | repository-code: 'https://github.com/gigatskhondia/gigala'
20 | abstract: >-
21 | Applying artificial intelligence algorithms for the
22 | purpose of engineering design.
23 | keywords:
24 | - Reinforcement learning
25 | - Finite element methods
26 | - Structural engineering
27 | - Design
28 | - Topology optimization
29 | license: MIT
30 | date-released: '2018-10-13'
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Software: Gigala
4 |
5 | Copyright (c) 2018 Georgy Tskhondiya (Giorgi Tskhondia)
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
25 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Georgy Tskhondiya
4 | Copyright (c) 2019 Nikhil Barhate
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Georgy Tskhondiya
4 | Copyright (c) 2019 Nikhil Barhate
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Georgy Tskhondiya
4 | Copyright (c) 2019 Nikhil Barhate
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Georgy Tskhondiya
4 | Copyright (c) 2019 Nikhil Barhate
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Georgy Tskhondiya
4 | Copyright 2024 Nils Meyer
5 | Copyright (c) 2019 Nikhil Barhate
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
--------------------------------------------------------------------------------
/gigala/propulsion/lazy_rocketeer/logic.py:
--------------------------------------------------------------------------------
1 | import langchain_openai
2 | from pydantic import BaseModel, Field
3 | from langchain_core.tools import tool
4 | from config import OPENAI_API_KEY
5 |
6 |
7 | class DialogModel:
8 |     openai_client = None
9 |     _model = None
10 |
11 | def __init__(self, model, api_key_):
12 | self._model = model
13 | self._key = api_key_
14 | self.openai_client = langchain_openai.ChatOpenAI(model = self._model,
15 | openai_api_key = self._key,
16 | temperature = 0)
17 |
18 |
19 | class ChamberInput(BaseModel):
20 | r: float = Field(..., description="Mixture ratio (O/F)")
21 | F: float = Field(..., description="Thrust,[N]")
22 | p1: float = Field(..., description="Chamber pressure, [MPa]")
23 | CF: float = Field(..., description="Thrust coefficient")
24 | c: float = Field(..., description="Estimated nozzle exit exhaust velocity, [m/sec]")
25 | m_p: float = Field(..., description="Usable propellant mass, [kg]")
26 |
27 | @tool(args_schema=ChamberInput)
28 | def get_thrust_chamber_params(r, F, p1, CF, c, m_p):
29 | """ Thrust chamber dimensions and burn duration calculations.
30 | r = 2.3
31 | F = 50000
32 | p1 = 4826000
33 | CF = 1.9
34 | m_p = 7482
35 | """
36 |     m_hat = F/c                      # total propellant mass flow rate
37 |     m_hat_f = m_hat/(r + 1)          # fuel mass flow rate
38 |     m_hat_o = (m_hat*r) / (r + 1)    # oxidizer mass flow rate
39 |     t_b = m_p/(m_hat_f + m_hat_o)    # burn duration
40 |     A_t = F/(p1*CF)                  # nozzle throat area
41 |     return {"nozzle_throat_area": A_t,
42 |             "burn_duration": t_b,
43 |             }
44 |
45 | MODEL = DialogModel("gpt-4o-mini", OPENAI_API_KEY)
46 |
--------------------------------------------------------------------------------
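The thrust-chamber arithmetic in `get_thrust_chamber_params` above can be sanity-checked standalone. The sketch below is illustrative only (it is not part of the repository) and reuses the example values from the tool's docstring; the exhaust velocity `c` is not listed there, so the 2900 m/sec used here is an assumed placeholder.

```python
# Standalone check of the formulas in get_thrust_chamber_params (illustrative only).
r, F, p1, CF, m_p = 2.3, 50000.0, 4826000.0, 1.9, 7482.0  # example values from the docstring
c = 2900.0                                                # assumed exhaust velocity, [m/sec]

m_hat = F / c                     # total propellant mass flow rate
m_hat_f = m_hat / (r + 1)         # fuel mass flow rate
m_hat_o = m_hat * r / (r + 1)     # oxidizer mass flow rate
t_b = m_p / (m_hat_f + m_hat_o)   # burn duration
A_t = F / (p1 * CF)               # nozzle throat area

print(f"nozzle_throat_area: {A_t:.4e}, burn_duration: {t_b:.1f}")
```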
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical reinforcement learning for topology optimization
2 |
3 | This is an implementation of the Hierarchical Actor Critic (HAC) algorithm for topology optimization of a cantilever beam.
4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand.
5 |
6 |
7 | # Hierarchical-Actor-Critic-HAC-PyTorch
8 |
9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals).
10 |
11 |
12 | ## Usage
13 | - All the hyperparameters are found by `hpo.py`.
14 | - To train a new network run `train.py`
15 | - To test a preTrained network run `test.py`
16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2)
17 |
18 |
19 | ## Implementation Details
20 |
21 | - The code is implemented as described in the appendix of the paper and in the official repository, i.e. without target networks and with bounded Q-values.
22 | - The topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization)
23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f)
24 |
25 |
26 | ## Citing
27 |
28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff)
29 |
30 | ## Requirements
31 |
32 | - Python
33 | - PyTorch
34 | - OpenAI gym
35 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical reinforcement learning for topology optimization
2 |
3 | This is an implementation of the Hierarchical Actor Critic (HAC) algorithm for topology optimization of a cantilever beam.
4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand.
5 |
6 |
7 | # Hierarchical-Actor-Critic-HAC-PyTorch
8 |
9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals).
10 |
11 |
12 | ## Usage
13 | - All the hyperparameters are found by `hpo.py`.
14 | - To train a new network run `train.py`
15 | - To test a preTrained network run `test.py`
16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2)
17 |
18 |
19 | ## Implementation Details
20 |
21 | - The code is implemented as described in the appendix of the paper and in the official repository, i.e. without target networks and with bounded Q-values.
22 | - The topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization)
23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f)
24 |
25 |
26 | ## Citing
27 |
28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff)
29 |
30 | ## Requirements
31 |
32 | - Python
33 | - PyTorch
34 | - OpenAI gym
35 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical reinforcement learning for topology optimization
2 |
3 | This is an implementation of the Hierarchical Actor Critic (HAC) algorithm for topology optimization of a cantilever beam.
4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand.
5 |
6 |
7 | # Hierarchical-Actor-Critic-HAC-PyTorch
8 |
9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals).
10 |
11 |
12 | ## Usage
13 | - All the hyperparameters are found by `hpo.py`.
14 | - To train a new network run `train.py`
15 | - To test a preTrained network run `test.py`
16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2)
17 |
18 |
19 | ## Implementation Details
20 |
21 | - The code is implemented as described in the appendix of the paper and in the official repository, i.e. without target networks and with bounded Q-values.
22 | - The topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization)
23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f)
24 |
25 |
26 | ## Citing
27 |
28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff)
29 |
30 | ## Requirements
31 |
32 | - Python
33 | - PyTorch
34 | - OpenAI gym
35 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical reinforcement learning for topology optimization
2 |
3 | This is an implementation of the Hierarchical Actor Critic (HAC) algorithm for topology optimization of a cantilever beam.
4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand.
5 |
6 |
7 | # Hierarchical-Actor-Critic-HAC-PyTorch
8 |
9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals).
10 |
11 |
12 | ## Usage
13 | - All the hyperparameters are found by `hpo.py`.
14 | - To train a new network run `train.py`
15 | - To test a preTrained network run `test.py`
16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2)
17 |
18 |
19 | ## Implementation Details
20 |
21 | - The code is implemented as described in the appendix of the paper and in the official repository, i.e. without target networks and with bounded Q-values.
22 | - The topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization)
23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f)
24 |
25 |
26 | ## Citing
27 |
28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff)
29 |
30 | ## Requirements
31 |
32 | - Python
33 | - PyTorch
34 | - OpenAI gym
35 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical reinforcement learning for topology optimization (accelerated by Mac Metal)
2 |
3 | This is an implementation of the Hierarchical Actor Critic (HAC) algorithm for topology optimization of a cantilever beam (accelerated by Mac Metal).
4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and [Nils Meyer](https://github.com/meyer-nils/torch-fem) and adjusted for the topology optimization task at hand.
5 |
6 | ### DISCLAIMER: Work in progress!
7 |
8 | # Hierarchical-Actor-Critic-HAC-PyTorch
9 |
10 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals).
11 |
12 | ## Usage
13 | - All the hyperparameters are found by `hpo.py`.
14 | - To train a new network run `train.py`
15 | - To test a preTrained network run `test.py`
16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2)
17 |
18 |
19 | ## Implementation Details
20 |
21 | - The code is implemented as described in the appendix of the paper and in the official repository, i.e. without target networks and with bounded Q-values.
22 | - The topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization)
23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f)
24 |
25 |
26 | ## Citing
27 |
28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff)
29 |
30 | ## Requirements
31 |
32 | - Python
33 | - PyTorch
34 | - OpenAI gym
35 |
--------------------------------------------------------------------------------
/gigala/propulsion/lazy_rocketeer/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from langgraph.checkpoint.memory import MemorySaver
3 | from langgraph.prebuilt import create_react_agent
4 | from logic import get_thrust_chamber_params,MODEL
5 |
6 | memory = MemorySaver()
7 | agent_ = create_react_agent(MODEL.openai_client,
8 | tools=[get_thrust_chamber_params],
9 | checkpointer=memory)
10 |
11 | if "messages" not in st.session_state:
12 | st.session_state.messages = [{'role': "system", "content": """This is a Lazy Rocketeer agent (a part of
13 | Gigala software) to reason around system requirements and mission parameters to design a rocket engine. When helping
14 | in design, it uses paradigm: think, act, observe and considers the following aspects:
15 |
16 | - Decisions on basic parameters
17 | - Stage masses and thrust level
18 | - Propellant flows and dimensions of thrust chamber
19 | - Heat transfer
20 | - Injector design
21 | - Igniter dimensions
22 | - Layout drawings, masses, flows, and pressure drops"""}]
23 |
24 | for message in st.session_state.messages:
25 | with st.chat_message(message["role"]):
26 | st.markdown(message["content"])
27 |
28 | if prompt := st.chat_input("Hello, how can I help you?"):
29 | st.session_state.messages.append({"role": "user", "content": prompt})
30 | with st.chat_message("user"):
31 | st.markdown(prompt)
32 |
33 | with st.chat_message("assistant"):
34 |
35 | stream = agent_.invoke(
36 | {"input": prompt, "messages": [
37 | {"role": m["role"], "content": m["content"]}
38 | for m in st.session_state.messages
39 | ]},
40 | {
41 | # "callbacks":[get_streamlit_cb(st.empty())],
42 | "configurable": {"thread_id": "abc321"},
43 | },
44 | )
45 |
46 |         response = stream["messages"][-1].content  # the final message is the assistant's reply
47 | st.write(response)
48 |
49 | st.session_state.messages.append({"role": "assistant", "content": response})
50 |
--------------------------------------------------------------------------------
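Note that app.py above is the entry point launched with `streamlit run app.py`, and it builds the agent from `MODEL` in logic.py, which in turn does `from config import OPENAI_API_KEY`; the config module itself is not included in the tree. A minimal local config.py, assuming the key is supplied through an environment variable, could look like this (a sketch, not the repository's file):

```python
# Hypothetical config.py for lazy_rocketeer (not part of the repository).
# logic.py only needs it to expose an OPENAI_API_KEY constant.
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
```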
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/log.txt:
--------------------------------------------------------------------------------
1 | 1,0.9797410239278408
2 | 2,0.7975127748912604
3 | 3,0.7723802914032126
4 | 4,0.353660573452987
5 | 5,0.26027533525850116
6 | 6,0.36404099368286785
7 | 7,0.733053987457836
8 | 8,0.6374117509465794
9 | 9,1.084154734328111
10 | 10,0.6911926938015504
11 | 11,0.9554268400162111
12 | 12,0.4533388448274854
13 | 13,0.8082453313949136
14 | 14,1.059557754495363
15 | 15,0.6464023496787109
16 | 16,0.42956891861947955
17 | 17,0.8263143041042855
18 | 18,1.1262730338491609
19 | 19,1.1457076591979931
20 | 20,0.30563800818313147
21 | 21,0.2625561628106679
22 | 22,1.0695366344255084
23 | 23,1.2304393130605795
24 | 24,0.7694056863847839
25 | 25,0.49852529193370343
26 | 26,0.6881675978355768
27 | 27,0.9817872254967156
28 | 28,0.6057507389843019
29 | 29,1.0320603944086526
30 | 30,1.272668785983966
31 | 31,0.8920269552879224
32 | 32,0.4762557282953764
33 | 33,0.45530850633074954
34 | 34,1.303107161463436
35 | 35,0.7948466568398261
36 | 36,0.5146320524478519
37 | 37,0.7961899378649369
38 | 38,0.5182637459576089
39 | 39,0.8846944618944337
40 | 40,1.0499732090039007
41 | 41,0.9964231604218777
42 | 42,0.5350889369526511
43 | 43,0.7081767012460056
44 | 44,0.573527385311719
45 | 45,0.7063965491817927
46 | 46,0.751310566243355
47 | 47,1.470913067570704
48 | 48,0.4257564884836356
49 | 49,1.032085090102188
50 | 50,1.2063975265913127
51 | 51,0.36595814553881933
52 | 52,1.2759097185501345
53 | 53,0.9670328423887196
54 | 54,1.1301857941782705
55 | 55,1.3834535912834658
56 | 56,0.49287823295886213
57 | 57,0.9218599330192192
58 | 58,0.9377552003247814
59 | 59,0.5352374463901877
60 | 60,1.154746021761929
61 | 61,0.9833511924281655
62 | 62,0.26633021683095215
63 | 63,1.0277209889807881
64 | 64,0.7032054355208622
65 | 65,0.6863636545417255
66 | 66,0.3068279922203631
67 | 67,0.4101502457096209
68 | 68,0.16205629966238047
69 | 69,0.8319603639166808
70 | 70,0.9809997110234272
71 | 71,0.8888683184631586
72 | 72,1.0125758926358261
73 | 73,0.17638382729878044
74 | 74,0.5691294752014958
75 | 75,0.6928665281967229
76 | 76,0.7553885026199615
77 | 77,0.4553827129098218
78 | 78,0.009003768893487132
79 | 79,0.7367304883121856
80 | 80,0.11465042924233183
81 | 81,0.42512069834557475
82 | 82,0.9511296243632733
83 | 83,0.7924702958231296
84 | 84,1.128081720797427
85 | 85,0.8760122905218178
86 | 86,1.0748449004435678
87 | 87,0.219299353524388
88 | 88,0.676710409166304
89 | 89,0.5609184089960373
90 | 90,0.636260937859008
91 | 91,1.274648432375538
92 | 92,0.8898651322048404
93 | 93,0.7430671748738745
94 | 94,0.430908247614082
95 | 95,0.3785963738267595
96 | 96,0.5775409856616661
97 | 97,0.8619733872169928
98 | 98,0.2259539403820513
99 | 99,1.0352143614321967
100 | 100,0.8859741168447045
101 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Gigala (Engineering design by reinforcement learning, genetic algorithms and finite element methods)
2 |
3 | Are you interested in new ways of engineering design? This repository is an attempt to apply artificial intelligence algorithms to the engineering design of physical products. I combine numerical simulation, such as finite element analysis, with artificial intelligence, such as reinforcement learning, to produce optimal designs. Since 2018, my work has focused on intelligent topology optimization of mechanical structures and elements. I am constantly exploring different ways that AI can be applied to science and engineering.
4 |
5 | Reinforcement learning is a global, gradient-free, non-convex, learning-based, generalizable topology optimization method suitable for practical needs. The sequential nature of reinforcement learning also makes it applicable to technological processes, where it can provide manufacturing steps.
6 |
7 | With my diverse interests, I am using this repository as a testbed for my ideas to create software for artificial intelligence aided design. I hope that my work can inspire you to explore new ways that AI can be applied to your field.
8 |
9 | At present, the Gigala software mainly consists of a topology optimization module and an offshore pipelay dynamics module (now separated into [Ocean Intella](https://github.com/gigatskhondia/ocean_intella) software). It uses artificial intelligence to assist an engineer in her design. You can use it as a research or engineering analysis tool to design different physical components and elements.
10 |
11 | RL agent designing a cantilever:
12 | 
13 |
14 |
15 | Philosophy of the software:
16 | * free (accessibility)
17 | * open source (full customization)
18 | * practical performance on your PC (low carbon footprint)
19 | * developed in Python (widespread)
20 | * use AI (modern)
21 |
22 | Please check my [Blog](https://gigatskhondia.medium.com/) and [ResearchGate](https://www.researchgate.net/profile/Giorgi-Tskhondia) for the specifics of the models and algorithms I use.
23 |
24 | For citation please use [Reinforcement Learning Guided Engineering Design: from Topology Optimization to Advanced Modelling](https://jngr5.com/index.php/journal-of-next-generation-resea/article/view/95)
25 |
26 | Topology optimization by reinforcement learning:
27 |
28 | 
29 |
30 |
31 | Topology optimization by genetic algorithms:
32 |
33 | 
34 |
35 |
36 | Pseudo 3D topology optimization by reinforcement learning (see [preprint-0](https://www.researchgate.net/publication/393164291_Pseudo_3D_topology_optimisation_with_reinforcement_learning)):
37 |
38 |
39 |
40 |
41 | For current benchmarks of TO with RL see [preprint-1](https://www.researchgate.net/publication/398406554_Practical_topology_optimization_with_deep_reinforcement_learning).
42 |
43 | To keep up to date with the project please check [Gigala](https://gigala.io/) page.
44 |
45 | #### If you like my project and want to support it, please consider doing any of the following: ####
46 | * Star this project
47 | * [Sponsor](https://www.paypal.me/gigatskhondia) this project
48 | * [Contact](https://gigala.io/) me if you would like to collaborate
49 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/asset/topology_optimization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from utils import *
3 | from utils_ import *
4 | import gym
5 | from gym import spaces
6 | import random
7 | import numpy as np
8 | import autograd.numpy as anp
9 | from gym.utils import seeding
10 |
11 |
12 | class Model:
13 | def __init__(self, x):
14 | self.flag_ = True
15 | self.n, self.m = x.shape
16 | self.actions_dic = {}
17 |
18 | k = 0
19 | for i in range(self.n):
20 | for j in range(self.m):
21 | self.actions_dic[k] = (i, j)
22 | k += 1
23 |
24 | def action_space_(self, action, x_cap):
25 | x, y = self.actions_dic[action]
26 | x_cap[x][y] = 1
27 |
28 | @staticmethod
29 | def draw(x_cap):
30 | plt.figure(dpi=50)
31 | print('\nFinal Cantilever rl_beam design:')
32 | plt.imshow(x_cap)
33 | plt.show(block=False)
34 | plt.pause(3)
35 | plt.close('all')
36 |
37 |
38 | class CantileverEnv(gym.Env):
39 |
40 | metadata = {"render.modes": ["human"],
41 | # 'video.frames_per_second' : 30
42 | }
43 |
44 | def __init__(self):
45 | super().__init__()
46 |
47 | self.rd = -1
48 | self.args = get_args(*mbb_beam(rd=self.rd))
49 |
50 | dim_cap = self.args.nelx*self.args.nely
51 | self.N_DISCRETE_ACTIONS = self.args.nelx*self.args.nely
52 |
53 | self.action_space = spaces.Box(low=0, high=1,
54 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64)
55 |
56 | self.observation_space = spaces.Box(low=np.array([-1e10 for x in range(dim_cap)]),
57 | high=np.array([1e10 for y in range(dim_cap)]),
58 | shape=(dim_cap,),
59 | dtype=np.float64)
60 |
61 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
62 |
63 | self.M = Model(self.x)
64 |
65 | self.reward = 0
66 | self.step_ = 0
67 | self.needs_reset = True
68 | self.layer_dim = 4
69 | self.n_layers = 2
70 | self.optimizer = 'Adam'
71 | self.seed()
72 |
73 | def seed(self, seed=None):
74 | self.np_random, seed = seeding.np_random(seed)
75 | return [seed]
76 |
77 | def step(self, action):
78 |
79 | self.args = get_args(*mbb_beam(rd=self.rd))
80 |
81 |         act = np.argmax(action)  # map the continuous action vector to a discrete element index
82 |
83 |         self.M.action_space_(act, self.x)  # set the chosen element's density to 1
84 |
85 |         self.tmp, self.const = fast_stopt(self.args, self.x)
86 |         self.step_ += 1
87 |
88 |         self.reward = (1/self.tmp)**0.5
89 |
90 | done = False
91 |
92 | if self.const > 0.68:
93 | done = True
94 |
95 | if self.step_ > self.M.n*self.M.m:
96 | done = True
97 |
98 | if self.needs_reset:
99 | raise RuntimeError("Tried to step environment that needs reset")
100 |
101 | if done:
102 | self.needs_reset = True
103 |
104 | return self.x.reshape(self.x.shape[0]*self.x.shape[1]), self.reward, done, dict()
105 |
106 | def reset(self):
107 |
108 | if not self.M.flag_:
109 | self.rd = random.choice([0,2,-2])
110 | else:
111 | self.rd = -1
112 |
113 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
114 |
115 | self.reward = 0
116 | self.needs_reset = False
117 | self.step_ = 0
118 |
119 | return self.x.reshape(self.x.shape[0]*self.x.shape[1])
120 |
121 | def render(self, mode="human"):
122 | self.M.draw(self.x)
123 |
124 | def close(self):
125 | pass
126 |
--------------------------------------------------------------------------------
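A quick way to exercise the CantileverEnv above is a random-policy rollout. The sketch below is illustrative only and assumes the repository's `asset` package (which registers the `T0-h-v1` id used by test.py) and its `utils` modules are importable:

```python
# Illustrative random-policy rollout of the topology optimization environment.
import gym
import asset  # registers the T0-h-v1 environment, as in test.py

env = gym.make("T0-h-v1")
state = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()           # random vector in [0, 1]^N
    state, reward, done, _ = env.step(action)    # argmax(action) picks one element to fill
    total_reward += reward
print("episode return:", total_reward)
env.render()
```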
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/asset/topology_optimization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from utils import *
3 | import gym
4 | from gym import spaces
5 | import random
6 | import numpy as np
7 | import autograd.numpy as anp
8 | from gym.utils import seeding
9 |
10 |
11 | class Model:
12 | def __init__(self, x):
13 | self.flag_ = True
14 | self.n, self.m = x.shape
15 | self.actions_dic = {}
16 |
17 | k = 0
18 | for i in range(self.n):
19 | for j in range(self.m):
20 | self.actions_dic[k] = (i, j)
21 | k += 1
22 |
23 | def action_space_(self, action, x_cap):
24 | x, y = self.actions_dic[action]
25 | x_cap[x][y] = 1
26 |
27 | @staticmethod
28 | def draw(x_cap):
29 | plt.figure(dpi=50)
30 | print('\nFinal Cantilever rl_beam design:')
31 | plt.imshow(x_cap)
32 | plt.show(block=False)
33 | plt.pause(3)
34 | plt.close('all')
35 |
36 |
37 | class CantileverEnv(gym.Env):
38 |
39 | metadata = {"render.modes": ["human"],
40 | # 'video.frames_per_second' : 30
41 | }
42 |
43 | def __init__(self):
44 | super().__init__()
45 |
46 | self.rd = -1
47 | self.args = get_args(*mbb_beam(rd=self.rd))
48 |
49 | dim_cap = self.args.nelx*self.args.nely
50 | self.N_DISCRETE_ACTIONS = self.args.nelx*self.args.nely
51 |
52 | self.action_space = spaces.Box(low=0, high=1,
53 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64)
54 |
55 | self.observation_space = spaces.Box(low=np.array([-1e10 for x in range(dim_cap)]),
56 | high=np.array([1e10 for y in range(dim_cap)]),
57 | shape=(dim_cap,),
58 | dtype=np.float64)
59 |
60 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
61 |
62 | self.M = Model(self.x)
63 |
64 | self.reward = 0
65 | self.step_ = 0
66 | self.needs_reset = True
67 | self.layer_dim = 4
68 | self.n_layers = 2
69 | self.optimizer = 'Adam'
70 | self.seed()
71 |
72 | def seed(self, seed=None):
73 | self.np_random, seed = seeding.np_random(seed)
74 | return [seed]
75 |
76 | def step(self, action):
77 |
78 | self.args = get_args(*mbb_beam(rd=self.rd))
79 |
80 |         act = np.argmax(action)
81 |
82 |         self.M.action_space_(act, self.x)
83 |
84 |         self.tmp, self.const = fast_stopt(self.args, self.x)
85 |         self.step_ += 1
86 |
87 | # entropy = -np.sum(self.x * np.log2(self.x))
88 | # self.reward = (1/self.tmp)**0.5+entropy
89 |
90 | self.reward = (1/self.tmp)**0.5
91 |
92 | done = False
93 |
94 | if self.const > 0.68:
95 | done = True
96 |
97 | if self.step_ > self.M.n*self.M.m:
98 | done = True
99 |
100 | if self.needs_reset:
101 | raise RuntimeError("Tried to step environment that needs reset")
102 |
103 | if done:
104 | self.needs_reset = True
105 |
106 | return self.x.reshape(self.x.shape[0]*self.x.shape[1]), self.reward, done, dict()
107 |
108 | def reset(self):
109 |
110 | if not self.M.flag_:
111 | self.rd = random.choice([0,2,-2])
112 | else:
113 | self.rd = -1
114 |
115 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
116 |
117 | self.reward = 0
118 | self.needs_reset = False
119 | self.step_ = 0
120 |
121 | return self.x.reshape(self.x.shape[0]*self.x.shape[1])
122 |
123 | def render(self, mode="human"):
124 | self.M.draw(self.x)
125 |
126 | def close(self):
127 | pass
128 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def test():
11 |
12 | #################### Hyperparameters ####################
13 |     env_name = "T0-h-v1"
14 |
15 |     save_episode = 20          # keep saving every n episodes
16 |     max_episodes = 10          # max num of evaluation episodes
17 | random_seed = 1
18 | render = False
19 |
20 | env = gym.make(env_name)
21 | env.layer_dim = 12
22 | env.n_layers = 16
23 | env.optimizer = 'SGD'
24 | state_dim = env.observation_space.shape[0]
25 | action_dim = env.N_DISCRETE_ACTIONS
26 |
27 | """
28 | Actions (both primitive and subgoal) are implemented as follows:
29 | action = ( network output (Tanh) * bounds ) + offset
30 | clip_high and clip_low bound the exploration noise
31 | """
32 |
33 | # primitive action bounds and offset
34 | action_bounds = env.action_space.high[0]
35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
37 |
38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
41 |
42 | # state bounds and offset
43 | # state_bounds_np = np.array([0.5, 0.5e7])
44 | state_bounds_np = np.array([1, 1e7])
45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
46 | # state_offset = np.array([0.5, 0.5e7])
47 | state_offset = np.array([0, 0])
48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
49 | state_clip_low = np.array([0, 0])
50 | state_clip_high = np.array([1, 1e7])
51 |
52 | exploration_action_noise = np.array([0.024320378739607497])
53 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693])
54 |
55 | goal_state=np.array([0.68, 20])
56 | threshold=[0.05, 5]
57 |
58 | # HAC parameters:
59 | k_level = 2 # num of levels in hierarchy
60 | H = 6 # time horizon to achieve subgoal
61 | lamda = 0.4337021542899802 # subgoal testing parameter
62 |
63 | # DDPG parameters:
64 | gamma = 0.9703997234344832 # discount factor for future rewards
65 | n_iter = 148 # update policy n_iter times in one DDPG update
66 | batch_size = 183 # num of transitions sampled from replay buffer
67 | lr = 7.943448987978889e-05
68 |
69 | # save trained models
70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
71 | filename = "HAC_{}".format(env_name)
72 | #########################################################
73 |
74 | if random_seed:
75 | print("Random Seed: {}".format(random_seed))
76 | env.seed(random_seed)
77 | torch.manual_seed(random_seed)
78 | np.random.seed(random_seed)
79 |
80 | # creating HAC agent and setting parameters
81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
83 |
84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
86 |
87 | # load agent
88 | agent.load(directory, filename)
89 |
90 | # Evaluation
91 | # env.M.flag_=False
92 | for i_episode in range(1, max_episodes+1):
93 |
94 | agent.reward = 0
95 | agent.timestep = 0
96 |
97 |
98 | state = env.reset()
99 | agent.run_HAC(env, k_level-1, state, goal_state, True)
100 | env.render()
101 |
102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const))
103 |
104 | env.close()
105 |
106 |
107 |
108 | if __name__ == '__main__':
109 | test()
110 |
--------------------------------------------------------------------------------
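The docstring in test.py above summarizes how both primitive and subgoal actions are produced: the Tanh network output is scaled by `bounds` and shifted by `offset`, with `clip_low`/`clip_high` bounding the exploration noise added on top. A minimal sketch of the scale-and-clip step (illustrative, not the repository's HAC code):

```python
# Illustrative action mapping: action = (tanh output * bounds) + offset, then clipped.
import numpy as np

def scale_action(tanh_output, bounds, offset, clip_low, clip_high):
    action = tanh_output * bounds + offset
    return np.clip(action, clip_low, clip_high)

# With the primitive-action settings from test.py (bounds = 1, offset = 0, clip to [0, 1]):
print(scale_action(np.array([-0.3, 0.9]), 1.0, 0.0, 0.0, 1.0))  # -> [0.  0.9]
```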
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def test():
11 |
12 | #################### Hyperparameters ####################
13 |     env_name = "T0-h-v1"
14 |
15 |     save_episode = 20          # keep saving every n episodes
16 |     max_episodes = 10          # max num of evaluation episodes
17 | random_seed = 42
18 | render = False
19 |
20 | env = gym.make(env_name)
21 | env.layer_dim = 126
22 | env.n_layers = 90
23 | env.optimizer = 'Adam'
24 | state_dim = env.observation_space.shape[0]
25 | action_dim = env.N_DISCRETE_ACTIONS
26 |
27 | """
28 | Actions (both primitive and subgoal) are implemented as follows:
29 | action = ( network output (Tanh) * bounds ) + offset
30 | clip_high and clip_low bound the exploration noise
31 | """
32 |
33 | # primitive action bounds and offset
34 | action_bounds = env.action_space.high[0]
35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
37 |
38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
41 |
42 | # state bounds and offset
43 | # state_bounds_np = np.array([0.5, 0.5e7])
44 | state_bounds_np = np.array([1, 1e7])
45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
46 | # state_offset = np.array([0.5, 0.5e7])
47 | state_offset = np.array([0, 0])
48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
49 | state_clip_low = np.array([0, 0])
50 | state_clip_high = np.array([1, 1e7])
51 |
52 | exploration_action_noise = np.array([ 0.013524454522609227])
53 | exploration_state_noise = np.array([0.2065791657801734, 4371.871955300335])
54 |
55 | goal_state=np.array([0.68, 23])
56 | threshold=[0.05, 3]
57 |
58 | # HAC parameters:
59 | k_level = 2 # num of levels in hierarchy
60 | H = 8 # time horizon to achieve subgoal
61 | lamda = 0.9759336249447662 # subgoal testing parameter
62 |
63 | # DDPG parameters:
64 | gamma = 0.9845965064662501 # discount factor for future rewards
65 | n_iter = 100 # update policy n_iter times in one DDPG update
66 | batch_size = 100 # num of transitions sampled from replay buffer
67 | lr = 0.061703036438267876
68 |
69 | # save trained models
70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
71 | filename = "HAC_{}".format(env_name)
72 | #########################################################
73 |
74 | if random_seed:
75 | print("Random Seed: {}".format(random_seed))
76 | env.seed(random_seed)
77 | torch.manual_seed(random_seed)
78 | np.random.seed(random_seed)
79 |
80 | # creating HAC agent and setting parameters
81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
83 |
84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
86 |
87 | # load agent
88 | agent.load(directory, filename)
89 |
90 | # Evaluation
91 | # env.M.flag_=False
92 | for i_episode in range(1, max_episodes+1):
93 |
94 | agent.reward = 0
95 | agent.timestep = 0
96 |
97 |
98 | state = env.reset()
99 | agent.run_HAC(env, k_level-1, state, goal_state, True)
100 | env.render()
101 |
102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const))
103 |
104 | env.close()
105 |
106 |
107 |
108 | if __name__ == '__main__':
109 | test()
110 |
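Note: the action convention described in the docstring above can be illustrated in isolation. The following is a toy sketch with made-up numbers that does not call into the HAC networks; bounds, offset and the clip values play the roles of action_bounds, action_offset, action_clip_low and action_clip_high defined in this script, and the noise term stands in for the exploration noise that the agent applies during training:

    import numpy as np

    net_out = np.array([-0.2, 0.7, 1.0])        # hypothetical Tanh output for a 3-action toy case
    bounds, offset = 1.0, 0.0                   # plays the role of action_bounds / action_offset
    clip_low, clip_high = 0.0, 1.0              # plays the role of action_clip_low / action_clip_high

    action = net_out * bounds + offset          # action = (network output (Tanh) * bounds) + offset
    action = action + np.random.normal(0.0, 0.0135, size=action.shape)  # stand-in for exploration noise
    action = np.clip(action, clip_low, clip_high)                       # clip_low / clip_high bound the noisy action
    print(action)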
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
  7 | device = torch.device("mps:0" if torch.backends.mps.is_available() else "cpu")
8 | print(device)
9 |
10 | def test():
11 |
12 | #################### Hyperparameters ####################
13 | env_name ="T0-h-v1"
14 |
15 | save_episode = 20 # keep saving every n episodes
 16 |     max_episodes = 10           # max num of evaluation episodes
17 | random_seed = 1
18 | render = False
19 |
20 | env = gym.make(env_name)
21 | env.layer_dim = 12
22 | env.n_layers = 16
23 | env.optimizer = 'SGD'
24 | state_dim = env.observation_space.shape[0]
25 | action_dim = env.N_DISCRETE_ACTIONS
26 |
27 | """
28 | Actions (both primitive and subgoal) are implemented as follows:
29 | action = ( network output (Tanh) * bounds ) + offset
30 | clip_high and clip_low bound the exploration noise
31 | """
32 |
33 | # primitive action bounds and offset
34 | action_bounds = env.action_space.high[0]
35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
37 |
38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
41 |
42 | # state bounds and offset
43 | # state_bounds_np = np.array([0.5, 0.5e7])
44 | state_bounds_np = np.array([1, 1e7])
45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
46 | # state_offset = np.array([0.5, 0.5e7])
47 | state_offset = np.array([0, 0])
48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
49 | state_clip_low = np.array([0, 0])
50 | state_clip_high = np.array([1, 1e7])
51 |
52 | exploration_action_noise = np.array([0.024320378739607497])
53 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693])
54 |
55 | goal_state=np.array([0.68, 20])
56 | threshold=[0.05, 5]
57 |
58 | # HAC parameters:
59 | k_level = 2 # num of levels in hierarchy
60 | H = 6 # time horizon to achieve subgoal
61 | lamda = 0.4337021542899802 # subgoal testing parameter
62 |
63 | # DDPG parameters:
64 | gamma = 0.9703997234344832 # discount factor for future rewards
65 | n_iter = 148 # update policy n_iter times in one DDPG update
66 | batch_size = 183 # num of transitions sampled from replay buffer
67 | lr = 7.943448987978889e-05
68 |
69 | # save trained models
70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
71 | filename = "HAC_{}".format(env_name)
72 | #########################################################
73 |
74 | if random_seed:
75 | print("Random Seed: {}".format(random_seed))
76 | env.seed(random_seed)
77 | torch.manual_seed(random_seed)
78 | np.random.seed(random_seed)
79 |
80 | # creating HAC agent and setting parameters
81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
83 |
84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
86 |
87 | # load agent
88 | agent.load(directory, filename)
89 |
90 | # Evaluation
91 | # env.M.flag_=False
92 | for i_episode in range(1, max_episodes+1):
93 |
94 | agent.reward = 0
95 | agent.timestep = 0
96 |
97 |
98 | state = env.reset()
99 | agent.run_HAC(env, k_level-1, state, goal_state, True)
100 | env.render()
101 |
102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const))
103 |
104 | env.close()
105 |
106 |
107 |
108 | if __name__ == '__main__':
109 | test()
110 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def test():
11 |
12 | #################### Hyperparameters ####################
13 | env_name ="T0-h-v1"
14 |
15 | save_episode = 20 # keep saving every n episodes
 16 |     max_episodes = 10           # max num of evaluation episodes
17 | random_seed = 1
18 | render = False
19 |
20 | env = gym.make(env_name)
21 | env.layer_dim = 12
22 | env.n_layers = 14
23 | env.optimizer = 'RMSprop'
24 | state_dim = env.observation_space.shape[0]
25 | action_dim = env.N_DISCRETE_ACTIONS
26 |
27 | """
28 | Actions (both primitive and subgoal) are implemented as follows:
29 | action = ( network output (Tanh) * bounds ) + offset
30 | clip_high and clip_low bound the exploration noise
31 | """
32 |
33 | # primitive action bounds and offset
34 | action_bounds = env.action_space.high[0]
35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
37 |
38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
41 |
42 | # state bounds and offset
43 | # state_bounds_np = np.array([0.5, 0.5e7])
44 | # state_bounds_np = np.array([1, 1e7])
45 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
46 | state_bounds = env.observation_space.high[0]
47 | # state_offset = np.array([0.5, 0.5e7])
48 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
49 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
50 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
51 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
52 |
53 | exploration_action_noise = np.array([0.5629988256824885])
54 | exploration_state_noise = np.array([0.1313567686964759])
55 |
56 | goal_ = [0.68, 30]
57 | goal_state = np.array(goal_+[0] * (env.N_DISCRETE_ACTIONS - len(goal_)))
58 | # print(goal_state)
59 | threshold = [0.05, 3]
60 |
61 | # HAC parameters:
62 | k_level = 2 # num of levels in hierarchy
63 | H = 9 # time horizon to achieve subgoal
64 | lamda = 0.3453605248576358 # subgoal testing parameter
65 |
66 | # DDPG parameters:
67 | gamma = 0.9777965340075817 # discount factor for future rewards
68 | n_iter = 223 # update policy n_iter times in one DDPG update
69 | batch_size = 340 # num of transitions sampled from replay buffer
70 | lr = 0.04471490153909566
71 |
72 | # save trained models
73 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
74 | filename = "HAC_{}".format(env_name)
75 | #########################################################
76 |
77 | if random_seed:
78 | print("Random Seed: {}".format(random_seed))
79 | env.seed(random_seed)
80 | torch.manual_seed(random_seed)
81 | np.random.seed(random_seed)
82 |
83 | # creating HAC agent and setting parameters
84 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
85 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
86 |
87 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
88 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
89 |
90 | # load agent
91 | agent.load(directory, filename)
92 |
93 | # Evaluation
94 | # env.M.flag_=False
95 | for i_episode in range(1, max_episodes+1):
96 |
97 | agent.reward = 0
98 | agent.timestep = 0
99 |
100 |
101 | state = env.reset()
102 | agent.run_HAC(env, k_level-1, state, goal_state, True)
103 | env.render()
104 |
105 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const))
106 |
107 | env.close()
108 |
109 |
110 |
111 | if __name__ == '__main__':
112 | test()
113 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu")
9 | device = torch.device("cpu")
10 | print(device)
11 |
12 | def test():
13 |
14 | #################### Hyperparameters ####################
15 | env_name ="T0-h-v1"
16 |
17 | save_episode = 20 # keep saving every n episodes
 18 |     max_episodes = 10           # max num of evaluation episodes
19 | random_seed = 1
20 | render = False
21 |
22 | env = gym.make(env_name)
23 | env.layer_dim = 12
24 | env.n_layers = 14
25 | env.optimizer = 'RMSprop'
26 | state_dim = env.observation_space.shape[0]
27 | action_dim = env.N_DISCRETE_ACTIONS
28 |
29 | """
30 | Actions (both primitive and subgoal) are implemented as follows:
31 | action = ( network output (Tanh) * bounds ) + offset
32 | clip_high and clip_low bound the exploration noise
33 | """
34 |
35 | # primitive action bounds and offset
36 | action_bounds = env.action_space.high[0]
37 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
38 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
39 |
40 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
41 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
42 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
43 |
44 | # state bounds and offset
45 | # state_bounds_np = np.array([0.5, 0.5e7])
46 | # state_bounds_np = np.array([1, 1e7])
47 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
48 | state_bounds = env.observation_space.high[0]
49 | # state_offset = np.array([0.5, 0.5e7])
50 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
51 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
52 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
53 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
54 |
55 | exploration_action_noise = np.array([0.5629988256824885])
56 | exploration_state_noise = np.array([0.1313567686964759])
57 |
58 | goal_ = [0.68, 30]
59 | goal_state = np.array(goal_+[0] * (env.N_DISCRETE_ACTIONS - len(goal_)))
60 | # print(goal_state)
61 | threshold = [0.05, 3]
62 |
63 | # HAC parameters:
64 | k_level = 2 # num of levels in hierarchy
65 | H = 9 # time horizon to achieve subgoal
66 | lamda = 0.3453605248576358 # subgoal testing parameter
67 |
68 | # DDPG parameters:
69 | gamma = 0.9777965340075817 # discount factor for future rewards
70 | n_iter = 223 # update policy n_iter times in one DDPG update
71 | batch_size = 340 # num of transitions sampled from replay buffer
72 | lr = 0.04471490153909566
73 |
74 | # save trained models
75 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
76 | filename = "HAC_{}".format(env_name)
77 | #########################################################
78 |
79 | if random_seed:
80 | print("Random Seed: {}".format(random_seed))
81 | env.seed(random_seed)
82 | torch.manual_seed(random_seed)
83 | np.random.seed(random_seed)
84 |
85 | # creating HAC agent and setting parameters
86 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
87 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
88 |
89 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
90 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
91 |
92 | # load agent
93 | agent.load(directory, filename)
94 |
95 | # Evaluation
96 | # env.M.flag_=False
97 | for i_episode in range(1, max_episodes+1):
98 |
99 | agent.reward = 0
100 | agent.timestep = 0
101 |
102 | state = env.reset()
103 | agent.run_HAC(env, k_level-1, state, goal_state, True)
104 | env.render()
105 |
106 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const))
107 |
108 | env.close()
109 |
110 |
111 |
112 | if __name__ == '__main__':
113 | test()
114 |
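Note: unlike the two-component goal used in the other variants, this draft pads goal_ with zeros up to the action dimension; a tiny sketch with a hypothetical N_DISCRETE_ACTIONS of 5, for illustration only:

    import numpy as np

    N_DISCRETE_ACTIONS = 5                      # hypothetical value, not taken from the environment
    goal_ = [0.68, 30]
    goal_state = np.array(goal_ + [0] * (N_DISCRETE_ACTIONS - len(goal_)))
    print(goal_state)                           # 0.68, 30 followed by zeros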
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def train():
11 | #################### Hyperparameters ####################
12 | env_name ="T0-h-v1"
13 |
14 | save_episode = 100 # keep saving every n episodes
15 | max_episodes = 5_000 # max num of training episodes
16 | random_seed = 1
17 | render = False
18 |
19 | env = gym.make(env_name)
20 | env.layer_dim= 12
21 | env.n_layers= 16
22 | env.optimizer='SGD'
23 | state_dim = env.observation_space.shape[0]
24 | action_dim = env.N_DISCRETE_ACTIONS
25 |
26 | """
27 | Actions (both primitive and subgoal) are implemented as follows:
28 | action = ( network output (Tanh) * bounds ) + offset
29 | clip_high and clip_low bound the exploration noise
30 | """
31 |
32 | # primitive action bounds and offset
33 | action_bounds = env.action_space.high[0]
34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
36 |
37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
40 |
41 | # state bounds and offset
42 | # state_bounds_np = np.array([0.5, 0.5e7])
43 | state_bounds_np = np.array([1, 1e7])
44 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
45 | # state_offset = np.array([0.5, 0.5e7])
46 | state_offset = np.array([0, 0])
47 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
48 | state_clip_low = np.array([0, 0])
49 | state_clip_high = np.array([1, 1e7])
50 |
51 | exploration_action_noise = np.array([0.024320378739607497])
52 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693])
53 |
54 | goal_state=np.array([0.68, 20])
55 | threshold=[0.05, 5]
56 |
57 | # HAC parameters:
58 | k_level = 2 # num of levels in hierarchy
59 | H = 6 # time horizon to achieve subgoal
60 | lamda = 0.4337021542899802 # subgoal testing parameter
61 |
62 | # DDPG parameters:
63 | gamma = 0.9703997234344832 # discount factor for future rewards
64 | n_iter = 148 # update policy n_iter times in one DDPG update
65 | batch_size = 183 # num of transitions sampled from replay buffer
66 | lr = 7.943448987978889e-05
67 |
68 | # save trained models
69 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
70 | filename = "HAC_{}".format(env_name)
71 | #########################################################
72 |
73 |
74 | if random_seed:
75 | print("Random Seed: {}".format(random_seed))
76 | env.seed(random_seed)
77 | torch.manual_seed(random_seed)
78 | np.random.seed(random_seed)
79 |
80 | # creating HAC agent and setting parameters
81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
83 |
84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
86 |
87 | # logging file:
88 | log_f = open("log.txt","w+")
89 |
90 | # training procedure
91 | R=0
92 | for i_episode in range(1, max_episodes+1):
93 | agent.reward = 0
94 | agent.timestep = 0
95 |
96 | state = env.reset()
97 | # collecting experience in environment
98 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
99 |
100 | if agent.check_goal(last_state, goal_state, threshold, env):
101 | print("################ Solved! ################ ")
102 | name = filename + '_solved'
103 | agent.save(directory, name)
104 |
105 | # update all levels
106 | agent.update(n_iter, batch_size, env)
107 |
108 | # logging updates:
109 | log_f.write('{},{}\n'.format(i_episode, agent.reward))
110 | log_f.flush()
111 |
112 | if i_episode % save_episode == 0:
113 | # if agent.reward>R:
114 | R=agent.reward
115 | agent.save(directory, filename)
116 | print('SAVING ################# SAVING ################## SAVING:',R)
117 |
118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward))
119 |
120 |
121 | if __name__ == '__main__':
122 | train()
123 |
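Note: the training loop writes one "episode,reward" pair per line to log.txt; a minimal sketch for plotting the learning curve afterwards (assumes matplotlib is installed and that the script is run next to log.txt):

    import matplotlib.pyplot as plt

    episodes, rewards = [], []
    with open("log.txt") as f:
        for line in f:
            ep, r = line.strip().split(",")
            episodes.append(int(ep))
            rewards.append(float(r))

    plt.plot(episodes, rewards)
    plt.xlabel("episode")
    plt.ylabel("episode reward")
    plt.show()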
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def train():
11 | #################### Hyperparameters ####################
12 | env_name ="T0-h-v1"
13 |
14 | save_episode = 10 # keep saving every n episodes
15 | max_episodes = 100 # max num of training episodes
16 | random_seed = 42
17 | render = False
18 |
19 | env = gym.make(env_name)
20 | env.layer_dim= 126
21 | env.n_layers= 90
22 | env.optimizer='Adam'
23 | state_dim = env.observation_space.shape[0]
24 | action_dim = env.N_DISCRETE_ACTIONS
25 |
26 | """
27 | Actions (both primitive and subgoal) are implemented as follows:
28 | action = ( network output (Tanh) * bounds ) + offset
29 | clip_high and clip_low bound the exploration noise
30 | """
31 |
32 | # primitive action bounds and offset
33 | action_bounds = env.action_space.high[0]
34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
36 |
37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
40 |
41 | # state bounds and offset
42 | # state_bounds_np = np.array([0.5, 0.5e7])
43 | state_bounds_np = np.array([1, 1e7])
44 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
45 | # state_offset = np.array([0.5, 0.5e7])
46 | state_offset = np.array([0, 0])
47 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
48 | state_clip_low = np.array([0, 0])
49 | state_clip_high = np.array([1, 1e7])
50 |
51 | exploration_action_noise = np.array([ 0.013524454522609227])
52 | exploration_state_noise = np.array([0.2065791657801734, 4371.871955300335])
53 |
54 | goal_state=np.array([0.68, 23])
55 | threshold=[0.05, 3]
56 |
57 | # HAC parameters:
58 | k_level = 2 # num of levels in hierarchy
59 | H = 8 # time horizon to achieve subgoal
60 | lamda = 0.9759336249447662 # subgoal testing parameter
61 |
62 | # DDPG parameters:
63 | gamma = 0.9845965064662501 # discount factor for future rewards
64 | n_iter = 100 # update policy n_iter times in one DDPG update
65 | batch_size = 100 # num of transitions sampled from replay buffer
66 | lr = 0.061703036438267876
67 |
68 | # save trained models
69 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
70 | filename = "HAC_{}".format(env_name)
71 | #########################################################
72 |
73 |
74 | if random_seed:
75 | print("Random Seed: {}".format(random_seed))
76 | env.seed(random_seed)
77 | torch.manual_seed(random_seed)
78 | np.random.seed(random_seed)
79 |
80 | # creating HAC agent and setting parameters
81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
83 |
84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
86 |
87 | # logging file:
88 | log_f = open("log.txt","w+")
89 |
90 | # training procedure
91 | R=0
92 | for i_episode in range(1, max_episodes+1):
93 | agent.reward = 0
94 | agent.timestep = 0
95 |
96 | state = env.reset()
97 | # collecting experience in environment
98 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
99 |
100 | if agent.check_goal(last_state, goal_state, threshold, env):
101 | print("################ Solved! ################ ")
102 | name = filename + '_solved'
103 | agent.save(directory, name)
104 |
105 | # update all levels
106 | agent.update(n_iter, batch_size, env)
107 |
108 | # logging updates:
109 | log_f.write('{},{}\n'.format(i_episode, agent.reward))
110 | log_f.flush()
111 |
112 | if i_episode % save_episode == 0:
113 | # if agent.reward>R:
114 | R=agent.reward
115 | agent.save(directory, filename)
116 | print('SAVING ################# SAVING ################## SAVING:',R)
117 |
118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward))
119 |
120 |
121 | if __name__ == '__main__':
122 | train()
123 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/torchfem/materials.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from math import sqrt
3 | from typing import Callable
4 |
5 | import torch
6 | from torch import Tensor
7 |
8 |
9 | class Material(ABC):
10 | """Base class for material models."""
11 |
12 | @abstractmethod
13 | def __init__(self):
14 | self.n_state: int
15 | self.is_vectorized: bool
16 | self.C: Tensor
17 | pass
18 |
19 | @abstractmethod
20 | def vectorize(self, n_elem: int):
21 | """Create a vectorized copy of the material for `n_elm` elements."""
22 | pass
23 |
24 | @abstractmethod
25 | def step(self, depsilon: Tensor, epsilon: Tensor, sigma: Tensor, state: Tensor):
26 | """Perform a strain increment."""
27 | pass
28 |
29 | @abstractmethod
30 | def rotate(self, R):
31 | """Rotate the material with rotation matrix R."""
32 | pass
33 |
34 |
35 | class IsotropicElasticity3D(Material):
36 | def __init__(
37 | self,
38 | E: "float | Tensor",
 39 |         nu: "float | Tensor",
 40 |         eps0: "float | Tensor" = 0.0,
41 | ):
42 | # Convert float inputs to tensors
43 | if isinstance(E, float):
44 | E = torch.tensor(E)
45 | if isinstance(nu, float):
46 | nu = torch.tensor(nu)
47 | if isinstance(eps0, float):
48 | eps0 = torch.tensor(eps0)
49 |
50 | # Store material properties
51 | self.E = E
52 | self.nu = nu
53 | self.eps0 = eps0
54 |
55 | # There are no internal variables
56 | self.n_state = 0
57 |
58 | # Check if the material is vectorized
59 | if E.dim() > 0:
60 | self.is_vectorized = True
61 | else:
62 | self.is_vectorized = False
63 |
64 | # Lame parameters
65 | self.lbd = self.E * self.nu / ((1.0 + self.nu) * (1.0 - 2.0 * self.nu))
66 | self.G = self.E / (2.0 * (1.0 + self.nu))
67 |
68 | # Stiffness tensor
69 | z = torch.zeros_like(self.E)
70 | diag = self.lbd + 2.0 * self.G
71 | self.C = torch.stack(
72 | [
73 | torch.stack([diag, self.lbd, self.lbd, z, z, z], dim=-1),
74 | torch.stack([self.lbd, diag, self.lbd, z, z, z], dim=-1),
75 | torch.stack([self.lbd, self.lbd, diag, z, z, z], dim=-1),
76 | torch.stack([z, z, z, self.G, z, z], dim=-1),
77 | torch.stack([z, z, z, z, self.G, z], dim=-1),
78 | torch.stack([z, z, z, z, z, self.G], dim=-1),
79 | ],
80 | dim=-1,
81 | )
82 |
83 | # Stiffness tensor for shells
84 | self.Cs = torch.stack(
85 | [torch.stack([self.G, z], dim=-1), torch.stack([z, self.G], dim=-1)], dim=-1
86 | )
87 |
88 | def vectorize(self, n_elem: int):
89 | """Create a vectorized copy of the material for `n_elm` elements."""
90 | if self.is_vectorized:
91 | print("Material is already vectorized.")
92 | return self
93 | else:
94 | E = self.E.repeat(n_elem)
95 | nu = self.nu.repeat(n_elem)
96 | eps0 = self.eps0.repeat(n_elem)
97 | return IsotropicElasticity3D(E, nu, eps0)
98 |
99 | def step(self, depsilon: Tensor, epsilon: Tensor, sigma: Tensor, state: Tensor):
100 | """Perform a strain increment."""
101 | epsilon_new = epsilon + depsilon
102 | sigma_new = sigma + torch.einsum("...ij,...j->...i", self.C, depsilon)
103 | state_new = state
104 | ddsdde = self.C
105 | return epsilon_new, sigma_new, state_new, ddsdde
106 |
107 | def rotate(self, R: Tensor):
108 | """Rotate the material with rotation matrix R."""
109 | print("Rotating an isotropic material has no effect.")
110 | return self
111 |
112 |
113 | class IsotropicElasticityPlaneStress(IsotropicElasticity3D):
114 | """Isotropic 2D plane stress material."""
115 |
116 | def __init__(self, E: "float | Tensor", nu: "float | Tensor"):
117 | super().__init__(E, nu)
118 |
119 | # Overwrite the 3D stiffness tensor with a 2D plane stress tensor
120 | fac = self.E / (1.0 - self.nu**2)
121 | zero = torch.zeros_like(self.E)
122 | self.C = torch.stack(
123 | [
124 | torch.stack([fac, fac * self.nu, zero], dim=-1),
125 | torch.stack([fac * self.nu, fac, zero], dim=-1),
126 | torch.stack([zero, zero, fac * 0.5 * (1.0 - self.nu)], dim=-1),
127 | ],
128 | dim=-1,
129 | )
130 |
131 | def vectorize(self, n_elem: int):
132 | """Create a vectorized copy of the material for `n_elm` elements."""
133 | if self.is_vectorized:
134 | print("Material is already vectorized.")
135 | return self
136 | else:
137 | E = self.E.repeat(n_elem)
138 | nu = self.nu.repeat(n_elem)
139 | return IsotropicElasticityPlaneStress(E, nu)
140 |
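Note: a minimal usage sketch for the plane-stress material above; the element count and strain values are made up, and the import assumes the package directory is importable as torchfem:

    import torch
    from torchfem.materials import IsotropicElasticityPlaneStress

    mat = IsotropicElasticityPlaneStress(E=210_000.0, nu=0.3)   # scalar (non-vectorized) material
    mat_v = mat.vectorize(n_elem=4)                             # one entry per element, C has shape (4, 3, 3)

    deps = torch.zeros(4, 3)
    deps[:, 0] = 1e-3                                           # small uniaxial strain increment per element
    eps, sig, state = torch.zeros(4, 3), torch.zeros(4, 3), torch.zeros(4, 0)
    eps1, sig1, state1, ddsdde = mat_v.step(deps, eps, sig, state)
    print(sig1[0])                                              # stress after the increment for element 0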
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
9 |
10 | def train():
11 | #################### Hyperparameters ####################
12 | env_name ="T0-h-v1"
13 |
14 | save_episode = 100 # keep saving every n episodes
15 | max_episodes = 100_000 # max num of training episodes
16 | random_seed = 1
17 | render = False
18 |
19 | env = gym.make(env_name)
20 | env.layer_dim= 12
21 | env.n_layers= 14
22 | env.optimizer='RMSprop'
23 | state_dim = env.observation_space.shape[0]
24 | action_dim = env.N_DISCRETE_ACTIONS
25 |
26 | """
27 | Actions (both primitive and subgoal) are implemented as follows:
28 | action = ( network output (Tanh) * bounds ) + offset
29 | clip_high and clip_low bound the exploration noise
30 | """
31 |
32 | # primitive action bounds and offset
33 | action_bounds = env.action_space.high[0]
34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
36 |
37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
40 |
41 | # state bounds and offset
42 | # state_bounds_np = np.array([0.5, 0.5e7])
43 | # state_bounds_np = np.array([1, 1e7])
44 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
45 | state_bounds = env.observation_space.high[0]
46 | # state_offset = np.array([0.5, 0.5e7])
47 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
49 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
50 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
51 |
52 | exploration_action_noise = np.array([0.5629988256824885])
53 | exploration_state_noise = np.array([0.1313567686964759])
54 |
55 | goal_ = [0.68, 30]
56 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_)))
57 | threshold = [0.05, 3]
58 |
59 | # HAC parameters:
60 | k_level = 2 # num of levels in hierarchy
61 | H = 9 # time horizon to achieve subgoal
62 | lamda = 0.3453605248576358 # subgoal testing parameter
63 |
64 | # DDPG parameters:
65 | gamma = 0.9777965340075817 # discount factor for future rewards
66 | n_iter = 223 # update policy n_iter times in one DDPG update
67 | batch_size = 340 # num of transitions sampled from replay buffer
68 | lr = 0.04471490153909566
69 |
70 | # save trained models
71 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
72 | filename = "HAC_{}".format(env_name)
73 | #########################################################
74 |
75 | if random_seed:
76 | print("Random Seed: {}".format(random_seed))
77 | env.seed(random_seed)
78 | torch.manual_seed(random_seed)
79 | np.random.seed(random_seed)
80 |
81 | # creating HAC agent and setting parameters
82 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
83 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
84 |
85 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
86 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
87 |
88 | # logging file:
89 | log_f = open("log.txt","w+")
90 |
91 | # training procedure
92 | R=0
93 | for i_episode in range(1, max_episodes+1):
94 | agent.reward = 0
95 | agent.timestep = 0
96 |
97 | state = env.reset()
98 | # collecting experience in environment
99 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
100 |
101 | if agent.check_goal(last_state, goal_state, threshold, env):
102 | print("################ Solved! ################ ")
103 | name = filename + '_solved'
104 | agent.save(directory, name)
105 |
106 | # update all levels
107 | agent.update(n_iter, batch_size, env)
108 |
109 | # logging updates:
110 | log_f.write('{},{}\n'.format(i_episode, agent.reward))
111 | log_f.flush()
112 | R += agent.reward
113 | if i_episode % save_episode == 0:
114 | agent.save(directory, filename)
115 | print('SAVING ################# SAVING ################## SAVING:', R/save_episode)
116 | R = 0
117 |
118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward))
119 |
120 |
121 | if __name__ == '__main__':
122 | train()
123 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 |
8 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu")
9 | device = torch.device("cpu")
10 | print(device)
11 |
12 | def train():
13 | #################### Hyperparameters ####################
14 | env_name ="T0-h-v1"
15 |
16 | save_episode = 100 # keep saving every n episodes
17 | max_episodes = 100_000 # max num of training episodes
18 | random_seed = 1
19 | render = False
20 |
21 | env = gym.make(env_name)
22 | env.layer_dim= 12
23 | env.n_layers= 14
24 | env.optimizer='RMSprop'
25 | state_dim = env.observation_space.shape[0]
26 | action_dim = env.N_DISCRETE_ACTIONS
27 |
28 | """
29 | Actions (both primitive and subgoal) are implemented as follows:
30 | action = ( network output (Tanh) * bounds ) + offset
31 | clip_high and clip_low bound the exploration noise
32 | """
33 |
34 | # primitive action bounds and offset
35 | action_bounds = env.action_space.high[0]
36 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
37 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
38 |
39 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
40 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
41 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
42 |
43 | # state bounds and offset
44 | # state_bounds_np = np.array([0.5, 0.5e7])
45 | # state_bounds_np = np.array([1, 1e7])
46 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
47 | state_bounds = env.observation_space.high[0]
48 | # state_offset = np.array([0.5, 0.5e7])
49 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
50 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
51 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
52 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
53 |
54 | exploration_action_noise = np.array([0.5629988256824885])
55 | exploration_state_noise = np.array([0.1313567686964759])
56 |
57 | goal_ = [0.68, 30]
58 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_)))
59 | threshold = [0.05, 3]
60 |
61 | # HAC parameters:
62 | k_level = 2 # num of levels in hierarchy
63 | H = 9 # time horizon to achieve subgoal
64 | lamda = 0.3453605248576358 # subgoal testing parameter
65 |
66 | # DDPG parameters:
67 | gamma = 0.9777965340075817 # discount factor for future rewards
68 | n_iter = 223 # update policy n_iter times in one DDPG update
69 | batch_size = 340 # num of transitions sampled from replay buffer
70 | lr = 0.04471490153909566
71 |
72 | # save trained models
73 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
74 | filename = "HAC_{}".format(env_name)
75 | #########################################################
76 |
77 | if random_seed:
78 | print("Random Seed: {}".format(random_seed))
79 | env.seed(random_seed)
80 | torch.manual_seed(random_seed)
81 | np.random.seed(random_seed)
82 |
83 | # creating HAC agent and setting parameters
84 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
85 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
86 |
87 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
88 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
89 |
90 | # logging file:
91 | log_f = open("log.txt","w+")
92 |
93 | # training procedure
94 | R=0
95 | for i_episode in range(1, max_episodes+1):
96 | agent.reward = 0
97 | agent.timestep = 0
98 |
99 | state = env.reset()
100 | # collecting experience in environment
101 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
102 |
103 | if agent.check_goal(last_state, goal_state, threshold, env):
104 | print("################ Solved! ################ ")
105 | name = filename + '_solved'
106 | agent.save(directory, name)
107 |
108 | # update all levels
109 | agent.update(n_iter, batch_size, env)
110 |
111 | # logging updates:
112 | log_f.write('{},{}\n'.format(i_episode, agent.reward))
113 | log_f.flush()
114 | R += agent.reward
115 | if i_episode % save_episode == 0:
116 | agent.save(directory, filename)
117 | print('SAVING ################# SAVING ################## SAVING:', R/save_episode)
118 | R = 0
119 |
120 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward))
121 |
122 |
123 | if __name__ == '__main__':
124 | train()
125 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import asset
6 |
7 | # if torch.backends.mps.is_available():
8 | # mps_device = torch.device("mps:0")
9 | # x = torch.ones(1).type(torch.float32).to(mps_device)
10 | # print(x)
11 | # else:
12 | # print ("MPS device not found.")
13 |
14 |
 15 | device = torch.device("mps:0" if torch.backends.mps.is_available() else "cpu")
16 | print(device)
17 |
18 |
19 | def train():
20 | #################### Hyperparameters ####################
21 | env_name ="T0-h-v1"
22 |
23 | save_episode = 100 # keep saving every n episodes
24 | max_episodes = 50_000 # max num of training episodes # new line - reversed
25 | random_seed = 1
26 | render = False
27 |
28 | env = gym.make(env_name)
29 | env.layer_dim= 12
30 | env.n_layers= 16
31 | env.optimizer='SGD'
32 | state_dim = env.observation_space.shape[0]
33 | action_dim = env.N_DISCRETE_ACTIONS
34 |
35 | """
36 | Actions (both primitive and subgoal) are implemented as follows:
37 | action = ( network output (Tanh) * bounds ) + offset
38 | clip_high and clip_low bound the exploration noise
39 | """
40 |
41 | # primitive action bounds and offset
42 | action_bounds = env.action_space.high[0]
43 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
44 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
45 |
46 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
47 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
48 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
49 |
50 | # state bounds and offset
51 | # state_bounds_np = np.array([0.5, 0.5e7])
52 | state_bounds_np = np.array([1, 1e7])
53 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
54 | # state_offset = np.array([0.5, 0.5e7])
55 | state_offset = np.array([0, 0])
56 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
57 | state_clip_low = np.array([0, 0])
58 | state_clip_high = np.array([1, 1e7])
59 |
60 | exploration_action_noise = np.array([0.024320378739607497])
61 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693])
62 |
63 | goal_state = np.array([0.68, 20])
64 | threshold = [0.05, 5]
65 |
66 | # HAC parameters:
67 | k_level = 2 # num of levels in hierarchy
68 | H = 6 # time horizon to achieve subgoal
69 | lamda = 0.4337021542899802 # subgoal testing parameter
70 |
71 | # DDPG parameters:
72 | gamma = 0.9703997234344832 # discount factor for future rewards
73 | n_iter = 148 # update policy n_iter times in one DDPG update
74 | batch_size = 10000 # num of transitions sampled from replay buffer
75 | lr = 7.943448987978889e-05
76 |
77 | # save trained models
78 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
79 | filename = "HAC_{}".format(env_name)
80 | #########################################################
81 |
82 |
83 | if random_seed:
84 | print("Random Seed: {}".format(random_seed))
85 | env.seed(random_seed)
86 | torch.manual_seed(random_seed)
87 | np.random.seed(random_seed)
88 |
89 | # creating HAC agent and setting parameters
90 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
91 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
92 |
93 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
94 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
95 |
96 | # logging file:
97 | log_f = open("log.txt","w+")
98 |
99 | # training procedure
100 | R=0
101 | for i_episode in range(1, max_episodes+1):
102 | agent.reward = 0
103 | agent.timestep = 0
104 |
105 | state = env.reset()
106 | # collecting experience in environment
107 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
108 |
109 | if agent.check_goal(last_state, goal_state, threshold, env):
110 | print("################ Solved! ################ ")
111 | name = filename + '_solved'
112 | agent.save(directory, name)
113 |
114 | # update all levels
115 | agent.update(n_iter, batch_size, env)
116 |
117 | # logging updates:
118 | log_f.write('{},{}\n'.format(i_episode, agent.reward))
119 | log_f.flush()
120 |
121 | if i_episode % save_episode == 0:
122 | # if agent.reward>R:
123 | R=agent.reward
124 | agent.save(directory, filename)
125 | print('SAVING ################# SAVING ################## SAVING:',R)
126 |
127 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward))
128 |
129 |
130 | if __name__ == '__main__':
131 | train()
132 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/asset/topology_optimization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from utils import *
3 | import gym
4 | from gym import spaces
5 | import random
6 | import numpy as np
7 | import autograd.numpy as anp
8 | from gym.utils import seeding
9 |
10 |
11 | class Model:
12 | def __init__(self, x):
13 | self.flag_ = True
14 | # self.flag_ = False
15 | self.n, self.m = x.shape
16 | self.actions_dic={}
17 |
18 | k=0
19 | for i in range(self.n):
20 | for j in range(self.m):
21 | self.actions_dic[k]=(i,j)
22 | k+=1
23 |
24 | def action_space_(self, action, X):
25 | x,y=self.actions_dic[action]
26 | # penalty=(X[x][y]==1)
27 | X[x][y]=1
28 | # if penalty:
29 | # return 1e-7
30 | # return 0
31 |
32 | def draw(self,X):
33 | plt.figure(dpi=50)
34 | print('\nFinal Cantilever rl_beam design:')
35 | plt.imshow(X)
36 | plt.show(block=False)
37 | plt.pause(3)
38 | plt.close('all')
39 |
40 |
41 | class CantileverEnv(gym.Env):
42 |
43 | metadata = {"render.modes": ["human"],
44 | # 'video.frames_per_second' : 30
45 | }
46 |
47 | def __init__(self):
48 | super().__init__()
49 |
50 |
51 | self.rd=0
52 | self.args = get_args(*mbb_beam(rd=self.rd))
53 |
54 | DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2
55 | self.N_DISCRETE_ACTIONS=self.args.nelx*self.args.nely
56 |
57 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS)
58 |
59 | self.action_space = spaces.Box(low=0, high=1,
60 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64)
61 |
62 | self.low_state=np.array([0, 0])
63 | self.high_state=np.array([1, 1e7])
64 |
65 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
66 | dtype=np.float64)
67 |
68 |
69 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
70 |
71 | self.M=Model(self.x)
72 |
73 | self.reward=0
74 | self.step_=0
75 | self.needs_reset = True
76 | self.y=np.array([1e-4, 1e7])
77 | self.layer_dim=4
78 | self.n_layers=2
79 | self.optimizer='Adam'
80 | self.seed()
81 |
82 | def seed(self, seed=None):
83 | self.np_random, seed = seeding.np_random(seed)
84 | return [seed]
85 |
86 | def step(self, action):
87 |
88 | # action=action*(1-self.x.reshape(len(action),)+1e-4)
89 | # when altering boundary conditions and forces, do not change action values in those cells
90 |
91 | # to give the agent an ability to do the same actions
92 | self.penalty_coeff= 0.3
93 | action=action*(1-self.penalty_coeff*self.x.reshape(len(action),))
94 |
95 | self.args = get_args(*mbb_beam(rd=self.rd))
96 |
97 | # print(action)
98 | act=np.argmax(action)
99 |
100 |
101 | self.M.action_space_(act, self.x)
102 |
103 | self.tmp, self.const = fast_stopt(self.args, self.x)
104 | self.step_+=1
105 |
106 | # self.reward = (1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7)
107 | self.reward = (1/self.tmp)**0.5
108 |
109 | # self.reward=(1/self.tmp+self.const**2)**0.5
110 | # self.reward=(self.const/self.tmp)**0.5
111 |
112 | # self.reward += (1/self.tmp)**2
113 | # self.reward =(1/self.tmp)**2 - penalty
114 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2
115 |
116 | done=False
117 |
118 | if self.const>0.68:
119 | # self.reward-=1
120 | done=True
121 |
122 |         # if self.const>0.65 and 100<self.step_:
123 |         #     done=True
124 | 
125 | 
126 |         if self.step_ > self.M.n*self.M.m:
127 |             done = True
128 |
129 | if self.needs_reset:
130 | raise RuntimeError("Tried to step environment that needs reset")
131 |
132 |
133 | if done:
134 | self.needs_reset = True
135 |
136 | return np.array([self.const,self.tmp]), self.reward, done, dict()
137 |
138 | def reset(self):
139 |
140 | if not self.M.flag_:
141 | self.rd=random.choice([0,2,-2])
142 | else:
143 | self.rd=-1
144 |
145 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
146 |
147 | self.reward=0
148 | self.needs_reset = False
149 | self.step_=0
150 |
151 | self.y=np.array([1e-4, 1e7])
152 | return self.y
153 |
154 |
155 | def render(self, mode="human"):
156 | self.M.draw(self.x)
157 |
158 | def close(self):
159 | pass
160 |
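Note: the environment can be exercised directly, without going through gym.make registration; a minimal sketch that assumes the utils helpers imported at the top of this file (get_args, mbb_beam, fast_stopt) are importable from the working directory:

    import numpy as np
    from asset.topology_optimization import CantileverEnv

    env = CantileverEnv()
    state = env.reset()                               # placeholder observation [1e-4, 1e7]
    action = np.random.rand(env.N_DISCRETE_ACTIONS)   # one score per cell; step() fills the argmax cell
    obs, reward, done, info = env.step(action)        # obs is [const, tmp] as computed by fast_stopt
    print(obs, reward, done)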
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/asset/topology_optimization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from utils import *
3 | import gym
4 | from gym import spaces
5 | import random
6 | import numpy as np
7 | import autograd.numpy as anp
8 | from gym.utils import seeding
9 |
10 |
11 | class Model:
12 | def __init__(self, x):
13 | self.flag_ = True
14 | # self.flag_ = False
15 | self.n, self.m = x.shape
16 | self.actions_dic={}
17 |
18 | k=0
19 | for i in range(self.n):
20 | for j in range(self.m):
21 | self.actions_dic[k]=(i,j)
22 | k+=1
23 |
24 | def action_space_(self, action, X):
25 | x,y=self.actions_dic[action]
26 | # penalty=(X[x][y]==1)
27 | X[x][y]=1
28 | # if penalty:
29 | # return 1e-7
30 | # return 0
31 |
32 | def draw(self,X):
33 | plt.figure(dpi=50)
34 | print('\nFinal Cantilever rl_beam design:')
35 | plt.imshow(X)
36 | plt.show(block=False)
37 | plt.pause(3)
38 | plt.close('all')
39 |
40 |
41 | class CantileverEnv(gym.Env):
42 |
43 | metadata = {"render.modes": ["human"],
44 | # 'video.frames_per_second' : 30
45 | }
46 |
47 | def __init__(self):
48 | super().__init__()
49 |
50 |
51 | self.rd=0
52 | self.args = get_args(*mbb_beam(rd=self.rd))
53 |
54 | DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2
55 | self.N_DISCRETE_ACTIONS=self.args.nelx*self.args.nely
56 |
57 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS)
58 |
59 | self.action_space = spaces.Box(low=0, high=1,
60 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64)
61 |
62 | self.low_state=np.array([0, 0])
63 | self.high_state=np.array([1, 1e7])
64 |
65 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
66 | dtype=np.float64)
67 |
68 |
69 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
70 |
71 | self.M=Model(self.x)
72 |
73 | self.reward=0
74 | self.step_=0
75 | self.needs_reset = True
76 | self.y=np.array([1e-4, 1e7])
77 | self.layer_dim=4
78 | self.n_layers=2
79 | self.optimizer='Adam'
80 | self.seed()
81 |
82 | def seed(self, seed=None):
83 | self.np_random, seed = seeding.np_random(seed)
84 | return [seed]
85 |
86 | def step(self, action):
87 |
88 | # action=action*(1-self.x.reshape(len(action),)+1e-4)
89 | # when altering boundary conditions and forces, do not change action values in those cells
90 |
91 | # to give the agent an ability to do the same actions
92 | self.penalty_coeff= 0.3
93 | action=action*(1-self.penalty_coeff*self.x.reshape(len(action),))
94 |
95 | self.args = get_args(*mbb_beam(rd=self.rd))
96 |
97 | # print(action)
98 | act=np.argmax(action)
99 |
100 |
101 | self.M.action_space_(act, self.x)
102 |
103 | self.tmp, self.const = fast_stopt(self.args, self.x)
104 | self.step_+=1
105 |
106 | # self.reward = (1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7)
107 | self.reward = (1/self.tmp)**0.5
108 |
109 | # self.reward=(1/self.tmp+self.const**2)**0.5
110 | # self.reward=(self.const/self.tmp)**0.5
111 |
112 | # self.reward += (1/self.tmp)**2
113 | # self.reward =(1/self.tmp)**2 - penalty
114 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2
115 |
116 | done=False
117 |
118 | if self.const>0.68:
119 | # self.reward-=1
120 | done=True
121 |
122 |         # if self.const>0.65 and 100<self.step_:
123 |         #     done=True
124 | 
125 | 
126 |         if self.step_ > self.M.n*self.M.m:
127 |             done = True
128 |
129 | if self.needs_reset:
130 | raise RuntimeError("Tried to step environment that needs reset")
131 |
132 |
133 | if done:
134 | self.needs_reset = True
135 |
136 | return np.array([self.const,self.tmp]), self.reward, done, dict()
137 |
138 | def reset(self):
139 |
140 | if not self.M.flag_:
141 | self.rd=random.choice([0,2,-2])
142 | else:
143 | self.rd=-1
144 |
145 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
146 |
147 | self.reward=0
148 | self.needs_reset = False
149 | self.step_=0
150 |
151 | self.y=np.array([1e-4, 1e7])
152 | return self.y
153 |
154 |
155 | def render(self, mode="human"):
156 | self.M.draw(self.x)
157 |
158 | def close(self):
159 | pass
160 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/torchfem/sparse.py:
--------------------------------------------------------------------------------
1 | import pyamg
2 | import torch
3 | from scipy.sparse import coo_matrix as scipy_coo_matrix
4 | from scipy.sparse.linalg import minres as scipy_minres
5 | from scipy.sparse.linalg import spsolve as scipy_spsolve
6 | from torch import Tensor
7 | from torch.autograd import Function
8 |
9 | try:
10 | # TODO - rewrite these functions to Metal or pytorch
11 | import cupy
12 | from cupyx.scipy.sparse import coo_matrix as cupy_coo_matrix
13 | from cupyx.scipy.sparse import diags as cupy_diags
14 | from cupyx.scipy.sparse.linalg import minres as cupy_minres
15 | from cupyx.scipy.sparse.linalg import spsolve as cupy_spsolve
16 |
17 | mps_available = True
18 | except ImportError:
19 | mps_available = False
20 |
21 | print(mps_available)
22 |
23 |
24 | class Solve(Function):
25 | """
26 | Inspired by
27 | - https://blog.flaport.net/solving-sparse-linear-systems-in-pytorch.html
28 | - https://github.com/pytorch/pytorch/issues/69538
29 | - https://github.com/cai4cai/torchsparsegradutils
30 | """
31 |
32 | @staticmethod
33 | def forward(A, b, B=None, rtol=1e-10, device=None, direct=None, M=None):
34 | # Check the input shape
35 | if A.ndim != 2 or (A.shape[0] != A.shape[1]):
36 | raise ValueError("A should be a square 2D matrix.")
37 | shape = A.size()
38 |
39 | # Move to requested device, if available
40 | if device is not None:
41 | A = A.to(device)
42 | b = b.to(device)
43 |
44 | # Default to direct solver for small matrices
 45 |         if direct is None:
46 | direct = shape[0] < 10000
47 |
48 | if A.device.type == "mps" and mps_available:
49 | A_cp = cupy_coo_matrix(
50 | (
51 | cupy.asarray(A._values()),
52 | (cupy.asarray(A._indices()[0]), cupy.asarray(A._indices()[1])),
53 | ),
54 | shape=shape,
55 | ).tocsr()
56 | b_cp = cupy.asarray(b.data)
57 | if direct:
58 | x_xp = cupy_spsolve(A_cp, b_cp)
59 | else:
60 | # Jacobi preconditioner
61 | M = cupy_diags(1.0 / A_cp.diagonal())
62 | # Solve with minres
63 | x_xp, exit_code = cupy_minres(A_cp, b_cp, M=M, tol=rtol)
64 | if exit_code != 0:
65 | raise RuntimeError(f"minres failed with exit code {exit_code}")
66 | else:
67 | A_np = scipy_coo_matrix(
68 | (A._values(), (A._indices()[0], A._indices()[1])), shape=shape
69 | ).tocsr()
70 | b_np = b.data.numpy()
71 | if B is None:
72 | B_np = None
73 | else:
74 | B_np = B.data.numpy()
75 | if direct:
76 | x_xp = scipy_spsolve(A_np, b_np)
77 | else:
78 | # AMG preconditioner with Jacobi smoother
79 | if M is None:
80 | ml = pyamg.smoothed_aggregation_solver(A_np, B_np, smooth="jacobi")
81 | M = ml.aspreconditioner()
82 |
83 | # Solve with minres
84 | x_xp, exit_code = scipy_minres(A_np, b_np, M=M, rtol=rtol)
85 | if exit_code != 0:
86 | raise RuntimeError(f"minres failed with exit code {exit_code}")
87 |
88 | # Convert back to torch
89 | x = torch.tensor(x_xp, requires_grad=True, dtype=b.dtype, device=b.device)
90 |
91 | return x
92 |
93 | @staticmethod
94 | def backward(ctx, grad):
95 | # Access the saved variables
96 | A, x = ctx.saved_tensors
97 |
98 | # Backprop rule: gradb = A^T @ grad
99 | gradb = Solve.apply(A.T, grad, ctx.B, ctx.rtol, ctx.device, ctx.direct, ctx.M)
100 |
101 | # Backprop rule: gradA = -gradb @ x^T, sparse version
102 | row = A._indices()[0, :]
103 | col = A._indices()[1, :]
104 | val = -gradb[row] * x[col]
105 | gradA = torch.sparse_coo_tensor(torch.stack([row, col]), val, A.shape)
106 |
107 | return gradA, gradb, None, None, None, None, None
108 |
109 | @staticmethod
110 | def setup_context(ctx, inputs, output):
111 | A, b, B, rtol, device, direct, M = inputs
112 | x = output
113 | ctx.save_for_backward(A, x)
114 |
115 | # Save the parameters for backward pass (including the preconditioner)
116 | ctx.rtol = rtol
117 | ctx.device = device
118 | ctx.direct = direct
119 | ctx.B = B
120 | ctx.M = M
121 |
122 |
123 | sparse_solve = Solve.apply
124 |
125 |
126 | def sparse_index_select(t: Tensor, slices: list["Tensor | None"]) -> Tensor:
127 | coalesced = t.is_coalesced()
128 | indices = t.indices()
129 | values = t.values()
130 | in_shape = t.shape
131 | out_shape = []
132 | for dim, slice in enumerate(slices):
133 | if slice is None:
134 | out_shape.append(in_shape[dim])
135 | else:
136 | out_shape.append(len(slice))
137 | mask = torch.isin(indices[dim], slice)
138 | cumsum = torch.cumsum(torch.isin(torch.arange(0, in_shape[dim]), slice), 0)
139 | indices = indices[:, mask]
140 | values = values[mask]
141 | indices[dim] = cumsum[indices[dim]] - 1
142 |
143 | return torch.sparse_coo_tensor(indices, values, out_shape, is_coalesced=coalesced)
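Note: a minimal forward-solve sketch for the Solve function above, with every argument passed explicitly; it assumes scipy and pyamg are installed and that the module is importable as torchfem.sparse:

    import torch
    from torchfem.sparse import sparse_solve

    # small symmetric positive-definite system: A = [[4, 1], [1, 3]], b = [1, 2]
    indices = torch.tensor([[0, 0, 1, 1], [0, 1, 0, 1]])
    values = torch.tensor([4.0, 1.0, 1.0, 3.0])
    A = torch.sparse_coo_tensor(indices, values, (2, 2)).coalesce()
    b = torch.tensor([1.0, 2.0])

    # arguments: A, b, B, rtol, device, direct, M -- direct=True takes the scipy spsolve branch
    x = sparse_solve(A, b, None, 1e-10, None, True, None)
    print(x)                                          # approximately [0.0909, 0.6364]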
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/asset/topology_optimization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from utils import *
3 | import gym
4 | from gym import spaces
5 | import random
6 | import numpy as np
7 | import autograd.numpy as anp
8 | from gym.utils import seeding
9 | import torch
10 |
11 | class Model:
12 | def __init__(self, x):
13 | self.flag_ = True
14 | # self.flag_ = False
15 | self.n, self.m = 5,5
16 | self.actions_dic={}
17 |
18 | k=0
19 | for i in range(self.n):
20 | for j in range(self.m):
21 | self.actions_dic[k]=(i,j)
22 | k+=1
23 |
24 | def action_space_(self, action, X):
25 | x,y=self.actions_dic[action]
26 | # penalty=(X[x][y]==1)
27 | X[x][y]=1
28 | # if penalty:
29 | # return 1e-7
30 | # return 0
31 |
32 | def draw(self,X):
33 | plt.figure(dpi=50)
34 | print('\nFinal Cantilever beam design:')
35 | plt.imshow(X)
36 | plt.show(block=False)
37 | plt.pause(3)
38 | plt.close('all')
39 |
40 |
41 | class CantileverEnv(gym.Env):
42 |
43 | metadata = {"render.modes": ["human"],
44 | # 'video.frames_per_second' : 30
45 | }
46 |
47 | def __init__(self, device="mps"):
48 | super().__init__()
49 |
50 | self.device = device
51 | self.rd=0
52 | # self.args = get_args(*mbb_beam(rd=self.rd))
53 | self.args=None
54 | # DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2
55 | DIM=25
56 | self.N_DISCRETE_ACTIONS=DIM
57 |
58 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS)
59 |
60 | self.action_space = spaces.Box(low=0, high=1,
61 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64)
62 |
63 | self.low_state=np.array([0, 0])
64 | self.high_state=np.array([1, 1e7])
65 |
66 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
67 | dtype=np.float64)
68 |
69 | # self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density
70 | self.x= torch.ones(25, requires_grad=True)*0.01
71 | self.M=Model(self.x)
72 |
73 | self.reward=0
74 | self.step_=0
75 | self.needs_reset = True
76 | self.y=np.array([1e-4, 1e7])
77 | self.layer_dim=4
78 | self.n_layers=2
79 | self.optimizer='Adam'
80 | self.seed()
81 |
82 | def seed(self, seed=None):
83 | self.np_random, seed = seeding.np_random(seed)
84 | return [seed]
85 |
86 | def step(self, action):
87 |
88 | # action=action*(1-self.x.reshape(len(action),)+1e-4)
89 | # when altering boundary conditions and forces, do not change action values in those cells
90 |
91 | # scale the action by (1 - penalty_coeff * x) so re-selecting an already-filled cell is damped but still possible
92 | self.penalty_coeff = 0.3
93 | action = torch.Tensor(action)*torch.Tensor(1-self.penalty_coeff*self.x.reshape(len(action),))
94 | # action = action / np.sqrt(np.sum(action**2)) # new line
95 | # print(action)
96 | # self.args = get_args(*mbb_beam(rd=self.rd))
97 | self.args=None
98 | # print(action)
99 | # act=np.argmax(action)
100 |
101 |
102 | # self.M.action_space_(act, self.x)
103 | try:
104 | self.tmp, self.const = fast_stopt(self.args, torch.Tensor(action))
105 | except:
106 | pass
107 | self.step_+=1
108 | self.x = action.reshape(5,5)
109 | # self.reward = (1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7)
110 | self.reward -= torch.log(self.tmp) # new line
111 |
112 | # self.reward=(1/self.tmp+self.const**2)**0.5
113 | # self.reward=(self.const/self.tmp)**0.5
114 |
115 | # self.reward += (1/self.tmp)**2
116 | # self.reward =(1/self.tmp)**2 - penalty
117 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2
118 |
119 | done=False
120 |
121 | if self.const>0.68:
122 | # self.reward-=1
123 | done=True
124 |
125 | # if self.const>0.65 and 100<self.step_: done=True
126 |
127 | # end the episode once more steps have been taken than there are cells in the n*m grid
128 |
129 | if self.step_ > self.M.n*self.M.m:
130 | done = True
131 |
132 | if self.needs_reset:
133 | raise RuntimeError("Tried to step environment that needs reset")
134 |
135 |
136 | if done:
137 | self.needs_reset = True
138 | self.reward = 5000 # new line
139 |
140 | return np.array([self.const.detach().numpy() ,self.tmp.detach().numpy() ]), self.reward, done, dict()
141 |
142 | def reset(self):
143 |
144 | if not self.M.flag_:
145 | self.rd=random.choice([0,2,-2])
146 | else:
147 | self.rd=-1
148 |
149 | self.x = torch.ones(25, requires_grad=True)*0.01
150 |
151 | self.reward=0
152 | self.needs_reset = False
153 | self.step_=0
154 |
155 | self.y=np.array([1e-4, 1e7])
156 | return self.y
157 |
158 |
159 | def render(self, mode="human"):
160 | self.M.draw(self.x)
161 |
162 | def close(self):
163 | pass
164 |
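A hedged usage sketch for the environment above. It uses the pre-0.26 gym API (four-tuple step()), matching the code; whether an episode completes end-to-end depends on fast_stopt() imported from utils, which is not shown in this dump:

    import numpy as np

    env = CantileverEnv(device="cpu")
    obs = env.reset()                        # [volume fraction, compliance]
    done = False
    while not done:
        action = env.action_space.sample()   # 25 proposed cell densities in [0, 1]
        obs, reward, done, _ = env.step(action)
    print("final reward:", reward, "steps taken:", env.step_)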
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/DDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 |
6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
7 |
8 |
9 | class extract_tensor(nn.Module):
10 | def forward(self,x):
11 | # Output shape (batch, features, hidden)
12 | tensor, _ = x
13 | # Reshape shape (batch, hidden)
14 | return tensor
15 |
16 |
17 | class Actor(nn.Module):
18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers):
19 | super(Actor, self).__init__()
20 |
21 | # actor
22 | in_features = state_dim + state_dim
23 |
24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)]
25 | # layers.append(extract_tensor())
26 |
27 | layers=[]
28 | # out_features = layer_dim
29 | for i in range(n_layers):
30 |
31 | # Suggest the number of units in each layer
32 | out_features = layer_dim
33 |
34 | # layers.append(nn.Linear(out_features, out_features))
35 | layers.append(nn.Linear(in_features, out_features))
36 | layers.append(nn.ReLU())
37 | layers.append(nn.Dropout(p=0.2))
38 |
39 | in_features = out_features
40 |
41 | # in_features = out_features
42 |
43 | layers.append(nn.Linear(in_features, action_dim))
44 | # layers.append(nn.Tanh())
45 | layers.append(nn.Softmax(dim=1))
46 | self.actor = nn.Sequential(*layers)
47 |
48 | # max value of actions
49 | self.action_bounds = action_bounds
50 | self.offset = offset
51 |
52 | def forward(self, state, goal):
53 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset
54 |
55 | class Critic(nn.Module):
56 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers):
57 | super(Critic, self).__init__()
58 | # UVFA critic
59 | layers = []
60 |
61 |
62 | in_features = state_dim + action_dim + state_dim
63 |
64 | for i in range(n_layers):
65 |
66 | # Suggest the number of units in each layer
67 | out_features = layer_dim
68 |
69 | layers.append(nn.Linear(in_features, out_features))
70 | layers.append(nn.ReLU())
71 | layers.append(nn.Dropout(p=0.2))
72 |
73 | in_features = out_features
74 |
75 | layers.append(nn.Linear(in_features, 1))
76 | layers.append(nn.Sigmoid())
77 | self.critic = nn.Sequential(*layers)
78 |
79 | self.H = H
80 |
81 | def forward(self, state, action, goal):
82 | # rewards are in range [-H, 0]
83 | return -self.critic(torch.cat([state, action, goal], 1))* self.H
84 |
85 |
86 | class DDPG:
87 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers):
88 |
89 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device)
90 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr)
91 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device)
92 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr)
93 |
94 | self.mseLoss = torch.nn.MSELoss()
95 |
96 | def select_action(self, state, goal):
97 | state = torch.FloatTensor(state.reshape(1, -1)).to(device)
98 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device)
99 | return self.actor(state, goal).detach().cpu().data.numpy().flatten()
100 |
101 | def update(self, buffer, n_iter, batch_size,env):
102 |
103 |
104 | for i in range(n_iter):
105 | # Sample a batch of transitions from replay buffer:
106 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size)
107 |
108 | # convert np arrays into tensors
109 | state = torch.FloatTensor(state).to(device)
110 | action = torch.FloatTensor(action).to(device)
111 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device)
112 | next_state = torch.FloatTensor(next_state).to(device)
113 | goal = torch.FloatTensor(goal).to(device)
114 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device)
115 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device)
116 |
117 | # select next action
118 | next_action = self.actor(next_state, goal).detach()
119 |
120 | # Compute target Q-value:
121 | target_Q = self.critic(next_state, next_action, goal).detach()
122 | target_Q = reward + ((1-done) * gamma * target_Q)
123 |
124 |
125 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q)
126 | self.critic_optimizer.zero_grad()
127 | critic_loss.backward()
128 | self.critic_optimizer.step()
129 |
130 | # Compute actor loss:
131 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean()
132 |
133 | # Optimize the actor
134 | self.actor_optimizer.zero_grad()
135 | actor_loss.backward()
136 | self.actor_optimizer.step()
137 |
138 |
139 | def save(self, directory, name):
140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name))
141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name))
142 |
143 | def load(self, directory, name):
144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu'))
145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu'))
146 |
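For orientation, a minimal sketch of how this class is driven by the HAC code. The state/action dimensions and the goal mirror hpo.py below; the offset tensor and hyperparameter values here are illustrative. update() additionally expects a replay buffer whose sample(batch_size) returns the 7-tuple (state, action, reward, next_state, goal, gamma, done) unpacked above.

    import numpy as np
    import torch

    state_dim, action_dim = 2, 25
    dev = "cuda:0" if torch.cuda.is_available() else "cpu"   # same rule as the module-level device
    offset = torch.zeros(1, action_dim, device=dev)          # no offset on primitive actions

    agent = DDPG(state_dim, action_dim, action_bounds=1.0, offset=offset,
                 lr=1e-3, H=7, optimizer="Adam", layer_dim=8, n_layers=2)

    state = np.array([1e-4, 1e7], dtype=np.float32)   # (volume fraction, compliance)
    goal = np.array([0.68, 20.0], dtype=np.float32)
    action = agent.select_action(state, goal)         # softmax over the 25 cell densities
    print(action.shape, action.sum())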
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/DDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 |
6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
7 |
8 |
9 | class extract_tensor(nn.Module):
10 | def forward(self,x):
11 | # Output shape (batch, features, hidden)
12 | tensor, _ = x
13 | # Reshape shape (batch, hidden)
14 | return tensor
15 |
16 |
17 | class Actor(nn.Module):
18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers):
19 | super(Actor, self).__init__()
20 |
21 | # actor
22 | in_features = state_dim + state_dim
23 |
24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)]
25 | # layers.append(extract_tensor())
26 |
27 | layers=[]
28 | # out_features = layer_dim
29 | for i in range(n_layers):
30 |
31 | # Suggest the number of units in each layer
32 | out_features = layer_dim
33 |
34 | # layers.append(nn.Linear(out_features, out_features))
35 | layers.append(nn.Linear(in_features, out_features))
36 | layers.append(nn.ReLU())
37 | # layers.append(nn.Dropout(p=0.2))
38 |
39 | in_features = out_features
40 |
41 | # in_features = out_features
42 |
43 | layers.append(nn.Linear(in_features, action_dim))
44 | # layers.append(nn.Tanh())
45 | layers.append(nn.Softmax(dim=1))
46 | self.actor = nn.Sequential(*layers)
47 |
48 | # max value of actions
49 | self.action_bounds = action_bounds
50 | self.offset = offset
51 |
52 | def forward(self, state, goal):
53 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset
54 |
55 | class Critic(nn.Module):
56 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers):
57 | super(Critic, self).__init__()
58 | # UVFA critic
59 | layers = []
60 |
61 |
62 | in_features = state_dim + action_dim + state_dim
63 |
64 | for i in range(n_layers):
65 |
66 | # Suggest the number of units in each layer
67 | out_features = layer_dim
68 |
69 | layers.append(nn.Linear(in_features, out_features))
70 | layers.append(nn.ReLU())
71 | # layers.append(nn.Dropout(p=0.2))
72 |
73 | in_features = out_features
74 |
75 | layers.append(nn.Linear(in_features, 1))
76 | layers.append(nn.Sigmoid())
77 | self.critic = nn.Sequential(*layers)
78 |
79 | self.H = H
80 |
81 | def forward(self, state, action, goal):
82 | # rewards are in range [-H, 0]
83 | return -self.critic(torch.cat([state, action, goal], 1))* self.H
84 |
85 |
86 | class DDPG:
87 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers):
88 |
89 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device)
90 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr)
91 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device)
92 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr)
93 |
94 | self.mseLoss = torch.nn.MSELoss()
95 |
96 | def select_action(self, state, goal):
97 | state = torch.FloatTensor(state.reshape(1, -1)).to(device)
98 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device)
99 | return self.actor(state, goal).detach().cpu().data.numpy().flatten()
100 |
101 | def update(self, buffer, n_iter, batch_size,env):
102 |
103 |
104 | for i in range(n_iter):
105 | # Sample a batch of transitions from replay buffer:
106 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size)
107 |
108 | # convert np arrays into tensors
109 | state = torch.FloatTensor(state).to(device)
110 | action = torch.FloatTensor(action).to(device)
111 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device)
112 | next_state = torch.FloatTensor(next_state).to(device)
113 | goal = torch.FloatTensor(goal).to(device)
114 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device)
115 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device)
116 |
117 | # select next action
118 | next_action = self.actor(next_state, goal).detach()
119 |
120 | # Compute target Q-value:
121 | target_Q = self.critic(next_state, next_action, goal).detach()
122 | target_Q = reward + ((1-done) * gamma * target_Q)
123 |
124 |
125 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q)
126 | self.critic_optimizer.zero_grad()
127 | critic_loss.backward()
128 | self.critic_optimizer.step()
129 |
130 | # Compute actor loss:
131 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean()
132 |
133 | # Optimize the actor
134 | self.actor_optimizer.zero_grad()
135 | actor_loss.backward()
136 | self.actor_optimizer.step()
137 |
138 |
139 | def save(self, directory, name):
140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name))
141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name))
142 |
143 | def load(self, directory, name):
144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu'))
145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu'))
146 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/DDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 |
6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
7 |
8 |
9 | class extract_tensor(nn.Module):
10 | def forward(self,x):
11 | # Output shape (batch, features, hidden)
12 | tensor, _ = x
13 | # Reshape shape (batch, hidden)
14 | return tensor
15 |
16 |
17 | class Actor(nn.Module):
18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers):
19 | super(Actor, self).__init__()
20 |
21 | # actor
22 | in_features = state_dim + state_dim
23 |
24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)]
25 | # layers.append(extract_tensor())
26 |
27 | layers=[]
28 | # out_features = layer_dim
29 | for i in range(n_layers):
30 |
31 | # Suggest the number of units in each layer
32 | out_features = layer_dim
33 |
34 | # layers.append(nn.Linear(out_features, out_features))
35 | layers.append(nn.Linear(in_features, out_features))
36 | layers.append(nn.ReLU())
37 | layers.append(nn.Dropout(p=0.2))
38 |
39 | in_features = out_features
40 |
41 | # in_features = out_features
42 |
43 | # print(in_features, action_dim)
44 | layers.append(nn.Linear(in_features, action_dim))
45 | # layers.append(nn.Tanh())
46 | layers.append(nn.Softmax(dim=1))
47 | # print(layers)
48 | self.actor = nn.Sequential(*layers)
49 |
50 | # max value of actions
51 | self.action_bounds = action_bounds
52 | self.offset = offset
53 |
54 | def forward(self, state, goal):
55 | # print(state, goal,self.action_bounds,self.offset.shape)
56 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset
57 |
58 |
59 | class Critic(nn.Module):
60 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers):
61 | super(Critic, self).__init__()
62 | # UVFA critic
63 | layers = []
64 |
65 | in_features = state_dim + action_dim + state_dim
66 |
67 | for i in range(n_layers):
68 |
69 | # Suggest the number of units in each layer
70 | out_features = layer_dim
71 |
72 | layers.append(nn.Linear(in_features, out_features))
73 | layers.append(nn.ReLU())
74 | layers.append(nn.Dropout(p=0.2))
75 |
76 | in_features = out_features
77 |
78 | layers.append(nn.Linear(in_features, 1))
79 | layers.append(nn.Sigmoid())
80 | self.critic = nn.Sequential(*layers)
81 |
82 | self.H = H
83 |
84 | def forward(self, state, action, goal):
85 | # rewards are in range [-H, 0]
86 | return -self.critic(torch.cat([state, action, goal], 1))* self.H
87 |
88 |
89 | class DDPG:
90 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers):
91 |
92 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device)
93 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr)
94 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device)
95 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr)
96 |
97 | self.mseLoss = torch.nn.MSELoss()
98 |
99 | def select_action(self, state, goal):
100 | state = torch.FloatTensor(state.reshape(1, -1)).to(device)
101 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device)
102 | return self.actor(state, goal).detach().cpu().data.numpy().flatten()
103 |
104 | def update(self, buffer, n_iter, batch_size,env):
105 |
106 | for i in range(n_iter):
107 | # Sample a batch of transitions from replay buffer:
108 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size)
109 |
110 | # convert np arrays into tensors
111 | state = torch.FloatTensor(state).to(device)
112 | action = torch.FloatTensor(action).to(device)
113 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device)
114 | next_state = torch.FloatTensor(next_state).to(device)
115 | goal = torch.FloatTensor(goal).to(device)
116 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device)
117 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device)
118 |
119 | # select next action
120 | next_action = self.actor(next_state, goal).detach()
121 |
122 | # Compute target Q-value:
123 | target_Q = self.critic(next_state, next_action, goal).detach()
124 | target_Q = reward + ((1-done) * gamma * target_Q)
125 |
126 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q)
127 | self.critic_optimizer.zero_grad()
128 | critic_loss.backward()
129 | self.critic_optimizer.step()
130 |
131 | # Compute actor loss:
132 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean()
133 |
134 | # Optimize the actor
135 | self.actor_optimizer.zero_grad()
136 | actor_loss.backward()
137 | self.actor_optimizer.step()
138 |
139 | def save(self, directory, name):
140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name))
141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name))
142 |
143 | def load(self, directory, name):
144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu'))
145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu'))
146 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/DDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 |
6 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu")
7 | device = torch.device("cpu")
8 | print(device)
9 |
10 | class extract_tensor(nn.Module):
11 | def forward(self,x):
12 | # Output shape (batch, features, hidden)
13 | tensor, _ = x
14 | # Reshape shape (batch, hidden)
15 | return tensor
16 |
17 |
18 | class Actor(nn.Module):
19 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers):
20 | super(Actor, self).__init__()
21 |
22 | # actor
23 | in_features = state_dim + state_dim
24 |
25 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)]
26 | # layers.append(extract_tensor())
27 |
28 | layers=[]
29 | # out_features = layer_dim
30 | for i in range(n_layers):
31 |
32 | # Suggest the number of units in each layer
33 | out_features = layer_dim
34 |
35 | # layers.append(nn.Linear(out_features, out_features))
36 | layers.append(nn.Linear(in_features, out_features))
37 | layers.append(nn.ReLU())
38 | layers.append(nn.Dropout(p=0.2))
39 |
40 | in_features = out_features
41 |
42 | # in_features = out_features
43 |
44 | # print(in_features, action_dim)
45 | layers.append(nn.Linear(in_features, action_dim))
46 | # layers.append(nn.Tanh())
47 | layers.append(nn.Softmax(dim=1))
48 | # print(layers)
49 | self.actor = nn.Sequential(*layers)
50 |
51 | # max value of actions
52 | self.action_bounds = action_bounds
53 | self.offset = offset
54 |
55 | def forward(self, state, goal):
56 | # print(state, goal,self.action_bounds,self.offset.shape)
57 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset
58 |
59 |
60 | class Critic(nn.Module):
61 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers):
62 | super(Critic, self).__init__()
63 | # UVFA critic
64 | layers = []
65 |
66 | in_features = state_dim + action_dim + state_dim
67 |
68 | for i in range(n_layers):
69 |
70 | # Suggest the number of units in each layer
71 | out_features = layer_dim
72 |
73 | layers.append(nn.Linear(in_features, out_features))
74 | layers.append(nn.ReLU())
75 | layers.append(nn.Dropout(p=0.2))
76 |
77 | in_features = out_features
78 |
79 | layers.append(nn.Linear(in_features, 1))
80 | layers.append(nn.Sigmoid())
81 | self.critic = nn.Sequential(*layers)
82 |
83 | self.H = H
84 |
85 | def forward(self, state, action, goal):
86 | # rewards are in range [-H, 0]
87 | return -self.critic(torch.cat([state, action, goal], 1))* self.H
88 |
89 |
90 | class DDPG:
91 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers):
92 |
93 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device)
94 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr)
95 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device)
96 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr)
97 |
98 | self.mseLoss = torch.nn.MSELoss()
99 |
100 | def select_action(self, state, goal):
101 | state = torch.FloatTensor(state.reshape(1, -1)).to(device)
102 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device)
103 | return self.actor(state, goal).detach().cpu().data.numpy().flatten()
104 |
105 | def update(self, buffer, n_iter, batch_size,env):
106 |
107 | for i in range(n_iter):
108 | # Sample a batch of transitions from replay buffer:
109 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size)
110 |
111 | # convert np arrays into tensors
112 | state = torch.FloatTensor(state).to(device)
113 | action = torch.FloatTensor(action).to(device)
114 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device)
115 | next_state = torch.FloatTensor(next_state).to(device)
116 | goal = torch.FloatTensor(goal).to(device)
117 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device)
118 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device)
119 |
120 | # select next action
121 | next_action = self.actor(next_state, goal).detach()
122 |
123 | # Compute target Q-value:
124 | target_Q = self.critic(next_state, next_action, goal).detach()
125 | target_Q = reward + ((1-done) * gamma * target_Q)
126 |
127 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q)
128 | self.critic_optimizer.zero_grad()
129 | critic_loss.backward()
130 | self.critic_optimizer.step()
131 |
132 | # Compute actor loss:
133 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean()
134 |
135 | # Optimize the actor
136 | self.actor_optimizer.zero_grad()
137 | actor_loss.backward()
138 | self.actor_optimizer.step()
139 |
140 | def save(self, directory, name):
141 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name))
142 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name))
143 |
144 | def load(self, directory, name):
145 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu'))
146 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu'))
147 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/DDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 |
6 | device = torch.device("mps:0" if torch.backends.mps.is_available() else "cpu")
7 | print(device)
8 |
9 |
10 | class extract_tensor(nn.Module):
11 | def forward(self,x):
12 | # Output shape (batch, features, hidden)
13 | tensor, _ = x
14 | # Reshape shape (batch, hidden)
15 | return tensor
16 |
17 |
18 | class Actor(nn.Module):
19 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers):
20 | super(Actor, self).__init__()
21 |
22 | # actor
23 | in_features = state_dim + state_dim
24 |
25 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)]
26 | # layers.append(extract_tensor())
27 |
28 | layers=[]
29 | # out_features = layer_dim
30 | for i in range(n_layers):
31 |
32 | # Suggest the number of units in each layer
33 | out_features = layer_dim
34 |
35 | # layers.append(nn.Linear(out_features, out_features))
36 | layers.append(nn.Linear(in_features, out_features))
37 | layers.append(nn.ReLU())
38 | layers.append(nn.Dropout(p=0.2))
39 |
40 | in_features = out_features
41 |
42 | # in_features = out_features
43 |
44 | layers.append(nn.Linear(in_features, action_dim))
45 | # layers.append(nn.Tanh())
46 | layers.append(nn.Softmax(dim=1))
47 | self.actor = nn.Sequential(*layers)
48 |
49 | # max value of actions
50 | self.action_bounds = action_bounds
51 | self.offset = offset
52 |
53 | def forward(self, state, goal):
54 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset
55 |
56 | class Critic(nn.Module):
57 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers):
58 | super(Critic, self).__init__()
59 | # UVFA critic
60 | layers = []
61 |
62 |
63 | in_features = state_dim + action_dim + state_dim
64 |
65 | for i in range(n_layers):
66 |
67 | # Suggest the number of units in each layer
68 | out_features = layer_dim
69 |
70 | layers.append(nn.Linear(in_features, out_features))
71 | layers.append(nn.ReLU())
72 | layers.append(nn.Dropout(p=0.2))
73 |
74 | in_features = out_features
75 |
76 | layers.append(nn.Linear(in_features, 1))
77 | layers.append(nn.Sigmoid())
78 | self.critic = nn.Sequential(*layers)
79 |
80 | self.H = H
81 |
82 | def forward(self, state, action, goal):
83 | # rewards are in range [-H, 0]
84 | return -self.critic(torch.cat([state, action, goal], 1))* self.H
85 |
86 |
87 | class DDPG:
88 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers):
89 |
90 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).type(torch.float32).to(device)
91 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr)
92 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).type(torch.float32).to(device)
93 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr)
94 |
95 | self.mseLoss = torch.nn.MSELoss()
96 |
97 | def select_action(self, state, goal):
98 | state = torch.FloatTensor(state.reshape(1, -1)).type(torch.float32).to(device)
99 | goal = torch.FloatTensor(goal.reshape(1, -1)).type(torch.float32).to(device)
100 | return self.actor(state, goal).detach().cpu().data.numpy().flatten()
101 |
102 | def update(self, buffer, n_iter, batch_size,env):
103 |
104 |
105 | for i in range(n_iter):
106 | # Sample a batch of transitions from replay buffer:
107 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size)
108 |
109 | # convert np arrays into tensors
110 | state = torch.FloatTensor(state).type(torch.float32).to(device)
111 | action = torch.FloatTensor(action).type(torch.float32).to(device)
112 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).type(torch.float32).to(device)
113 | next_state = torch.FloatTensor(next_state).type(torch.float32).to(device)
114 | goal = torch.FloatTensor(goal).type(torch.float32).to(device)
115 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).type(torch.float32).to(device)
116 | done = torch.FloatTensor(done).reshape((batch_size,1)).type(torch.float32).to(device)
117 |
118 | # select next action
119 | next_action = self.actor(next_state, goal).detach()
120 |
121 | # Compute target Q-value:
122 | target_Q = self.critic(next_state, next_action, goal).detach()
123 | target_Q = reward + ((1-done) * gamma * target_Q)
124 |
125 |
126 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q)
127 | self.critic_optimizer.zero_grad()
128 | critic_loss.backward()
129 | self.critic_optimizer.step()
130 |
131 | # Compute actor loss:
132 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean()
133 |
134 | # Optimize the actor
135 | self.actor_optimizer.zero_grad()
136 | actor_loss.backward()
137 | self.actor_optimizer.step()
138 |
139 |
140 | def save(self, directory, name):
141 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name))
142 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name))
143 |
144 | def load(self, directory, name):
145 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu'))
146 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu'))
147 |
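This variant differs from HRL/DDPG.py mainly in the explicit .type(torch.float32) casts: the MPS backend does not support float64, so tensors are forced to single precision before being moved to the device. A small, hedged device-selection pattern that makes the same constraint explicit (a sketch, not part of the repo):

    import torch

    if torch.backends.mps.is_available():
        device = torch.device("mps:0")
        torch.set_default_dtype(torch.float32)   # guard against accidental float64 tensors on MPS
    else:
        device = torch.device("cpu")
    print(device)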
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/hpo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import optuna
6 | from asset.topology_optimization import CantileverEnv
7 |
8 |
9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
10 |
11 | # Check for HPO:
12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837
13 |
14 | def train(params):
15 |
16 | #################### Hyperparameters ####################
17 | env_name ="T0-h-v1"
18 |
19 | save_episode = 20 # keep saving every n episodes
20 | # max_episodes = params['max_episodes'] # max num of training episodes
21 | max_episodes = 1_000
22 | random_seed = params['random_seed']
23 | # random_seed=False
24 | render = False
25 |
26 | env = gym.make(env_name)
27 | env.layer_dim=params['layer_dim']
28 | # env.layer_dim=3
29 | env.n_layers=params['n_layers']
30 | # env.n_layers=6
31 | env.optimizer=params['optimizer']
32 | # env.optimizer='SGD'
33 |
34 | state_dim = env.observation_space.shape[0]
35 | action_dim = env.N_DISCRETE_ACTIONS
36 |
37 | """
38 | Actions (both primitive and subgoal) are implemented as follows:
39 | action = ( network output (Softmax in this implementation) * bounds ) + offset
40 | clip_high and clip_low bound the exploration noise
41 | """
42 |
43 | # primitive action bounds and offset
44 | action_bounds = env.action_space.high[0]
45 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
46 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
47 |
48 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
49 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
50 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
51 |
52 | # state bounds and offset
53 | # state_bounds_np = np.array([0.5, 0.5e7])
54 | state_bounds_np = np.array([1, 1e7])
55 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
56 | # state_offset = np.array([0.5, 0.5e7])
57 | state_offset = np.array([0, 0])
58 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
59 | state_clip_low = np.array([0, 0])
60 | state_clip_high = np.array([1, 1e7])
61 |
62 | exploration_action_noise = np.array([params['action_noise']])
63 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']])
64 |
65 | goal_state=np.array([0.68, 20])
66 | threshold=[0.05, 5]
67 |
68 | # HAC parameters:
69 | k_level = 2 # num of levels in hierarchy
70 | H = params['H'] # time horizon to achieve subgoal
71 | # H = 11
72 | lamda = params['lamda'] # subgoal testing parameter
73 | # lamda = 0.9453109199655714
74 |
75 | # DDPG parameters:
76 | gamma = params['gamma'] # discount factor for future rewards
77 | # gamma = 0.992256316386673
78 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update
79 | # n_iter = 186
80 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer
81 | # batch_size =256
82 | lr = params['lr']
83 | # lr= 0.0032967527995782626
84 |
85 | # save trained models
86 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
87 | filename = "HAC_{}".format(env_name)
88 | #########################################################
89 |
90 |
91 | if random_seed:
92 | print("Random Seed: {}".format(random_seed))
93 | env.seed(random_seed)
94 | torch.manual_seed(random_seed)
95 | np.random.seed(random_seed)
96 |
97 | # creating HAC agent and setting parameters
98 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
99 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
100 |
101 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
102 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
103 |
104 |
105 | # training procedure
106 | my_res=[]
107 | for i_episode in range(1, max_episodes+1):
108 | agent.reward = 0
109 | agent.timestep = 0
110 |
111 | state = env.reset()
112 | # collecting experience in environment
113 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
114 |
115 | agent.update(n_iter, batch_size, env)
116 |
117 | my_res.append(agent.reward)
118 |
119 | return np.mean(my_res)
120 |
121 | def objective(trial):
122 |
123 | params = {
124 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500),
125 | 'random_seed': trial.suggest_int("random_seed", 1, 5),
126 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16),
127 | 'n_layers':trial.suggest_int("n_layers", 2, 16),
128 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam",
129 | "RMSprop",
130 | "SGD"
131 | ]),
132 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1),
133 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1),
134 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7),
135 | 'H': trial.suggest_int("H", 3, 16),
136 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1),
137 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999),
138 | 'n_iter': trial.suggest_int('n_iter', 50, 350),
139 | 'batch_size': trial.suggest_int('batch_size', 50, 350),
140 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1)
141 |
142 | }
143 |
144 |
145 |
146 | rev = train(params)
147 |
148 | return rev
149 |
150 |
151 |
152 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
153 | study.optimize(objective, n_trials=300)
154 |
155 |
156 | best_trial = study.best_trial
157 |
158 | print()
159 |
160 | for key, value in best_trial.params.items():
161 | print("{}: {}".format(key, value))
162 |
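Note that train() builds the environment with gym.make("T0-h-v1") while only importing CantileverEnv; this resolves only if the id is registered somewhere, most likely in asset/__init__.py, which is not shown in this dump. A hedged sketch of what such a registration looks like with classic gym (id and entry point taken from this repo, the step cap is illustrative):

    from gym.envs.registration import register

    register(
        id="T0-h-v1",
        entry_point="asset.topology_optimization:CantileverEnv",
        max_episode_steps=100,   # illustrative cap; the env also terminates itself
    )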
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/hpo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import optuna
6 | from asset.topology_optimization import CantileverEnv
7 |
8 |
9 | device = torch.device("mps:0" if torch.backends.mps.is_available() else "cpu")
10 | print(device)
11 |
12 | # Check for HPO:
13 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837
14 |
15 | def train(params):
16 |
17 | #################### Hyperparameters ####################
18 | env_name ="T0-h-v1"
19 |
20 | save_episode = 20 # keep saving every n episodes
21 | # max_episodes = params['max_episodes'] # max num of training episodes
22 | max_episodes = 1_000
23 | random_seed = params['random_seed']
24 | # random_seed=False
25 | render = False
26 |
27 | env = gym.make(env_name)
28 | env.layer_dim=params['layer_dim']
29 | # env.layer_dim=3
30 | env.n_layers=params['n_layers']
31 | # env.n_layers=6
32 | env.optimizer=params['optimizer']
33 | # env.optimizer='SGD'
34 |
35 | state_dim = env.observation_space.shape[0]
36 | action_dim = env.N_DISCRETE_ACTIONS
37 |
38 | """
39 | Actions (both primitive and subgoal) are implemented as follows:
40 | action = ( network output (Softmax in this implementation) * bounds ) + offset
41 | clip_high and clip_low bound the exploration noise
42 | """
43 |
44 | # primitive action bounds and offset
45 | action_bounds = env.action_space.high[0]
46 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
47 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
48 |
49 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
50 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
51 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
52 |
53 | # state bounds and offset
54 | # state_bounds_np = np.array([0.5, 0.5e7])
55 | state_bounds_np = np.array([1, 1e7])
56 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
57 | # state_offset = np.array([0.5, 0.5e7])
58 | state_offset = np.array([0, 0])
59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
60 | state_clip_low = np.array([0, 0])
61 | state_clip_high = np.array([1, 1e7])
62 |
63 | exploration_action_noise = np.array([params['action_noise']])
64 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']])
65 |
66 | goal_state=np.array([0.68, 20])
67 | threshold=[0.05, 5]
68 |
69 | # HAC parameters:
70 | k_level = 2 # num of levels in hierarchy
71 | H = params['H'] # time horizon to achieve subgoal
72 | # H = 11
73 | lamda = params['lamda'] # subgoal testing parameter
74 | # lamda = 0.9453109199655714
75 |
76 | # DDPG parameters:
77 | gamma = params['gamma'] # discount factor for future rewards
78 | # gamma = 0.992256316386673
79 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update
80 | # n_iter = 186
81 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer
82 | # batch_size =256
83 | lr = params['lr']
84 | # lr= 0.0032967527995782626
85 |
86 | # save trained models
87 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
88 | filename = "HAC_{}".format(env_name)
89 | #########################################################
90 |
91 |
92 | if random_seed:
93 | print("Random Seed: {}".format(random_seed))
94 | env.seed(random_seed)
95 | torch.manual_seed(random_seed)
96 | np.random.seed(random_seed)
97 |
98 | # creating HAC agent and setting parameters
99 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
101 |
102 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
103 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
104 |
105 |
106 | # training procedure
107 | my_res=[]
108 | for i_episode in range(1, max_episodes+1):
109 | agent.reward = 0
110 | agent.timestep = 0
111 |
112 | state = env.reset()
113 | # collecting experience in environment
114 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
115 |
116 | agent.update(n_iter, batch_size, env)
117 |
118 | my_res.append(agent.reward)
119 |
120 | return np.mean(my_res)
121 |
122 | def objective(trial):
123 |
124 | params = {
125 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500),
126 | 'random_seed': trial.suggest_int("random_seed", 1, 5),
127 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16),
128 | 'n_layers':trial.suggest_int("n_layers", 2, 16),
129 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam",
130 | "RMSprop",
131 | "SGD"
132 | ]),
133 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1),
134 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1),
135 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7),
136 | 'H': trial.suggest_int("H", 3, 16),
137 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1),
138 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999),
139 | 'n_iter': trial.suggest_int('n_iter', 50, 350),
140 | 'batch_size': trial.suggest_int('batch_size', 50, 350),
141 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1)
142 |
143 | }
144 |
145 |
146 |
147 | rev = train(params)
148 |
149 | return rev
150 |
151 |
152 |
153 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
154 | study.optimize(objective, n_trials=300)
155 |
156 |
157 | best_trial = study.best_trial
158 |
159 | print()
160 |
161 | for key, value in best_trial.params.items():
162 | print("{}: {}".format(key, value))
163 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/hpo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import optuna
6 | from asset.topology_optimization import CantileverEnv
7 |
8 |
9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
10 |
11 | # Check for HPO:
12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837
13 |
14 | def train(params):
15 |
16 | #################### Hyperparameters ####################
17 | env_name ="T0-h-v1"
18 |
19 | save_episode = 20 # keep saving every n episodes
20 | # max_episodes = params['max_episodes'] # max num of training episodes
21 | max_episodes = 1_000
22 | # random_seed = params['random_seed']
23 | random_seed = 42
24 | # random_seed=False
25 | render = False
26 |
27 | env = gym.make(env_name)
28 | env.layer_dim=params['layer_dim']
29 | # env.layer_dim=3
30 | env.n_layers=params['n_layers']
31 | # env.n_layers=6
32 | env.optimizer=params['optimizer']
33 | # env.optimizer='SGD'
34 |
35 | state_dim = env.observation_space.shape[0]
36 | action_dim = env.N_DISCRETE_ACTIONS
37 |
38 | """
39 | Actions (both primitive and subgoal) are implemented as follows:
40 | action = ( network output (Softmax in this implementation) * bounds ) + offset
41 | clip_high and clip_low bound the exploration noise
42 | """
43 |
44 | # primitive action bounds and offset
45 | action_bounds = env.action_space.high[0]
46 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
47 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
48 |
49 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
50 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
51 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
52 |
53 | # state bounds and offset
54 | # state_bounds_np = np.array([0.5, 0.5e7])
55 | state_bounds_np = np.array([1, 1e7])
56 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
57 | # state_offset = np.array([0.5, 0.5e7])
58 | state_offset = np.array([0, 0])
59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
60 | state_clip_low = np.array([0, 0])
61 | state_clip_high = np.array([1, 1e7])
62 |
63 | exploration_action_noise = np.array([params['action_noise']])
64 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']])
65 |
66 | goal_state=np.array([0.68, 20])
67 | threshold=[0.05, 5]
68 |
69 | # HAC parameters:
70 | k_level = 2 # num of levels in hierarchy
71 | # H = params['H'] # time horizon to achieve subgoal
72 | H = 7
73 | lamda = params['lamda'] # subgoal testing parameter
74 | # lamda = 0.9453109199655714
75 |
76 | # DDPG parameters:
77 | gamma = params['gamma'] # discount factor for future rewards
78 | # gamma = 0.992256316386673
79 | # n_iter = params['n_iter'] # update policy n_iter times in one DDPG update
80 | n_iter = 100
81 | # batch_size = params['batch_size'] # num of transitions sampled from replay buffer
82 | batch_size =100
83 | lr = params['lr']
84 | # lr= 0.0032967527995782626
85 |
86 | # save trained models
87 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
88 | filename = "HAC_{}".format(env_name)
89 | #########################################################
90 |
91 |
92 | if random_seed:
93 | print("Random Seed: {}".format(random_seed))
94 | env.seed(random_seed)
95 | torch.manual_seed(random_seed)
96 | np.random.seed(random_seed)
97 |
98 | # creating HAC agent and setting parameters
99 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers)
101 |
102 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
103 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
104 |
105 |
106 | # training procedure
107 | my_res=[]
108 | for i_episode in range(1, max_episodes+1):
109 | agent.reward = 0
110 | agent.timestep = 0
111 |
112 | state = env.reset()
113 | # collecting experience in environment
114 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
115 |
116 | agent.update(n_iter, batch_size, env)
117 |
118 | my_res.append(agent.reward)
119 |
120 | return np.mean(my_res)
121 |
122 | def objective(trial):
123 |
124 | params = {
125 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500),
126 | # 'random_seed': trial.suggest_int("random_seed", 1, 5),
127 | 'layer_dim':trial.suggest_int("layer_dim", 2, 50),
128 | 'n_layers':trial.suggest_int("n_layers", 2, 50),
129 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam",
130 | "RMSprop",
131 | "SGD"
132 | ]),
133 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1),
134 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1),
135 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7),
136 | # 'H': trial.suggest_int("H", 3, 16),
137 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1),
138 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999),
139 | # 'n_iter': trial.suggest_int('n_iter', 50, 350),
140 | # 'batch_size': trial.suggest_int('batch_size', 50, 350),
141 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1)
142 |
143 | }
144 |
145 |
146 |
147 | rev = train(params)
148 |
149 | return rev
150 |
151 |
152 |
153 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
154 | study.optimize(objective, n_trials=300)
155 |
156 |
157 | best_trial = study.best_trial
158 |
159 | print()
160 |
161 | for key, value in best_trial.params.items():
162 | print("{}: {}".format(key, value))
163 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/hpo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gym
3 | import numpy as np
4 | from HAC import HAC
5 | import optuna
6 | from asset.topology_optimization import CantileverEnv
7 |
8 |
9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
10 |
11 | # Check for HPO:
12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837
13 |
14 | def train(params):
15 |
16 | #################### Hyperparameters ####################
17 | env_name ="T0-h-v1"
18 |
19 | save_episode = 20 # keep saving every n episodes
20 | # max_episodes = params['max_episodes'] # max num of training episodes
21 | max_episodes = 10_000
22 | random_seed = 1
23 | # random_seed=False
24 | render = False
25 |
26 | env = gym.make(env_name)
27 | env.layer_dim = params['layer_dim']
28 | # env.layer_dim=3
29 | env.n_layers = params['n_layers']
30 | # env.n_layers=6
31 | env.optimizer = params['optimizer']
32 | # env.optimizer='SGD'
33 |
34 | state_dim = env.observation_space.shape[0]
35 | action_dim = env.N_DISCRETE_ACTIONS
36 |
37 | """
38 | Actions (both primitive and subgoal) are implemented as follows:
39 | action = ( network output (Softmax in this implementation) * bounds ) + offset
40 | clip_high and clip_low bound the exploration noise
41 | """
42 |
43 | # primitive action bounds and offset
44 | action_bounds = env.action_space.high[0]
45 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)])
46 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)])
47 |
48 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device)
49 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
50 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
51 |
52 | # state bounds and offset
53 | # state_bounds_np = np.array([0.5, 0.5e7])
54 | # state_bounds_np = np.array([1, 1e7])
55 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device)
56 | state_bounds = env.observation_space.high[0]
57 | # state_offset = np.array([0.5, 0.5e7])
58 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device)
60 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)])
61 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)])
62 |
63 | exploration_action_noise = np.array([params['action_noise']])
64 | exploration_state_noise = np.array([params['state_noise']])
65 |
66 | goal_ = [0.68, 30]
67 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_)))
68 | threshold = [0.05, 3]
69 |
70 | # HAC parameters:
71 | k_level = 2 # num of levels in hierarchy
72 | H = 9 # time horizon to achieve subgoal
73 | # H = 11
74 | lamda = params['lamda'] # subgoal testing parameter
75 | # lamda = 0.9453109199655714
76 |
77 | # DDPG parameters:
78 | gamma = params['gamma'] # discount factor for future rewards
79 | # gamma = 0.992256316386673
80 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update
81 | # n_iter = 186
82 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer
83 | # batch_size =256
84 | lr = params['lr']
85 | # lr= 0.0032967527995782626
86 |
87 | # save trained models
88 | directory = "./preTrained/{}/{}level/".format(env_name, k_level)
89 | filename = "HAC_{}".format(env_name)
90 | #########################################################
91 |
92 | if random_seed:
93 | print("Random Seed: {}".format(random_seed))
94 | env.seed(random_seed)
95 | torch.manual_seed(random_seed)
96 | np.random.seed(random_seed)
97 |
98 | # creating HAC agent and setting parameters
99 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold,
100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim,
101 | env.n_layers)
102 |
103 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high,
104 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise)
105 |
106 |
107 | # training procedure
108 | my_res=[]
109 | for i_episode in range(1, max_episodes+1):
110 | agent.reward = 0
111 | agent.timestep = 0
112 |
113 | state = env.reset()
114 | # collecting experience in environment
115 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False)
116 |
117 | agent.update(n_iter, batch_size, env)
118 |
119 | my_res.append(agent.reward)
120 |
121 | return np.mean(my_res)
122 |
123 | def objective(trial):
124 |
125 | params = {
126 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500),
127 | # 'random_seed': trial.suggest_int("random_seed", 1, 5),
128 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16),
129 | 'n_layers':trial.suggest_int("n_layers", 2, 16),
130 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam",
131 | "RMSprop",
132 | "SGD"
133 | ]),
134 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1),
135 | 'state_noise': trial.suggest_loguniform('state_noise', 0.01, 1),
136 | # 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7),
137 | # 'H': trial.suggest_int("H", 3, 16),
138 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1),
139 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999),
140 | 'n_iter': trial.suggest_int('n_iter', 50, 350),
141 | 'batch_size': trial.suggest_int('batch_size', 50, 350),
142 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1)
143 |
144 | }
145 |
146 | rev = train(params)
147 |
148 | return rev
149 |
150 |
151 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
152 | study.optimize(objective, n_trials=100)
153 |
154 |
155 | best_trial = study.best_trial
156 |
157 | print()
158 |
159 | for key, value in best_trial.params.items():
160 | print("{}: {}".format(key, value))
161 |
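Since each of these studies runs hundreds of trials of a slow simulation-in-the-loop objective, it can be worth persisting the study so an interrupted run can resume; a hedged sketch, reusing the objective() defined above (the SQLite path and study name are made up):

    import optuna

    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(),
        storage="sqlite:///hac_hpo.db",   # illustrative storage location
        study_name="hac_topology_hpo",
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=100)
    print(study.best_trial.params)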
--------------------------------------------------------------------------------