├── docs
├── CNAME
├── pdfs
│ └── marine_v3.pdf
└── web
│ ├── pct
│ ├── apple-icon.png
│ ├── IMG_20190821_123143.jpg
│ ├── schema at 20.38.17.png
│ ├── schema1 at 20.44.48.png
│ ├── photo_2021-12-24 13.09.10 (1).jpeg
│ └── Screenshot 2023-11-03 at 20.27.43.png
│ └── css-template.css
├── gigala
├── propulsion
│ └── lazy_rocketeer
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── logic.py
│ │ └── app.py
└── topology
│ ├── topology_optimiz
│ ├── hierarchical_rl
│ │ ├── HRL
│ │ │ ├── requirements.txt
│ │ │ ├── log.txt
│ │ │ ├── asset
│ │ │ │ ├── __init__.py
│ │ │ │ └── topology_optimization.py
│ │ │ ├── preTrained
│ │ │ │ └── T0-h-v1
│ │ │ │ │ ├── 1level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── 2level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ └── 3level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── test.py
│ │ │ ├── train.py
│ │ │ ├── DDPG.py
│ │ │ └── hpo.py
│ │ ├── HRL_without_dropout
│ │ │ ├── requirements.txt
│ │ │ ├── asset
│ │ │ │ ├── __init__.py
│ │ │ │ └── topology_optimization.py
│ │ │ ├── preTrained
│ │ │ │ └── T0-h-v1
│ │ │ │ │ ├── 1level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ └── 3level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ ├── log.txt
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── test.py
│ │ │ ├── train.py
│ │ │ ├── DDPG.py
│ │ │ └── hpo.py
│ │ └── hrl_draft
│ │ │ ├── HRL_mps
│ │ │ ├── requirements.txt
│ │ │ ├── log.txt
│ │ │ ├── asset
│ │ │ │ ├── __init__.py
│ │ │ │ └── topology_optimization.py
│ │ │ ├── preTrained
│ │ │ │ └── T0-h-v1
│ │ │ │ │ ├── 1level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── 2level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ └── 3level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── test.py
│ │ │ ├── torchfem
│ │ │ │ ├── materials.py
│ │ │ │ └── sparse.py
│ │ │ ├── train.py
│ │ │ ├── DDPG.py
│ │ │ └── hpo.py
│ │ │ ├── HRL_jax_mps
│ │ │ ├── requirements.txt
│ │ │ ├── asset
│ │ │ │ ├── __init__.py
│ │ │ │ └── topology_optimization.py
│ │ │ ├── preTrained
│ │ │ │ └── T0-h-v1
│ │ │ │ │ ├── 1level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── 2level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ └── 3level
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── log.txt
│ │ │ ├── test.py
│ │ │ ├── train.py
│ │ │ └── DDPG.py
│ │ │ └── HRL_without_the_trick
│ │ │ ├── requirements.txt
│ │ │ ├── asset
│ │ │ ├── __init__.py
│ │ │ └── topology_optimization.py
│ │ │ ├── preTrained
│ │ │ └── T0-h-v1
│ │ │ │ ├── 1level
│ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ └── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ ├── 2level
│ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ └── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ └── 3level
│ │ │ │ ├── HAC_T0-h-v1_level_0_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_0_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_level_1_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_1_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_level_2_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_level_2_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_0_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_0_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_1_actor.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_1_crtic.pth
│ │ │ │ ├── HAC_T0-h-v1_solved_level_2_actor.pth
│ │ │ │ └── HAC_T0-h-v1_solved_level_2_crtic.pth
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── test.py
│ │ │ ├── train.py
│ │ │ ├── DDPG.py
│ │ │ └── hpo.py
│ └── rl_beam
│ │ └── sota
│ │ └── my_animation.gif
│ └── sizing_optimiz
│ └── genetic
│ └── gen.png
├── .github
└── FUNDING.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
└── README.md
/docs/CNAME:
--------------------------------------------------------------------------------
1 | gigala.io
--------------------------------------------------------------------------------
/gigala/propulsion/lazy_rocketeer/.gitignore:
--------------------------------------------------------------------------------
1 | config.py
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: ['https://www.paypal.me/gigatskhondia']
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .idea/
3 | .ipynb_checkpoints
4 | .DS_Store
--------------------------------------------------------------------------------
/docs/pdfs/marine_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/pdfs/marine_v3.pdf
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/docs/web/pct/apple-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/apple-icon.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/docs/web/pct/IMG_20190821_123143.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/IMG_20190821_123143.jpg
--------------------------------------------------------------------------------
/docs/web/pct/schema at 20.38.17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/schema at 20.38.17.png
--------------------------------------------------------------------------------
/docs/web/pct/schema1 at 20.44.48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/schema1 at 20.44.48.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/log.txt:
--------------------------------------------------------------------------------
1 | 1,2163.2684115955485
2 | 2,2576.2679841110885
3 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | torch
3 | pyglet
4 | six
--------------------------------------------------------------------------------
/gigala/topology/sizing_optimiz/genetic/gen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/sizing_optimiz/genetic/gen.png
--------------------------------------------------------------------------------
/docs/web/pct/photo_2021-12-24 13.09.10 (1).jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/photo_2021-12-24 13.09.10 (1).jpeg
--------------------------------------------------------------------------------
/docs/web/pct/Screenshot 2023-11-03 at 20.27.43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/docs/web/pct/Screenshot 2023-11-03 at 20.27.43.png
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/log.txt:
--------------------------------------------------------------------------------
1 | 1,1.1094133879049637
2 | 2,1.0495237001048725
3 | 3,0.4074200268520841
4 | 4,0.5236868067443036
5 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/rl_beam/sota/my_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/rl_beam/sota/my_animation.gif
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 |     id="T0-h-v1",
7 |     entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth -------------------------------------------------------------------------------- 
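Each HRL variant in this repository carries the same `asset/__init__.py` (one copy appears above, four more below): importing the `asset` package registers the cantilever topology-optimization environment with Gym under the id "T0-h-v1", with `CantileverEnv` from `asset/topology_optimization.py` as the entry point. The following is a minimal usage sketch, not code from this repository; it assumes the classic `gym` API implied by the unpinned `gym` requirement and that it is run from one of the HRL folders so `asset` is importable. The random-action loop is purely illustrative.

# Hypothetical usage sketch (assumptions noted above; not part of the repository).
import gym
import asset  # noqa: F401 -- importing `asset` runs register(id="T0-h-v1", ...)

env = gym.make("T0-h-v1")  # resolves entry_point "asset:CantileverEnv"
obs = env.reset()          # classic gym: reset() returns only the observation
done = False
while not done:
    action = env.action_space.sample()          # placeholder random action
    obs, reward, done, info = env.step(action)  # classic gym 4-tuple step
env.close()
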
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 |     id="T0-h-v1",
7 |     entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 |     id="T0-h-v1",
7 |     entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 |     id="T0-h-v1",
7 |     entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/asset/__init__.py:
--------------------------------------------------------------------------------
1 | from asset.topology_optimization import CantileverEnv
2 | from gym.envs.registration import register
3 |
4 |
5 | register(
6 |     id="T0-h-v1",
7 |     entry_point="asset:CantileverEnv",
8 | )
9 |
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth
--------------------------------------------------------------------------------
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth -------------------------------------------------------------------------------- 
/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth 
-------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- 
/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/1level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/2level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_0_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_1_crtic.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_actor.pth -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigatskhondia/gigala/HEAD/gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/preTrained/T0-h-v1/3level/HAC_T0-h-v1_solved_level_2_crtic.pth -------------------------------------------------------------------------------- /gigala/propulsion/lazy_rocketeer/README.md: -------------------------------------------------------------------------------- 1 | ### Lazy Rocketeer - LLM Agent to design rocket engines 2 | 3 | You can communicate in natural language with the agent. 
To run: *** streamlit run app.py *** 4 | 5 | Screenshot 2025-10-07 at 11 14 35 6 | 7 | -------------------------------------------------------------------------------- /docs/web/css-template.css: -------------------------------------------------------------------------------- 1 | .icon-list li::before { 2 | display: block; 3 | flex-shrink: 0; 4 | width: 1.5em; 5 | height: 1.5em; 6 | margin-right: .5rem; 7 | content: ""; 8 | background: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='%23212529' viewBox='0 0 16 16'%3E%3Cpath d='M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0zM4.5 7.5a.5.5 0 0 0 0 1h5.793l-2.147 2.146a.5.5 0 0 0 .708.708l3-3a.5.5 0 0 0 0-.708l-3-3a.5.5 0 1 0-.708.708L10.293 7.5H4.5z'/%3E%3C/svg%3E") no-repeat center center / 100% auto; 9 | } 10 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/log.txt: -------------------------------------------------------------------------------- 1 | 1,0.7831910645984259 2 | 2,0.7831910645984259 3 | 3,0.7831910645984259 4 | 4,0.7831910645984259 5 | 5,0.7831910645984259 6 | 6,0.7831910645984259 7 | 7,0.7831910645984259 8 | 8,0.7831910645984259 9 | 9,0.7831910645984259 10 | 10,0.7831910645984259 11 | 11,0.7831910645984259 12 | 12,0.7831910645984259 13 | 13,0.7831910645984259 14 | 14,0.7831910645984259 15 | 15,0.7831910645984259 16 | 16,0.7831910645984259 17 | 17,0.7831910645984259 18 | 18,0.7831910645984259 19 | 19,0.7831910645984259 20 | 20,0.7831910645984259 21 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: Gigala 3 | message: >- 4 | If you use this software, please cite it using the 5 | metadata from this file. 6 | type: software 7 | authors: 8 | - given-names: Giorgi 9 | family-names: Tskhondia 10 | email: gigatskhondia@gmail.com 11 | affiliation: Independent researcher 12 | identifiers: 13 | - type: url 14 | value: 'https://gigatskhondia.medium.com/' 15 | description: Medium blog about my project's developments. 16 | - type: url 17 | value: 'https://www.researchgate.net/profile/Giorgi-Tskhondia' 18 | description: My ResearchGate profile. 19 | repository-code: 'https://github.com/gigatskhondia/gigala' 20 | abstract: >- 21 | Applying artificial intelligence algorithms for the 22 | purpose of engineering design. 
23 | keywords: 24 | - Reinforcement learning 25 | - Finite element methods 26 | - Structural engineering 27 | - Design 28 | - Topology optimization 29 | license: MIT 30 | date-released: '2018-10-13' 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Software: Gigala 4 | 5 | Copyright (c) 2018 Georgy Tskhondiya (Giorgi Tskhondia) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgy Tskhondiya 4 | Copyright (c) 2019 Nikhil Barhate 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgy Tskhondiya 4 | Copyright (c) 2019 Nikhil Barhate 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgy Tskhondiya 4 | Copyright (c) 2019 Nikhil Barhate 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgy Tskhondiya 4 | Copyright (c) 2019 Nikhil Barhate 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Georgy Tskhondiya 4 | Copyright 2024 Nils Meyer 5 | Copyright (c) 2019 Nikhil Barhate 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /gigala/propulsion/lazy_rocketeer/logic.py: -------------------------------------------------------------------------------- 1 | import langchain_openai 2 | from pydantic import BaseModel, Field 3 | from langchain_core.tools import tool 4 | from config import OPENAI_API_KEY 5 | 6 | 7 | class DialogModel: 8 | openai_client= None 9 | _model= None 10 | 11 | def __init__(self, model, api_key_): 12 | self._model = model 13 | self._key = api_key_ 14 | self.openai_client = langchain_openai.ChatOpenAI(model = self._model, 15 | openai_api_key = self._key, 16 | temperature = 0) 17 | 18 | 19 | class ChamberInput(BaseModel): 20 | r: float = Field(..., description="Mixture ratio (O/F)") 21 | F: float = Field(..., description="Thrust,[N]") 22 | p1: float = Field(..., description="Chamber pressure, [MPa]") 23 | CF: float = Field(..., description="Thrust coefficient") 24 | c: float = Field(..., description="Estimated nozzle exit exhaust velocity, [m/sec]") 25 | m_p: float = Field(..., description="Usable propellant mass, [kg]") 26 | 27 | @tool(args_schema=ChamberInput) 28 | def get_thrust_chamber_params(r, F, p1, CF, c, m_p): 29 | """ Thrust chamber dimensions and burn duration calculations. 30 | r = 2.3 31 | F = 50000 32 | p1 = 4826000 33 | CF = 1.9 34 | m_p = 7482 35 | """ 36 | m_hat = F/c 37 | m_hat_f =m_hat/(r+1) 38 | m_hat_o = (m_hat*r) / (r + 1) 39 | t_b = m_p /(m_hat_f+m_hat_o) 40 | A_t = F/(p1*CF) 41 | return {"nozzle_throat_area": A_t, 42 | "burn_duration":t_b, 43 | } 44 | 45 | MODEL = DialogModel("gpt-4o-mini", OPENAI_API_KEY) 46 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical reinforcement learning for topology optimization 2 | 3 | This is an implementation of Hierarchical-Actor-Critic-HAC algorithm for topology optimization of cantilever beam. 4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand. 5 | 6 | 7 | # Hierarchical-Actor-Critic-HAC-PyTorch 8 | 9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals). 10 | 11 | 12 | ## Usage 13 | - All the hyperparameters are found by `hpo.py`. 14 | - To train a new network run `train.py` 15 | - To test a preTrained network run `test.py` 16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2) 17 | 18 | 19 | ## Implementation Details 20 | 21 | - The code is implemented as described in the appendix section of the paper and the Official repository, i.e. without target networks and with bounded Q-values. 
22 | - Topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization) 23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f) 24 | 25 | 26 | ## Citing 27 | 28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff) 29 | 30 | ## Requirements 31 | 32 | - Python 33 | - PyTorch 34 | - OpenAI gym 35 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical reinforcement learning for topology optimization 2 | 3 | This is an implementation of Hierarchical-Actor-Critic-HAC algorithm for topology optimization of cantilever beam. 4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand. 5 | 6 | 7 | # Hierarchical-Actor-Critic-HAC-PyTorch 8 | 9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals). 10 | 11 | 12 | ## Usage 13 | - All the hyperparameters are found by `hpo.py`. 14 | - To train a new network run `train.py` 15 | - To test a preTrained network run `test.py` 16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2) 17 | 18 | 19 | ## Implementation Details 20 | 21 | - The code is implemented as described in the appendix section of the paper and the Official repository, i.e. without target networks and with bounded Q-values. 22 | - Topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization) 23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f) 24 | 25 | 26 | ## Citing 27 | 28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff) 29 | 30 | ## Requirements 31 | 32 | - Python 33 | - PyTorch 34 | - OpenAI gym 35 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical reinforcement learning for topology optimization 2 | 3 | This is an implementation of Hierarchical-Actor-Critic-HAC algorithm for topology optimization of cantilever beam. 4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand. 5 | 6 | 7 | # Hierarchical-Actor-Critic-HAC-PyTorch 8 | 9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). 
The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals). 10 | 11 | 12 | ## Usage 13 | - All the hyperparameters are found by `hpo.py`. 14 | - To train a new network run `train.py` 15 | - To test a preTrained network run `test.py` 16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2) 17 | 18 | 19 | ## Implementation Details 20 | 21 | - The code is implemented as described in the appendix section of the paper and the Official repository, i.e. without target networks and with bounded Q-values. 22 | - Topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization) 23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f) 24 | 25 | 26 | ## Citing 27 | 28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff) 29 | 30 | ## Requirements 31 | 32 | - Python 33 | - PyTorch 34 | - OpenAI gym 35 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical reinforcement learning for topology optimization 2 | 3 | This is an implementation of Hierarchical-Actor-Critic-HAC algorithm for topology optimization of cantilever beam. 4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and adjusted for the topology optimization task at hand. 5 | 6 | 7 | # Hierarchical-Actor-Critic-HAC-PyTorch 8 | 9 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals). 10 | 11 | 12 | ## Usage 13 | - All the hyperparameters are found by `hpo.py`. 14 | - To train a new network run `train.py` 15 | - To test a preTrained network run `test.py` 16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2) 17 | 18 | 19 | ## Implementation Details 20 | 21 | - The code is implemented as described in the appendix section of the paper and the Official repository, i.e. without target networks and with bounded Q-values. 
22 | - Topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization) 23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f) 24 | 25 | 26 | ## Citing 27 | 28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff) 29 | 30 | ## Requirements 31 | 32 | - Python 33 | - PyTorch 34 | - OpenAI gym 35 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical reinforcement learning for topology optimization (accelerated by Mac Metal) 2 | 3 | This is an implementation of Hierarchical-Actor-Critic-HAC algorithm for topology optimization of cantilever beam (accelerated by Mac Metal). 4 | The codebase was taken from [Nikhil Barhate](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch) and [Nils Meyer](https://github.com/meyer-nils/torch-fem) and adjusted for the topology optimization task at hand. 5 | 6 | ### DISCLAIMER: Work in progress! 7 | 8 | # Hierarchical-Actor-Critic-HAC-PyTorch 9 | 10 | Hierarchical Actor Critic (HAC) algorithm described in the paper, [Learning Multi-Level Hierarchies with Hindsight](https://arxiv.org/abs/1712.00948) (ICLR 2019). The algorithm learns to reach a goal state by dividing the task into short horizon intermediate goals (subgoals). 11 | 12 | ## Usage 13 | - All the hyperparameters are found by `hpo.py`. 14 | - To train a new network run `train.py` 15 | - To test a preTrained network run `test.py` 16 | - For a detailed explanation of offsets and bounds, refer to [issue #2](https://github.com/nikhilbarhate99/Hierarchical-Actor-Critic-HAC-PyTorch/issues/2) 17 | 18 | 19 | ## Implementation Details 20 | 21 | - The code is implemented as described in the appendix section of the paper and the Official repository, i.e. without target networks and with bounded Q-values. 
22 | - Topology optimization model is taken from [A Tutorial on Structural Optimization](https://www.researchgate.net/publication/360698153_A_Tutorial_on_Structural_Optimization) 23 | - Implementation tutorial: [Using Hierarchical Reinforcement Learning for Fast Topology Optimisation](https://gigatskhondia.medium.com/using-hierarchical-reinforcement-learning-for-fast-topology-optimisation-85aa0c07fb7f) 24 | 25 | 26 | ## Citing 27 | 28 | - [How to cite](https://github.com/gigatskhondia/gigala/blob/master/CITATION.cff) 29 | 30 | ## Requirements 31 | 32 | - Python 33 | - PyTorch 34 | - OpenAI gym 35 | -------------------------------------------------------------------------------- /gigala/propulsion/lazy_rocketeer/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from langgraph.checkpoint.memory import MemorySaver 3 | from langgraph.prebuilt import create_react_agent 4 | from logic import get_thrust_chamber_params,MODEL 5 | 6 | memory = MemorySaver() 7 | agent_ = create_react_agent(MODEL.openai_client, 8 | tools=[get_thrust_chamber_params], 9 | checkpointer=memory) 10 | 11 | if "messages" not in st.session_state: 12 | st.session_state.messages = [{'role': "system", "content": """This is a Lazy Rocketeer agent (a part of 13 | Gigala software) to reason around system requirements and mission parameters to design a rocket engine. When helping 14 | in design, it uses paradigm: think, act, observe and considers the following aspects: 15 | 16 | - Decisions on basic parameters 17 | - Stage masses and thrust level 18 | - Propellant flows and dimensions of thrust chamber 19 | - Heat transfer 20 | - Injector design 21 | - Igniter dimensions 22 | - Layout drawings, masses, flows, and pressure drops"""}] 23 | 24 | for message in st.session_state.messages: 25 | with st.chat_message(message["role"]): 26 | st.markdown(message["content"]) 27 | 28 | if prompt := st.chat_input("Hello, how can I help you?"): 29 | st.session_state.messages.append({"role": "user", "content": prompt}) 30 | with st.chat_message("user"): 31 | st.markdown(prompt) 32 | 33 | with st.chat_message("assistant"): 34 | 35 | stream = agent_.invoke( 36 | {"input": prompt, "messages": [ 37 | {"role": m["role"], "content": m["content"]} 38 | for m in st.session_state.messages 39 | ]}, 40 | { 41 | # "callbacks":[get_streamlit_cb(st.empty())], 42 | "configurable": {"thread_id": "abc321"}, 43 | }, 44 | ) 45 | 46 | response = list(stream["messages"][len(stream["messages"])-1])[0][1] 47 | st.write(response) 48 | 49 | st.session_state.messages.append({"role": "assistant", "content": response}) 50 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/log.txt: -------------------------------------------------------------------------------- 1 | 1,0.9797410239278408 2 | 2,0.7975127748912604 3 | 3,0.7723802914032126 4 | 4,0.353660573452987 5 | 5,0.26027533525850116 6 | 6,0.36404099368286785 7 | 7,0.733053987457836 8 | 8,0.6374117509465794 9 | 9,1.084154734328111 10 | 10,0.6911926938015504 11 | 11,0.9554268400162111 12 | 12,0.4533388448274854 13 | 13,0.8082453313949136 14 | 14,1.059557754495363 15 | 15,0.6464023496787109 16 | 16,0.42956891861947955 17 | 17,0.8263143041042855 18 | 18,1.1262730338491609 19 | 19,1.1457076591979931 20 | 20,0.30563800818313147 21 | 21,0.2625561628106679 22 | 22,1.0695366344255084 23 | 23,1.2304393130605795 24 | 24,0.7694056863847839 25 | 25,0.49852529193370343 
26 | 26,0.6881675978355768 27 | 27,0.9817872254967156 28 | 28,0.6057507389843019 29 | 29,1.0320603944086526 30 | 30,1.272668785983966 31 | 31,0.8920269552879224 32 | 32,0.4762557282953764 33 | 33,0.45530850633074954 34 | 34,1.303107161463436 35 | 35,0.7948466568398261 36 | 36,0.5146320524478519 37 | 37,0.7961899378649369 38 | 38,0.5182637459576089 39 | 39,0.8846944618944337 40 | 40,1.0499732090039007 41 | 41,0.9964231604218777 42 | 42,0.5350889369526511 43 | 43,0.7081767012460056 44 | 44,0.573527385311719 45 | 45,0.7063965491817927 46 | 46,0.751310566243355 47 | 47,1.470913067570704 48 | 48,0.4257564884836356 49 | 49,1.032085090102188 50 | 50,1.2063975265913127 51 | 51,0.36595814553881933 52 | 52,1.2759097185501345 53 | 53,0.9670328423887196 54 | 54,1.1301857941782705 55 | 55,1.3834535912834658 56 | 56,0.49287823295886213 57 | 57,0.9218599330192192 58 | 58,0.9377552003247814 59 | 59,0.5352374463901877 60 | 60,1.154746021761929 61 | 61,0.9833511924281655 62 | 62,0.26633021683095215 63 | 63,1.0277209889807881 64 | 64,0.7032054355208622 65 | 65,0.6863636545417255 66 | 66,0.3068279922203631 67 | 67,0.4101502457096209 68 | 68,0.16205629966238047 69 | 69,0.8319603639166808 70 | 70,0.9809997110234272 71 | 71,0.8888683184631586 72 | 72,1.0125758926358261 73 | 73,0.17638382729878044 74 | 74,0.5691294752014958 75 | 75,0.6928665281967229 76 | 76,0.7553885026199615 77 | 77,0.4553827129098218 78 | 78,0.009003768893487132 79 | 79,0.7367304883121856 80 | 80,0.11465042924233183 81 | 81,0.42512069834557475 82 | 82,0.9511296243632733 83 | 83,0.7924702958231296 84 | 84,1.128081720797427 85 | 85,0.8760122905218178 86 | 86,1.0748449004435678 87 | 87,0.219299353524388 88 | 88,0.676710409166304 89 | 89,0.5609184089960373 90 | 90,0.636260937859008 91 | 91,1.274648432375538 92 | 92,0.8898651322048404 93 | 93,0.7430671748738745 94 | 94,0.430908247614082 95 | 95,0.3785963738267595 96 | 96,0.5775409856616661 97 | 97,0.8619733872169928 98 | 98,0.2259539403820513 99 | 99,1.0352143614321967 100 | 100,0.8859741168447045 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Gigala (Engineering design by reinforcement learning, genetic algorithms and finite element methods) 2 | 3 | Are you interested in new ways of engineering design? This repository is an attempt to apply artificial intelligence algorithms for the purpose of engineering design of physical products. I combine numerical simulation like finite element analysis with artificial intelligence like reinforcement learning to produce optimal designs. Starting from 2018, my work has been focused on intelligent topology optimization of mechanical structures and elements. I am constantly exploring different ways that AI can be applied to science and engineering. 4 | 5 | Reinforcement learning is a global, gradient-free, non-convex, learning-based, generalizable topology optimization method suitable for practical needs. Sequential nature of reinforcement learning makes it also applicable to technological processes where it can provide manufacturing steps. 6 | 7 | With my diverse interests, I am using this repository as a testbed for my ideas to create software for artificial intelligence aided design. I hope that my work can inspire you to explore new ways that AI can be applied to your field. 
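To make the sequential, element-by-element picture above concrete, here is a minimal illustrative sketch of the kind of episode the topology-optimization environments in this repository run (compare `CantileverEnv.step` in `asset/topology_optimization.py`): an agent repeatedly turns one element solid and is rewarded by `(1/compliance)**0.5` until a volume budget is reached. The `toy_compliance` function and the random element picker are placeholders, not part of the repository, standing in for the real finite element solve and the trained HAC agent; only the loop structure, the reward shape, and the 0.68 volume threshold follow `CantileverEnv`.

```python
import numpy as np

def toy_compliance(x):
    # Placeholder for the finite element compliance evaluation used in this
    # repository (fast_stopt): adding material makes the structure stiffer,
    # so compliance falls as the design fills up.
    return 1.0 / (x.sum() + 1e-6)

def run_episode(nely=6, nelx=10, density=0.4, vol_limit=0.68, seed=0):
    rng = np.random.default_rng(seed)
    x = np.full((nely, nelx), density)   # design field, one density per element
    reward = 0.0
    for _ in range(nely * nelx):         # at most one action per element
        i = rng.integers(nely)           # placeholder policy: pick a random element
        j = rng.integers(nelx)           # (the repo trains a HAC/DDPG agent instead)
        x[i, j] = 1.0                    # "add material" action
        compliance = toy_compliance(x)
        reward = (1.0 / compliance) ** 0.5   # reward shaping used by CantileverEnv
        if x.mean() > vol_limit:             # stop once the volume budget is exceeded
            break
    return x, reward

if __name__ == "__main__":
    design, final_reward = run_episode()
    print(design.round(1))
    print(f"final reward: {final_reward:.3f}")
```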
8 | 9 | At present, Gigala software mainly consists of a topology optimization module and an offshore pipelay dynamics module (now separated into [Ocean Intella](https://github.com/gigatskhondia/ocean_intella) software). It uses artificial intelligence to assist an engineer in her design. You can use it as a research or engineering analysis tool to design different physical components and elements. 10 | 11 | RL agent designing a cantilever: 12 | ![Cantilever design by RL](https://github.com/user-attachments/assets/ae471032-56eb-4907-9f0b-e7a7d30038b9) 13 | 14 | 15 | Philosophy of the software: 16 | * free (accessibility) 17 | * open source (full customization) 18 | * practical performance on your PC (low carbon footprint) 19 | * developed in Python (widespread) 20 | * uses AI (modern) 21 | 22 | Please check my [Blog](https://gigatskhondia.medium.com/) and [ResearchGate](https://www.researchgate.net/profile/Giorgi-Tskhondia) for the specifics of the models and algorithms I use. 23 | 24 | For citation, please use [Reinforcement Learning Guided Engineering Design: from Topology Optimization to Advanced Modelling](https://jngr5.com/index.php/journal-of-next-generation-resea/article/view/95). 25 | 26 | Topology optimization by reinforcement learning: 27 |

28 | [image: Fig1 Screenshot 2023-08-02 at 12 39 14] 29 |

30 | 31 | Topology optimization by genetic algorithms: 32 |

33 | [image: Fig1 Screenshot 2024-07-20 at 06 06 51] 34 |

35 | 36 | Pseudo 3D topology optimization by reinforcement learning (see [preprint-0](https://www.researchgate.net/publication/393164291_Pseudo_3D_topology_optimisation_with_reinforcement_learning)): 37 |

38 | [image: Screenshot 2025-10-21 at 18 13 46] 39 |

40 | 41 | For current benchmarks of TO with RL see [preprint-1](https://www.researchgate.net/publication/398406554_Practical_topology_optimization_with_deep_reinforcement_learning). 42 | 43 | To keep up to date with the project please check [Gigala](https://gigala.io/) page. 44 | 45 | #### If you like my project and want to support it, please consider doing any of the following: #### 46 | * Star this project 47 | * [Sponsor](https://www.paypal.me/gigatskhondia) this project 48 | * [Contact](https://gigala.io/) me if you would like to collaborate 49 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/asset/topology_optimization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from utils import * 3 | from utils_ import * 4 | import gym 5 | from gym import spaces 6 | import random 7 | import numpy as np 8 | import autograd.numpy as anp 9 | from gym.utils import seeding 10 | 11 | 12 | class Model: 13 | def __init__(self, x): 14 | self.flag_ = True 15 | self.n, self.m = x.shape 16 | self.actions_dic = {} 17 | 18 | k = 0 19 | for i in range(self.n): 20 | for j in range(self.m): 21 | self.actions_dic[k] = (i, j) 22 | k += 1 23 | 24 | def action_space_(self, action, x_cap): 25 | x, y = self.actions_dic[action] 26 | x_cap[x][y] = 1 27 | 28 | @staticmethod 29 | def draw(x_cap): 30 | plt.figure(dpi=50) 31 | print('\nFinal Cantilever rl_beam design:') 32 | plt.imshow(x_cap) 33 | plt.show(block=False) 34 | plt.pause(3) 35 | plt.close('all') 36 | 37 | 38 | class CantileverEnv(gym.Env): 39 | 40 | metadata = {"render.modes": ["human"], 41 | # 'video.frames_per_second' : 30 42 | } 43 | 44 | def __init__(self): 45 | super().__init__() 46 | 47 | self.rd = -1 48 | self.args = get_args(*mbb_beam(rd=self.rd)) 49 | 50 | dim_cap = self.args.nelx*self.args.nely 51 | self.N_DISCRETE_ACTIONS = self.args.nelx*self.args.nely 52 | 53 | self.action_space = spaces.Box(low=0, high=1, 54 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64) 55 | 56 | self.observation_space = spaces.Box(low=np.array([-1e10 for x in range(dim_cap)]), 57 | high=np.array([1e10 for y in range(dim_cap)]), 58 | shape=(dim_cap,), 59 | dtype=np.float64) 60 | 61 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 62 | 63 | self.M = Model(self.x) 64 | 65 | self.reward = 0 66 | self.step_ = 0 67 | self.needs_reset = True 68 | self.layer_dim = 4 69 | self.n_layers = 2 70 | self.optimizer = 'Adam' 71 | self.seed() 72 | 73 | def seed(self, seed=None): 74 | self.np_random, seed = seeding.np_random(seed) 75 | return [seed] 76 | 77 | def step(self, action): 78 | 79 | self.args = get_args(*mbb_beam(rd=self.rd)) 80 | 81 | act=np.argmax(action) 82 | 83 | self.M.action_space_(act, self.x) 84 | 85 | self.tmp, self.const = fast_stopt(self.args, self.x) 86 | self.step_+=1 87 | 88 | self.reward = (1/self.tmp)**0.5 89 | 90 | done = False 91 | 92 | if self.const > 0.68: 93 | done = True 94 | 95 | if self.step_ > self.M.n*self.M.m: 96 | done = True 97 | 98 | if self.needs_reset: 99 | raise RuntimeError("Tried to step environment that needs reset") 100 | 101 | if done: 102 | self.needs_reset = True 103 | 104 | return self.x.reshape(self.x.shape[0]*self.x.shape[1]), self.reward, done, dict() 105 | 106 | def reset(self): 107 | 108 | if not self.M.flag_: 109 | self.rd = random.choice([0,2,-2]) 110 | else: 111 | self.rd = -1 112 | 113 | self.x = anp.ones((self.args.nely, 
self.args.nelx))*self.args.density 114 | 115 | self.reward = 0 116 | self.needs_reset = False 117 | self.step_ = 0 118 | 119 | return self.x.reshape(self.x.shape[0]*self.x.shape[1]) 120 | 121 | def render(self, mode="human"): 122 | self.M.draw(self.x) 123 | 124 | def close(self): 125 | pass 126 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/asset/topology_optimization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from utils import * 3 | import gym 4 | from gym import spaces 5 | import random 6 | import numpy as np 7 | import autograd.numpy as anp 8 | from gym.utils import seeding 9 | 10 | 11 | class Model: 12 | def __init__(self, x): 13 | self.flag_ = True 14 | self.n, self.m = x.shape 15 | self.actions_dic = {} 16 | 17 | k = 0 18 | for i in range(self.n): 19 | for j in range(self.m): 20 | self.actions_dic[k] = (i, j) 21 | k += 1 22 | 23 | def action_space_(self, action, x_cap): 24 | x, y = self.actions_dic[action] 25 | x_cap[x][y] = 1 26 | 27 | @staticmethod 28 | def draw(x_cap): 29 | plt.figure(dpi=50) 30 | print('\nFinal Cantilever rl_beam design:') 31 | plt.imshow(x_cap) 32 | plt.show(block=False) 33 | plt.pause(3) 34 | plt.close('all') 35 | 36 | 37 | class CantileverEnv(gym.Env): 38 | 39 | metadata = {"render.modes": ["human"], 40 | # 'video.frames_per_second' : 30 41 | } 42 | 43 | def __init__(self): 44 | super().__init__() 45 | 46 | self.rd = -1 47 | self.args = get_args(*mbb_beam(rd=self.rd)) 48 | 49 | dim_cap = self.args.nelx*self.args.nely 50 | self.N_DISCRETE_ACTIONS = self.args.nelx*self.args.nely 51 | 52 | self.action_space = spaces.Box(low=0, high=1, 53 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64) 54 | 55 | self.observation_space = spaces.Box(low=np.array([-1e10 for x in range(dim_cap)]), 56 | high=np.array([1e10 for y in range(dim_cap)]), 57 | shape=(dim_cap,), 58 | dtype=np.float64) 59 | 60 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 61 | 62 | self.M = Model(self.x) 63 | 64 | self.reward = 0 65 | self.step_ = 0 66 | self.needs_reset = True 67 | self.layer_dim = 4 68 | self.n_layers = 2 69 | self.optimizer = 'Adam' 70 | self.seed() 71 | 72 | def seed(self, seed=None): 73 | self.np_random, seed = seeding.np_random(seed) 74 | return [seed] 75 | 76 | def step(self, action): 77 | 78 | self.args = get_args(*mbb_beam(rd=self.rd)) 79 | 80 | act=np.argmax(action) 81 | 82 | self.M.action_space_(act, self.x) 83 | 84 | self.tmp, self.const = fast_stopt(self.args, self.x) 85 | self.step_+=1 86 | 87 | # entropy = -np.sum(self.x * np.log2(self.x)) 88 | # self.reward = (1/self.tmp)**0.5+entropy 89 | 90 | self.reward = (1/self.tmp)**0.5 91 | 92 | done = False 93 | 94 | if self.const > 0.68: 95 | done = True 96 | 97 | if self.step_ > self.M.n*self.M.m: 98 | done = True 99 | 100 | if self.needs_reset: 101 | raise RuntimeError("Tried to step environment that needs reset") 102 | 103 | if done: 104 | self.needs_reset = True 105 | 106 | return self.x.reshape(self.x.shape[0]*self.x.shape[1]), self.reward, done, dict() 107 | 108 | def reset(self): 109 | 110 | if not self.M.flag_: 111 | self.rd = random.choice([0,2,-2]) 112 | else: 113 | self.rd = -1 114 | 115 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 116 | 117 | self.reward = 0 118 | self.needs_reset = False 119 | self.step_ = 0 120 | 121 | return 
self.x.reshape(self.x.shape[0]*self.x.shape[1]) 122 | 123 | def render(self, mode="human"): 124 | self.M.draw(self.x) 125 | 126 | def close(self): 127 | pass 128 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def test(): 11 | 12 | #################### Hyperparameters #################### 13 | env_name ="T0-h-v1" 14 | 15 | save_episode = 20 # keep saving every n episodes 16 | max_episodes = 10 # max num of training episodes 17 | random_seed = 1 18 | render = False 19 | 20 | env = gym.make(env_name) 21 | env.layer_dim = 12 22 | env.n_layers = 16 23 | env.optimizer = 'SGD' 24 | state_dim = env.observation_space.shape[0] 25 | action_dim = env.N_DISCRETE_ACTIONS 26 | 27 | """ 28 | Actions (both primitive and subgoal) are implemented as follows: 29 | action = ( network output (Tanh) * bounds ) + offset 30 | clip_high and clip_low bound the exploration noise 31 | """ 32 | 33 | # primitive action bounds and offset 34 | action_bounds = env.action_space.high[0] 35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 37 | 38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 41 | 42 | # state bounds and offset 43 | # state_bounds_np = np.array([0.5, 0.5e7]) 44 | state_bounds_np = np.array([1, 1e7]) 45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 46 | # state_offset = np.array([0.5, 0.5e7]) 47 | state_offset = np.array([0, 0]) 48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 49 | state_clip_low = np.array([0, 0]) 50 | state_clip_high = np.array([1, 1e7]) 51 | 52 | exploration_action_noise = np.array([0.024320378739607497]) 53 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693]) 54 | 55 | goal_state=np.array([0.68, 20]) 56 | threshold=[0.05, 5] 57 | 58 | # HAC parameters: 59 | k_level = 2 # num of levels in hierarchy 60 | H = 6 # time horizon to achieve subgoal 61 | lamda = 0.4337021542899802 # subgoal testing parameter 62 | 63 | # DDPG parameters: 64 | gamma = 0.9703997234344832 # discount factor for future rewards 65 | n_iter = 148 # update policy n_iter times in one DDPG update 66 | batch_size = 183 # num of transitions sampled from replay buffer 67 | lr = 7.943448987978889e-05 68 | 69 | # save trained models 70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 71 | filename = "HAC_{}".format(env_name) 72 | ######################################################### 73 | 74 | if random_seed: 75 | print("Random Seed: {}".format(random_seed)) 76 | env.seed(random_seed) 77 | torch.manual_seed(random_seed) 78 | np.random.seed(random_seed) 79 | 80 | # creating HAC agent and setting parameters 81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 83 | 84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 85 | 
state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 86 | 87 | # load agent 88 | agent.load(directory, filename) 89 | 90 | # Evaluation 91 | # env.M.flag_=False 92 | for i_episode in range(1, max_episodes+1): 93 | 94 | agent.reward = 0 95 | agent.timestep = 0 96 | 97 | 98 | state = env.reset() 99 | agent.run_HAC(env, k_level-1, state, goal_state, True) 100 | env.render() 101 | 102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const)) 103 | 104 | env.close() 105 | 106 | 107 | 108 | if __name__ == '__main__': 109 | test() 110 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def test(): 11 | 12 | #################### Hyperparameters #################### 13 | env_name ="T0-h-v1" 14 | 15 | save_episode = 20 # keep saving every n episodes 16 | max_episodes = 10 # max num of training episodes 17 | random_seed = 42 18 | render = False 19 | 20 | env = gym.make(env_name) 21 | env.layer_dim = 126 22 | env.n_layers = 90 23 | env.optimizer = 'Adam' 24 | state_dim = env.observation_space.shape[0] 25 | action_dim = env.N_DISCRETE_ACTIONS 26 | 27 | """ 28 | Actions (both primitive and subgoal) are implemented as follows: 29 | action = ( network output (Tanh) * bounds ) + offset 30 | clip_high and clip_low bound the exploration noise 31 | """ 32 | 33 | # primitive action bounds and offset 34 | action_bounds = env.action_space.high[0] 35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 37 | 38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 41 | 42 | # state bounds and offset 43 | # state_bounds_np = np.array([0.5, 0.5e7]) 44 | state_bounds_np = np.array([1, 1e7]) 45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 46 | # state_offset = np.array([0.5, 0.5e7]) 47 | state_offset = np.array([0, 0]) 48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 49 | state_clip_low = np.array([0, 0]) 50 | state_clip_high = np.array([1, 1e7]) 51 | 52 | exploration_action_noise = np.array([ 0.013524454522609227]) 53 | exploration_state_noise = np.array([0.2065791657801734, 4371.871955300335]) 54 | 55 | goal_state=np.array([0.68, 23]) 56 | threshold=[0.05, 3] 57 | 58 | # HAC parameters: 59 | k_level = 2 # num of levels in hierarchy 60 | H = 8 # time horizon to achieve subgoal 61 | lamda = 0.9759336249447662 # subgoal testing parameter 62 | 63 | # DDPG parameters: 64 | gamma = 0.9845965064662501 # discount factor for future rewards 65 | n_iter = 100 # update policy n_iter times in one DDPG update 66 | batch_size = 100 # num of transitions sampled from replay buffer 67 | lr = 0.061703036438267876 68 | 69 | # save trained models 70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 71 | filename = "HAC_{}".format(env_name) 72 | ######################################################### 73 | 74 | 
if random_seed: 75 | print("Random Seed: {}".format(random_seed)) 76 | env.seed(random_seed) 77 | torch.manual_seed(random_seed) 78 | np.random.seed(random_seed) 79 | 80 | # creating HAC agent and setting parameters 81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 83 | 84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 86 | 87 | # load agent 88 | agent.load(directory, filename) 89 | 90 | # Evaluation 91 | # env.M.flag_=False 92 | for i_episode in range(1, max_episodes+1): 93 | 94 | agent.reward = 0 95 | agent.timestep = 0 96 | 97 | 98 | state = env.reset() 99 | agent.run_HAC(env, k_level-1, state, goal_state, True) 100 | env.render() 101 | 102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const)) 103 | 104 | env.close() 105 | 106 | 107 | 108 | if __name__ == '__main__': 109 | test() 110 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 8 | print(device) 9 | 10 | def test(): 11 | 12 | #################### Hyperparameters #################### 13 | env_name ="T0-h-v1" 14 | 15 | save_episode = 20 # keep saving every n episodes 16 | max_episodes = 10 # max num of training episodes 17 | random_seed = 1 18 | render = False 19 | 20 | env = gym.make(env_name) 21 | env.layer_dim = 12 22 | env.n_layers = 16 23 | env.optimizer = 'SGD' 24 | state_dim = env.observation_space.shape[0] 25 | action_dim = env.N_DISCRETE_ACTIONS 26 | 27 | """ 28 | Actions (both primitive and subgoal) are implemented as follows: 29 | action = ( network output (Tanh) * bounds ) + offset 30 | clip_high and clip_low bound the exploration noise 31 | """ 32 | 33 | # primitive action bounds and offset 34 | action_bounds = env.action_space.high[0] 35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 37 | 38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 41 | 42 | # state bounds and offset 43 | # state_bounds_np = np.array([0.5, 0.5e7]) 44 | state_bounds_np = np.array([1, 1e7]) 45 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 46 | # state_offset = np.array([0.5, 0.5e7]) 47 | state_offset = np.array([0, 0]) 48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 49 | state_clip_low = np.array([0, 0]) 50 | state_clip_high = np.array([1, 1e7]) 51 | 52 | exploration_action_noise = np.array([0.024320378739607497]) 53 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693]) 54 | 55 | goal_state=np.array([0.68, 20]) 56 | threshold=[0.05, 5] 57 | 58 | # HAC parameters: 59 | k_level = 2 # num of levels in hierarchy 60 | H = 6 # time horizon to achieve subgoal 61 | lamda = 0.4337021542899802 # subgoal testing 
parameter 62 | 63 | # DDPG parameters: 64 | gamma = 0.9703997234344832 # discount factor for future rewards 65 | n_iter = 148 # update policy n_iter times in one DDPG update 66 | batch_size = 183 # num of transitions sampled from replay buffer 67 | lr = 7.943448987978889e-05 68 | 69 | # save trained models 70 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 71 | filename = "HAC_{}".format(env_name) 72 | ######################################################### 73 | 74 | if random_seed: 75 | print("Random Seed: {}".format(random_seed)) 76 | env.seed(random_seed) 77 | torch.manual_seed(random_seed) 78 | np.random.seed(random_seed) 79 | 80 | # creating HAC agent and setting parameters 81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 83 | 84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 86 | 87 | # load agent 88 | agent.load(directory, filename) 89 | 90 | # Evaluation 91 | # env.M.flag_=False 92 | for i_episode in range(1, max_episodes+1): 93 | 94 | agent.reward = 0 95 | agent.timestep = 0 96 | 97 | 98 | state = env.reset() 99 | agent.run_HAC(env, k_level-1, state, goal_state, True) 100 | env.render() 101 | 102 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const)) 103 | 104 | env.close() 105 | 106 | 107 | 108 | if __name__ == '__main__': 109 | test() 110 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def test(): 11 | 12 | #################### Hyperparameters #################### 13 | env_name ="T0-h-v1" 14 | 15 | save_episode = 20 # keep saving every n episodes 16 | max_episodes = 10 # max num of training episodes 17 | random_seed = 1 18 | render = False 19 | 20 | env = gym.make(env_name) 21 | env.layer_dim = 12 22 | env.n_layers = 14 23 | env.optimizer = 'RMSprop' 24 | state_dim = env.observation_space.shape[0] 25 | action_dim = env.N_DISCRETE_ACTIONS 26 | 27 | """ 28 | Actions (both primitive and subgoal) are implemented as follows: 29 | action = ( network output (Tanh) * bounds ) + offset 30 | clip_high and clip_low bound the exploration noise 31 | """ 32 | 33 | # primitive action bounds and offset 34 | action_bounds = env.action_space.high[0] 35 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 37 | 38 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 39 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 41 | 42 | # state bounds and offset 43 | # state_bounds_np = np.array([0.5, 0.5e7]) 44 | # state_bounds_np = np.array([1, 1e7]) 45 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 46 | state_bounds = env.observation_space.high[0] 47 | # state_offset = np.array([0.5, 0.5e7]) 48 | state_offset = np.array([0 
for x in range(env.N_DISCRETE_ACTIONS)]) 49 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 50 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 52 | 53 | exploration_action_noise = np.array([0.5629988256824885]) 54 | exploration_state_noise = np.array([0.1313567686964759]) 55 | 56 | goal_ = [0.68, 30] 57 | goal_state = np.array(goal_+[0] * (env.N_DISCRETE_ACTIONS - len(goal_))) 58 | # print(goal_state) 59 | threshold = [0.05, 3] 60 | 61 | # HAC parameters: 62 | k_level = 2 # num of levels in hierarchy 63 | H = 9 # time horizon to achieve subgoal 64 | lamda = 0.3453605248576358 # subgoal testing parameter 65 | 66 | # DDPG parameters: 67 | gamma = 0.9777965340075817 # discount factor for future rewards 68 | n_iter = 223 # update policy n_iter times in one DDPG update 69 | batch_size = 340 # num of transitions sampled from replay buffer 70 | lr = 0.04471490153909566 71 | 72 | # save trained models 73 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 74 | filename = "HAC_{}".format(env_name) 75 | ######################################################### 76 | 77 | if random_seed: 78 | print("Random Seed: {}".format(random_seed)) 79 | env.seed(random_seed) 80 | torch.manual_seed(random_seed) 81 | np.random.seed(random_seed) 82 | 83 | # creating HAC agent and setting parameters 84 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 85 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 86 | 87 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 88 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 89 | 90 | # load agent 91 | agent.load(directory, filename) 92 | 93 | # Evaluation 94 | # env.M.flag_=False 95 | for i_episode in range(1, max_episodes+1): 96 | 97 | agent.reward = 0 98 | agent.timestep = 0 99 | 100 | 101 | state = env.reset() 102 | agent.run_HAC(env, k_level-1, state, goal_state, True) 103 | env.render() 104 | 105 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const)) 106 | 107 | env.close() 108 | 109 | 110 | 111 | if __name__ == '__main__': 112 | test() 113 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 9 | device = torch.device("cpu") 10 | print(device) 11 | 12 | def test(): 13 | 14 | #################### Hyperparameters #################### 15 | env_name ="T0-h-v1" 16 | 17 | save_episode = 20 # keep saving every n episodes 18 | max_episodes = 10 # max num of training episodes 19 | random_seed = 1 20 | render = False 21 | 22 | env = gym.make(env_name) 23 | env.layer_dim = 12 24 | env.n_layers = 14 25 | env.optimizer = 'RMSprop' 26 | state_dim = env.observation_space.shape[0] 27 | action_dim = env.N_DISCRETE_ACTIONS 28 | 29 | """ 30 | Actions (both primitive and subgoal) are implemented as follows: 31 | action = ( network output (Tanh) * bounds ) + offset 32 | clip_high and clip_low bound the exploration noise 33 | """ 34 | 35 | # primitive action bounds and offset 36 | 
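# How the scaling below is used (restating the docstring above): the actor output is
# mapped to an environment action via  action = output * action_bounds + action_offset,
# and any exploration noise added on top is clipped to [action_clip_low, action_clip_high].
# Here action_bounds = env.action_space.high[0] = 1 and the offset is zero, so every
# component stays in [0, 1], matching the Box action space in asset/topology_optimization.py.
# (The docstring mentions Tanh; in HRL/DDPG.py the final activation is currently Softmax,
#  but the affine map above is what bounds the action either way.)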
action_bounds = env.action_space.high[0] 37 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 38 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 39 | 40 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 41 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 42 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 43 | 44 | # state bounds and offset 45 | # state_bounds_np = np.array([0.5, 0.5e7]) 46 | # state_bounds_np = np.array([1, 1e7]) 47 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 48 | state_bounds = env.observation_space.high[0] 49 | # state_offset = np.array([0.5, 0.5e7]) 50 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 52 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 53 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 54 | 55 | exploration_action_noise = np.array([0.5629988256824885]) 56 | exploration_state_noise = np.array([0.1313567686964759]) 57 | 58 | goal_ = [0.68, 30] 59 | goal_state = np.array(goal_+[0] * (env.N_DISCRETE_ACTIONS - len(goal_))) 60 | # print(goal_state) 61 | threshold = [0.05, 3] 62 | 63 | # HAC parameters: 64 | k_level = 2 # num of levels in hierarchy 65 | H = 9 # time horizon to achieve subgoal 66 | lamda = 0.3453605248576358 # subgoal testing parameter 67 | 68 | # DDPG parameters: 69 | gamma = 0.9777965340075817 # discount factor for future rewards 70 | n_iter = 223 # update policy n_iter times in one DDPG update 71 | batch_size = 340 # num of transitions sampled from replay buffer 72 | lr = 0.04471490153909566 73 | 74 | # save trained models 75 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 76 | filename = "HAC_{}".format(env_name) 77 | ######################################################### 78 | 79 | if random_seed: 80 | print("Random Seed: {}".format(random_seed)) 81 | env.seed(random_seed) 82 | torch.manual_seed(random_seed) 83 | np.random.seed(random_seed) 84 | 85 | # creating HAC agent and setting parameters 86 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 87 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 88 | 89 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 90 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 91 | 92 | # load agent 93 | agent.load(directory, filename) 94 | 95 | # Evaluation 96 | # env.M.flag_=False 97 | for i_episode in range(1, max_episodes+1): 98 | 99 | agent.reward = 0 100 | agent.timestep = 0 101 | 102 | state = env.reset() 103 | agent.run_HAC(env, k_level-1, state, goal_state, True) 104 | env.render() 105 | 106 | print("Episode: {}\t Reward: {}\t len: {}\t tmp: {}\t const: {} ".format(i_episode, agent.reward, agent.timestep,env.tmp,env.const)) 107 | 108 | env.close() 109 | 110 | 111 | 112 | if __name__ == '__main__': 113 | test() 114 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def train(): 11 | 
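# Rough outline of what this function does (a reading aid; the code below is authoritative):
#   1. define hyperparameters and build the gym environment "T0-h-v1" registered by `asset`
#   2. construct the HAC agent (one DDPG actor/critic pair per hierarchy level)
#   3. per episode: reset the env, let agent.run_HAC(...) collect experience recursively,
#      check whether the final state reached goal_state within `threshold`,
#      then call agent.update(n_iter, batch_size, env) to train every level
#   4. append the episode reward to log.txt and checkpoint the models every
#      `save_episode` episodes under ./preTrained/<env_name>/<k_level>level/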
#################### Hyperparameters #################### 12 | env_name ="T0-h-v1" 13 | 14 | save_episode = 100 # keep saving every n episodes 15 | max_episodes = 5_000 # max num of training episodes 16 | random_seed = 1 17 | render = False 18 | 19 | env = gym.make(env_name) 20 | env.layer_dim= 12 21 | env.n_layers= 16 22 | env.optimizer='SGD' 23 | state_dim = env.observation_space.shape[0] 24 | action_dim = env.N_DISCRETE_ACTIONS 25 | 26 | """ 27 | Actions (both primitive and subgoal) are implemented as follows: 28 | action = ( network output (Tanh) * bounds ) + offset 29 | clip_high and clip_low bound the exploration noise 30 | """ 31 | 32 | # primitive action bounds and offset 33 | action_bounds = env.action_space.high[0] 34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | 37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | 41 | # state bounds and offset 42 | # state_bounds_np = np.array([0.5, 0.5e7]) 43 | state_bounds_np = np.array([1, 1e7]) 44 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 45 | # state_offset = np.array([0.5, 0.5e7]) 46 | state_offset = np.array([0, 0]) 47 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 48 | state_clip_low = np.array([0, 0]) 49 | state_clip_high = np.array([1, 1e7]) 50 | 51 | exploration_action_noise = np.array([0.024320378739607497]) 52 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693]) 53 | 54 | goal_state=np.array([0.68, 20]) 55 | threshold=[0.05, 5] 56 | 57 | # HAC parameters: 58 | k_level = 2 # num of levels in hierarchy 59 | H = 6 # time horizon to achieve subgoal 60 | lamda = 0.4337021542899802 # subgoal testing parameter 61 | 62 | # DDPG parameters: 63 | gamma = 0.9703997234344832 # discount factor for future rewards 64 | n_iter = 148 # update policy n_iter times in one DDPG update 65 | batch_size = 183 # num of transitions sampled from replay buffer 66 | lr = 7.943448987978889e-05 67 | 68 | # save trained models 69 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 70 | filename = "HAC_{}".format(env_name) 71 | ######################################################### 72 | 73 | 74 | if random_seed: 75 | print("Random Seed: {}".format(random_seed)) 76 | env.seed(random_seed) 77 | torch.manual_seed(random_seed) 78 | np.random.seed(random_seed) 79 | 80 | # creating HAC agent and setting parameters 81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 83 | 84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 86 | 87 | # logging file: 88 | log_f = open("log.txt","w+") 89 | 90 | # training procedure 91 | R=0 92 | for i_episode in range(1, max_episodes+1): 93 | agent.reward = 0 94 | agent.timestep = 0 95 | 96 | state = env.reset() 97 | # collecting experience in environment 98 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 99 | 100 | if agent.check_goal(last_state, goal_state, threshold, env): 101 | print("################ Solved! 
################ ") 102 | name = filename + '_solved' 103 | agent.save(directory, name) 104 | 105 | # update all levels 106 | agent.update(n_iter, batch_size, env) 107 | 108 | # logging updates: 109 | log_f.write('{},{}\n'.format(i_episode, agent.reward)) 110 | log_f.flush() 111 | 112 | if i_episode % save_episode == 0: 113 | # if agent.reward>R: 114 | R=agent.reward 115 | agent.save(directory, filename) 116 | print('SAVING ################# SAVING ################## SAVING:',R) 117 | 118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward)) 119 | 120 | 121 | if __name__ == '__main__': 122 | train() 123 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def train(): 11 | #################### Hyperparameters #################### 12 | env_name ="T0-h-v1" 13 | 14 | save_episode = 10 # keep saving every n episodes 15 | max_episodes = 100 # max num of training episodes 16 | random_seed = 42 17 | render = False 18 | 19 | env = gym.make(env_name) 20 | env.layer_dim= 126 21 | env.n_layers= 90 22 | env.optimizer='Adam' 23 | state_dim = env.observation_space.shape[0] 24 | action_dim = env.N_DISCRETE_ACTIONS 25 | 26 | """ 27 | Actions (both primitive and subgoal) are implemented as follows: 28 | action = ( network output (Tanh) * bounds ) + offset 29 | clip_high and clip_low bound the exploration noise 30 | """ 31 | 32 | # primitive action bounds and offset 33 | action_bounds = env.action_space.high[0] 34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | 37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | 41 | # state bounds and offset 42 | # state_bounds_np = np.array([0.5, 0.5e7]) 43 | state_bounds_np = np.array([1, 1e7]) 44 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 45 | # state_offset = np.array([0.5, 0.5e7]) 46 | state_offset = np.array([0, 0]) 47 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 48 | state_clip_low = np.array([0, 0]) 49 | state_clip_high = np.array([1, 1e7]) 50 | 51 | exploration_action_noise = np.array([ 0.013524454522609227]) 52 | exploration_state_noise = np.array([0.2065791657801734, 4371.871955300335]) 53 | 54 | goal_state=np.array([0.68, 23]) 55 | threshold=[0.05, 3] 56 | 57 | # HAC parameters: 58 | k_level = 2 # num of levels in hierarchy 59 | H = 8 # time horizon to achieve subgoal 60 | lamda = 0.9759336249447662 # subgoal testing parameter 61 | 62 | # DDPG parameters: 63 | gamma = 0.9845965064662501 # discount factor for future rewards 64 | n_iter = 100 # update policy n_iter times in one DDPG update 65 | batch_size = 100 # num of transitions sampled from replay buffer 66 | lr = 0.061703036438267876 67 | 68 | # save trained models 69 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 70 | filename = "HAC_{}".format(env_name) 71 | ######################################################### 72 | 73 | 74 | if random_seed: 75 
| print("Random Seed: {}".format(random_seed)) 76 | env.seed(random_seed) 77 | torch.manual_seed(random_seed) 78 | np.random.seed(random_seed) 79 | 80 | # creating HAC agent and setting parameters 81 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 82 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 83 | 84 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 85 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 86 | 87 | # logging file: 88 | log_f = open("log.txt","w+") 89 | 90 | # training procedure 91 | R=0 92 | for i_episode in range(1, max_episodes+1): 93 | agent.reward = 0 94 | agent.timestep = 0 95 | 96 | state = env.reset() 97 | # collecting experience in environment 98 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 99 | 100 | if agent.check_goal(last_state, goal_state, threshold, env): 101 | print("################ Solved! ################ ") 102 | name = filename + '_solved' 103 | agent.save(directory, name) 104 | 105 | # update all levels 106 | agent.update(n_iter, batch_size, env) 107 | 108 | # logging updates: 109 | log_f.write('{},{}\n'.format(i_episode, agent.reward)) 110 | log_f.flush() 111 | 112 | if i_episode % save_episode == 0: 113 | # if agent.reward>R: 114 | R=agent.reward 115 | agent.save(directory, filename) 116 | print('SAVING ################# SAVING ################## SAVING:',R) 117 | 118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward)) 119 | 120 | 121 | if __name__ == '__main__': 122 | train() 123 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/torchfem/materials.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from math import sqrt 3 | from typing import Callable 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | 9 | class Material(ABC): 10 | """Base class for material models.""" 11 | 12 | @abstractmethod 13 | def __init__(self): 14 | self.n_state: int 15 | self.is_vectorized: bool 16 | self.C: Tensor 17 | pass 18 | 19 | @abstractmethod 20 | def vectorize(self, n_elem: int): 21 | """Create a vectorized copy of the material for `n_elm` elements.""" 22 | pass 23 | 24 | @abstractmethod 25 | def step(self, depsilon: Tensor, epsilon: Tensor, sigma: Tensor, state: Tensor): 26 | """Perform a strain increment.""" 27 | pass 28 | 29 | @abstractmethod 30 | def rotate(self, R): 31 | """Rotate the material with rotation matrix R.""" 32 | pass 33 | 34 | 35 | class IsotropicElasticity3D(Material): 36 | def __init__( 37 | self, 38 | E: "float | Tensor", 39 | nu: "float | Tenso", 40 | eps0: "float | Tenso" = 0.0, 41 | ): 42 | # Convert float inputs to tensors 43 | if isinstance(E, float): 44 | E = torch.tensor(E) 45 | if isinstance(nu, float): 46 | nu = torch.tensor(nu) 47 | if isinstance(eps0, float): 48 | eps0 = torch.tensor(eps0) 49 | 50 | # Store material properties 51 | self.E = E 52 | self.nu = nu 53 | self.eps0 = eps0 54 | 55 | # There are no internal variables 56 | self.n_state = 0 57 | 58 | # Check if the material is vectorized 59 | if E.dim() > 0: 60 | self.is_vectorized = True 61 | else: 62 | self.is_vectorized = False 63 | 64 | # Lame parameters 65 | self.lbd = self.E * self.nu / ((1.0 + self.nu) * (1.0 - 2.0 * self.nu)) 66 | self.G = self.E / (2.0 * (1.0 + self.nu)) 67 | 68 | # Stiffness tensor 69 | 
z = torch.zeros_like(self.E) 70 | diag = self.lbd + 2.0 * self.G 71 | self.C = torch.stack( 72 | [ 73 | torch.stack([diag, self.lbd, self.lbd, z, z, z], dim=-1), 74 | torch.stack([self.lbd, diag, self.lbd, z, z, z], dim=-1), 75 | torch.stack([self.lbd, self.lbd, diag, z, z, z], dim=-1), 76 | torch.stack([z, z, z, self.G, z, z], dim=-1), 77 | torch.stack([z, z, z, z, self.G, z], dim=-1), 78 | torch.stack([z, z, z, z, z, self.G], dim=-1), 79 | ], 80 | dim=-1, 81 | ) 82 | 83 | # Stiffness tensor for shells 84 | self.Cs = torch.stack( 85 | [torch.stack([self.G, z], dim=-1), torch.stack([z, self.G], dim=-1)], dim=-1 86 | ) 87 | 88 | def vectorize(self, n_elem: int): 89 | """Create a vectorized copy of the material for `n_elm` elements.""" 90 | if self.is_vectorized: 91 | print("Material is already vectorized.") 92 | return self 93 | else: 94 | E = self.E.repeat(n_elem) 95 | nu = self.nu.repeat(n_elem) 96 | eps0 = self.eps0.repeat(n_elem) 97 | return IsotropicElasticity3D(E, nu, eps0) 98 | 99 | def step(self, depsilon: Tensor, epsilon: Tensor, sigma: Tensor, state: Tensor): 100 | """Perform a strain increment.""" 101 | epsilon_new = epsilon + depsilon 102 | sigma_new = sigma + torch.einsum("...ij,...j->...i", self.C, depsilon) 103 | state_new = state 104 | ddsdde = self.C 105 | return epsilon_new, sigma_new, state_new, ddsdde 106 | 107 | def rotate(self, R: Tensor): 108 | """Rotate the material with rotation matrix R.""" 109 | print("Rotating an isotropic material has no effect.") 110 | return self 111 | 112 | 113 | class IsotropicElasticityPlaneStress(IsotropicElasticity3D): 114 | """Isotropic 2D plane stress material.""" 115 | 116 | def __init__(self, E: "float | Tensor", nu: "float | Tensor"): 117 | super().__init__(E, nu) 118 | 119 | # Overwrite the 3D stiffness tensor with a 2D plane stress tensor 120 | fac = self.E / (1.0 - self.nu**2) 121 | zero = torch.zeros_like(self.E) 122 | self.C = torch.stack( 123 | [ 124 | torch.stack([fac, fac * self.nu, zero], dim=-1), 125 | torch.stack([fac * self.nu, fac, zero], dim=-1), 126 | torch.stack([zero, zero, fac * 0.5 * (1.0 - self.nu)], dim=-1), 127 | ], 128 | dim=-1, 129 | ) 130 | 131 | def vectorize(self, n_elem: int): 132 | """Create a vectorized copy of the material for `n_elm` elements.""" 133 | if self.is_vectorized: 134 | print("Material is already vectorized.") 135 | return self 136 | else: 137 | E = self.E.repeat(n_elem) 138 | nu = self.nu.repeat(n_elem) 139 | return IsotropicElasticityPlaneStress(E, nu) 140 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | def train(): 11 | #################### Hyperparameters #################### 12 | env_name ="T0-h-v1" 13 | 14 | save_episode = 100 # keep saving every n episodes 15 | max_episodes = 100_000 # max num of training episodes 16 | random_seed = 1 17 | render = False 18 | 19 | env = gym.make(env_name) 20 | env.layer_dim= 12 21 | env.n_layers= 14 22 | env.optimizer='RMSprop' 23 | state_dim = env.observation_space.shape[0] 24 | action_dim = env.N_DISCRETE_ACTIONS 25 | 26 | """ 27 | Actions (both primitive and subgoal) are implemented as follows: 28 | action = ( network output (Tanh) * bounds ) + offset 29 | 
clip_high and clip_low bound the exploration noise 30 | """ 31 | 32 | # primitive action bounds and offset 33 | action_bounds = env.action_space.high[0] 34 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 35 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 36 | 37 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 38 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 39 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 40 | 41 | # state bounds and offset 42 | # state_bounds_np = np.array([0.5, 0.5e7]) 43 | # state_bounds_np = np.array([1, 1e7]) 44 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 45 | state_bounds = env.observation_space.high[0] 46 | # state_offset = np.array([0.5, 0.5e7]) 47 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 48 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 49 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 50 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | 52 | exploration_action_noise = np.array([0.5629988256824885]) 53 | exploration_state_noise = np.array([0.1313567686964759]) 54 | 55 | goal_ = [0.68, 30] 56 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_))) 57 | threshold = [0.05, 3] 58 | 59 | # HAC parameters: 60 | k_level = 2 # num of levels in hierarchy 61 | H = 9 # time horizon to achieve subgoal 62 | lamda = 0.3453605248576358 # subgoal testing parameter 63 | 64 | # DDPG parameters: 65 | gamma = 0.9777965340075817 # discount factor for future rewards 66 | n_iter = 223 # update policy n_iter times in one DDPG update 67 | batch_size = 340 # num of transitions sampled from replay buffer 68 | lr = 0.04471490153909566 69 | 70 | # save trained models 71 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 72 | filename = "HAC_{}".format(env_name) 73 | ######################################################### 74 | 75 | if random_seed: 76 | print("Random Seed: {}".format(random_seed)) 77 | env.seed(random_seed) 78 | torch.manual_seed(random_seed) 79 | np.random.seed(random_seed) 80 | 81 | # creating HAC agent and setting parameters 82 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 83 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 84 | 85 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 86 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 87 | 88 | # logging file: 89 | log_f = open("log.txt","w+") 90 | 91 | # training procedure 92 | R=0 93 | for i_episode in range(1, max_episodes+1): 94 | agent.reward = 0 95 | agent.timestep = 0 96 | 97 | state = env.reset() 98 | # collecting experience in environment 99 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 100 | 101 | if agent.check_goal(last_state, goal_state, threshold, env): 102 | print("################ Solved! 
################ ") 103 | name = filename + '_solved' 104 | agent.save(directory, name) 105 | 106 | # update all levels 107 | agent.update(n_iter, batch_size, env) 108 | 109 | # logging updates: 110 | log_f.write('{},{}\n'.format(i_episode, agent.reward)) 111 | log_f.flush() 112 | R += agent.reward 113 | if i_episode % save_episode == 0: 114 | agent.save(directory, filename) 115 | print('SAVING ################# SAVING ################## SAVING:', R/save_episode) 116 | R = 0 117 | 118 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward)) 119 | 120 | 121 | if __name__ == '__main__': 122 | train() 123 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | 8 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 9 | device = torch.device("cpu") 10 | print(device) 11 | 12 | def train(): 13 | #################### Hyperparameters #################### 14 | env_name ="T0-h-v1" 15 | 16 | save_episode = 100 # keep saving every n episodes 17 | max_episodes = 100_000 # max num of training episodes 18 | random_seed = 1 19 | render = False 20 | 21 | env = gym.make(env_name) 22 | env.layer_dim= 12 23 | env.n_layers= 14 24 | env.optimizer='RMSprop' 25 | state_dim = env.observation_space.shape[0] 26 | action_dim = env.N_DISCRETE_ACTIONS 27 | 28 | """ 29 | Actions (both primitive and subgoal) are implemented as follows: 30 | action = ( network output (Tanh) * bounds ) + offset 31 | clip_high and clip_low bound the exploration noise 32 | """ 33 | 34 | # primitive action bounds and offset 35 | action_bounds = env.action_space.high[0] 36 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 37 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 38 | 39 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 40 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 41 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 42 | 43 | # state bounds and offset 44 | # state_bounds_np = np.array([0.5, 0.5e7]) 45 | # state_bounds_np = np.array([1, 1e7]) 46 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 47 | state_bounds = env.observation_space.high[0] 48 | # state_offset = np.array([0.5, 0.5e7]) 49 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 50 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 51 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 52 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 53 | 54 | exploration_action_noise = np.array([0.5629988256824885]) 55 | exploration_state_noise = np.array([0.1313567686964759]) 56 | 57 | goal_ = [0.68, 30] 58 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_))) 59 | threshold = [0.05, 3] 60 | 61 | # HAC parameters: 62 | k_level = 2 # num of levels in hierarchy 63 | H = 9 # time horizon to achieve subgoal 64 | lamda = 0.3453605248576358 # subgoal testing parameter 65 | 66 | # DDPG parameters: 67 | gamma = 0.9777965340075817 # discount factor for future rewards 68 | n_iter = 223 # update policy n_iter times in one DDPG update 69 | batch_size = 340 # num of transitions sampled from replay buffer 70 | lr 
= 0.04471490153909566 71 | 72 | # save trained models 73 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 74 | filename = "HAC_{}".format(env_name) 75 | ######################################################### 76 | 77 | if random_seed: 78 | print("Random Seed: {}".format(random_seed)) 79 | env.seed(random_seed) 80 | torch.manual_seed(random_seed) 81 | np.random.seed(random_seed) 82 | 83 | # creating HAC agent and setting parameters 84 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 85 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 86 | 87 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 88 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 89 | 90 | # logging file: 91 | log_f = open("log.txt","w+") 92 | 93 | # training procedure 94 | R=0 95 | for i_episode in range(1, max_episodes+1): 96 | agent.reward = 0 97 | agent.timestep = 0 98 | 99 | state = env.reset() 100 | # collecting experience in environment 101 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 102 | 103 | if agent.check_goal(last_state, goal_state, threshold, env): 104 | print("################ Solved! ################ ") 105 | name = filename + '_solved' 106 | agent.save(directory, name) 107 | 108 | # update all levels 109 | agent.update(n_iter, batch_size, env) 110 | 111 | # logging updates: 112 | log_f.write('{},{}\n'.format(i_episode, agent.reward)) 113 | log_f.flush() 114 | R += agent.reward 115 | if i_episode % save_episode == 0: 116 | agent.save(directory, filename) 117 | print('SAVING ################# SAVING ################## SAVING:', R/save_episode) 118 | R = 0 119 | 120 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward)) 121 | 122 | 123 | if __name__ == '__main__': 124 | train() 125 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import asset 6 | 7 | # if torch.backends.mps.is_available(): 8 | # mps_device = torch.device("mps:0") 9 | # x = torch.ones(1).type(torch.float32).to(mps_device) 10 | # print(x) 11 | # else: 12 | # print ("MPS device not found.") 13 | 14 | 15 | device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 16 | print(device) 17 | 18 | 19 | def train(): 20 | #################### Hyperparameters #################### 21 | env_name ="T0-h-v1" 22 | 23 | save_episode = 100 # keep saving every n episodes 24 | max_episodes = 50_000 # max num of training episodes # new line - reversed 25 | random_seed = 1 26 | render = False 27 | 28 | env = gym.make(env_name) 29 | env.layer_dim= 12 30 | env.n_layers= 16 31 | env.optimizer='SGD' 32 | state_dim = env.observation_space.shape[0] 33 | action_dim = env.N_DISCRETE_ACTIONS 34 | 35 | """ 36 | Actions (both primitive and subgoal) are implemented as follows: 37 | action = ( network output (Tanh) * bounds ) + offset 38 | clip_high and clip_low bound the exploration noise 39 | """ 40 | 41 | # primitive action bounds and offset 42 | action_bounds = env.action_space.high[0] 43 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 44 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 45 | 46 | action_offset = 
torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 47 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 48 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 49 | 50 | # state bounds and offset 51 | # state_bounds_np = np.array([0.5, 0.5e7]) 52 | state_bounds_np = np.array([1, 1e7]) 53 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 54 | # state_offset = np.array([0.5, 0.5e7]) 55 | state_offset = np.array([0, 0]) 56 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 57 | state_clip_low = np.array([0, 0]) 58 | state_clip_high = np.array([1, 1e7]) 59 | 60 | exploration_action_noise = np.array([0.024320378739607497]) 61 | exploration_state_noise = np.array([ 0.14694893372824905, 19361.835172087693]) 62 | 63 | goal_state = np.array([0.68, 20]) 64 | threshold = [0.05, 5] 65 | 66 | # HAC parameters: 67 | k_level = 2 # num of levels in hierarchy 68 | H = 6 # time horizon to achieve subgoal 69 | lamda = 0.4337021542899802 # subgoal testing parameter 70 | 71 | # DDPG parameters: 72 | gamma = 0.9703997234344832 # discount factor for future rewards 73 | n_iter = 148 # update policy n_iter times in one DDPG update 74 | batch_size = 10000 # num of transitions sampled from replay buffer 75 | lr = 7.943448987978889e-05 76 | 77 | # save trained models 78 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 79 | filename = "HAC_{}".format(env_name) 80 | ######################################################### 81 | 82 | 83 | if random_seed: 84 | print("Random Seed: {}".format(random_seed)) 85 | env.seed(random_seed) 86 | torch.manual_seed(random_seed) 87 | np.random.seed(random_seed) 88 | 89 | # creating HAC agent and setting parameters 90 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 91 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 92 | 93 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 94 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 95 | 96 | # logging file: 97 | log_f = open("log.txt","w+") 98 | 99 | # training procedure 100 | R=0 101 | for i_episode in range(1, max_episodes+1): 102 | agent.reward = 0 103 | agent.timestep = 0 104 | 105 | state = env.reset() 106 | # collecting experience in environment 107 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 108 | 109 | if agent.check_goal(last_state, goal_state, threshold, env): 110 | print("################ Solved! 
################ ") 111 | name = filename + '_solved' 112 | agent.save(directory, name) 113 | 114 | # update all levels 115 | agent.update(n_iter, batch_size, env) 116 | 117 | # logging updates: 118 | log_f.write('{},{}\n'.format(i_episode, agent.reward)) 119 | log_f.flush() 120 | 121 | if i_episode % save_episode == 0: 122 | # if agent.reward>R: 123 | R=agent.reward 124 | agent.save(directory, filename) 125 | print('SAVING ################# SAVING ################## SAVING:',R) 126 | 127 | print("Episode: {}\t Reward: {}".format(i_episode, agent.reward)) 128 | 129 | 130 | if __name__ == '__main__': 131 | train() 132 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/asset/topology_optimization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from utils import * 3 | import gym 4 | from gym import spaces 5 | import random 6 | import numpy as np 7 | import autograd.numpy as anp 8 | from gym.utils import seeding 9 | 10 | 11 | class Model: 12 | def __init__(self, x): 13 | self.flag_ = True 14 | # self.flag_ = False 15 | self.n, self.m = x.shape 16 | self.actions_dic={} 17 | 18 | k=0 19 | for i in range(self.n): 20 | for j in range(self.m): 21 | self.actions_dic[k]=(i,j) 22 | k+=1 23 | 24 | def action_space_(self, action, X): 25 | x,y=self.actions_dic[action] 26 | # penalty=(X[x][y]==1) 27 | X[x][y]=1 28 | # if penalty: 29 | # return 1e-7 30 | # return 0 31 | 32 | def draw(self,X): 33 | plt.figure(dpi=50) 34 | print('\nFinal Cantilever rl_beam design:') 35 | plt.imshow(X) 36 | plt.show(block=False) 37 | plt.pause(3) 38 | plt.close('all') 39 | 40 | 41 | class CantileverEnv(gym.Env): 42 | 43 | metadata = {"render.modes": ["human"], 44 | # 'video.frames_per_second' : 30 45 | } 46 | 47 | def __init__(self): 48 | super().__init__() 49 | 50 | 51 | self.rd=0 52 | self.args = get_args(*mbb_beam(rd=self.rd)) 53 | 54 | DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2 55 | self.N_DISCRETE_ACTIONS=self.args.nelx*self.args.nely 56 | 57 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS) 58 | 59 | self.action_space = spaces.Box(low=0, high=1, 60 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64) 61 | 62 | self.low_state=np.array([0, 0]) 63 | self.high_state=np.array([1, 1e7]) 64 | 65 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state, 66 | dtype=np.float64) 67 | 68 | 69 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 70 | 71 | self.M=Model(self.x) 72 | 73 | self.reward=0 74 | self.step_=0 75 | self.needs_reset = True 76 | self.y=np.array([1e-4, 1e7]) 77 | self.layer_dim=4 78 | self.n_layers=2 79 | self.optimizer='Adam' 80 | self.seed() 81 | 82 | def seed(self, seed=None): 83 | self.np_random, seed = seeding.np_random(seed) 84 | return [seed] 85 | 86 | def step(self, action): 87 | 88 | # action=action*(1-self.x.reshape(len(action),)+1e-4) 89 | # when altering boundary conditions and forces, do not change action values in those cells 90 | 91 | # to give the agent an ability to do the same actions 92 | self.penalty_coeff= 0.3 93 | action=action*(1-self.penalty_coeff*self.x.reshape(len(action),)) 94 | 95 | self.args = get_args(*mbb_beam(rd=self.rd)) 96 | 97 | # print(action) 98 | act=np.argmax(action) 99 | 100 | 101 | self.M.action_space_(act, self.x) 102 | 103 | self.tmp, self.const = fast_stopt(self.args, self.x) 104 | self.step_+=1 105 | 106 | # self.reward = 
(1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7) 107 | self.reward = (1/self.tmp)**0.5 108 | 109 | # self.reward=(1/self.tmp+self.const**2)**0.5 110 | # self.reward=(self.const/self.tmp)**0.5 111 | 112 | # self.reward += (1/self.tmp)**2 113 | # self.reward =(1/self.tmp)**2 - penalty 114 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2 115 | 116 | done=False 117 | 118 | if self.const>0.68: 119 | # self.reward-=1 120 | done=True 121 | 122 | # if self.const>0.65 and 100 self.M.n*self.M.m: 127 | done = True 128 | 129 | if self.needs_reset: 130 | raise RuntimeError("Tried to step environment that needs reset") 131 | 132 | 133 | if done: 134 | self.needs_reset = True 135 | 136 | return np.array([self.const,self.tmp]), self.reward, done, dict() 137 | 138 | def reset(self): 139 | 140 | if not self.M.flag_: 141 | self.rd=random.choice([0,2,-2]) 142 | else: 143 | self.rd=-1 144 | 145 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 146 | 147 | self.reward=0 148 | self.needs_reset = False 149 | self.step_=0 150 | 151 | self.y=np.array([1e-4, 1e7]) 152 | return self.y 153 | 154 | 155 | def render(self, mode="human"): 156 | self.M.draw(self.x) 157 | 158 | def close(self): 159 | pass 160 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/asset/topology_optimization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from utils import * 3 | import gym 4 | from gym import spaces 5 | import random 6 | import numpy as np 7 | import autograd.numpy as anp 8 | from gym.utils import seeding 9 | 10 | 11 | class Model: 12 | def __init__(self, x): 13 | self.flag_ = True 14 | # self.flag_ = False 15 | self.n, self.m = x.shape 16 | self.actions_dic={} 17 | 18 | k=0 19 | for i in range(self.n): 20 | for j in range(self.m): 21 | self.actions_dic[k]=(i,j) 22 | k+=1 23 | 24 | def action_space_(self, action, X): 25 | x,y=self.actions_dic[action] 26 | # penalty=(X[x][y]==1) 27 | X[x][y]=1 28 | # if penalty: 29 | # return 1e-7 30 | # return 0 31 | 32 | def draw(self,X): 33 | plt.figure(dpi=50) 34 | print('\nFinal Cantilever rl_beam design:') 35 | plt.imshow(X) 36 | plt.show(block=False) 37 | plt.pause(3) 38 | plt.close('all') 39 | 40 | 41 | class CantileverEnv(gym.Env): 42 | 43 | metadata = {"render.modes": ["human"], 44 | # 'video.frames_per_second' : 30 45 | } 46 | 47 | def __init__(self): 48 | super().__init__() 49 | 50 | 51 | self.rd=0 52 | self.args = get_args(*mbb_beam(rd=self.rd)) 53 | 54 | DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2 55 | self.N_DISCRETE_ACTIONS=self.args.nelx*self.args.nely 56 | 57 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS) 58 | 59 | self.action_space = spaces.Box(low=0, high=1, 60 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64) 61 | 62 | self.low_state=np.array([0, 0]) 63 | self.high_state=np.array([1, 1e7]) 64 | 65 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state, 66 | dtype=np.float64) 67 | 68 | 69 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 70 | 71 | self.M=Model(self.x) 72 | 73 | self.reward=0 74 | self.step_=0 75 | self.needs_reset = True 76 | self.y=np.array([1e-4, 1e7]) 77 | self.layer_dim=4 78 | self.n_layers=2 79 | self.optimizer='Adam' 80 | self.seed() 81 | 82 | def seed(self, 
seed=None): 83 | self.np_random, seed = seeding.np_random(seed) 84 | return [seed] 85 | 86 | def step(self, action): 87 | 88 | # action=action*(1-self.x.reshape(len(action),)+1e-4) 89 | # when altering boundary conditions and forces, do not change action values in those cells 90 | 91 | # to give the agent an ability to do the same actions 92 | self.penalty_coeff= 0.3 93 | action=action*(1-self.penalty_coeff*self.x.reshape(len(action),)) 94 | 95 | self.args = get_args(*mbb_beam(rd=self.rd)) 96 | 97 | # print(action) 98 | act=np.argmax(action) 99 | 100 | 101 | self.M.action_space_(act, self.x) 102 | 103 | self.tmp, self.const = fast_stopt(self.args, self.x) 104 | self.step_+=1 105 | 106 | # self.reward = (1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7) 107 | self.reward = (1/self.tmp)**0.5 108 | 109 | # self.reward=(1/self.tmp+self.const**2)**0.5 110 | # self.reward=(self.const/self.tmp)**0.5 111 | 112 | # self.reward += (1/self.tmp)**2 113 | # self.reward =(1/self.tmp)**2 - penalty 114 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2 115 | 116 | done=False 117 | 118 | if self.const>0.68: 119 | # self.reward-=1 120 | done=True 121 | 122 | # if self.const>0.65 and 100 self.M.n*self.M.m: 127 | done = True 128 | 129 | if self.needs_reset: 130 | raise RuntimeError("Tried to step environment that needs reset") 131 | 132 | 133 | if done: 134 | self.needs_reset = True 135 | 136 | return np.array([self.const,self.tmp]), self.reward, done, dict() 137 | 138 | def reset(self): 139 | 140 | if not self.M.flag_: 141 | self.rd=random.choice([0,2,-2]) 142 | else: 143 | self.rd=-1 144 | 145 | self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 146 | 147 | self.reward=0 148 | self.needs_reset = False 149 | self.step_=0 150 | 151 | self.y=np.array([1e-4, 1e7]) 152 | return self.y 153 | 154 | 155 | def render(self, mode="human"): 156 | self.M.draw(self.x) 157 | 158 | def close(self): 159 | pass 160 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/torchfem/sparse.py: -------------------------------------------------------------------------------- 1 | import pyamg 2 | import torch 3 | from scipy.sparse import coo_matrix as scipy_coo_matrix 4 | from scipy.sparse.linalg import minres as scipy_minres 5 | from scipy.sparse.linalg import spsolve as scipy_spsolve 6 | from torch import Tensor 7 | from torch.autograd import Function 8 | 9 | try: 10 | # TODO - rewrite these functions to Metal or pytorch 11 | import cupy 12 | from cupyx.scipy.sparse import coo_matrix as cupy_coo_matrix 13 | from cupyx.scipy.sparse import diags as cupy_diags 14 | from cupyx.scipy.sparse.linalg import minres as cupy_minres 15 | from cupyx.scipy.sparse.linalg import spsolve as cupy_spsolve 16 | 17 | mps_available = True 18 | except ImportError: 19 | mps_available = False 20 | 21 | print(mps_available) 22 | 23 | 24 | class Solve(Function): 25 | """ 26 | Inspired by 27 | - https://blog.flaport.net/solving-sparse-linear-systems-in-pytorch.html 28 | - https://github.com/pytorch/pytorch/issues/69538 29 | - https://github.com/cai4cai/torchsparsegradutils 30 | """ 31 | 32 | @staticmethod 33 | def forward(A, b, B=None, rtol=1e-10, device=None, direct=None, M=None): 34 | # Check the input shape 35 | if A.ndim != 2 or (A.shape[0] != A.shape[1]): 36 | raise ValueError("A should be a square 2D matrix.") 37 | shape = A.size() 38 | 39 | # Move 
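# Summary of what this autograd Function computes (the code below is authoritative):
#   forward:  solve the sparse system A x = b, either with a direct sparse solve or with
#             MINRES plus a Jacobi / smoothed-aggregation AMG preconditioner.
#   backward: for an incoming gradient g = dL/dx, the adjoint identities give
#                 dL/db = A^{-T} g          (obtained by another Solve.apply on A.T)
#                 dL/dA = -(dL/db) x^T      (kept only on A's sparsity pattern)
#             which is what backward() assembles as a sparse_coo_tensor.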
to requested device, if available 40 | if device is not None: 41 | A = A.to(device) 42 | b = b.to(device) 43 | 44 | # Default to direct solver for small matrices 45 | if direct is not None: 46 | direct = shape[0] < 10000 47 | 48 | if A.device.type == "mps" and mps_available: 49 | A_cp = cupy_coo_matrix( 50 | ( 51 | cupy.asarray(A._values()), 52 | (cupy.asarray(A._indices()[0]), cupy.asarray(A._indices()[1])), 53 | ), 54 | shape=shape, 55 | ).tocsr() 56 | b_cp = cupy.asarray(b.data) 57 | if direct: 58 | x_xp = cupy_spsolve(A_cp, b_cp) 59 | else: 60 | # Jacobi preconditioner 61 | M = cupy_diags(1.0 / A_cp.diagonal()) 62 | # Solve with minres 63 | x_xp, exit_code = cupy_minres(A_cp, b_cp, M=M, tol=rtol) 64 | if exit_code != 0: 65 | raise RuntimeError(f"minres failed with exit code {exit_code}") 66 | else: 67 | A_np = scipy_coo_matrix( 68 | (A._values(), (A._indices()[0], A._indices()[1])), shape=shape 69 | ).tocsr() 70 | b_np = b.data.numpy() 71 | if B is None: 72 | B_np = None 73 | else: 74 | B_np = B.data.numpy() 75 | if direct: 76 | x_xp = scipy_spsolve(A_np, b_np) 77 | else: 78 | # AMG preconditioner with Jacobi smoother 79 | if M is None: 80 | ml = pyamg.smoothed_aggregation_solver(A_np, B_np, smooth="jacobi") 81 | M = ml.aspreconditioner() 82 | 83 | # Solve with minres 84 | x_xp, exit_code = scipy_minres(A_np, b_np, M=M, rtol=rtol) 85 | if exit_code != 0: 86 | raise RuntimeError(f"minres failed with exit code {exit_code}") 87 | 88 | # Convert back to torch 89 | x = torch.tensor(x_xp, requires_grad=True, dtype=b.dtype, device=b.device) 90 | 91 | return x 92 | 93 | @staticmethod 94 | def backward(ctx, grad): 95 | # Access the saved variables 96 | A, x = ctx.saved_tensors 97 | 98 | # Backprop rule: gradb = A^T @ grad 99 | gradb = Solve.apply(A.T, grad, ctx.B, ctx.rtol, ctx.device, ctx.direct, ctx.M) 100 | 101 | # Backprop rule: gradA = -gradb @ x^T, sparse version 102 | row = A._indices()[0, :] 103 | col = A._indices()[1, :] 104 | val = -gradb[row] * x[col] 105 | gradA = torch.sparse_coo_tensor(torch.stack([row, col]), val, A.shape) 106 | 107 | return gradA, gradb, None, None, None, None, None 108 | 109 | @staticmethod 110 | def setup_context(ctx, inputs, output): 111 | A, b, B, rtol, device, direct, M = inputs 112 | x = output 113 | ctx.save_for_backward(A, x) 114 | 115 | # Save the parameters for backward pass (including the preconditioner) 116 | ctx.rtol = rtol 117 | ctx.device = device 118 | ctx.direct = direct 119 | ctx.B = B 120 | ctx.M = M 121 | 122 | 123 | sparse_solve = Solve.apply 124 | 125 | 126 | def sparse_index_select(t: Tensor, slices: list["Tensor | None"]) -> Tensor: 127 | coalesced = t.is_coalesced() 128 | indices = t.indices() 129 | values = t.values() 130 | in_shape = t.shape 131 | out_shape = [] 132 | for dim, slice in enumerate(slices): 133 | if slice is None: 134 | out_shape.append(in_shape[dim]) 135 | else: 136 | out_shape.append(len(slice)) 137 | mask = torch.isin(indices[dim], slice) 138 | cumsum = torch.cumsum(torch.isin(torch.arange(0, in_shape[dim]), slice), 0) 139 | indices = indices[:, mask] 140 | values = values[mask] 141 | indices[dim] = cumsum[indices[dim]] - 1 142 | 143 | return torch.sparse_coo_tensor(indices, values, out_shape, is_coalesced=coalesced) -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/asset/topology_optimization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from 
utils import * 3 | import gym 4 | from gym import spaces 5 | import random 6 | import numpy as np 7 | import autograd.numpy as anp 8 | from gym.utils import seeding 9 | import torch 10 | 11 | class Model: 12 | def __init__(self, x): 13 | self.flag_ = True 14 | # self.flag_ = False 15 | self.n, self.m = 5,5 16 | self.actions_dic={} 17 | 18 | k=0 19 | for i in range(self.n): 20 | for j in range(self.m): 21 | self.actions_dic[k]=(i,j) 22 | k+=1 23 | 24 | def action_space_(self, action, X): 25 | x,y=self.actions_dic[action] 26 | # penalty=(X[x][y]==1) 27 | X[x][y]=1 28 | # if penalty: 29 | # return 1e-7 30 | # return 0 31 | 32 | def draw(self,X): 33 | plt.figure(dpi=50) 34 | print('\nFinal Cantilever beam design:') 35 | plt.imshow(X) 36 | plt.show(block=False) 37 | plt.pause(3) 38 | plt.close('all') 39 | 40 | 41 | class CantileverEnv(gym.Env): 42 | 43 | metadata = {"render.modes": ["human"], 44 | # 'video.frames_per_second' : 30 45 | } 46 | 47 | def __init__(self, device="mps"): 48 | super().__init__() 49 | 50 | self.device = device 51 | self.rd=0 52 | # self.args = get_args(*mbb_beam(rd=self.rd)) 53 | self.args=None 54 | # DIM=self.args.nelx*self.args.nely+(self.args.nelx+1)*(self.args.nely+1)*2 55 | DIM=25 56 | self.N_DISCRETE_ACTIONS=DIM 57 | 58 | # self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS) 59 | 60 | self.action_space = spaces.Box(low=0, high=1, 61 | shape=(self.N_DISCRETE_ACTIONS,), dtype=np.float64) 62 | 63 | self.low_state=np.array([0, 0]) 64 | self.high_state=np.array([1, 1e7]) 65 | 66 | self.observation_space = spaces.Box(low=self.low_state, high=self.high_state, 67 | dtype=np.float64) 68 | 69 | # self.x = anp.ones((self.args.nely, self.args.nelx))*self.args.density 70 | self.x= torch.ones(25, requires_grad=True)*0.01 71 | self.M=Model(self.x) 72 | 73 | self.reward=0 74 | self.step_=0 75 | self.needs_reset = True 76 | self.y=np.array([1e-4, 1e7]) 77 | self.layer_dim=4 78 | self.n_layers=2 79 | self.optimizer='Adam' 80 | self.seed() 81 | 82 | def seed(self, seed=None): 83 | self.np_random, seed = seeding.np_random(seed) 84 | return [seed] 85 | 86 | def step(self, action): 87 | 88 | # action=action*(1-self.x.reshape(len(action),)+1e-4) 89 | # when altering boundary conditions and forces, do not change action values in those cells 90 | 91 | # to give the agent an ability to do the same actions 92 | self.penalty_coeff = 0.3 93 | action = torch.Tensor(action)*torch.Tensor(1-self.penalty_coeff*self.x.reshape(len(action),)) 94 | # action = action / np.sqrt(np.sum(action**2)) # new line 95 | # print(action) 96 | # self.args = get_args(*mbb_beam(rd=self.rd)) 97 | self.args=None 98 | # print(action) 99 | # act=np.argmax(action) 100 | 101 | 102 | # self.M.action_space_(act, self.x) 103 | try: 104 | self.tmp, self.const = fast_stopt(self.args, torch.Tensor(action)) 105 | except: 106 | pass 107 | self.step_+=1 108 | self.x = action.reshape(5,5) 109 | # self.reward = (1/self.tmp)**2 if self.const <0.7 else (1/self.tmp)**2-(self.const-0.7) 110 | self.reward -= torch.log(self.tmp) # new line 111 | 112 | # self.reward=(1/self.tmp+self.const**2)**0.5 113 | # self.reward=(self.const/self.tmp)**0.5 114 | 115 | # self.reward += (1/self.tmp)**2 116 | # self.reward =(1/self.tmp)**2 - penalty 117 | # self.reward =-(self.tmp)**0.1*1e-4 + self.const*1e-2 if self.const<0.75 else -(self.tmp)**0.1*1e-4 - self.const*1e-2 118 | 119 | done=False 120 | 121 | if self.const>0.68: 122 | # self.reward-=1 123 | done=True 124 | 125 | # if self.const>0.65 and 100 self.M.n*self.M.m: 130 | done = True 131 | 
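# In this MPS draft the continuous action is treated directly as the 5x5 density field:
# self.x is overwritten with action.reshape(5, 5), fast_stopt (imported via utils) is
# evaluated on that torch tensor, the running reward accumulates -log(tmp), and the
# reward is replaced by a large fixed value (5000) when an episode terminates
# (see the lines marked "# new line").  The termination conditions above mirror the
# non-MPS environments: const > 0.68 or too many steps.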
132 | if self.needs_reset: 133 | raise RuntimeError("Tried to step environment that needs reset") 134 | 135 | 136 | if done: 137 | self.needs_reset = True 138 | self.reward = 5000 # new line 139 | 140 | return np.array([self.const.detach().numpy() ,self.tmp.detach().numpy() ]), self.reward, done, dict() 141 | 142 | def reset(self): 143 | 144 | if not self.M.flag_: 145 | self.rd=random.choice([0,2,-2]) 146 | else: 147 | self.rd=-1 148 | 149 | self.x = torch.ones(25, requires_grad=True)*0.01 150 | 151 | self.reward=0 152 | self.needs_reset = False 153 | self.step_=0 154 | 155 | self.y=np.array([1e-4, 1e7]) 156 | return self.y 157 | 158 | 159 | def render(self, mode="human"): 160 | self.M.draw(self.x) 161 | 162 | def close(self): 163 | pass 164 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/DDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | 6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 7 | 8 | 9 | class extract_tensor(nn.Module): 10 | def forward(self,x): 11 | # Output shape (batch, features, hidden) 12 | tensor, _ = x 13 | # Reshape shape (batch, hidden) 14 | return tensor 15 | 16 | 17 | class Actor(nn.Module): 18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers): 19 | super(Actor, self).__init__() 20 | 21 | # actor 22 | in_features = state_dim + state_dim 23 | 24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)] 25 | # layers.append(extract_tensor()) 26 | 27 | layers=[] 28 | # out_features = layer_dim 29 | for i in range(n_layers): 30 | 31 | # Suggest the number of units in each layer 32 | out_features = layer_dim 33 | 34 | # layers.append(nn.Linear(out_features, out_features)) 35 | layers.append(nn.Linear(in_features, out_features)) 36 | layers.append(nn.ReLU()) 37 | layers.append(nn.Dropout(p=0.2)) 38 | 39 | in_features = out_features 40 | 41 | # in_features = out_features 42 | 43 | layers.append(nn.Linear(in_features, action_dim)) 44 | # layers.append(nn.Tanh()) 45 | layers.append(nn.Softmax(dim=1)) 46 | self.actor = nn.Sequential(*layers) 47 | 48 | # max value of actions 49 | self.action_bounds = action_bounds 50 | self.offset = offset 51 | 52 | def forward(self, state, goal): 53 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset 54 | 55 | class Critic(nn.Module): 56 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers): 57 | super(Critic, self).__init__() 58 | # UVFA critic 59 | layers = [] 60 | 61 | 62 | in_features = state_dim + action_dim + state_dim 63 | 64 | for i in range(n_layers): 65 | 66 | # Suggest the number of units in each layer 67 | out_features = layer_dim 68 | 69 | layers.append(nn.Linear(in_features, out_features)) 70 | layers.append(nn.ReLU()) 71 | layers.append(nn.Dropout(p=0.2)) 72 | 73 | in_features = out_features 74 | 75 | layers.append(nn.Linear(in_features, 1)) 76 | layers.append(nn.Sigmoid()) 77 | self.critic = nn.Sequential(*layers) 78 | 79 | self.H = H 80 | 81 | def forward(self, state, action, goal): 82 | # rewards are in range [-H, 0] 83 | return -self.critic(torch.cat([state, action, goal], 1))* self.H 84 | 85 | 86 | class DDPG: 87 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers): 88 | 89 | self.actor = 
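# Training rules implemented in update() below, in the usual goal-conditioned DDPG form:
#   target:      y = r + (1 - done) * gamma * Q(s', pi(s', g), g)
#   critic loss: MSE( Q(s, a, g), y )
#   actor loss:  - mean Q(s, pi(s, g), g)
# Note the Critic above squashes its output with a Sigmoid and multiplies by -H, so
# predicted values stay in [-H, 0], matching HAC's shortest-path-style rewards.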
Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device) 90 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr) 91 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device) 92 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr) 93 | 94 | self.mseLoss = torch.nn.MSELoss() 95 | 96 | def select_action(self, state, goal): 97 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 98 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device) 99 | return self.actor(state, goal).detach().cpu().data.numpy().flatten() 100 | 101 | def update(self, buffer, n_iter, batch_size,env): 102 | 103 | 104 | for i in range(n_iter): 105 | # Sample a batch of transitions from replay buffer: 106 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size) 107 | 108 | # convert np arrays into tensors 109 | state = torch.FloatTensor(state).to(device) 110 | action = torch.FloatTensor(action).to(device) 111 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device) 112 | next_state = torch.FloatTensor(next_state).to(device) 113 | goal = torch.FloatTensor(goal).to(device) 114 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device) 115 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device) 116 | 117 | # select next action 118 | next_action = self.actor(next_state, goal).detach() 119 | 120 | # Compute target Q-value: 121 | target_Q = self.critic(next_state, next_action, goal).detach() 122 | target_Q = reward + ((1-done) * gamma * target_Q) 123 | 124 | 125 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q) 126 | self.critic_optimizer.zero_grad() 127 | critic_loss.backward() 128 | self.critic_optimizer.step() 129 | 130 | # Compute actor loss: 131 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean() 132 | 133 | # Optimize the actor 134 | self.actor_optimizer.zero_grad() 135 | actor_loss.backward() 136 | self.actor_optimizer.step() 137 | 138 | 139 | def save(self, directory, name): 140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name)) 142 | 143 | def load(self, directory, name): 144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu')) 145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu')) 146 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/DDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | 6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 7 | 8 | 9 | class extract_tensor(nn.Module): 10 | def forward(self,x): 11 | # Output shape (batch, features, hidden) 12 | tensor, _ = x 13 | # Reshape shape (batch, hidden) 14 | return tensor 15 | 16 | 17 | class Actor(nn.Module): 18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers): 19 | super(Actor, self).__init__() 20 | 21 | # actor 22 | in_features = state_dim + state_dim 23 | 24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)] 25 | # layers.append(extract_tensor()) 26 | 27 | layers=[] 28 | # out_features 
= layer_dim 29 | for i in range(n_layers): 30 | 31 | # Suggest the number of units in each layer 32 | out_features = layer_dim 33 | 34 | # layers.append(nn.Linear(out_features, out_features)) 35 | layers.append(nn.Linear(in_features, out_features)) 36 | layers.append(nn.ReLU()) 37 | # layers.append(nn.Dropout(p=0.2)) 38 | 39 | in_features = out_features 40 | 41 | # in_features = out_features 42 | 43 | layers.append(nn.Linear(in_features, action_dim)) 44 | # layers.append(nn.Tanh()) 45 | layers.append(nn.Softmax(dim=1)) 46 | self.actor = nn.Sequential(*layers) 47 | 48 | # max value of actions 49 | self.action_bounds = action_bounds 50 | self.offset = offset 51 | 52 | def forward(self, state, goal): 53 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset 54 | 55 | class Critic(nn.Module): 56 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers): 57 | super(Critic, self).__init__() 58 | # UVFA critic 59 | layers = [] 60 | 61 | 62 | in_features = state_dim + action_dim + state_dim 63 | 64 | for i in range(n_layers): 65 | 66 | # Suggest the number of units in each layer 67 | out_features = layer_dim 68 | 69 | layers.append(nn.Linear(in_features, out_features)) 70 | layers.append(nn.ReLU()) 71 | # layers.append(nn.Dropout(p=0.2)) 72 | 73 | in_features = out_features 74 | 75 | layers.append(nn.Linear(in_features, 1)) 76 | layers.append(nn.Sigmoid()) 77 | self.critic = nn.Sequential(*layers) 78 | 79 | self.H = H 80 | 81 | def forward(self, state, action, goal): 82 | # rewards are in range [-H, 0] 83 | return -self.critic(torch.cat([state, action, goal], 1))* self.H 84 | 85 | 86 | class DDPG: 87 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers): 88 | 89 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device) 90 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr) 91 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device) 92 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr) 93 | 94 | self.mseLoss = torch.nn.MSELoss() 95 | 96 | def select_action(self, state, goal): 97 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 98 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device) 99 | return self.actor(state, goal).detach().cpu().data.numpy().flatten() 100 | 101 | def update(self, buffer, n_iter, batch_size,env): 102 | 103 | 104 | for i in range(n_iter): 105 | # Sample a batch of transitions from replay buffer: 106 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size) 107 | 108 | # convert np arrays into tensors 109 | state = torch.FloatTensor(state).to(device) 110 | action = torch.FloatTensor(action).to(device) 111 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device) 112 | next_state = torch.FloatTensor(next_state).to(device) 113 | goal = torch.FloatTensor(goal).to(device) 114 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device) 115 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device) 116 | 117 | # select next action 118 | next_action = self.actor(next_state, goal).detach() 119 | 120 | # Compute target Q-value: 121 | target_Q = self.critic(next_state, next_action, goal).detach() 122 | target_Q = reward + ((1-done) * gamma * target_Q) 123 | 124 | 125 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q) 126 | self.critic_optimizer.zero_grad() 127 | 
critic_loss.backward() 128 | self.critic_optimizer.step() 129 | 130 | # Compute actor loss: 131 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean() 132 | 133 | # Optimize the actor 134 | self.actor_optimizer.zero_grad() 135 | actor_loss.backward() 136 | self.actor_optimizer.step() 137 | 138 | 139 | def save(self, directory, name): 140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name)) 142 | 143 | def load(self, directory, name): 144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu')) 145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu')) 146 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/DDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | 6 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 7 | 8 | 9 | class extract_tensor(nn.Module): 10 | def forward(self,x): 11 | # Output shape (batch, features, hidden) 12 | tensor, _ = x 13 | # Reshape shape (batch, hidden) 14 | return tensor 15 | 16 | 17 | class Actor(nn.Module): 18 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers): 19 | super(Actor, self).__init__() 20 | 21 | # actor 22 | in_features = state_dim + state_dim 23 | 24 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)] 25 | # layers.append(extract_tensor()) 26 | 27 | layers=[] 28 | # out_features = layer_dim 29 | for i in range(n_layers): 30 | 31 | # Suggest the number of units in each layer 32 | out_features = layer_dim 33 | 34 | # layers.append(nn.Linear(out_features, out_features)) 35 | layers.append(nn.Linear(in_features, out_features)) 36 | layers.append(nn.ReLU()) 37 | layers.append(nn.Dropout(p=0.2)) 38 | 39 | in_features = out_features 40 | 41 | # in_features = out_features 42 | 43 | # print(in_features, action_dim) 44 | layers.append(nn.Linear(in_features, action_dim)) 45 | # layers.append(nn.Tanh()) 46 | layers.append(nn.Softmax(dim=1)) 47 | # print(layers) 48 | self.actor = nn.Sequential(*layers) 49 | 50 | # max value of actions 51 | self.action_bounds = action_bounds 52 | self.offset = offset 53 | 54 | def forward(self, state, goal): 55 | # print(state, goal,self.action_bounds,self.offset.shape) 56 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset 57 | 58 | 59 | class Critic(nn.Module): 60 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers): 61 | super(Critic, self).__init__() 62 | # UVFA critic 63 | layers = [] 64 | 65 | in_features = state_dim + action_dim + state_dim 66 | 67 | for i in range(n_layers): 68 | 69 | # Suggest the number of units in each layer 70 | out_features = layer_dim 71 | 72 | layers.append(nn.Linear(in_features, out_features)) 73 | layers.append(nn.ReLU()) 74 | layers.append(nn.Dropout(p=0.2)) 75 | 76 | in_features = out_features 77 | 78 | layers.append(nn.Linear(in_features, 1)) 79 | layers.append(nn.Sigmoid()) 80 | self.critic = nn.Sequential(*layers) 81 | 82 | self.H = H 83 | 84 | def forward(self, state, action, goal): 85 | # rewards are in range [-H, 0] 86 | return -self.critic(torch.cat([state, action, 
goal], 1))* self.H 87 | 88 | 89 | class DDPG: 90 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers): 91 | 92 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device) 93 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr) 94 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device) 95 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr) 96 | 97 | self.mseLoss = torch.nn.MSELoss() 98 | 99 | def select_action(self, state, goal): 100 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 101 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device) 102 | return self.actor(state, goal).detach().cpu().data.numpy().flatten() 103 | 104 | def update(self, buffer, n_iter, batch_size,env): 105 | 106 | for i in range(n_iter): 107 | # Sample a batch of transitions from replay buffer: 108 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size) 109 | 110 | # convert np arrays into tensors 111 | state = torch.FloatTensor(state).to(device) 112 | action = torch.FloatTensor(action).to(device) 113 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device) 114 | next_state = torch.FloatTensor(next_state).to(device) 115 | goal = torch.FloatTensor(goal).to(device) 116 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device) 117 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device) 118 | 119 | # select next action 120 | next_action = self.actor(next_state, goal).detach() 121 | 122 | # Compute target Q-value: 123 | target_Q = self.critic(next_state, next_action, goal).detach() 124 | target_Q = reward + ((1-done) * gamma * target_Q) 125 | 126 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q) 127 | self.critic_optimizer.zero_grad() 128 | critic_loss.backward() 129 | self.critic_optimizer.step() 130 | 131 | # Compute actor loss: 132 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean() 133 | 134 | # Optimize the actor 135 | self.actor_optimizer.zero_grad() 136 | actor_loss.backward() 137 | self.actor_optimizer.step() 138 | 139 | def save(self, directory, name): 140 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 141 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name)) 142 | 143 | def load(self, directory, name): 144 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu')) 145 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu')) 146 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_jax_mps/DDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | 6 | # device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 7 | device = torch.device("cpu") 8 | print(device) 9 | 10 | class extract_tensor(nn.Module): 11 | def forward(self,x): 12 | # Output shape (batch, features, hidden) 13 | tensor, _ = x 14 | # Reshape shape (batch, hidden) 15 | return tensor 16 | 17 | 18 | class Actor(nn.Module): 19 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers): 20 | super(Actor, self).__init__() 21 | 22 | # actor 23 | in_features = 
state_dim + state_dim 24 | 25 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)] 26 | # layers.append(extract_tensor()) 27 | 28 | layers=[] 29 | # out_features = layer_dim 30 | for i in range(n_layers): 31 | 32 | # Suggest the number of units in each layer 33 | out_features = layer_dim 34 | 35 | # layers.append(nn.Linear(out_features, out_features)) 36 | layers.append(nn.Linear(in_features, out_features)) 37 | layers.append(nn.ReLU()) 38 | layers.append(nn.Dropout(p=0.2)) 39 | 40 | in_features = out_features 41 | 42 | # in_features = out_features 43 | 44 | # print(in_features, action_dim) 45 | layers.append(nn.Linear(in_features, action_dim)) 46 | # layers.append(nn.Tanh()) 47 | layers.append(nn.Softmax(dim=1)) 48 | # print(layers) 49 | self.actor = nn.Sequential(*layers) 50 | 51 | # max value of actions 52 | self.action_bounds = action_bounds 53 | self.offset = offset 54 | 55 | def forward(self, state, goal): 56 | # print(state, goal,self.action_bounds,self.offset.shape) 57 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset 58 | 59 | 60 | class Critic(nn.Module): 61 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers): 62 | super(Critic, self).__init__() 63 | # UVFA critic 64 | layers = [] 65 | 66 | in_features = state_dim + action_dim + state_dim 67 | 68 | for i in range(n_layers): 69 | 70 | # Suggest the number of units in each layer 71 | out_features = layer_dim 72 | 73 | layers.append(nn.Linear(in_features, out_features)) 74 | layers.append(nn.ReLU()) 75 | layers.append(nn.Dropout(p=0.2)) 76 | 77 | in_features = out_features 78 | 79 | layers.append(nn.Linear(in_features, 1)) 80 | layers.append(nn.Sigmoid()) 81 | self.critic = nn.Sequential(*layers) 82 | 83 | self.H = H 84 | 85 | def forward(self, state, action, goal): 86 | # rewards are in range [-H, 0] 87 | return -self.critic(torch.cat([state, action, goal], 1))* self.H 88 | 89 | 90 | class DDPG: 91 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers): 92 | 93 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).to(device) 94 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr) 95 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).to(device) 96 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr) 97 | 98 | self.mseLoss = torch.nn.MSELoss() 99 | 100 | def select_action(self, state, goal): 101 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 102 | goal = torch.FloatTensor(goal.reshape(1, -1)).to(device) 103 | return self.actor(state, goal).detach().cpu().data.numpy().flatten() 104 | 105 | def update(self, buffer, n_iter, batch_size,env): 106 | 107 | for i in range(n_iter): 108 | # Sample a batch of transitions from replay buffer: 109 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size) 110 | 111 | # convert np arrays into tensors 112 | state = torch.FloatTensor(state).to(device) 113 | action = torch.FloatTensor(action).to(device) 114 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device) 115 | next_state = torch.FloatTensor(next_state).to(device) 116 | goal = torch.FloatTensor(goal).to(device) 117 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).to(device) 118 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device) 119 | 120 | # select next action 121 | next_action = 
self.actor(next_state, goal).detach() 122 | 123 | # Compute target Q-value: 124 | target_Q = self.critic(next_state, next_action, goal).detach() 125 | target_Q = reward + ((1-done) * gamma * target_Q) 126 | 127 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q) 128 | self.critic_optimizer.zero_grad() 129 | critic_loss.backward() 130 | self.critic_optimizer.step() 131 | 132 | # Compute actor loss: 133 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean() 134 | 135 | # Optimize the actor 136 | self.actor_optimizer.zero_grad() 137 | actor_loss.backward() 138 | self.actor_optimizer.step() 139 | 140 | def save(self, directory, name): 141 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 142 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name)) 143 | 144 | def load(self, directory, name): 145 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu')) 146 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu')) 147 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/DDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | 6 | device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 7 | print(device) 8 | 9 | 10 | class extract_tensor(nn.Module): 11 | def forward(self,x): 12 | # Output shape (batch, features, hidden) 13 | tensor, _ = x 14 | # Reshape shape (batch, hidden) 15 | return tensor 16 | 17 | 18 | class Actor(nn.Module): 19 | def __init__(self, state_dim, action_dim, action_bounds, offset, layer_dim, n_layers): 20 | super(Actor, self).__init__() 21 | 22 | # actor 23 | in_features = state_dim + state_dim 24 | 25 | # layers = [nn.LSTM(input_size=in_features, hidden_size=layer_dim, num_layers=n_layers, batch_first=True)] 26 | # layers.append(extract_tensor()) 27 | 28 | layers=[] 29 | # out_features = layer_dim 30 | for i in range(n_layers): 31 | 32 | # Suggest the number of units in each layer 33 | out_features = layer_dim 34 | 35 | # layers.append(nn.Linear(out_features, out_features)) 36 | layers.append(nn.Linear(in_features, out_features)) 37 | layers.append(nn.ReLU()) 38 | layers.append(nn.Dropout(p=0.2)) 39 | 40 | in_features = out_features 41 | 42 | # in_features = out_features 43 | 44 | layers.append(nn.Linear(in_features, action_dim)) 45 | # layers.append(nn.Tanh()) 46 | layers.append(nn.Softmax(dim=1)) 47 | self.actor = nn.Sequential(*layers) 48 | 49 | # max value of actions 50 | self.action_bounds = action_bounds 51 | self.offset = offset 52 | 53 | def forward(self, state, goal): 54 | return (self.actor(torch.cat([state, goal], 1)) * self.action_bounds) + self.offset 55 | 56 | class Critic(nn.Module): 57 | def __init__(self, state_dim, action_dim, H, layer_dim, n_layers): 58 | super(Critic, self).__init__() 59 | # UVFA critic 60 | layers = [] 61 | 62 | 63 | in_features = state_dim + action_dim + state_dim 64 | 65 | for i in range(n_layers): 66 | 67 | # Suggest the number of units in each layer 68 | out_features = layer_dim 69 | 70 | layers.append(nn.Linear(in_features, out_features)) 71 | layers.append(nn.ReLU()) 72 | layers.append(nn.Dropout(p=0.2)) 73 | 74 | in_features = out_features 75 | 76 | layers.append(nn.Linear(in_features, 1)) 77 | 
layers.append(nn.Sigmoid()) 78 | self.critic = nn.Sequential(*layers) 79 | 80 | self.H = H 81 | 82 | def forward(self, state, action, goal): 83 | # rewards are in range [-H, 0] 84 | return -self.critic(torch.cat([state, action, goal], 1))* self.H 85 | 86 | 87 | class DDPG: 88 | def __init__(self, state_dim, action_dim, action_bounds, offset, lr, H, optimizer,layer_dim,n_layers): 89 | 90 | self.actor = Actor(state_dim, action_dim, action_bounds, offset, layer_dim,n_layers).type(torch.float32).to(device) 91 | self.actor_optimizer=getattr(optim, optimizer)(self.actor.parameters(), lr= lr) 92 | self.critic = Critic(state_dim, action_dim, H, layer_dim, n_layers).type(torch.float32).to(device) 93 | self.critic_optimizer=getattr(optim, optimizer)(self.critic.parameters(), lr= lr) 94 | 95 | self.mseLoss = torch.nn.MSELoss() 96 | 97 | def select_action(self, state, goal): 98 | state = torch.FloatTensor(state.reshape(1, -1)).type(torch.float32).to(device) 99 | goal = torch.FloatTensor(goal.reshape(1, -1)).type(torch.float32).to(device) 100 | return self.actor(state, goal).detach().cpu().data.numpy().flatten() 101 | 102 | def update(self, buffer, n_iter, batch_size,env): 103 | 104 | 105 | for i in range(n_iter): 106 | # Sample a batch of transitions from replay buffer: 107 | state, action, reward, next_state, goal, gamma, done = buffer.sample(batch_size) 108 | 109 | # convert np arrays into tensors 110 | state = torch.FloatTensor(state).type(torch.float32).to(device) 111 | action = torch.FloatTensor(action).type(torch.float32).to(device) 112 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).type(torch.float32).to(device) 113 | next_state = torch.FloatTensor(next_state).type(torch.float32).to(device) 114 | goal = torch.FloatTensor(goal).type(torch.float32).to(device) 115 | gamma = torch.FloatTensor(gamma).reshape((batch_size,1)).type(torch.float32).to(device) 116 | done = torch.FloatTensor(done).reshape((batch_size,1)).type(torch.float32).to(device) 117 | 118 | # select next action 119 | next_action = self.actor(next_state, goal).detach() 120 | 121 | # Compute target Q-value: 122 | target_Q = self.critic(next_state, next_action, goal).detach() 123 | target_Q = reward + ((1-done) * gamma * target_Q) 124 | 125 | 126 | critic_loss = self.mseLoss(self.critic(state, action, goal), target_Q) 127 | self.critic_optimizer.zero_grad() 128 | critic_loss.backward() 129 | self.critic_optimizer.step() 130 | 131 | # Compute actor loss: 132 | actor_loss = -self.critic(state, self.actor(state, goal), goal).mean() 133 | 134 | # Optimize the actor 135 | self.actor_optimizer.zero_grad() 136 | actor_loss.backward() 137 | self.actor_optimizer.step() 138 | 139 | 140 | def save(self, directory, name): 141 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 142 | torch.save(self.critic.state_dict(), '%s/%s_crtic.pth' % (directory, name)) 143 | 144 | def load(self, directory, name): 145 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location='cpu')) 146 | self.critic.load_state_dict(torch.load('%s/%s_crtic.pth' % (directory, name), map_location='cpu')) 147 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL/hpo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import optuna 6 | from asset.topology_optimization import CantileverEnv 7 | 8 | 9 | device = 
torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | 11 | # Check for HPO: 12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837 13 | 14 | def train(params): 15 | 16 | #################### Hyperparameters #################### 17 | env_name ="T0-h-v1" 18 | 19 | save_episode = 20 # keep saving every n episodes 20 | # max_episodes = params['max_episodes'] # max num of training episodes 21 | max_episodes = 1_000 22 | random_seed = params['random_seed'] 23 | # random_seed=False 24 | render = False 25 | 26 | env = gym.make(env_name) 27 | env.layer_dim=params['layer_dim'] 28 | # env.layer_dim=3 29 | env.n_layers=params['n_layers'] 30 | # env.n_layers=6 31 | env.optimizer=params['optimizer'] 32 | # env.optimizer='SGD' 33 | 34 | state_dim = env.observation_space.shape[0] 35 | action_dim = env.N_DISCRETE_ACTIONS 36 | 37 | """ 38 | Actions (both primitive and subgoal) are implemented as follows: 39 | action = ( network output (Tanh) * bounds ) + offset 40 | clip_high and clip_low bound the exploration noise 41 | """ 42 | 43 | # primitive action bounds and offset 44 | action_bounds = env.action_space.high[0] 45 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 46 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 47 | 48 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 49 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 50 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | 52 | # state bounds and offset 53 | # state_bounds_np = np.array([0.5, 0.5e7]) 54 | state_bounds_np = np.array([1, 1e7]) 55 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 56 | # state_offset = np.array([0.5, 0.5e7]) 57 | state_offset = np.array([0, 0]) 58 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 59 | state_clip_low = np.array([0, 0]) 60 | state_clip_high = np.array([1, 1e7]) 61 | 62 | exploration_action_noise = np.array([params['action_noise']]) 63 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']]) 64 | 65 | goal_state=np.array([0.68, 20]) 66 | threshold=[0.05, 5] 67 | 68 | # HAC parameters: 69 | k_level = 2 # num of levels in hierarchy 70 | H = params['H'] # time horizon to achieve subgoal 71 | # H = 11 72 | lamda = params['lamda'] # subgoal testing parameter 73 | # lamda = 0.9453109199655714 74 | 75 | # DDPG parameters: 76 | gamma = params['gamma'] # discount factor for future rewards 77 | # gamma = 0.992256316386673 78 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update 79 | # n_iter = 186 80 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer 81 | # batch_size =256 82 | lr = params['lr'] 83 | # lr= 0.0032967527995782626 84 | 85 | # save trained models 86 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 87 | filename = "HAC_{}".format(env_name) 88 | ######################################################### 89 | 90 | 91 | if random_seed: 92 | print("Random Seed: {}".format(random_seed)) 93 | env.seed(random_seed) 94 | torch.manual_seed(random_seed) 95 | np.random.seed(random_seed) 96 | 97 | # creating HAC agent and setting parameters 98 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 99 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 100 | 101 | agent.set_parameters(lamda, 
gamma, action_clip_low, action_clip_high, 102 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 103 | 104 | 105 | # training procedure 106 | my_res=[] 107 | for i_episode in range(1, max_episodes+1): 108 | agent.reward = 0 109 | agent.timestep = 0 110 | 111 | state = env.reset() 112 | # collecting experience in environment 113 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 114 | 115 | agent.update(n_iter, batch_size, env) 116 | 117 | my_res.append(agent.reward) 118 | 119 | return np.mean(my_res) 120 | 121 | def objective(trial): 122 | 123 | params = { 124 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500), 125 | 'random_seed': trial.suggest_int("random_seed", 1, 5), 126 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16), 127 | 'n_layers':trial.suggest_int("n_layers", 2, 16), 128 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam", 129 | "RMSprop", 130 | "SGD" 131 | ]), 132 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1), 133 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1), 134 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7), 135 | 'H': trial.suggest_int("H", 3, 16), 136 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1), 137 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999), 138 | 'n_iter': trial.suggest_int('n_iter', 50, 350), 139 | 'batch_size': trial.suggest_int('batch_size', 50, 350), 140 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1) 141 | 142 | } 143 | 144 | 145 | 146 | rev = train(params) 147 | 148 | return rev 149 | 150 | 151 | 152 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler()) 153 | study.optimize(objective, n_trials=300) 154 | 155 | 156 | best_trial = study.best_trial 157 | 158 | print() 159 | 160 | for key, value in best_trial.params.items(): 161 | print("{}: {}".format(key, value)) 162 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_mps/hpo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import optuna 6 | from asset.topology_optimization import CantileverEnv 7 | 8 | 9 | device = torch.device("mps:0" if torch.mps.is_available() else "cpu") 10 | print(device) 11 | 12 | # Check for HPO: 13 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837 14 | 15 | def train(params): 16 | 17 | #################### Hyperparameters #################### 18 | env_name ="T0-h-v1" 19 | 20 | save_episode = 20 # keep saving every n episodes 21 | # max_episodes = params['max_episodes'] # max num of training episodes 22 | max_episodes = 1_000 23 | random_seed = params['random_seed'] 24 | # random_seed=False 25 | render = False 26 | 27 | env = gym.make(env_name) 28 | env.layer_dim=params['layer_dim'] 29 | # env.layer_dim=3 30 | env.n_layers=params['n_layers'] 31 | # env.n_layers=6 32 | env.optimizer=params['optimizer'] 33 | # env.optimizer='SGD' 34 | 35 | state_dim = env.observation_space.shape[0] 36 | action_dim = env.N_DISCRETE_ACTIONS 37 | 38 | """ 39 | Actions (both primitive and subgoal) are implemented as follows: 40 | action = ( network output (Tanh) * bounds ) + offset 41 | clip_high and clip_low bound the exploration noise 42 | """ 43 | 44 | # primitive action bounds and offset 45 | action_bounds = env.action_space.high[0] 
46 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 47 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 48 | 49 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 50 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 52 | 53 | # state bounds and offset 54 | # state_bounds_np = np.array([0.5, 0.5e7]) 55 | state_bounds_np = np.array([1, 1e7]) 56 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 57 | # state_offset = np.array([0.5, 0.5e7]) 58 | state_offset = np.array([0, 0]) 59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 60 | state_clip_low = np.array([0, 0]) 61 | state_clip_high = np.array([1, 1e7]) 62 | 63 | exploration_action_noise = np.array([params['action_noise']]) 64 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']]) 65 | 66 | goal_state=np.array([0.68, 20]) 67 | threshold=[0.05, 5] 68 | 69 | # HAC parameters: 70 | k_level = 2 # num of levels in hierarchy 71 | H = params['H'] # time horizon to achieve subgoal 72 | # H = 11 73 | lamda = params['lamda'] # subgoal testing parameter 74 | # lamda = 0.9453109199655714 75 | 76 | # DDPG parameters: 77 | gamma = params['gamma'] # discount factor for future rewards 78 | # gamma = 0.992256316386673 79 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update 80 | # n_iter = 186 81 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer 82 | # batch_size =256 83 | lr = params['lr'] 84 | # lr= 0.0032967527995782626 85 | 86 | # save trained models 87 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 88 | filename = "HAC_{}".format(env_name) 89 | ######################################################### 90 | 91 | 92 | if random_seed: 93 | print("Random Seed: {}".format(random_seed)) 94 | env.seed(random_seed) 95 | torch.manual_seed(random_seed) 96 | np.random.seed(random_seed) 97 | 98 | # creating HAC agent and setting parameters 99 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 101 | 102 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 103 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 104 | 105 | 106 | # training procedure 107 | my_res=[] 108 | for i_episode in range(1, max_episodes+1): 109 | agent.reward = 0 110 | agent.timestep = 0 111 | 112 | state = env.reset() 113 | # collecting experience in environment 114 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 115 | 116 | agent.update(n_iter, batch_size, env) 117 | 118 | my_res.append(agent.reward) 119 | 120 | return np.mean(my_res) 121 | 122 | def objective(trial): 123 | 124 | params = { 125 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500), 126 | 'random_seed': trial.suggest_int("random_seed", 1, 5), 127 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16), 128 | 'n_layers':trial.suggest_int("n_layers", 2, 16), 129 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam", 130 | "RMSprop", 131 | "SGD" 132 | ]), 133 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1), 134 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1), 135 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 
1e7), 136 | 'H': trial.suggest_int("H", 3, 16), 137 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1), 138 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999), 139 | 'n_iter': trial.suggest_int('n_iter', 50, 350), 140 | 'batch_size': trial.suggest_int('batch_size', 50, 350), 141 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1) 142 | 143 | } 144 | 145 | 146 | 147 | rev = train(params) 148 | 149 | return rev 150 | 151 | 152 | 153 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler()) 154 | study.optimize(objective, n_trials=300) 155 | 156 | 157 | best_trial = study.best_trial 158 | 159 | print() 160 | 161 | for key, value in best_trial.params.items(): 162 | print("{}: {}".format(key, value)) 163 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/HRL_without_dropout/hpo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import optuna 6 | from asset.topology_optimization import CantileverEnv 7 | 8 | 9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | 11 | # Check for HPO: 12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837 13 | 14 | def train(params): 15 | 16 | #################### Hyperparameters #################### 17 | env_name ="T0-h-v1" 18 | 19 | save_episode = 20 # keep saving every n episodes 20 | # max_episodes = params['max_episodes'] # max num of training episodes 21 | max_episodes = 1_000 22 | # random_seed = params['random_seed'] 23 | random_seed = 42 24 | # random_seed=False 25 | render = False 26 | 27 | env = gym.make(env_name) 28 | env.layer_dim=params['layer_dim'] 29 | # env.layer_dim=3 30 | env.n_layers=params['n_layers'] 31 | # env.n_layers=6 32 | env.optimizer=params['optimizer'] 33 | # env.optimizer='SGD' 34 | 35 | state_dim = env.observation_space.shape[0] 36 | action_dim = env.N_DISCRETE_ACTIONS 37 | 38 | """ 39 | Actions (both primitive and subgoal) are implemented as follows: 40 | action = ( network output (Tanh) * bounds ) + offset 41 | clip_high and clip_low bound the exploration noise 42 | """ 43 | 44 | # primitive action bounds and offset 45 | action_bounds = env.action_space.high[0] 46 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 47 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 48 | 49 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 50 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 52 | 53 | # state bounds and offset 54 | # state_bounds_np = np.array([0.5, 0.5e7]) 55 | state_bounds_np = np.array([1, 1e7]) 56 | state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 57 | # state_offset = np.array([0.5, 0.5e7]) 58 | state_offset = np.array([0, 0]) 59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 60 | state_clip_low = np.array([0, 0]) 61 | state_clip_high = np.array([1, 1e7]) 62 | 63 | exploration_action_noise = np.array([params['action_noise']]) 64 | exploration_state_noise = np.array([params['state_noise_1'], params['state_noise_2']]) 65 | 66 | goal_state=np.array([0.68, 20]) 67 | threshold=[0.05, 5] 68 | 69 | # HAC parameters: 70 | k_level = 2 # num of levels in hierarchy 71 | # H = params['H'] # time 
horizon to achieve subgoal 72 | H = 7 73 | lamda = params['lamda'] # subgoal testing parameter 74 | # lamda = 0.9453109199655714 75 | 76 | # DDPG parameters: 77 | gamma = params['gamma'] # discount factor for future rewards 78 | # gamma = 0.992256316386673 79 | # n_iter = params['n_iter'] # update policy n_iter times in one DDPG update 80 | n_iter = 100 81 | # batch_size = params['batch_size'] # num of transitions sampled from replay buffer 82 | batch_size =100 83 | lr = params['lr'] 84 | # lr= 0.0032967527995782626 85 | 86 | # save trained models 87 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 88 | filename = "HAC_{}".format(env_name) 89 | ######################################################### 90 | 91 | 92 | if random_seed: 93 | print("Random Seed: {}".format(random_seed)) 94 | env.seed(random_seed) 95 | torch.manual_seed(random_seed) 96 | np.random.seed(random_seed) 97 | 98 | # creating HAC agent and setting parameters 99 | agent = HAC(k_level, H, state_dim, action_dim, render, threshold, 100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, env.n_layers) 101 | 102 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 103 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 104 | 105 | 106 | # training procedure 107 | my_res=[] 108 | for i_episode in range(1, max_episodes+1): 109 | agent.reward = 0 110 | agent.timestep = 0 111 | 112 | state = env.reset() 113 | # collecting experience in environment 114 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 115 | 116 | agent.update(n_iter, batch_size, env) 117 | 118 | my_res.append(agent.reward) 119 | 120 | return np.mean(my_res) 121 | 122 | def objective(trial): 123 | 124 | params = { 125 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500), 126 | # 'random_seed': trial.suggest_int("random_seed", 1, 5), 127 | 'layer_dim':trial.suggest_int("layer_dim", 2, 50), 128 | 'n_layers':trial.suggest_int("n_layers", 2, 50), 129 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam", 130 | "RMSprop", 131 | "SGD" 132 | ]), 133 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1), 134 | 'state_noise_1': trial.suggest_loguniform('state_noise_1', 0.01, 1), 135 | 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7), 136 | # 'H': trial.suggest_int("H", 3, 16), 137 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1), 138 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999), 139 | # 'n_iter': trial.suggest_int('n_iter', 50, 350), 140 | # 'batch_size': trial.suggest_int('batch_size', 50, 350), 141 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1) 142 | 143 | } 144 | 145 | 146 | 147 | rev = train(params) 148 | 149 | return rev 150 | 151 | 152 | 153 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler()) 154 | study.optimize(objective, n_trials=300) 155 | 156 | 157 | best_trial = study.best_trial 158 | 159 | print() 160 | 161 | for key, value in best_trial.params.items(): 162 | print("{}: {}".format(key, value)) 163 | -------------------------------------------------------------------------------- /gigala/topology/topology_optimiz/hierarchical_rl/hrl_draft/HRL_without_the_trick/hpo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import numpy as np 4 | from HAC import HAC 5 | import optuna 6 | from asset.topology_optimization import CantileverEnv 7 | 8 | 9 | device = 
torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | 11 | # Check for HPO: 12 | # https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837 13 | 14 | def train(params): 15 | 16 | #################### Hyperparameters #################### 17 | env_name ="T0-h-v1" 18 | 19 | save_episode = 20 # keep saving every n episodes 20 | # max_episodes = params['max_episodes'] # max num of training episodes 21 | max_episodes = 10_000 22 | random_seed = 1 23 | # random_seed=False 24 | render = False 25 | 26 | env = gym.make(env_name) 27 | env.layer_dim = params['layer_dim'] 28 | # env.layer_dim=3 29 | env.n_layers = params['n_layers'] 30 | # env.n_layers=6 31 | env.optimizer = params['optimizer'] 32 | # env.optimizer='SGD' 33 | 34 | state_dim = env.observation_space.shape[0] 35 | action_dim = env.N_DISCRETE_ACTIONS 36 | 37 | """ 38 | Actions (both primitive and subgoal) are implemented as follows: 39 | action = ( network output (Tanh) * bounds ) + offset 40 | clip_high and clip_low bound the exploration noise 41 | """ 42 | 43 | # primitive action bounds and offset 44 | action_bounds = env.action_space.high[0] 45 | # action_offset = np.array([0.5 for x in range(env.N_DISCRETE_ACTIONS)]) 46 | action_offset = np.array([0.0 for x in range(env.N_DISCRETE_ACTIONS)]) 47 | 48 | action_offset = torch.FloatTensor(action_offset.reshape(1, -1)).to(device) 49 | action_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 50 | action_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 51 | 52 | # state bounds and offset 53 | # state_bounds_np = np.array([0.5, 0.5e7]) 54 | # state_bounds_np = np.array([1, 1e7]) 55 | # state_bounds = torch.FloatTensor(state_bounds_np.reshape(1, -1)).to(device) 56 | state_bounds = env.observation_space.high[0] 57 | # state_offset = np.array([0.5, 0.5e7]) 58 | state_offset = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 59 | state_offset = torch.FloatTensor(state_offset.reshape(1, -1)).to(device) 60 | state_clip_low = np.array([0 for x in range(env.N_DISCRETE_ACTIONS)]) 61 | state_clip_high = np.array([1 for x in range(env.N_DISCRETE_ACTIONS)]) 62 | 63 | exploration_action_noise = np.array([params['action_noise']]) 64 | exploration_state_noise = np.array([params['state_noise']]) 65 | 66 | goal_ = [0.68, 30] 67 | goal_state = np.array(goal_ + [0] * (env.N_DISCRETE_ACTIONS - len(goal_))) 68 | threshold = [0.05, 3] 69 | 70 | # HAC parameters: 71 | k_level = 2 # num of levels in hierarchy 72 | H = 9 # time horizon to achieve subgoal 73 | # H = 11 74 | lamda = params['lamda'] # subgoal testing parameter 75 | # lamda = 0.9453109199655714 76 | 77 | # DDPG parameters: 78 | gamma = params['gamma'] # discount factor for future rewards 79 | # gamma = 0.992256316386673 80 | n_iter = params['n_iter'] # update policy n_iter times in one DDPG update 81 | # n_iter = 186 82 | batch_size = params['batch_size'] # num of transitions sampled from replay buffer 83 | # batch_size =256 84 | lr = params['lr'] 85 | # lr= 0.0032967527995782626 86 | 87 | # save trained models 88 | directory = "./preTrained/{}/{}level/".format(env_name, k_level) 89 | filename = "HAC_{}".format(env_name) 90 | ######################################################### 91 | 92 | if random_seed: 93 | print("Random Seed: {}".format(random_seed)) 94 | env.seed(random_seed) 95 | torch.manual_seed(random_seed) 96 | np.random.seed(random_seed) 97 | 98 | # creating HAC agent and setting parameters 99 | agent = HAC(k_level, H, state_dim, action_dim, 
render, threshold, 100 | action_bounds, action_offset, state_bounds, state_offset, lr, env.optimizer, env.layer_dim, 101 | env.n_layers) 102 | 103 | agent.set_parameters(lamda, gamma, action_clip_low, action_clip_high, 104 | state_clip_low, state_clip_high, exploration_action_noise, exploration_state_noise) 105 | 106 | 107 | # training procedure 108 | my_res=[] 109 | for i_episode in range(1, max_episodes+1): 110 | agent.reward = 0 111 | agent.timestep = 0 112 | 113 | state = env.reset() 114 | # collecting experience in environment 115 | last_state, done = agent.run_HAC(env, k_level-1, state, goal_state, False) 116 | 117 | agent.update(n_iter, batch_size, env) 118 | 119 | my_res.append(agent.reward) 120 | 121 | return np.mean(my_res) 122 | 123 | def objective(trial): 124 | 125 | params = { 126 | # 'max_episodes':trial.suggest_int("max_episodes", 1000, 1500), 127 | # 'random_seed': trial.suggest_int("random_seed", 1, 5), 128 | 'layer_dim':trial.suggest_int("layer_dim", 2, 16), 129 | 'n_layers':trial.suggest_int("n_layers", 2, 16), 130 | 'optimizer': trial.suggest_categorical("optimizer", ["Adam", 131 | "RMSprop", 132 | "SGD" 133 | ]), 134 | 'action_noise':trial.suggest_loguniform('action_noise', 0.01, 1), 135 | 'state_noise': trial.suggest_loguniform('state_noise', 0.01, 1), 136 | # 'state_noise_2': trial.suggest_loguniform('state_noise_2', 1000, 1e7), 137 | # 'H': trial.suggest_int("H", 3, 16), 138 | 'lamda': trial.suggest_uniform('lamda', 0.3, 1), 139 | 'gamma': trial.suggest_uniform('gamma', 0.95, 0.999), 140 | 'n_iter': trial.suggest_int('n_iter', 50, 350), 141 | 'batch_size': trial.suggest_int('batch_size', 50, 350), 142 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1) 143 | 144 | } 145 | 146 | rev = train(params) 147 | 148 | return rev 149 | 150 | 151 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler()) 152 | study.optimize(objective, n_trials=100) 153 | 154 | 155 | best_trial = study.best_trial 156 | 157 | print() 158 | 159 | for key, value in best_trial.params.items(): 160 | print("{}: {}".format(key, value)) 161 | --------------------------------------------------------------------------------
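Usage sketch (not a file from the repository): a minimal, hedged example of driving the DDPG agent defined in HRL/DDPG.py above. The dimensions, hyperparameters, and the 25-element action vector are illustrative assumptions, not values taken from the training runs, so loading the pretrained checkpoints would additionally require the layer_dim/n_layers they were trained with.

import numpy as np
import torch

from DDPG import DDPG, device   # DDPG.py as listed above; `device` is its module-level torch.device

# Illustrative dimensions: hpo.py exposes a 2-component state and N_DISCRETE_ACTIONS outputs.
state_dim = 2                                             # e.g. [volume-fraction-like, compliance-like]
action_dim = 25                                           # assumption for illustration only
action_bounds = 1.0                                       # mirrors env.action_space.high[0] in hpo.py
action_offset = torch.zeros((1, action_dim)).to(device)   # zero offset, as in hpo.py

agent = DDPG(state_dim, action_dim, action_bounds, action_offset,
             lr=1e-3, H=7, optimizer="Adam", layer_dim=8, n_layers=4)

state = np.array([1e-4, 1e7])                             # the reset observation of the environment
goal = np.array([0.68, 20])                               # goal_state used in hpo.py
action = agent.select_action(state, goal)                 # softmax output scaled by bounds, sums to ~1
print(action.shape, action.sum())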