├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── hpc_rll ├── __init__.py ├── origin │ ├── __init__.py │ ├── gae.py │ ├── padding.py │ ├── ppo.py │ ├── rnn.py │ ├── scatter_connection.py │ ├── td.py │ ├── upgo.py │ └── vtrace.py ├── rl_utils │ ├── __init__.py │ ├── gae.py │ ├── padding.py │ ├── ppo.py │ ├── td.py │ ├── upgo.py │ └── vtrace.py └── torch_utils │ ├── __init__.py │ └── network │ ├── __init__.py │ ├── rnn.py │ └── scatter_connection.py ├── include └── hpc │ └── rll │ └── cuda │ ├── basic_math.h │ ├── common.h │ ├── models │ ├── actor_critic_kernel.h │ └── entry.h │ ├── reduce.h │ ├── rl_utils │ ├── dist_nstep_td_kernel.h │ ├── entry.h │ ├── gae_kernel.h │ ├── iqn_nstep_td_error_kernel.h │ ├── padding_kernel.h │ ├── ppo_kernel.h │ ├── q_nstep_td_kernel.h │ ├── q_nstep_td_rescale_kernel.h │ ├── qrdqn_nstep_td_error_kernel.h │ ├── td_lambda_kernel.h │ ├── upgo_kernel.h │ └── vtrace_kernel.h │ ├── status.h │ └── torch_utils │ └── network │ ├── entry.h │ ├── lstm_kernel.h │ └── scatter_connection_kernel.h ├── setup.py ├── src ├── models │ ├── actor_critic.cu │ └── entry.cpp ├── rl_utils │ ├── dist_nstep_td.cu │ ├── entry.cpp │ ├── gae.cu │ ├── iqn_nstep_td_error.cu │ ├── padding.cu │ ├── ppo.cu │ ├── q_nstep_td.cu │ ├── q_nstep_td_rescale.cu │ ├── qrdqn_nstep_td_error.cu │ ├── td_lambda.cu │ ├── upgo.cu │ └── vtrace.cu └── torch_utils │ └── network │ ├── entry.cpp │ ├── lstm.cu │ └── scatter_connection.cu ├── tests ├── test_actor_critic.py ├── test_dntd.py ├── test_gae.py ├── test_iqn_nstep_td_error.py ├── test_lstm.py ├── test_padding.py ├── test_ppo.py ├── test_qntd.py ├── test_qntd_rescale.py ├── test_qrdqn_nstep_td_error.py ├── test_scatter.py ├── test_tdlambda.py ├── test_upgo.py ├── test_vtrace.py └── testbase.py └── triton_rl ├── README.md └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/README.md -------------------------------------------------------------------------------- /hpc_rll/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hpc_rll/origin/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /hpc_rll/origin/gae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/gae.py -------------------------------------------------------------------------------- /hpc_rll/origin/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/padding.py -------------------------------------------------------------------------------- /hpc_rll/origin/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/ppo.py -------------------------------------------------------------------------------- /hpc_rll/origin/rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/rnn.py -------------------------------------------------------------------------------- /hpc_rll/origin/scatter_connection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/scatter_connection.py -------------------------------------------------------------------------------- /hpc_rll/origin/td.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/td.py -------------------------------------------------------------------------------- /hpc_rll/origin/upgo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/upgo.py -------------------------------------------------------------------------------- /hpc_rll/origin/vtrace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/origin/vtrace.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hpc_rll/rl_utils/gae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/gae.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/padding.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/ppo.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/td.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/td.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/upgo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/upgo.py -------------------------------------------------------------------------------- /hpc_rll/rl_utils/vtrace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/rl_utils/vtrace.py -------------------------------------------------------------------------------- /hpc_rll/torch_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hpc_rll/torch_utils/network/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /hpc_rll/torch_utils/network/rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/torch_utils/network/rnn.py -------------------------------------------------------------------------------- /hpc_rll/torch_utils/network/scatter_connection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/hpc_rll/torch_utils/network/scatter_connection.py -------------------------------------------------------------------------------- /include/hpc/rll/cuda/basic_math.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/basic_math.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/common.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/models/actor_critic_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/models/actor_critic_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/models/entry.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/models/entry.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/reduce.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/reduce.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/dist_nstep_td_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/dist_nstep_td_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/entry.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/entry.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/gae_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/gae_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/iqn_nstep_td_error_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/iqn_nstep_td_error_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/padding_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/padding_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/ppo_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/ppo_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/q_nstep_td_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/q_nstep_td_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/q_nstep_td_rescale_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/q_nstep_td_rescale_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/qrdqn_nstep_td_error_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/qrdqn_nstep_td_error_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/td_lambda_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/td_lambda_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/upgo_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/upgo_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/rl_utils/vtrace_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/rl_utils/vtrace_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/status.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/status.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/torch_utils/network/entry.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/torch_utils/network/entry.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/torch_utils/network/lstm_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/torch_utils/network/lstm_kernel.h -------------------------------------------------------------------------------- /include/hpc/rll/cuda/torch_utils/network/scatter_connection_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/include/hpc/rll/cuda/torch_utils/network/scatter_connection_kernel.h -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/setup.py -------------------------------------------------------------------------------- /src/models/actor_critic.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/models/actor_critic.cu -------------------------------------------------------------------------------- /src/models/entry.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/models/entry.cpp -------------------------------------------------------------------------------- /src/rl_utils/dist_nstep_td.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/dist_nstep_td.cu -------------------------------------------------------------------------------- /src/rl_utils/entry.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/entry.cpp -------------------------------------------------------------------------------- /src/rl_utils/gae.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/gae.cu -------------------------------------------------------------------------------- /src/rl_utils/iqn_nstep_td_error.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/iqn_nstep_td_error.cu -------------------------------------------------------------------------------- /src/rl_utils/padding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/padding.cu -------------------------------------------------------------------------------- /src/rl_utils/ppo.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/ppo.cu -------------------------------------------------------------------------------- /src/rl_utils/q_nstep_td.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/q_nstep_td.cu -------------------------------------------------------------------------------- /src/rl_utils/q_nstep_td_rescale.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/q_nstep_td_rescale.cu -------------------------------------------------------------------------------- /src/rl_utils/qrdqn_nstep_td_error.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/qrdqn_nstep_td_error.cu -------------------------------------------------------------------------------- /src/rl_utils/td_lambda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/td_lambda.cu -------------------------------------------------------------------------------- /src/rl_utils/upgo.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/upgo.cu -------------------------------------------------------------------------------- /src/rl_utils/vtrace.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/rl_utils/vtrace.cu -------------------------------------------------------------------------------- /src/torch_utils/network/entry.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/torch_utils/network/entry.cpp -------------------------------------------------------------------------------- /src/torch_utils/network/lstm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/torch_utils/network/lstm.cu -------------------------------------------------------------------------------- /src/torch_utils/network/scatter_connection.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/src/torch_utils/network/scatter_connection.cu -------------------------------------------------------------------------------- /tests/test_actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_actor_critic.py -------------------------------------------------------------------------------- /tests/test_dntd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_dntd.py -------------------------------------------------------------------------------- /tests/test_gae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_gae.py -------------------------------------------------------------------------------- /tests/test_iqn_nstep_td_error.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_iqn_nstep_td_error.py -------------------------------------------------------------------------------- /tests/test_lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_lstm.py -------------------------------------------------------------------------------- /tests/test_padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_padding.py -------------------------------------------------------------------------------- /tests/test_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_ppo.py -------------------------------------------------------------------------------- /tests/test_qntd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_qntd.py -------------------------------------------------------------------------------- /tests/test_qntd_rescale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_qntd_rescale.py -------------------------------------------------------------------------------- /tests/test_qrdqn_nstep_td_error.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_qrdqn_nstep_td_error.py -------------------------------------------------------------------------------- /tests/test_scatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_scatter.py -------------------------------------------------------------------------------- /tests/test_tdlambda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_tdlambda.py -------------------------------------------------------------------------------- /tests/test_upgo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_upgo.py -------------------------------------------------------------------------------- /tests/test_vtrace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/test_vtrace.py -------------------------------------------------------------------------------- /tests/testbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendilab/DI-hpc/HEAD/tests/testbase.py -------------------------------------------------------------------------------- /triton_rl/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /triton_rl/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>2 2 | triton 3 | --------------------------------------------------------------------------------