├── .gitignore ├── LICENSE ├── README.md ├── config ├── __init__.py ├── envs │ ├── Gibson │ │ ├── test_env.py │ │ └── train_config.py │ ├── __init__.py │ ├── a1 │ │ └── a1_config.py │ └── base │ │ ├── base_config.py │ │ ├── base_task.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py ├── scripts │ ├── play.py │ └── train.py ├── tests │ └── test_env.py └── utils │ ├── __init__.py │ ├── helpers.py │ ├── logger.py │ ├── math.py │ ├── task_registry.py │ └── terrain.py └── rl ├── __init__.py ├── algorithms ├── __init__.py └── ppo.py ├── conf └── configs.py ├── env ├── __init__.py ├── my_env.py └── vec_env.py ├── modules ├── SMT.py ├── __init__.py ├── actor_critic.py ├── actor_critic_recurrent.py └── encoder.py ├── runners ├── __init__.py └── on_policy_runner.py ├── storage ├── __init__.py └── rollout_storage.py └── utils ├── __init__.py ├── log_utils.py ├── logging_engine.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 liwy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformer-based-memory-for-visual-navigation 2 | PPO implementation for the RA-L 2023 paper [Transformer Memory for Interactive Visual Navigation in Cluttered Environments](https://www.hrl.uni-bonn.de/teaching/ss23/master-seminar/transformer-memory-for-interactive-visual-navigation-in-cluttered-environments.pdf). 3 | 4 | A Transformer belief-state encoder encodes history information, 5 | and the PPO algorithm learns the policy on top of it. 6 | 7 | The vectorized environment is designed for iGibson and can easily be adapted to other environments such as Habitat.
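At a high level, the encoder (`rl/modules/encoder.py`) embeds each observation, the Transformer memory (`rl/modules/SMT.py`) attends over the episode's embedding history to produce a belief state, and the PPO actor-critic (`rl/modules/actor_critic.py`, `rl/algorithms/ppo.py`) acts on that belief state. The snippet below is only an illustrative sketch of this idea in plain PyTorch; the class name, dimensions, and defaults are placeholders, not this repo's exact API:

```python
import torch
import torch.nn as nn

class BeliefEncoder(nn.Module):
    """Toy transformer belief-state encoder over an observation history."""

    def __init__(self, obs_dim: int, d_model: int = 128, nhead: int = 4, num_layers: int = 2):
        super().__init__()
        self.embed = nn.Linear(obs_dim, d_model)
        layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers)

    def forward(self, obs_history: torch.Tensor) -> torch.Tensor:
        # obs_history: (batch, time, obs_dim) -> belief state: (batch, d_model)
        tokens = self.embed(obs_history)
        return self.encoder(tokens)[:, -1]  # last token summarizes the history

belief = BeliefEncoder(obs_dim=32)(torch.randn(8, 16, 32))  # -> shape (8, 128)
```

The belief state then replaces the raw observation as input to the PPO policy and value heads.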
8 | 9 | ## Training scripts 10 | ``` 11 | python config/scripts/train.py 12 | ``` 13 | 14 | ## Citation 15 | 16 | ```bibtex 17 | @article{li2023transformer, 18 | title={Transformer Memory for Interactive Visual Navigation in Cluttered Environments}, 19 | author={Li, Weiyuan and Hong, Ruoxin and Shen, Jiwei and Yuan, Liang and Lu, Yue}, 20 | journal={IEEE Robotics and Automation Letters}, 21 | volume={8}, 22 | number={3}, 23 | pages={1731--1738}, 24 | year={2023}, 25 | publisher={IEEE} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | 33 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 34 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'config', 'envs') -------------------------------------------------------------------------------- /config/envs/Gibson/test_env.py: -------------------------------------------------------------------------------- 1 | import gibson2 2 | from gibson2.envs.igibson_env import iGibsonEnv 3 | from gibson2.envs.parallel_env import ParallelNavEnv 4 | import atexit 5 | import multiprocessing 6 | import sys 7 | import traceback 8 | import numpy as np 9 | import os 10 | from gibson2.utils.utils import parse_config 11 | import logging 12 | logging.getLogger().setLevel(logging.WARNING) 13 | 14 | 15 | if __name__ == "__main__": 16 | config_file_name = '/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 17 | env_config = parse_config(config_file_name) 18 | GPU_ID = [0,0,0,1,1,1,2,2] 19 | Env = ['Beechwood_1_int','Benevolence_0_int','Ihlen_0_int','Ihlen_1_int','Merom_0_int','Pomaria_0_int','Rs_int','Wainscott_1_int'] 20 | Training_Env = Env[:5] 21 | Testing_Env = Env[-3:] 22 | core_id = 0 23 | num_env = 2 24 | def load_env(): 25 | global core_id 26 | core_id = core_id + 1 27 | return iGibsonEnv(config_file = env_config, 28 | scene_id = Training_Env[core_id], 29 | mode = 'headless', 30 | action_timestep = 1.0 / 10.0, 31 | physics_timestep = 1.0 / 40.0, 32 | device_idx = GPU_ID[core_id], 33 | automatic_reset = True) 34 | 35 | parallel_env = ParallelNavEnv([load_env] * num_env, blocking=False) 36 | 37 | 38 | from time import time 39 | for episode in range(10): 40 | start = time() 41 | print("episode {}".format(episode)) 42 | parallel_env.reset() 43 | for i in range(600): 44 | res = parallel_env.step([[0.5, 0.5] for _ in range(2)]) 45 | state, reward, done, _ = res[0] 46 | if done: ## automatic reset is enabled: when done, read the final data from info['last_observation']; the state returned here is the observation obtained after the reset 47 | print("Episode finished after {} timesteps".format(i + 1)) 48 | # break 49 | print("{} elapsed".format(time() - start)) -------------------------------------------------------------------------------- /config/envs/Gibson/train_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from config.envs.base.base_config import BaseConfig 32 | 33 | class GibsonRobotCfg(BaseConfig): 34 | class env: 35 | num_envs = 4096 36 | num_observations = 235 37 | num_privileged_obs = None # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise 38 | num_actions = 12 39 | env_spacing = 3. # not used with heightfields/trimeshes 40 | send_timeouts = True # send time out information to the algorithm 41 | episode_length_s = 20 # episode length in seconds 42 | 43 | class rewards: 44 | class scales: 45 | termination = -0.0 46 | tracking_lin_vel = 1.0 47 | tracking_ang_vel = 0.5 48 | lin_vel_z = -2.0 49 | ang_vel_xy = -0.05 50 | orientation = -0. 51 | torques = -0.00001 52 | dof_vel = -0. 53 | dof_acc = -2.5e-7 54 | base_height = -0. 55 | feet_air_time = 1.0 56 | collision = -1. 57 | feet_stumble = -0.0 58 | action_rate = -0.01 59 | stand_still = -0. 60 | 61 | only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems) 62 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 63 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 64 | soft_dof_vel_limit = 1. 65 | soft_torque_limit = 1. 66 | base_height_target = 1. 67 | max_contact_force = 100. # forces above this value are penalized 68 | 69 | class normalization: 70 | class obs_scales: 71 | lin_vel = 2.0 72 | ang_vel = 0.25 73 | dof_pos = 1.0 74 | dof_vel = 0.05 75 | height_measurements = 5.0 76 | clip_observations = 100. 77 | clip_actions = 100. 78 | 79 | class noise: 80 | add_noise = True 81 | noise_level = 1.0 # scales other values 82 | class noise_scales: 83 | dof_pos = 0.01 84 | dof_vel = 1.5 85 | lin_vel = 0.1 86 | ang_vel = 0.2 87 | gravity = 0.05 88 | height_measurements = 0.1 89 | 90 | 91 | class GibsonCfgPPO(BaseConfig): 92 | seed = 1 93 | runner_class_name = 'OnPolicyRunner' 94 | class policy: 95 | init_noise_std = 0.0 96 | actor_hidden_dims = [256, 128] 97 | critic_hidden_dims = [256, 128] 98 | activation = 'tanh' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 99 | # only for 'ActorCriticRecurrent': 100 | # rnn_type = 'lstm' 101 | # rnn_hidden_size = 512 102 | # rnn_num_layers = 1 103 | 104 | class algorithm: 105 | # training params 106 | value_loss_coef = 0.5 107 | use_clipped_value_loss = True 108 | clip_param = 0.1 109 | entropy_coef = 0.02 110 | num_learning_epochs = 5 111 | num_mini_batches = 5 # mini batch size = num_envs*nsteps / nminibatches 112 | learning_rate = 2.5e-4 #5.e-4 113 | schedule = 'adaptive' # could be adaptive, fixed 114 | gamma = 0.99 115 | lam = 0.95 116 | desired_kl = 0.01 117 | max_grad_norm = 1.
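        # Note on schedule = 'adaptive' above: in legged_gym-style PPO
        # implementations the learning rate is typically nudged each update to
        # keep the measured KL divergence near desired_kl, along the lines of
        #     if kl > 2.0 * desired_kl:
        #         lr = max(1e-5, lr / 1.5)
        #     elif kl < 0.5 * desired_kl:
        #         lr = min(1e-2, lr * 1.5)
        # This is a sketch of the usual rule, not necessarily the exact update
        # implemented in rl/algorithms/ppo.py.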
118 | 119 | class runner: 120 | policy_class_name = 'ActorCritic' 121 | algorithm_class_name = 'PPO' 122 | num_steps_per_env = 128 # per iteration 123 | max_iterations = 1e6 # number of policy updates 124 | 125 | # logging 126 | save_interval = 500 # check for potential saves every this many iterations 127 | experiment_name = 'igibson_all' 128 | run_name = '' 129 | # load and resume 130 | resume = False 131 | load_run = -1 # -1 = last run 132 | checkpoint = -1 # -1 = last saved model 133 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /config/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | 32 | # from .base.legged_robot import LeggedRobot 33 | # from .a1.a1_config import A1RoughCfg, A1RoughCfgPPO 34 | # from .Gibson.config import GibsonRobotCfg, GibsonCfgPPO 35 | 36 | # import os 37 | 38 | # from igibson.utils.task_registry import task_registry 39 | 40 | # # task_registry.register( "a1", LeggedRobot, A1RoughCfg(), A1RoughCfgPPO() ) 41 | # task_registry.register( "Gibson", LeggedRobot, GibsonRobotCfg(), GibsonCfgPPO() ) 42 | -------------------------------------------------------------------------------- /config/envs/a1/a1_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 32 | 33 | class A1RoughCfg( LeggedRobotCfg ): 34 | class init_state( LeggedRobotCfg.init_state ): 35 | pos = [0.0, 0.0, 0.42] # x,y,z [m] 36 | default_joint_angles = { # = target angles [rad] when action = 0.0 37 | 'FL_hip_joint': 0.1, # [rad] 38 | 'RL_hip_joint': 0.1, # [rad] 39 | 'FR_hip_joint': -0.1, # [rad] 40 | 'RR_hip_joint': -0.1, # [rad] 41 | 42 | 'FL_thigh_joint': 0.8, # [rad] 43 | 'RL_thigh_joint': 1., # [rad] 44 | 'FR_thigh_joint': 0.8, # [rad] 45 | 'RR_thigh_joint': 1., # [rad] 46 | 47 | 'FL_calf_joint': -1.5, # [rad] 48 | 'RL_calf_joint': -1.5, # [rad] 49 | 'FR_calf_joint': -1.5, # [rad] 50 | 'RR_calf_joint': -1.5, # [rad] 51 | } 52 | 53 | class control( LeggedRobotCfg.control ): 54 | # PD Drive parameters: 55 | control_type = 'P' 56 | stiffness = {'joint': 20.} # [N*m/rad] 57 | damping = {'joint': 0.5} # [N*m*s/rad] 58 | # action scale: target angle = actionScale * action + defaultAngle 59 | action_scale = 0.25 60 | # decimation: Number of control action updates @ sim DT per policy DT 61 | decimation = 4 62 | 63 | class asset( LeggedRobotCfg.asset ): 64 | file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/a1/urdf/a1.urdf' 65 | name = "a1" 66 | foot_name = "foot" 67 | penalize_contacts_on = ["thigh", "calf"] 68 | terminate_after_contacts_on = ["base"] 69 | self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter 70 | 71 | class rewards( LeggedRobotCfg.rewards ): 72 | soft_dof_pos_limit = 0.9 73 | base_height_target = 0.25 74 | class scales( LeggedRobotCfg.rewards.scales ): 75 | torques = -0.0002 76 | dof_pos_limits = -10.0 77 | 78 | class A1RoughCfgPPO( LeggedRobotCfgPPO ): 79 | class algorithm( LeggedRobotCfgPPO.algorithm ): 80 | entropy_coef = 0.01 81 | class runner( LeggedRobotCfgPPO.runner ): 82 | run_name = '' 83 | experiment_name = 'rough_a1' 84 | 85 | -------------------------------------------------------------------------------- /config/envs/base/base_config.py:
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import inspect 32 | 33 | class BaseConfig: 34 | def __init__(self) -> None: 35 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 36 | self.init_member_classes(self) 37 | 38 | @staticmethod 39 | def init_member_classes(obj): 40 | # iterate over all attribute names 41 | for key in dir(obj): 42 | # disregard builtin attributes 43 | # if key.startswith("__"): 44 | if key=="__class__": 45 | continue 46 | # get the corresponding attribute object 47 | var = getattr(obj, key) 48 | # check if the attribute is a class 49 | if inspect.isclass(var): 50 | # instantiate the class 51 | i_var = var() 52 | # set the attribute to the instance instead of the type 53 | setattr(obj, key, i_var) 54 | # recursively init members of the attribute 55 | BaseConfig.init_member_classes(i_var) -------------------------------------------------------------------------------- /config/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2.
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import sys 32 | from isaacgym import gymapi 33 | from isaacgym import gymutil 34 | import numpy as np 35 | import torch 36 | 37 | # Base class for RL tasks 38 | class BaseTask(): 39 | 40 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 41 | self.gym = gymapi.acquire_gym() 42 | 43 | self.sim_params = sim_params 44 | self.physics_engine = physics_engine 45 | self.sim_device = sim_device 46 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device) 47 | self.headless = headless 48 | 49 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX. 
50 | if sim_device_type=='cuda' and sim_params.use_gpu_pipeline: 51 | self.device = self.sim_device 52 | else: 53 | self.device = 'cpu' 54 | 55 | # graphics device for rendering, -1 for no rendering 56 | self.graphics_device_id = self.sim_device_id 57 | if self.headless == True: 58 | self.graphics_device_id = -1 59 | 60 | self.num_envs = cfg.env.num_envs 61 | self.num_obs = cfg.env.num_observations 62 | self.num_privileged_obs = cfg.env.num_privileged_obs 63 | self.num_actions = cfg.env.num_actions 64 | 65 | # optimization flags for pytorch JIT 66 | torch._C._jit_set_profiling_mode(False) 67 | torch._C._jit_set_profiling_executor(False) 68 | 69 | # allocate buffers 70 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 71 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 72 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) 73 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) 74 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) 75 | if self.num_privileged_obs is not None: 76 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float) 77 | else: 78 | self.privileged_obs_buf = None 79 | # self.num_privileged_obs = self.num_obs 80 | 81 | self.extras = {} 82 | 83 | # create envs, sim and viewer 84 | self.create_sim() 85 | self.gym.prepare_sim(self.sim) 86 | 87 | # todo: read from config 88 | self.enable_viewer_sync = True 89 | self.viewer = None 90 | 91 | # if running with a viewer, set up keyboard shortcuts and camera 92 | if self.headless == False: 93 | # subscribe to keyboard shortcuts 94 | self.viewer = self.gym.create_viewer( 95 | self.sim, gymapi.CameraProperties()) 96 | self.gym.subscribe_viewer_keyboard_event( 97 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 98 | self.gym.subscribe_viewer_keyboard_event( 99 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 100 | 101 | def get_observations(self): 102 | return self.obs_buf 103 | 104 | def get_privileged_observations(self): 105 | return self.privileged_obs_buf 106 | 107 | def reset_idx(self, env_ids): 108 | """Reset selected robots""" 109 | raise NotImplementedError 110 | 111 | def reset(self): 112 | """ Reset all robots""" 113 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 114 | obs, privileged_obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 115 | return obs, privileged_obs 116 | 117 | def step(self, actions): 118 | raise NotImplementedError 119 | 120 | def render(self, sync_frame_time=True): 121 | if self.viewer: 122 | # check for window closed 123 | if self.gym.query_viewer_has_closed(self.viewer): 124 | sys.exit() 125 | 126 | # check for keyboard events 127 | for evt in self.gym.query_viewer_action_events(self.viewer): 128 | if evt.action == "QUIT" and evt.value > 0: 129 | sys.exit() 130 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 131 | self.enable_viewer_sync = not self.enable_viewer_sync 132 | 133 | # fetch results 134 | if self.device != 'cpu': 135 | self.gym.fetch_results(self.sim, True) 136 | 137 | # step graphics 138 | if self.enable_viewer_sync: 139 | self.gym.step_graphics(self.sim) 140 | self.gym.draw_viewer(self.viewer, self.sim, True) 141 | if sync_frame_time: 142 | self.gym.sync_frame_time(self.sim) 143 | else: 144 | self.gym.poll_viewer_events(self.viewer) 
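# Usage sketch (illustrative, not part of the original file): a concrete task
# subclasses BaseTask and implements the two hooks left abstract above;
# reset() then works unchanged because it only relies on reset_idx() and step().
#
#     class MyTask(BaseTask):
#         def reset_idx(self, env_ids):
#             ...  # reinitialize the selected environments
#
#         def step(self, actions):
#             ...  # apply actions, advance the sim, refresh the buffers
#             return (self.obs_buf, self.privileged_obs_buf, self.rew_buf,
#                     self.reset_buf, self.extras)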
-------------------------------------------------------------------------------- /config/envs/base/legged_robot_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .base_config import BaseConfig 32 | 33 | class LeggedRobotCfg(BaseConfig): 34 | class env: 35 | num_envs = 4096 36 | num_observations = 235 37 | num_privileged_obs = None # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise 38 | num_actions = 12 39 | env_spacing = 3. # not used with heightfields/trimeshes 40 | send_timeouts = True # send time out information to the algorithm 41 | episode_length_s = 20 # episode length in seconds 42 | 43 | class terrain: 44 | mesh_type = 'trimesh' # "heightfield" # none, plane, heightfield or trimesh 45 | horizontal_scale = 0.1 # [m] 46 | vertical_scale = 0.005 # [m] 47 | border_size = 25 # [m] 48 | curriculum = True 49 | static_friction = 1.0 50 | dynamic_friction = 1.0 51 | restitution = 0. 52 | # rough terrain only: 53 | measure_heights = True 54 | measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1.6m x 1m rectangle (without center line) 55 | measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5] 56 | selected = False # select a unique terrain type and pass all arguments 57 | terrain_kwargs = None # Dict of arguments for selected terrain 58 | max_init_terrain_level = 5 # starting curriculum state 59 | terrain_length = 8. 60 | terrain_width = 8.
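        # The generated terrain is a grid of num_rows x num_cols tiles, each
        # terrain_length x terrain_width meters: with the values here, 10 x 20
        # tiles of 8 m x 8 m, i.e. roughly 80 m x 160 m plus the border.
        # Rows are typically used as curriculum difficulty levels.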
61 | num_rows = 10 # number of terrain rows (levels) 62 | num_cols = 20 # number of terrain cols (types) 63 | # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete] 64 | terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2] 65 | # trimesh only: 66 | slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces 67 | 68 | class commands: 69 | curriculum = False 70 | max_curriculum = 1. 71 | num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 72 | resampling_time = 10. # time before commands are changed [s] 73 | heading_command = True # if true: compute ang vel command from heading error 74 | class ranges: 75 | lin_vel_x = [-1.0, 1.0] # min max [m/s] 76 | lin_vel_y = [-1.0, 1.0] # min max [m/s] 77 | ang_vel_yaw = [-1, 1] # min max [rad/s] 78 | heading = [-3.14, 3.14] 79 | 80 | class init_state: 81 | pos = [0.0, 0.0, 1.] # x,y,z [m] 82 | rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 83 | lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] 84 | ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] 85 | default_joint_angles = { # target angles when action = 0.0 86 | "joint_a": 0., 87 | "joint_b": 0.} 88 | 89 | class control: 90 | control_type = 'P' # P: position, V: velocity, T: torques 91 | # PD Drive parameters: 92 | stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad] 93 | damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad] 94 | # action scale: target angle = actionScale * action + defaultAngle 95 | action_scale = 0.5 96 | # decimation: Number of control action updates @ sim DT per policy DT 97 | decimation = 4 98 | 99 | class asset: 100 | file = "" 101 | name = "legged_robot" # actor name 102 | foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors 103 | penalize_contacts_on = [] 104 | terminate_after_contacts_on = [] 105 | disable_gravity = False 106 | collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true"> to the URDF 107 | fix_base_link = False # fix the base of the robot 108 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) 109 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 110 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation 111 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up 112 | 113 | density = 0.001 114 | angular_damping = 0. 115 | linear_damping = 0. 116 | max_angular_velocity = 1000. 117 | max_linear_velocity = 1000. 118 | armature = 0. 119 | thickness = 0.01 120 | 121 | class domain_rand: 122 | randomize_friction = True 123 | friction_range = [0.5, 1.25] 124 | randomize_base_mass = False 125 | added_mass_range = [-1., 1.] 126 | push_robots = True 127 | push_interval_s = 15 128 | max_push_vel_xy = 1. 129 | 130 | class rewards: 131 | class scales: 132 | termination = -0.0 133 | tracking_lin_vel = 1.0 134 | tracking_ang_vel = 0.5 135 | lin_vel_z = -2.0 136 | ang_vel_xy = -0.05 137 | orientation = -0. 138 | torques = -0.00001 139 | dof_vel = -0. 140 | dof_acc = -2.5e-7 141 | base_height = -0. 142 | feet_air_time = 1.0 143 | collision = -1. 144 | feet_stumble = -0.0 145 | action_rate = -0.01 146 | stand_still = -0.
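        # How these scales are consumed (the usual legged_gym convention,
        # which this config mirrors): each attribute names a reward term, its
        # value is that term's weight (negative values are penalties), terms
        # whose weight is exactly 0 are skipped, and the per-step reward is
        # approximately sum_i scale_i * r_i(state, action).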
147 | 148 | only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems) 149 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 150 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 151 | soft_dof_vel_limit = 1. 152 | soft_torque_limit = 1. 153 | base_height_target = 1. 154 | max_contact_force = 100. # forces above this value are penalized 155 | 156 | class normalization: 157 | class obs_scales: 158 | lin_vel = 2.0 159 | ang_vel = 0.25 160 | dof_pos = 1.0 161 | dof_vel = 0.05 162 | height_measurements = 5.0 163 | clip_observations = 100. 164 | clip_actions = 100. 165 | 166 | class noise: 167 | add_noise = True 168 | noise_level = 1.0 # scales other values 169 | class noise_scales: 170 | dof_pos = 0.01 171 | dof_vel = 1.5 172 | lin_vel = 0.1 173 | ang_vel = 0.2 174 | gravity = 0.05 175 | height_measurements = 0.1 176 | 177 | # viewer camera: 178 | class viewer: 179 | ref_env = 0 180 | pos = [10, 0, 6] # [m] 181 | lookat = [11., 5, 3.] # [m] 182 | 183 | class sim: 184 | dt = 0.005 185 | substeps = 1 186 | gravity = [0., 0. ,-9.81] # [m/s^2] 187 | up_axis = 1 # 0 is y, 1 is z 188 | 189 | class physx: 190 | num_threads = 10 191 | solver_type = 1 # 0: pgs, 1: tgs 192 | num_position_iterations = 4 193 | num_velocity_iterations = 0 194 | contact_offset = 0.01 # [m] 195 | rest_offset = 0.0 # [m] 196 | bounce_threshold_velocity = 0.5 #0.5 [m/s] 197 | max_depenetration_velocity = 1.0 198 | max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more 199 | default_buffer_size_multiplier = 5 200 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 201 | 202 | class LeggedRobotCfgPPO(BaseConfig): 203 | seed = 1 204 | runner_class_name = 'OnPolicyRunner' 205 | class policy: 206 | init_noise_std = 1.0 207 | actor_hidden_dims = [512, 256, 128] 208 | critic_hidden_dims = [512, 256, 128] 209 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 210 | # only for 'ActorCriticRecurrent': 211 | # rnn_type = 'lstm' 212 | # rnn_hidden_size = 512 213 | # rnn_num_layers = 1 214 | 215 | class algorithm: 216 | # training params 217 | value_loss_coef = 1.0 218 | use_clipped_value_loss = True 219 | clip_param = 0.2 220 | entropy_coef = 0.01 221 | num_learning_epochs = 5 222 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 223 | learning_rate = 1.e-3 #5.e-4 224 | schedule = 'adaptive' # could be adaptive, fixed 225 | gamma = 0.99 226 | lam = 0.95 227 | desired_kl = 0.01 228 | max_grad_norm = 1. 229 | 230 | class runner: 231 | policy_class_name = 'ActorCritic' 232 | algorithm_class_name = 'PPO' 233 | num_steps_per_env = 24 # per iteration 234 | max_iterations = 1500 # number of policy updates 235 | 236 | # logging 237 | save_interval = 50 # check for potential saves every this many iterations 238 | experiment_name = 'test' 239 | run_name = '' 240 | # load and resume 241 | resume = False 242 | load_run = -1 # -1 = last run 243 | checkpoint = -1 # -1 = last saved model 244 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /config/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | import sys 33 | sys.path.append('/Extra/lwy/gibson/graduate/') 34 | from rl.env.my_env import VecGibson 35 | import numpy as np 36 | import torch 37 | 38 | 39 | import argparse 40 | 41 | # from simple_agent import RandomAgent, ForwardOnlyAgent 42 | # from rl_agent import SACAgent 43 | from gibson2.utils.utils import parse_config 44 | from gibson2.challenge.challenge import Challenge 45 | from gibson2.envs.igibson_env import iGibsonEnv 46 | 47 | 48 | import datetime 49 | from rl.utils.log_utils import ini_logger 50 | from rl.utils.logging_engine import logger 51 | from rl.modules import ActorCritic, ActorCriticRecurrent 52 | from config.utils import task_registry 53 | 54 | from config.envs.Gibson.train_config import GibsonCfgPPO 55 | 56 | 57 | def main(): 58 | 59 | log_file_name = f"test_{datetime.datetime.now().strftime('%y%m%d%H%M%S')}.log" 60 | ini_logger(log_file_name, level='info') 61 | model_path = '/Extra/lwy/gibson/graduate/logs/igibson_all/Feb17_02-43-54_/model_3500.pt' 62 | # model_path = './transformer_waypoints/11_22/model/SAC_smtI_32_waypoints_11_22_std800' 63 | logger.info(f"Start to run {model_path}") 64 | env = VecGibson() 65 | 66 | # load policy 67 | GibsonCfgPPO.runner.resume = True 68 | GibsonCfgPPO.runner.log_root = '/Extra/lwy/gibson/graduate/logs/igibson_all/' 69 | GibsonCfgPPO.runner.load_run = 'Feb17_02-43-54_' 70 | GibsonCfgPPO.runner.checkpoint = '3500' 71 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name='gibson', train_cfg=GibsonCfgPPO) 72 | policy = ppo_runner.get_inference_policy(device='cuda') 73 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 74 | 75 | test(policy, ppo_runner, 0) 76 | 77 | def test(agent, runner, gpu): 78 | config_file =
'/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 79 | split = 'test' ## train 80 | episode_dir = '/home/lwy/IGibson2021/iGibson/gibson2/data/episodes_data/interactive_nav' 81 | 82 | eval_episodes_per_scene = os.environ.get( 83 | 'EVAL_EPISODES_PER_SCENE', 100) 84 | 85 | env_config = parse_config(config_file) 86 | task = env_config['task'] 87 | 88 | logger.info(f'{task},{split}') 89 | if task == 'interactive_nav_random': 90 | metrics = {key: 0.0 for key in [ 91 | 'success', 'spl', 'effort_efficiency', 'ins', 'episode_return']} 92 | 93 | elif task == 'social_nav_random': 94 | metrics = {key: 0.0 for key in [ 95 | 'success', 'stl', 'psc', 'episode_return']} 96 | else: 97 | assert False, 'unknown task: {}'.format(task) 98 | 99 | num_episodes_per_scene = eval_episodes_per_scene 100 | split_dir = os.path.join(episode_dir, split) 101 | assert os.path.isdir(split_dir) 102 | num_scenes = len(os.listdir(split_dir)) 103 | assert num_scenes > 0 104 | total_num_episodes = num_scenes * num_episodes_per_scene 105 | 106 | idx = 0 107 | for json_file in os.listdir(split_dir): 108 | scene_id = json_file.split('.')[0] 109 | json_file = os.path.join(split_dir, json_file) 110 | logger.info(json_file) 111 | env_config['scene_id'] = scene_id 112 | env_config['load_scene_episode_config'] = True 113 | env_config['scene_episode_config_name'] = json_file 114 | env = iGibsonEnv(config_file=env_config, 115 | mode='headless', 116 | action_timestep=1.0 / 10.0, 117 | physics_timestep=1.0 / 40.0, 118 | device_idx=gpu) 119 | scene_metrics = {key: 0.0 for key in [ 120 | 'success', 'spl', 'effort_efficiency', 'ins', 'episode_return']} 121 | for _ in range(num_episodes_per_scene): 122 | idx += 1 123 | state = env.reset() 124 | # memory = torch.FloatTensor([]).cuda() 125 | # belief_state, memory = agent.cal_belief_state(state, memory) 126 | episode_return = 0.0 127 | while True: 128 | # action = env.action_space.sample() 129 | action = runner.alg.act([state], [state]) 130 | state, reward, done, info = env.step(action[0]) 131 | # belief_state, memory = agent.cal_belief_state(state, memory) 132 | 133 | episode_return += reward 134 | if done: 135 | logger.info(f'Episode: {idx}/{total_num_episodes}, return :{episode_return}') 136 | break 137 | 138 | metrics['episode_return'] += episode_return 139 | scene_metrics['episode_return'] += episode_return 140 | for key in metrics: 141 | if key in info: 142 | metrics[key] += info[key] 143 | scene_metrics[key] += info[key] 144 | 145 | for key in metrics: 146 | scene_metrics[key] /= num_episodes_per_scene 147 | logger.info('Avg {}: {}'.format(key, scene_metrics[key])) 148 | 149 | env.close() 150 | 151 | for key in metrics: 152 | metrics[key] /= total_num_episodes 153 | logger.info('Avg {}: {}'.format(key, metrics[key])) 154 | return metrics['episode_return'] 155 | 156 | if __name__ == "__main__": 157 | main() 158 | -------------------------------------------------------------------------------- /config/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 
9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | import os 33 | from datetime import datetime 34 | import sys 35 | sys.path.append('/Extra/lwy/gibson/graduate/') 36 | 37 | from rl.env.my_env import VecGibson 38 | from config.utils import task_registry 39 | from config.envs.Gibson.train_config import GibsonCfgPPO 40 | import torch 41 | 42 | def train(): 43 | env = VecGibson() 44 | os.environ["CUDA_VISIBLE_DEVICES"] = "3" 45 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name='igibson', train_cfg=GibsonCfgPPO) 46 | ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=False) 47 | 48 | if __name__ == '__main__': 49 | train() 50 | -------------------------------------------------------------------------------- /config/tests/test_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | import os 33 | from datetime import datetime 34 | 35 | import isaacgym 36 | from legged_gym.envs import * 37 | from legged_gym.utils import get_args, export_policy_as_jit, task_registry, Logger 38 | 39 | import torch 40 | 41 | 42 | def test_env(args): 43 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 44 | # override some parameters for testing 45 | env_cfg.env.num_envs = min(env_cfg.env.num_envs, 10) 46 | 47 | # prepare environment 48 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 49 | for i in range(int(10*env.max_episode_length)): 50 | actions = 0.*torch.ones(env.num_envs, env.num_actions, device=env.device) 51 | obs, _, rew, done, info = env.step(actions) 52 | print("Done") 53 | 54 | if __name__ == '__main__': 55 | args = get_args() 56 | test_env(args) 57 | -------------------------------------------------------------------------------- /config/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict 32 | from .task_registry import task_registry 33 | from .logger import Logger 34 | from .math import * 35 | # from .terrain import Terrain -------------------------------------------------------------------------------- /config/utils/helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | import copy 33 | import torch 34 | import numpy as np 35 | import random 36 | # from isaacgym import gymapi 37 | # from isaacgym import gymutil 38 | 39 | # from rl import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 40 | 41 | def class_to_dict(obj) -> dict: 42 | if not hasattr(obj,"__dict__"): 43 | return obj 44 | result = {} 45 | for key in dir(obj): 46 | if key.startswith("_"): 47 | continue 48 | element = [] 49 | val = getattr(obj, key) 50 | if isinstance(val, list): 51 | for item in val: 52 | element.append(class_to_dict(item)) 53 | else: 54 | element = class_to_dict(val) 55 | result[key] = element 56 | return result 57 | 58 | def update_class_from_dict(obj, dict): 59 | for key, val in dict.items(): 60 | attr = getattr(obj, key, None) 61 | if isinstance(attr, type): 62 | update_class_from_dict(attr, val) 63 | else: 64 | setattr(obj, key, val) 65 | return 66 | 67 | def set_seed(seed): 68 | if seed == -1: 69 | seed = np.random.randint(0, 10000) 70 | print("Setting seed: {}".format(seed)) 71 | 72 | random.seed(seed) 73 | np.random.seed(seed) 74 | torch.manual_seed(seed) 75 | os.environ['PYTHONHASHSEED'] = str(seed) 76 | torch.cuda.manual_seed(seed) 77 | torch.cuda.manual_seed_all(seed) 78 | 79 | def parse_sim_params(args, cfg): 80 | # code from Isaac Gym Preview 2 81 | # initialize sim params 82 | sim_params = gymapi.SimParams() 83 | 84 | # set some values from args 85 | if args.physics_engine == gymapi.SIM_FLEX: 86 | if args.device != "cpu": 87 | print("WARNING: Using Flex with GPU instead of PHYSX!") 88 | elif args.physics_engine == gymapi.SIM_PHYSX: 89 | sim_params.physx.use_gpu = args.use_gpu 90 | sim_params.physx.num_subscenes = args.subscenes 91 | sim_params.use_gpu_pipeline = args.use_gpu_pipeline 92 | 93 | # if sim options are provided in cfg, parse them and update/override above: 94 | if "sim" in cfg: 95 | gymutil.parse_sim_config(cfg["sim"], sim_params) 96 | 97 | # Override num_threads if passed on the command line 98 | if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: 99 | sim_params.physx.num_threads = args.num_threads 100 | 101 | return sim_params 102 | 103 | def get_load_path(root, load_run=-1, checkpoint=-1): 104 | try: 105 | runs = os.listdir(root) 106 | #TODO sort by date to handle change of month 107 | runs.sort() 108 | if 'exported' in runs: runs.remove('exported') 109 | last_run = os.path.join(root, runs[-1]) 110 | except: 111 | raise ValueError("No runs in this directory: " + root) 112 | if load_run==-1: 113 | load_run = last_run 114 | else: 115 | load_run = os.path.join(root, load_run) 116 | 117 | if checkpoint==-1: 118 | models = [file for file in os.listdir(load_run) if 'model' in file] 119 | models.sort(key=lambda m: '{0:0>15}'.format(m)) 120 | model = models[-1] 121 | else: 122 | model = "model_{}.pt".format(checkpoint) 123 | 124 | load_path = os.path.join(load_run, model) 125 | return load_path 126 | 127 | def update_cfg_from_args(env_cfg, cfg_train, args): 128 | # seed 129 | if env_cfg is not None: 130 | # num envs 131 | if args.num_envs is not None: 132 | env_cfg.env.num_envs = args.num_envs 133 | if cfg_train is not None: 134 | if args.seed is not None: 135 | cfg_train.seed = args.seed 136 | # alg runner parameters 137 | if args.max_iterations is not None: 138 | cfg_train.runner.max_iterations = args.max_iterations 139 | if args.resume: 140 | cfg_train.runner.resume = args.resume 141 | if args.experiment_name is not None: 142 | cfg_train.runner.experiment_name = 
args.experiment_name 143 | if args.run_name is not None: 144 | cfg_train.runner.run_name = args.run_name 145 | if args.load_run is not None: 146 | cfg_train.runner.load_run = args.load_run 147 | if args.checkpoint is not None: 148 | cfg_train.runner.checkpoint = args.checkpoint 149 | 150 | return env_cfg, cfg_train 151 | 152 | def get_args(): 153 | custom_parameters = [ 154 | {"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Name of the task to run or train. Overrides config file if provided."}, 155 | {"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"}, 156 | {"name": "--experiment_name", "type": str, "help": "Name of the experiment to run or load. Overrides config file if provided."}, 157 | {"name": "--run_name", "type": str, "help": "Name of the run. Overrides config file if provided."}, 158 | {"name": "--load_run", "type": str, "help": "Name of the run to load when resume=True. If -1: will load the last run. Overrides config file if provided."}, 159 | {"name": "--checkpoint", "type": int, "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided."}, 160 | 161 | {"name": "--headless", "action": "store_true", "default": False, "help": "Force display off at all times"}, 162 | {"name": "--horovod", "action": "store_true", "default": False, "help": "Use horovod for multi-gpu training"}, 163 | {"name": "--rl_device", "type": str, "default": "cuda:0", "help": 'Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)'}, 164 | {"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."}, 165 | {"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."}, 166 | {"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."}, 167 | ]
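`gymutil.parse_arguments` below is Isaac Gym's thin wrapper around `argparse` that also injects simulator flags such as `--physics_engine` and the compute/graphics device ids. As a rough, hedged sketch of what the parameter list above amounts to when Isaac Gym is unavailable (plain stdlib `argparse`, no simulator-specific flags), it translates to:

```
# Minimal stdlib-argparse equivalent of the custom_parameters list above.
# Flag names and defaults are taken from that list; everything else here
# (the builder function, the sample invocation) is illustrative only.
import argparse

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="RL Policy")
    parser.add_argument("--task", type=str, default="anymal_c_flat")
    parser.add_argument("--resume", action="store_true", default=False)
    parser.add_argument("--experiment_name", type=str)
    parser.add_argument("--run_name", type=str)
    parser.add_argument("--load_run", type=str)
    parser.add_argument("--checkpoint", type=int)
    parser.add_argument("--headless", action="store_true", default=False)
    parser.add_argument("--rl_device", type=str, default="cuda:0")
    parser.add_argument("--num_envs", type=int)
    parser.add_argument("--seed", type=int)
    parser.add_argument("--max_iterations", type=int)
    return parser

args = build_parser().parse_args(["--task", "anymal_c_flat", "--seed", "1"])
assert args.seed == 1
```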
168 | # parse arguments 169 | args = gymutil.parse_arguments( 170 | description="RL Policy", 171 | custom_parameters=custom_parameters) 172 | 173 | # name alignment 174 | args.sim_device_id = args.compute_device_id 175 | args.sim_device = args.sim_device_type 176 | if args.sim_device=='cuda': 177 | args.sim_device += f":{args.sim_device_id}" 178 | return args 179 | 180 | def export_policy_as_jit(actor_critic, path): 181 | if hasattr(actor_critic, 'memory_a'): 182 | # assumes LSTM: TODO add GRU 183 | exporter = PolicyExporterLSTM(actor_critic) 184 | exporter.export(path) 185 | else: 186 | os.makedirs(path, exist_ok=True) 187 | path = os.path.join(path, 'policy_1.pt') 188 | model = copy.deepcopy(actor_critic.actor).to('cpu') 189 | traced_script_module = torch.jit.script(model) 190 | traced_script_module.save(path) 191 | 192 | 193 | class PolicyExporterLSTM(torch.nn.Module): 194 | def __init__(self, actor_critic): 195 | super().__init__() 196 | self.actor = copy.deepcopy(actor_critic.actor) 197 | self.is_recurrent = actor_critic.is_recurrent 198 | self.memory = copy.deepcopy(actor_critic.memory_a.rnn) 199 | self.memory.cpu() 200 | self.register_buffer('hidden_state', torch.zeros(self.memory.num_layers, 1, self.memory.hidden_size)) 201 | self.register_buffer('cell_state', torch.zeros(self.memory.num_layers, 1, self.memory.hidden_size)) 202 | 203 | def forward(self, x): 204 | out, (h, c) = self.memory(x.unsqueeze(0), (self.hidden_state, self.cell_state)) 205 | self.hidden_state[:] = h 206 | self.cell_state[:] = c 207 | return self.actor(out.squeeze(0)) 208 | 209 | @torch.jit.export 210 | def reset_memory(self): 211 | self.hidden_state[:] = 0. 212 | self.cell_state[:] = 0. 213 | 214 | def export(self, path): 215 | os.makedirs(path, exist_ok=True) 216 | path = os.path.join(path, 'policy_lstm_1.pt') 217 | self.to('cpu') 218 | traced_script_module = torch.jit.script(self) 219 | traced_script_module.save(path) 220 | 221 | 222 | -------------------------------------------------------------------------------- /config/utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | from collections import defaultdict 34 | from multiprocessing import Process, Value 35 | 36 | class Logger: 37 | def __init__(self, dt): 38 | self.state_log = defaultdict(list) 39 | self.rew_log = defaultdict(list) 40 | self.dt = dt 41 | self.num_episodes = 0 42 | self.plot_process = None 43 | 44 | def log_state(self, key, value): 45 | self.state_log[key].append(value) 46 | 47 | def log_states(self, dict): 48 | for key, value in dict.items(): 49 | self.log_state(key, value) 50 | 51 | def log_rewards(self, dict, num_episodes): 52 | for key, value in dict.items(): 53 | if 'rew' in key: 54 | self.rew_log[key].append(value.item() * num_episodes) 55 | self.num_episodes += num_episodes 56 | 57 | def reset(self): 58 | self.state_log.clear() 59 | self.rew_log.clear() 60 | 61 | def plot_states(self): 62 | self.plot_process = Process(target=self._plot) 63 | self.plot_process.start() 64 | 65 | def _plot(self): 66 | nb_rows = 3 67 | nb_cols = 3 68 | fig, axs = plt.subplots(nb_rows, nb_cols) 69 | for key, value in self.state_log.items(): 70 | time = np.linspace(0, len(value)*self.dt, len(value)) 71 | break 72 | log= self.state_log 73 | # plot joint targets and measured positions 74 | a = axs[1, 0] 75 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured') 76 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target') 77 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position') 78 | a.legend() 79 | # plot joint velocity 80 | a = axs[1, 1] 81 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured') 82 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target') 83 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity') 84 | a.legend() 85 | # plot base vel x 86 | a = axs[0, 0] 87 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured') 88 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded') 89 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x') 90 | a.legend() 91 | # plot base vel y 92 | a = axs[0, 1] 93 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured') 94 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded') 95 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y') 96 | a.legend() 97 | # plot base vel yaw 98 | a = axs[0, 2] 99 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured') 100 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') 101 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw') 102 | a.legend() 103 | # plot base vel z 104 | a = axs[1, 2] 105 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured') 106 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z') 107 | 
a.legend() 108 | # plot contact forces 109 | a = axs[2, 0] 110 | if log["contact_forces_z"]: 111 | forces = np.array(log["contact_forces_z"]) 112 | for i in range(forces.shape[1]): 113 | a.plot(time, forces[:, i], label=f'force {i}') 114 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') 115 | a.legend() 116 | # plot torque/vel curves 117 | a = axs[2, 1] 118 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') 119 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') 120 | a.legend() 121 | # plot torques 122 | a = axs[2, 2] 123 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') 124 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque') 125 | a.legend() 126 | plt.show() 127 | 128 | def print_rewards(self): 129 | print("Average rewards per second:") 130 | for key, values in self.rew_log.items(): 131 | mean = np.sum(np.array(values)) / self.num_episodes 132 | print(f" - {key}: {mean}") 133 | print(f"Total number of episodes: {self.num_episodes}") 134 | 135 | def __del__(self): 136 | if self.plot_process is not None: 137 | self.plot_process.kill() -------------------------------------------------------------------------------- /config/utils/math.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | from torch import Tensor 33 | import numpy as np 34 | # from isaacgym.torch_utils import quat_apply, normalize 35 | from typing import Tuple 36 | 37 | # @ torch.jit.script 38 | def quat_apply_yaw(quat, vec): 39 | quat_yaw = quat.clone().view(-1, 4) 40 | quat_yaw[:, :2] = 0. 
41 | quat_yaw = normalize(quat_yaw) 42 | return quat_apply(quat_yaw, vec) 43 | 44 | # @ torch.jit.script 45 | def wrap_to_pi(angles): 46 | angles %= 2*np.pi 47 | angles -= 2*np.pi * (angles > np.pi) 48 | return angles 49 | 50 | # @ torch.jit.script 51 | def torch_rand_sqrt_float(lower, upper, shape, device): 52 | # type: (float, float, Tuple[int, int], str) -> Tensor 53 | r = 2*torch.rand(*shape, device=device) - 1 54 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r)) 55 | r = (r + 1.) / 2. 56 | return (upper - lower) * r + lower --------------------------------------------------------------------------------
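A quick check of `wrap_to_pi` from the math utilities above (note `quat_apply` and `normalize` come from the commented-out `isaacgym.torch_utils` import, so they are not exercised here): angles are first reduced modulo 2π, then anything above π is shifted down by 2π, landing in (-π, π].

```
# Standalone demo of wrap_to_pi; mutates its input in place like the original.
import numpy as np
import torch

def wrap_to_pi(angles):
    angles %= 2 * np.pi
    angles -= 2 * np.pi * (angles > np.pi)
    return angles

a = torch.tensor([0.0, np.pi, 1.5 * np.pi, -0.5 * np.pi, 3 * np.pi])
print(wrap_to_pi(a))  # tensor([ 0.0000,  3.1416, -1.5708, -1.5708,  3.1416])
```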
/config/utils/task_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | from datetime import datetime 33 | from typing import Tuple 34 | import torch 35 | import numpy as np 36 | 37 | from rl.env import VecEnv 38 | from rl.runners import OnPolicyRunner 39 | 40 | from config import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 41 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params 42 | from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 43 | 44 | class TaskRegistry(): 45 | def __init__(self): 46 | self.task_classes = {} 47 | self.env_cfgs = {} 48 | self.train_cfgs = {} 49 | 50 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO): 51 | self.task_classes[name] = task_class 52 | self.env_cfgs[name] = env_cfg 53 | self.train_cfgs[name] = train_cfg 54 | 55 | def get_task_class(self, name: str) -> VecEnv: 56 | return self.task_classes[name] 57 | 58 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]: 59 | train_cfg = self.train_cfgs[name] 60 | env_cfg = self.env_cfgs[name] 61 | # copy seed 62 | env_cfg.seed = train_cfg.seed 63 | return env_cfg, train_cfg 64 | 65 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]: 66 | """ Creates an environment either from a registered name or from the provided config file. 67 | 68 | Args: 69 | name (string): Name of a registered env. 70 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 71 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None. 72 | 73 | Raises: 74 | ValueError: Error if no registered env corresponds to 'name' 75 | 76 | Returns: 77 | isaacgym.VecTaskPython: The created environment 78 | Dict: the corresponding config file 79 | """ 80 | # if no args passed get command line arguments 81 | if args is None: 82 | args = get_args() 83 | # check if there is a registered env with that name 84 | if name in self.task_classes: 85 | task_class = self.get_task_class(name) 86 | else: 87 | raise ValueError(f"Task with name: {name} was not registered") 88 | if env_cfg is None: 89 | # load config files 90 | env_cfg, _ = self.get_cfgs(name) 91 | # override cfg from args (if specified) 92 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args) 93 | set_seed(env_cfg.seed) 94 | # parse sim params (convert to dict first) 95 | sim_params = {"sim": class_to_dict(env_cfg.sim)} 96 | sim_params = parse_sim_params(args, sim_params) 97 | env = task_class( cfg=env_cfg, 98 | sim_params=sim_params, 99 | physics_engine=args.physics_engine, 100 | sim_device=args.sim_device, 101 | headless=args.headless) 102 | return env, env_cfg 103 | 104 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default") -> Tuple[OnPolicyRunner, LeggedRobotCfgPPO]: 105 | """ Creates the training algorithm either from a registered name or from the provided config file. 106 | 107 | Args: 108 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm) 109 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None. 110 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 111 | train_cfg (Dict, optional): Training config file. If None 'name' will be used to get the config file. Defaults to None. 
112 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example). 113 | Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>. 114 | 115 | Raises: 116 | ValueError: Error if neither 'name' nor 'train_cfg' is provided 117 | Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored 118 | 119 | Returns: 120 | PPO: The created algorithm 121 | Dict: the corresponding config file 122 | """ 123 | # # if no args passed get command line arguments 124 | # if args is None: 125 | # args = get_args() 126 | # # if config files are passed use them, otherwise load from the name 127 | # if train_cfg is None: 128 | # if name is None: 129 | # raise ValueError("Either 'name' or 'train_cfg' must be not None") 130 | # # load config files 131 | # _, train_cfg = self.get_cfgs(name) 132 | # else: 133 | # if name is not None: 134 | # print(f"'train_cfg' provided -> Ignoring 'name={name}'") 135 | # # override cfg from args (if specified) 136 | # _, train_cfg = update_cfg_from_args(None, train_cfg, args) 137 | 138 | if log_root=="default": 139 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name) 140 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 141 | elif log_root is None: 142 | log_dir = None 143 | else: 144 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 145 | 146 | train_cfg_dict = class_to_dict(train_cfg) 147 | runner = OnPolicyRunner(env, train_cfg_dict, log_dir, device='cuda') 148 | #save resume path before creating a new log_dir 149 | resume = train_cfg.runner.resume 150 | if resume: 151 | # load previously trained model 152 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint) 153 | print(f"Loading model from: {resume_path}") 154 | runner.load(resume_path) 155 | return runner, train_cfg 156 | 157 | # make global task registry 158 | task_registry = TaskRegistry() --------------------------------------------------------------------------------
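A hypothetical usage sketch of the registry above. The task name `"gibson_nav"` and the config instances are placeholders, `make_env` additionally requires Isaac Gym (`gymapi`/`gymutil`) for `get_args`/`parse_sim_params`, and, since the name-based config loading inside `make_alg_runner` is commented out in this version, `train_cfg` is passed explicitly:

```
# Illustrative only: registers a task, builds the env, then wraps it in PPO.
from config.utils.task_registry import task_registry
from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
from rl.env import VecGibson

task_registry.register("gibson_nav", VecGibson, LeggedRobotCfg(), LeggedRobotCfgPPO())
env, env_cfg = task_registry.make_env("gibson_nav")
_, train_cfg = task_registry.get_cfgs("gibson_nav")
runner, train_cfg = task_registry.make_alg_runner(env, train_cfg=train_cfg)
```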
/config/utils/terrain.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | from numpy.random import choice 33 | from scipy import interpolate 34 | 35 | from isaacgym import terrain_utils 36 | from config.envs.base.legged_robot_config import LeggedRobotCfg 37 | 38 | class Terrain: 39 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 40 | 41 | self.cfg = cfg 42 | self.num_robots = num_robots 43 | self.type = cfg.mesh_type 44 | if self.type in ["none", 'plane']: 45 | return 46 | self.env_length = cfg.terrain_length 47 | self.env_width = cfg.terrain_width 48 | self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))] 49 | 50 | self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols 51 | self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3)) 52 | 53 | self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale) 54 | self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale) 55 | 56 | self.border = int(cfg.border_size/self.cfg.horizontal_scale) 57 | self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border 58 | self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border 59 | 60 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16) 61 | if cfg.curriculum: 62 | self.curriculum() 63 | elif cfg.selected: 64 | self.selected_terrain() 65 | else: 66 | self.randomized_terrain() 67 | 68 | self.heightsamples = self.height_field_raw 69 | if self.type=="trimesh": 70 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw, 71 | self.cfg.horizontal_scale, 72 | self.cfg.vertical_scale, 73 | self.cfg.slope_treshold) 74 | 75 | def randomized_terrain(self): 76 | for k in range(self.cfg.num_sub_terrains): 77 | # Env coordinates in the world 78 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 79 | 80 | choice = np.random.uniform(0, 1) 81 | difficulty = np.random.choice([0.5, 0.75, 0.9]) 82 | terrain = self.make_terrain(choice, difficulty) 83 | self.add_terrain_to_map(terrain, i, j) 84 | 85 | def curriculum(self): 86 | for j in range(self.cfg.num_cols): 87 | for i in range(self.cfg.num_rows): 88 | difficulty = i / self.cfg.num_rows 89 | choice = j / self.cfg.num_cols + 0.001 90 | 91 | terrain = self.make_terrain(choice, difficulty) 92 | self.add_terrain_to_map(terrain, i, j) 93 | 94 | def selected_terrain(self): 95 | terrain_type = self.cfg.terrain_kwargs.pop('type') 96 | for k in range(self.cfg.num_sub_terrains): 97 | # Env coordinates in the world 98 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 99 | 100 | terrain = terrain_utils.SubTerrain("terrain", 101 | width=self.width_per_env_pixels, 102 | length=self.width_per_env_pixels, 103 | vertical_scale=self.cfg.vertical_scale, 104 | horizontal_scale=self.cfg.horizontal_scale) 105 | 106 | eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs) 107 | self.add_terrain_to_map(terrain, i, j) 108 | 
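The `proportions` list built in `__init__` above is a running cumulative sum, so a uniform `choice` in [0, 1) falls into the bucket of the first cumulative value it is below. A toy illustration (the example mix is assumed, not from the config):

```
# How cumulative terrain_proportions bucket a uniform sample into a terrain type.
import numpy as np

terrain_proportions = [0.2, 0.2, 0.2, 0.2, 0.2]  # assumed example mix
proportions = [np.sum(terrain_proportions[:i + 1]) for i in range(len(terrain_proportions))]
# proportions == [0.2, 0.4, 0.6, 0.8, 1.0]

choice = np.random.uniform(0, 1)
terrain_index = next(i for i, p in enumerate(proportions) if choice < p)
print(f"choice={choice:.2f} -> terrain bucket {terrain_index}")
```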
109 | def make_terrain(self, choice, difficulty): 110 | terrain = terrain_utils.SubTerrain( "terrain", 111 | width=self.width_per_env_pixels, 112 | length=self.width_per_env_pixels, 113 | vertical_scale=self.cfg.vertical_scale, 114 | horizontal_scale=self.cfg.horizontal_scale) 115 | slope = difficulty * 0.4 116 | step_height = 0.05 + 0.18 * difficulty 117 | discrete_obstacles_height = 0.05 + difficulty * 0.2 118 | stepping_stones_size = 1.5 * (1.05 - difficulty) 119 | stone_distance = 0.05 if difficulty==0 else 0.1 120 | gap_size = 1. * difficulty 121 | pit_depth = 1. * difficulty 122 | if choice < self.proportions[0]: 123 | if choice < self.proportions[0]/ 2: 124 | slope *= -1 125 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 126 | elif choice < self.proportions[1]: 127 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 128 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2) 129 | elif choice < self.proportions[3]: 130 | if choice self.desired_kl * 2.0: 217 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 218 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 219 | self.learning_rate = min(1e-3, self.learning_rate * 1.5) 220 | 221 | for param_group in self.optimizer.param_groups: 222 | if param_group['name'] == 'actor_critic': 223 | param_group['lr'] = self.learning_rate 224 | 225 | 226 | # Surrogate loss 227 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 228 | surrogate = -torch.squeeze(advantages_batch) * ratio 229 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 230 | 1.0 + self.clip_param) 231 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 232 | 233 | # Value function loss 234 | if self.use_clipped_value_loss: 235 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 236 | self.clip_param) 237 | value_losses = (value_batch - returns_batch).pow(2) 238 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 239 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 240 | else: 241 | value_loss = (returns_batch - value_batch).pow(2).mean() 242 | 243 | loss = surrogate_loss + self.value_loss_coef * 0.5 * value_loss - self.entropy_coef * entropy_batch.mean() 244 | 245 | # Gradient step 246 | self.optimizer.zero_grad() 247 | loss.backward() 248 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 249 | self.optimizer.step() 250 | 251 | mean_value_loss += value_loss.item() 252 | mean_surrogate_loss += surrogate_loss.item() 253 | 254 | num_updates = self.num_learning_epochs * self.num_mini_batches 255 | mean_value_loss /= num_updates 256 | mean_surrogate_loss /= num_updates 257 | self.storage.clear() 258 | 259 | return mean_value_loss, mean_surrogate_loss 260 | --------------------------------------------------------------------------------
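A toy re-computation of the clipped surrogate objective from the PPO update above, outside the class. Because the advantage term is negated, taking the elementwise `max` implements PPO's pessimistic (clipped) lower bound on the policy objective:

```
# Toy tensors only; clip_param matches the role of self.clip_param above.
import torch

clip_param = 0.2
advantages = torch.tensor([1.0, 1.0, -1.0])
log_prob_new = torch.log(torch.tensor([0.5, 0.9, 0.5]))
log_prob_old = torch.log(torch.tensor([0.5, 0.5, 0.5]))

ratio = torch.exp(log_prob_new - log_prob_old)  # [1.0, 1.8, 1.0]
surrogate = -advantages * ratio
surrogate_clipped = -advantages * torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)
surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
print(surrogate_loss)  # tensor(-0.4000): the ratio 1.8 is clipped to 1.2
```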
/rl/conf/configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE 20 | 21 | import os 22 | 23 | 24 | class Configs(object): 25 | MAX_SCORE = 9999999999 26 | 27 | # time interval of one algorithm slice (simulator step) 28 | ALG_RUN_FREQUENCY = 10 # unit: minutes 29 | 30 | # coefficient balancing the multi-objective weights 31 | LAMDA = 10000 32 | 33 | # different pallet types of orders 34 | PALLET_TYPE_LABELS = ["PALLET", "HALF_PALLET", "BOX"] 35 | LABEL_TO_DEMAND_UNIT = {"PALLET": 1, "HALF_PALLET": 0.5, "BOX": 0.25} 36 | STANDARD_PALLET_LABEL = "PALLET" 37 | SMALL_PALLET_LABEL = "HALF_PALLET" 38 | BOX_LABEL = "BOX" 39 | 40 | # order status 0: initialization, 1: generated, 2: ongoing, 3: completed 41 | ORDER_STATUS_TO_CODE = {"INITIALIZATION": 0, "GENERATED": 1, "ONGOING": 2, "COMPLETED": 3} 42 | 43 | # loading and unloading speed 44 | LOAD_SPEED = 0.25 # unit is standard pallet per minute 45 | UNLOAD_SPEED = 0.25 # unit is standard pallet per minute 46 | 47 | # dock approaching time 48 | DOCK_APPROACHING_TIME = 30 * 60 # unit: second 49 | 50 | # file paths 51 | root_folder_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 52 | benchmark_folder_path = os.path.join(root_folder_path, "benchmark") 53 | src_folder_path = os.path.join(root_folder_path, "src") 54 | algorithm_folder_path = os.path.join(root_folder_path, "algorithm") 55 | output_folder = os.path.join(root_folder_path, "output") 56 | if not os.path.exists(output_folder): 57 | os.makedirs(output_folder) 58 | 59 | # route_info_file = "route_info.csv" 60 | # factory_info_file = "factory_info.csv" 61 | # route_info_file_path = os.path.join(benchmark_folder_path, route_info_file) 62 | # factory_info_file_path = os.path.join(benchmark_folder_path, factory_info_file) 63 | 64 | # algorithm_data_interaction_folder_path = os.path.join(algorithm_folder_path, "data_interaction") 65 | # if not os.path.exists(algorithm_data_interaction_folder_path): 66 | # os.makedirs(algorithm_data_interaction_folder_path) 67 | # algorithm_vehicle_input_info_path = os.path.join(algorithm_data_interaction_folder_path, "vehicle_info.json") 68 | # algorithm_unallocated_order_items_input_path = os.path.join(algorithm_data_interaction_folder_path, 69 | # "unallocated_order_items.json") 70 | # algorithm_ongoing_order_items_input_path = os.path.join(algorithm_data_interaction_folder_path, 71 | # "ongoing_order_items.json") 72 | 73 | # algorithm_output_destination_path = os.path.join(algorithm_data_interaction_folder_path, 
'output_destination.json') 74 | # algorithm_output_planned_route_path = os.path.join(algorithm_data_interaction_folder_path, 'output_route.json') 75 | 76 | # algorithm entry file name, without extension 77 | ALGORITHM_ENTRY_FILE_NAME = 'main_algorithm' 78 | 79 | # mapping from file extension to algorithm language 80 | ALGORITHM_LANGUAGE_MAP = {'py': 'python', 81 | 'class': 'java', 82 | 'exe': 'c', 83 | 'out': 'c', 84 | } 85 | 86 | # random seed 87 | RANDOM_SEED = 0 88 | 89 | # timeout for one algorithm run 90 | MAX_RUNTIME_OF_ALGORITHM = 600 91 | 92 | # flag indicating that the algorithm finished successfully 93 | ALGORITHM_SUCCESS_FLAG = 'SUCCESS' 94 | 95 | # maximum number of log files 96 | MAX_LOG_FILE_NUM = 10 97 | 98 | # number of seconds in a day 99 | A_DAY_TIME_SECONDS = 24 * 60 * 60 100 | 101 | # dataset selection; an empty list selects all datasets, e.g. [], [1], [1, 2, 3], [64] 102 | selected_instances = [1] 103 | all_test_instances = range(1, 65) 104 | --------------------------------------------------------------------------------
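A small sketch of how the constants above combine: total dock time for a vehicle is the dock-approaching overhead plus loading time, where loading time is the demand in standard pallets divided by `LOAD_SPEED`. The item list here is made up for illustration:

```
# Illustrative arithmetic with the Configs constants above.
from rl.conf.configs import Configs

items = ["PALLET", "HALF_PALLET", "BOX", "BOX"]  # hypothetical order items
demand = sum(Configs.LABEL_TO_DEMAND_UNIT[label] for label in items)  # 2.0 pallets
load_minutes = demand / Configs.LOAD_SPEED                            # 8.0 minutes
total_seconds = Configs.DOCK_APPROACHING_TIME + load_minutes * 60
print(total_seconds)  # 2280.0 seconds
```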
/rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv 32 | from .my_env import VecGibson -------------------------------------------------------------------------------- /rl/env/my_env.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import sys 3 | sys.path.append('/Extra/lwy/gibson/graduate/') 4 | 5 | from rl.env.vec_env import VecEnv 6 | import torch 7 | from typing import Tuple, Union 8 | 9 | import gibson2 10 | from gibson2.envs.igibson_env import iGibsonEnv 11 | from gibson2.envs.parallel_env import ParallelNavEnv 12 | import atexit 13 | import multiprocessing 14 | import sys 15 | import traceback 16 | import numpy as np 17 | import os 18 | from gibson2.utils.utils import parse_config 19 | import logging 20 | logging.getLogger().setLevel(logging.WARNING) 21 | 22 | 23 | class VecGibson(VecEnv): 24 | def __init__(self) -> None: 25 | super().__init__() 26 | num_envs: int 27 | num_obs: int 28 | num_privileged_obs: int 29 | num_actions: int 30 | max_episode_length: int 31 | privileged_obs_buf: torch.Tensor 32 | obs_buf: torch.Tensor 33 | rew_buf: torch.Tensor 34 | reset_buf: torch.Tensor 35 | episode_length_buf: torch.Tensor # current episode duration 36 | extras: dict 37 | device: torch.device 38 | self.num_obs = 260 39 | self.num_privileged_obs = None 40 | self.num_actions = 2 41 | self.max_episode_length = 500 42 | 43 | 44 | config_file_name = '/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 45 | env_config = parse_config(config_file_name) 46 | self.num_envs = 5 47 | GPU_ID = [1,2] * 5 48 | self.Env_name = ['Beechwood_1_int','Benevolence_0_int','Ihlen_0_int','Ihlen_1_int','Merom_0_int','Pomaria_0_int','Rs_int','Wainscott_1_int'] 49 | self.Training_Env = self.Env_name[:5] * 2 50 | self.Testing_Env = self.Env_name[-3:] 51 | self.core_id = 0 52 | class load_env(object): 53 | def __init__(self, num_envs, envs, GPU_ID, i) -> None: 54 | self.num_envs = num_envs 55 | self.id = i 56 | self.envs = envs 57 | self.GPU_ID = GPU_ID 58 | def __call__(self, *args, **kwds): 59 | logging.warning(self.envs[self.id]) 60 | logging.warning(GPU_ID[self.id]) 61 | return iGibsonEnv(config_file = env_config, 62 | scene_id = self.envs[self.id], 63 | mode = 'headless', 64 | action_timestep = 1.0 / 10.0, 65 | physics_timestep = 1.0 / 40.0, 66 | device_idx = GPU_ID[self.id], 67 | automatic_reset = True) 68 | self.parallel_env = ParallelNavEnv([load_env(self.num_envs, self.Training_Env, GPU_ID, i) for i in range(0, self.num_envs)], blocking=False) ## env_constructor list contains callable functions 69 | print(self.Training_Env) 70 | 71 | 72 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 73 | vec_res = self.parallel_env.step(actions) 74 | self.obs, rewards, dones, infos = self.process_vec_env(vec_res) 75 | return self.obs, None, rewards, dones, infos 76 | 77 | 78 | def reset(self, env_ids = 'all'): 79 | ''' 80 | reset state 81 | obs includes list of dicts (task_obs rgb and depth) 82 | ''' 83 | vec_res = self.parallel_env.reset() 84 | self.obs = [obs for obs in vec_res] 85 | return self.obs, None 86 | 87 | def get_observations(self) -> torch.Tensor: 88 | return self.obs 89 | 90 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 91 | return None 92 | 93 | def process_vec_env(self, vec_res): 94 | ''' 95 | input: vec_res 96 | output: obs, rewards, dones, infos 97 | '''
98 | obs = [] 99 | rewards = [] 100 | dones = [] 101 | infos = defaultdict(list) 102 | 103 | for res in vec_res: 104 | state, reward, done, info = res 105 | # if done: 106 | # print('done') 107 | obs.append(state if not done else info['last_observation']) ## the env auto-resets after done; keep last_observation instead of the first frame of the new episode 108 | rewards.append(reward) 109 | dones.append(done) 110 | # infos.append(info) 111 | info['time_outs'] = True if done and info['episode_length'] == 500 else False 112 | for key in info: 113 | infos[key].append(info[key]) 114 | 115 | return obs, torch.tensor(np.array(rewards)), torch.tensor(np.array(dones)), infos 116 | 117 | def cal_belief_state(self, state, memory): 118 | """ 119 | args: 120 | state: a single observation (one frame) 121 | memory 122 | 123 | First compute the embedding of the current state, 124 | then update the memory, 125 | then derive the current belief_state. 126 | return: 127 | belief_state 128 | """ 129 | with torch.no_grad(): 130 | task_obs = state['task_obs'].copy() 131 | rgb = state['rgb'].copy() 132 | depth = state['depth'].copy() 133 | 134 | ## (T, D) layout: add only the sequence dimension T 135 | task_obs = torch.FloatTensor(task_obs).unsqueeze(0).cuda() 136 | rgb = torch.FloatTensor(rgb).unsqueeze(0).cuda() 137 | depth = torch.FloatTensor(depth).unsqueeze(0).cuda() 138 | 139 | encoder_state, memory = self.encoder_net(rgb,depth,task_obs, 0, memory) 140 | ## cat predicted angle 141 | # angle = self.decoder_net(encoder_state) * math.pi 142 | # encoder_state = torch.cat((encoder_state, angle), -1) 143 | 144 | return encoder_state.detach().cpu().numpy(), memory ## detach to drop the computation graph 145 | 146 | if __name__ == "__main__": 147 | env = VecGibson() 148 | print('ok') --------------------------------------------------------------------------------
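A toy illustration of the `process_vec_env` contract above, with hand-made `(state, reward, done, info)` tuples standing in for `ParallelNavEnv` output (the observation dicts and info keys mirror the ones used in the loop):

```
# Illustrative only: fake vectorized-env results instead of iGibson output.
import numpy as np
import torch
from collections import defaultdict

fake_results = [
    ({"task_obs": np.zeros(4)}, 0.1, False, {"episode_length": 12}),
    ({"task_obs": np.ones(4)}, 1.0, True, {"episode_length": 500,
                                           "last_observation": {"task_obs": np.full(4, 2.0)}}),
]

obs, rewards, dones = [], [], []
infos = defaultdict(list)
for state, reward, done, info in fake_results:
    obs.append(state if not done else info["last_observation"])  # keep last obs on done
    rewards.append(reward)
    dones.append(done)
    info["time_outs"] = bool(done and info["episode_length"] == 500)
    for key in info:
        infos[key].append(info[key])

print(torch.tensor(np.array(rewards)))  # tensor([0.1000, 1.0000], dtype=torch.float64)
print(infos["time_outs"])               # [False, True]
```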
/rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from abc import ABC, abstractmethod 32 | import torch 33 | from typing import Tuple, Union 34 | 35 | # minimal interface of the environment 36 | class VecEnv(ABC): 37 | num_envs: int 38 | num_obs: int 39 | num_privileged_obs: int 40 | num_actions: int 41 | max_episode_length: int 42 | privileged_obs_buf: torch.Tensor 43 | obs_buf: torch.Tensor 44 | rew_buf: torch.Tensor 45 | reset_buf: torch.Tensor 46 | episode_length_buf: torch.Tensor # current episode duration 47 | extras: dict 48 | device: torch.device 49 | @abstractmethod 50 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 51 | pass 52 | @abstractmethod 53 | def reset(self, env_ids: Union[list, torch.Tensor]): 54 | pass 55 | @abstractmethod 56 | def get_observations(self) -> torch.Tensor: 57 | pass 58 | @abstractmethod 59 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 60 | pass --------------------------------------------------------------------------------
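A minimal concrete `VecEnv` for smoke-testing the runner plumbing, entirely illustrative: a stateless environment that returns random observations and rewards and implements all four abstract methods of the interface above.

```
# DummyVecEnv is a made-up example, not part of the repository.
import torch
from rl.env.vec_env import VecEnv

class DummyVecEnv(VecEnv):
    def __init__(self, num_envs=2, num_obs=4, num_actions=2):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.num_privileged_obs = None
        self.max_episode_length = 100
        self.device = torch.device("cpu")
        self.obs_buf = torch.zeros(num_envs, num_obs)

    def step(self, actions: torch.Tensor):
        self.obs_buf = torch.randn(self.num_envs, self.num_obs)
        rew = torch.randn(self.num_envs)
        dones = torch.zeros(self.num_envs, dtype=torch.bool)
        return self.obs_buf, None, rew, dones, {}

    def reset(self, env_ids="all"):
        self.obs_buf = torch.zeros(self.num_envs, self.num_obs)
        return self.obs_buf, None

    def get_observations(self):
        return self.obs_buf

    def get_privileged_observations(self):
        return None

env = DummyVecEnv()
obs, _, rew, dones, infos = env.step(torch.zeros(2, 2))
```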
/rl/modules/SMT.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch._C import device 4 | from torch.nn import functional as F 5 | from torch.autograd import Variable 6 | from torch.nn.modules.pooling import MaxPool2d 7 | import torch.utils.model_zoo as model_zoo 8 | 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | import math 12 | class PositionalEncoding(nn.Module): 13 | 14 | def __init__(self, d_model: int, max_len: int = 501): 15 | super().__init__() 16 | # self.dropout = nn.Dropout(p=dropout) 17 | position = torch.arange(max_len).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)) 19 | pe = torch.zeros(max_len, 1, d_model) 20 | pe[:, 0, 0::2] = torch.sin(position * div_term) 21 | pe[:, 0, 1::2] = torch.cos(position * div_term) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x: Tensor) -> Tensor: 25 | """ 26 | Args: 27 | x: Tensor, shape [seq_len, batch_size, embedding_dim] 28 | """ 29 | x = x + self.pe[:x.size(0)] 30 | return x 31 | 32 | class AttBlock(nn.Module): 33 | def __init__(self, d_model, nhead: int = 4): 34 | super(AttBlock, self).__init__() 35 | self.multi_att = nn.MultiheadAttention(d_model, nhead) 36 | self.norm1 = nn.LayerNorm(d_model) 37 | # self.norm2 = nn.LayerNorm(d_model) 38 | self.linear = nn.Linear(d_model, d_model) 39 | 40 | 41 | def forward(self, X, Y, attn_mask, key_padding_mask): 42 | ''' 43 | X: query (L, N, E) 44 | Y: key value (S, N, E) 45 | attn_mask: (L, S) 46 | key_padding_mask: `(N, S)` 47 | output: (L, N, E) 48 | ''' 49 | # H = self.norm1(self.multi_att(X, Y, Y, attn_mask = mask)[0] + X) 50 | # return self.norm2(torch.relu(self.linear(H)) + H) 51 | 52 | ## Tr_I wrong 53 | # H = self.norm1(self.multi_att(X, Y, Y, attn_mask = mask)[0]) + X 54 | # return self.norm2(torch.relu(self.linear(H))) + H 55 | 56 | ## Tr_I_fix 57 | H = torch.relu(self.multi_att(X, Y, Y, key_padding_mask = key_padding_mask, attn_mask = attn_mask)[0]) + X 58 | return torch.relu(self.linear(self.norm1(H))) + H 59 | 60 | 61 | class SMT_state_encoder(nn.Module): 62 | def __init__(self, d_model, nhead: int = 4): 63 | super(SMT_state_encoder, self).__init__() 64 | self.encoder = AttBlock(d_model, nhead) ## index [0] is the attention output, [1] the attention weights 65 | self.decoder = AttBlock(d_model, nhead) 66 | self.pos_encoder1 = PositionalEncoding(d_model) 67 | self.pos_encoder2 = PositionalEncoding(d_model) 68 | 69 | def forward(self, o, M, flag, key_padding_mask): 70 | 71 | c_mask = self.causal_mask(M) 72 | M = self.encoder(M, M, c_mask, key_padding_mask) 73 | if flag == 1: ## training 74 | attn_mask = self.sequence_length_mask(M, 32) ## T * T 75 | else: ## inference 76 | attn_mask = self.infer_mask(o, M, 32) 77 | return self.decoder(o, M, attn_mask, key_padding_mask) 78 | 79 | def causal_mask(self, seq): 80 | seq_len, batch_size, _ = seq.size() 81 | mask = torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 82 | diagonal=1) ## 1 means masked 83 | # mask = mask.unsqueeze(0).expand(batch_size, -1, -1) # [B, L, L] 84 | return mask.to(bool) 85 | 86 | def infer_mask(self, o, M, length): 87 | ''' 88 | o_len * M_len 89 | ''' 90 | o_len, batch_size, _ = o.size() 91 | M_len, batch_size, _ = M.size() 92 | 93 | mask = torch.ones((o_len, M_len), dtype=torch.uint8, device = 'cuda') 94 | mask[:, max(M_len - length, 0) : M_len] = 0 ## 1 means masked 95 | return mask.to(bool) 96 | 97 | def sequence_length_mask(self, seq, length): 98 | seq_len, batch_size, _ = seq.size() 99 | causal_mask = torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 100 | diagonal=1) ## 1 means masked (causal mask) 101 | 102 | len_mask = 1 - torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 103 | diagonal= -(length - 1)) # keep the main diagonal, everything above it, and the (length-1) diagonals below it 104 | mask = causal_mask + len_mask 105 | return mask.to(bool).cuda() 106 | ## pytorch layout: N, C, H, W 107 | ## tensorflow layout: N, H, W, C 108 | if __name__ == '__main__': 109 | # encoder = Encoder(feature_dim = 252) 110 | # rgb = Variable(torch.randn(1, 3, 180, 320)) 111 | # depth = Variable(torch.randn(1, 1, 180, 320)) 112 | o_obs = Variable(torch.randn(1, 1, 4)) 113 | M_obs = Variable(torch.randn(33, 1, 4)) 114 | 115 | # print(encoder(rgb,depth,task_obs).size()) 116 | # print(x) 117 | 118 | encoder = SMT_state_encoder(512, 4) 119 | print(encoder.infer_mask(o_obs, M_obs, 32)) 120 | 121 | 122 | 123 | 124 | 125 | --------------------------------------------------------------------------------
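A small CPU demo of the two masks combined in `sequence_length_mask` above (the module hardcodes `device='cuda'`; here the same masks are built on CPU). `True` entries are masked out, so the result is a causal mask further restricted to a sliding window of `length` past steps:

```
# Causal mask + window mask, as composed in sequence_length_mask.
import torch

seq_len, length = 5, 2
causal = torch.triu(torch.ones(seq_len, seq_len, dtype=torch.uint8), diagonal=1)
window = 1 - torch.triu(torch.ones(seq_len, seq_len, dtype=torch.uint8), diagonal=-(length - 1))
mask = (causal + window).to(bool)
print(mask.int())
# tensor([[0, 1, 1, 1, 1],
#         [0, 0, 1, 1, 1],
#         [1, 0, 0, 1, 1],
#         [1, 1, 0, 0, 1],
#         [1, 1, 1, 0, 0]], dtype=torch.int32)
```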
/rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .actor_critic import ActorCritic 32 | from .actor_critic_recurrent import ActorCriticRecurrent -------------------------------------------------------------------------------- /rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | 38 | class ActorCritic(nn.Module): 39 | is_recurrent = False 40 | def __init__(self, num_actor_obs, 41 | num_critic_obs, 42 | num_actions, 43 | actor_hidden_dims=[256, 256, 256], 44 | critic_hidden_dims=[256, 256, 256], 45 | activation='elu', 46 | init_noise_std=1.0, 47 | **kwargs): 48 | if kwargs: 49 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 50 | super(ActorCritic, self).__init__() 51 | 52 | activation = get_activation(activation) 53 | 54 | mlp_input_dim_a = num_actor_obs 55 | mlp_input_dim_c = num_critic_obs 56 | 57 | # Policy 58 | actor_layers = [] 59 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 60 | actor_layers.append(activation) 61 | for l in range(len(actor_hidden_dims) - 1): 62 | # if l == len(actor_hidden_dims) - 1: 63 | # actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 64 | # else: 65 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 66 | actor_layers.append(activation) 67 | self.policy_embedding = nn.Sequential(*actor_layers) 68 | 69 | self.mean_linear = nn.Linear(actor_hidden_dims[-1], num_actions) 70 | # self.log_std_linear = nn.Linear(actor_hidden_dims[-1], num_actions) 71 | 72 | # Value function 73 | critic_layers = [] 74 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 75 | critic_layers.append(activation) 76 | for l in range(len(critic_hidden_dims)): 77 | if l == len(critic_hidden_dims) - 1: 78 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 79 | else: 80 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 81 | critic_layers.append(activation) 82 | self.critic = nn.Sequential(*critic_layers) 83 | 84 | # print(f"Actor MLP: {self.actor}") 85 | # print(f"Critic MLP: {self.critic}") 86 | 87 | # Action noise; the -0.69 offset targets an initial std near 0.5 (exp(-0.69) ≈ 0.5) 88 | self.actor_logstd = nn.Parameter(init_noise_std * torch.ones(num_actions) - 0.69) 89 | self.std = torch.exp(self.actor_logstd) 90 | self.distribution = None 91 | # disable args validation for speedup 92 | Normal.set_default_validate_args = False 93 | 94 | # seems that we get better performance without init 95 | # self.init_memory_weights(self.memory_a, 0.001, 0.) 96 | # self.init_memory_weights(self.memory_c, 0.001, 0.) 
97 | 98 | @staticmethod 99 | # not used at the moment 100 | def init_weights(sequential, scales): 101 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 102 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 103 | 104 | 105 | def reset(self, dones=None): 106 | pass 107 | 108 | def forward(self): 109 | raise NotImplementedError 110 | 111 | @property 112 | def action_mean(self): 113 | return self.distribution.mean 114 | 115 | @property 116 | def action_std(self): 117 | return self.distribution.stddev 118 | 119 | @property 120 | def entropy(self): 121 | return self.distribution.entropy().sum(dim=-1) 122 | 123 | def update_distribution(self, observations): 124 | # mean = self.actor(observations) 125 | embedding = self.policy_embedding(observations) 126 | mean = self.mean_linear(embedding) 127 | # self.std = torch.exp(self.actor_logstd) 128 | # log_std = self.log_std_linear(embedding) 129 | # log_std = torch.clamp(self.actor_logstd, -20, 0) ## values start near 0; logstd should start near -0.5, keeping the std roughly between 0 and 0.5 130 | self.std = self.actor_logstd.exp() 131 | self.distribution = Normal(mean, mean*0. + self.std) 132 | 133 | def act(self, observations, **kwargs): 134 | self.update_distribution(observations) 135 | return self.distribution.sample() 136 | 137 | def get_actions_log_prob(self, actions): 138 | try: 139 | return self.distribution.log_prob(actions).sum(dim=-1) 140 | except Exception: 141 | print(actions.size(), self.distribution) 142 | raise 143 | 144 | def act_inference(self, observations): 145 | # actions_mean = self.actor(observations) 146 | embedding = self.policy_embedding(observations) 147 | actions_mean = self.mean_linear(embedding) 148 | return actions_mean 149 | 150 | def evaluate(self, critic_observations, **kwargs): 151 | value = self.critic(critic_observations) 152 | return value 153 | 154 | def get_activation(act_name): 155 | if act_name == "elu": 156 | return nn.ELU() 157 | elif act_name == "selu": 158 | return nn.SELU() 159 | elif act_name == "relu": 160 | return nn.ReLU() 161 | elif act_name == "crelu": 162 | return nn.ReLU() 163 | elif act_name == "lrelu": 164 | return nn.LeakyReLU() 165 | elif act_name == "tanh": 166 | return nn.Tanh() 167 | elif act_name == "sigmoid": 168 | return nn.Sigmoid() 169 | else: 170 | print("invalid activation function!") 171 | return None 172 | --------------------------------------------------------------------------------
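A standalone sketch of the Gaussian head used in `update_distribution` above: a state-independent, learnable log-std is exponentiated and broadcast against the action mean, and joint log-probabilities sum over action dimensions.

```
# Toy shapes only; mirrors actor_logstd / Normal usage from ActorCritic.
import torch
from torch.distributions import Normal

num_actions = 2
actor_logstd = torch.nn.Parameter(torch.ones(num_actions) - 0.69)
mean = torch.zeros(3, num_actions)             # e.g. a batch of 3 action means

std = actor_logstd.exp()
dist = Normal(mean, mean * 0.0 + std)          # broadcast std to the batch shape
actions = dist.sample()
log_prob = dist.log_prob(actions).sum(dim=-1)  # joint log-prob over action dims
print(actions.shape, log_prob.shape)           # torch.Size([3, 2]) torch.Size([3])
```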
--------------------------------------------------------------------------------
/rl/modules/actor_critic_recurrent.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import numpy as np
32 | 
33 | import torch
34 | import torch.nn as nn
35 | from torch.distributions import Normal
36 | from torch.nn.modules import rnn
37 | from .actor_critic import ActorCritic, get_activation
38 | from rl.utils import unpad_trajectories
39 | 
40 | class ActorCriticRecurrent(ActorCritic):
41 |     is_recurrent = True
42 |     def __init__(self, num_actor_obs,
43 |                  num_critic_obs,
44 |                  num_actions,
45 |                  actor_hidden_dims=[256, 256, 256],
46 |                  critic_hidden_dims=[256, 256, 256],
47 |                  activation='elu',
48 |                  rnn_type='lstm',
49 |                  rnn_hidden_size=256,
50 |                  rnn_num_layers=1,
51 |                  init_noise_std=1.0,
52 |                  **kwargs):
53 |         if kwargs:
54 |             print("ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()))
55 | 
56 |         super().__init__(num_actor_obs=rnn_hidden_size,
57 |                          num_critic_obs=rnn_hidden_size,
58 |                          num_actions=num_actions,
59 |                          actor_hidden_dims=actor_hidden_dims,
60 |                          critic_hidden_dims=critic_hidden_dims,
61 |                          activation=activation,
62 |                          init_noise_std=init_noise_std)
63 | 
64 |         activation = get_activation(activation)
65 | 
66 |         self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
67 |         self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
68 | 
69 |         print(f"Actor RNN: {self.memory_a}")
70 |         print(f"Critic RNN: {self.memory_c}")
71 | 
72 |     def reset(self, dones=None):
73 |         self.memory_a.reset(dones)
74 |         self.memory_c.reset(dones)
75 | 
76 |     def act(self, observations, masks=None, hidden_states=None):
77 |         input_a = self.memory_a(observations, masks, hidden_states)
78 |         return super().act(input_a.squeeze(0))
79 | 
80 |     def act_inference(self, observations):
81 |         input_a = self.memory_a(observations)
82 |         return super().act_inference(input_a.squeeze(0))
83 | 
84 |     def evaluate(self, critic_observations, masks=None, hidden_states=None):
85 |         input_c = self.memory_c(critic_observations, masks, hidden_states)
86 |         return super().evaluate(input_c.squeeze(0))
87 | 
88 |     def get_hidden_states(self):
89 |         return self.memory_a.hidden_states, self.memory_c.hidden_states
90 | 
91 | 
92 | class Memory(torch.nn.Module):
93 |     def __init__(self, input_size, type='lstm', num_layers=1, hidden_size=256):
94 |         super().__init__()
95 |         # RNN
96 |         rnn_cls = nn.GRU if type.lower() == 'gru' else nn.LSTM
97 |         self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
98 |         self.hidden_states = None
99 | 
100 |     def forward(self, input, masks=None, hidden_states=None):
101 |         batch_mode = masks is not None
102 |         if batch_mode:
103 |             # batch mode (policy update): need saved hidden states
104 |             if hidden_states is None:
105 |                 raise ValueError("Hidden states not passed to memory module during policy update")
106 |             out, _ = self.rnn(input, hidden_states)
107 |             out = unpad_trajectories(out, masks)
108 |         else:
109 |             # inference mode (collection): use hidden states of last step
110 |             out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states)
111 |         return out
112 | 
113 |     def reset(self, dones=None):
114 |         # When the RNN is an LSTM, self.hidden_states is a tuple of (hidden_state, cell_state)
115 |         for hidden_state in self.hidden_states:
116 |             hidden_state[..., dones, :] = 0.0
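The `Memory` wrapper above leans on two PyTorch behaviours: an RNN called with `None` hidden states starts from zeros, and the `(h_n, c_n)` tuple can be masked in place so that only the environments that just finished are reset. A minimal sketch of the same bookkeeping (sizes illustrative):

```python
import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=8, hidden_size=16, num_layers=1)
hidden = None                           # lazily initialized to zeros by PyTorch on the first call
x = torch.randn(1, 4, 8)                # one step, 4 envs
out, hidden = rnn(x, hidden)            # collection: carry the hidden states across steps

dones = torch.tensor([False, True, False, False])
for h in hidden:                        # (h_n, c_n) for an LSTM
    h[..., dones, :] = 0.0              # zero only the finished envs, as Memory.reset does
print(out.shape, hidden[0].shape)       # torch.Size([1, 4, 16]) torch.Size([1, 4, 16])
```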
--------------------------------------------------------------------------------
/rl/modules/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import device, nn
3 | from torch.nn import functional as F
4 | from torch.autograd import Variable
5 | from torch.nn.modules.pooling import MaxPool2d
6 | import torch.utils.model_zoo as model_zoo
7 | import torchvision
8 | # from resnet import ResNet50
9 | # from model import Depth_encoding_Net
10 | import kornia
11 | import torchvision.transforms as transforms
12 | import numpy as np
13 | 
14 | from rl.modules.SMT import SMT_state_encoder
15 | 
16 | # image augmentation
17 | aug_trans = nn.Sequential(
18 |     nn.MaxPool2d(kernel_size=2),
19 |     nn.ReplicationPad2d(8),
20 |     kornia.augmentation.RandomCrop((90,160)) ## somewhat slow; random-crop augmentation in the style of DrQ-v2
21 | )
22 | 
23 | # rgb2gray = kornia.color.RgbToGrayscale() ## (N,3,H,W) -> (N,1,H,W)
24 | 
25 | def weights_init_(m):
26 |     """Custom weight init for Conv2D and Linear layers."""
27 |     if isinstance(m, nn.Linear):
28 |         nn.init.orthogonal_(m.weight.data)
29 |         if hasattr(m.bias, 'data'):
30 |             m.bias.data.fill_(0.0)
31 |     elif isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
32 |         gain = nn.init.calculate_gain('relu')
33 |         nn.init.orthogonal_(m.weight.data, gain)
34 |         if hasattr(m.bias, 'data'):
35 |             m.bias.data.fill_(0.0)
36 | 
37 | class Encoder(nn.Module):
38 |     """Convolutional encoder for image-based observations."""
39 |     def __init__(self, feature_dim):
40 |         super(Encoder, self).__init__()
41 |         # assert len(obs_shape) == 3
42 |         self.num_layers = 6
43 |         self.num_filters = 32
44 |         self.output_dim = 35
45 |         self.output_logits = False
46 |         self.feature_dim = feature_dim
47 | 
48 |         self.convs_rgb = nn.ModuleList([
49 |             nn.Conv2d(3, self.num_filters, 3, stride=2), ## rgb
50 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=2),
51 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
52 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
53 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
54 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1)
55 |         ])
56 | 
57 |         self.convs_Depth = nn.ModuleList([
58 |             nn.Conv2d(1, self.num_filters, 3, stride=2), ## d
59 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=2),
60 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
61 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
62 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
63 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1)
64 |         ])
65 | 
66 |         self.head = nn.Sequential(
67 |             nn.Linear(25792, self.feature_dim), # 25792 = 2 streams x 32 filters x 13 x 31 (spatial size after the conv stacks on 90x160 crops)
68 |             nn.LayerNorm(self.feature_dim))
69 | 
70 |         self.outputs = dict()
71 | 
72 |         # self.apply(weights_init_) ## weight initialization
73 | 
74 | 
75 |     def forward_conv_rgb(self, obs):
76 |         self.outputs['rgb'] = obs
77 | 
78 |         conv = torch.relu(self.convs_rgb[0](obs))
79 |         self.outputs['conv1_g'] = conv
80 | 
81 |         for i in range(1, self.num_layers):
82 |             conv = torch.relu(self.convs_rgb[i](conv))
83 | 
84 |             self.outputs['conv_g%s' % (i + 1)] = conv
85 | 
86 |         h = conv.reshape(conv.size(0), -1)
87 |         return h
88 | 
89 |     def forward_conv_depth(self, obs):
90 |         self.outputs['depth'] = obs
91 | 
92 |         conv = torch.relu(self.convs_Depth[0](obs))
93 |         self.outputs['conv1_d'] = conv
94 | 
95 |         for i in range(1, self.num_layers):
96 |             conv = torch.relu(self.convs_Depth[i](conv))
97 |             self.outputs['conv_d%s' % (i + 1)] = conv
98 | 
99 |         h = conv.reshape(conv.size(0), -1)
100 |         return h
101 | 
102 |     def forward(self, rgb, depth, task_obs):
103 | 
104 |         ## apply image augmentation during training
105 |         rgb = aug_trans(rgb)
106 |         depth = aug_trans(depth)
107 |         h1 = self.forward_conv_rgb(rgb)
108 |         h2 = self.forward_conv_depth(depth)
109 | 
110 |         h = torch.cat((h1,h2),1)
111 | 
112 |         out = self.head(h)
113 |         if not self.output_logits:
114 |             out = torch.tanh(out)
115 | 
116 |         out = torch.cat((out, task_obs),axis = 1)
117 |         self.outputs['out'] = out
118 | 
119 |         return out
120 | 
121 | 
122 | class Trans_Encoder(nn.Module):
123 |     """CNN encoder followed by a SMT encoder"""
124 |     def __init__(self):
125 |         super(Trans_Encoder, self).__init__()
126 |         self.encoder = Encoder(feature_dim=252)
127 |         self.smt_encoder = SMT_state_encoder(d_model=256, nhead=4)
128 | 
129 |     def forward(self, rgb, depth, task_obs, key_padding_mask):
130 |         '''
131 |         input:
132 |             embeddings: from 0 to t
133 |             observations: from 0 to t
134 |             batch = 1
135 |             considering update
136 |         output:
137 |             T D
138 |         '''
139 |         # T N H W C -> T N C H W; previously the batch dim was the sequence alone, now it also carries several envs, so flatten (T, N) into one batch dim for the CNN
140 |         seq_l, batch_size = task_obs.size(0), task_obs.size(1)
141 |         rgb = rgb.flatten(0,1).permute(0, 3, 1, 2)
142 |         depth = depth.flatten(0,1).permute(0, 3, 1, 2)
143 |         task_obs = task_obs.flatten(0,1)
144 |         ## T N D
145 |         embeddings = self.encoder(rgb, depth, task_obs).reshape(seq_l, batch_size, -1)
146 | 
147 |         ## T N D
148 |         ## training: the transformer's src/memory and tgt are both the embeddings
149 |         out = self.smt_encoder(o = embeddings, M = embeddings, flag = 1, key_padding_mask = key_padding_mask)
150 | 
151 |         # T N D
152 |         out = torch.cat((out, task_obs.reshape(seq_l, batch_size, -1)), -1)
153 | 
154 |         return out
155 | 
156 |     def inference_forward(self, rgb, depth, task_obs, memory, key_padding_mask):
157 |         # N H W C -> N C H W
158 |         rgb = rgb.permute(0, 3, 1, 2)
159 |         depth = depth.permute(0, 3, 1, 2)
160 | 
161 |         ## T N D
162 |         embeddings = self.encoder(rgb, depth, task_obs).unsqueeze(0)
163 | 
164 |         # exploration: src becomes the stored memory; tgt is the embedding inferred at the current step
165 |         if len(memory) < 32: ## a fixed-size window speeds up training and convergence
166 |             memory = torch.cat((memory, embeddings), 0)
167 |             key_padding_mask = None
168 |         else:
169 |             memory = torch.cat((memory[-31:], embeddings), 0)
170 |             # key_padding_mask = torch.cat((key_padding_mask[:,-31:], torch.zeros(2,1,device='cuda')), 1).to(bool) ## N * S
171 |         out = self.smt_encoder(o = embeddings, M = memory, flag = 0, key_padding_mask = key_padding_mask)
172 | 
173 |         # T D
174 |         out = torch.cat((out.squeeze(0), task_obs), -1)
175 | 
176 |         return out, memory
177 | 
178 | 
179 | ## pytorch  N C H W
180 | ## tensorflow  N H W C
181 | if __name__ == '__main__':
182 |     # encoder = Encoder(feature_dim = 252)
183 |     # rgb = Variable(torch.randn(1, 3, 180, 320))
184 |     # depth = Variable(torch.randn(1, 1, 180, 320))
185 |     # task_obs = Variable(torch.randn(1, 4))
186 |     # print(encoder(rgb,depth,task_obs).size())
187 |     # print(x)
188 | 
189 |     encoder = Trans_Encoder()  # Trans_Encoder takes no constructor arguments; feature_dim/state_dim are fixed inside
190 |     rgb = Variable(torch.randn(1, 500, 180, 320, 3))
191 |     depth = Variable(torch.randn(1, 500, 180, 320, 1))
192 |     task_obs = Variable(torch.randn(1, 500, 4))
193 |     print(encoder(rgb, depth, task_obs, key_padding_mask=None).size())
194 |     # print(encoder.self_attn(task_obs,task_obs,task_obs).size())
195 | 
196 | 
197 | 
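In `inference_forward` above, the transformer memory is kept to a sliding window of 32 embeddings (`memory[-31:]` plus the current step). The same bookkeeping in isolation (tensor sizes illustrative, assuming a 256-dim embedding and a single env):

```python
import torch

window = 32
memory = torch.zeros(0, 1, 256)          # (T, N, D), empty at episode start
for step in range(40):
    embedding = torch.randn(1, 1, 256)   # embedding of the current step
    if len(memory) < window:
        memory = torch.cat((memory, embedding), 0)
    else:
        memory = torch.cat((memory[-(window - 1):], embedding), 0)
print(memory.shape)                      # torch.Size([32, 1, 256]) -- never grows past the window
```

Bounding the memory keeps the attention cost per step constant instead of growing with episode length.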
--------------------------------------------------------------------------------
/rl/runners/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .on_policy_runner import OnPolicyRunner
--------------------------------------------------------------------------------
/rl/runners/on_policy_runner.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import time
32 | import os
33 | from collections import deque
34 | import statistics
35 | 
36 | from torch.utils.tensorboard import SummaryWriter
37 | import torch
38 | 
39 | from rl.algorithms import PPO
40 | from rl.modules import ActorCritic, ActorCriticRecurrent
41 | from rl.env import VecEnv
42 | 
43 | import numpy as np
44 | class OnPolicyRunner:
45 | 
46 |     def __init__(self,
47 |                  env: VecEnv,
48 |                  train_cfg,
49 |                  log_dir=None,
50 |                  device='cpu'):
51 | 
52 |         self.cfg = train_cfg["runner"]
53 |         self.alg_cfg = train_cfg["algorithm"]
54 |         self.policy_cfg = train_cfg["policy"]
55 |         self.device = device
56 |         self.env = env
57 |         if self.env.num_privileged_obs is not None:
58 |             num_critic_obs = self.env.num_privileged_obs
59 |         else:
60 |             num_critic_obs = self.env.num_obs
61 |         actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic
62 |         actor_critic: ActorCritic = actor_critic_class( self.env.num_obs,
63 |                                                         num_critic_obs,
64 |                                                         self.env.num_actions,
65 |                                                         **self.policy_cfg).to(self.device)
66 |         alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
67 |         self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
68 |         self.num_steps_per_env = self.cfg["num_steps_per_env"]
69 |         self.save_interval = self.cfg["save_interval"]
70 | 
71 |         # init storage and model
72 |         self.alg.init_storage(self.env.num_envs, self.num_steps_per_env, [self.env.num_obs], [self.env.num_privileged_obs], [self.env.num_actions])
73 | 
74 |         # Log
75 |         self.log_dir = log_dir
76 |         self.writer = None
77 |         self.tot_timesteps = 0
78 |         self.tot_time = 0
79 |         self.current_learning_iteration = 0
80 | 
81 |         _, _ = self.env.reset()
82 | 
83 |     def learn(self, num_learning_iterations, init_at_random_ep_len=False):
84 |         # initialize writer
85 |         if self.log_dir is not None and self.writer is None:
86 |             self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10)
87 |         if init_at_random_ep_len:
88 |             self.env.episode_length_buf = torch.randint_like(self.env.episode_length_buf, high=int(self.env.max_episode_length))
89 |         obs = self.env.get_observations()
90 |         privileged_obs = self.env.get_privileged_observations()
91 |         critic_obs = privileged_obs if privileged_obs is not None else obs
92 |         # obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
93 |         self.alg.actor_critic.train() # switch to train mode (for dropout for example)
94 | 
95 |         ep_infos = []
96 |         rewbuffer = deque(maxlen=100)
97 |         lenbuffer = deque(maxlen=100)
98 |         eps_stepbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
99 |         eps_splbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
100 |         eps_effortbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
101 |         eps_insbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
102 |         cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
103 |         cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
104 | 
105 |         tot_iter = int(self.current_learning_iteration + num_learning_iterations)
106 |         for it in range(self.current_learning_iteration, tot_iter):
107 |             start = time.time()
108 |             # Rollout
109 |             with torch.no_grad():
110 |                 for i in range(self.num_steps_per_env):
111 |                     # st = time.time()
112 |                     actions = self.alg.act(obs, critic_obs)
113 |                     # print(time.time() - st)
114 |                     obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
115 |                     # print(time.time() - st)
116 |                     critic_obs = privileged_obs if privileged_obs is not None else obs
117 |                     rewards, dones = rewards.to(self.device), dones.to(self.device)
118 |                     self.alg.process_env_step(rewards, dones, infos)
119 |                     # print(time.time() - st)
120 | 
121 |                     if self.log_dir is not None:
122 |                         # Book keeping
123 |                         if 'episode' in infos:
124 |                             ep_infos.append(infos['episode'])
125 |                         cur_reward_sum += rewards
126 |                         cur_episode_length += 1
127 | 
128 |                         new_ids = (dones > 0).nonzero(as_tuple=False)
129 | 
130 |                         rewbuffer.extend(cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist())
131 |                         lenbuffer.extend(cur_episode_length[new_ids][:, 0].cpu().numpy().tolist())
132 |                         for id in new_ids:
133 |                             eps_stepbuffer[id].extend(torch.tensor(infos['episode_length'])[id].numpy().tolist())
134 |                             eps_splbuffer[id].extend(torch.tensor(infos['spl'])[id].numpy().tolist())
135 |                             eps_effortbuffer[id].extend(torch.tensor(infos['effort_efficiency'])[id].numpy().tolist())
136 |                             eps_insbuffer[id].extend(torch.tensor(infos['ins'])[id].numpy().tolist())
137 | 
138 |                         cur_reward_sum[new_ids] = 0
139 |                         cur_episode_length[new_ids] = 0
140 | 
141 |                 stop = time.time()
142 |                 collection_time = stop - start
143 | 
144 |                 # Learning step
145 |                 start = stop
146 |                 self.alg.compute_returns(critic_obs)
147 | 
148 |             mean_value_loss, mean_surrogate_loss = self.alg.update()
149 |             stop = time.time()
150 |             learn_time = stop - start
151 |             if self.log_dir is not None:
152 |                 self.log(locals())
153 |             if it % self.save_interval == 0:
154 |                 self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it)))
155 |             ep_infos.clear()
156 | 
157 |         self.current_learning_iteration += num_learning_iterations
158 |         self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(self.current_learning_iteration)))
159 | 
160 |     def log(self, locs, width=80, pad=35):
161 |         self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
162 |         self.tot_time += locs['collection_time'] + locs['learn_time']
163 |         iteration_time = locs['collection_time'] + locs['learn_time']
164 | 
165 |         ep_string = f''
166 |         if locs['ep_infos']:
167 |             for key in locs['ep_infos'][0]:
168 |                 infotensor = torch.tensor([], device=self.device)
169 |                 for ep_info in locs['ep_infos']:
170 |                     # handle scalar and zero dimensional tensor infos
171 |                     if not isinstance(ep_info[key], torch.Tensor):
172 |                         ep_info[key] = torch.Tensor([ep_info[key]])
173 |                     if len(ep_info[key].shape) == 0:
174 |                         ep_info[key] = ep_info[key].unsqueeze(0)
175 |                     infotensor = torch.cat((infotensor, ep_info[key].to(self.device)))
176 |                 value = torch.mean(infotensor)
177 |                 self.writer.add_scalar('Episode/' + key, value, locs['it'])
178 |                 ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n"""
179 |         mean_std = self.alg.actor_critic.std.mean()
180 |         fps = int(self.num_steps_per_env * self.env.num_envs / (locs['collection_time'] + locs['learn_time']))
181 | 
182 |         self.writer.add_scalar('Loss/value_function', locs['mean_value_loss'], locs['it'])
183 |         self.writer.add_scalar('Loss/surrogate', locs['mean_surrogate_loss'], locs['it'])
184 |         self.writer.add_scalar('Loss/learning_rate', self.alg.learning_rate, locs['it'])
185 |         self.writer.add_scalar('Policy/mean_noise_std', mean_std.item(), locs['it'])
186 |         self.writer.add_scalar('Perf/total_fps', fps, locs['it'])
187 |         self.writer.add_scalar('Perf/collection time', locs['collection_time'], locs['it'])
188 |         self.writer.add_scalar('Perf/learning_time', locs['learn_time'], locs['it'])
189 |         if len(locs['rewbuffer']) > 0: ## per-environment logging
190 |             self.writer.add_scalar('Train/mean_reward', statistics.mean(locs['rewbuffer']), locs['it'])
191 |             self.writer.add_scalar('Train/mean_episode_length', statistics.mean(locs['lenbuffer']), locs['it'])
192 |             self.writer.add_scalar('Train/mean_reward/time', statistics.mean(locs['rewbuffer']), self.tot_time)
193 |             self.writer.add_scalar('Train/mean_episode_length/time', statistics.mean(locs['lenbuffer']), self.tot_time)
194 |             for i, env in enumerate(self.env.Training_Env[:5]):
195 |                 if locs['eps_stepbuffer'][i] == deque([]):
196 |                     continue
197 |                 self.writer.add_scalar(f'{env}/mean_steps', statistics.mean(locs['eps_stepbuffer'][i]), locs['it'])
198 |                 self.writer.add_scalar(f'{env}/mean_spl', statistics.mean(locs['eps_splbuffer'][i]), locs['it'])
199 |                 self.writer.add_scalar(f'{env}/mean_effort', statistics.mean(locs['eps_effortbuffer'][i]), locs['it'])
200 |                 self.writer.add_scalar(f'{env}/mean_ins', statistics.mean(locs['eps_insbuffer'][i]), locs['it'])
201 | 
202 |         header = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m "
203 | 
204 |         if len(locs['rewbuffer']) > 0:
205 |             log_string = (f"""{'#' * width}\n"""
206 |                           f"""{header.center(width, ' ')}\n\n"""
207 |                           f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
208 |                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
209 |                           f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
210 |                           f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
211 |                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
212 |                           f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
213 |                           f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""")
214 |                         # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
215 |                         # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
216 |         else:
217 |             log_string = (f"""{'#' * width}\n"""
218 |                           f"""{header.center(width, ' ')}\n\n"""
219 |                           f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
220 |                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
221 |                           f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
222 |                           f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
223 |                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""")
224 |                         # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
225 |                         # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
226 | 
227 |         log_string += ep_string
228 |         log_string += (f"""{'-' * width}\n"""
229 |                        f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
230 |                        f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
231 |                        f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n"""
232 |                        f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * (
233 |                            locs['num_learning_iterations'] - locs['it']):.1f}s\n""")
234 |         print(log_string)
235 | 
236 |     def save(self, path, infos=None):
237 |         torch.save({
238 |             'model_state_dict': self.alg.actor_critic.state_dict(),
239 |             'encoder_state_dict': self.alg.encoder_net.state_dict(),
240 |             'optimizer_state_dict': self.alg.optimizer.state_dict(),
241 |             'iter': self.current_learning_iteration,
242 |             'infos': infos,
243 |             }, path)
244 | 
245 |     def load(self, path, load_optimizer=True):
246 |         loaded_dict = torch.load(path)
247 |         self.alg.actor_critic.load_state_dict(loaded_dict['model_state_dict'])
248 |         self.alg.encoder_net.load_state_dict(loaded_dict['encoder_state_dict'])
249 |         if load_optimizer:
250 |             self.alg.optimizer.load_state_dict(loaded_dict['optimizer_state_dict'])
251 |         self.current_learning_iteration = loaded_dict['iter']
252 |         return loaded_dict['infos']
253 | 
254 |     def get_inference_policy(self, device=None):
255 |         self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
256 |         if device is not None:
257 |             self.alg.actor_critic.to(device)
258 |         return self.alg.actor_critic.act_inference
259 | 
--------------------------------------------------------------------------------
/rl/storage/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | 
4 | from .rollout_storage import RolloutStorage
--------------------------------------------------------------------------------
/rl/storage/rollout_storage.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | import numpy as np
33 | 
34 | from rl.utils import split_and_pad_trajectories, split_and_pad_trajectories_obs
35 | 
36 | class RolloutStorage:
37 |     class Transition:
38 |         def __init__(self):
39 |             self.observations = None
40 |             self.critic_observations = None
41 |             self.actions = None
42 |             self.rewards = None
43 |             self.dones = None
44 |             self.values = None
45 |             self.actions_log_prob = None
46 |             self.action_mean = None
47 |             self.action_sigma = None
48 |             self.hidden_states = None
49 | 
50 |         def clear(self):
51 |             self.__init__()
52 | 
53 |     def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'):
54 | 
55 |         self.device = device
56 | 
57 |         self.obs_shape = obs_shape
58 |         self.privileged_obs_shape = privileged_obs_shape
59 |         self.actions_shape = actions_shape
60 | 
61 |         # Core
62 |         self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
63 |         if privileged_obs_shape[0] is not None:
64 |             self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
65 |         else:
66 |             self.privileged_observations = None
67 |         self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
68 |         self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
69 |         self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
70 | 
71 |         # For PPO
72 |         self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
73 |         self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
74 |         self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
75 |         self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
76 |         self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
77 |         self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
78 | 
79 |         self.num_transitions_per_env = num_transitions_per_env
80 |         self.num_envs = num_envs
81 | 
82 |         # rnn
83 |         self.saved_hidden_states_a = None
84 |         self.saved_hidden_states_c = None
85 | 
86 |         ## SMT
87 |         self.saved_task_obs = None
88 |         self.step = 0
89 | 
90 |     def add_transitions(self, transition: Transition):
91 |         if self.step >= self.num_transitions_per_env:
92 |             raise AssertionError("Rollout buffer overflow")
93 |         # self.observations[self.step].copy_(transition.observations)
94 |         # if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations)
95 |         self.actions[self.step].copy_(transition.actions)
96 |         self._save_observation(transition.observations)
97 |         self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
98 |         self.dones[self.step].copy_(transition.dones.view(-1, 1))
99 |         self.values[self.step].copy_(transition.values)
100 |         self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
101 |         self.mu[self.step].copy_(transition.action_mean)
102 |         self.sigma[self.step].copy_(transition.action_sigma)
103 |         self._save_hidden_states(transition.hidden_states)
104 |         self.step += 1
105 | 
106 |     def _save_observation(self, obs):
107 |         # task_obs = []
108 |         # rgb = []
109 |         # depth = []
110 |         # for o in obs:
111 |         #     task_obs.append(o['task_obs'].copy())
112 |         #     rgb.append(o['rgb'].copy())
113 |         #     depth.append(o['depth'].copy())
114 | 
115 |         ## N D
116 |         rgb = obs[0]
117 |         depth = obs[1]
118 |         task_obs = obs[2]
119 | 
120 |         # initialize if needed
121 |         if self.saved_task_obs is None:
122 |             self.saved_task_obs = torch.zeros(self.actions.shape[0], *task_obs.shape, device=self.device)
123 |             self.saved_rgb = torch.zeros(self.actions.shape[0], *rgb.shape, device=self.device)
124 |             self.saved_depth = torch.zeros(self.actions.shape[0], *depth.shape, device=self.device)
125 | 
126 |         # copy the states
127 |         self.saved_task_obs[self.step] = task_obs
128 |         self.saved_rgb[self.step] = rgb
129 |         self.saved_depth[self.step] = depth
130 | 
131 | 
132 |     def _save_hidden_states(self, hidden_states):
133 |         if hidden_states is None or hidden_states==(None, None):
134 |             return
135 |         # make a tuple out of GRU hidden states to match the LSTM format
136 |         hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],)
137 |         hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],)
138 | 
139 |         # initialize if needed
140 |         if self.saved_hidden_states_a is None:
141 |             self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))]
142 |             self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))]
143 |         # copy the states
144 |         for i in range(len(hid_a)):
145 |             self.saved_hidden_states_a[i][self.step].copy_(hid_a[i])
146 |             self.saved_hidden_states_c[i][self.step].copy_(hid_c[i])
147 | 
148 | 
149 |     def clear(self):
150 |         self.step = 0
151 | 
152 |     def compute_returns(self, last_values, gamma, lam):
153 |         advantage = 0
154 |         for step in reversed(range(self.num_transitions_per_env)):
155 |             if step == self.num_transitions_per_env - 1:
156 |                 next_values = last_values
157 |             else:
158 |                 next_values = self.values[step + 1]
159 |             next_is_not_terminal = 1.0 - self.dones[step].float()
160 |             delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
161 |             advantage = delta + next_is_not_terminal * gamma * lam * advantage
162 |             self.returns[step] = advantage + self.values[step]
163 | 
164 |         # Compute and normalize the advantages
165 |         self.advantages = self.returns - self.values
166 |         self.advantages = (self.advantages - self.advantages.mean(0)) / (self.advantages.std(0) + 1e-8)
167 | 
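`compute_returns` above is a standard GAE(λ) backward recursion. Unrolled by hand on a toy single-env, three-step episode (all numbers illustrative), the same computation looks like this:

```python
import torch

gamma, lam = 0.99, 0.95
rewards = torch.tensor([1., 1., 1.])
values  = torch.tensor([0.5, 0.5, 0.5])
dones   = torch.tensor([0., 0., 1.])
last_value = torch.tensor(0.5)        # bootstrap value after the final step

advantage, advantages = 0.0, torch.zeros(3)
for step in reversed(range(3)):
    next_value = last_value if step == 2 else values[step + 1]
    not_terminal = 1.0 - dones[step]  # cut the recursion across episode ends
    delta = rewards[step] + not_terminal * gamma * next_value - values[step]
    advantage = delta + not_terminal * gamma * lam * advantage
    advantages[step] = advantage
returns = advantages + values
print(advantages, returns)
```

The storage then normalizes the advantages over the time dimension (`mean(0)`/`std(0)` per env) before the PPO update.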
168 |     def get_statistics(self):
169 |         done = self.dones
170 |         done[-1] = 1
171 |         flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
172 |         done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
173 |         trajectory_lengths = (done_indices[1:] - done_indices[:-1])
174 |         return trajectory_lengths.float().mean(), self.rewards.mean()
175 | 
176 |     def mini_batch_generator(self, num_mini_batches, num_epochs=8):
177 |         batch_size = self.num_envs * self.num_transitions_per_env
178 |         mini_batch_size = batch_size // num_mini_batches
179 |         indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
180 | 
181 |         observations = self.observations.flatten(0, 1)
182 |         if self.privileged_observations is not None:
183 |             critic_observations = self.privileged_observations.flatten(0, 1)
184 |         else:
185 |             critic_observations = observations
186 | 
187 |         actions = self.actions.flatten(0, 1)
188 |         values = self.values.flatten(0, 1)
189 |         returns = self.returns.flatten(0, 1)
190 |         old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
191 |         advantages = self.advantages.flatten(0, 1)
192 |         old_mu = self.mu.flatten(0, 1)
193 |         old_sigma = self.sigma.flatten(0, 1)
194 | 
195 |         for epoch in range(num_epochs):
196 |             for i in range(num_mini_batches):
197 | 
198 |                 start = i*mini_batch_size
199 |                 end = (i+1)*mini_batch_size
200 |                 batch_idx = indices[start:end]
201 | 
202 |                 obs_batch = observations[batch_idx]
203 |                 critic_observations_batch = critic_observations[batch_idx]
204 |                 actions_batch = actions[batch_idx]
205 |                 target_values_batch = values[batch_idx]
206 |                 returns_batch = returns[batch_idx]
207 |                 old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
208 |                 advantages_batch = advantages[batch_idx]
209 |                 old_mu_batch = old_mu[batch_idx]
210 |                 old_sigma_batch = old_sigma[batch_idx]
211 |                 yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
212 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None
213 | 
214 |     # for SMT only
215 |     def SMT_mini_batch_generator(self, num_mini_batches, num_epochs=8):
216 |         # padded_task_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_task_obs, self.dones)
217 |         # padded_rgb_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_rgb, self.dones)
218 |         # padded_depth_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_depth, self.dones)
219 |         # padded__trajectories, trajectory_masks = split_and_pad_trajectories_obs([self.saved_task_obs, self.saved_rgb, self.saved_depth], self.dones)
220 |         # padded_critic_obs_trajectories = padded_obs_trajectories
221 | 
222 |         mini_batch_size = self.num_envs // num_mini_batches ## take whole trajectories; self.observations is not flattened here, so mini-batches are split per environment
223 |         for ep in range(num_epochs):
224 |             first_traj = 0
225 |             for i in range(num_mini_batches):
226 |                 start = i*mini_batch_size
227 |                 stop = (i+1)*mini_batch_size
228 | 
229 |                 dones = self.dones.squeeze(-1)
230 |                 last_was_done = torch.zeros_like(dones, dtype=torch.bool)
231 |                 last_was_done[1:] = dones[:-1]
232 |                 last_was_done[0] = True
233 |                 trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) ## the number of done flags determines the batch size
234 |                 last_traj = first_traj + trajectories_batch_size ## trajectories added by the split
235 | 
236 |                 # masks_batch = trajectory_masks[:, first_traj:last_traj]
237 |                 # task_obs_batch = padded__trajectories[0][:, first_traj:last_traj]
238 |                 # rgb_obs_batch = padded__trajectories[1][:, first_traj:last_traj]
239 |                 # depth_obs_batch = padded__trajectories[2][:, first_traj:last_traj] ### actions etc. would have to be split the same way
240 | 
241 |                 task_obs_batch = self.saved_task_obs[:, start:stop] ## observations: split and pad the trajectories, then unpad again after the transformer
242 |                 rgb_obs_batch = self.saved_rgb[:, start:stop]
243 |                 depth_obs_batch = self.saved_depth[:, start:stop]
244 | 
245 |                 actions_batch = self.actions[:, start:stop]
246 |                 if torch.isnan(actions_batch).any():
247 |                     print('nan')
248 |                 old_mu_batch = self.mu[:, start:stop]
249 |                 old_sigma_batch = self.sigma[:, start:stop]
250 |                 returns_batch = self.returns[:, start:stop]
251 |                 advantages_batch = self.advantages[:, start:stop]
252 |                 values_batch = self.values[:, start:stop]
253 |                 old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
254 | 
255 |                 # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim])
256 |                 # then take only time steps after dones (flattens num envs and time dimensions),
257 |                 # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
258 |                 # last_was_done = last_was_done.permute(1, 0)
259 |                 # hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
260 |                 #                 for saved_hidden_states in self.saved_hidden_states_a ]
261 |                 # hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
262 |                 #                 for saved_hidden_states in self.saved_hidden_states_c ]
263 |                 # # remove the tuple for GRU
264 |                 # hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
265 |                 # hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch
266 | 
267 |                 yield [rgb_obs_batch, depth_obs_batch, task_obs_batch], None, actions_batch, values_batch, advantages_batch, returns_batch, \
268 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None
269 | 
270 |                 # first_traj = last_traj
271 | 
272 | 
273 | 
274 |     # for RNNs only
275 |     def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8):
276 | 
277 |         padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones)
278 |         if self.privileged_observations is not None:
279 |             padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones)
280 |         else:
281 |             padded_critic_obs_trajectories = padded_obs_trajectories
282 | 
283 |         mini_batch_size = self.num_envs // num_mini_batches
284 |         for ep in range(num_epochs):
285 |             first_traj = 0
286 |             for i in range(num_mini_batches):
287 |                 start = i*mini_batch_size
288 |                 stop = (i+1)*mini_batch_size
289 | 
290 |                 dones = self.dones.squeeze(-1)
291 |                 last_was_done = torch.zeros_like(dones, dtype=torch.bool)
292 |                 last_was_done[1:] = dones[:-1]
293 |                 last_was_done[0] = True
294 |                 trajectories_batch_size = torch.sum(last_was_done[:, start:stop])
295 |                 last_traj = first_traj + trajectories_batch_size
296 | 
297 |                 masks_batch = trajectory_masks[:, first_traj:last_traj]
298 |                 obs_batch = padded_obs_trajectories[:, first_traj:last_traj]
299 |                 critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj]
300 | 
301 |                 actions_batch = self.actions[:, start:stop]
302 |                 old_mu_batch = self.mu[:, start:stop]
303 |                 old_sigma_batch = self.sigma[:, start:stop]
304 |                 returns_batch = self.returns[:, start:stop]
305 |                 advantages_batch = self.advantages[:, start:stop]
306 |                 values_batch = self.values[:, start:stop]
307 |                 old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
308 | 
309 |                 # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim])
310 |                 # then take only time steps after dones (flattens num envs and time dimensions),
311 |                 # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
312 |                 last_was_done = last_was_done.permute(1, 0)
313 |                 hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
314 |                                 for saved_hidden_states in self.saved_hidden_states_a ]
315 |                 hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
316 |                                 for saved_hidden_states in self.saved_hidden_states_c ]
317 |                 # remove the tuple for GRU
318 |                 hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
319 |                 hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch
320 | 
321 |                 yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \
322 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch
323 | 
324 |                 first_traj = last_traj
--------------------------------------------------------------------------------
/rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .utils import split_and_pad_trajectories, unpad_trajectories, split_and_pad_trajectories_obs
--------------------------------------------------------------------------------
/rl/utils/log_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
4 | # of this software and associated documentation files (the "Software"), to deal
5 | # in the Software without restriction, including without limitation the rights
6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | # copies of the Software, and to permit persons to whom the Software is
8 | # furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE
20 | 
21 | import os
22 | 
23 | from rl.conf.configs import Configs
24 | from rl.utils.logging_engine import logger
25 | 
26 | 
27 | # Output logs through console and files
28 | def ini_logger(file_name, level='info'):
29 |     log_folder = os.path.join(Configs.output_folder, 'log')
30 |     if not os.path.exists(log_folder):
31 |         os.makedirs(log_folder)
32 |     delete_files(log_folder, Configs.MAX_LOG_FILE_NUM)
33 |     log_file = os.path.join(log_folder, file_name)
34 |     logger.add_file_output(log_file, level)
35 | 
36 | 
37 | def remove_file_handler_of_logging(file_name: str):
38 |     log_folder = os.path.join(Configs.output_folder, 'log')
39 |     file_path = os.path.join(log_folder, file_name)
40 |     try:
41 |         logger.remove_file_handler(file_path)
42 |     except Exception as e:
43 |         print(f"Failed to remove file handler {file_path}, reason: {e}")
44 | 
45 | 
46 | def delete_files(file_folder, max_num):
47 |     """
48 |     :param file_folder: target folder (absolute path)
49 |     :param max_num: maximum number of files to keep
50 |     """
51 |     num = count_file(file_folder)
52 |     if num > max_num:
53 |         delete_num = max_num // 2
54 |         total_files_and_dirs = os.listdir(file_folder)
55 |         total_files = []
56 |         for item in total_files_and_dirs:
57 |             if not os.path.isdir(os.path.join(file_folder, item)):
58 |                 total_files.append(item)
59 |         total_files.sort()  # lexicographic; assumes log file names sort chronologically
60 |         for i in range(delete_num):
61 |             os.remove(os.path.join(file_folder, total_files[i]))
62 | 
63 | 
64 | # Count the files in the target folder (non-recursive)
65 | def count_file(directory):
66 |     file_num = 0
67 |     if not os.path.exists(directory):
68 |         os.makedirs(directory)
69 |     for item in os.listdir(directory):
70 |         if os.path.isfile(os.path.join(directory, item)):
71 |             file_num += 1
72 |     return file_num
73 | 
--------------------------------------------------------------------------------
/rl/utils/logging_engine.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
4 | # of this software and associated documentation files (the "Software"), to deal
5 | # in the Software without restriction, including without limitation the rights
6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | # copies of the Software, and to permit persons to whom the Software is
8 | # furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE
20 | 
21 | """
22 | | Field/attribute | Format              | Description                                                  |
23 | | --------------- | ------------------- | ------------------------------------------------------------ |
24 | | asctime         | %(asctime)s         | Human-readable time of the log event, e.g. 2003-07-08 16:49:45,896 |
25 | | created         | %(created)f         | Time of the log event as a timestamp (the value of time.time() at the call) |
26 | | relativeCreated | %(relativeCreated)d | Milliseconds between the log event and the load of the logging module |
27 | | msecs           | %(msecs)d           | Millisecond part of the event time                           |
28 | | levelname       | %(levelname)s       | Log level as text ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') |
29 | | levelno         | %(levelno)s         | Log level as a number (10, 20, 30, 40, 50)                   |
30 | | name            | %(name)s            | Name of the logger, 'root' by default (the rootLogger)       |
31 | | message         | %(message)s         | Text of the log record, computed as `msg % args`             |
32 | | pathname        | %(pathname)s        | Full path of the source file where the logging call was made |
33 | | filename        | %(filename)s        | File-name part of pathname, including the extension          |
34 | | module          | %(module)s          | Name part of filename, without the extension                 |
35 | | lineno          | %(lineno)d          | Line number of the logging call                              |
36 | | funcName        | %(funcName)s        | Name of the function making the logging call                 |
37 | | process         | %(process)d         | Process ID                                                   |
38 | | processName     | %(processName)s     | Process name (new in Python 3.1)                             |
39 | | thread          | %(thread)d          | Thread ID                                                    |
40 | | threadName      | %(threadName)s      | Thread name                                                  |
41 | """
42 | 
43 | import logging
44 | import sys
45 | 
46 | 
47 | class LoggingEngine:
48 |     def __init__(self, level="debug", contents=None, logger_name=None):
49 |         self.logging_level_dict = {
50 |             "debug": logging.DEBUG,
51 |             "info": logging.INFO,
52 |             "warning": logging.WARNING,
53 |             "error": logging.ERROR,
54 |             "critical": logging.CRITICAL
55 |         }
56 | 
57 |         logging_level = self.logging_level_dict.get(level.lower(), logging.DEBUG)
58 | 
59 |         if contents is None:
60 |             contents = ["asctime", "levelname", "funcName", "lineno", "message"]
61 | 
62 |         if logger_name is None:
63 |             logger_name = 'logging_engine'
64 | 
65 |         logging_fmt = "%(asctime)s [%(filename)-15s | %(lineno)d] %(levelname)s: %(message)s"
66 |         # logging_fmt = " - ".join([f"%({content})s" for content in contents])
67 | 
68 |         logger = logging.getLogger(logger_name)
69 |         logger.setLevel(level=logging_level)
70 |         formatter = logging.Formatter(logging_fmt)
71 |         if not logger.handlers:
72 |             handler = logging.StreamHandler(sys.stdout)
73 |             handler.setFormatter(formatter)
74 |             logger.addHandler(handler)
75 | 
76 |         self.logger = logger
77 |         self.logger_name = logger_name
78 |         self.handlers = {}
79 |         self.formatter = formatter
80 | 
81 |         self.import_log_funcs()
82 | 
83 |     def import_log_funcs(self):
84 |         log_funcs = ['debug', 'info', 'warning', 'error', 'critical', 'exception']
85 |         for func_name in log_funcs:
86 |             func = getattr(self.logger, func_name)
87 |             setattr(self, func_name, func)
88 | 
89 |     def add_file_output(self, filename: str, level='info', mode="w"):
90 |         if filename not in self.handlers:
91 |             handler = logging.FileHandler(filename, mode=mode, encoding='UTF-8')
92 |             handler.setFormatter(self.formatter)
93 |             handler.setLevel(self.logging_level_dict.get(level.lower(), logging.DEBUG))
94 |             self.handlers[filename] = handler
95 |             self.logger.addHandler(handler)
96 | 
97 |     def remove_file_handler(self, file_path):
98 |         if file_path in self.handlers:
99 |             self.logger.removeHandler(self.handlers.get(file_path))
100 | 
101 |     def debug(self, msg: str):
102 |         pass  # placeholder; rebound to the real logger method by import_log_funcs
103 | 
104 |     def info(self, msg: str):
105 |         pass
106 | 
107 |     def warning(self, msg: str):
108 |         pass
109 | 
110 |     def error(self, msg: str):
111 |         pass
112 | 
113 |     def critical(self, msg: str):
114 |         pass
115 | 
116 |     def exception(self, msg: str):
117 |         pass
118 | 
119 | 
120 | logger = LoggingEngine(logger_name="glob_logging_engine",
121 |                        level="info")
122 | 
123 | 
124 | def test_log():
125 |     log = LoggingEngine(level="debug",
126 |                         contents=["asctime", "levelname", "filename", "lineno", "funcName", "message"])
127 | 
128 |     log.info("Hello World!")
129 | 
--------------------------------------------------------------------------------
/rl/utils/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from numpy import pad
32 | import torch
33 | 
34 | def split_and_pad_trajectories(tensor, dones):
35 |     """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
36 |     Returns masks corresponding to valid parts of the trajectories
37 |     Example:
38 |         Input: [ [a1, a2, a3, a4 | a5, a6],
39 |                  [b1, b2 | b3, b4, b5 | b6]
40 |                 ]
41 | 
42 |         Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
43 |                  [a5, a6, 0, 0],   |   [True, True, False, False],
44 |                  [b1, b2, 0, 0],   |   [True, True, False, False],
45 |                  [b3, b4, b5, 0],  |   [True, True, True, False],
46 |                  [b6, 0, 0, 0]     |   [True, False, False, False],
47 |                 ]                  | ]
48 | 
49 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
50 |     """
51 |     dones = dones.clone()
52 |     dones[-1] = 1
53 |     # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
54 |     flat_dones = dones.transpose(1, 0).reshape(-1, 1)
55 | 
56 |     # Get length of trajectory by counting the number of successive not done elements
57 |     done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
58 |     trajectory_lengths = done_indices[1:] - done_indices[:-1]
59 |     trajectory_lengths_list = trajectory_lengths.tolist()
60 |     # Extract the individual trajectories
61 |     trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
62 |     padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
63 | 
64 | 
65 |     trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
66 |     return padded_trajectories, trajectory_masks
67 | 
68 | def unpad_trajectories(trajectories, masks):
69 |     """ Does the inverse operation of split_and_pad_trajectories()
70 |     """
71 |     # Need to transpose before and after the masking to have proper reshaping
72 |     trajectories = trajectories.transpose(1, 0) ## T N D -> N T D
73 |     return trajectories[masks[:trajectories.size(1)].transpose(1, 0)].view(-1, masks.size(0), trajectories.shape[-1]).transpose(1, 0)
74 | 
75 | 
76 | def split_and_pad_trajectories_obs(obs, dones):
77 |     """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
78 |     Returns masks corresponding to valid parts of the trajectories
79 |     Example:
80 |         Input: [ [a1, a2, a3, a4 | a5, a6],
81 |                  [b1, b2 | b3, b4, b5 | b6]
82 |                 ]
83 | 
84 |         Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
85 |                  [a5, a6, 0, 0],   |   [True, True, False, False],
86 |                  [b1, b2, 0, 0],   |   [True, True, False, False],
87 |                  [b3, b4, b5, 0],  |   [True, True, True, False],
88 |                  [b6, 0, 0, 0]     |   [True, False, False, False],
89 |                 ]                  | ]
90 | 
91 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
92 |     """
93 |     dones = dones.clone()
94 |     dones[-1] = 1
95 |     # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
96 |     flat_dones = dones.transpose(1, 0).reshape(-1, 1)
97 | 
98 |     # Get length of trajectory by counting the number of successive not done elements
99 |     done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
100 |     trajectory_lengths = done_indices[1:] - done_indices[:-1]
101 |     trajectory_lengths_list = trajectory_lengths.tolist()
102 |     # Extract the individual trajectories
103 |     task_obs_batch = obs[0] ## observations: split and pad the trajectories, then unpad them again after the transformer
104 |     rgb_obs_batch = obs[1]
105 |     depth_obs_batch = obs[2]
106 | 
107 |     trajectories_task = torch.split(task_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
108 |     padded_trajectories_task = torch.nn.utils.rnn.pad_sequence(trajectories_task)
109 | 
110 |     trajectories_rgb = torch.split(rgb_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
111 |     padded_trajectories_rgb = torch.nn.utils.rnn.pad_sequence(trajectories_rgb)
112 | 
113 |     trajectories_depth = torch.split(depth_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
114 |     padded_trajectories_depth = torch.nn.utils.rnn.pad_sequence(trajectories_depth)
115 | 
116 |     trajectory_masks = trajectory_lengths > torch.arange(0, task_obs_batch.shape[0], device=obs[0].device).unsqueeze(1)
117 |     return [padded_trajectories_task, padded_trajectories_rgb, padded_trajectories_depth], trajectory_masks
118 | 
119 |     ## padded_trajectories_task.transpose(1, 0)[(trajectory_lengths > torch.arange(0, padded_trajectories_task.shape[0], device=obs[0].device).unsqueeze(1)).transpose(1, 0)]
--------------------------------------------------------------------------------
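The docstring example above can be made concrete with a small round-trip check. The sketch below (assuming the repo root is on `PYTHONPATH`; the done pattern and sizes are illustrative) builds a `(time, num_envs, dim)` batch, splits it into padded trajectories, and verifies that `unpad_trajectories` restores the original layout:

```python
import torch
from rl.utils import split_and_pad_trajectories, unpad_trajectories

# Two envs, six steps; env 0 finishes an episode at t=3, env 1 at t=1 and t=4.
T, N, D = 6, 2, 3
obs = torch.arange(T * N * D, dtype=torch.float32).reshape(T, N, D)
dones = torch.zeros(T, N, 1)
dones[3, 0] = 1
dones[1, 1] = 1
dones[4, 1] = 1

padded, masks = split_and_pad_trajectories(obs, dones)
print(padded.shape)  # torch.Size([4, 5, 3]) -- 5 trajectories, longest has 4 steps
print(masks.shape)   # torch.Size([6, 5])   -- one column of validity flags per trajectory

recovered = unpad_trajectories(padded, masks)
print(torch.equal(recovered, obs))  # True
```

This is exactly the round trip the recurrent and SMT mini-batch generators rely on: pad for batched processing, then unpad so losses are computed only on real time steps.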