├── .gitignore ├── LICENSE ├── README.md ├── config ├── __init__.py ├── envs │ ├── Gibson │ │ ├── test_env.py │ │ └── train_config.py │ ├── __init__.py │ ├── a1 │ │ └── a1_config.py │ └── base │ │ ├── base_config.py │ │ ├── base_task.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py ├── scripts │ ├── play.py │ └── train.py ├── tests │ └── test_env.py └── utils │ ├── __init__.py │ ├── helpers.py │ ├── logger.py │ ├── math.py │ ├── task_registry.py │ └── terrain.py └── rl ├── __init__.py ├── algorithms ├── __init__.py └── ppo.py ├── conf └── configs.py ├── env ├── __init__.py ├── my_env.py └── vec_env.py ├── modules ├── SMT.py ├── __init__.py ├── actor_critic.py ├── actor_critic_recurrent.py └── encoder.py ├── runners ├── __init__.py └── on_policy_runner.py ├── storage ├── __init__.py └── rollout_storage.py └── utils ├── __init__.py ├── log_utils.py ├── logging_engine.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 liwy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformer-based-memory-for-visual-navigation 2 | PPO implementation for the RA-L 2023 paper [Transformer Memory for Interactive Visual Navigation in Cluttered Environments](https://www.hrl.uni-bonn.de/teaching/ss23/master-seminar/transformer-memory-for-interactive-visual-navigation-in-cluttered-environments.pdf). 3 | 4 | A Transformer belief-state encoder encodes history information, 5 | and the PPO algorithm learns the policy on top of it. 6 | 7 | The vectorized environment is designed for iGibson and can easily be adapted to other environments such as Habitat.
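At a high level, the encoder (`rl/modules/encoder.py`) embeds each observation, the Transformer memory (`rl/modules/SMT.py`) attends over the episode's embedding history to produce a belief state, and the PPO actor-critic (`rl/modules/actor_critic.py`, `rl/algorithms/ppo.py`) acts on that belief state. The snippet below is only an illustrative sketch of this idea in plain PyTorch; the class name, dimensions, and defaults are placeholders, not this repo's exact API:

```python
import torch
import torch.nn as nn

class BeliefEncoder(nn.Module):
    """Toy transformer belief-state encoder over an observation history."""

    def __init__(self, obs_dim: int, d_model: int = 128, nhead: int = 4, num_layers: int = 2):
        super().__init__()
        self.embed = nn.Linear(obs_dim, d_model)
        layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers)

    def forward(self, obs_history: torch.Tensor) -> torch.Tensor:
        # obs_history: (batch, time, obs_dim) -> belief state: (batch, d_model)
        tokens = self.embed(obs_history)
        return self.encoder(tokens)[:, -1]  # last token summarizes the history

belief = BeliefEncoder(obs_dim=32)(torch.randn(8, 16, 32))  # -> shape (8, 128)
```

The belief state then replaces the raw observation as input to the PPO policy and value heads.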
8 | 9 | ## Training scripts 10 | ``` 11 | python config/scripts/train.py 12 | ``` 13 | 14 | ## Citation 15 | 16 | ```bibtex 17 | @article{li2023transformer, 18 | title={Transformer Memory for Interactive Visual Navigation in Cluttered Environments}, 19 | author={Li, Weiyuan and Hong, Ruoxin and Shen, Jiwei and Yuan, Liang and Lu, Yue}, 20 | journal={IEEE Robotics and Automation Letters}, 21 | volume={8}, 22 | number={3}, 23 | pages={1731--1738}, 24 | year={2023}, 25 | publisher={IEEE} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | 33 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 34 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'config', 'envs') -------------------------------------------------------------------------------- /config/envs/Gibson/test_env.py: -------------------------------------------------------------------------------- 1 | import gibson2 2 | from gibson2.envs.igibson_env import iGibsonEnv 3 | from gibson2.envs.parallel_env import ParallelNavEnv 4 | import atexit 5 | import multiprocessing 6 | import sys 7 | import traceback 8 | import numpy as np 9 | import os 10 | from gibson2.utils.utils import parse_config 11 | import logging 12 | logging.getLogger().setLevel(logging.WARNING) 13 | 14 | 15 | if __name__ == "__main__": 16 | config_file_name = '/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 17 | env_config = parse_config(config_file_name) 18 | GPU_ID = [0,0,0,1,1,1,2,2] 19 | Env = ['Beechwood_1_int','Benevolence_0_int','Ihlen_0_int','Ihlen_1_int','Merom_0_int','Pomaria_0_int','Rs_int','Wainscott_1_int'] 20 | Training_Env = Env[:5] 21 | Testing_Env = Env[-3:] 22 | core_id = 0 23 | num_env = 2 24 | def load_env(): 25 | global core_id 26 | core_id = core_id + 1 27 | return iGibsonEnv(config_file = env_config, 28 | scene_id = Training_Env[core_id], 29 | mode = 'headless', 30 | action_timestep = 1.0 / 10.0, 31 | physics_timestep = 1.0 / 40.0, 32 | device_idx = GPU_ID[core_id], 33 | automatic_reset = True) 34 | 35 | parallel_env = ParallelNavEnv([load_env] * num_env, blocking=False) 36 | 37 | 38 | from time import time 39 | for episode in range(10): 40 | start = time() 41 | print("episode {}".format(episode)) 42 | parallel_env.reset() 43 | for i in range(600): 44 | res = parallel_env.step([[0.5, 0.5] for _ in range(2)]) 45 | state, reward, done, _ = res[0] 46 | if done: ## automatic reset is enabled: when done, read the final data from info['last_observation']; the state returned here is the observation obtained after the reset 47 | print("Episode finished after {} timesteps".format(i + 1)) 48 | # break 49 | print("{} elapsed".format(time() - start)) -------------------------------------------------------------------------------- /config/envs/Gibson/train_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from config.envs.base.base_config import BaseConfig 32 | 33 | class GibsonRobotCfg(BaseConfig): 34 | class env: 35 | num_envs = 4096 36 | num_observations = 235 37 | num_privileged_obs = None # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise 38 | num_actions = 12 39 | env_spacing = 3. # not used with heightfields/trimeshes 40 | send_timeouts = True # send time out information to the algorithm 41 | episode_length_s = 20 # episode length in seconds 42 | 43 | class rewards: 44 | class scales: 45 | termination = -0.0 46 | tracking_lin_vel = 1.0 47 | tracking_ang_vel = 0.5 48 | lin_vel_z = -2.0 49 | ang_vel_xy = -0.05 50 | orientation = -0. 51 | torques = -0.00001 52 | dof_vel = -0. 53 | dof_acc = -2.5e-7 54 | base_height = -0. 55 | feet_air_time = 1.0 56 | collision = -1. 57 | feet_stumble = -0.0 58 | action_rate = -0.01 59 | stand_still = -0. 60 | 61 | only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems) 62 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 63 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 64 | soft_dof_vel_limit = 1. 65 | soft_torque_limit = 1. 66 | base_height_target = 1. 67 | max_contact_force = 100. # forces above this value are penalized 68 | 69 | class normalization: 70 | class obs_scales: 71 | lin_vel = 2.0 72 | ang_vel = 0.25 73 | dof_pos = 1.0 74 | dof_vel = 0.05 75 | height_measurements = 5.0 76 | clip_observations = 100. 77 | clip_actions = 100. 78 | 79 | class noise: 80 | add_noise = True 81 | noise_level = 1.0 # scales other values 82 | class noise_scales: 83 | dof_pos = 0.01 84 | dof_vel = 1.5 85 | lin_vel = 0.1 86 | ang_vel = 0.2 87 | gravity = 0.05 88 | height_measurements = 0.1 89 | 90 | 91 | class GibsonCfgPPO(BaseConfig): 92 | seed = 1 93 | runner_class_name = 'OnPolicyRunner' 94 | class policy: 95 | init_noise_std = 0.0 96 | actor_hidden_dims = [256, 128] 97 | critic_hidden_dims = [256, 128] 98 | activation = 'tanh' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 99 | # only for 'ActorCriticRecurrent': 100 | # rnn_type = 'lstm' 101 | # rnn_hidden_size = 512 102 | # rnn_num_layers = 1 103 | 104 | class algorithm: 105 | # training params 106 | value_loss_coef = 0.5 107 | use_clipped_value_loss = True 108 | clip_param = 0.1 109 | entropy_coef = 0.02 110 | num_learning_epochs = 5 111 | num_mini_batches = 5 # mini batch size = num_envs*nsteps / nminibatches 112 | learning_rate = 2.5e-4 #5.e-4 113 | schedule = 'adaptive' # could be adaptive, fixed 114 | gamma = 0.99 115 | lam = 0.95 116 | desired_kl = 0.01 117 | max_grad_norm = 1.
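        # Note on schedule = 'adaptive' above: in legged_gym-style PPO
        # implementations the learning rate is typically nudged each update to
        # keep the measured KL divergence near desired_kl, along the lines of
        #     if kl > 2.0 * desired_kl:
        #         lr = max(1e-5, lr / 1.5)
        #     elif kl < 0.5 * desired_kl:
        #         lr = min(1e-2, lr * 1.5)
        # This is a sketch of the usual rule, not necessarily the exact update
        # implemented in rl/algorithms/ppo.py.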
118 | 119 | class runner: 120 | policy_class_name = 'ActorCritic' 121 | algorithm_class_name = 'PPO' 122 | num_steps_per_env = 128 # per iteration 123 | max_iterations = 1e6 # number of policy updates 124 | 125 | # logging 126 | save_interval = 500 # check for potential saves every this many iterations 127 | experiment_name = 'igibson_all' 128 | run_name = '' 129 | # load and resume 130 | resume = False 131 | load_run = -1 # -1 = last run 132 | checkpoint = -1 # -1 = last saved model 133 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /config/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | 32 | # from .base.legged_robot import LeggedRobot 33 | # from .a1.a1_config import A1RoughCfg, A1RoughCfgPPO 34 | # from .Gibson.config import GibsonRobotCfg, GibsonCfgPPO 35 | 36 | # import os 37 | 38 | # from igibson.utils.task_registry import task_registry 39 | 40 | # # task_registry.register( "a1", LeggedRobot, A1RoughCfg(), A1RoughCfgPPO() ) 41 | # task_registry.register( "Gibson", LeggedRobot, GibsonRobotCfg(), GibsonCfgPPO() ) 42 | -------------------------------------------------------------------------------- /config/envs/a1/a1_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 32 | 33 | class A1RoughCfg( LeggedRobotCfg ): 34 | class init_state( LeggedRobotCfg.init_state ): 35 | pos = [0.0, 0.0, 0.42] # x,y,z [m] 36 | default_joint_angles = { # = target angles [rad] when action = 0.0 37 | 'FL_hip_joint': 0.1, # [rad] 38 | 'RL_hip_joint': 0.1, # [rad] 39 | 'FR_hip_joint': -0.1, # [rad] 40 | 'RR_hip_joint': -0.1, # [rad] 41 | 42 | 'FL_thigh_joint': 0.8, # [rad] 43 | 'RL_thigh_joint': 1., # [rad] 44 | 'FR_thigh_joint': 0.8, # [rad] 45 | 'RR_thigh_joint': 1., # [rad] 46 | 47 | 'FL_calf_joint': -1.5, # [rad] 48 | 'RL_calf_joint': -1.5, # [rad] 49 | 'FR_calf_joint': -1.5, # [rad] 50 | 'RR_calf_joint': -1.5, # [rad] 51 | } 52 | 53 | class control( LeggedRobotCfg.control ): 54 | # PD Drive parameters: 55 | control_type = 'P' 56 | stiffness = {'joint': 20.} # [N*m/rad] 57 | damping = {'joint': 0.5} # [N*m*s/rad] 58 | # action scale: target angle = actionScale * action + defaultAngle 59 | action_scale = 0.25 60 | # decimation: Number of control action updates @ sim DT per policy DT 61 | decimation = 4 62 | 63 | class asset( LeggedRobotCfg.asset ): 64 | file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/a1/urdf/a1.urdf' 65 | name = "a1" 66 | foot_name = "foot" 67 | penalize_contacts_on = ["thigh", "calf"] 68 | terminate_after_contacts_on = ["base"] 69 | self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter 70 | 71 | class rewards( LeggedRobotCfg.rewards ): 72 | soft_dof_pos_limit = 0.9 73 | base_height_target = 0.25 74 | class scales( LeggedRobotCfg.rewards.scales ): 75 | torques = -0.0002 76 | dof_pos_limits = -10.0 77 | 78 | class A1RoughCfgPPO( LeggedRobotCfgPPO ): 79 | class algorithm( LeggedRobotCfgPPO.algorithm ): 80 | entropy_coef = 0.01 81 | class runner( LeggedRobotCfgPPO.runner ): 82 | run_name = '' 83 | experiment_name = 'rough_a1' 84 | 85 | -------------------------------------------------------------------------------- /config/envs/base/base_config.py:
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import inspect 32 | 33 | class BaseConfig: 34 | def __init__(self) -> None: 35 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 36 | self.init_member_classes(self) 37 | 38 | @staticmethod 39 | def init_member_classes(obj): 40 | # iterate over all attribute names 41 | for key in dir(obj): 42 | # disregard builtin attributes 43 | # if key.startswith("__"): 44 | if key=="__class__": 45 | continue 46 | # get the corresponding attribute object 47 | var = getattr(obj, key) 48 | # check if the attribute is a class 49 | if inspect.isclass(var): 50 | # instantiate the class 51 | i_var = var() 52 | # set the attribute to the instance instead of the type 53 | setattr(obj, key, i_var) 54 | # recursively init members of the attribute 55 | BaseConfig.init_member_classes(i_var) -------------------------------------------------------------------------------- /config/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2.
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import sys 32 | from isaacgym import gymapi 33 | from isaacgym import gymutil 34 | import numpy as np 35 | import torch 36 | 37 | # Base class for RL tasks 38 | class BaseTask(): 39 | 40 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 41 | self.gym = gymapi.acquire_gym() 42 | 43 | self.sim_params = sim_params 44 | self.physics_engine = physics_engine 45 | self.sim_device = sim_device 46 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device) 47 | self.headless = headless 48 | 49 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX. 
50 | if sim_device_type=='cuda' and sim_params.use_gpu_pipeline: 51 | self.device = self.sim_device 52 | else: 53 | self.device = 'cpu' 54 | 55 | # graphics device for rendering, -1 for no rendering 56 | self.graphics_device_id = self.sim_device_id 57 | if self.headless == True: 58 | self.graphics_device_id = -1 59 | 60 | self.num_envs = cfg.env.num_envs 61 | self.num_obs = cfg.env.num_observations 62 | self.num_privileged_obs = cfg.env.num_privileged_obs 63 | self.num_actions = cfg.env.num_actions 64 | 65 | # optimization flags for pytorch JIT 66 | torch._C._jit_set_profiling_mode(False) 67 | torch._C._jit_set_profiling_executor(False) 68 | 69 | # allocate buffers 70 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 71 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 72 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) 73 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) 74 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) 75 | if self.num_privileged_obs is not None: 76 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float) 77 | else: 78 | self.privileged_obs_buf = None 79 | # self.num_privileged_obs = self.num_obs 80 | 81 | self.extras = {} 82 | 83 | # create envs, sim and viewer 84 | self.create_sim() 85 | self.gym.prepare_sim(self.sim) 86 | 87 | # todo: read from config 88 | self.enable_viewer_sync = True 89 | self.viewer = None 90 | 91 | # if running with a viewer, set up keyboard shortcuts and camera 92 | if self.headless == False: 93 | # subscribe to keyboard shortcuts 94 | self.viewer = self.gym.create_viewer( 95 | self.sim, gymapi.CameraProperties()) 96 | self.gym.subscribe_viewer_keyboard_event( 97 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 98 | self.gym.subscribe_viewer_keyboard_event( 99 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 100 | 101 | def get_observations(self): 102 | return self.obs_buf 103 | 104 | def get_privileged_observations(self): 105 | return self.privileged_obs_buf 106 | 107 | def reset_idx(self, env_ids): 108 | """Reset selected robots""" 109 | raise NotImplementedError 110 | 111 | def reset(self): 112 | """ Reset all robots""" 113 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 114 | obs, privileged_obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 115 | return obs, privileged_obs 116 | 117 | def step(self, actions): 118 | raise NotImplementedError 119 | 120 | def render(self, sync_frame_time=True): 121 | if self.viewer: 122 | # check for window closed 123 | if self.gym.query_viewer_has_closed(self.viewer): 124 | sys.exit() 125 | 126 | # check for keyboard events 127 | for evt in self.gym.query_viewer_action_events(self.viewer): 128 | if evt.action == "QUIT" and evt.value > 0: 129 | sys.exit() 130 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 131 | self.enable_viewer_sync = not self.enable_viewer_sync 132 | 133 | # fetch results 134 | if self.device != 'cpu': 135 | self.gym.fetch_results(self.sim, True) 136 | 137 | # step graphics 138 | if self.enable_viewer_sync: 139 | self.gym.step_graphics(self.sim) 140 | self.gym.draw_viewer(self.viewer, self.sim, True) 141 | if sync_frame_time: 142 | self.gym.sync_frame_time(self.sim) 143 | else: 144 | self.gym.poll_viewer_events(self.viewer) 
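# Usage sketch (illustrative, not part of the original file): a concrete task
# subclasses BaseTask and implements the two hooks left abstract above;
# reset() then works unchanged because it only relies on reset_idx() and step().
#
#     class MyTask(BaseTask):
#         def reset_idx(self, env_ids):
#             ...  # reinitialize the selected environments
#
#         def step(self, actions):
#             ...  # apply actions, advance the sim, refresh the buffers
#             return (self.obs_buf, self.privileged_obs_buf, self.rew_buf,
#                     self.reset_buf, self.extras)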
-------------------------------------------------------------------------------- /config/envs/base/legged_robot_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .base_config import BaseConfig 32 | 33 | class LeggedRobotCfg(BaseConfig): 34 | class env: 35 | num_envs = 4096 36 | num_observations = 235 37 | num_privileged_obs = None # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise 38 | num_actions = 12 39 | env_spacing = 3. # not used with heightfields/trimeshes 40 | send_timeouts = True # send time out information to the algorithm 41 | episode_length_s = 20 # episode length in seconds 42 | 43 | class terrain: 44 | mesh_type = 'trimesh' # "heightfield" # none, plane, heightfield or trimesh 45 | horizontal_scale = 0.1 # [m] 46 | vertical_scale = 0.005 # [m] 47 | border_size = 25 # [m] 48 | curriculum = True 49 | static_friction = 1.0 50 | dynamic_friction = 1.0 51 | restitution = 0. 52 | # rough terrain only: 53 | measure_heights = True 54 | measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1.6m x 1m rectangle (without center line) 55 | measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5] 56 | selected = False # select a unique terrain type and pass all arguments 57 | terrain_kwargs = None # Dict of arguments for selected terrain 58 | max_init_terrain_level = 5 # starting curriculum state 59 | terrain_length = 8. 60 | terrain_width = 8.
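        # The generated terrain is a grid of num_rows x num_cols tiles, each
        # terrain_length x terrain_width meters: with the values here, 10 x 20
        # tiles of 8 m x 8 m, i.e. roughly 80 m x 160 m plus the border.
        # Rows are typically used as curriculum difficulty levels.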
61 | num_rows = 10 # number of terrain rows (levels) 62 | num_cols = 20 # number of terrain cols (types) 63 | # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete] 64 | terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2] 65 | # trimesh only: 66 | slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces 67 | 68 | class commands: 69 | curriculum = False 70 | max_curriculum = 1. 71 | num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 72 | resampling_time = 10. # time before commands are changed [s] 73 | heading_command = True # if true: compute ang vel command from heading error 74 | class ranges: 75 | lin_vel_x = [-1.0, 1.0] # min max [m/s] 76 | lin_vel_y = [-1.0, 1.0] # min max [m/s] 77 | ang_vel_yaw = [-1, 1] # min max [rad/s] 78 | heading = [-3.14, 3.14] 79 | 80 | class init_state: 81 | pos = [0.0, 0.0, 1.] # x,y,z [m] 82 | rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 83 | lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] 84 | ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] 85 | default_joint_angles = { # target angles when action = 0.0 86 | "joint_a": 0., 87 | "joint_b": 0.} 88 | 89 | class control: 90 | control_type = 'P' # P: position, V: velocity, T: torques 91 | # PD Drive parameters: 92 | stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad] 93 | damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad] 94 | # action scale: target angle = actionScale * action + defaultAngle 95 | action_scale = 0.5 96 | # decimation: Number of control action updates @ sim DT per policy DT 97 | decimation = 4 98 | 99 | class asset: 100 | file = "" 101 | name = "legged_robot" # actor name 102 | foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors 103 | penalize_contacts_on = [] 104 | terminate_after_contacts_on = [] 105 | disable_gravity = False 106 | collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true"> to the URDF 107 | fix_base_link = False # fix the base of the robot 108 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) 109 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 110 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation 111 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up 112 | 113 | density = 0.001 114 | angular_damping = 0. 115 | linear_damping = 0. 116 | max_angular_velocity = 1000. 117 | max_linear_velocity = 1000. 118 | armature = 0. 119 | thickness = 0.01 120 | 121 | class domain_rand: 122 | randomize_friction = True 123 | friction_range = [0.5, 1.25] 124 | randomize_base_mass = False 125 | added_mass_range = [-1., 1.] 126 | push_robots = True 127 | push_interval_s = 15 128 | max_push_vel_xy = 1. 129 | 130 | class rewards: 131 | class scales: 132 | termination = -0.0 133 | tracking_lin_vel = 1.0 134 | tracking_ang_vel = 0.5 135 | lin_vel_z = -2.0 136 | ang_vel_xy = -0.05 137 | orientation = -0. 138 | torques = -0.00001 139 | dof_vel = -0. 140 | dof_acc = -2.5e-7 141 | base_height = -0. 142 | feet_air_time = 1.0 143 | collision = -1. 144 | feet_stumble = -0.0 145 | action_rate = -0.01 146 | stand_still = -0.
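        # How these scales are consumed (the usual legged_gym convention,
        # which this config mirrors): each attribute names a reward term, its
        # value is that term's weight (negative values are penalties), terms
        # whose weight is exactly 0 are skipped, and the per-step reward is
        # approximately sum_i scale_i * r_i(state, action).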
147 | 148 | only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems) 149 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 150 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 151 | soft_dof_vel_limit = 1. 152 | soft_torque_limit = 1. 153 | base_height_target = 1. 154 | max_contact_force = 100. # forces above this value are penalized 155 | 156 | class normalization: 157 | class obs_scales: 158 | lin_vel = 2.0 159 | ang_vel = 0.25 160 | dof_pos = 1.0 161 | dof_vel = 0.05 162 | height_measurements = 5.0 163 | clip_observations = 100. 164 | clip_actions = 100. 165 | 166 | class noise: 167 | add_noise = True 168 | noise_level = 1.0 # scales other values 169 | class noise_scales: 170 | dof_pos = 0.01 171 | dof_vel = 1.5 172 | lin_vel = 0.1 173 | ang_vel = 0.2 174 | gravity = 0.05 175 | height_measurements = 0.1 176 | 177 | # viewer camera: 178 | class viewer: 179 | ref_env = 0 180 | pos = [10, 0, 6] # [m] 181 | lookat = [11., 5, 3.] # [m] 182 | 183 | class sim: 184 | dt = 0.005 185 | substeps = 1 186 | gravity = [0., 0. ,-9.81] # [m/s^2] 187 | up_axis = 1 # 0 is y, 1 is z 188 | 189 | class physx: 190 | num_threads = 10 191 | solver_type = 1 # 0: pgs, 1: tgs 192 | num_position_iterations = 4 193 | num_velocity_iterations = 0 194 | contact_offset = 0.01 # [m] 195 | rest_offset = 0.0 # [m] 196 | bounce_threshold_velocity = 0.5 #0.5 [m/s] 197 | max_depenetration_velocity = 1.0 198 | max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more 199 | default_buffer_size_multiplier = 5 200 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 201 | 202 | class LeggedRobotCfgPPO(BaseConfig): 203 | seed = 1 204 | runner_class_name = 'OnPolicyRunner' 205 | class policy: 206 | init_noise_std = 1.0 207 | actor_hidden_dims = [512, 256, 128] 208 | critic_hidden_dims = [512, 256, 128] 209 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 210 | # only for 'ActorCriticRecurrent': 211 | # rnn_type = 'lstm' 212 | # rnn_hidden_size = 512 213 | # rnn_num_layers = 1 214 | 215 | class algorithm: 216 | # training params 217 | value_loss_coef = 1.0 218 | use_clipped_value_loss = True 219 | clip_param = 0.2 220 | entropy_coef = 0.01 221 | num_learning_epochs = 5 222 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 223 | learning_rate = 1.e-3 #5.e-4 224 | schedule = 'adaptive' # could be adaptive, fixed 225 | gamma = 0.99 226 | lam = 0.95 227 | desired_kl = 0.01 228 | max_grad_norm = 1. 229 | 230 | class runner: 231 | policy_class_name = 'ActorCritic' 232 | algorithm_class_name = 'PPO' 233 | num_steps_per_env = 24 # per iteration 234 | max_iterations = 1500 # number of policy updates 235 | 236 | # logging 237 | save_interval = 50 # check for potential saves every this many iterations 238 | experiment_name = 'test' 239 | run_name = '' 240 | # load and resume 241 | resume = False 242 | load_run = -1 # -1 = last run 243 | checkpoint = -1 # -1 = last saved model 244 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /config/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | import sys 33 | sys.path.append('/Extra/lwy/gibson/graduate/') 34 | from rl.env.my_env import VecGibson 35 | import numpy as np 36 | import torch 37 | 38 | 39 | import argparse 40 | 41 | # from simple_agent import RandomAgent, ForwardOnlyAgent 42 | # from rl_agent import SACAgent 43 | from gibson2.utils.utils import parse_config 44 | from gibson2.challenge.challenge import Challenge 45 | from gibson2.envs.igibson_env import iGibsonEnv 46 | 47 | 48 | import datetime 49 | from rl.utils.log_utils import ini_logger 50 | from rl.utils.logging_engine import logger 51 | from rl.modules import ActorCritic, ActorCriticRecurrent 52 | from config.utils import task_registry 53 | 54 | from config.envs.Gibson.train_config import GibsonCfgPPO 55 | 56 | 57 | def main(): 58 | 59 | log_file_name = f"test_{datetime.datetime.now().strftime('%y%m%d%H%M%S')}.log" 60 | ini_logger(log_file_name, level='info') 61 | model_path = '/Extra/lwy/gibson/graduate/logs/igibson_all/Feb17_02-43-54_/model_3500.pt' 62 | # model_path = './transformer_waypoints/11_22/model/SAC_smtI_32_waypoints_11_22_std800' 63 | logger.info(f"Start to run {model_path}") 64 | env = VecGibson() 65 | 66 | # load policy 67 | GibsonCfgPPO.runner.resume = True 68 | GibsonCfgPPO.runner.log_root = '/Extra/lwy/gibson/graduate/logs/igibson_all/' 69 | GibsonCfgPPO.runner.load_run = 'Feb17_02-43-54_' 70 | GibsonCfgPPO.runner.checkpoint = '3500' 71 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name='gibson', train_cfg=GibsonCfgPPO) 72 | policy = ppo_runner.get_inference_policy(device='cuda') 73 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 74 | 75 | test(policy, ppo_runner, 0) 76 | 77 | def test(agent, runner, gpu): 78 | config_file =
'/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 79 | split = 'test' ## train 80 | episode_dir = '/home/lwy/IGibson2021/iGibson/gibson2/data/episodes_data/interactive_nav' 81 | 82 | eval_episodes_per_scene = os.environ.get( 83 | 'EVAL_EPISODES_PER_SCENE', 100) 84 | 85 | env_config = parse_config(config_file) 86 | task = env_config['task'] 87 | 88 | logger.info(f'{task},{split}') 89 | if task == 'interactive_nav_random': 90 | metrics = {key: 0.0 for key in [ 91 | 'success', 'spl', 'effort_efficiency', 'ins', 'episode_return']} 92 | 93 | elif task == 'social_nav_random': 94 | metrics = {key: 0.0 for key in [ 95 | 'success', 'stl', 'psc', 'episode_return']} 96 | else: 97 | assert False, 'unknown task: {}'.format(task) 98 | 99 | num_episodes_per_scene = eval_episodes_per_scene 100 | split_dir = os.path.join(episode_dir, split) 101 | assert os.path.isdir(split_dir) 102 | num_scenes = len(os.listdir(split_dir)) 103 | assert num_scenes > 0 104 | total_num_episodes = num_scenes * num_episodes_per_scene 105 | 106 | idx = 0 107 | for json_file in os.listdir(split_dir): 108 | scene_id = json_file.split('.')[0] 109 | json_file = os.path.join(split_dir, json_file) 110 | logger.info(json_file) 111 | env_config['scene_id'] = scene_id 112 | env_config['load_scene_episode_config'] = True 113 | env_config['scene_episode_config_name'] = json_file 114 | env = iGibsonEnv(config_file=env_config, 115 | mode='headless', 116 | action_timestep=1.0 / 10.0, 117 | physics_timestep=1.0 / 40.0, 118 | device_idx=gpu) 119 | scene_metrics = {key: 0.0 for key in [ 120 | 'success', 'spl', 'effort_efficiency', 'ins', 'episode_return']} 121 | for _ in range(num_episodes_per_scene): 122 | idx += 1 123 | state = env.reset() 124 | # memory = torch.FloatTensor([]).cuda() 125 | # belief_state, memory = agent.cal_belief_state(state, memory) 126 | episode_return = 0.0 127 | while True: 128 | # action = env.action_space.sample() 129 | action = runner.alg.act([state], [state]) 130 | state, reward, done, info = env.step(action[0]) 131 | # belief_state, memory = agent.cal_belief_state(state, memory) 132 | 133 | episode_return += reward 134 | if done: 135 | logger.info(f'Episode: {idx}/{total_num_episodes}, return :{episode_return}') 136 | break 137 | 138 | metrics['episode_return'] += episode_return 139 | scene_metrics['episode_return'] += episode_return 140 | for key in metrics: 141 | if key in info: 142 | metrics[key] += info[key] 143 | scene_metrics[key] += info[key] 144 | 145 | for key in metrics: 146 | scene_metrics[key] /= num_episodes_per_scene 147 | logger.info('Avg {}: {}'.format(key, scene_metrics[key])) 148 | 149 | env.close() 150 | 151 | for key in metrics: 152 | metrics[key] /= total_num_episodes 153 | logger.info('Avg {}: {}'.format(key, metrics[key])) 154 | return metrics['episode_return'] 155 | 156 | if __name__ == "__main__": 157 | main() 158 | -------------------------------------------------------------------------------- /config/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 
9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | import os 33 | from datetime import datetime 34 | import sys 35 | sys.path.append('/Extra/lwy/gibson/graduate/') 36 | 37 | from rl.env.my_env import VecGibson 38 | from config.utils import task_registry 39 | from config.envs.Gibson.train_config import GibsonCfgPPO 40 | import torch 41 | 42 | def train(): 43 | env = VecGibson() 44 | os.environ["CUDA_VISIBLE_DEVICES"] = "3" 45 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name='igibson', train_cfg=GibsonCfgPPO) 46 | ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=False) 47 | 48 | if __name__ == '__main__': 49 | train() 50 | -------------------------------------------------------------------------------- /config/tests/test_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | import os 33 | from datetime import datetime 34 | 35 | import isaacgym 36 | from legged_gym.envs import * 37 | from legged_gym.utils import get_args, export_policy_as_jit, task_registry, Logger 38 | 39 | import torch 40 | 41 | 42 | def test_env(args): 43 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 44 | # override some parameters for testing 45 | env_cfg.env.num_envs = min(env_cfg.env.num_envs, 10) 46 | 47 | # prepare environment 48 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 49 | for i in range(int(10*env.max_episode_length)): 50 | actions = 0.*torch.ones(env.num_envs, env.num_actions, device=env.device) 51 | obs, _, rew, done, info = env.step(actions) 52 | print("Done") 53 | 54 | if __name__ == '__main__': 55 | args = get_args() 56 | test_env(args) 57 | -------------------------------------------------------------------------------- /config/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict 32 | from .task_registry import task_registry 33 | from .logger import Logger 34 | from .math import * 35 | # from .terrain import Terrain -------------------------------------------------------------------------------- /config/utils/helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | import copy 33 | import torch 34 | import numpy as np 35 | import random 36 | # from isaacgym import gymapi 37 | # from isaacgym import gymutil 38 | 39 | # from rl import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 40 | 41 | def class_to_dict(obj) -> dict: 42 | if not hasattr(obj,"__dict__"): 43 | return obj 44 | result = {} 45 | for key in dir(obj): 46 | if key.startswith("_"): 47 | continue 48 | element = [] 49 | val = getattr(obj, key) 50 | if isinstance(val, list): 51 | for item in val: 52 | element.append(class_to_dict(item)) 53 | else: 54 | element = class_to_dict(val) 55 | result[key] = element 56 | return result 57 | 58 | def update_class_from_dict(obj, dict): 59 | for key, val in dict.items(): 60 | attr = getattr(obj, key, None) 61 | if isinstance(attr, type): 62 | update_class_from_dict(attr, val) 63 | else: 64 | setattr(obj, key, val) 65 | return 66 | 67 | def set_seed(seed): 68 | if seed == -1: 69 | seed = np.random.randint(0, 10000) 70 | print("Setting seed: {}".format(seed)) 71 | 72 | random.seed(seed) 73 | np.random.seed(seed) 74 | torch.manual_seed(seed) 75 | os.environ['PYTHONHASHSEED'] = str(seed) 76 | torch.cuda.manual_seed(seed) 77 | torch.cuda.manual_seed_all(seed) 78 | 79 | def parse_sim_params(args, cfg): 80 | # code from Isaac Gym Preview 2 81 | # initialize sim params 82 | sim_params = gymapi.SimParams() 83 | 84 | # set some values from args 85 | if args.physics_engine == gymapi.SIM_FLEX: 86 | if args.device != "cpu": 87 | print("WARNING: Using Flex with GPU instead of PHYSX!") 88 | elif args.physics_engine == gymapi.SIM_PHYSX: 89 | sim_params.physx.use_gpu = args.use_gpu 90 | sim_params.physx.num_subscenes = args.subscenes 91 | sim_params.use_gpu_pipeline = args.use_gpu_pipeline 92 | 93 | # if sim options are provided in cfg, parse them and update/override above: 94 | if "sim" in cfg: 95 | gymutil.parse_sim_config(cfg["sim"], sim_params) 96 | 97 | # Override num_threads if passed on the command line 98 | if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: 99 | sim_params.physx.num_threads = args.num_threads 100 | 101 | return sim_params 102 | 103 | def get_load_path(root, load_run=-1, checkpoint=-1): 104 | try: 105 | runs = os.listdir(root) 106 | #TODO sort by date to handle change of month 107 | runs.sort() 108 | if 'exported' in runs: runs.remove('exported') 109 | last_run = os.path.join(root, runs[-1]) 110 | except: 111 | raise ValueError("No runs in this directory: " + root) 112 | if load_run==-1: 113 | load_run = last_run 114 | else: 115 | load_run = os.path.join(root, load_run) 116 | 117 | if checkpoint==-1: 118 | models = [file for file in os.listdir(load_run) if 'model' in file] 119 | models.sort(key=lambda m: '{0:0>15}'.format(m)) 120 | model = models[-1] 121 | else: 122 | model = "model_{}.pt".format(checkpoint) 123 | 124 | load_path = os.path.join(load_run, model) 125 | return load_path 126 | 127 | def update_cfg_from_args(env_cfg, cfg_train, args): 128 | # seed 129 | if env_cfg is not None: 130 | # num envs 131 | if args.num_envs is not None: 132 | env_cfg.env.num_envs = args.num_envs 133 | if cfg_train is not None: 134 | if args.seed is not None: 135 | cfg_train.seed = args.seed 136 | # alg runner parameters 137 | if args.max_iterations is not None: 138 | cfg_train.runner.max_iterations = args.max_iterations 139 | if args.resume: 140 | cfg_train.runner.resume = args.resume 141 | if args.experiment_name is not None: 142 | cfg_train.runner.experiment_name = 
args.experiment_name 143 | if args.run_name is not None: 144 | cfg_train.runner.run_name = args.run_name 145 | if args.load_run is not None: 146 | cfg_train.runner.load_run = args.load_run 147 | if args.checkpoint is not None: 148 | cfg_train.runner.checkpoint = args.checkpoint 149 | 150 | return env_cfg, cfg_train 151 | 152 | def get_args(): 153 | custom_parameters = [ 154 | {"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Name of the task to run or train. Overrides config file if provided."}, 155 | {"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"}, 156 | {"name": "--experiment_name", "type": str, "help": "Name of the experiment to run or load. Overrides config file if provided."}, 157 | {"name": "--run_name", "type": str, "help": "Name of the run. Overrides config file if provided."}, 158 | {"name": "--load_run", "type": str, "help": "Name of the run to load when resume=True. If -1: will load the last run. Overrides config file if provided."}, 159 | {"name": "--checkpoint", "type": int, "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided."}, 160 | 161 | {"name": "--headless", "action": "store_true", "default": False, "help": "Force display off at all times"}, 162 | {"name": "--horovod", "action": "store_true", "default": False, "help": "Use horovod for multi-gpu training"}, 163 | {"name": "--rl_device", "type": str, "default": "cuda:0", "help": 'Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)'}, 164 | {"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."}, 165 | {"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."}, 166 | {"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."}, 167 | ]
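`gymutil.parse_arguments` below is Isaac Gym's thin wrapper around `argparse` that also injects simulator flags such as `--physics_engine` and the compute/graphics device ids. As a rough, hedged sketch of what the parameter list above amounts to when Isaac Gym is unavailable (plain stdlib `argparse`, no simulator-specific flags), it translates to:

```
# Minimal stdlib-argparse equivalent of the custom_parameters list above.
# Flag names and defaults are taken from that list; everything else here
# (the builder function, the sample invocation) is illustrative only.
import argparse

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="RL Policy")
    parser.add_argument("--task", type=str, default="anymal_c_flat")
    parser.add_argument("--resume", action="store_true", default=False)
    parser.add_argument("--experiment_name", type=str)
    parser.add_argument("--run_name", type=str)
    parser.add_argument("--load_run", type=str)
    parser.add_argument("--checkpoint", type=int)
    parser.add_argument("--headless", action="store_true", default=False)
    parser.add_argument("--rl_device", type=str, default="cuda:0")
    parser.add_argument("--num_envs", type=int)
    parser.add_argument("--seed", type=int)
    parser.add_argument("--max_iterations", type=int)
    return parser

args = build_parser().parse_args(["--task", "anymal_c_flat", "--seed", "1"])
assert args.seed == 1
```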
168 | # parse arguments 169 | args = gymutil.parse_arguments( 170 | description="RL Policy", 171 | custom_parameters=custom_parameters) 172 | 173 | # name alignment 174 | args.sim_device_id = args.compute_device_id 175 | args.sim_device = args.sim_device_type 176 | if args.sim_device=='cuda': 177 | args.sim_device += f":{args.sim_device_id}" 178 | return args 179 | 180 | def export_policy_as_jit(actor_critic, path): 181 | if hasattr(actor_critic, 'memory_a'): 182 | # assumes LSTM: TODO add GRU 183 | exporter = PolicyExporterLSTM(actor_critic) 184 | exporter.export(path) 185 | else: 186 | os.makedirs(path, exist_ok=True) 187 | path = os.path.join(path, 'policy_1.pt') 188 | model = copy.deepcopy(actor_critic.actor).to('cpu') 189 | traced_script_module = torch.jit.script(model) 190 | traced_script_module.save(path) 191 | 192 | 193 | class PolicyExporterLSTM(torch.nn.Module): 194 | def __init__(self, actor_critic): 195 | super().__init__() 196 | self.actor = copy.deepcopy(actor_critic.actor) 197 | self.is_recurrent = actor_critic.is_recurrent 198 | self.memory = copy.deepcopy(actor_critic.memory_a.rnn) 199 | self.memory.cpu() 200 | self.register_buffer('hidden_state', torch.zeros(self.memory.num_layers, 1, self.memory.hidden_size)) 201 | self.register_buffer('cell_state', torch.zeros(self.memory.num_layers, 1, self.memory.hidden_size)) 202 | 203 | def forward(self, x): 204 | out, (h, c) = self.memory(x.unsqueeze(0), (self.hidden_state, self.cell_state)) 205 | self.hidden_state[:] = h 206 | self.cell_state[:] = c 207 | return self.actor(out.squeeze(0)) 208 | 209 | @torch.jit.export 210 | def reset_memory(self): 211 | self.hidden_state[:] = 0. 212 | self.cell_state[:] = 0. 213 | 214 | def export(self, path): 215 | os.makedirs(path, exist_ok=True) 216 | path = os.path.join(path, 'policy_lstm_1.pt') 217 | self.to('cpu') 218 | traced_script_module = torch.jit.script(self) 219 | traced_script_module.save(path) 220 | 221 | 222 | -------------------------------------------------------------------------------- /config/utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | from collections import defaultdict 34 | from multiprocessing import Process, Value 35 | 36 | class Logger: 37 | def __init__(self, dt): 38 | self.state_log = defaultdict(list) 39 | self.rew_log = defaultdict(list) 40 | self.dt = dt 41 | self.num_episodes = 0 42 | self.plot_process = None 43 | 44 | def log_state(self, key, value): 45 | self.state_log[key].append(value) 46 | 47 | def log_states(self, dict): 48 | for key, value in dict.items(): 49 | self.log_state(key, value) 50 | 51 | def log_rewards(self, dict, num_episodes): 52 | for key, value in dict.items(): 53 | if 'rew' in key: 54 | self.rew_log[key].append(value.item() * num_episodes) 55 | self.num_episodes += num_episodes 56 | 57 | def reset(self): 58 | self.state_log.clear() 59 | self.rew_log.clear() 60 | 61 | def plot_states(self): 62 | self.plot_process = Process(target=self._plot) 63 | self.plot_process.start() 64 | 65 | def _plot(self): 66 | nb_rows = 3 67 | nb_cols = 3 68 | fig, axs = plt.subplots(nb_rows, nb_cols) 69 | for key, value in self.state_log.items(): 70 | time = np.linspace(0, len(value)*self.dt, len(value)) 71 | break 72 | log= self.state_log 73 | # plot joint targets and measured positions 74 | a = axs[1, 0] 75 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured') 76 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target') 77 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position') 78 | a.legend() 79 | # plot joint velocity 80 | a = axs[1, 1] 81 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured') 82 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target') 83 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity') 84 | a.legend() 85 | # plot base vel x 86 | a = axs[0, 0] 87 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured') 88 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded') 89 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x') 90 | a.legend() 91 | # plot base vel y 92 | a = axs[0, 1] 93 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured') 94 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded') 95 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y') 96 | a.legend() 97 | # plot base vel yaw 98 | a = axs[0, 2] 99 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured') 100 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') 101 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw') 102 | a.legend() 103 | # plot base vel z 104 | a = axs[1, 2] 105 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured') 106 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z') 107 | 
a.legend() 108 | # plot contact forces 109 | a = axs[2, 0] 110 | if log["contact_forces_z"]: 111 | forces = np.array(log["contact_forces_z"]) 112 | for i in range(forces.shape[1]): 113 | a.plot(time, forces[:, i], label=f'force {i}') 114 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') 115 | a.legend() 116 | # plot torque/vel curves 117 | a = axs[2, 1] 118 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') 119 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') 120 | a.legend() 121 | # plot torques 122 | a = axs[2, 2] 123 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') 124 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque') 125 | a.legend() 126 | plt.show() 127 | 128 | def print_rewards(self): 129 | print("Average rewards per second:") 130 | for key, values in self.rew_log.items(): 131 | mean = np.sum(np.array(values)) / self.num_episodes 132 | print(f" - {key}: {mean}") 133 | print(f"Total number of episodes: {self.num_episodes}") 134 | 135 | def __del__(self): 136 | if self.plot_process is not None: 137 | self.plot_process.kill() -------------------------------------------------------------------------------- /config/utils/math.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | from torch import Tensor 33 | import numpy as np 34 | # from isaacgym.torch_utils import quat_apply, normalize 35 | from typing import Tuple 36 | 37 | # @ torch.jit.script 38 | def quat_apply_yaw(quat, vec): 39 | quat_yaw = quat.clone().view(-1, 4) 40 | quat_yaw[:, :2] = 0. 
41 | quat_yaw = normalize(quat_yaw) 42 | return quat_apply(quat_yaw, vec) 43 | 44 | # @ torch.jit.script 45 | def wrap_to_pi(angles): 46 | angles %= 2*np.pi 47 | angles -= 2*np.pi * (angles > np.pi) 48 | return angles 49 | 50 | # @ torch.jit.script 51 | def torch_rand_sqrt_float(lower, upper, shape, device): 52 | # type: (float, float, Tuple[int, int], str) -> Tensor 53 | r = 2*torch.rand(*shape, device=device) - 1 54 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r)) 55 | r = (r + 1.) / 2. 56 | return (upper - lower) * r + lower --------------------------------------------------------------------------------
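A quick check of `wrap_to_pi` from the math utilities above (note `quat_apply` and `normalize` come from the commented-out `isaacgym.torch_utils` import, so they are not exercised here): angles are first reduced modulo 2π, then anything above π is shifted down by 2π, landing in (-π, π].

```
# Standalone demo of wrap_to_pi; mutates its input in place like the original.
import numpy as np
import torch

def wrap_to_pi(angles):
    angles %= 2 * np.pi
    angles -= 2 * np.pi * (angles > np.pi)
    return angles

a = torch.tensor([0.0, np.pi, 1.5 * np.pi, -0.5 * np.pi, 3 * np.pi])
print(wrap_to_pi(a))  # tensor([ 0.0000,  3.1416, -1.5708, -1.5708,  3.1416])
```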
/config/utils/task_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | from datetime import datetime 33 | from typing import Tuple 34 | import torch 35 | import numpy as np 36 | 37 | from rl.env import VecEnv 38 | from rl.runners import OnPolicyRunner 39 | 40 | from config import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 41 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params 42 | from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 43 | 44 | class TaskRegistry(): 45 | def __init__(self): 46 | self.task_classes = {} 47 | self.env_cfgs = {} 48 | self.train_cfgs = {} 49 | 50 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO): 51 | self.task_classes[name] = task_class 52 | self.env_cfgs[name] = env_cfg 53 | self.train_cfgs[name] = train_cfg 54 | 55 | def get_task_class(self, name: str) -> VecEnv: 56 | return self.task_classes[name] 57 | 58 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]: 59 | train_cfg = self.train_cfgs[name] 60 | env_cfg = self.env_cfgs[name] 61 | # copy seed 62 | env_cfg.seed = train_cfg.seed 63 | return env_cfg, train_cfg 64 | 65 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]: 66 | """ Creates an environment either from a registered name or from the provided config file. 67 | 68 | Args: 69 | name (string): Name of a registered env. 70 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 71 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None. 72 | 73 | Raises: 74 | ValueError: Error if no registered env corresponds to 'name' 75 | 76 | Returns: 77 | isaacgym.VecTaskPython: The created environment 78 | Dict: the corresponding config file 79 | """ 80 | # if no args passed get command line arguments 81 | if args is None: 82 | args = get_args() 83 | # check if there is a registered env with that name 84 | if name in self.task_classes: 85 | task_class = self.get_task_class(name) 86 | else: 87 | raise ValueError(f"Task with name: {name} was not registered") 88 | if env_cfg is None: 89 | # load config files 90 | env_cfg, _ = self.get_cfgs(name) 91 | # override cfg from args (if specified) 92 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args) 93 | set_seed(env_cfg.seed) 94 | # parse sim params (convert to dict first) 95 | sim_params = {"sim": class_to_dict(env_cfg.sim)} 96 | sim_params = parse_sim_params(args, sim_params) 97 | env = task_class( cfg=env_cfg, 98 | sim_params=sim_params, 99 | physics_engine=args.physics_engine, 100 | sim_device=args.sim_device, 101 | headless=args.headless) 102 | return env, env_cfg 103 | 104 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default") -> Tuple[OnPolicyRunner, LeggedRobotCfgPPO]: 105 | """ Creates the training algorithm either from a registered name or from the provided config file. 106 | 107 | Args: 108 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm) 109 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None. 110 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 111 | train_cfg (Dict, optional): Training config file. If None 'name' will be used to get the config file. Defaults to None. 
112 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example). 113 | Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>. 114 | 115 | Raises: 116 | ValueError: Error if neither 'name' nor 'train_cfg' is provided 117 | Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored 118 | 119 | Returns: 120 | PPO: The created algorithm 121 | Dict: the corresponding config file 122 | """ 123 | # # if no args passed get command line arguments 124 | # if args is None: 125 | # args = get_args() 126 | # # if config files are passed use them, otherwise load from the name 127 | # if train_cfg is None: 128 | # if name is None: 129 | # raise ValueError("Either 'name' or 'train_cfg' must be not None") 130 | # # load config files 131 | # _, train_cfg = self.get_cfgs(name) 132 | # else: 133 | # if name is not None: 134 | # print(f"'train_cfg' provided -> Ignoring 'name={name}'") 135 | # # override cfg from args (if specified) 136 | # _, train_cfg = update_cfg_from_args(None, train_cfg, args) 137 | 138 | if log_root=="default": 139 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name) 140 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 141 | elif log_root is None: 142 | log_dir = None 143 | else: 144 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 145 | 146 | train_cfg_dict = class_to_dict(train_cfg) 147 | runner = OnPolicyRunner(env, train_cfg_dict, log_dir, device='cuda') 148 | #save resume path before creating a new log_dir 149 | resume = train_cfg.runner.resume 150 | if resume: 151 | # load previously trained model 152 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint) 153 | print(f"Loading model from: {resume_path}") 154 | runner.load(resume_path) 155 | return runner, train_cfg 156 | 157 | # make global task registry 158 | task_registry = TaskRegistry() --------------------------------------------------------------------------------
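A hypothetical usage sketch of the registry above. The task name `"gibson_nav"` and the config instances are placeholders, `make_env` additionally requires Isaac Gym (`gymapi`/`gymutil`) for `get_args`/`parse_sim_params`, and, since the name-based config loading inside `make_alg_runner` is commented out in this version, `train_cfg` is passed explicitly:

```
# Illustrative only: registers a task, builds the env, then wraps it in PPO.
from config.utils.task_registry import task_registry
from config.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
from rl.env import VecGibson

task_registry.register("gibson_nav", VecGibson, LeggedRobotCfg(), LeggedRobotCfgPPO())
env, env_cfg = task_registry.make_env("gibson_nav")
_, train_cfg = task_registry.get_cfgs("gibson_nav")
runner, train_cfg = task_registry.make_alg_runner(env, train_cfg=train_cfg)
```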
/config/utils/terrain.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | from numpy.random import choice 33 | from scipy import interpolate 34 | 35 | from isaacgym import terrain_utils 36 | from config.envs.base.legged_robot_config import LeggedRobotCfg 37 | 38 | class Terrain: 39 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 40 | 41 | self.cfg = cfg 42 | self.num_robots = num_robots 43 | self.type = cfg.mesh_type 44 | if self.type in ["none", 'plane']: 45 | return 46 | self.env_length = cfg.terrain_length 47 | self.env_width = cfg.terrain_width 48 | self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))] 49 | 50 | self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols 51 | self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3)) 52 | 53 | self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale) 54 | self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale) 55 | 56 | self.border = int(cfg.border_size/self.cfg.horizontal_scale) 57 | self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border 58 | self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border 59 | 60 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16) 61 | if cfg.curriculum: 62 | self.curriculum() 63 | elif cfg.selected: 64 | self.selected_terrain() 65 | else: 66 | self.randomized_terrain() 67 | 68 | self.heightsamples = self.height_field_raw 69 | if self.type=="trimesh": 70 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw, 71 | self.cfg.horizontal_scale, 72 | self.cfg.vertical_scale, 73 | self.cfg.slope_treshold) 74 | 75 | def randomized_terrain(self): 76 | for k in range(self.cfg.num_sub_terrains): 77 | # Env coordinates in the world 78 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 79 | 80 | choice = np.random.uniform(0, 1) 81 | difficulty = np.random.choice([0.5, 0.75, 0.9]) 82 | terrain = self.make_terrain(choice, difficulty) 83 | self.add_terrain_to_map(terrain, i, j) 84 | 85 | def curriculum(self): 86 | for j in range(self.cfg.num_cols): 87 | for i in range(self.cfg.num_rows): 88 | difficulty = i / self.cfg.num_rows 89 | choice = j / self.cfg.num_cols + 0.001 90 | 91 | terrain = self.make_terrain(choice, difficulty) 92 | self.add_terrain_to_map(terrain, i, j) 93 | 94 | def selected_terrain(self): 95 | terrain_type = self.cfg.terrain_kwargs.pop('type') 96 | for k in range(self.cfg.num_sub_terrains): 97 | # Env coordinates in the world 98 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 99 | 100 | terrain = terrain_utils.SubTerrain("terrain", 101 | width=self.width_per_env_pixels, 102 | length=self.width_per_env_pixels, 103 | vertical_scale=self.cfg.vertical_scale, 104 | horizontal_scale=self.cfg.horizontal_scale) 105 | 106 | eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs) 107 | self.add_terrain_to_map(terrain, i, j) 108 | 
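The `proportions` list built in `__init__` above is a running cumulative sum, so a uniform `choice` in [0, 1) falls into the bucket of the first cumulative value it is below. A toy illustration (the example mix is assumed, not from the config):

```
# How cumulative terrain_proportions bucket a uniform sample into a terrain type.
import numpy as np

terrain_proportions = [0.2, 0.2, 0.2, 0.2, 0.2]  # assumed example mix
proportions = [np.sum(terrain_proportions[:i + 1]) for i in range(len(terrain_proportions))]
# proportions == [0.2, 0.4, 0.6, 0.8, 1.0]

choice = np.random.uniform(0, 1)
terrain_index = next(i for i, p in enumerate(proportions) if choice < p)
print(f"choice={choice:.2f} -> terrain bucket {terrain_index}")
```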
109 | def make_terrain(self, choice, difficulty): 110 | terrain = terrain_utils.SubTerrain( "terrain", 111 | width=self.width_per_env_pixels, 112 | length=self.width_per_env_pixels, 113 | vertical_scale=self.cfg.vertical_scale, 114 | horizontal_scale=self.cfg.horizontal_scale) 115 | slope = difficulty * 0.4 116 | step_height = 0.05 + 0.18 * difficulty 117 | discrete_obstacles_height = 0.05 + difficulty * 0.2 118 | stepping_stones_size = 1.5 * (1.05 - difficulty) 119 | stone_distance = 0.05 if difficulty==0 else 0.1 120 | gap_size = 1. * difficulty 121 | pit_depth = 1. * difficulty 122 | if choice < self.proportions[0]: 123 | if choice < self.proportions[0]/ 2: 124 | slope *= -1 125 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 126 | elif choice < self.proportions[1]: 127 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 128 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2) 129 | elif choice < self.proportions[3]: 130 | if choice self.desired_kl * 2.0: 217 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 218 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 219 | self.learning_rate = min(1e-3, self.learning_rate * 1.5) 220 | 221 | for param_group in self.optimizer.param_groups: 222 | if param_group['name'] == 'actor_critic': 223 | param_group['lr'] = self.learning_rate 224 | 225 | 226 | # Surrogate loss 227 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 228 | surrogate = -torch.squeeze(advantages_batch) * ratio 229 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 230 | 1.0 + self.clip_param) 231 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 232 | 233 | # Value function loss 234 | if self.use_clipped_value_loss: 235 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 236 | self.clip_param) 237 | value_losses = (value_batch - returns_batch).pow(2) 238 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 239 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 240 | else: 241 | value_loss = (returns_batch - value_batch).pow(2).mean() 242 | 243 | loss = surrogate_loss + self.value_loss_coef * 0.5 * value_loss - self.entropy_coef * entropy_batch.mean() 244 | 245 | # Gradient step 246 | self.optimizer.zero_grad() 247 | loss.backward() 248 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 249 | self.optimizer.step() 250 | 251 | mean_value_loss += value_loss.item() 252 | mean_surrogate_loss += surrogate_loss.item() 253 | 254 | num_updates = self.num_learning_epochs * self.num_mini_batches 255 | mean_value_loss /= num_updates 256 | mean_surrogate_loss /= num_updates 257 | self.storage.clear() 258 | 259 | return mean_value_loss, mean_surrogate_loss 260 | --------------------------------------------------------------------------------
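A toy re-computation of the clipped surrogate objective from the PPO update above, outside the class. Because the advantage term is negated, taking the elementwise `max` implements PPO's pessimistic (clipped) lower bound on the policy objective:

```
# Toy tensors only; clip_param matches the role of self.clip_param above.
import torch

clip_param = 0.2
advantages = torch.tensor([1.0, 1.0, -1.0])
log_prob_new = torch.log(torch.tensor([0.5, 0.9, 0.5]))
log_prob_old = torch.log(torch.tensor([0.5, 0.5, 0.5]))

ratio = torch.exp(log_prob_new - log_prob_old)  # [1.0, 1.8, 1.0]
surrogate = -advantages * ratio
surrogate_clipped = -advantages * torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)
surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
print(surrogate_loss)  # tensor(-0.4000): the ratio 1.8 is clipped to 1.2
```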
/rl/conf/configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE 20 | 21 | import os 22 | 23 | 24 | class Configs(object): 25 | MAX_SCORE = 9999999999 26 | 27 | # time interval of one algorithm slice (simulator step) 28 | ALG_RUN_FREQUENCY = 10 # unit: minutes 29 | 30 | # coefficient balancing the multi-objective weights 31 | LAMDA = 10000 32 | 33 | # different pallet types of orders 34 | PALLET_TYPE_LABELS = ["PALLET", "HALF_PALLET", "BOX"] 35 | LABEL_TO_DEMAND_UNIT = {"PALLET": 1, "HALF_PALLET": 0.5, "BOX": 0.25} 36 | STANDARD_PALLET_LABEL = "PALLET" 37 | SMALL_PALLET_LABEL = "HALF_PALLET" 38 | BOX_LABEL = "BOX" 39 | 40 | # order status 0: initialization, 1: generated, 2: ongoing, 3: completed 41 | ORDER_STATUS_TO_CODE = {"INITIALIZATION": 0, "GENERATED": 1, "ONGOING": 2, "COMPLETED": 3} 42 | 43 | # loading and unloading speed 44 | LOAD_SPEED = 0.25 # unit is standard pallet per minute 45 | UNLOAD_SPEED = 0.25 # unit is standard pallet per minute 46 | 47 | # dock approaching time 48 | DOCK_APPROACHING_TIME = 30 * 60 # unit: second 49 | 50 | # file paths 51 | root_folder_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 52 | benchmark_folder_path = os.path.join(root_folder_path, "benchmark") 53 | src_folder_path = os.path.join(root_folder_path, "src") 54 | algorithm_folder_path = os.path.join(root_folder_path, "algorithm") 55 | output_folder = os.path.join(root_folder_path, "output") 56 | if not os.path.exists(output_folder): 57 | os.makedirs(output_folder) 58 | 59 | # route_info_file = "route_info.csv" 60 | # factory_info_file = "factory_info.csv" 61 | # route_info_file_path = os.path.join(benchmark_folder_path, route_info_file) 62 | # factory_info_file_path = os.path.join(benchmark_folder_path, factory_info_file) 63 | 64 | # algorithm_data_interaction_folder_path = os.path.join(algorithm_folder_path, "data_interaction") 65 | # if not os.path.exists(algorithm_data_interaction_folder_path): 66 | # os.makedirs(algorithm_data_interaction_folder_path) 67 | # algorithm_vehicle_input_info_path = os.path.join(algorithm_data_interaction_folder_path, "vehicle_info.json") 68 | # algorithm_unallocated_order_items_input_path = os.path.join(algorithm_data_interaction_folder_path, 69 | # "unallocated_order_items.json") 70 | # algorithm_ongoing_order_items_input_path = os.path.join(algorithm_data_interaction_folder_path, 71 | # "ongoing_order_items.json") 72 | 73 | # algorithm_output_destination_path = os.path.join(algorithm_data_interaction_folder_path, 
'output_destination.json') 74 | # algorithm_output_planned_route_path = os.path.join(algorithm_data_interaction_folder_path, 'output_route.json') 75 | 76 | # algorithm entry file name, without extension 77 | ALGORITHM_ENTRY_FILE_NAME = 'main_algorithm' 78 | 79 | # mapping from file extension to algorithm language 80 | ALGORITHM_LANGUAGE_MAP = {'py': 'python', 81 | 'class': 'java', 82 | 'exe': 'c', 83 | 'out': 'c', 84 | } 85 | 86 | # random seed 87 | RANDOM_SEED = 0 88 | 89 | # timeout for one algorithm run 90 | MAX_RUNTIME_OF_ALGORITHM = 600 91 | 92 | # flag indicating that the algorithm finished successfully 93 | ALGORITHM_SUCCESS_FLAG = 'SUCCESS' 94 | 95 | # maximum number of log files 96 | MAX_LOG_FILE_NUM = 10 97 | 98 | # number of seconds in a day 99 | A_DAY_TIME_SECONDS = 24 * 60 * 60 100 | 101 | # dataset selection; an empty list selects all datasets, e.g. [], [1], [1, 2, 3], [64] 102 | selected_instances = [1] 103 | all_test_instances = range(1, 65) 104 | --------------------------------------------------------------------------------
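A small sketch of how the constants above combine: total dock time for a vehicle is the dock-approaching overhead plus loading time, where loading time is the demand in standard pallets divided by `LOAD_SPEED`. The item list here is made up for illustration:

```
# Illustrative arithmetic with the Configs constants above.
from rl.conf.configs import Configs

items = ["PALLET", "HALF_PALLET", "BOX", "BOX"]  # hypothetical order items
demand = sum(Configs.LABEL_TO_DEMAND_UNIT[label] for label in items)  # 2.0 pallets
load_minutes = demand / Configs.LOAD_SPEED                            # 8.0 minutes
total_seconds = Configs.DOCK_APPROACHING_TIME + load_minutes * 60
print(total_seconds)  # 2280.0 seconds
```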
/rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv 32 | from .my_env import VecGibson -------------------------------------------------------------------------------- /rl/env/my_env.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import sys 3 | sys.path.append('/Extra/lwy/gibson/graduate/') 4 | 5 | from rl.env.vec_env import VecEnv 6 | import torch 7 | from typing import Tuple, Union 8 | 9 | import gibson2 10 | from gibson2.envs.igibson_env import iGibsonEnv 11 | from gibson2.envs.parallel_env import ParallelNavEnv 12 | import atexit 13 | import multiprocessing 14 | import sys 15 | import traceback 16 | import numpy as np 17 | import os 18 | from gibson2.utils.utils import parse_config 19 | import logging 20 | logging.getLogger().setLevel(logging.WARNING) 21 | 22 | 23 | class VecGibson(VecEnv): 24 | def __init__(self) -> None: 25 | super().__init__() 26 | num_envs: int 27 | num_obs: int 28 | num_privileged_obs: int 29 | num_actions: int 30 | max_episode_length: int 31 | privileged_obs_buf: torch.Tensor 32 | obs_buf: torch.Tensor 33 | rew_buf: torch.Tensor 34 | reset_buf: torch.Tensor 35 | episode_length_buf: torch.Tensor # current episode duration 36 | extras: dict 37 | device: torch.device 38 | self.num_obs = 260 39 | self.num_privileged_obs = None 40 | self.num_actions = 2 41 | self.max_episode_length = 500 42 | 43 | 44 | config_file_name = '/home/lwy/IGibson2021/iGibson/gibson2/examples/configs/locobot_interactive_nav.yaml' 45 | env_config = parse_config(config_file_name) 46 | self.num_envs = 5 47 | GPU_ID = [1,2] * 5 48 | self.Env_name = ['Beechwood_1_int','Benevolence_0_int','Ihlen_0_int','Ihlen_1_int','Merom_0_int','Pomaria_0_int','Rs_int','Wainscott_1_int'] 49 | self.Training_Env = self.Env_name[:5] * 2 50 | self.Testing_Env = self.Env_name[-3:] 51 | self.core_id = 0 52 | class load_env(object): 53 | def __init__(self, num_envs, envs, GPU_ID, i) -> None: 54 | self.num_envs = num_envs 55 | self.id = i 56 | self.envs = envs 57 | self.GPU_ID = GPU_ID 58 | def __call__(self, *args, **kwds): 59 | logging.warning(self.envs[self.id]) 60 | logging.warning(GPU_ID[self.id]) 61 | return iGibsonEnv(config_file = env_config, 62 | scene_id = self.envs[self.id], 63 | mode = 'headless', 64 | action_timestep = 1.0 / 10.0, 65 | physics_timestep = 1.0 / 40.0, 66 | device_idx = GPU_ID[self.id], 67 | automatic_reset = True) 68 | self.parallel_env = ParallelNavEnv([load_env(self.num_envs, self.Training_Env, GPU_ID, i) for i in range(0, self.num_envs)], blocking=False) ## env_constructor list contains callable functions 69 | print(self.Training_Env) 70 | 71 | 72 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 73 | vec_res = self.parallel_env.step(actions) 74 | self.obs, rewards, dones, infos = self.process_vec_env(vec_res) 75 | return self.obs, None, rewards, dones, infos 76 | 77 | 78 | def reset(self, env_ids = 'all'): 79 | ''' 80 | reset state 81 | obs includes list of dicts (task_obs rgb and depth) 82 | ''' 83 | vec_res = self.parallel_env.reset() 84 | self.obs = [obs for obs in vec_res] 85 | return self.obs, None 86 | 87 | def get_observations(self) -> torch.Tensor: 88 | return self.obs 89 | 90 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 91 | return None 92 | 93 | def process_vec_env(self, vec_res): 94 | ''' 95 | input: vec_res 96 | output: obs, rewards, dones, infos 97 | '''
98 | obs = [] 99 | rewards = [] 100 | dones = [] 101 | infos = defaultdict(list) 102 | 103 | for res in vec_res: 104 | state, reward, done, info = res 105 | # if done: 106 | # print('done') 107 | obs.append(state if not done else info['last_observation']) ## the env auto-resets after done; keep last_observation instead of the first frame of the new episode 108 | rewards.append(reward) 109 | dones.append(done) 110 | # infos.append(info) 111 | info['time_outs'] = True if done and info['episode_length'] == 500 else False 112 | for key in info: 113 | infos[key].append(info[key]) 114 | 115 | return obs, torch.tensor(np.array(rewards)), torch.tensor(np.array(dones)), infos 116 | 117 | def cal_belief_state(self, state, memory): 118 | """ 119 | args: 120 | state: a single observation (one frame) 121 | memory 122 | 123 | First compute the embedding of the current state, 124 | then update the memory, 125 | then derive the current belief_state. 126 | return: 127 | belief_state 128 | """ 129 | with torch.no_grad(): 130 | task_obs = state['task_obs'].copy() 131 | rgb = state['rgb'].copy() 132 | depth = state['depth'].copy() 133 | 134 | ## (T, D) layout: add only the sequence dimension T 135 | task_obs = torch.FloatTensor(task_obs).unsqueeze(0).cuda() 136 | rgb = torch.FloatTensor(rgb).unsqueeze(0).cuda() 137 | depth = torch.FloatTensor(depth).unsqueeze(0).cuda() 138 | 139 | encoder_state, memory = self.encoder_net(rgb,depth,task_obs, 0, memory) 140 | ## cat predicted angle 141 | # angle = self.decoder_net(encoder_state) * math.pi 142 | # encoder_state = torch.cat((encoder_state, angle), -1) 143 | 144 | return encoder_state.detach().cpu().numpy(), memory ## detach to drop the computation graph 145 | 146 | if __name__ == "__main__": 147 | env = VecGibson() 148 | print('ok') --------------------------------------------------------------------------------
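A toy illustration of the `process_vec_env` contract above, with hand-made `(state, reward, done, info)` tuples standing in for `ParallelNavEnv` output (the observation dicts and info keys mirror the ones used in the loop):

```
# Illustrative only: fake vectorized-env results instead of iGibson output.
import numpy as np
import torch
from collections import defaultdict

fake_results = [
    ({"task_obs": np.zeros(4)}, 0.1, False, {"episode_length": 12}),
    ({"task_obs": np.ones(4)}, 1.0, True, {"episode_length": 500,
                                           "last_observation": {"task_obs": np.full(4, 2.0)}}),
]

obs, rewards, dones = [], [], []
infos = defaultdict(list)
for state, reward, done, info in fake_results:
    obs.append(state if not done else info["last_observation"])  # keep last obs on done
    rewards.append(reward)
    dones.append(done)
    info["time_outs"] = bool(done and info["episode_length"] == 500)
    for key in info:
        infos[key].append(info[key])

print(torch.tensor(np.array(rewards)))  # tensor([0.1000, 1.0000], dtype=torch.float64)
print(infos["time_outs"])               # [False, True]
```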
/rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from abc import ABC, abstractmethod 32 | import torch 33 | from typing import Tuple, Union 34 | 35 | # minimal interface of the environment 36 | class VecEnv(ABC): 37 | num_envs: int 38 | num_obs: int 39 | num_privileged_obs: int 40 | num_actions: int 41 | max_episode_length: int 42 | privileged_obs_buf: torch.Tensor 43 | obs_buf: torch.Tensor 44 | rew_buf: torch.Tensor 45 | reset_buf: torch.Tensor 46 | episode_length_buf: torch.Tensor # current episode duration 47 | extras: dict 48 | device: torch.device 49 | @abstractmethod 50 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 51 | pass 52 | @abstractmethod 53 | def reset(self, env_ids: Union[list, torch.Tensor]): 54 | pass 55 | @abstractmethod 56 | def get_observations(self) -> torch.Tensor: 57 | pass 58 | @abstractmethod 59 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 60 | pass --------------------------------------------------------------------------------
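A minimal concrete `VecEnv` for smoke-testing the runner plumbing, entirely illustrative: a stateless environment that returns random observations and rewards and implements all four abstract methods of the interface above.

```
# DummyVecEnv is a made-up example, not part of the repository.
import torch
from rl.env.vec_env import VecEnv

class DummyVecEnv(VecEnv):
    def __init__(self, num_envs=2, num_obs=4, num_actions=2):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.num_privileged_obs = None
        self.max_episode_length = 100
        self.device = torch.device("cpu")
        self.obs_buf = torch.zeros(num_envs, num_obs)

    def step(self, actions: torch.Tensor):
        self.obs_buf = torch.randn(self.num_envs, self.num_obs)
        rew = torch.randn(self.num_envs)
        dones = torch.zeros(self.num_envs, dtype=torch.bool)
        return self.obs_buf, None, rew, dones, {}

    def reset(self, env_ids="all"):
        self.obs_buf = torch.zeros(self.num_envs, self.num_obs)
        return self.obs_buf, None

    def get_observations(self):
        return self.obs_buf

    def get_privileged_observations(self):
        return None

env = DummyVecEnv()
obs, _, rew, dones, infos = env.step(torch.zeros(2, 2))
```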
/rl/modules/SMT.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch._C import device 4 | from torch.nn import functional as F 5 | from torch.autograd import Variable 6 | from torch.nn.modules.pooling import MaxPool2d 7 | import torch.utils.model_zoo as model_zoo 8 | 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | import math 12 | class PositionalEncoding(nn.Module): 13 | 14 | def __init__(self, d_model: int, max_len: int = 501): 15 | super().__init__() 16 | # self.dropout = nn.Dropout(p=dropout) 17 | position = torch.arange(max_len).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)) 19 | pe = torch.zeros(max_len, 1, d_model) 20 | pe[:, 0, 0::2] = torch.sin(position * div_term) 21 | pe[:, 0, 1::2] = torch.cos(position * div_term) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x: Tensor) -> Tensor: 25 | """ 26 | Args: 27 | x: Tensor, shape [seq_len, batch_size, embedding_dim] 28 | """ 29 | x = x + self.pe[:x.size(0)] 30 | return x 31 | 32 | class AttBlock(nn.Module): 33 | def __init__(self, d_model, nhead: int = 4): 34 | super(AttBlock, self).__init__() 35 | self.multi_att = nn.MultiheadAttention(d_model, nhead) 36 | self.norm1 = nn.LayerNorm(d_model) 37 | # self.norm2 = nn.LayerNorm(d_model) 38 | self.linear = nn.Linear(d_model, d_model) 39 | 40 | 41 | def forward(self, X, Y, attn_mask, key_padding_mask): 42 | ''' 43 | X: query (L, N, E) 44 | Y: key value (S, N, E) 45 | attn_mask: (L, S) 46 | key_padding_mask: `(N, S)` 47 | output: (L, N, E) 48 | ''' 49 | # H = self.norm1(self.multi_att(X, Y, Y, attn_mask = mask)[0] + X) 50 | # return self.norm2(torch.relu(self.linear(H)) + H) 51 | 52 | ## Tr_I wrong 53 | # H = self.norm1(self.multi_att(X, Y, Y, attn_mask = mask)[0]) + X 54 | # return self.norm2(torch.relu(self.linear(H))) + H 55 | 56 | ## Tr_I_fix 57 | H = torch.relu(self.multi_att(X, Y, Y, key_padding_mask = key_padding_mask, attn_mask = attn_mask)[0]) + X 58 | return torch.relu(self.linear(self.norm1(H))) + H 59 | 60 | 61 | class SMT_state_encoder(nn.Module): 62 | def __init__(self, d_model, nhead: int = 4): 63 | super(SMT_state_encoder, self).__init__() 64 | self.encoder = AttBlock(d_model, nhead) ## index [0] is the attention output, [1] the attention weights 65 | self.decoder = AttBlock(d_model, nhead) 66 | self.pos_encoder1 = PositionalEncoding(d_model) 67 | self.pos_encoder2 = PositionalEncoding(d_model) 68 | 69 | def forward(self, o, M, flag, key_padding_mask): 70 | 71 | c_mask = self.causal_mask(M) 72 | M = self.encoder(M, M, c_mask, key_padding_mask) 73 | if flag == 1: ## training 74 | attn_mask = self.sequence_length_mask(M, 32) ## T * T 75 | else: ## inference 76 | attn_mask = self.infer_mask(o, M, 32) 77 | return self.decoder(o, M, attn_mask, key_padding_mask) 78 | 79 | def causal_mask(self, seq): 80 | seq_len, batch_size, _ = seq.size() 81 | mask = torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 82 | diagonal=1) ## 1 means masked 83 | # mask = mask.unsqueeze(0).expand(batch_size, -1, -1) # [B, L, L] 84 | return mask.to(bool) 85 | 86 | def infer_mask(self, o, M, length): 87 | ''' 88 | o_len * M_len 89 | ''' 90 | o_len, batch_size, _ = o.size() 91 | M_len, batch_size, _ = M.size() 92 | 93 | mask = torch.ones((o_len, M_len), dtype=torch.uint8, device = 'cuda') 94 | mask[:, max(M_len - length, 0) : M_len] = 0 ## 1 means masked 95 | return mask.to(bool) 96 | 97 | def sequence_length_mask(self, seq, length): 98 | seq_len, batch_size, _ = seq.size() 99 | causal_mask = torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 100 | diagonal=1) ## 1 means masked (causal mask) 101 | 102 | len_mask = 1 - torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device = 'cuda'), 103 | diagonal= -(length - 1)) # keep the main diagonal, everything above it, and the (length-1) diagonals below it 104 | mask = causal_mask + len_mask 105 | return mask.to(bool).cuda() 106 | ## pytorch layout: N, C, H, W 107 | ## tensorflow layout: N, H, W, C 108 | if __name__ == '__main__': 109 | # encoder = Encoder(feature_dim = 252) 110 | # rgb = Variable(torch.randn(1, 3, 180, 320)) 111 | # depth = Variable(torch.randn(1, 1, 180, 320)) 112 | o_obs = Variable(torch.randn(1, 1, 4)) 113 | M_obs = Variable(torch.randn(33, 1, 4)) 114 | 115 | # print(encoder(rgb,depth,task_obs).size()) 116 | # print(x) 117 | 118 | encoder = SMT_state_encoder(512, 4) 119 | print(encoder.infer_mask(o_obs, M_obs, 32)) 120 | 121 | 122 | 123 | 124 | 125 | --------------------------------------------------------------------------------
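A small CPU demo of the two masks combined in `sequence_length_mask` above (the module hardcodes `device='cuda'`; here the same masks are built on CPU). `True` entries are masked out, so the result is a causal mask further restricted to a sliding window of `length` past steps:

```
# Causal mask + window mask, as composed in sequence_length_mask.
import torch

seq_len, length = 5, 2
causal = torch.triu(torch.ones(seq_len, seq_len, dtype=torch.uint8), diagonal=1)
window = 1 - torch.triu(torch.ones(seq_len, seq_len, dtype=torch.uint8), diagonal=-(length - 1))
mask = (causal + window).to(bool)
print(mask.int())
# tensor([[0, 1, 1, 1, 1],
#         [0, 0, 1, 1, 1],
#         [1, 0, 0, 1, 1],
#         [1, 1, 0, 0, 1],
#         [1, 1, 1, 0, 0]], dtype=torch.int32)
```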
/rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .actor_critic import ActorCritic 32 | from .actor_critic_recurrent import ActorCriticRecurrent -------------------------------------------------------------------------------- /rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | 38 | class ActorCritic(nn.Module): 39 | is_recurrent = False 40 | def __init__(self, num_actor_obs, 41 | num_critic_obs, 42 | num_actions, 43 | actor_hidden_dims=[256, 256, 256], 44 | critic_hidden_dims=[256, 256, 256], 45 | activation='elu', 46 | init_noise_std=1.0, 47 | **kwargs): 48 | if kwargs: 49 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 50 | super(ActorCritic, self).__init__() 51 | 52 | activation = get_activation(activation) 53 | 54 | mlp_input_dim_a = num_actor_obs 55 | mlp_input_dim_c = num_critic_obs 56 | 57 | # Policy 58 | actor_layers = [] 59 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 60 | actor_layers.append(activation) 61 | for l in range(len(actor_hidden_dims) - 1): 62 | # if l == len(actor_hidden_dims) - 1: 63 | # actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 64 | # else: 65 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 66 | actor_layers.append(activation) 67 | self.policy_embedding = nn.Sequential(*actor_layers) 68 | 69 | self.mean_linear = nn.Linear(actor_hidden_dims[-1], num_actions) 70 | # self.log_std_linear = nn.Linear(actor_hidden_dims[-1], num_actions) 71 | 72 | # Value function 73 | critic_layers = [] 74 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 75 | critic_layers.append(activation) 76 | for l in range(len(critic_hidden_dims)): 77 | if l == len(critic_hidden_dims) - 1: 78 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 79 | else: 80 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 81 | critic_layers.append(activation) 82 | self.critic = nn.Sequential(*critic_layers) 83 | 84 | # print(f"Actor MLP: {self.actor}") 85 | # print(f"Critic MLP: {self.critic}") 86 | 87 | # Action noise; the -0.69 offset targets an initial std near 0.5 (exp(-0.69) ≈ 0.5) 88 | self.actor_logstd = nn.Parameter(init_noise_std * torch.ones(num_actions) - 0.69) 89 | self.std = torch.exp(self.actor_logstd) 90 | self.distribution = None 91 | # disable args validation for speedup 92 | Normal.set_default_validate_args = False 93 | 94 | # seems that we get better performance without init 95 | # self.init_memory_weights(self.memory_a, 0.001, 0.) 96 | # self.init_memory_weights(self.memory_c, 0.001, 0.) 
97 | 98 | @staticmethod 99 | # not used at the moment 100 | def init_weights(sequential, scales): 101 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 102 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 103 | 104 | 105 | def reset(self, dones=None): 106 | pass 107 | 108 | def forward(self): 109 | raise NotImplementedError 110 | 111 | @property 112 | def action_mean(self): 113 | return self.distribution.mean 114 | 115 | @property 116 | def action_std(self): 117 | return self.distribution.stddev 118 | 119 | @property 120 | def entropy(self): 121 | return self.distribution.entropy().sum(dim=-1) 122 | 123 | def update_distribution(self, observations): 124 | # mean = self.actor(observations) 125 | embedding = self.policy_embedding(observations) 126 | mean = self.mean_linear(embedding) 127 | # self.std = torch.exp(self.actor_logstd) 128 | # log_std = self.log_std_linear(embedding) 129 | # log_std = torch.clamp(self.actor_logstd, -20, 0) ## values start near 0; logstd should start near -0.5, keeping the std roughly between 0 and 0.5 130 | self.std = self.actor_logstd.exp() 131 | self.distribution = Normal(mean, mean*0. + self.std) 132 | 133 | def act(self, observations, **kwargs): 134 | self.update_distribution(observations) 135 | return self.distribution.sample() 136 | 137 | def get_actions_log_prob(self, actions): 138 | try: 139 | return self.distribution.log_prob(actions).sum(dim=-1) 140 | except Exception: 141 | print(actions.size(), self.distribution) 142 | raise 143 | 144 | def act_inference(self, observations): 145 | # actions_mean = self.actor(observations) 146 | embedding = self.policy_embedding(observations) 147 | actions_mean = self.mean_linear(embedding) 148 | return actions_mean 149 | 150 | def evaluate(self, critic_observations, **kwargs): 151 | value = self.critic(critic_observations) 152 | return value 153 | 154 | def get_activation(act_name): 155 | if act_name == "elu": 156 | return nn.ELU() 157 | elif act_name == "selu": 158 | return nn.SELU() 159 | elif act_name == "relu": 160 | return nn.ReLU() 161 | elif act_name == "crelu": 162 | return nn.ReLU() 163 | elif act_name == "lrelu": 164 | return nn.LeakyReLU() 165 | elif act_name == "tanh": 166 | return nn.Tanh() 167 | elif act_name == "sigmoid": 168 | return nn.Sigmoid() 169 | else: 170 | print("invalid activation function!") 171 | return None 172 | --------------------------------------------------------------------------------
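A standalone sketch of the Gaussian head used in `update_distribution` above: a state-independent, learnable log-std is exponentiated and broadcast against the action mean, and joint log-probabilities sum over action dimensions.

```
# Toy shapes only; mirrors actor_logstd / Normal usage from ActorCritic.
import torch
from torch.distributions import Normal

num_actions = 2
actor_logstd = torch.nn.Parameter(torch.ones(num_actions) - 0.69)
mean = torch.zeros(3, num_actions)             # e.g. a batch of 3 action means

std = actor_logstd.exp()
dist = Normal(mean, mean * 0.0 + std)          # broadcast std to the batch shape
actions = dist.sample()
log_prob = dist.log_prob(actions).sum(dim=-1)  # joint log-prob over action dims
print(actions.shape, log_prob.shape)           # torch.Size([3, 2]) torch.Size([3])
```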
--------------------------------------------------------------------------------
/rl/modules/actor_critic_recurrent.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import numpy as np
32 | 
33 | import torch
34 | import torch.nn as nn
35 | from torch.distributions import Normal
36 | from torch.nn.modules import rnn
37 | from .actor_critic import ActorCritic, get_activation
38 | from rl.utils import unpad_trajectories
39 | 
40 | class ActorCriticRecurrent(ActorCritic):
41 |     is_recurrent = True
42 |     def __init__(self, num_actor_obs,
43 |                  num_critic_obs,
44 |                  num_actions,
45 |                  actor_hidden_dims=[256, 256, 256],
46 |                  critic_hidden_dims=[256, 256, 256],
47 |                  activation='elu',
48 |                  rnn_type='lstm',
49 |                  rnn_hidden_size=256,
50 |                  rnn_num_layers=1,
51 |                  init_noise_std=1.0,
52 |                  **kwargs):
53 |         if kwargs:
54 |             print("ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()))
55 | 
56 |         super().__init__(num_actor_obs=rnn_hidden_size,
57 |                          num_critic_obs=rnn_hidden_size,
58 |                          num_actions=num_actions,
59 |                          actor_hidden_dims=actor_hidden_dims,
60 |                          critic_hidden_dims=critic_hidden_dims,
61 |                          activation=activation,
62 |                          init_noise_std=init_noise_std)
63 | 
64 |         activation = get_activation(activation)
65 | 
66 |         self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
67 |         self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
68 | 
69 |         print(f"Actor RNN: {self.memory_a}")
70 |         print(f"Critic RNN: {self.memory_c}")
71 | 
72 |     def reset(self, dones=None):
73 |         self.memory_a.reset(dones)
74 |         self.memory_c.reset(dones)
75 | 
76 |     def act(self, observations, masks=None, hidden_states=None):
77 |         input_a = self.memory_a(observations, masks, hidden_states)
78 |         return super().act(input_a.squeeze(0))
79 | 
80 |     def act_inference(self, observations):
81 |         input_a = self.memory_a(observations)
82 |         return super().act_inference(input_a.squeeze(0))
83 | 
84 |     def evaluate(self, critic_observations, masks=None, hidden_states=None):
85 |         input_c = self.memory_c(critic_observations, masks, hidden_states)
86 |         return super().evaluate(input_c.squeeze(0))
87 | 
88 |     def get_hidden_states(self):
89 |         return self.memory_a.hidden_states, self.memory_c.hidden_states
90 | 
91 | 
92 | class Memory(torch.nn.Module):
93 |     def __init__(self, input_size, type='lstm', num_layers=1, hidden_size=256):
94 |         super().__init__()
95 |         # RNN
96 |         rnn_cls = nn.GRU if type.lower() == 'gru' else nn.LSTM
97 |         self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
98 |         self.hidden_states = None
99 | 
100 |     def forward(self, input, masks=None, hidden_states=None):
101 |         batch_mode = masks is not None
102 |         if batch_mode:
103 |             # batch mode (policy update): need saved hidden states
104 |             if hidden_states is None:
105 |                 raise ValueError("Hidden states not passed to memory module during policy update")
106 |             out, _ = self.rnn(input, hidden_states)
107 |             out = unpad_trajectories(out, masks)
108 |         else:
109 |             # inference mode (collection): use hidden states of last step
110 |             out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states)
111 |         return out
112 | 
113 |     def reset(self, dones=None):
114 |         # When the RNN is an LSTM, self.hidden_states is a tuple of (hidden_state, cell_state)
115 |         for hidden_state in self.hidden_states:
116 |             hidden_state[..., dones, :] = 0.0
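The `Memory` wrapper above leans on two PyTorch behaviours: an RNN called with `None` hidden states starts from zeros, and the `(h_n, c_n)` tuple can be masked in place so that only the environments that just finished are reset. A minimal sketch of the same bookkeeping (sizes illustrative):

```python
import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=8, hidden_size=16, num_layers=1)
hidden = None                           # lazily initialized to zeros by PyTorch on the first call
x = torch.randn(1, 4, 8)                # one step, 4 envs
out, hidden = rnn(x, hidden)            # collection: carry the hidden states across steps

dones = torch.tensor([False, True, False, False])
for h in hidden:                        # (h_n, c_n) for an LSTM
    h[..., dones, :] = 0.0              # zero only the finished envs, as Memory.reset does
print(out.shape, hidden[0].shape)       # torch.Size([1, 4, 16]) torch.Size([1, 4, 16])
```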
--------------------------------------------------------------------------------
/rl/modules/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import device, nn
3 | from torch.nn import functional as F
4 | from torch.autograd import Variable
5 | from torch.nn.modules.pooling import MaxPool2d
6 | import torch.utils.model_zoo as model_zoo
7 | import torchvision
8 | # from resnet import ResNet50
9 | # from model import Depth_encoding_Net
10 | import kornia
11 | import torchvision.transforms as transforms
12 | import numpy as np
13 | 
14 | from rl.modules.SMT import SMT_state_encoder
15 | 
16 | # image augmentation
17 | aug_trans = nn.Sequential(
18 |     nn.MaxPool2d(kernel_size=2),
19 |     nn.ReplicationPad2d(8),
20 |     kornia.augmentation.RandomCrop((90,160)) ## somewhat slow; random-crop augmentation in the style of DrQ-v2
21 | )
22 | 
23 | # rgb2gray = kornia.color.RgbToGrayscale() ## (N,3,H,W) -> (N,1,H,W)
24 | 
25 | def weights_init_(m):
26 |     """Custom weight init for Conv2D and Linear layers."""
27 |     if isinstance(m, nn.Linear):
28 |         nn.init.orthogonal_(m.weight.data)
29 |         if hasattr(m.bias, 'data'):
30 |             m.bias.data.fill_(0.0)
31 |     elif isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
32 |         gain = nn.init.calculate_gain('relu')
33 |         nn.init.orthogonal_(m.weight.data, gain)
34 |         if hasattr(m.bias, 'data'):
35 |             m.bias.data.fill_(0.0)
36 | 
37 | class Encoder(nn.Module):
38 |     """Convolutional encoder for image-based observations."""
39 |     def __init__(self, feature_dim):
40 |         super(Encoder, self).__init__()
41 |         # assert len(obs_shape) == 3
42 |         self.num_layers = 6
43 |         self.num_filters = 32
44 |         self.output_dim = 35
45 |         self.output_logits = False
46 |         self.feature_dim = feature_dim
47 | 
48 |         self.convs_rgb = nn.ModuleList([
49 |             nn.Conv2d(3, self.num_filters, 3, stride=2), ## rgb
50 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=2),
51 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
52 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
53 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
54 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1)
55 |         ])
56 | 
57 |         self.convs_Depth = nn.ModuleList([
58 |             nn.Conv2d(1, self.num_filters, 3, stride=2), ## d
59 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=2),
60 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
61 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
62 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1),
63 |             nn.Conv2d(self.num_filters, self.num_filters, 3, stride=1)
64 |         ])
65 | 
66 |         self.head = nn.Sequential(
67 |             nn.Linear(25792, self.feature_dim), # 25792 = 2 streams x 32 filters x 13 x 31 (spatial size after the conv stacks on 90x160 crops)
68 |             nn.LayerNorm(self.feature_dim))
69 | 
70 |         self.outputs = dict()
71 | 
72 |         # self.apply(weights_init_) ## weight initialization
73 | 
74 | 
75 |     def forward_conv_rgb(self, obs):
76 |         self.outputs['rgb'] = obs
77 | 
78 |         conv = torch.relu(self.convs_rgb[0](obs))
79 |         self.outputs['conv1_g'] = conv
80 | 
81 |         for i in range(1, self.num_layers):
82 |             conv = torch.relu(self.convs_rgb[i](conv))
83 | 
84 |             self.outputs['conv_g%s' % (i + 1)] = conv
85 | 
86 |         h = conv.reshape(conv.size(0), -1)
87 |         return h
88 | 
89 |     def forward_conv_depth(self, obs):
90 |         self.outputs['depth'] = obs
91 | 
92 |         conv = torch.relu(self.convs_Depth[0](obs))
93 |         self.outputs['conv1_d'] = conv
94 | 
95 |         for i in range(1, self.num_layers):
96 |             conv = torch.relu(self.convs_Depth[i](conv))
97 |             self.outputs['conv_d%s' % (i + 1)] = conv
98 | 
99 |         h = conv.reshape(conv.size(0), -1)
100 |         return h
101 | 
102 |     def forward(self, rgb, depth, task_obs):
103 | 
104 |         ## apply image augmentation during training
105 |         rgb = aug_trans(rgb)
106 |         depth = aug_trans(depth)
107 |         h1 = self.forward_conv_rgb(rgb)
108 |         h2 = self.forward_conv_depth(depth)
109 | 
110 |         h = torch.cat((h1,h2),1)
111 | 
112 |         out = self.head(h)
113 |         if not self.output_logits:
114 |             out = torch.tanh(out)
115 | 
116 |         out = torch.cat((out, task_obs),axis = 1)
117 |         self.outputs['out'] = out
118 | 
119 |         return out
120 | 
121 | 
122 | class Trans_Encoder(nn.Module):
123 |     """CNN encoder followed by a SMT encoder"""
124 |     def __init__(self):
125 |         super(Trans_Encoder, self).__init__()
126 |         self.encoder = Encoder(feature_dim=252)
127 |         self.smt_encoder = SMT_state_encoder(d_model=256, nhead=4)
128 | 
129 |     def forward(self, rgb, depth, task_obs, key_padding_mask):
130 |         '''
131 |         input:
132 |             embeddings: from 0 to t
133 |             observations: from 0 to t
134 |             batch = 1
135 |             considering update
136 |         output:
137 |             T D
138 |         '''
139 |         # T N H W C -> T N C H W; previously the batch dim was the sequence alone, now it also carries several envs, so flatten (T, N) into one batch dim for the CNN
140 |         seq_l, batch_size = task_obs.size(0), task_obs.size(1)
141 |         rgb = rgb.flatten(0,1).permute(0, 3, 1, 2)
142 |         depth = depth.flatten(0,1).permute(0, 3, 1, 2)
143 |         task_obs = task_obs.flatten(0,1)
144 |         ## T N D
145 |         embeddings = self.encoder(rgb, depth, task_obs).reshape(seq_l, batch_size, -1)
146 | 
147 |         ## T N D
148 |         ## training: the transformer's src/memory and tgt are both the embeddings
149 |         out = self.smt_encoder(o = embeddings, M = embeddings, flag = 1, key_padding_mask = key_padding_mask)
150 | 
151 |         # T N D
152 |         out = torch.cat((out, task_obs.reshape(seq_l, batch_size, -1)), -1)
153 | 
154 |         return out
155 | 
156 |     def inference_forward(self, rgb, depth, task_obs, memory, key_padding_mask):
157 |         # N H W C -> N C H W
158 |         rgb = rgb.permute(0, 3, 1, 2)
159 |         depth = depth.permute(0, 3, 1, 2)
160 | 
161 |         ## T N D
162 |         embeddings = self.encoder(rgb, depth, task_obs).unsqueeze(0)
163 | 
164 |         # exploration: src becomes the stored memory; tgt is the embedding inferred at the current step
165 |         if len(memory) < 32: ## a fixed-size window speeds up training and convergence
166 |             memory = torch.cat((memory, embeddings), 0)
167 |             key_padding_mask = None
168 |         else:
169 |             memory = torch.cat((memory[-31:], embeddings), 0)
170 |             # key_padding_mask = torch.cat((key_padding_mask[:,-31:], torch.zeros(2,1,device='cuda')), 1).to(bool) ## N * S
171 |         out = self.smt_encoder(o = embeddings, M = memory, flag = 0, key_padding_mask = key_padding_mask)
172 | 
173 |         # T D
174 |         out = torch.cat((out.squeeze(0), task_obs), -1)
175 | 
176 |         return out, memory
177 | 
178 | 
179 | ## pytorch  N C H W
180 | ## tensorflow  N H W C
181 | if __name__ == '__main__':
182 |     # encoder = Encoder(feature_dim = 252)
183 |     # rgb = Variable(torch.randn(1, 3, 180, 320))
184 |     # depth = Variable(torch.randn(1, 1, 180, 320))
185 |     # task_obs = Variable(torch.randn(1, 4))
186 |     # print(encoder(rgb,depth,task_obs).size())
187 |     # print(x)
188 | 
189 |     encoder = Trans_Encoder()  # Trans_Encoder takes no constructor arguments; feature_dim/state_dim are fixed inside
190 |     rgb = Variable(torch.randn(1, 500, 180, 320, 3))
191 |     depth = Variable(torch.randn(1, 500, 180, 320, 1))
192 |     task_obs = Variable(torch.randn(1, 500, 4))
193 |     print(encoder(rgb, depth, task_obs, key_padding_mask=None).size())
194 |     # print(encoder.self_attn(task_obs,task_obs,task_obs).size())
195 | 
196 | 
197 | 
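In `inference_forward` above, the transformer memory is kept to a sliding window of 32 embeddings (`memory[-31:]` plus the current step). The same bookkeeping in isolation (tensor sizes illustrative, assuming a 256-dim embedding and a single env):

```python
import torch

window = 32
memory = torch.zeros(0, 1, 256)          # (T, N, D), empty at episode start
for step in range(40):
    embedding = torch.randn(1, 1, 256)   # embedding of the current step
    if len(memory) < window:
        memory = torch.cat((memory, embedding), 0)
    else:
        memory = torch.cat((memory[-(window - 1):], embedding), 0)
print(memory.shape)                      # torch.Size([32, 1, 256]) -- never grows past the window
```

Bounding the memory keeps the attention cost per step constant instead of growing with episode length.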
--------------------------------------------------------------------------------
/rl/runners/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .on_policy_runner import OnPolicyRunner
--------------------------------------------------------------------------------
/rl/runners/on_policy_runner.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import time
32 | import os
33 | from collections import deque
34 | import statistics
35 | 
36 | from torch.utils.tensorboard import SummaryWriter
37 | import torch
38 | 
39 | from rl.algorithms import PPO
40 | from rl.modules import ActorCritic, ActorCriticRecurrent
41 | from rl.env import VecEnv
42 | 
43 | import numpy as np
44 | class OnPolicyRunner:
45 | 
46 |     def __init__(self,
47 |                  env: VecEnv,
48 |                  train_cfg,
49 |                  log_dir=None,
50 |                  device='cpu'):
51 | 
52 |         self.cfg = train_cfg["runner"]
53 |         self.alg_cfg = train_cfg["algorithm"]
54 |         self.policy_cfg = train_cfg["policy"]
55 |         self.device = device
56 |         self.env = env
57 |         if self.env.num_privileged_obs is not None:
58 |             num_critic_obs = self.env.num_privileged_obs
59 |         else:
60 |             num_critic_obs = self.env.num_obs
61 |         actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic
62 |         actor_critic: ActorCritic = actor_critic_class( self.env.num_obs,
63 |                                                         num_critic_obs,
64 |                                                         self.env.num_actions,
65 |                                                         **self.policy_cfg).to(self.device)
66 |         alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
67 |         self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
68 |         self.num_steps_per_env = self.cfg["num_steps_per_env"]
69 |         self.save_interval = self.cfg["save_interval"]
70 | 
71 |         # init storage and model
72 |         self.alg.init_storage(self.env.num_envs, self.num_steps_per_env, [self.env.num_obs], [self.env.num_privileged_obs], [self.env.num_actions])
73 | 
74 |         # Log
75 |         self.log_dir = log_dir
76 |         self.writer = None
77 |         self.tot_timesteps = 0
78 |         self.tot_time = 0
79 |         self.current_learning_iteration = 0
80 | 
81 |         _, _ = self.env.reset()
82 | 
83 |     def learn(self, num_learning_iterations, init_at_random_ep_len=False):
84 |         # initialize writer
85 |         if self.log_dir is not None and self.writer is None:
86 |             self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10)
87 |         if init_at_random_ep_len:
88 |             self.env.episode_length_buf = torch.randint_like(self.env.episode_length_buf, high=int(self.env.max_episode_length))
89 |         obs = self.env.get_observations()
90 |         privileged_obs = self.env.get_privileged_observations()
91 |         critic_obs = privileged_obs if privileged_obs is not None else obs
92 |         # obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
93 |         self.alg.actor_critic.train() # switch to train mode (for dropout for example)
94 | 
95 |         ep_infos = []
96 |         rewbuffer = deque(maxlen=100)
97 |         lenbuffer = deque(maxlen=100)
98 |         eps_stepbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
99 |         eps_splbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
100 |         eps_effortbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
101 |         eps_insbuffer = [deque(maxlen=100) for _ in range(self.env.num_envs)]
102 |         cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
103 |         cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
104 | 
105 |         tot_iter = int(self.current_learning_iteration + num_learning_iterations)
106 |         for it in range(self.current_learning_iteration, tot_iter):
107 |             start = time.time()
108 |             # Rollout
109 |             with torch.no_grad():
110 |                 for i in range(self.num_steps_per_env):
111 |                     # st = time.time()
112 |                     actions = self.alg.act(obs, critic_obs)
113 |                     # print(time.time() - st)
114 |                     obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
115 |                     # print(time.time() - st)
116 |                     critic_obs = privileged_obs if privileged_obs is not None else obs
117 |                     rewards, dones = rewards.to(self.device), dones.to(self.device)
118 |                     self.alg.process_env_step(rewards, dones, infos)
119 |                     # print(time.time() - st)
120 | 
121 |                     if self.log_dir is not None:
122 |                         # Book keeping
123 |                         if 'episode' in infos:
124 |                             ep_infos.append(infos['episode'])
125 |                         cur_reward_sum += rewards
126 |                         cur_episode_length += 1
127 | 
128 |                         new_ids = (dones > 0).nonzero(as_tuple=False)
129 | 
130 |                         rewbuffer.extend(cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist())
131 |                         lenbuffer.extend(cur_episode_length[new_ids][:, 0].cpu().numpy().tolist())
132 |                         for id in new_ids:
133 |                             eps_stepbuffer[id].extend(torch.tensor(infos['episode_length'])[id].numpy().tolist())
134 |                             eps_splbuffer[id].extend(torch.tensor(infos['spl'])[id].numpy().tolist())
135 |                             eps_effortbuffer[id].extend(torch.tensor(infos['effort_efficiency'])[id].numpy().tolist())
136 |                             eps_insbuffer[id].extend(torch.tensor(infos['ins'])[id].numpy().tolist())
137 | 
138 |                         cur_reward_sum[new_ids] = 0
139 |                         cur_episode_length[new_ids] = 0
140 | 
141 |                 stop = time.time()
142 |                 collection_time = stop - start
143 | 
144 |                 # Learning step
145 |                 start = stop
146 |                 self.alg.compute_returns(critic_obs)
147 | 
148 |             mean_value_loss, mean_surrogate_loss = self.alg.update()
149 |             stop = time.time()
150 |             learn_time = stop - start
151 |             if self.log_dir is not None:
152 |                 self.log(locals())
153 |             if it % self.save_interval == 0:
154 |                 self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it)))
155 |             ep_infos.clear()
156 | 
157 |         self.current_learning_iteration += num_learning_iterations
158 |         self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(self.current_learning_iteration)))
159 | 
160 |     def log(self, locs, width=80, pad=35):
161 |         self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
162 |         self.tot_time += locs['collection_time'] + locs['learn_time']
163 |         iteration_time = locs['collection_time'] + locs['learn_time']
164 | 
165 |         ep_string = f''
166 |         if locs['ep_infos']:
167 |             for key in locs['ep_infos'][0]:
168 |                 infotensor = torch.tensor([], device=self.device)
169 |                 for ep_info in locs['ep_infos']:
170 |                     # handle scalar and zero dimensional tensor infos
171 |                     if not isinstance(ep_info[key], torch.Tensor):
172 |                         ep_info[key] = torch.Tensor([ep_info[key]])
173 |                     if len(ep_info[key].shape) == 0:
174 |                         ep_info[key] = ep_info[key].unsqueeze(0)
175 |                     infotensor = torch.cat((infotensor, ep_info[key].to(self.device)))
176 |                 value = torch.mean(infotensor)
177 |                 self.writer.add_scalar('Episode/' + key, value, locs['it'])
178 |                 ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n"""
179 |         mean_std = self.alg.actor_critic.std.mean()
180 |         fps = int(self.num_steps_per_env * self.env.num_envs / (locs['collection_time'] + locs['learn_time']))
181 | 
182 |         self.writer.add_scalar('Loss/value_function', locs['mean_value_loss'], locs['it'])
183 |         self.writer.add_scalar('Loss/surrogate', locs['mean_surrogate_loss'], locs['it'])
184 |         self.writer.add_scalar('Loss/learning_rate', self.alg.learning_rate, locs['it'])
185 |         self.writer.add_scalar('Policy/mean_noise_std', mean_std.item(), locs['it'])
186 |         self.writer.add_scalar('Perf/total_fps', fps, locs['it'])
187 |         self.writer.add_scalar('Perf/collection time', locs['collection_time'], locs['it'])
188 |         self.writer.add_scalar('Perf/learning_time', locs['learn_time'], locs['it'])
189 |         if len(locs['rewbuffer']) > 0: ## per-environment logging
190 |             self.writer.add_scalar('Train/mean_reward', statistics.mean(locs['rewbuffer']), locs['it'])
191 |             self.writer.add_scalar('Train/mean_episode_length', statistics.mean(locs['lenbuffer']), locs['it'])
192 |             self.writer.add_scalar('Train/mean_reward/time', statistics.mean(locs['rewbuffer']), self.tot_time)
193 |             self.writer.add_scalar('Train/mean_episode_length/time', statistics.mean(locs['lenbuffer']), self.tot_time)
194 |             for i, env in enumerate(self.env.Training_Env[:5]):
195 |                 if locs['eps_stepbuffer'][i] == deque([]):
196 |                     continue
197 |                 self.writer.add_scalar(f'{env}/mean_steps', statistics.mean(locs['eps_stepbuffer'][i]), locs['it'])
198 |                 self.writer.add_scalar(f'{env}/mean_spl', statistics.mean(locs['eps_splbuffer'][i]), locs['it'])
199 |                 self.writer.add_scalar(f'{env}/mean_effort', statistics.mean(locs['eps_effortbuffer'][i]), locs['it'])
200 |                 self.writer.add_scalar(f'{env}/mean_ins', statistics.mean(locs['eps_insbuffer'][i]), locs['it'])
201 | 
202 |         header = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m "
203 | 
204 |         if len(locs['rewbuffer']) > 0:
205 |             log_string = (f"""{'#' * width}\n"""
206 |                           f"""{header.center(width, ' ')}\n\n"""
207 |                           f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
208 |                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
209 |                           f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
210 |                           f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
211 |                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
212 |                           f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
213 |                           f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""")
214 |                         # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
215 |                         # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
216 |         else:
217 |             log_string = (f"""{'#' * width}\n"""
218 |                           f"""{header.center(width, ' ')}\n\n"""
219 |                           f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
220 |                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
221 |                           f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
222 |                           f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
223 |                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""")
224 |                         # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
225 |                         # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
226 | 
227 |         log_string += ep_string
228 |         log_string += (f"""{'-' * width}\n"""
229 |                        f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
230 |                        f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
231 |                        f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n"""
232 |                        f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * (
233 |                            locs['num_learning_iterations'] - locs['it']):.1f}s\n""")
234 |         print(log_string)
235 | 
236 |     def save(self, path, infos=None):
237 |         torch.save({
238 |             'model_state_dict': self.alg.actor_critic.state_dict(),
239 |             'encoder_state_dict': self.alg.encoder_net.state_dict(),
240 |             'optimizer_state_dict': self.alg.optimizer.state_dict(),
241 |             'iter': self.current_learning_iteration,
242 |             'infos': infos,
243 |             }, path)
244 | 
245 |     def load(self, path, load_optimizer=True):
246 |         loaded_dict = torch.load(path)
247 |         self.alg.actor_critic.load_state_dict(loaded_dict['model_state_dict'])
248 |         self.alg.encoder_net.load_state_dict(loaded_dict['encoder_state_dict'])
249 |         if load_optimizer:
250 |             self.alg.optimizer.load_state_dict(loaded_dict['optimizer_state_dict'])
251 |         self.current_learning_iteration = loaded_dict['iter']
252 |         return loaded_dict['infos']
253 | 
254 |     def get_inference_policy(self, device=None):
255 |         self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
256 |         if device is not None:
257 |             self.alg.actor_critic.to(device)
258 |         return self.alg.actor_critic.act_inference
259 | 
--------------------------------------------------------------------------------
/rl/storage/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | 
4 | from .rollout_storage import RolloutStorage
--------------------------------------------------------------------------------
/rl/storage/rollout_storage.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | import numpy as np
33 | 
34 | from rl.utils import split_and_pad_trajectories, split_and_pad_trajectories_obs
35 | 
36 | class RolloutStorage:
37 |     class Transition:
38 |         def __init__(self):
39 |             self.observations = None
40 |             self.critic_observations = None
41 |             self.actions = None
42 |             self.rewards = None
43 |             self.dones = None
44 |             self.values = None
45 |             self.actions_log_prob = None
46 |             self.action_mean = None
47 |             self.action_sigma = None
48 |             self.hidden_states = None
49 | 
50 |         def clear(self):
51 |             self.__init__()
52 | 
53 |     def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'):
54 | 
55 |         self.device = device
56 | 
57 |         self.obs_shape = obs_shape
58 |         self.privileged_obs_shape = privileged_obs_shape
59 |         self.actions_shape = actions_shape
60 | 
61 |         # Core
62 |         self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
63 |         if privileged_obs_shape[0] is not None:
64 |             self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
65 |         else:
66 |             self.privileged_observations = None
67 |         self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
68 |         self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
69 |         self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
70 | 
71 |         # For PPO
72 |         self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
73 |         self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
74 |         self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
75 |         self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
76 |         self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
77 |         self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
78 | 
79 |         self.num_transitions_per_env = num_transitions_per_env
80 |         self.num_envs = num_envs
81 | 
82 |         # rnn
83 |         self.saved_hidden_states_a = None
84 |         self.saved_hidden_states_c = None
85 | 
86 |         ## SMT
87 |         self.saved_task_obs = None
88 |         self.step = 0
89 | 
90 |     def add_transitions(self, transition: Transition):
91 |         if self.step >= self.num_transitions_per_env:
92 |             raise AssertionError("Rollout buffer overflow")
93 |         # self.observations[self.step].copy_(transition.observations)
94 |         # if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations)
95 |         self.actions[self.step].copy_(transition.actions)
96 |         self._save_observation(transition.observations)
97 |         self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
98 |         self.dones[self.step].copy_(transition.dones.view(-1, 1))
99 |         self.values[self.step].copy_(transition.values)
100 |         self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
101 |         self.mu[self.step].copy_(transition.action_mean)
102 |         self.sigma[self.step].copy_(transition.action_sigma)
103 |         self._save_hidden_states(transition.hidden_states)
104 |         self.step += 1
105 | 
106 |     def _save_observation(self, obs):
107 |         # task_obs = []
108 |         # rgb = []
109 |         # depth = []
110 |         # for o in obs:
111 |         #     task_obs.append(o['task_obs'].copy())
112 |         #     rgb.append(o['rgb'].copy())
113 |         #     depth.append(o['depth'].copy())
114 | 
115 |         ## N D
116 |         rgb = obs[0]
117 |         depth = obs[1]
118 |         task_obs = obs[2]
119 | 
120 |         # initialize if needed
121 |         if self.saved_task_obs is None:
122 |             self.saved_task_obs = torch.zeros(self.actions.shape[0], *task_obs.shape, device=self.device)
123 |             self.saved_rgb = torch.zeros(self.actions.shape[0], *rgb.shape, device=self.device)
124 |             self.saved_depth = torch.zeros(self.actions.shape[0], *depth.shape, device=self.device)
125 | 
126 |         # copy the states
127 |         self.saved_task_obs[self.step] = task_obs
128 |         self.saved_rgb[self.step] = rgb
129 |         self.saved_depth[self.step] = depth
130 | 
131 | 
132 |     def _save_hidden_states(self, hidden_states):
133 |         if hidden_states is None or hidden_states==(None, None):
134 |             return
135 |         # make a tuple out of GRU hidden states to match the LSTM format
136 |         hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],)
137 |         hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],)
138 | 
139 |         # initialize if needed
140 |         if self.saved_hidden_states_a is None:
141 |             self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))]
142 |             self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))]
143 |         # copy the states
144 |         for i in range(len(hid_a)):
145 |             self.saved_hidden_states_a[i][self.step].copy_(hid_a[i])
146 |             self.saved_hidden_states_c[i][self.step].copy_(hid_c[i])
147 | 
148 | 
149 |     def clear(self):
150 |         self.step = 0
151 | 
152 |     def compute_returns(self, last_values, gamma, lam):
153 |         advantage = 0
154 |         for step in reversed(range(self.num_transitions_per_env)):
155 |             if step == self.num_transitions_per_env - 1:
156 |                 next_values = last_values
157 |             else:
158 |                 next_values = self.values[step + 1]
159 |             next_is_not_terminal = 1.0 - self.dones[step].float()
160 |             delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
161 |             advantage = delta + next_is_not_terminal * gamma * lam * advantage
162 |             self.returns[step] = advantage + self.values[step]
163 | 
164 |         # Compute and normalize the advantages
165 |         self.advantages = self.returns - self.values
166 |         self.advantages = (self.advantages - self.advantages.mean(0)) / (self.advantages.std(0) + 1e-8)
167 | 
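`compute_returns` above is a standard GAE(λ) backward recursion. Unrolled by hand on a toy single-env, three-step episode (all numbers illustrative), the same computation looks like this:

```python
import torch

gamma, lam = 0.99, 0.95
rewards = torch.tensor([1., 1., 1.])
values  = torch.tensor([0.5, 0.5, 0.5])
dones   = torch.tensor([0., 0., 1.])
last_value = torch.tensor(0.5)        # bootstrap value after the final step

advantage, advantages = 0.0, torch.zeros(3)
for step in reversed(range(3)):
    next_value = last_value if step == 2 else values[step + 1]
    not_terminal = 1.0 - dones[step]  # cut the recursion across episode ends
    delta = rewards[step] + not_terminal * gamma * next_value - values[step]
    advantage = delta + not_terminal * gamma * lam * advantage
    advantages[step] = advantage
returns = advantages + values
print(advantages, returns)
```

The storage then normalizes the advantages over the time dimension (`mean(0)`/`std(0)` per env) before the PPO update.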
168 |     def get_statistics(self):
169 |         done = self.dones
170 |         done[-1] = 1
171 |         flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
172 |         done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
173 |         trajectory_lengths = (done_indices[1:] - done_indices[:-1])
174 |         return trajectory_lengths.float().mean(), self.rewards.mean()
175 | 
176 |     def mini_batch_generator(self, num_mini_batches, num_epochs=8):
177 |         batch_size = self.num_envs * self.num_transitions_per_env
178 |         mini_batch_size = batch_size // num_mini_batches
179 |         indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
180 | 
181 |         observations = self.observations.flatten(0, 1)
182 |         if self.privileged_observations is not None:
183 |             critic_observations = self.privileged_observations.flatten(0, 1)
184 |         else:
185 |             critic_observations = observations
186 | 
187 |         actions = self.actions.flatten(0, 1)
188 |         values = self.values.flatten(0, 1)
189 |         returns = self.returns.flatten(0, 1)
190 |         old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
191 |         advantages = self.advantages.flatten(0, 1)
192 |         old_mu = self.mu.flatten(0, 1)
193 |         old_sigma = self.sigma.flatten(0, 1)
194 | 
195 |         for epoch in range(num_epochs):
196 |             for i in range(num_mini_batches):
197 | 
198 |                 start = i*mini_batch_size
199 |                 end = (i+1)*mini_batch_size
200 |                 batch_idx = indices[start:end]
201 | 
202 |                 obs_batch = observations[batch_idx]
203 |                 critic_observations_batch = critic_observations[batch_idx]
204 |                 actions_batch = actions[batch_idx]
205 |                 target_values_batch = values[batch_idx]
206 |                 returns_batch = returns[batch_idx]
207 |                 old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
208 |                 advantages_batch = advantages[batch_idx]
209 |                 old_mu_batch = old_mu[batch_idx]
210 |                 old_sigma_batch = old_sigma[batch_idx]
211 |                 yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
212 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None
213 | 
214 |     # for SMT only
215 |     def SMT_mini_batch_generator(self, num_mini_batches, num_epochs=8):
216 |         # padded_task_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_task_obs, self.dones)
217 |         # padded_rgb_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_rgb, self.dones)
218 |         # padded_depth_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.saved_depth, self.dones)
219 |         # padded__trajectories, trajectory_masks = split_and_pad_trajectories_obs([self.saved_task_obs, self.saved_rgb, self.saved_depth], self.dones)
220 |         # padded_critic_obs_trajectories = padded_obs_trajectories
221 | 
222 |         mini_batch_size = self.num_envs // num_mini_batches ## take whole trajectories; self.observations is not flattened here, so mini-batches are split per environment
223 |         for ep in range(num_epochs):
224 |             first_traj = 0
225 |             for i in range(num_mini_batches):
226 |                 start = i*mini_batch_size
227 |                 stop = (i+1)*mini_batch_size
228 | 
229 |                 dones = self.dones.squeeze(-1)
230 |                 last_was_done = torch.zeros_like(dones, dtype=torch.bool)
231 |                 last_was_done[1:] = dones[:-1]
232 |                 last_was_done[0] = True
233 |                 trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) ## the number of done flags determines the batch size
234 |                 last_traj = first_traj + trajectories_batch_size ## trajectories added by the split
235 | 
236 |                 # masks_batch = trajectory_masks[:, first_traj:last_traj]
237 |                 # task_obs_batch = padded__trajectories[0][:, first_traj:last_traj]
238 |                 # rgb_obs_batch = padded__trajectories[1][:, first_traj:last_traj]
239 |                 # depth_obs_batch = padded__trajectories[2][:, first_traj:last_traj] ### actions etc. would have to be split the same way
240 | 
241 |                 task_obs_batch = self.saved_task_obs[:, start:stop] ## observations: split and pad the trajectories, then unpad again after the transformer
242 |                 rgb_obs_batch = self.saved_rgb[:, start:stop]
243 |                 depth_obs_batch = self.saved_depth[:, start:stop]
244 | 
245 |                 actions_batch = self.actions[:, start:stop]
246 |                 if torch.isnan(actions_batch).any():
247 |                     print('nan')
248 |                 old_mu_batch = self.mu[:, start:stop]
249 |                 old_sigma_batch = self.sigma[:, start:stop]
250 |                 returns_batch = self.returns[:, start:stop]
251 |                 advantages_batch = self.advantages[:, start:stop]
252 |                 values_batch = self.values[:, start:stop]
253 |                 old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
254 | 
255 |                 # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim])
256 |                 # then take only time steps after dones (flattens num envs and time dimensions),
257 |                 # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
258 |                 # last_was_done = last_was_done.permute(1, 0)
259 |                 # hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
260 |                 #                 for saved_hidden_states in self.saved_hidden_states_a ]
261 |                 # hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
262 |                 #                 for saved_hidden_states in self.saved_hidden_states_c ]
263 |                 # # remove the tuple for GRU
264 |                 # hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
265 |                 # hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch
266 | 
267 |                 yield [rgb_obs_batch, depth_obs_batch, task_obs_batch], None, actions_batch, values_batch, advantages_batch, returns_batch, \
268 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None
269 | 
270 |                 # first_traj = last_traj
271 | 
272 | 
273 | 
274 |     # for RNNs only
275 |     def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8):
276 | 
277 |         padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones)
278 |         if self.privileged_observations is not None:
279 |             padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones)
280 |         else:
281 |             padded_critic_obs_trajectories = padded_obs_trajectories
282 | 
283 |         mini_batch_size = self.num_envs // num_mini_batches
284 |         for ep in range(num_epochs):
285 |             first_traj = 0
286 |             for i in range(num_mini_batches):
287 |                 start = i*mini_batch_size
288 |                 stop = (i+1)*mini_batch_size
289 | 
290 |                 dones = self.dones.squeeze(-1)
291 |                 last_was_done = torch.zeros_like(dones, dtype=torch.bool)
292 |                 last_was_done[1:] = dones[:-1]
293 |                 last_was_done[0] = True
294 |                 trajectories_batch_size = torch.sum(last_was_done[:, start:stop])
295 |                 last_traj = first_traj + trajectories_batch_size
296 | 
297 |                 masks_batch = trajectory_masks[:, first_traj:last_traj]
298 |                 obs_batch = padded_obs_trajectories[:, first_traj:last_traj]
299 |                 critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj]
300 | 
301 |                 actions_batch = self.actions[:, start:stop]
302 |                 old_mu_batch = self.mu[:, start:stop]
303 |                 old_sigma_batch = self.sigma[:, start:stop]
304 |                 returns_batch = self.returns[:, start:stop]
305 |                 advantages_batch = self.advantages[:, start:stop]
306 |                 values_batch = self.values[:, start:stop]
307 |                 old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
308 | 
309 |                 # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim])
310 |                 # then take only time steps after dones (flattens num envs and time dimensions),
311 |                 # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
312 |                 last_was_done = last_was_done.permute(1, 0)
313 |                 hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
314 |                                 for saved_hidden_states in self.saved_hidden_states_a ]
315 |                 hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
316 |                                 for saved_hidden_states in self.saved_hidden_states_c ]
317 |                 # remove the tuple for GRU
318 |                 hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
319 |                 hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch
320 | 
321 |                 yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \
322 |                       old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch
323 | 
324 |                 first_traj = last_traj
--------------------------------------------------------------------------------
/rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .utils import split_and_pad_trajectories, unpad_trajectories, split_and_pad_trajectories_obs
--------------------------------------------------------------------------------
/rl/utils/log_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
4 | # of this software and associated documentation files (the "Software"), to deal
5 | # in the Software without restriction, including without limitation the rights
6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | # copies of the Software, and to permit persons to whom the Software is
8 | # furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE
20 | 
21 | import os
22 | 
23 | from rl.conf.configs import Configs
24 | from rl.utils.logging_engine import logger
25 | 
26 | 
27 | # Output logs through console and files
28 | def ini_logger(file_name, level='info'):
29 |     log_folder = os.path.join(Configs.output_folder, 'log')
30 |     if not os.path.exists(log_folder):
31 |         os.makedirs(log_folder)
32 |     delete_files(log_folder, Configs.MAX_LOG_FILE_NUM)
33 |     log_file = os.path.join(log_folder, file_name)
34 |     logger.add_file_output(log_file, level)
35 | 
36 | 
37 | def remove_file_handler_of_logging(file_name: str):
38 |     log_folder = os.path.join(Configs.output_folder, 'log')
39 |     file_path = os.path.join(log_folder, file_name)
40 |     try:
41 |         logger.remove_file_handler(file_path)
42 |     except Exception as e:
43 |         print(f"Failed to remove file handler {file_path}, reason: {e}")
44 | 
45 | 
46 | def delete_files(file_folder, max_num):
47 |     """
48 |     :param file_folder: target folder (absolute path)
49 |     :param max_num: maximum number of files to keep
50 |     """
51 |     num = count_file(file_folder)
52 |     if num > max_num:
53 |         delete_num = max_num // 2
54 |         total_files_and_dirs = os.listdir(file_folder)
55 |         total_files = []
56 |         for item in total_files_and_dirs:
57 |             if not os.path.isdir(os.path.join(file_folder, item)):
58 |                 total_files.append(item)
59 |         total_files.sort()  # lexicographic; assumes log file names sort chronologically
60 |         for i in range(delete_num):
61 |             os.remove(os.path.join(file_folder, total_files[i]))
62 | 
63 | 
64 | # Count the files in the target folder (non-recursive)
65 | def count_file(directory):
66 |     file_num = 0
67 |     if not os.path.exists(directory):
68 |         os.makedirs(directory)
69 |     for item in os.listdir(directory):
70 |         if os.path.isfile(os.path.join(directory, item)):
71 |             file_num += 1
72 |     return file_num
73 | 
--------------------------------------------------------------------------------
/rl/utils/logging_engine.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
4 | # of this software and associated documentation files (the "Software"), to deal
5 | # in the Software without restriction, including without limitation the rights
6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | # copies of the Software, and to permit persons to whom the Software is
8 | # furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE
20 | 
21 | """
22 | | Field/attribute | Format              | Description                                                  |
23 | | --------------- | ------------------- | ------------------------------------------------------------ |
24 | | asctime         | %(asctime)s         | Human-readable time of the log event, e.g. 2003-07-08 16:49:45,896 |
25 | | created         | %(created)f         | Time of the log event as a timestamp (the value of time.time() at the call) |
26 | | relativeCreated | %(relativeCreated)d | Milliseconds between the log event and the load of the logging module |
27 | | msecs           | %(msecs)d           | Millisecond part of the event time                           |
28 | | levelname       | %(levelname)s       | Log level as text ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') |
29 | | levelno         | %(levelno)s         | Log level as a number (10, 20, 30, 40, 50)                   |
30 | | name            | %(name)s            | Name of the logger, 'root' by default (the rootLogger)       |
31 | | message         | %(message)s         | Text of the log record, computed as `msg % args`             |
32 | | pathname        | %(pathname)s        | Full path of the source file where the logging call was made |
33 | | filename        | %(filename)s        | File-name part of pathname, including the extension          |
34 | | module          | %(module)s          | Name part of filename, without the extension                 |
35 | | lineno          | %(lineno)d          | Line number of the logging call                              |
36 | | funcName        | %(funcName)s        | Name of the function making the logging call                 |
37 | | process         | %(process)d         | Process ID                                                   |
38 | | processName     | %(processName)s     | Process name (new in Python 3.1)                             |
39 | | thread          | %(thread)d          | Thread ID                                                    |
40 | | threadName      | %(threadName)s      | Thread name                                                  |
41 | """
42 | 
43 | import logging
44 | import sys
45 | 
46 | 
47 | class LoggingEngine:
48 |     def __init__(self, level="debug", contents=None, logger_name=None):
49 |         self.logging_level_dict = {
50 |             "debug": logging.DEBUG,
51 |             "info": logging.INFO,
52 |             "warning": logging.WARNING,
53 |             "error": logging.ERROR,
54 |             "critical": logging.CRITICAL
55 |         }
56 | 
57 |         logging_level = self.logging_level_dict.get(level.lower(), logging.DEBUG)
58 | 
59 |         if contents is None:
60 |             contents = ["asctime", "levelname", "funcName", "lineno", "message"]
61 | 
62 |         if logger_name is None:
63 |             logger_name = 'logging_engine'
64 | 
65 |         logging_fmt = "%(asctime)s [%(filename)-15s | %(lineno)d] %(levelname)s: %(message)s"
66 |         # logging_fmt = " - ".join([f"%({content})s" for content in contents])
67 | 
68 |         logger = logging.getLogger(logger_name)
69 |         logger.setLevel(level=logging_level)
70 |         formatter = logging.Formatter(logging_fmt)
71 |         if not logger.handlers:
72 |             handler = logging.StreamHandler(sys.stdout)
73 |             handler.setFormatter(formatter)
74 |             logger.addHandler(handler)
75 | 
76 |         self.logger = logger
77 |         self.logger_name = logger_name
78 |         self.handlers = {}
79 |         self.formatter = formatter
80 | 
81 |         self.import_log_funcs()
82 | 
83 |     def import_log_funcs(self):
84 |         log_funcs = ['debug', 'info', 'warning', 'error', 'critical', 'exception']
85 |         for func_name in log_funcs:
86 |             func = getattr(self.logger, func_name)
87 |             setattr(self, func_name, func)
88 | 
89 |     def add_file_output(self, filename: str, level='info', mode="w"):
90 |         if filename not in self.handlers:
91 |             handler = logging.FileHandler(filename, mode=mode, encoding='UTF-8')
92 |             handler.setFormatter(self.formatter)
93 |             handler.setLevel(self.logging_level_dict.get(level.lower(), logging.DEBUG))
94 |             self.handlers[filename] = handler
95 |             self.logger.addHandler(handler)
96 | 
97 |     def remove_file_handler(self, file_path):
98 |         if file_path in self.handlers:
99 |             self.logger.removeHandler(self.handlers.get(file_path))
100 | 
101 |     def debug(self, msg: str):
102 |         pass  # placeholder; rebound to the real logger method by import_log_funcs
103 | 
104 |     def info(self, msg: str):
105 |         pass
106 | 
107 |     def warning(self, msg: str):
108 |         pass
109 | 
110 |     def error(self, msg: str):
111 |         pass
112 | 
113 |     def critical(self, msg: str):
114 |         pass
115 | 
116 |     def exception(self, msg: str):
117 |         pass
118 | 
119 | 
120 | logger = LoggingEngine(logger_name="glob_logging_engine",
121 |                        level="info")
122 | 
123 | 
124 | def test_log():
125 |     log = LoggingEngine(level="debug",
126 |                         contents=["asctime", "levelname", "filename", "lineno", "funcName", "message"])
127 | 
128 |     log.info("Hello World!")
129 | 
--------------------------------------------------------------------------------
/rl/utils/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from numpy import pad
32 | import torch
33 | 
34 | def split_and_pad_trajectories(tensor, dones):
35 |     """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
36 |     Returns masks corresponding to valid parts of the trajectories
37 |     Example:
38 |         Input: [ [a1, a2, a3, a4 | a5, a6],
39 |                  [b1, b2 | b3, b4, b5 | b6]
40 |                 ]
41 | 
42 |         Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
43 |                  [a5, a6, 0, 0],   |   [True, True, False, False],
44 |                  [b1, b2, 0, 0],   |   [True, True, False, False],
45 |                  [b3, b4, b5, 0],  |   [True, True, True, False],
46 |                  [b6, 0, 0, 0]     |   [True, False, False, False],
47 |                 ]                  | ]
48 | 
49 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
50 |     """
51 |     dones = dones.clone()
52 |     dones[-1] = 1
53 |     # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
54 |     flat_dones = dones.transpose(1, 0).reshape(-1, 1)
55 | 
56 |     # Get length of trajectory by counting the number of successive not done elements
57 |     done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
58 |     trajectory_lengths = done_indices[1:] - done_indices[:-1]
59 |     trajectory_lengths_list = trajectory_lengths.tolist()
60 |     # Extract the individual trajectories
61 |     trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
62 |     padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
63 | 
64 | 
65 |     trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
66 |     return padded_trajectories, trajectory_masks
67 | 
68 | def unpad_trajectories(trajectories, masks):
69 |     """ Does the inverse operation of split_and_pad_trajectories()
70 |     """
71 |     # Need to transpose before and after the masking to have proper reshaping
72 |     trajectories = trajectories.transpose(1, 0) ## T N D -> N T D
73 |     return trajectories[masks[:trajectories.size(1)].transpose(1, 0)].view(-1, masks.size(0), trajectories.shape[-1]).transpose(1, 0)
74 | 
75 | 
76 | def split_and_pad_trajectories_obs(obs, dones):
77 |     """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
78 |     Returns masks corresponding to valid parts of the trajectories
79 |     Example:
80 |         Input: [ [a1, a2, a3, a4 | a5, a6],
81 |                  [b1, b2 | b3, b4, b5 | b6]
82 |                 ]
83 | 
84 |         Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
85 |                  [a5, a6, 0, 0],   |   [True, True, False, False],
86 |                  [b1, b2, 0, 0],   |   [True, True, False, False],
87 |                  [b3, b4, b5, 0],  |   [True, True, True, False],
88 |                  [b6, 0, 0, 0]     |   [True, False, False, False],
89 |                 ]                  | ]
90 | 
91 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
92 |     """
93 |     dones = dones.clone()
94 |     dones[-1] = 1
95 |     # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
96 |     flat_dones = dones.transpose(1, 0).reshape(-1, 1)
97 | 
98 |     # Get length of trajectory by counting the number of successive not done elements
99 |     done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
100 |     trajectory_lengths = done_indices[1:] - done_indices[:-1]
101 |     trajectory_lengths_list = trajectory_lengths.tolist()
102 |     # Extract the individual trajectories
103 |     task_obs_batch = obs[0] ## observations: split and pad the trajectories, then unpad them again after the transformer
104 |     rgb_obs_batch = obs[1]
105 |     depth_obs_batch = obs[2]
106 | 
107 |     trajectories_task = torch.split(task_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
108 |     padded_trajectories_task = torch.nn.utils.rnn.pad_sequence(trajectories_task)
109 | 
110 |     trajectories_rgb = torch.split(rgb_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
111 |     padded_trajectories_rgb = torch.nn.utils.rnn.pad_sequence(trajectories_rgb)
112 | 
113 |     trajectories_depth = torch.split(depth_obs_batch.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
114 |     padded_trajectories_depth = torch.nn.utils.rnn.pad_sequence(trajectories_depth)
115 | 
116 |     trajectory_masks = trajectory_lengths > torch.arange(0, task_obs_batch.shape[0], device=obs[0].device).unsqueeze(1)
117 |     return [padded_trajectories_task, padded_trajectories_rgb, padded_trajectories_depth], trajectory_masks
118 | 
119 |     ## padded_trajectories_task.transpose(1, 0)[(trajectory_lengths > torch.arange(0, padded_trajectories_task.shape[0], device=obs[0].device).unsqueeze(1)).transpose(1, 0)]
--------------------------------------------------------------------------------
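The docstring example above can be made concrete with a small round-trip check. The sketch below (assuming the repo root is on `PYTHONPATH`; the done pattern and sizes are illustrative) builds a `(time, num_envs, dim)` batch, splits it into padded trajectories, and verifies that `unpad_trajectories` restores the original layout:

```python
import torch
from rl.utils import split_and_pad_trajectories, unpad_trajectories

# Two envs, six steps; env 0 finishes an episode at t=3, env 1 at t=1 and t=4.
T, N, D = 6, 2, 3
obs = torch.arange(T * N * D, dtype=torch.float32).reshape(T, N, D)
dones = torch.zeros(T, N, 1)
dones[3, 0] = 1
dones[1, 1] = 1
dones[4, 1] = 1

padded, masks = split_and_pad_trajectories(obs, dones)
print(padded.shape)  # torch.Size([4, 5, 3]) -- 5 trajectories, longest has 4 steps
print(masks.shape)   # torch.Size([6, 5])   -- one column of validity flags per trajectory

recovered = unpad_trajectories(padded, masks)
print(torch.equal(recovered, obs))  # True
```

This is exactly the round trip the recurrent and SMT mini-batch generators rely on: pad for batched processing, then unpad so losses are computed only on real time steps.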