├── .gitignore
├── README.md
├── envs
│   └── cassie
│       ├── cassie.py
│       └── config.yaml
├── requirements.txt
├── resources
│   └── robots
│       └── cassie
│           ├── MJMODEL.TXT
│           ├── cassie-stl-meshes
│           │   ├── achilles-rod.stl
│           │   ├── foot-crank.stl
│           │   ├── foot.stl
│           │   ├── heel-spring.stl
│           │   ├── hip-pitch.stl
│           │   ├── hip-roll.stl
│           │   ├── hip-yaw.stl
│           │   ├── knee-spring.stl
│           │   ├── knee.stl
│           │   ├── pelvis.stl
│           │   ├── plantar-rod.stl
│           │   ├── shin.stl
│           │   └── tarsus.stl
│           ├── cassie.xml
│           ├── cassie_depth.xml
│           ├── cassie_hfield.xml
│           ├── cassie_mass.xml
│           ├── cassie_no_grav.xml
│           ├── cassie_noise_terrain.xml
│           ├── cassie_slosh_mass.xml
│           ├── cassie_tray_box.xml
│           ├── cassiepole.xml
│           ├── cassiepole_x.xml
│           ├── doublependulum.xml
│           ├── terrains
│           │   ├── noise.png
│           │   └── noisy.png
│           └── test.xml
├── scripts
│   ├── play.py
│   ├── train_paral.py
│   └── train_single.py
└── utils
    ├── MakeDirName.py
    └── NormalizeActionWrapper.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
log/*
scripts/outputs/*
envs/cassie/__pycache__/*
utils/__pycache__/*
.idea/*

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# sin-cassie-rl-python

- Pure Python, no C dependencies.
- This work is based on https://github.com/WooQi57/sin-cassie-rl and https://github.com/osudrl/cassie-mujoco-sim.
- The robot learns to walk after roughly 6 hours of training and walks relatively stably after about 12 hours.
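
## Quick start

A minimal sketch of a typical workflow (untested; the scripts call `sys.path.append("..")`, so run them from `scripts/`, and the checkpoint path in `play.py` is hard-coded, so point it at one of your own runs first):

```bash
pip install -r requirements.txt
cd scripts
python train_paral.py   # parallel PPO training (16 envs by default, see config.yaml)
python play.py          # roll out a saved checkpoint
```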
--------------------------------------------------------------------------------
/envs/cassie/config.yaml:
--------------------------------------------------------------------------------
rbtname: cassie

system: # robot system parameters
  GRAV: -9.8 # gravitational acceleration
  TSf: 0.002 # simulation time step as a float (seconds)
  Tend: 1000 # test duration (seconds)
  root_path: ".."
  mjcf_path: "/resources/robots/cassie/cassie.xml"
  log_path: &log_path
    dir: ../log/Cassie/${now:%Y-%m-%d}/${now:%H-%M-%S}
  visual: False
  dynamics_randomization: True

trainer:
  policy: "MlpPolicy"
  n_steps: 512
  batch_size: 256
  pi_net_arch: [512, 512]
  vf_net_arch: [512, 512]
  device: "cuda:0"

  n_eval_episodes: 10

env:
  num_envs: 16
  state_buffer_size: 1
  time_limit: 600 # at most 600 steps per episode

commands:
  lin_vel_x: [0.4, 0.7]
  lin_vel_y: [0.0, 0.0]
  ang_vel_yaw: [-1.0, 1.0]

init_state:
  pos: [0.0, 0.0, 0.1]
  # default_left_joint_angles: [0.1 , 0.0, 1.0, -1.8, 1.57, -1.57]
  # default_right_joint_angles: [-0.1, 0.0, 1.0, -1.8, 1.57, -1.57]
  default_left_joint_angles: [0.0045, 0.0, 0.4973, -1.1997, -1.5968]
  default_right_joint_angles: [-0.0045, 0.0, 0.4973, -1.1997, -1.5968]

control:
  # PD drive parameters:
  P: [100.0, 100.0, 88.0, 96.0, 50.0]
  D: [ 10.0, 10.0, 8.0, 9.6, 5.0]
  # action scale: target angle = action_scale * action + default_angle
  action_scale: 0.5
  # decimation: number of control action updates @ sim DT per policy DT
  decimation: 60

rewards:
  omega: 0.5
  scales:
    ref: 0.1
    spring: 0.1
    ori: 0.1
    vel: 0.1
    termin: 0.1

hydra:
  run:
    <<: *log_path
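
The `control` block above is consumed by `envs/cassie/cassie.py`, whose source was not captured in this export. As a rough illustration of how PD parameters like these are typically applied — every name below is an assumption, not code from the repo:

```python
import numpy as np

# values copied from the config above; one entry per actuated joint of a leg
P = np.array([100.0, 100.0, 88.0, 96.0, 50.0])   # proportional gains
D = np.array([ 10.0,  10.0,  8.0,  9.6,  5.0])   # derivative gains
action_scale = 0.5
default_angles = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968])

def pd_torques(action, q, qd):
    """PD torques tracking target = action_scale * action + default_angles."""
    target = action_scale * action + default_angles
    return P * (target - q) - D * qd

# each policy step would hold one target for `decimation` (60) simulator steps
```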
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
torch
gym
numpy
stable-baselines3
tensorboard
mujoco
mujoco-python-viewer
pyyaml
hydra-core

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/achilles-rod.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/achilles-rod.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/foot-crank.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/foot-crank.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/foot.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/foot.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/heel-spring.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/heel-spring.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/hip-pitch.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/hip-pitch.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/hip-roll.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/hip-roll.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/hip-yaw.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/hip-yaw.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/knee-spring.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/knee-spring.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/knee.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/knee.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/pelvis.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/pelvis.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/plantar-rod.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/plantar-rod.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/shin.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/shin.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie-stl-meshes/tarsus.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/cassie-stl-meshes/tarsus.stl

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~302 lines elided]
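
Although the exporter stripped the XML bodies, the model file can be loaded directly with the `mujoco` bindings pinned in requirements.txt. A minimal sketch (path relative to the repo root):

```python
import mujoco

model = mujoco.MjModel.from_xml_path("resources/robots/cassie/cassie.xml")
data = mujoco.MjData(model)
mujoco.mj_step(model, data)  # advance the simulation by one time step
```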
--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_depth.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~302 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_hfield.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~281 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_mass.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~274 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_no_grav.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~296 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_noise_terrain.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~273 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_slosh_mass.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~286 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassie_tray_box.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~288 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassiepole.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~290 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/cassiepole_x.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~290 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/doublependulum.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~34 lines elided]

--------------------------------------------------------------------------------
/resources/robots/cassie/terrains/noise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/terrains/noise.png

--------------------------------------------------------------------------------
/resources/robots/cassie/terrains/noisy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cwjwudi/sin-cassie-rl-python/c4f100f8bdd0e074b80d2e4bc59fb009c5188128/resources/robots/cassie/terrains/noisy.png

--------------------------------------------------------------------------------
/resources/robots/cassie/test.xml:
--------------------------------------------------------------------------------
[MJCF markup stripped by the exporter; ~49 lines elided]

--------------------------------------------------------------------------------
/scripts/play.py:
--------------------------------------------------------------------------------
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

from omegaconf import DictConfig
import hydra

import sys
sys.path.append("..")
from utils.NormalizeActionWrapper import NormalizeActionWrapper
from envs.cassie.cassie import CassieRefEnv


@hydra.main(version_base=None, config_path="../envs/cassie", config_name="config")
def run_play(cfg: DictConfig) -> None:
    env = CassieRefEnv(cfg=cfg)
    env = NormalizeActionWrapper(env)
    # the checkpoint path is hard-coded; point it at one of your own runs
    model = PPO.load("../log/cassie/2022-07-28-21-03-23/model_saved/ppo_cassie_2099200.zip", env)
    mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
    print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
    obs = env.reset()
    for i in range(10000):
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        if done:
            obs = env.reset()  # start a new episode when the current one ends


if __name__ == '__main__':
    run_play()
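
Because `run_play` and `run_train` are decorated with `@hydra.main(config_path="../envs/cassie", config_name="config")`, any key in config.yaml can be overridden from the command line with Hydra's dotted syntax, e.g. `python train_paral.py env.num_envs=8 trainer.device=cpu`.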
41 | """ 42 | def __init__(self, log_dir, verbose=0): 43 | super(TensorboardCallback, self).__init__(verbose) 44 | self.log_dir = log_dir 45 | 46 | def _on_step(self) -> bool: 47 | self.logger.record('reward/ref', np.mean(self.training_env.get_attr('rew_ref_buf'))) 48 | self.logger.record('reward/spring', np.mean(self.training_env.get_attr('rew_spring_buf'))) 49 | self.logger.record('reward/orientation', np.mean(self.training_env.get_attr('rew_ori_buf'))) 50 | self.logger.record('reward/velocity', np.mean(self.training_env.get_attr('rew_vel_buf'))) 51 | self.logger.record('reward/termination', np.mean(self.training_env.get_attr('rew_termin_buf'))) 52 | self.logger.record('reward/steps', np.mean(self.training_env.get_attr('time_buf'))) 53 | self.logger.record('reward/totalreward', np.mean(self.training_env.get_attr('reward_buf'))) 54 | self.logger.record('reward/omega', np.mean(self.training_env.get_attr('omega_buf'))) 55 | self.logger.record('reward/imit', 2*np.mean(self.training_env.get_attr('rew_ref_buf'))) 56 | self.logger.record('reward/perf', 2*np.mean(self.training_env.get_attr('rew_ori_buf')) 57 | + 2*np.mean(self.training_env.get_attr('rew_vel_buf'))) 58 | 59 | if self.n_calls % 51200 == 0: 60 | print("Saving new best model") 61 | self.model.save(self.log_dir + f"/model_saved/ppo_cassie_{self.n_calls}") 62 | 63 | return True 64 | 65 | # make policy rule 66 | policy_kwargs = dict(activation_fn=torch.nn.ReLU, 67 | net_arch=[dict(pi=[int(i) for i in cfg['trainer']['pi_net_arch']], 68 | vf=[int(i) for i in cfg['trainer']['vf_net_arch']])]) 69 | 70 | model = PPO(cfg['trainer']['policy'], envs, verbose=1, n_steps=cfg['trainer']['n_steps'], 71 | policy_kwargs=policy_kwargs, batch_size=cfg['trainer']['batch_size'], 72 | tensorboard_log=log_dir, device=cfg['trainer']['device']) 73 | model.is_tb_set = False 74 | 75 | model.learn(total_timesteps=int(4e7), n_eval_episodes=cfg['trainer']['n_eval_episodes'], 76 | callback=TensorboardCallback(log_dir=log_dir)) 77 | # model.save("ppo_cassie") 78 | 79 | 80 | if __name__ == '__main__': 81 | run_train() 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /scripts/train_single.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import torch 3 | from stable_baselines3 import PPO, SAC 4 | from stable_baselines3.common.vec_env import SubprocVecEnv 5 | from stable_baselines3.common.callbacks import BaseCallback 6 | from stable_baselines3.common.evaluation import evaluate_policy 7 | import numpy as np 8 | 9 | import sys 10 | sys.path.append("..") 11 | from utils.NormalizeActionWrapper import NormalizeActionWrapper 12 | from envs.cassie.cassie import CassieRefEnv 13 | 14 | if __name__ == '__main__': 15 | env = CassieRefEnv(visual=True, dynamics_randomization=False) 16 | env = NormalizeActionWrapper(env) 17 | 18 | 19 | class TensorboardCallback(BaseCallback): 20 | """ 21 | Custom callback for plotting additional values in tensorboard. 
22 | """ 23 | 24 | def __init__(self, verbose=0): 25 | super(TensorboardCallback, self).__init__(verbose) 26 | 27 | def _on_step(self) -> bool: 28 | if self.n_calls % 51200 == 0: 29 | print("Saving new best model") 30 | self.model.save(f"../model_saved/ppo_cassie_{self.n_calls}") 31 | 32 | return True 33 | 34 | 35 | policy_kwargs = dict(activation_fn=torch.nn.ReLU, 36 | net_arch=[dict(pi=[512, 512], vf=[512, 512])]) 37 | model = PPO("MlpPolicy", env, verbose=1, n_steps=256, policy_kwargs=policy_kwargs, 38 | batch_size=128, tensorboard_log="../log/") 39 | # model = SAC("MlpPolicy", env, verbose=1, policy_kwargs=policy_kwargs, tensorboard_log="./log/") 40 | model.is_tb_set = False 41 | 42 | model.learn(total_timesteps=4e7, n_eval_episodes=10, callback=TensorboardCallback()) 43 | model.save("ppo_m02l") 44 | -------------------------------------------------------------------------------- /utils/MakeDirName.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | 5 | def get_dir_rank_name(path): 6 | all_folders = os.listdir(path) 7 | if len(all_folders) == 0: 8 | return path + 'PPO_1' 9 | all_folders.sort() 10 | latest = all_folders[-1].replace('PPO_', '') 11 | return path + 'PPO_' + str(int(latest) + 1) 12 | 13 | 14 | def get_dir_data_name(path): 15 | time_name = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) 16 | return path + time_name 17 | 18 | -------------------------------------------------------------------------------- /utils/NormalizeActionWrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | 4 | class NormalizeActionWrapper(gym.Wrapper): 5 | """ 6 | :param env: (gym.Env) Gym environment that will be wrapped 7 | """ 8 | 9 | def __init__(self, env): 10 | action_space = env.action_space 11 | assert isinstance(action_space, 12 | gym.spaces.Box), "This wrapper only works with continuous action space (spaces.Box)" 13 | self.low, self.high = action_space.low, action_space.high 14 | 15 | # 重塑动作空间范围 16 | env.action_space = gym.spaces.Box(low=-1, high=1, shape=action_space.shape, dtype=np.float32) 17 | 18 | super(NormalizeActionWrapper, self).__init__(env) 19 | 20 | def rescale_action(self, scaled_action): 21 | """ 22 | Rescale the action from [-1, 1] to [low, high] 23 | (no need for symmetric action space) 24 | :param scaled_action: (np.ndarray) 25 | :return: (np.ndarray) 26 | """ 27 | return self.low + (0.5 * (scaled_action + 1.0) * (self.high - self.low)) 28 | 29 | def reset(self): 30 | """ 31 | Reset the environment 32 | """ 33 | return self.env.reset() 34 | 35 | def step(self, action): 36 | """ 37 | :param action: ([float] or int) Action taken by the agent 38 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations 39 | """ 40 | # 重新把动作从[-1,1]放缩到原本的[low,high] 41 | rescaled_action = self.rescale_action(action) 42 | obs, reward, done, info = self.env.step(rescaled_action) 43 | return obs, reward, done, info 44 | --------------------------------------------------------------------------------