├── .gitignore ├── 5k_test.py ├── LICENSE ├── README.md ├── apex-logo.png ├── apex.py ├── cassie ├── __init__.py ├── cassie.py ├── cassie_footdist_env.py ├── cassie_mininput_env.py ├── cassie_noaccel_footdist_env.py ├── cassie_noaccel_footdist_nojoint_env.py ├── cassie_noaccel_footdist_omniscient.py ├── cassie_novel_footdist_env.py ├── cassie_playground.py ├── cassie_standing_env.py ├── cassie_traj.py ├── cassiemujoco │ ├── WhyteField.png │ ├── __init__.py │ ├── cassie-stl-meshes │ │ ├── achilles-rod.stl │ │ ├── bleachers.stl │ │ ├── foot-crank.stl │ │ ├── foot.stl │ │ ├── heel-spring.stl │ │ ├── hip-pitch.stl │ │ ├── hip-roll.stl │ │ ├── hip-yaw.stl │ │ ├── knee-spring.stl │ │ ├── knee.stl │ │ ├── pelvis.stl │ │ ├── plantar-rod.stl │ │ ├── shin.stl │ │ ├── tarsus.stl │ │ └── terrains │ │ │ ├── bowl.png │ │ │ ├── radial_gradient.png │ │ │ ├── side_hill.png │ │ │ ├── side_slope.png │ │ │ ├── slope.png │ │ │ ├── step_pyramid.png │ │ │ └── terrain_1.png │ ├── cassie.xml │ ├── cassie.xml.orig │ ├── cassieUDP.py │ ├── cassie_crown.xml │ ├── cassie_drop_step.xml │ ├── cassie_hfield.xml │ ├── cassie_muTor.xml │ ├── cassie_noise_terrain.xml │ ├── cassie_playground.xml │ ├── cassie_soft.xml │ ├── cassie_stiff.xml │ ├── cassie_track.xml │ ├── cassie_waypoints.xml │ ├── cassiemujoco.py │ ├── cassiemujoco_ctypes.py │ ├── include │ │ ├── CassieCoreSim.h │ │ ├── PdInput.h │ │ ├── StateOutput.h │ │ ├── cassie_in_t.h │ │ ├── cassie_out_t.h │ │ ├── cassie_user_in_t.h │ │ ├── cassiemujoco.h │ │ ├── pd_in_t.h │ │ ├── state_out_t.h │ │ └── udp.h │ ├── libcassiemujoco.so │ ├── terrain_noise.xml │ ├── terrain_racetrack.xml │ ├── terrain_random_hills.xml │ ├── terrains │ │ ├── crown.png │ │ ├── drop_step.png │ │ ├── hfield.png │ │ ├── hfield2.png │ │ ├── hills.png │ │ ├── noise.png │ │ ├── noise1.npy │ │ ├── noise2.npy │ │ ├── noise3.npy │ │ ├── noisy.png │ │ ├── racetrack1.png │ │ ├── rand_hill1.npy │ │ ├── rand_hill2.npy │ │ ├── rand_hill3.npy │ │ ├── slope.png │ │ ├── utils │ │ │ └── noise_generator.py │ │ └── wavefield.png │ ├── test_terrain_noise.xml │ └── test_terrain_slope.xml ├── deprecated │ ├── aslipik_env.py │ ├── aslipik_unified_env.py │ ├── aslipik_unified_no_delta_env.py │ ├── cassie_env.py │ ├── env_test.py │ ├── ground_friction_env.py │ ├── ik_env.py │ ├── no_delta_env.py │ ├── plotting.py │ ├── slipik_env.py │ ├── speed_double_freq_env.py │ ├── speed_env.py │ ├── speed_freq_env.py │ ├── speed_freq_no_delta_env.py │ ├── speed_no_delta_env.py │ ├── speed_no_delta_neutral_foot_env.py │ └── taskspace_env.py ├── missions │ ├── 90_left │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── 90_right │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── __init__.py │ ├── add_waypoints.py │ ├── command_mission.py │ ├── curvy │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── 
command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── default │ │ ├── command_trajectory.pkl │ │ └── waypoints.csv │ └── straight │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ ├── waypoints_2.4.csv │ │ └── waypoints_2.8.csv ├── outfile.npz ├── phase_function.py ├── plotting_ex.py ├── quaternion_function.py ├── rewards │ ├── __init__.py │ ├── aslip_rewards.py │ ├── clock_rewards.py │ ├── command_reward.py │ ├── iros_paper_reward.py │ ├── reward_clock_funcs │ │ ├── incentive_clock_smooth.pkl │ │ ├── incentive_clock_smooth_aerial.pkl │ │ ├── incentive_clock_smooth_zero.pkl │ │ ├── incentive_clock_strict0.1.pkl │ │ ├── incentive_clock_strict0.1_aerial.pkl │ │ ├── incentive_clock_strict0.1_zero.pkl │ │ ├── incentive_clock_strict0.4.pkl │ │ ├── incentive_clock_strict0.4_aerial.pkl │ │ ├── incentive_clock_strict0.4_zero.pkl │ │ ├── no_incentive_aslip_clock_strict0.3.pkl │ │ ├── no_incentive_clock_smooth.pkl │ │ ├── no_incentive_clock_smooth_aerial.pkl │ │ ├── no_incentive_clock_smooth_zero.pkl │ │ ├── no_incentive_clock_strict0.1.pkl │ │ ├── no_incentive_clock_strict0.1_aerial.pkl │ │ ├── no_incentive_clock_strict0.1_zero.pkl │ │ ├── no_incentive_clock_strict0.4.pkl │ │ ├── no_incentive_clock_strict0.4_aerial.pkl │ │ └── no_incentive_clock_strict0.4_zero.pkl │ ├── rnn_dyn_random_reward.py │ ├── side_speedmatch_foottraj_reward.py │ ├── side_speedmatch_heightvel_reward.py │ ├── side_speedmatch_heuristic_reward.py │ ├── side_speedmatch_rewards.py │ ├── side_speedmatch_torquesmooth_reward.py │ ├── speedmatch_footorient_joint_smooth_reward.py │ ├── speedmatch_heuristic_reward.py │ ├── speedmatch_rewards.py │ ├── standing_rewards.py │ └── trajmatch_reward.py └── trajectory │ ├── .DS_Store │ ├── __init__.py │ ├── aslipTrajsTaskSpace │ ├── walkCycle_0.0.pkl │ ├── walkCycle_0.1.pkl │ ├── walkCycle_0.2.pkl │ ├── walkCycle_0.3.pkl │ ├── walkCycle_0.4.pkl │ ├── walkCycle_0.5.pkl │ ├── walkCycle_0.6.pkl │ ├── walkCycle_0.7.pkl │ ├── walkCycle_0.8.pkl │ ├── walkCycle_0.9.pkl │ ├── walkCycle_1.0.pkl │ ├── walkCycle_1.1.pkl │ ├── walkCycle_1.2.pkl │ ├── walkCycle_1.3.pkl │ ├── walkCycle_1.4.pkl │ ├── walkCycle_1.5.pkl │ ├── walkCycle_1.6.pkl │ ├── walkCycle_1.7.pkl │ ├── walkCycle_1.8.pkl │ ├── walkCycle_1.9.pkl │ └── walkCycle_2.0.pkl │ ├── aslip_trajectory.py │ ├── backward_trajectory_Nov │ ├── ikNet_state_dict.pt │ ├── more-poses-trial.bin │ ├── spline_stepping_traj.pkl │ ├── stepdata.bin │ ├── stepping_trajectory_Nov │ ├── test.py │ ├── traj_from_ref_foot_data.pkl │ ├── trajectory.py │ └── walk-in-place-downsampled.bin ├── img ├── output.gif └── output2.gif ├── mirror_policy_check.py ├── plot_policy.py ├── rl ├── __init__.py ├── algos │ ├── __init__.py │ ├── ars.py │ ├── async_td3.py │ ├── dpg.py │ ├── ppo.py │ └── sync_td3.py ├── config │ └── monitor.ini ├── distributions │ ├── __init__.py │ ├── beta.py │ └── gaussian.py ├── envs │ ├── __init__.py │ ├── monitor.py │ ├── normalize.py │ ├── vectorize.py │ ├── wrapper.py │ └── wrappers.py ├── policies │ ├── __init__.py │ ├── actor.py │ ├── base.py │ └── critic.py └── utils │ ├── __init__.py │ ├── 
param_noise.py │ ├── remote_replay.py │ └── render.py ├── test_policy.py ├── tools ├── .DS_Store ├── aslip_tests │ ├── GRF_2KHz.pkl │ ├── GRF_compare.py │ ├── foot_placement.py │ ├── parallelized.py │ ├── plots │ │ └── footpos_err.png │ └── taskspace_tracking.py ├── cassie_top_white.png ├── command_trajectory.pkl ├── compare_pols.py ├── eval_mission.py ├── eval_perturb.py ├── eval_sensitivity.py ├── test_commands.py ├── test_perturb_eval_phase.npy ├── utils │ ├── __init__.py │ └── elements.py ├── vis_input_and_state.py ├── vis_perturb.py └── waypoint_trajectory.py ├── trained_models ├── 5k_retrain │ ├── actor.pt │ ├── critic.pt │ ├── eval_commands.npy │ ├── eval_perturbs.npy │ ├── experiment.info │ └── experiment.pkl └── nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2 │ ├── 5k_test.pkl │ ├── actor.pt │ ├── critic.pt │ ├── eval_commands.npy │ ├── eval_perturbs.npy │ ├── experiment.info │ └── experiment.pkl └── util ├── env.py ├── eval.py ├── log.py └── logo.py /.gitignore: -------------------------------------------------------------------------------- 1 | cassieXie/ 2 | MUJOCO_LOG.TXT 3 | cassie/cassiemujoco/mjkey.txt 4 | cassie/cassiemujoco/mjpro150/ 5 | cassie/pickled 6 | sim-to-real/ 7 | cassie/cassieIK_SL.pt 8 | cassie/taskspace_to_jointpos.pt 9 | .vscode/ 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | !libcassiemujoco.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | .static_storage/ 67 | .media/ 68 | local_settings.py 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | .spyproject 107 | 108 | # Rope project settings 109 | .ropeproject 110 | 111 | # mkdocs documentation 112 | /site 113 | 114 | # mypy 115 | .mypy_cache/ 116 | 117 | # ray files 118 | /ray_tmp 119 | ray_timeline.json 120 | 121 | # trained models 122 | /trained_models/* 123 | 124 | # logging directory 125 | /logs 126 | ref_qposes.png 127 | 128 | # testing directory files 129 | /tools/aslip_pipeline/testTS_logs/* 130 | /tools/aslip_pipeline/testVaryVel_logs/* 131 | /tools/apex_plots/* 132 | /tools/waypoints.csv 133 | 134 | *.sh.e* 135 | *.sh.o* 136 | 137 | 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Pedro Autran e Morais 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | apex 2 | 3 | ---- 4 | 5 | Apex is a small, modular library that contains some implementations of continuous reinforcement learning algorithms. Fully compatible with OpenAI gym. 6 | 7 | running1 8 | running2 9 | 10 | ## Running experiments 11 | 12 | ### Basics 13 | Any algorithm can be run from the apex.py entry point. 
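The Cassie environments listed later in this README (`Cassie-v0`, `CassieTraj-v0`, etc.) follow a gym-style interface, so they can also be driven directly from Python without going through `apex.py`. A minimal sketch, assuming default constructor arguments and a standard `reset`/`step` signature (an illustration, not a verified API reference):

```python
import numpy as np
from cassie import CassieEnv

env = CassieEnv()                    # constructor arguments, if any, assumed to have defaults
state = env.reset()

for _ in range(300):
    action = np.zeros(10)            # Cassie has 10 actuated motors; a trained policy would produce this
    state, reward, done, _ = env.step(action)   # gym-style 4-tuple assumed
    if done:
        state = env.reset()
```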
14 | 15 | To run PPO on a cassie environment, 16 | 17 | ```bash 18 | python apex.py ppo --env_name Cassie-v0 --num_procs 12 --run_name experiment01 19 | ``` 20 | 21 | To run TD3 on the gym environment Walker-v2, 22 | 23 | ```bash 24 | python apex.py td3_async --env_name Walker-v2 --num_procs 12 --run_name experiment02 25 | ``` 26 | 27 | ## Logging details / Monitoring live training progress 28 | Tensorboard logging is enabled by default for all algorithms. The logger expects that you supply an argument named ```logdir```, containing the root directory you want to store your logfiles in, and an argument named ```seed```, which is used to seed the pseudorandom number generators. 29 | 30 | A basic command line script illustrating this is: 31 | 32 | ```bash 33 | python apex.py ars --logdir logs/ars --seed 1337 34 | ``` 35 | 36 | The resulting directory tree would look something like this: 37 | ``` 38 | trained_models/ # directory with all of the saved models and tensorboard logs 39 | └── ars # algorithm name 40 | └── Cassie-v0 # environment name 41 | └── 8b8b12-seed1 # unique run name created with hash of hyperparameters 42 | ├── actor.pt # actor network for algo 43 | ├── critic.pt # critic network for algo 44 | ├── events.out.tfevents # tensorboard binary file 45 | ├── experiment.info # readable hyperparameters for this run 46 | └── experiment.pkl # loadable pickle of hyperparameters 47 | ``` 48 | 49 | Using tensorboard makes it easy to compare experiments and resume training later on. 50 | 51 | To see live training progress 52 | 53 | Run ```$ tensorboard --logdir logs/``` then navigate to ```http://localhost:6006/``` in your browser 54 | 55 | ## Cassie Environments: 56 | * `Cassie-v0` : basic unified environment for walking/running policies 57 | * `CassieTraj-v0` : unified environment with reference trajectories 58 | * `CassiePlayground-v0` : environment for executing autonomous missions 59 | * `CassieStanding-v0` : environment for training standing policies 60 | 61 | ## Algorithms: 62 | #### Currently implemented: 63 | * Parallelism with [Ray](https://github.com/ray-project/ray) 64 | * [GAE](https://arxiv.org/abs/1506.02438)/TD(lambda) estimators 65 | * [PPO](https://arxiv.org/abs/1707.06347), VPG with ratio objective and with log likelihood objective 66 | * [TD3](https://arxiv.org/abs/1802.09477) with [Parameter Noise Exploration](https://arxiv.org/abs/1706.01905) 67 | * [DDPG](https://arxiv.org/abs/1509.02971) 68 | * [RDPG](https://arxiv.org/abs/1512.04455) 69 | * [ARS](https://arxiv.org/abs/1803.07055) 70 | * Entropy based exploration bonus 71 | * advantage centering (observation normalization WIP) 72 | 73 | #### To be implemented long term: 74 | * [SAC](https://arxiv.org/abs/1801.01290) 75 | * [GPO](https://arxiv.org/abs/1711.01012) 76 | * [NAF](https://arxiv.org/abs/1603.00748) 77 | * [SVG](https://arxiv.org/abs/1510.09142) 78 | * [I2A](https://arxiv.org/abs/1707.06203) 79 | * [PGPE](http://ieeexplore.ieee.org/document/5708821/?reload=true) 80 | * [Value Distribution](https://arxiv.org/pdf/1707.06887.pdf) 81 | * Oracle methods (e.g. [GPS](https://arxiv.org/abs/1610.00529)) 82 | * CUDA support (should be trivial but I don't have a GPU to test on currently) 83 | 84 | #### Maybe implemented in future: 85 | 86 | * [DXNN](https://arxiv.org/abs/1008.2412) 87 | * [ACER](https://arxiv.org/abs/1611.01224) and other off-policy methods 88 | * Model-based methods 89 | 90 | ## Acknowledgements 91 | 92 | Thanks to @ikostrikov's whose great implementations were used for debugging. 
Also thanks to @rll for rllab, which inspired a lot of the high level interface and logging for this library, and to @OpenAI for the original PPO tensorflow implementation. Thanks to @sfujim for the clean implementations of TD3 and DDPG in PyTorch. Thanks @modestyachts for the easy to understand ARS implementation. 93 | -------------------------------------------------------------------------------- /apex-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/apex-logo.png -------------------------------------------------------------------------------- /cassie/__init__.py: -------------------------------------------------------------------------------- 1 | # Unified 2 | from .cassie import CassieEnv 3 | from .cassie_traj import CassieTrajEnv 4 | from .cassie_playground import CassiePlayground 5 | from .cassie_standing_env import CassieStandingEnv # sorta old/unused 6 | 7 | # Proprietary 8 | from .cassie_noaccel_footdist_omniscient import CassieEnv_noaccel_footdist_omniscient 9 | from .cassie_footdist_env import CassieEnv_footdist 10 | from .cassie_noaccel_footdist_env import CassieEnv_noaccel_footdist 11 | from .cassie_noaccel_footdist_nojoint_env import CassieEnv_noaccel_footdist_nojoint 12 | from .cassie_novel_footdist_env import CassieEnv_novel_footdist 13 | from .cassie_mininput_env import CassieEnv_mininput 14 | 15 | # CassieMujocoSim 16 | from .cassiemujoco import * 17 | 18 | 19 | ############## 20 | # DEPRECATED # 21 | ############## 22 | # from .cassie_env import CassieEnv 23 | # from .taskspace_env import CassieTSEnv 24 | # from .aslipik_env import CassieIKEnv 25 | # from .aslipik_unified_env import UnifiedCassieIKEnv 26 | # from .aslipik_unified_no_delta_env import UnifiedCassieIKEnvNoDelta 27 | # from .no_delta_env import CassieEnv_nodelta 28 | # from .dynamics_random import CassieEnv_rand_dyn 29 | # from .speed_double_freq_env import CassieEnv_speed_dfreq 30 | # from .ground_friction_env import CassieGroundFrictionEnv 31 | # from .cassie_standing_env import CassieStandingEnv 32 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/WhyteField.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/WhyteField.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from .cassiemujoco import * -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/foot.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/knee.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/shin.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/shin.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassieUDP.py: -------------------------------------------------------------------------------- 1 | from .cassiemujoco_ctypes import * 2 | import os 3 | import ctypes 4 | import numpy as np 5 | 6 | class CassieUdp: 7 | def __init__(self, remote_addr='127.0.0.1', 
remote_port='25000', 8 | local_addr='0.0.0.0', local_port='25001'): 9 | self.sock = udp_init_client(str.encode(remote_addr), 10 | str.encode(remote_port), 11 | str.encode(local_addr), 12 | str.encode(local_port)) 13 | self.packet_header_info = packet_header_info_t() 14 | self.recvlen = 2 + 697 15 | self.sendlen = 2 + 58 16 | self.recvlen_pd = 2 + 493 17 | self.sendlen_pd = 2 + 476 18 | self.recvbuf = (ctypes.c_ubyte * max(self.recvlen, self.recvlen_pd))() 19 | self.sendbuf = (ctypes.c_ubyte * max(self.sendlen, self.sendlen_pd))() 20 | self.inbuf = ctypes.cast(ctypes.byref(self.recvbuf, 2), 21 | ctypes.POINTER(ctypes.c_ubyte)) 22 | self.outbuf = ctypes.cast(ctypes.byref(self.sendbuf, 2), 23 | ctypes.POINTER(ctypes.c_ubyte)) 24 | 25 | def send(self, u): 26 | pack_cassie_user_in_t(u, self.outbuf) 27 | send_packet(self.sock, self.sendbuf, self.sendlen, None, 0) 28 | 29 | def send_pd(self, u): 30 | pack_pd_in_t(u, self.outbuf) 31 | send_packet(self.sock, self.sendbuf, self.sendlen_pd, None, 0) 32 | 33 | def recv_wait(self): 34 | nbytes = -1 35 | while nbytes != self.recvlen: 36 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen, 37 | None, None) 38 | process_packet_header(self.packet_header_info, 39 | self.recvbuf, self.sendbuf) 40 | cassie_out = cassie_out_t() 41 | unpack_cassie_out_t(self.inbuf, cassie_out) 42 | return cassie_out 43 | 44 | def recv_wait_pd(self): 45 | nbytes = -1 46 | while nbytes != self.recvlen_pd: 47 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd, 48 | None, None) 49 | process_packet_header(self.packet_header_info, 50 | self.recvbuf, self.sendbuf) 51 | state_out = state_out_t() 52 | unpack_state_out_t(self.inbuf, state_out) 53 | return state_out 54 | 55 | def recv_newest(self): 56 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen, 57 | None, None) 58 | if nbytes != self.recvlen: 59 | return None 60 | process_packet_header(self.packet_header_info, 61 | self.recvbuf, self.sendbuf) 62 | cassie_out = cassie_out_t() 63 | unpack_cassie_out_t(self.inbuf, cassie_out) 64 | return cassie_out 65 | 66 | def recv_newest_pd(self): 67 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd, 68 | None, None) 69 | if nbytes != self.recvlen_pd: 70 | return None 71 | process_packet_header(self.packet_header_info, 72 | self.recvbuf, self.sendbuf) 73 | state_out = state_out_t() 74 | unpack_state_out_t(self.inbuf, state_out) 75 | return state_out 76 | 77 | def delay(self): 78 | return ord(self.packet_header_info.delay) 79 | 80 | def seq_num_in_diff(self): 81 | return ord(self.packet_header_info.seq_num_in_diff) 82 | 83 | def __del__(self): 84 | udp_close(self.sock) -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/CassieCoreSim.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef CASSIECORESIM_H 18 | #define CASSIECORESIM_H 19 | 20 | #include "cassie_user_in_t.h" 21 | #include "cassie_out_t.h" 22 | #include "cassie_in_t.h" 23 | 24 | typedef struct CassieCoreSim CassieCoreSim; 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | CassieCoreSim* CassieCoreSim_alloc(void); 31 | void CassieCoreSim_copy(CassieCoreSim *dst, const CassieCoreSim *src); 32 | void CassieCoreSim_free(CassieCoreSim *sys); 33 | void CassieCoreSim_setup(CassieCoreSim *sys); 34 | void CassieCoreSim_step(CassieCoreSim *sys, const cassie_user_in_t *in1, 35 | const cassie_out_t *in2, cassie_in_t *out1); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // CASSIECORESIM_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/PdInput.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef PDINPUT_H 18 | #define PDINPUT_H 19 | 20 | #include "pd_in_t.h" 21 | #include "cassie_out_t.h" 22 | #include "cassie_user_in_t.h" 23 | 24 | typedef struct PdInput PdInput; 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | PdInput* PdInput_alloc(void); 31 | void PdInput_copy(PdInput *dst, const PdInput *src); 32 | void PdInput_free(PdInput *sys); 33 | void PdInput_setup(PdInput *sys); 34 | void PdInput_step(PdInput *sys, const pd_in_t *in1, const cassie_out_t 35 | *in2, cassie_user_in_t *out1); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // PDINPUT_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/StateOutput.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef STATEOUTPUT_H 18 | #define STATEOUTPUT_H 19 | 20 | #include "cassie_out_t.h" 21 | #include "state_out_t.h" 22 | 23 | typedef struct StateOutput StateOutput; 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | StateOutput* StateOutput_alloc(void); 30 | void StateOutput_copy(StateOutput *dst, const StateOutput *src); 31 | void StateOutput_free(StateOutput *sys); 32 | void StateOutput_setup(StateOutput *sys); 33 | void StateOutput_step(StateOutput *sys, const cassie_out_t *in1, 34 | state_out_t *out1); 35 | 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | #endif // STATEOUTPUT_H 40 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef CASSIE_IN_T_H 18 | #define CASSIE_IN_T_H 19 | 20 | #define CASSIE_IN_T_PACKED_LEN 91 21 | 22 | #include 23 | 24 | typedef struct { 25 | unsigned short controlWord; 26 | double torque; 27 | } elmo_in_t; 28 | 29 | typedef struct { 30 | elmo_in_t hipRollDrive; 31 | elmo_in_t hipYawDrive; 32 | elmo_in_t hipPitchDrive; 33 | elmo_in_t kneeDrive; 34 | elmo_in_t footDrive; 35 | } cassie_leg_in_t; 36 | 37 | typedef struct { 38 | short channel[14]; 39 | } radio_in_t; 40 | 41 | typedef struct { 42 | radio_in_t radio; 43 | bool sto; 44 | bool piezoState; 45 | unsigned char piezoTone; 46 | } cassie_pelvis_in_t; 47 | 48 | typedef struct { 49 | cassie_pelvis_in_t pelvis; 50 | cassie_leg_in_t leftLeg; 51 | cassie_leg_in_t rightLeg; 52 | } cassie_in_t; 53 | 54 | 55 | #ifdef __cplusplus 56 | extern "C" { 57 | #endif 58 | 59 | void pack_cassie_in_t(const cassie_in_t *bus, unsigned char *bytes); 60 | void unpack_cassie_in_t(const unsigned char *bytes, cassie_in_t *bus); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | #endif // CASSIE_IN_T_H 66 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_out_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef CASSIE_OUT_T_H 18 | #define CASSIE_OUT_T_H 19 | 20 | #define CASSIE_OUT_T_PACKED_LEN 697 21 | 22 | #include 23 | 24 | typedef short DiagnosticCodes; 25 | 26 | 27 | typedef struct { 28 | bool dataGood; 29 | double stateOfCharge; 30 | double voltage[12]; 31 | double current; 32 | double temperature[4]; 33 | } battery_out_t; 34 | 35 | typedef struct { 36 | double position; 37 | double velocity; 38 | } cassie_joint_out_t; 39 | 40 | typedef struct { 41 | unsigned short statusWord; 42 | double position; 43 | double velocity; 44 | double torque; 45 | double driveTemperature; 46 | double dcLinkVoltage; 47 | double torqueLimit; 48 | double gearRatio; 49 | } elmo_out_t; 50 | 51 | typedef struct { 52 | elmo_out_t hipRollDrive; 53 | elmo_out_t hipYawDrive; 54 | elmo_out_t hipPitchDrive; 55 | elmo_out_t kneeDrive; 56 | elmo_out_t footDrive; 57 | cassie_joint_out_t shinJoint; 58 | cassie_joint_out_t tarsusJoint; 59 | cassie_joint_out_t footJoint; 60 | unsigned char medullaCounter; 61 | unsigned short medullaCpuLoad; 62 | bool reedSwitchState; 63 | } cassie_leg_out_t; 64 | 65 | typedef struct { 66 | bool radioReceiverSignalGood; 67 | bool receiverMedullaSignalGood; 68 | double channel[16]; 69 | } radio_out_t; 70 | 71 | typedef struct { 72 | int etherCatStatus[6]; 73 | int etherCatNotifications[21]; 74 | double taskExecutionTime; 75 | unsigned int overloadCounter; 76 | double cpuTemperature; 77 | } target_pc_out_t; 78 | 79 | typedef struct { 80 | bool dataGood; 81 | unsigned short vpeStatus; 82 | double pressure; 83 | double temperature; 84 | double magneticField[3]; 85 | double angularVelocity[3]; 86 | double linearAcceleration[3]; 87 | double orientation[4]; 88 | } vectornav_out_t; 89 | 90 | typedef struct { 91 | target_pc_out_t targetPc; 92 | battery_out_t battery; 93 | radio_out_t radio; 94 | vectornav_out_t vectorNav; 95 | unsigned char medullaCounter; 96 | unsigned short medullaCpuLoad; 97 | bool bleederState; 98 | bool leftReedSwitchState; 99 | bool rightReedSwitchState; 100 | double vtmTemperature; 101 | } cassie_pelvis_out_t; 102 | 103 | typedef struct { 104 | cassie_pelvis_out_t pelvis; 105 | cassie_leg_out_t leftLeg; 106 | cassie_leg_out_t rightLeg; 107 | bool isCalibrated; 108 | DiagnosticCodes messages[4]; 109 | } cassie_out_t; 110 | 111 | #define EMPTY ((DiagnosticCodes)0) 112 | #define LEFT_HIP_NOT_CALIB ((DiagnosticCodes)5) 113 | #define LEFT_KNEE_NOT_CALIB ((DiagnosticCodes)6) 114 | #define RIGHT_HIP_NOT_CALIB ((DiagnosticCodes)7) 115 | #define RIGHT_KNEE_NOT_CALIB ((DiagnosticCodes)8) 116 | #define LOW_BATTERY_CHARGE ((DiagnosticCodes)200) 117 | #define HIGH_CPU_TEMP ((DiagnosticCodes)205) 118 | #define HIGH_VTM_TEMP ((DiagnosticCodes)210) 119 | #define HIGH_ELMO_DRIVE_TEMP ((DiagnosticCodes)215) 120 | #define HIGH_STATOR_TEMP ((DiagnosticCodes)220) 121 | #define LOW_ELMO_LINK_VOLTAGE ((DiagnosticCodes)221) 122 | #define HIGH_BATTERY_TEMP ((DiagnosticCodes)225) 123 | #define RADIO_DATA_BAD ((DiagnosticCodes)230) 124 | #define RADIO_SIGNAL_BAD ((DiagnosticCodes)231) 125 | #define BMS_DATA_BAD ((DiagnosticCodes)235) 126 | #define VECTORNAV_DATA_BAD ((DiagnosticCodes)236) 127 | #define VPE_GYRO_SATURATION ((DiagnosticCodes)240) 128 | #define VPE_MAG_SATURATION ((DiagnosticCodes)241) 129 | #define VPE_ACC_SATURATION ((DiagnosticCodes)242) 130 | #define VPE_ATTITUDE_BAD ((DiagnosticCodes)245) 131 | #define VPE_ATTITUDE_NOT_TRACKING ((DiagnosticCodes)246) 132 | #define ETHERCAT_DC_ERROR ((DiagnosticCodes)400) 133 | #define ETHERCAT_ERROR ((DiagnosticCodes)410) 134 | 
#define LOAD_CALIB_DATA_ERROR ((DiagnosticCodes)590) 135 | #define CRITICAL_BATTERY_CHARGE ((DiagnosticCodes)600) 136 | #define CRITICAL_CPU_TEMP ((DiagnosticCodes)605) 137 | #define CRITICAL_VTM_TEMP ((DiagnosticCodes)610) 138 | #define CRITICAL_ELMO_DRIVE_TEMP ((DiagnosticCodes)615) 139 | #define CRITICAL_STATOR_TEMP ((DiagnosticCodes)620) 140 | #define CRITICAL_BATTERY_TEMP ((DiagnosticCodes)625) 141 | #define TORQUE_LIMIT_REACHED ((DiagnosticCodes)630) 142 | #define JOINT_LIMIT_REACHED ((DiagnosticCodes)635) 143 | #define ENCODER_FAILURE ((DiagnosticCodes)640) 144 | #define SPRING_FAILURE ((DiagnosticCodes)645) 145 | #define LEFT_LEG_MEDULLA_HANG ((DiagnosticCodes)700) 146 | #define RIGHT_LEG_MEDULLA_HANG ((DiagnosticCodes)701) 147 | #define PELVIS_MEDULLA_HANG ((DiagnosticCodes)703) 148 | #define CPU_OVERLOAD ((DiagnosticCodes)704) 149 | 150 | #ifdef __cplusplus 151 | extern "C" { 152 | #endif 153 | 154 | void pack_cassie_out_t(const cassie_out_t *bus, unsigned char *bytes); 155 | void unpack_cassie_out_t(const unsigned char *bytes, cassie_out_t *bus); 156 | 157 | #ifdef __cplusplus 158 | } 159 | #endif 160 | #endif // CASSIE_OUT_T_H 161 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_user_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef CASSIE_USER_IN_T_H 18 | #define CASSIE_USER_IN_T_H 19 | 20 | #define CASSIE_USER_IN_T_PACKED_LEN 58 21 | 22 | #include 23 | 24 | typedef struct { 25 | double torque[10]; 26 | short telemetry[9]; 27 | } cassie_user_in_t; 28 | 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | void pack_cassie_user_in_t(const cassie_user_in_t *bus, unsigned char *bytes); 35 | void unpack_cassie_user_in_t(const unsigned char *bytes, cassie_user_in_t *bus); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // CASSIE_USER_IN_T_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/pd_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef PD_IN_T_H 18 | #define PD_IN_T_H 19 | 20 | #define PD_IN_T_PACKED_LEN 476 21 | 22 | #include 23 | 24 | typedef struct { 25 | double torque[5]; 26 | double pTarget[5]; 27 | double dTarget[5]; 28 | double pGain[5]; 29 | double dGain[5]; 30 | } pd_motor_in_t; 31 | 32 | typedef struct { 33 | double torque[6]; 34 | double pTarget[6]; 35 | double dTarget[6]; 36 | double pGain[6]; 37 | double dGain[6]; 38 | } pd_task_in_t; 39 | 40 | typedef struct { 41 | pd_task_in_t taskPd; 42 | pd_motor_in_t motorPd; 43 | } pd_leg_in_t; 44 | 45 | typedef struct { 46 | pd_leg_in_t leftLeg; 47 | pd_leg_in_t rightLeg; 48 | double telemetry[9]; 49 | } pd_in_t; 50 | 51 | 52 | #ifdef __cplusplus 53 | extern "C" { 54 | #endif 55 | 56 | void pack_pd_in_t(const pd_in_t *bus, unsigned char *bytes); 57 | void unpack_pd_in_t(const unsigned char *bytes, pd_in_t *bus); 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | #endif // PD_IN_T_H 63 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/state_out_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef STATE_OUT_T_H 18 | #define STATE_OUT_T_H 19 | 20 | #define STATE_OUT_T_PACKED_LEN 493 21 | 22 | #include 23 | 24 | typedef struct { 25 | double stateOfCharge; 26 | double current; 27 | } state_battery_out_t; 28 | 29 | typedef struct { 30 | double position[3]; 31 | double orientation[4]; 32 | double footRotationalVelocity[3]; 33 | double footTranslationalVelocity[3]; 34 | double toeForce[3]; 35 | double heelForce[3]; 36 | } state_foot_out_t; 37 | 38 | typedef struct { 39 | double position[6]; 40 | double velocity[6]; 41 | } state_joint_out_t; 42 | 43 | typedef struct { 44 | double position[10]; 45 | double velocity[10]; 46 | double torque[10]; 47 | } state_motor_out_t; 48 | 49 | typedef struct { 50 | double position[3]; 51 | double orientation[4]; 52 | double rotationalVelocity[3]; 53 | double translationalVelocity[3]; 54 | double translationalAcceleration[3]; 55 | double externalMoment[3]; 56 | double externalForce[3]; 57 | } state_pelvis_out_t; 58 | 59 | typedef struct { 60 | double channel[16]; 61 | bool signalGood; 62 | } state_radio_out_t; 63 | 64 | typedef struct { 65 | double height; 66 | double slope[2]; 67 | } state_terrain_out_t; 68 | 69 | typedef struct { 70 | state_pelvis_out_t pelvis; 71 | state_foot_out_t leftFoot; 72 | state_foot_out_t rightFoot; 73 | state_terrain_out_t terrain; 74 | state_motor_out_t motor; 75 | state_joint_out_t joint; 76 | state_radio_out_t radio; 77 | state_battery_out_t battery; 78 | } state_out_t; 79 | 80 | 81 | #ifdef __cplusplus 82 | extern "C" { 83 | #endif 84 | 85 | void pack_state_out_t(const state_out_t *bus, unsigned char *bytes); 86 | void unpack_state_out_t(const unsigned char *bytes, state_out_t *bus); 87 | 88 | #ifdef __cplusplus 89 | } 90 | #endif 91 | #endif // STATE_OUT_T_H 92 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/udp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef UDP_H 18 | #define UDP_H 19 | 20 | #define PACKET_HEADER_LEN 2 21 | 22 | // Data and results for processing packet header 23 | typedef struct { 24 | char seq_num_out; 25 | char seq_num_in_last; 26 | char delay; 27 | char seq_num_in_diff; 28 | } packet_header_info_t; 29 | 30 | 31 | // Process packet header used to measure delay and skipped packets 32 | void process_packet_header(packet_header_info_t *info, 33 | const unsigned char *header_in, 34 | unsigned char *header_out); 35 | 36 | #ifndef _WIN32 37 | #include 38 | 39 | // Create a UDP socket listening at a specific address/port 40 | int udp_init_host(const char *addr_str, const char *port_str); 41 | 42 | // Create a UDP socket connected and listening to specific addresses/ports 43 | int udp_init_client(const char *remote_addr_str, const char *remote_port_str, 44 | const char *local_addr_str, const char *local_port_str); 45 | 46 | // Close a UDP socket 47 | void udp_close(int sock); 48 | 49 | // Get newest valid packet in RX buffer 50 | ssize_t get_newest_packet(int sock, void *recvbuf, size_t recvlen, 51 | struct sockaddr *src_addr, socklen_t *addrlen); 52 | 53 | // Wait for a new valid packet 54 | ssize_t wait_for_packet(int sock, void *recvbuf, size_t recvlen, 55 | struct sockaddr *src_addr, socklen_t *addrlen); 56 | 57 | // Send a packet 58 | ssize_t send_packet(int sock, void *sendbuf, size_t sendlen, 59 | struct sockaddr *dst_addr, socklen_t addrlen); 60 | 61 | #endif // _WIN32 62 | #endif // UDP_H 63 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/libcassiemujoco.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/libcassiemujoco.so -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/crown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/crown.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/drop_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/drop_step.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hfield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hfield2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield2.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hills.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hills.png 
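The UDP layer shown above (udp.h, the packed-struct headers, and the CassieUdp wrapper in cassie/cassiemujoco/cassieUDP.py) is how a policy talks to the robot or to a remote simulator: pd_in_t packets carry PD gains and targets out, and state_out_t packets carry the estimated robot state back. A rough sketch of such a loop follows; the import paths, gain values, and the idea of deriving targets from a policy are illustrative assumptions, not a verified control script.

```python
from cassie.cassiemujoco.cassieUDP import CassieUdp
from cassie.cassiemujoco.cassiemujoco_ctypes import pd_in_t   # ctypes mirror of pd_in_t.h (import path assumed)

cassie = CassieUdp()                 # defaults: remote 127.0.0.1:25000, local 0.0.0.0:25001

u = pd_in_t()
for i in range(5):                   # 5 motors per leg, matching pd_motor_in_t
    u.leftLeg.motorPd.pGain[i] = 100.0    # placeholder gains, not tuned values
    u.leftLeg.motorPd.dGain[i] = 10.0
    u.rightLeg.motorPd.pGain[i] = 100.0
    u.rightLeg.motorPd.dGain[i] = 10.0

# Keep sending until the first state packet arrives, then run the control loop.
state = None
while state is None:
    cassie.send_pd(u)
    state = cassie.recv_newest_pd()

for _ in range(1000):
    state = cassie.recv_wait_pd()              # blocks until a fresh state_out_t arrives
    pelvis_height = state.pelvis.position[2]   # fields follow state_out_t.h
    # ... a policy would map `state` to new pTarget values here ...
    for i in range(5):
        u.leftLeg.motorPd.pTarget[i] = 0.0     # placeholder targets
        u.rightLeg.motorPd.pTarget[i] = 0.0
    cassie.send_pd(u)
```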
-------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise1.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise2.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise3.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noisy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noisy.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/racetrack1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/racetrack1.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill1.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill2.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill3.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/utils/noise_generator.py: -------------------------------------------------------------------------------- 
1 | import argparse 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='PNG Noise Generator for MuJoCo height fields') 7 | parser.add_argument('--filename', '-f', action='store', default='noise', 8 | help='Name of file output. ' 9 | 'File will be saved as a PNG file outside of the folder this is located in' 10 | '(usage: -f )') 11 | parser.add_argument('--dimension', '-d', type=int, nargs='+', default=(32, 32), 12 | help='Size of the 2D array (usage: -d )') 13 | parser.add_argument('--granularity', '-g', type=int, default=100, 14 | help='How fine or course the noise is. ' 15 | 'The larger the number, the finer the noise (usage: -g )') 16 | parser.add_argument('--start_size', '-s', type=int, default=2, 17 | help='The middle of the map will be always flat for starting.' 18 | 'Choose how big this block size will be (usage: -s )') 19 | parser.add_argument('--seed', type=int, default=None, 20 | help='Set seed for reproducible maps (usage: --seed )') 21 | 22 | args = parser.parse_args() 23 | 24 | if args.seed: 25 | np.random.seed(args.seed) 26 | 27 | midpoint = (int(args.dimension[0] / 2), int(args.dimension[1] / 2)) 28 | 29 | # build noisy array 30 | terrain = np.random.randint(args.granularity, size=args.dimension) 31 | 32 | terrain[midpoint[0] - args.start_size:midpoint[0] + args.start_size, 33 | midpoint[1] - args.start_size:midpoint[1] + args.start_size] = 0 34 | 35 | # save as png file 36 | plt.imsave('../{}.png'.format(args.filename), terrain, cmap='gray') 37 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/wavefield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/wavefield.png -------------------------------------------------------------------------------- /cassie/deprecated/env_test.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | 3 | # from cassie_env import CassieEnv 4 | 5 | # from mujoco.cassiemujoco import * 6 | # from trajectory.trajectory import CassieTrajectory 7 | 8 | 9 | # traj = CassieTrajectory("trajectory/stepdata.bin") 10 | 11 | 12 | # env = CassieEnv("trajectory/stepdata.bin") 13 | # csim = CassieSim() 14 | 15 | # u = pd_in_t() 16 | 17 | # test actual trajectory 18 | 19 | # for i in range(len(traj.qpos)): 20 | # qpos = traj.qpos[i] 21 | # qvel = traj.qvel[i] 22 | 23 | # csim.set_qpos(qpos) 24 | # csim.set_qvel(qvel) 25 | 26 | # y = csim.step_pd(u) 27 | 28 | # cvis.draw(csim) 29 | 30 | # print(i, end='\r') 31 | 32 | 33 | # test trajectory wrap-around 34 | 35 | # env.render() 36 | # env.reset() 37 | 38 | # u = pd_in_t() 39 | # while True: 40 | # # start = t.time() 41 | # # while True: 42 | # # stop = t.time() 43 | # # #print(stop-start) 44 | # # #print("stop") 45 | # # if stop - start > 0.033: 46 | # # break 47 | 48 | # pos, vel = env.get_ref_state() 49 | 50 | # '''env.phase = env.phase + 14 51 | # pos2, vel2 = env.get_kin_state() 52 | # print(pos[7:21]-pos2[21:35]) 53 | # env.phase = env.phase - 14''' 54 | 55 | # env.phase += 1 56 | # # #print(env.speed) 57 | # if env.phase >= 28: 58 | # env.phase = 0 59 | # env.counter += 1 60 | # #break 61 | # env.sim.set_qpos(pos) 62 | # env.sim.set_qvel(vel) 63 | # y = env.sim.step_pd(u) 64 | # env.render() 
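The deprecated script above is entirely commented out; condensed into a runnable form, the trajectory-replay idea it sketches would look roughly like the following (module paths relative to the repository root are assumed, and since the original never shows how the visualizer is constructed, this sketch only steps the simulator):

```python
from cassie.cassiemujoco import CassieSim, pd_in_t           # import path assumed from the repo layout
from cassie.trajectory.trajectory import CassieTrajectory    # import path assumed from the repo layout

traj = CassieTrajectory("cassie/trajectory/stepdata.bin")    # reference walking trajectory
sim = CassieSim()
u = pd_in_t()                                                # zero PD command

# Replay the reference trajectory through the simulator state, as the commented code above does.
for qpos, qvel in zip(traj.qpos, traj.qvel):
    sim.set_qpos(qpos)
    sim.set_qvel(qvel)
    sim.step_pd(u)
```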
-------------------------------------------------------------------------------- /cassie/deprecated/plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from tempfile import TemporaryFile 4 | 5 | # from cassie_env import CassieEnv 6 | from trajectory.trajectory import CassieTrajectory 7 | #from mujoco.cassiemujoco import * 8 | import time as t 9 | traj = CassieTrajectory("/home/robot/Desktop/apex/cassie/trajectory/stepdata.bin") 10 | # env = CassieEnv("walking") 11 | 12 | qpos_traj = traj.qpos 13 | time_traj = traj.time 14 | 15 | tt = traj.time 16 | #u = pd_in_t() 17 | 18 | # load your data 19 | data = np.load('cassie/outfile.npz') 20 | motor = data['motor'] 21 | joint = data['joint'] 22 | qpos = data['qpos_replay'] 23 | time = data['time'] 24 | 25 | delt_t = time[4] - time[3] 26 | delt_t_traj = time_traj[4] - time_traj[3] 27 | same_time = delt_t / delt_t_traj 28 | time_traj = time_traj * same_time 29 | 30 | #time = time * (60/2000) 31 | numStates = len(qpos) 32 | 33 | # np.savetxt("test_arr.txt", qpos[0:1000, 34]) 34 | print("Made it") 35 | # test actual trajectory 36 | 37 | rand = np.random.randint(1, 101, 1000) 38 | 39 | #log data 40 | plt.subplot(2,2,1) 41 | plt.plot(time[0:500], motor[0:500,4], 'r') 42 | plt.plot(time[0:500], motor[0:500, 9], 'k') 43 | 44 | ax2 = plt.subplot(2,2,2) 45 | ax2.plot(time[1200:1300], joint[1200:1300,2], 'r') 46 | ax2.plot(time[1200:1300], joint[1200:1300, 5], 'k') 47 | 48 | ax3 = plt.subplot(2,2,3) 49 | ax3.plot(time[1200:1300], qpos[1200:1300,20], 'r') 50 | ax3.plot(time[1200:1300], qpos[1200:1300, 34], 'k') 51 | 52 | ax2.get_shared_x_axes().join(ax2, ax3) 53 | ax2.set_xticklabels([]) 54 | 55 | 56 | #trajectory data 57 | plt.subplot(2,2,4) 58 | plt.plot(time_traj[:], qpos_traj[:,20], 'r') 59 | plt.plot(time_traj[:], qpos_traj[:, 34], 'k') 60 | plt.show() 61 | 62 | #trajectory data 63 | 64 | plt.plot(tt[:], qpos_traj[:,32] + qpos_traj[:, 33], 'r') 65 | # plt.plot(tt[:], qpos_traj[:,19], 'b') 66 | # plt.plot(tt[:], qpos_traj[:, 20], 'k') 67 | plt.show() -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.9.pkl 
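The command_trajectory_*.pkl files in these mission folders are pickled dicts with "compos", "speed", and "orient" arrays; they are consumed by the CommandTrajectory loader in cassie/missions/command_mission.py further below, which expects a plain command_trajectory.pkl inside the mission directory it is given. A minimal sketch for inspecting one of the per-speed files directly (the path is illustrative):

import pickle
import numpy as np

with open("cassie/missions/90_left/command_trajectory_1.9.pkl", "rb") as f:
    trajectory = pickle.load(f)

# each array is indexed by the mission step counter used by the command rewards
print(np.asarray(trajectory["compos"]).shape)
print(np.asarray(trajectory["speed"]).shape)
print(np.asarray(trajectory["orient"]).shape)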
-------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/__init__.py: -------------------------------------------------------------------------------- 1 | # We use this directory for storing missions -- high-level commands to policies 2 | 3 | from .command_mission import * 4 | from .add_waypoints import add_waypoints -------------------------------------------------------------------------------- /cassie/missions/add_waypoints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import lxml.etree as ET 8 | 9 | def read_xml(file): 
10 | return ET.parse(file, ET.XMLParser(remove_blank_text=True)) 11 | 12 | space=10 13 | color='1 0.9 0 0.7' 14 | 15 | 16 | def add_waypoints(input_file, output_file, waypoints_file): 17 | 18 | try: 19 | # create trajectory data frame 20 | traj_df = pd.read_csv(waypoints_file, header=None, usecols=[0, 1], names=['X', 'Y']) 21 | 22 | # read xml file 23 | tree = read_xml(input_file) 24 | 25 | except TypeError: 26 | if not input_file: 27 | print('No XML file provided...\n') 28 | else: 29 | print(str(input_file) + ' not found. Check XML file path.') 30 | sys.exit(0) 31 | 32 | # get root of xml tree 33 | root = tree.getroot() 34 | 35 | # get worldbody subelement from root 36 | worldbody = root.find('worldbody') 37 | 38 | for idx, pos in enumerate(traj_df.values[20::int(space)], start=1): 39 | # create a waypoint subelement 40 | ET.SubElement(worldbody, 'geom', {'name': 'waypoint{}'.format(idx), 41 | 'pos': '{} {} 1.01 '.format(pos[0], pos[1]), 42 | 'size': '0.03 0.03 0.03', 43 | 'type': 'sphere', 44 | 'contype': '0', 45 | 'rgba': color}) 46 | 47 | # add to root 48 | tree.write(output_file, encoding='utf-8', pretty_print=True, xml_declaration=True) 49 | 50 | 51 | if __name__ == "__main__": 52 | add_waypoints("default") 53 | -------------------------------------------------------------------------------- /cassie/missions/command_mission.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import os 4 | 5 | class CommandTrajectory: 6 | def __init__(self, mission_name): 7 | mission_path = os.path.join(mission_name, "command_trajectory.pkl") 8 | with open(mission_path, "rb") as f: 9 | trajectory = pickle.load(f) 10 | 11 | self.global_pos = np.copy(trajectory["compos"]) 12 | self.speed_cmd = np.copy(trajectory["speed"]) 13 | 14 | # NOTE: still need to rotate translational velocity and accleration 15 | self.orient = np.copy(trajectory["orient"]) 16 | self.prev_orient = 0 17 | 18 | self.trajlen = len(self.speed_cmd) 19 | 20 | # print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.global_pos[:5], self.speed_cmd[:5], self.orient[:5])) 21 | # print(self.speed_cmd.shape) 22 | # print(self.orient.shape) 23 | # print(np.max(self.speed_cmd)) 24 | # input() -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/default/command_trajectory.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/default/command_trajectory.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/outfile.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/outfile.npz -------------------------------------------------------------------------------- /cassie/plotting_ex.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import math 4 | 5 | # from cassie_env import CassieEnv 6 | 7 | from cassiemujoco import * 8 | from trajectory.trajectory import CassieTrajectory 9 | import matplotlib.pyplot as plt 10 | from matplotlib import style 11 | from matplotlib.animation import FuncAnimation 12 | import matplotlib.animation as animation 13 | from mpl_toolkits.mplot3d import Axes3D 14 | from IPython import display 15 | 16 | def visualise_sim_graph(file_path, freq_of_sim): 17 | traj = np.load(file_path) 18 | # env = CassieEnv("walking") 19 | # csim = CassieSim("./cassie/cassiemujoco/cassie.xml") 20 | # vis = CassieVis(csim, "./cassie/cassiemujoco/cassie.xml") 21 | u = pd_in_t() 22 | 23 | # pelvisXYZ = traj.f.qpos_replay[:, 0:3] 24 | # render_state = vis.draw(csim) 25 | # saved_time = traj.f.time[:] 26 | 27 | #################Graphing########### 28 | log_time = traj.f.time[:] 29 | y_val = traj.f.qpos_replay[:,2] #z - height 30 | x_data= log_time 31 | y_data = y_val 32 | 33 | delt_x = (x_data[1] - x_data[0]) * 1000 #convert seconds to ms 34 | 35 | num_frames = math.ceil(len(x_data) / 10) 36 | 37 | 38 | 39 | Writer = animation.writers['ffmpeg'] 40 | writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) 41 | 42 | output = plt.plot([]) 43 | plt.close() 44 | print(output[0]) 45 | 46 | x = np.linspace(0,2*np.pi, 100) 47 | 48 | fig = plt.figure() 49 | 50 | lines = plt.plot([]) 51 | line = lines[0] 52 | 53 | #other setup //set x and y lims 54 | plt.xlim(x_data.min(), x_data.max()) 55 | plt.ylim(y_data.min(), y_data.max()) 56 | def animate(frame): 57 | #update 58 | x = x_data[:frame*10] 59 | y = y_data[:frame*10] 60 | # y = np.sin(x + 2*np.pi * frame/100) 61 | line.set_data((x,y)) 62 | 63 | anim = FuncAnimation(fig, animate, frames=num_frames, interval=(1/freq_of_sim * 1000 + (10 * delt_x))) #20 is 50 fps 64 | 65 | anim.save('lines.mp4', writer=writer) 66 | # html = display.HTML(video) 67 | # display.display(html) 68 | 69 | plt.close() 70 | 71 | visualise_sim_graph("./outfile8.npz", 30) -------------------------------------------------------------------------------- /cassie/quaternion_function.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def inverse_quaternion(quaternion): 5 | result = np.copy(quaternion) 6 | result[1:4] = -result[1:4] 7 | return result 8 | 9 | def quaternion_product(q1, q2): 10 | result = np.zeros(4) 11 | result[0] = q1[0]*q2[0]-q1[1]*q2[1]-q1[2]*q2[2]-q1[3]*q2[3] 12 | result[1] = q1[0]*q2[1]+q2[0]*q1[1]+q1[2]*q2[3]-q1[3]*q2[2] 13 | result[2] = q1[0]*q2[2]-q1[1]*q2[3]+q1[2]*q2[0]+q1[3]*q2[1] 14 | result[3] = q1[0]*q2[3]+q1[1]*q2[2]-q1[2]*q2[1]+q1[3]*q2[0] 15 | return result 16 | 17 | def rotate_by_quaternion(vector, quaternion): 18 | q1 = np.copy(quaternion) 19 | q2 = np.zeros(4) 20 | q2[1:4] = np.copy(vector) 21 | q3 = inverse_quaternion(quaternion) 22 | q = quaternion_product(q2, q3) 23 | q = quaternion_product(q1, q) 24 | result = q[1:4] 25 | return result 26 | 27 | def quaternion2euler(quaternion): 28 | w = quaternion[0] 29 | x = quaternion[1] 30 | y = quaternion[2] 31 | z = quaternion[3] 32 | ysqr = y * y 33 | 34 | t0 = +2.0 * (w * x + y * z) 35 | t1 = +1.0 - 2.0 * (x * x + 
ysqr) 36 | X = math.degrees(math.atan2(t0, t1)) 37 | 38 | t2 = +2.0 * (w * y - z * x) 39 | t2 = +1.0 if t2 > +1.0 else t2 40 | t2 = -1.0 if t2 < -1.0 else t2 41 | Y = math.degrees(math.asin(t2)) 42 | 43 | t3 = +2.0 * (w * z + x * y) 44 | t4 = +1.0 - 2.0 * (ysqr + z * z) 45 | Z = math.degrees(math.atan2(t3, t4)) 46 | 47 | result = np.zeros(3) 48 | result[0] = X * np.pi / 180 49 | result[1] = Y * np.pi / 180 50 | result[2] = Z * np.pi / 180 51 | 52 | return result 53 | 54 | def euler2quat(z=0, y=0, x=0): 55 | 56 | z = z/2.0 57 | y = y/2.0 58 | x = x/2.0 59 | cz = math.cos(z) 60 | sz = math.sin(z) 61 | cy = math.cos(y) 62 | sy = math.sin(y) 63 | cx = math.cos(x) 64 | sx = math.sin(x) 65 | result = np.array([ 66 | cx*cy*cz - sx*sy*sz, 67 | cx*sy*sz + cy*cz*sx, 68 | cx*cz*sy - sx*cy*sz, 69 | cx*cy*sz + sx*cz*sy]) 70 | if result[0] < 0: 71 | result = -result 72 | return result -------------------------------------------------------------------------------- /cassie/rewards/__init__.py: -------------------------------------------------------------------------------- 1 | # We use this directory for keeping track of reward functions. Each reward function operates on an object of CassieEnv_v2, passed as 'self' 2 | 3 | from .clock_rewards import * 4 | from .aslip_rewards import * 5 | from .rnn_dyn_random_reward import * 6 | from .iros_paper_reward import * 7 | from .command_reward import * 8 | 9 | # from .speedmatch_footorient_joint_smooth_reward import * 10 | from .speedmatch_rewards import * 11 | from .trajmatch_reward import * 12 | from .standing_rewards import * 13 | # from .speedmatch_heuristic_reward import * 14 | from .side_speedmatch_rewards import * 15 | # from .side_speedmatch_foottraj_reward import * 16 | # from .side_speedmatch_heightvel_reward import * 17 | # from .side_speedmatch_heuristic_reward import * 18 | # from .side_speedmatch_torquesmooth_reward import * -------------------------------------------------------------------------------- /cassie/rewards/command_reward.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def quaternion2euler(quaternion): 5 | w = quaternion[0] 6 | x = quaternion[1] 7 | y = quaternion[2] 8 | z = quaternion[3] 9 | ysqr = y * y 10 | 11 | t0 = +2.0 * (w * x + y * z) 12 | t1 = +1.0 - 2.0 * (x * x + ysqr) 13 | X = math.degrees(math.atan2(t0, t1)) 14 | 15 | t2 = +2.0 * (w * y - z * x) 16 | t2 = +1.0 if t2 > +1.0 else t2 17 | t2 = -1.0 if t2 < -1.0 else t2 18 | Y = math.degrees(math.asin(t2)) 19 | 20 | t3 = +2.0 * (w * z + x * y) 21 | t4 = +1.0 - 2.0 * (ysqr + z * z) 22 | Z = math.degrees(math.atan2(t3, t4)) 23 | 24 | result = np.zeros(3) 25 | result[0] = X * np.pi / 180 26 | result[1] = Y * np.pi / 180 27 | result[2] = Z * np.pi / 180 28 | 29 | return result 30 | 31 | def euler2quat(z=0, y=0, x=0): 32 | 33 | z = z/2.0 34 | y = y/2.0 35 | x = x/2.0 36 | cz = math.cos(z) 37 | sz = math.sin(z) 38 | cy = math.cos(y) 39 | sy = math.sin(y) 40 | cx = math.cos(x) 41 | sx = math.sin(x) 42 | result = np.array([ 43 | cx*cy*cz - sx*sy*sz, 44 | cx*sy*sz + cy*cz*sx, 45 | cx*cz*sy - sx*cy*sz, 46 | cx*cy*sz + sx*cz*sy]) 47 | if result[0] < 0: 48 | result = -result 49 | return result 50 | 51 | def command_reward(self): 52 | qpos = np.copy(self.sim.qpos()) 53 | qvel = np.copy(self.sim.qvel()) 54 | 55 | # get current speed and orientation 56 | curr_pos = qpos[0:3] 57 | curr_speed = qvel[0] 58 | curr_orient = quaternion2euler(qpos[3:7])[2] 59 | 60 | # desired speed and orientation 61 | desired_pos = 
self.command_traj.global_pos[self.command_counter] + self.last_position
    desired_speed = self.command_traj.speed_cmd[self.command_counter]
    desired_orient = self.command_traj.orient[self.command_counter]

    compos_error = np.linalg.norm(curr_pos - desired_pos)
    speed_error = np.linalg.norm(curr_speed - desired_speed)
    orientation_error = np.linalg.norm(curr_orient - desired_orient)

    reward = 0.2 * np.exp(-speed_error) + \
             0.3 * np.exp(-compos_error) + \
             0.5 * np.exp(-orientation_error)

    if self.debug:
        # report each term with the same weights used in the reward above
        print("reward: {6}\nspeed:\t{0:.2f}, % = {1:.2f}\ncompos:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\n\n".format(
            0.2 * np.exp(-speed_error), 0.2 * np.exp(-speed_error) / reward * 100,
            0.3 * np.exp(-compos_error), 0.3 * np.exp(-compos_error) / reward * 100,
            0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
            reward
            )
        )
        print(self.command_counter)
        print("actual speed: {}\tdesired_speed: {}".format(curr_speed, desired_speed))
        print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
        print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
    return reward

def command_reward_no_pos(self):
    qpos = np.copy(self.sim.qpos())
    qvel = np.copy(self.sim.qvel())

    # get current speed and orientation
    # curr_pos = qpos[0:3]
    curr_speed = qvel[0]
    curr_orient = quaternion2euler(qpos[3:7])[2]

    # desired speed and orientation
    desired_speed = self.command_traj.speed_cmd[self.command_counter]
    desired_orient = self.command_traj.orient[self.command_counter]

    # compos_error = np.linalg.norm(curr_pos - desired_pos)
    speed_error = np.linalg.norm(curr_speed - desired_speed)
    orientation_error = np.linalg.norm(curr_orient - desired_orient)

    reward = 0.5 * np.exp(-speed_error) + \
             0.5 * np.exp(-orientation_error)

    if self.debug:
        print("reward: {4}\nspeed:\t{0:.2f}, % = {1:.2f}\norient:\t{2:.2f}, % = {3:.2f}\n\n".format(
            0.5 * np.exp(-speed_error), 0.5 * np.exp(-speed_error) / reward * 100,
            0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
            reward
            )
        )
        print(self.command_counter)
        print("actual speed: {}\tdesired_speed: {}".format(curr_speed, desired_speed))
        # print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
        print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
    return reward

def command_reward_keepalive(self):
    reward = 1.0
    if self.debug:
        print("reward = 1.0\tcounter={}".format(self.command_counter))
    return reward
--------------------------------------------------------------------------------
/cassie/rewards/iros_paper_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np 2 | 3 | def iros_paper_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | ref_pos, ref_vel = self.get_ref_state(self.phase) 8 | 9 | # TODO: should be variable; where do these come from?
10 | # TODO: see magnitude of state variables to gauge contribution to reward 11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 12 | 13 | joint_error = 0 14 | com_error = 0 15 | orientation_error = 0 16 | spring_error = 0 17 | 18 | # each joint pos 19 | for i, j in enumerate(self.pos_idx): 20 | target = ref_pos[j] 21 | actual = qpos[j] 22 | 23 | joint_error += 30 * weight[i] * (target - actual) ** 2 24 | 25 | # center of mass: x, y, z 26 | for j in [0, 1, 2]: 27 | target = ref_pos[j] 28 | actual = qpos[j] 29 | 30 | # NOTE: in Xie et al y target is 0 31 | 32 | com_error += (target - actual) ** 2 33 | 34 | # COM orientation: qx, qy, qz 35 | for j in [4, 5, 6]: 36 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 37 | actual = qpos[j] 38 | 39 | orientation_error += (target - actual) ** 2 40 | 41 | # left and right shin springs 42 | for i in [15, 29]: 43 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 44 | actual = qpos[i] 45 | 46 | spring_error += 1000 * (target - actual) ** 2 47 | 48 | reward = 0.5 * np.exp(-joint_error) + \ 49 | 0.3 * np.exp(-com_error) + \ 50 | 0.1 * np.exp(-orientation_error) + \ 51 | 0.1 * np.exp(-spring_error) 52 | 53 | # reward = np.sign(qvel[0])*qvel[0]**2 54 | # desired_speed = 3.0 55 | # speed_diff = np.abs(qvel[0] - desired_speed) 56 | # if speed_diff > 1: 57 | # speed_diff = speed_diff**2 58 | # reward = 20 - speed_diff 59 | 60 | return reward -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/rnn_dyn_random_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def jonah_RNN_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | ref_pos, ref_vel = self.get_ref_state(self.phase) 8 | 9 | # TODO: should be variable; where do these come from? 
10 | # TODO: see magnitude of state variables to gauge contribution to reward 11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 12 | 13 | joint_error = 0 14 | com_error = 0 15 | orientation_error = 0 16 | spring_error = 0 17 | 18 | # each joint pos 19 | for i, j in enumerate(self.pos_idx): 20 | target = ref_pos[j] 21 | actual = qpos[j] 22 | 23 | joint_error += 50 * weight[i] * (target - actual) ** 2 24 | 25 | # center of mass: x, y, z 26 | for j in [0, 1, 2]: 27 | target = ref_pos[j] 28 | actual = qpos[j] 29 | 30 | # NOTE: in Xie et al y target is 0 31 | 32 | com_error += 10 * (target - actual) ** 2 33 | 34 | actual_q = qpos[3:7] 35 | target_q = ref_pos[3:7] 36 | #target_q = [1, 0, 0, 0] 37 | orientation_error = 5 * (1 - np.inner(actual_q, target_q) ** 2) 38 | 39 | # left and right shin springs 40 | for i in [15, 29]: 41 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 42 | actual = qpos[i] 43 | 44 | spring_error += 1000 * (target - actual) ** 2 45 | 46 | reward = 0.200 * np.exp(-joint_error) + \ 47 | 0.450 * np.exp(-com_error) + \ 48 | 0.300 * np.exp(-orientation_error) + \ 49 | 0.050 * np.exp(-spring_error) 50 | 51 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_foottraj_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_foottraj_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 16 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 17 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 18 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) 19 | 20 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_heightvel_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_heightvel_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 16 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 17 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \ 18 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost) 19 | 20 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_heuristic_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_heuristic_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 
| orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | ######## Foot position penalty ######## 16 | foot_pos = np.zeros(6) 17 | self.sim.foot_pos(foot_pos) 18 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 19 | foot_penalty = 0 20 | if foot_dist < 0.22: 21 | foot_penalty = 0.2 22 | ######## Foot force penalty ######## 23 | foot_forces = self.sim.get_foot_forces() 24 | lforce = max((foot_forces[0] - 700)/1000, 0) 25 | rforce = max((foot_forces[1] - 700)/1000, 0) 26 | ######## Torque penalty ######## 27 | torque = np.linalg.norm(self.cassie_state.motor.torque[:]) 28 | ######## Pelvis z accel penalty ######### 29 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 30 | pelaccel_penalty = 0 31 | if pelaccel > 6: 32 | pelaccel_penalty = (pelaccel - 6) / 30 33 | ####### Prev action penalty ######## 34 | if self.prev_action is not None: 35 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 36 | else: 37 | prev_penalty = 0 38 | print("prev_penalty: ", prev_penalty) 39 | ######## Foot height bonus ######## 40 | footheight_penalty = 0 41 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0): 42 | # print("adding foot height penalty") 43 | footheight_penalty = 0.2 44 | 45 | 46 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 47 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \ 48 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 49 | - pelaccel_penalty \ 50 | - foot_penalty \ 51 | - lforce - rforce \ 52 | - footheight_penalty 53 | 54 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .4*np.exp(-forward_diff) + .4*np.exp(-side_diff) + .2*np.exp(-orient_diff) 16 | 17 | return reward 18 | 19 | def side_speedmatch_torquesmooth_reward(self): 20 | qpos = np.copy(self.sim.qpos()) 21 | qvel = np.copy(self.sim.qvel()) 22 | 23 | forward_diff = np.abs(qvel[0] -self.speed) 24 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 25 | side_diff = np.abs(qvel[1] - self.side_speed) 26 | if forward_diff < 0.05: 27 | forward_diff = 0 28 | if side_diff < 0.05: 29 | side_diff = 0 30 | 31 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \ 32 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost) 33 | 34 | return reward 35 | 36 | def side_speedmatch_foottraj_reward(self): 37 | qpos = np.copy(self.sim.qpos()) 38 | qvel = np.copy(self.sim.qvel()) 39 | 40 | forward_diff = np.abs(qvel[0] -self.speed) 41 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 42 | side_diff = np.abs(qvel[1] - self.side_speed) 43 | if forward_diff < 0.05: 44 
| forward_diff = 0 45 | if side_diff < 0.05: 46 | side_diff = 0 47 | 48 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 49 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 50 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 51 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) 52 | 53 | return reward 54 | 55 | def side_speedmatch_heightvel_reward(self): 56 | qpos = np.copy(self.sim.qpos()) 57 | qvel = np.copy(self.sim.qvel()) 58 | 59 | forward_diff = np.abs(qvel[0] -self.speed) 60 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 61 | side_diff = np.abs(qvel[1] - self.side_speed) 62 | if forward_diff < 0.05: 63 | forward_diff = 0 64 | if side_diff < 0.05: 65 | side_diff = 0 66 | 67 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 68 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 69 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \ 70 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost) 71 | 72 | return reward 73 | 74 | def side_speedmatch_heuristic_reward(self): 75 | qpos = np.copy(self.sim.qpos()) 76 | qvel = np.copy(self.sim.qvel()) 77 | 78 | forward_diff = np.abs(qvel[0] -self.speed) 79 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 80 | side_diff = np.abs(qvel[1] - self.side_speed) 81 | if forward_diff < 0.05: 82 | forward_diff = 0 83 | if side_diff < 0.05: 84 | side_diff = 0 85 | 86 | ######## Foot position penalty ######## 87 | foot_pos = np.zeros(6) 88 | self.sim.foot_pos(foot_pos) 89 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 90 | foot_penalty = 0 91 | if foot_dist < 0.22: 92 | foot_penalty = 0.2 93 | ######## Foot force penalty ######## 94 | foot_forces = self.sim.get_foot_forces() 95 | lforce = max((foot_forces[0] - 700)/1000, 0) 96 | rforce = max((foot_forces[1] - 700)/1000, 0) 97 | ######## Torque penalty ######## 98 | torque = np.linalg.norm(self.cassie_state.motor.torque[:]) 99 | ######## Pelvis z accel penalty ######### 100 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 101 | pelaccel_penalty = 0 102 | if pelaccel > 6: 103 | pelaccel_penalty = (pelaccel - 6) / 30 104 | ####### Prev action penalty ######## 105 | if self.prev_action is not None: 106 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 107 | else: 108 | prev_penalty = 0 109 | print("prev_penalty: ", prev_penalty) 110 | ######## Foot height bonus ######## 111 | footheight_penalty = 0 112 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0): 113 | # print("adding foot height penalty") 114 | footheight_penalty = 0.2 115 | 116 | 117 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 118 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \ 119 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 120 | - pelaccel_penalty \ 121 | - foot_penalty \ 122 | - lforce - rforce \ 123 | - footheight_penalty 124 | 125 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_torquesmooth_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def 
side_speedmatch_torquesmooth_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \ 16 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost) 17 | return reward -------------------------------------------------------------------------------- /cassie/rewards/speedmatch_footorient_joint_smooth_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def speedmatch_footorient_joint_smooth_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | orient_targ = np.array([1, 0, 0, 0]) 8 | speed_targ = np.array([self.speed, 0, 0]) 9 | if self.time >= self.orient_time: 10 | orient_targ = euler2quat(z=self.orient_add, y=0, x=0) 11 | iquaternion = inverse_quaternion(orient_targ) 12 | speed_targ = rotate_by_quaternion(speed_targ, iquaternion) 13 | new_orient = quaternion_product(iquaternion, self.cassie_state.pelvis.orientation[:]) 14 | if new_orient[0] < 0: 15 | new_orient = -new_orient 16 | forward_diff = np.abs(qvel[0] - speed_targ[0]) 17 | orient_diff = 1 - np.inner(orient_targ, qpos[3:7]) ** 2 18 | # orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 19 | y_vel = np.abs(qvel[1] - speed_targ[1]) 20 | if forward_diff < 0.05: 21 | forward_diff = 0 22 | if y_vel < 0.05: 23 | y_vel = 0 24 | straight_diff = 8*np.abs(qpos[1] - self.y_offset) 25 | if np.abs(qpos[1] - self.y_offset) < 0.05: 26 | straight_diff = 0 27 | if orient_diff < 5e-3: 28 | orient_diff = 0 29 | else: 30 | orient_diff *= 30 31 | 32 | reward = .25*np.exp(-forward_diff) + .1*np.exp(-orient_diff) \ 33 | + .1*np.exp(-straight_diff) + .1*np.exp(-y_vel) \ 34 | + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \ 35 | + .1*np.exp(-self.smooth_cost) \ 36 | + .15*np.exp(-self.joint_error) 37 | 38 | return reward -------------------------------------------------------------------------------- /cassie/rewards/speedmatch_heuristic_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def speedmatch_heuristic_reward(self): 4 | ######## Pelvis z accel penalty ######### 5 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 6 | pelaccel_penalty = 0 7 | if pelaccel > 5: 8 | pelaccel_penalty = (pelaccel - 5) / 10 9 | pelbonus = 0 10 | if 8 < pelaccel < 10: 11 | pelbonus = 0.2 12 | ######## Foot position penalty ######## 13 | foot_pos = np.zeros(6) 14 | self.sim.foot_pos(foot_pos) 15 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 16 | foot_penalty = 0 17 | if foot_dist < 0.14: 18 | foot_penalty = 0.2 19 | ######## Foot force penalty ######## 20 | foot_forces = self.sim.get_foot_forces() 21 | lforce = max((foot_forces[0] - 350)/1000, 0) 22 | rforce = max((foot_forces[1] - 350)/1000, 0) 23 | forcebonus = 0 24 | # print("foot force: ", lforce, rforce) 25 | # lbonus = max((800 - foot_forces[0])/1000, 0) 26 | if foot_forces[0] <= 1000 and foot_forces[1] <= 1000: 27 | forcebonus = foot_forces[0] / 5000 + foot_forces[1] / 5000 28 | ######## Foot velocity penalty ######## 29 | lfoot_vel_bonus = 0 30 | rfoot_vel_bonus = 0 31 | # if 
self.prev_foot is not None and foot_pos[2] < 0.3 and foot_pos[5] < 0.3: 32 | # lfoot_vel = np.abs(foot_pos[2] - self.prev_foot[2]) / 0.03 * 0.03 33 | # rfoot_vel = np.abs(foot_pos[5] - self.prev_foot[5]) / 0.03 * 0.03 34 | # if self.l_high: 35 | # lfoot_vel_bonus = self.lfoot_vel * 0.3 36 | # if self.r_high: 37 | # rfoot_vel_bonus = self.rfoot_vel * 0.3 38 | ######## Foot orientation ######## 39 | lfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.leftFoot.orientation[:]) ** 2 40 | rfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.rightFoot.orientation[:]) ** 2 41 | ####### Hip yaw ######## 42 | rhipyaw = np.abs(qpos[22]) 43 | lhipyaw = np.abs(qpos[8]) 44 | if lhipyaw < 0.05: 45 | lhipyaw = 0 46 | if rhipyaw < 0.05: 47 | rhipyaw = 0 48 | ####### Hip roll penalty ######### 49 | lhiproll = np.abs(qpos[7]) 50 | rhiproll = np.abs(qpos[21]) 51 | if lhiproll < 0.05: 52 | lhiproll = 0 53 | if rhiproll < 0.05: 54 | rhiproll = 0 55 | ####### Prev action penalty ######## 56 | if self.prev_action is not None: 57 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 58 | else: 59 | prev_penalty = 0 60 | 61 | reward = .2*np.exp(-self.com_vel_error) + .1*np.exp(-self.com_error) + .1*np.exp(-self.orientation_error) \ 62 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-5*self.l_footvel_diff) \ 63 | + .1*np.exp(-20*self.r_foot_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 64 | + .1*np.exp(-lfoot_orient) + .1*np.exp(-rfoot_orient) 65 | # reward = .4*np.exp(-forward_diff) + .3*np.exp(-orient_diff) \ 66 | # + .15*np.exp(-straight_diff) + .15*np.exp(-y_vel) \ 67 | # + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \ 68 | # + .1*np.exp(-self.smooth_cost) \ 69 | # + .15*np.exp(-self.joint_error) 70 | # + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) #\ 71 | # 72 | # + .075*np.exp(-10*lhipyaw) + .075*np.exp(-10*rhipyaw) + .075*np.exp(-10*lhiproll) + .075*np.exp(-10*rhiproll) 73 | # + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 74 | # + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) 75 | # - lfoot_vel_bonus - rfoot_vel_bonus - foot_penalty 76 | # - lforce - rforce 77 | #+ pelbonus- pelaccel_penalty - foot_penalty 78 | -------------------------------------------------------------------------------- /cassie/rewards/standing_rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def stand_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | com_vel = np.linalg.norm(qvel[0:3]) 8 | com_height = (0.9 - qpos[2]) ** 2 9 | 10 | reward = 0.5*np.exp(-com_vel) + 0.5*np.exp(-com_height) 11 | 12 | return reward 13 | 14 | def step_even_reward(self): 15 | qpos = np.copy(self.sim.qpos()) 16 | qvel = np.copy(self.sim.qvel()) 17 | 18 | com_vel = np.linalg.norm(qvel[0:3]) 19 | com_height = (0.9 - qpos[2]) ** 2 20 | 21 | reward = 0.2*np.exp(-com_vel) + 0.2*np.exp(-com_height) \ 22 | + 0.3*np.exp(-self.l_foot_cost_even) + 0.3*np.exp(-self.r_foot_cost_even) 23 | 24 | return reward 25 | 26 | def step_even_pelheight_reward(self): 27 | qpos = np.copy(self.sim.qpos()) 28 | qvel = np.copy(self.sim.qvel()) 29 | 30 | com_height = (0.9 - qpos[2]) ** 2 31 | if qpos[2] > 0.8: 32 | com_height = 0 33 | 34 | reward = 0.2*np.exp(-com_height) \ 35 | + 0.4*np.exp(-self.l_foot_cost_even) + 0.4*np.exp(-self.r_foot_cost_even) 36 | 37 | return reward 38 | 39 | def 
step_smooth_pelheight_reward(self): 40 | qpos = np.copy(self.sim.qpos()) 41 | qvel = np.copy(self.sim.qvel()) 42 | 43 | com_height = (0.9 - qpos[2]) ** 2 44 | if qpos[2] > 0.8: 45 | com_height = 0 46 | 47 | reward = 0.2*np.exp(-com_height) \ 48 | + 0.4*np.exp(-self.l_foot_cost_smooth) + 0.4*np.exp(-self.r_foot_cost_smooth) 49 | 50 | return reward -------------------------------------------------------------------------------- /cassie/rewards/trajmatch_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def trajmatch_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | phase_diff = self.phase - np.floor(self.phase) 7 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase))) 8 | if phase_diff != 0: 9 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase))) 10 | ref_pos_diff = ref_pos_next - ref_pos_prev 11 | ref_vel_diff = ref_vel_next - ref_vel_prev 12 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff 13 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff 14 | else: 15 | ref_pos = ref_pos_prev 16 | ref_vel = ref_vel_prev 17 | 18 | ref_pos, ref_vel = self.get_ref_state(self.phase) 19 | 20 | # TODO: should be variable; where do these come from? 21 | # TODO: see magnitude of state variables to gauge contribution to reward 22 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 23 | 24 | joint_error = 0 25 | com_error = 0 26 | orientation_error = 0 27 | spring_error = 0 28 | 29 | # each joint pos 30 | for i, j in enumerate(self.pos_idx): 31 | target = ref_pos[j] 32 | actual = qpos[j] 33 | 34 | joint_error += 30 * weight[i] * (target - actual) ** 2 35 | 36 | # center of mass: x, y, z 37 | for j in [0, 1, 2]: 38 | target = ref_pos[j] 39 | actual = qpos[j] 40 | 41 | # NOTE: in Xie et al y target is 0 42 | 43 | com_error += (target - actual) ** 2 44 | 45 | # COM orientation: qx, qy, qz 46 | for j in [4, 5, 6]: 47 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 48 | actual = qpos[j] 49 | 50 | orientation_error += (target - actual) ** 2 51 | 52 | # left and right shin springs 53 | for i in [15, 29]: 54 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 55 | actual = qpos[i] 56 | 57 | spring_error += 1000 * (target - actual) ** 2 58 | 59 | reward = 0.5 * np.exp(-joint_error) + \ 60 | 0.3 * np.exp(-com_error) + \ 61 | 0.1 * np.exp(-orientation_error) + \ 62 | 0.1 * np.exp(-spring_error) 63 | 64 | # orientation error does not look informative 65 | # maybe because it's comparing euclidean distance on quaternions 66 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format( 67 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100, 68 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100, 69 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100, 70 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100, 71 | # reward 72 | # ) 73 | # ) 74 | 75 | return reward 76 | 77 | def trajmatch_footorient_hiprollvelact_reward(self): 78 | qpos = np.copy(self.sim.qpos()) 79 | qvel = np.copy(self.sim.qvel()) 80 | phase_diff = self.phase - np.floor(self.phase) 81 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase))) 82 | if phase_diff != 0: 83 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase))) 84 | 
ref_pos_diff = ref_pos_next - ref_pos_prev 85 | ref_vel_diff = ref_vel_next - ref_vel_prev 86 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff 87 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff 88 | else: 89 | ref_pos = ref_pos_prev 90 | ref_vel = ref_vel_prev 91 | 92 | ref_pos, ref_vel = self.get_ref_state(self.phase) 93 | 94 | # TODO: should be variable; where do these come from? 95 | # TODO: see magnitude of state variables to gauge contribution to reward 96 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 97 | 98 | joint_error = 0 99 | com_error = 0 100 | orientation_error = 0 101 | spring_error = 0 102 | 103 | # each joint pos 104 | for i, j in enumerate(self.pos_idx): 105 | target = ref_pos[j] 106 | actual = qpos[j] 107 | 108 | joint_error += 30 * weight[i] * (target - actual) ** 2 109 | 110 | # center of mass: x, y, z 111 | for j in [0, 1, 2]: 112 | target = ref_pos[j] 113 | actual = qpos[j] 114 | 115 | # NOTE: in Xie et al y target is 0 116 | 117 | com_error += (target - actual) ** 2 118 | 119 | # COM orientation: qx, qy, qz 120 | for j in [4, 5, 6]: 121 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 122 | actual = qpos[j] 123 | 124 | orientation_error += (target - actual) ** 2 125 | 126 | # left and right shin springs 127 | for i in [15, 29]: 128 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 129 | actual = qpos[i] 130 | 131 | spring_error += 1000 * (target - actual) ** 2 132 | 133 | reward = 0.3 * np.exp(-joint_error) + \ 134 | 0.2 * np.exp(-com_error) + \ 135 | 0.1 * np.exp(-orientation_error) + \ 136 | 0.1 * np.exp(-spring_error) \ 137 | + .075*np.exp(-self.l_foot_orient_cost) + .075*np.exp(-self.r_foot_orient_cost) \ 138 | + .1*np.exp(-self.hiproll_cost) + 0.05*np.exp(-self.hiproll_act) 139 | 140 | # orientation error does not look informative 141 | # maybe because it's comparing euclidean distance on quaternions 142 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format( 143 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100, 144 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100, 145 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100, 146 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100, 147 | # reward 148 | # ) 149 | # ) 150 | 151 | return reward -------------------------------------------------------------------------------- /cassie/trajectory/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/.DS_Store -------------------------------------------------------------------------------- /cassie/trajectory/__init__.py: -------------------------------------------------------------------------------- 1 | from .trajectory import * 2 | from .aslip_trajectory import * -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl 
-------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl -------------------------------------------------------------------------------- /cassie/trajectory/backward_trajectory_Nov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/backward_trajectory_Nov -------------------------------------------------------------------------------- /cassie/trajectory/ikNet_state_dict.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/ikNet_state_dict.pt -------------------------------------------------------------------------------- /cassie/trajectory/more-poses-trial.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/more-poses-trial.bin -------------------------------------------------------------------------------- /cassie/trajectory/spline_stepping_traj.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/spline_stepping_traj.pkl -------------------------------------------------------------------------------- /cassie/trajectory/stepdata.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepdata.bin -------------------------------------------------------------------------------- /cassie/trajectory/stepping_trajectory_Nov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepping_trajectory_Nov -------------------------------------------------------------------------------- /cassie/trajectory/test.py: -------------------------------------------------------------------------------- 1 | # $ ipython -i test.py 2 | 3 | from trajectory import CassieTrajectory 4 | 5 | traj = CassieTrajectory("stepdata.bin") 6 | 7 | print(len(traj.qpos[0])) -------------------------------------------------------------------------------- /cassie/trajectory/traj_from_ref_foot_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/traj_from_ref_foot_data.pkl -------------------------------------------------------------------------------- /cassie/trajectory/trajectory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | """ 5 | Agility 2 kHz trajectory 6 | """ 7 | class CassieTrajectory: 8 | def __init__(self, filepath): 9 | n = 1 + 35 + 32 + 10 + 10 + 10 10 | data = np.fromfile(filepath, dtype=np.double).reshape((-1, n)) 11 | 12 | # states 13 | self.time = data[:, 0] 14 | self.qpos = data[:, 1:36] 15 | self.qvel = data[:, 36:68] 16 | 17 | # actions 18 | self.torque = data[:, 68:78] 19 | self.mpos = data[:, 78:88] 20 | self.mvel = data[:, 88:98] 21 | 22 | def state(self, t): 23 | tmax = self.time[-1] 24 | 25 | i 
= int((t % tmax) / tmax * len(self.time)) 26 | 27 | return (self.qpos[i], self.qvel[i]) 28 | 29 | def action(self, t): 30 | tmax = self.time[-1] 31 | i = int((t % tmax) / tmax * len(self.time)) 32 | return (self.mpos[i], self.mvel[i], self.torque[i]) 33 | 34 | def sample(self): 35 | i = random.randrange(len(self.time)) 36 | return (self.time[i], self.qpos[i], self.qvel[i]) 37 | 38 | def __len__(self): 39 | return len(self.time) -------------------------------------------------------------------------------- /cassie/trajectory/walk-in-place-downsampled.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/walk-in-place-downsampled.bin -------------------------------------------------------------------------------- /img/output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output.gif -------------------------------------------------------------------------------- /img/output2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output2.gif -------------------------------------------------------------------------------- /mirror_policy_check.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hashlib, os, pickle 3 | import sys, time 4 | from cassie.quaternion_function import * 5 | import tty 6 | import termios 7 | import select 8 | import numpy as np 9 | from functools import partial 10 | from rl.envs.wrappers import SymmetricEnv 11 | from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_noaccel_footdist 12 | 13 | def isData(): 14 | return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) 15 | 16 | env = CassieEnv(state_est=True, dynamics_randomization=False, history=0) 17 | env_fn = partial(CassieEnv, state_est=True, dynamics_randomization=False, history=0) 18 | # env = CassieEnv_noaccel_footdist(state_est=True, dynamics_randomization=False, history=0) 19 | # env_fn = partial(CassieEnv_noaccel_footdist, state_est=True, dynamics_randomization=False, history=0) 20 | 21 | sym_env = SymmetricEnv(env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4]) 22 | # obs = env.get_full_state() 23 | # print("obs len: ", len(obs)) 24 | # exit() 25 | 26 | path = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/" 27 | # path = "./logs/footdist/CassieNoaccelFootDist/noaccel_footdist_speedmatch_seed10/" 28 | policy = torch.load(path + "actor.pt") 29 | policy.eval() 30 | 31 | old_settings = termios.tcgetattr(sys.stdin) 32 | 33 | orient_add = 0 34 | 35 | env.render() 36 | render_state = True 37 | try: 38 | tty.setcbreak(sys.stdin.fileno()) 39 | 40 | state = env.reset_for_test() 41 | done = False 42 | timesteps = 0 43 | eval_reward = 0 44 | speed = 0.0 45 | 46 | while render_state: 47 | 48 | if isData(): 49 | c = sys.stdin.read(1) 50 | if c == 'w': 51 | speed += 0.1 52 | elif c == 's': 53 | speed -= 0.1 54 | elif c == 'j': 55 | env.phase_add += .1 56 | print("Increasing frequency to: {:.1f}".format(env.phase_add)) 57 | elif c == 'h': 58 | env.phase_add -= .1 59 | print("Decreasing frequency to: {:.1f}".format(env.phase_add)) 60 | elif c == 
'l': 61 | orient_add += .1 62 | print("Increasing orient_add to: ", orient_add) 63 | elif c == 'k': 64 | orient_add -= .1 65 | print("Decreasing orient_add to: ", orient_add) 66 | elif c == 'p': 67 | push = 100 68 | push_dir = 2 69 | force_arr = np.zeros(6) 70 | force_arr[push_dir] = push 71 | env.sim.apply_force(force_arr) 72 | 73 | env.update_speed(speed) 74 | print("speed: ", env.speed) 75 | 76 | if hasattr(env, 'simrate'): 77 | start = time.time() 78 | 79 | if (not env.vis.ispaused()): 80 | # Update Orientation 81 | quaternion = euler2quat(z=orient_add, y=0, x=0) 82 | iquaternion = inverse_quaternion(quaternion) 83 | 84 | if env.state_est: 85 | curr_orient = state[1:5] 86 | curr_transvel = state[15:18] 87 | # curr_orient = state[6:10] 88 | # curr_transvel = state[20:23] 89 | else: 90 | curr_orient = state[2:6] 91 | curr_transvel = state[20:23] 92 | 93 | new_orient = quaternion_product(iquaternion, curr_orient) 94 | 95 | if new_orient[0] < 0: 96 | new_orient = -new_orient 97 | 98 | new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion) 99 | 100 | if env.state_est: 101 | state[1:5] = torch.FloatTensor(new_orient) 102 | state[15:18] = torch.FloatTensor(new_translationalVelocity) 103 | # state[6:10] = torch.FloatTensor(new_orient) 104 | # state[20:23] = torch.FloatTensor(new_translationalVelocity) 105 | # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware. 106 | else: 107 | state[2:6] = torch.FloatTensor(new_orient) 108 | state[20:23] = torch.FloatTensor(new_translationalVelocity) 109 | 110 | state = torch.Tensor(state) 111 | # Calculate mirror state and mirror action 112 | with torch.no_grad(): 113 | mirror_state = sym_env.mirror_clock_observation(state.unsqueeze(0), env.clock_inds)[0] 114 | # Mirror pelvis orientation and velocity 115 | # mir_quat = inverse_quaternion(mirror_state[1:5]) 116 | # mir_quat[2] *= -1 117 | # mirror_state[1:5] = torch.Tensor(mir_quat) 118 | # mirror_state[16] *= -1 # y trans vel 119 | # mir_rot_vel = -mirror_state[18:21] 120 | # mir_rot_vel[1] *= -1 121 | # mirror_state[18:21] = mir_rot_vel 122 | # mirror_state[32] *= -1 # y trans accel 123 | mir_action = policy.forward(mirror_state, deterministic=True) 124 | mir_mir_action = sym_env.mirror_action(mir_action.unsqueeze(0)).detach().numpy()[0] 125 | action = policy.forward(state, deterministic=True).detach().numpy() 126 | # print("mirror action diff: ", np.linalg.norm(mir_mir_action - action)) 127 | state, reward, done, _ = env.step(mir_mir_action) 128 | 129 | eval_reward += reward 130 | timesteps += 1 131 | 132 | 133 | render_state = env.render() 134 | if hasattr(env, 'simrate'): 135 | # assume 30hz (hack) 136 | end = time.time() 137 | delaytime = max(0, 1000 / 30000 - (end-start)) 138 | time.sleep(delaytime) 139 | 140 | print("Eval reward: ", eval_reward) 141 | 142 | finally: 143 | termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) -------------------------------------------------------------------------------- /rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/rl/__init__.py -------------------------------------------------------------------------------- /rl/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rl/config/monitor.ini: 
-------------------------------------------------------------------------------- 1 | [monitor] 2 | # Options: Timesteps, Iterations, (walltime to be included in future) 3 | xlabel=Iterations 4 | 5 | # Options: Fixed, Variable 6 | xlim=Variable -------------------------------------------------------------------------------- /rl/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaussian import DiagonalGaussian 2 | from .beta import Beta, Beta2 -------------------------------------------------------------------------------- /rl/distributions/beta.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # TODO: extend these for arbitrary bounds 8 | 9 | """A beta distribution, but where the pdf is scaled to (-1, 1)""" 10 | class BoundedBeta(torch.distributions.Beta): 11 | def log_prob(self, x): 12 | return super().log_prob((x + 1) / 2) 13 | 14 | class Beta(nn.Module): 15 | def __init__(self, action_dim): 16 | super(Beta, self).__init__() 17 | 18 | self.action_dim = action_dim 19 | 20 | def forward(self, alpha_beta): 21 | alpha = 1 + F.softplus(alpha_beta[:, :self.action_dim]) 22 | beta = 1 + F.softplus(alpha_beta[:, self.action_dim:]) 23 | return alpha, beta 24 | 25 | def sample(self, x, deterministic): 26 | if deterministic is False: 27 | action = self.evaluate(x).sample() 28 | else: 29 | # E = alpha / (alpha + beta) 30 | return self.evaluate(x).mean 31 | 32 | return 2 * action - 1 33 | 34 | def evaluate(self, x): 35 | alpha, beta = self(x) 36 | return BoundedBeta(alpha, beta) 37 | 38 | 39 | # TODO: think of a better name for this 40 | """Beta distribution parameterized by mean and variance.""" 41 | class Beta2(nn.Module): 42 | def __init__(self, action_dim, init_std=0.25, learn_std=False): 43 | super(Beta2, self).__init__() 44 | 45 | assert init_std < 0.5, "Beta distribution has a max std dev of 0.5" 46 | 47 | self.action_dim = action_dim 48 | 49 | self.logstd = nn.Parameter( 50 | torch.ones(1, action_dim) * np.log(init_std), 51 | requires_grad=learn_std 52 | ) 53 | 54 | self.learn_std = learn_std 55 | 56 | 57 | def forward(self, x): 58 | mean = torch.sigmoid(x) 59 | 60 | var = self.logstd.exp().pow(2) 61 | 62 | """ 63 | alpha = ((1 - mu) / sigma^2 - 1 / mu) * mu^2 64 | beta = alpha * (1 / mu - 1) 65 | 66 | Implemented slightly differently for numerical stability. 
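        Expanding those two identities with mu = mean and sigma^2 = var, and
        rearranging so that no division by mu is needed (the "numerical
        stability" mentioned above), gives the form computed below:

            alpha = (1 - mu) * mu^2 / sigma^2 - mu
            beta  = (1 - mu) * mu   / sigma^2 - 1 - alpha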
67 | """ 68 | alpha = ((1 - mean) / var) * mean.pow(2) - mean 69 | beta = ((1 - mean) / var) * mean - 1 - alpha 70 | 71 | # PROBLEM: if alpha or beta < 1 thats not good 72 | 73 | #assert np.allclose(alpha, ((1 - mean) / var - 1 / mean) * mean.pow(2)) 74 | #assert np.allclose(beta, ((1 - mean) / var - 1 / mean) * mean.pow(2) * (1 / mean - 1)) 75 | 76 | #alpha = 1 + F.softplus(alpha) 77 | #beta = 1 + F.softplus(beta) 78 | 79 | # print("alpha",alpha) 80 | # print("beta",beta) 81 | 82 | # #print(alpha / (alpha + beta)) 83 | # print("mu",mean) 84 | 85 | # #print(torch.sqrt(alpha * beta / ((alpha+beta)**2 * (alpha + beta + 1)))) 86 | # print("var", var) 87 | 88 | # import pdb 89 | # pdb.set_trace() 90 | 91 | return alpha, beta 92 | 93 | def sample(self, x, deterministic): 94 | if deterministic is False: 95 | action = self.evaluate(x).sample() 96 | else: 97 | # E = alpha / (alpha + beta) 98 | return self.evaluate(x).mean 99 | 100 | # 2 * a - 1 puts a in (-1, 1) 101 | return 2 * action - 1 102 | 103 | def evaluate(self, x): 104 | alpha, beta = self(x) 105 | return BoundedBeta(alpha, beta) -------------------------------------------------------------------------------- /rl/distributions/gaussian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | # TODO: look at change of variables function for enforcing 8 | # action bounds correctly 9 | class DiagonalGaussian(nn.Module): 10 | def __init__(self, num_outputs, init_std=1, learn_std=True): 11 | super(DiagonalGaussian, self).__init__() 12 | 13 | self.logstd = nn.Parameter( 14 | torch.ones(1, num_outputs) * np.log(init_std), 15 | requires_grad=learn_std 16 | ) 17 | 18 | self.learn_std = learn_std 19 | 20 | def forward(self, x): 21 | mean = x 22 | 23 | std = self.logstd.exp() 24 | 25 | return mean, std 26 | 27 | def sample(self, x, deterministic): 28 | if deterministic is False: 29 | action = self.evaluate(x).sample() 30 | else: 31 | action, _ = self(x) 32 | 33 | return action 34 | 35 | def evaluate(self, x): 36 | mean, std = self(x) 37 | return torch.distributions.Normal(mean, std) 38 | -------------------------------------------------------------------------------- /rl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .vectorize import * 2 | from .normalize import * 3 | from .wrappers import * -------------------------------------------------------------------------------- /rl/envs/monitor.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/bench/monitor.py 2 | 3 | import time 4 | from glob import glob 5 | import csv 6 | import os.path as osp 7 | import json 8 | 9 | class Monitor: 10 | EXT = "monitor.csv" 11 | f = None 12 | 13 | def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()): 14 | Wrapper.__init__(self, env=env) 15 | self.tstart = time.time() 16 | if filename is None: 17 | self.f = None 18 | self.logger = None 19 | else: 20 | if not filename.endswith(Monitor.EXT): 21 | if osp.isdir(filename): 22 | filename = osp.join(filename, Monitor.EXT) 23 | else: 24 | filename = filename + "." 
+ Monitor.EXT 25 | self.f = open(filename, "wt") 26 | self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, "gym_version": gym.__version__, 27 | "env_id": env.spec.id if env.spec else 'Unknown'})) 28 | self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+reset_keywords) 29 | self.logger.writeheader() 30 | 31 | self.reset_keywords = reset_keywords 32 | self.allow_early_resets = allow_early_resets 33 | self.rewards = None 34 | self.needs_reset = True 35 | self.episode_rewards = [] 36 | self.episode_lengths = [] 37 | self.total_steps = 0 38 | self.current_reset_info = {} # extra info about the current episode, that was passed in during reset() 39 | 40 | def _reset(self, **kwargs): 41 | if not self.allow_early_resets and not self.needs_reset: 42 | raise RuntimeError("Tried to reset an environment before done. If you want to allow early resets, wrap your env with Monitor(env, path, allow_early_resets=True)") 43 | self.rewards = [] 44 | self.needs_reset = False 45 | for k in self.reset_keywords: 46 | v = kwargs.get(k) 47 | if v is None: 48 | raise ValueError('Expected you to pass kwarg %s into reset'%k) 49 | self.current_reset_info[k] = v 50 | return self.env.reset(**kwargs) 51 | 52 | def _step(self, action): 53 | if self.needs_reset: 54 | raise RuntimeError("Tried to step environment that needs reset") 55 | ob, rew, done, info = self.env.step(action) 56 | self.rewards.append(rew) 57 | if done: 58 | self.needs_reset = True 59 | eprew = sum(self.rewards) 60 | eplen = len(self.rewards) 61 | epinfo = {"r": round(eprew, 6), "l": eplen, "t": round(time.time() - self.tstart, 6)} 62 | epinfo.update(self.current_reset_info) 63 | if self.logger: 64 | self.logger.writerow(epinfo) 65 | self.f.flush() 66 | self.episode_rewards.append(eprew) 67 | self.episode_lengths.append(eplen) 68 | info['episode'] = epinfo 69 | self.total_steps += 1 70 | return (ob, rew, done, info) 71 | 72 | def close(self): 73 | if self.f is not None: 74 | self.f.close() 75 | 76 | def get_total_steps(self): 77 | return self.total_steps 78 | 79 | def get_episode_rewards(self): 80 | return self.episode_rewards 81 | 82 | def get_episode_lengths(self): 83 | return self.episode_lengths 84 | 85 | class LoadMonitorResultsError(Exception): 86 | pass 87 | 88 | def get_monitor_files(dir): 89 | return glob(osp.join(dir, "*" + Monitor.EXT)) 90 | 91 | def load_results(dir): 92 | import pandas 93 | monitor_files = glob(osp.join(dir, "*monitor.*")) # get both csv and (old) json files 94 | if not monitor_files: 95 | raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir)) 96 | dfs = [] 97 | headers = [] 98 | for fname in monitor_files: 99 | with open(fname, 'rt') as fh: 100 | if fname.endswith('csv'): 101 | firstline = fh.readline() 102 | assert firstline[0] == '#' 103 | header = json.loads(firstline[1:]) 104 | df = pandas.read_csv(fh, index_col=None) 105 | headers.append(header) 106 | elif fname.endswith('json'): # Deprecated json format 107 | episodes = [] 108 | lines = fh.readlines() 109 | header = json.loads(lines[0]) 110 | headers.append(header) 111 | for line in lines[1:]: 112 | episode = json.loads(line) 113 | episodes.append(episode) 114 | df = pandas.DataFrame(episodes) 115 | df['t'] += header['t_start'] 116 | dfs.append(df) 117 | df = pandas.concat(dfs) 118 | df.sort_values('t', inplace=True) 119 | df['t'] -= min(header['t_start'] for header in headers) 120 | df.headers = headers # HACK to preserve backwards compatibility 121 | return df 
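A minimal usage sketch for the Monitor wrapper above. Note that, as written, this module never imports gym or Wrapper even though __init__ references both, so the sketch assumes those imports have been added to the module (e.g. import gym and from gym import Wrapper); it also calls the _reset/_step hooks directly rather than relying on old-gym Wrapper dispatch, and the log path is hypothetical.

import os
import gym

from rl.envs.monitor import Monitor, load_results

os.makedirs("./logs", exist_ok=True)

# Wrap any gym env; per-episode reward/length/time get appended to ./logs/run0.monitor.csv
env = Monitor(gym.make("CartPole-v1"), "./logs/run0", allow_early_resets=True)

obs = env._reset()
for _ in range(500):
    obs, rew, done, info = env._step(env.env.action_space.sample())
    if done:
        obs = env._reset()
env.close()

df = load_results("./logs")  # pandas DataFrame with columns r (reward), l (length), t (time)
print(len(df), df["r"].mean())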
-------------------------------------------------------------------------------- /rl/envs/normalize.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py 2 | # Thanks to the authors + OpenAI for the code 3 | 4 | import numpy as np 5 | import functools 6 | import torch 7 | import ray 8 | 9 | from .wrapper import WrapEnv 10 | 11 | @ray.remote 12 | def _run_random_actions(iter, policy, env_fn, noise_std): 13 | 14 | env = WrapEnv(env_fn) 15 | states = np.zeros((iter, env.observation_space.shape[0])) 16 | 17 | state = env.reset() 18 | for t in range(iter): 19 | states[t, :] = state 20 | 21 | state = torch.Tensor(state) 22 | 23 | action = policy(state) 24 | 25 | # add gaussian noise to deterministic action 26 | action = action + torch.randn(action.size()) * noise_std 27 | 28 | state, _, done, _ = env.step(action.data.numpy()) 29 | 30 | if done: 31 | state = env.reset() 32 | 33 | return states 34 | 35 | def get_normalization_params(iter, policy, env_fn, noise_std, procs=4): 36 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std)) 37 | 38 | states_ids = [_run_random_actions.remote(iter // procs, policy, env_fn, noise_std) for _ in range(procs)] 39 | 40 | states = [] 41 | for _ in range(procs): 42 | ready_ids, _ = ray.wait(states_ids, num_returns=1) 43 | states.extend(ray.get(ready_ids[0])) 44 | states_ids.remove(ready_ids[0]) 45 | 46 | print("Done gathering input normalization data.") 47 | 48 | return np.mean(states, axis=0), np.sqrt(np.var(states, axis=0) + 1e-8) 49 | 50 | 51 | # returns a function that creates a normalized environment, then pre-normalizes it 52 | # using states sampled from a deterministic policy with some added noise 53 | def PreNormalizer(iter, noise_std, policy, *args, **kwargs): 54 | 55 | # noise is gaussian noise 56 | @torch.no_grad() 57 | def pre_normalize(env, policy, num_iter, noise_std): 58 | # save whether or not the environment is configured to do online normalization 59 | online_val = env.online 60 | env.online = True 61 | 62 | state = env.reset() 63 | 64 | for t in range(num_iter): 65 | state = torch.Tensor(state) 66 | 67 | _, action = policy(state) 68 | 69 | # add gaussian noise to deterministic action 70 | action = action + torch.randn(action.size()) * noise_std 71 | 72 | state, _, done, _ = env.step(action.data.numpy()) 73 | 74 | if done: 75 | state = env.reset() 76 | 77 | env.online = online_val 78 | 79 | def _Normalizer(venv): 80 | venv = Normalize(venv, *args, **kwargs) 81 | 82 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std)) 83 | pre_normalize(venv, policy, iter, noise_std) 84 | print("Done gathering input normalization data.") 85 | 86 | return venv 87 | 88 | return _Normalizer 89 | 90 | # returns a function that creates a normalized environment 91 | def Normalizer(*args, **kwargs): 92 | def _Normalizer(venv): 93 | return Normalize(venv, *args, **kwargs) 94 | 95 | return _Normalizer 96 | 97 | class Normalize: 98 | """ 99 | Vectorized environment base class 100 | """ 101 | def __init__(self, 102 | venv, 103 | ob_rms=None, 104 | ob=True, 105 | ret=False, 106 | clipob=10., 107 | cliprew=10., 108 | online=True, 109 | gamma=1.0, 110 | epsilon=1e-8): 111 | 112 | self.venv = venv 113 | self._observation_space = venv.observation_space 114 | self._action_space = venv.action_space 115 | 116 | if ob_rms is not None: 117 | 
self.ob_rms = ob_rms 118 | else: 119 | self.ob_rms = RunningMeanStd(shape=self._observation_space.shape) if ob else None 120 | 121 | self.ret_rms = RunningMeanStd(shape=()) if ret else None 122 | self.clipob = clipob 123 | self.cliprew = cliprew 124 | self.ret = np.zeros(self.num_envs) 125 | self.gamma = gamma 126 | self.epsilon = epsilon 127 | 128 | self.online = online 129 | 130 | def step(self, vac): 131 | obs, rews, news, infos = self.venv.step(vac) 132 | 133 | #self.ret = self.ret * self.gamma + rews 134 | obs = self._obfilt(obs) 135 | 136 | # NOTE: shifting mean of reward seems bad; qualitatively changes MDP 137 | if self.ret_rms: 138 | if self.online: 139 | self.ret_rms.update(self.ret) 140 | 141 | rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew) 142 | 143 | return obs, rews, news, infos 144 | 145 | def _obfilt(self, obs): 146 | if self.ob_rms: 147 | if self.online: 148 | self.ob_rms.update(obs) 149 | 150 | obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob) 151 | return obs 152 | else: 153 | return obs 154 | 155 | def reset(self): 156 | """ 157 | Reset all environments 158 | """ 159 | obs = self.venv.reset() 160 | return self._obfilt(obs) 161 | 162 | @property 163 | def action_space(self): 164 | return self._action_space 165 | 166 | @property 167 | def observation_space(self): 168 | return self._observation_space 169 | 170 | def close(self): 171 | self.venv.close() 172 | 173 | def render(self): 174 | self.venv.render() 175 | 176 | @property 177 | def num_envs(self): 178 | return self.venv.num_envs 179 | 180 | 181 | 182 | class RunningMeanStd(object): 183 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 184 | def __init__(self, epsilon=1e-4, shape=()): 185 | self.mean = np.zeros(shape, 'float64') 186 | self.var = np.zeros(shape, 'float64') 187 | self.count = epsilon 188 | 189 | 190 | def update(self, x): 191 | batch_mean = np.mean(x, axis=0) 192 | batch_var = np.var(x, axis=0) 193 | batch_count = x.shape[0] 194 | 195 | delta = batch_mean - self.mean 196 | tot_count = self.count + batch_count 197 | 198 | new_mean = self.mean + delta * batch_count / tot_count 199 | m_a = self.var * (self.count) 200 | m_b = batch_var * (batch_count) 201 | M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) 202 | new_var = M2 / (self.count + batch_count) 203 | 204 | new_count = batch_count + self.count 205 | 206 | self.mean = new_mean 207 | self.var = new_var 208 | self.count = new_count 209 | 210 | def test_runningmeanstd(): 211 | for (x1, x2, x3) in [ 212 | (np.random.randn(3), np.random.randn(4), np.random.randn(5)), 213 | (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)), 214 | ]: 215 | 216 | rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:]) 217 | 218 | x = np.concatenate([x1, x2, x3], axis=0) 219 | ms1 = [x.mean(axis=0), x.var(axis=0)] 220 | rms.update(x1) 221 | rms.update(x2) 222 | rms.update(x3) 223 | ms2 = [rms.mean, rms.var] 224 | 225 | assert np.allclose(ms1, ms2) 226 | -------------------------------------------------------------------------------- /rl/envs/vectorize.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/dummy_vec_env.py 2 | # Thanks to the authors + OpenAI for the code 3 | 4 | import numpy as np 5 | 6 | class Vectorize: 7 | def __init__(self, env_fns): 8 | 
self.envs = [fn() for fn in env_fns] 9 | env = self.envs[0] 10 | 11 | self._observation_space = env.observation_space 12 | self._action_space = env.action_space 13 | 14 | self.ts = np.zeros(len(self.envs), dtype='int') 15 | 16 | def step(self, action_n): 17 | results = [env.step(a) for (a,env) in zip(action_n, self.envs)] 18 | obs, rews, dones, infos = map(np.array, zip(*results)) 19 | 20 | # TODO: decide whether to uncomment this 21 | self.ts += 1 22 | # for (i, done) in enumerate(dones): 23 | # if done: 24 | # obs[i] = self.envs[i].reset() 25 | # self.ts[i] = 0 26 | 27 | return np.array(obs), np.array(rews), np.array(dones), infos 28 | 29 | def reset(self): 30 | results = [env.reset() for env in self.envs] 31 | return np.array(results) 32 | 33 | def render(self): 34 | self.envs[0].render() 35 | 36 | @property 37 | def num_envs(self): 38 | return len(self.envs) 39 | 40 | @property 41 | def action_space(self): 42 | return self._action_space 43 | 44 | @property 45 | def observation_space(self): 46 | return self._observation_space 47 | 48 | -------------------------------------------------------------------------------- /rl/envs/wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Gives a vectorized interface to a single environment 4 | class WrapEnv: 5 | def __init__(self, env_fn): 6 | self.env = env_fn() 7 | 8 | def __getattr__(self, attr): 9 | return getattr(self.env, attr) 10 | 11 | def step(self, action): 12 | state, reward, done, info = self.env.step(action[0]) 13 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info]) 14 | 15 | def render(self): 16 | self.env.render() 17 | 18 | def reset(self): 19 | return np.array([self.env.reset()]) -------------------------------------------------------------------------------- /rl/envs/wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | # Gives a vectorized interface to a single environment 5 | class WrapEnv: 6 | def __init__(self, env_fn): 7 | self.env = env_fn() 8 | 9 | def __getattr__(self, attr): 10 | return getattr(self.env, attr) 11 | 12 | def step(self, action, term_thresh=0): 13 | state, reward, done, info = self.env.step(action[0], f_term=term_thresh) 14 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info]) 15 | 16 | def render(self): 17 | self.env.render() 18 | 19 | def reset(self): 20 | return np.array([self.env.reset()]) 21 | 22 | # TODO: this is probably a better case for inheritance than for a wrapper 23 | # Gives an interface to exploit mirror symmetry 24 | class SymmetricEnv: 25 | def __init__(self, env_fn, mirrored_obs=None, mirrored_act=None, obs_fn=None, act_fn=None): 26 | 27 | assert (bool(mirrored_act) ^ bool(act_fn)) and (bool(mirrored_obs) ^ bool(obs_fn)), \ 28 | "You must provide either mirror indices or a mirror function, but not both, for \ 29 | observation and action." 
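        # Index-list convention (inferred from _get_symmetry_matrix at the bottom of this
        # file): the magnitude of entry i gives the index that position i is paired with
        # under the left/right mirror, and a negative sign means that value is also negated.
        # Since -0 cannot carry a sign, -0.1 is the conventional way to write "index 0, negated".
        # e.g. mirrored_act = [-1, -0.1] maps an action [a0, a1] to [-a1, -a0].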
30 | 31 | if mirrored_act: 32 | self.act_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_act)) 33 | 34 | elif act_fn: 35 | assert callable(act_fn), "Action mirror function must be callable" 36 | self.mirror_action = act_fn 37 | 38 | if mirrored_obs: 39 | self.obs_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_obs)) 40 | 41 | elif obs_fn: 42 | assert callable(obs_fn), "Observation mirror function must be callable" 43 | self.mirror_observation = obs_fn 44 | 45 | self.env = env_fn() 46 | 47 | def __getattr__(self, attr): 48 | return getattr(self.env, attr) 49 | 50 | def mirror_action(self, action): 51 | return action @ self.act_mirror_matrix 52 | 53 | def mirror_observation(self, obs): 54 | return obs @ self.obs_mirror_matrix 55 | 56 | # To be used when there is a clock in the observation. In this case, the mirrored_obs vector inputted 57 | # when the SymmeticEnv is created should not move the clock input order. The indices of the obs vector 58 | # where the clocks are located need to be inputted. 59 | def mirror_clock_observation(self, obs, clock_inds): 60 | # print("obs.shape = ", obs.shape) 61 | # print("obs_mirror_matrix.shape = ", self.obs_mirror_matrix.shape) 62 | mirror_obs = obs @ self.obs_mirror_matrix 63 | clock = mirror_obs[:, self.clock_inds] 64 | # print("clock: ", clock) 65 | for i in range(np.shape(clock)[1]): 66 | mirror_obs[:, clock_inds[i]] = np.sin(np.arcsin(clock[:, i]) + np.pi) 67 | return mirror_obs 68 | 69 | 70 | def _get_symmetry_matrix(mirrored): 71 | numel = len(mirrored) 72 | mat = np.zeros((numel, numel)) 73 | 74 | for (i, j) in zip(np.arange(numel), np.abs(np.array(mirrored).astype(int))): 75 | mat[i, j] = np.sign(mirrored[i]) 76 | 77 | return mat -------------------------------------------------------------------------------- /rl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | # from .actor import Gaussian_FF_Actor as GaussianMLP_Actor # for legacy code 2 | from .actor import Gaussian_FF_Actor 3 | 4 | # from .actor_release import GaussianMLP_Actor 5 | 6 | #from .linear import LinearMLP 7 | #from .recurrent import RecurrentNet -------------------------------------------------------------------------------- /rl/policies/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from torch import sqrt 6 | 7 | def normc_fn(m): 8 | classname = m.__class__.__name__ 9 | if classname.find('Linear') != -1: 10 | m.weight.data.normal_(0, 1) 11 | m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True)) 12 | if m.bias is not None: 13 | m.bias.data.fill_(0) 14 | 15 | # The base class for an actor. 
Includes functions for normalizing state (optional) 16 | class Net(nn.Module): 17 | def __init__(self): 18 | super(Net, self).__init__() 19 | self.is_recurrent = False 20 | 21 | self.welford_state_mean = torch.zeros(1) 22 | self.welford_state_mean_diff = torch.ones(1) 23 | self.welford_state_n = 1 24 | 25 | self.env_name = None 26 | 27 | def forward(self): 28 | raise NotImplementedError 29 | 30 | def normalize_state(self, state, update=True): 31 | state = torch.Tensor(state) 32 | 33 | if self.welford_state_n == 1: 34 | self.welford_state_mean = torch.zeros(state.size(-1)) 35 | self.welford_state_mean_diff = torch.ones(state.size(-1)) 36 | 37 | if update: 38 | if len(state.size()) == 1: # If we get a single state vector 39 | state_old = self.welford_state_mean 40 | self.welford_state_mean += (state - state_old) / self.welford_state_n 41 | self.welford_state_mean_diff += (state - state_old) * (state - state_old) 42 | self.welford_state_n += 1 43 | elif len(state.size()) == 2: # If we get a batch 44 | print("NORMALIZING 2D TENSOR (this should not be happening)") 45 | for r_n in r: 46 | state_old = self.welford_state_mean 47 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n 48 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old) 49 | self.welford_state_n += 1 50 | elif len(state.size()) == 3: # If we get a batch of sequences 51 | print("NORMALIZING 3D TENSOR (this really should not be happening)") 52 | for r_t in r: 53 | for r_n in r_t: 54 | state_old = self.welford_state_mean 55 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n 56 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old) 57 | self.welford_state_n += 1 58 | return (state - self.welford_state_mean) / sqrt(self.welford_state_mean_diff / self.welford_state_n) 59 | 60 | def copy_normalizer_stats(self, net): 61 | self.welford_state_mean = net.self_state_mean 62 | self.welford_state_mean_diff = net.welford_state_mean_diff 63 | self.welford_state_n = net.welford_state_n 64 | 65 | def initialize_parameters(self): 66 | self.apply(normc_fn) -------------------------------------------------------------------------------- /rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .render import * 2 | from .param_noise import * 3 | from .remote_replay import * 4 | import sys 5 | 6 | class ProgBar(): 7 | def __init__(self, total, bar_len=40): 8 | self.total = total 9 | self.count = 0 10 | self.bar_len = bar_len 11 | 12 | def next(self, msg=''): 13 | self.count += 1 14 | 15 | fill_len = int(round(self.bar_len * self.count / float(self.total))) 16 | bar = '=' * fill_len + '-' * (self.bar_len - fill_len) 17 | 18 | percent = round(100.0 * self.count / float(self.total), 1) 19 | 20 | msg = msg.ljust(len(msg) + 2) 21 | 22 | sys.stdout.write('[%s] %s%s ... 
%s\r' % (bar, percent, '%', msg)) 23 | sys.stdout.flush() 24 | -------------------------------------------------------------------------------- /rl/utils/param_noise.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import gym 4 | 5 | """ 6 | From OpenAI Baselines: 7 | https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py 8 | """ 9 | # For parameter noise 10 | class AdaptiveParamNoiseSpec(object): 11 | def __init__(self, initial_stddev=0.1, desired_action_stddev=0.2, adaptation_coefficient=1.01): 12 | """ 13 | Note that initial_stddev and current_stddev refer to std of parameter noise, 14 | but desired_action_stddev refers to (as name notes) desired std in action space 15 | """ 16 | self.initial_stddev = initial_stddev 17 | self.desired_action_stddev = desired_action_stddev 18 | self.adaptation_coefficient = adaptation_coefficient 19 | 20 | self.current_stddev = initial_stddev 21 | 22 | def adapt(self, distance): 23 | if distance > self.desired_action_stddev: 24 | # Decrease stddev. 25 | self.current_stddev /= self.adaptation_coefficient 26 | else: 27 | # Increase stddev. 28 | self.current_stddev *= self.adaptation_coefficient 29 | 30 | def get_stats(self): 31 | stats = { 32 | 'param_noise_stddev': self.current_stddev, 33 | } 34 | return stats 35 | 36 | def __repr__(self): 37 | fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adaptation_coefficient={})' 38 | return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adaptation_coefficient) 39 | 40 | def distance_metric(actions1, actions2): 41 | """ 42 | Compute "distance" between actions taken by two policies at the same states 43 | Expects numpy arrays 44 | """ 45 | diff = actions1-actions2 46 | mean_diff = np.mean(np.square(diff), axis=0) 47 | dist = np.sqrt(np.mean(mean_diff)) 48 | return dist 49 | 50 | def perturb_actor_parameters(perturbed_policy, unperturbed_policy, param_noise, device): 51 | """Apply parameter noise to actor model, for exploration""" 52 | perturbed_policy.load_state_dict(unperturbed_policy.state_dict()) 53 | params = perturbed_policy.state_dict() 54 | for name in params: 55 | if 'ln' in name: 56 | pass 57 | param = params[name] 58 | param += torch.randn(param.shape).to(device) * param_noise.current_stddev -------------------------------------------------------------------------------- /rl/utils/remote_replay.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import ray 4 | 5 | # tensorboard 6 | from datetime import datetime 7 | from torch.utils.tensorboard import SummaryWriter 8 | from colorama import Fore, Style 9 | 10 | # more efficient replay memory? 11 | from collections import deque 12 | 13 | # Code based on: 14 | # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py 15 | 16 | # Expects tuples of (state, next_state, action, reward, done) 17 | 18 | @ray.remote 19 | class ReplayBuffer_remote(object): 20 | def __init__(self, size, experiment_name, args): 21 | """Create Replay buffer. 22 | Parameters 23 | ---------- 24 | size: int 25 | Max number of transitions to store in the buffer. When the buffer 26 | overflows the old memories are dropped. 
27 | """ 28 | self.storage = deque(maxlen=int(size)) 29 | self.max_size = size 30 | 31 | print("Created replay buffer with size {}".format(self.max_size)) 32 | 33 | def __len__(self): 34 | return len(self.storage) 35 | 36 | def storage_size(self): 37 | return len(self.storage) 38 | 39 | def add(self, data): 40 | self.storage.append(data) 41 | 42 | def add_bulk(self, data): 43 | for i in range(len(data)): 44 | self.storage.append(data[i]) 45 | 46 | def print_size(self): 47 | print("size = {}".format(len(self.storage))) 48 | 49 | def sample(self, batch_size): 50 | ind = np.random.randint(0, len(self.storage), size=batch_size) 51 | x, y, u, r, d = [], [], [], [], [] 52 | 53 | for i in ind: 54 | X, Y, U, R, D = self.storage[i] 55 | x.append(np.array(X, copy=False)) 56 | y.append(np.array(Y, copy=False)) 57 | u.append(np.array(U, copy=False)) 58 | r.append(np.array(R, copy=False)) 59 | d.append(np.array(D, copy=False)) 60 | 61 | # print("Sampled experience from replay buffer.") 62 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 63 | 64 | # Non-ray actor for replay buffer 65 | class ReplayBuffer(object): 66 | def __init__(self, max_size=1e7): 67 | self.storage = [] 68 | self.max_size = max_size 69 | self.ptr = 0 70 | 71 | def add(self, data): 72 | if len(self.storage) < self.max_size: 73 | self.storage.append(data) 74 | self.storage[int(self.ptr)] = data 75 | self.ptr = (self.ptr + 1) % self.max_size 76 | 77 | 78 | def sample(self, batch_size): 79 | ind = np.random.randint(0, len(self.storage), size=batch_size) 80 | x, y, u, r, d = [], [], [], [], [] 81 | 82 | for i in ind: 83 | X, Y, U, R, D = self.storage[i] 84 | x.append(np.array(X, copy=False)) 85 | y.append(np.array(Y, copy=False)) 86 | u.append(np.array(U, copy=False)) 87 | r.append(np.array(R, copy=False)) 88 | d.append(np.array(D, copy=False)) 89 | 90 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 91 | 92 | def get_transitions_from_range(self, start, end): 93 | ind = np.arange(int(start), int(end)) 94 | x, u = [], [] 95 | for i in ind: 96 | X, Y, U, R, D = self.storage[i] 97 | x.append(np.array(X, copy=False)) 98 | u.append(np.array(U, copy=False)) 99 | 100 | return np.array(x), np.array(u) 101 | 102 | def get_all_transitions(self): 103 | # list of transition tuples 104 | return self.storage 105 | 106 | def add_parallel(self, data): 107 | for i in range(len(data)): 108 | self.add(data[i]) -------------------------------------------------------------------------------- /test_policy.py: -------------------------------------------------------------------------------- 1 | from cassie import CassiePlayground 2 | from tools.test_commands import * 3 | from tools.eval_perturb import * 4 | from tools.eval_mission import * 5 | from tools.compare_pols import * 6 | from tools.eval_sensitivity import * 7 | from collections import OrderedDict 8 | from util.env import env_factory 9 | 10 | import torch 11 | import pickle 12 | import os, sys, argparse 13 | import numpy as np 14 | 15 | # Get policy to test from args, load policy and env 16 | parser = argparse.ArgumentParser() 17 | # General args 18 | parser.add_argument("--path", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing policy and run details") 19 | parser.add_argument("--path2", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing 2nd policy to 
compare against") 20 | parser.add_argument("--n_procs", type=int, default=4, help="Number of procs to use for multi-processing") 21 | parser.add_argument("--test", type=str, default="full", help="Test to run (options: \"full\", \"commands\", and \"perturb\", and \"compare\")") 22 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not") 23 | # Test Commands args 24 | parser.add_argument("--n_steps", type=int, default=200, help="Number of steps to for a full command cycle (1 speed change and 1 orientation change)") 25 | parser.add_argument("--n_commands", type=int, default=6, help="Number of commands in a single test iteration") 26 | parser.add_argument("--max_speed", type=float, default=3.0, help="Maximum allowable speed to test") 27 | parser.add_argument("--min_speed", type=float, default=0.0, help="Minimum allowable speed to test") 28 | parser.add_argument("--n_iter", type=int, default=10000, help="Number of command cycles to test") 29 | # Test Perturbs args 30 | parser.add_argument("--wait_time", type=float, default=3.0, help="How long to wait after perturb to count as success") 31 | parser.add_argument("--pert_dur", type=float, default=0.2, help="How long to apply perturbation") 32 | parser.add_argument("--pert_size", type=float, default=50, help="Size of perturbation to start sweep from") 33 | parser.add_argument("--pert_incr", type=float, default=10.0, help="How much to increment the perturbation size after each success") 34 | parser.add_argument("--pert_body", type=str, default="cassie-pelvis", help="Body to apply perturbation to") 35 | parser.add_argument("--num_angles", type=int, default=100, help="How many angles to test (angles are evenly divided into 2*pi)") 36 | # Test Mission args 37 | parser.add_argument("--viz", default=False, action='store_true') 38 | # Test parameter sensitivity args 39 | parser.add_argument("--sens_incr", type=float, default=0.05, help="Size of increments for the sensityivity sweep") 40 | parser.add_argument("--hi_factor", type=float, default=15, help="High factor") 41 | parser.add_argument("--lo_factor", type=float, default=0, help="Low factor") 42 | 43 | args = parser.parse_args() 44 | run_args = pickle.load(open(os.path.join(args.path, "experiment.pkl"), "rb")) 45 | # cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 46 | # env_fn = partial(CassieEnv, traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 47 | # Make mirror False so that env_factory returns a regular wrap env function and not a symmetric env function that can be called to return 48 | # a cassie environment (symmetric env cannot be called to make another env) 49 | env_fn = env = env_factory( 50 | run_args.env_name, 51 | command_profile=run_args.command_profile, 52 | input_profile=run_args.input_profile, 53 | simrate=run_args.simrate, 54 | dynamics_randomization=run_args.dyn_random, 55 | mirror=run_args.mirror, 56 | learn_gains=run_args.learn_gains, 57 | reward=run_args.reward, 58 | history=run_args.history, 59 | no_delta=run_args.no_delta, 60 | traj=run_args.traj, 61 | ik_baseline=run_args.ik_baseline 62 | ) 63 | cassie_env = env_fn() 64 | policy = torch.load(os.path.join(args.path, "actor.pt")) 65 | if args.eval: 66 | policy.eval() 67 | if hasattr(policy, 'init_hidden_state'): 68 | policy.init_hidden_state() 69 | 70 | # TODO: make returning/save data in file 
inside function consist for all testing functions 71 | def test_commands(cassie_env, policy, args): 72 | print("Testing speed and orient commands") 73 | if args.n_procs == 1: 74 | save_data = eval_commands(cassie_env, policy, num_steps=args.n_steps, num_commands=args.n_commands, 75 | max_speed=args.max_speed, min_speed=args.min_speed, num_iters=args.n_iter) 76 | np.save(os.path.join(args.path, "eval_commands.npy"), save_data) 77 | else: 78 | eval_commands_multi(env_fn, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed, 79 | min_speed=args.min_speed, num_iters=args.n_iter, num_procs=args.n_procs, filename=os.path.join(args.path, "eval_commands.npy")) 80 | 81 | def test_perturbs(cassie_env, policy, args): 82 | print("Testing perturbations") 83 | if args.n_procs == 1: 84 | save_data = compute_perturbs(cassie_env, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, 85 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles) 86 | else: 87 | save_data = compute_perturbs_multi(env_fn, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, 88 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles, num_procs=args.n_procs) 89 | np.save(os.path.join(args.path, "eval_perturbs.npy"), save_data) 90 | 91 | # If not command line arg, assume run all tests 92 | if args.test == "full": 93 | print("Running full test") 94 | test_commands(cassie_env, policy, args) 95 | test_perturbs(cassie_env, policy, args) 96 | elif args.test == "commands": 97 | test_commands(cassie_env, policy, args) 98 | elif args.test == "perturb": 99 | test_perturbs(cassie_env, policy, args) 100 | elif args.test == "mission": 101 | missions = ["straight", "curvy", "90_left", "90_right"] 102 | if not args.viz: 103 | print("Testing missions") 104 | save_data = [] 105 | 106 | for mission in missions: 107 | print(mission + " mission:") 108 | cassie_env = CassiePlayground(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mission=mission) 109 | save_data.append(eval_mission(cassie_env, policy)) 110 | np.save(os.path.join(args.path, "eval_missions.npy"), save_data) 111 | else: 112 | save_data = np.load(os.path.join(args.path, "eval_missions.npy"), allow_pickle=True) 113 | plot_mission_data(save_data, missions) 114 | elif args.test == "sensitivity": 115 | print("Testing sensitivity") 116 | save_data = eval_sensitivity(cassie_env, policy, incr=args.sens_incr, hi_factor=args.hi_factor, lo_factor=args.lo_factor) 117 | print(save_data) 118 | np.save(os.path.join(args.path, "eval_sensitivity.npy"), save_data) 119 | elif args.test == "compare": 120 | print("running compare") 121 | compare_pols(args.path, args.path2) 122 | 123 | # vis_commands(cassie_env, policy, num_steps=200, num_commands=6, max_speed=3, min_speed=0) 124 | # save_data = eval_commands(cassie_env, policy, num_steps=200, num_commands=2, max_speed=3, min_speed=0, num_iters=1) 125 | # np.save("./test_eval_commands.npy", save_data) 126 | # eval_commands_multi(env_fn, policy, num_steps=200, num_commands=4, max_speed=3, min_speed=0, num_iters=4, num_procs=4) 127 | 128 | # report_stats("./test_eval_commands.npy") 129 | -------------------------------------------------------------------------------- /tools/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/.DS_Store -------------------------------------------------------------------------------- /tools/aslip_tests/GRF_2KHz.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/GRF_2KHz.pkl -------------------------------------------------------------------------------- /tools/aslip_tests/plots/footpos_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/plots/footpos_err.png -------------------------------------------------------------------------------- /tools/cassie_top_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/cassie_top_white.png -------------------------------------------------------------------------------- /tools/command_trajectory.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/command_trajectory.pkl -------------------------------------------------------------------------------- /tools/compare_pols.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import fpdf 4 | from .eval_perturb import plot_perturb 5 | 6 | def process_commands(data): 7 | stats = {} 8 | num_iters = data.shape[0] 9 | pass_rate = np.sum(data[:, 0]) / num_iters 10 | stats["Pass Rate"] = pass_rate 11 | success_inds = np.where(data[:, 0] == 1)[0] 12 | speed_fail_inds = np.where(data[:, 1] == 0)[0] 13 | orient_fail_inds = np.where(data[:, 1] == 1)[0] 14 | 15 | speed_change = data[speed_fail_inds, 4] 16 | orient_change = data[orient_fail_inds, 5] 17 | speed_neg_inds = np.where(speed_change < 0) 18 | speed_pos_inds = np.where(speed_change > 0) 19 | orient_neg_inds = np.where(orient_change < 0) 20 | orient_pos_inds = np.where(orient_change > 0) 21 | stats["Number of speed fails"] = len(speed_fail_inds) 22 | stats["Number of orient fails"] = len(orient_fail_inds) 23 | if len(speed_fail_inds) == 0: 24 | avg_pos_speed = "N/A" 25 | avg_neg_speed = "N/A" 26 | else: 27 | avg_pos_speed = np.mean(speed_change[speed_pos_inds]) 28 | avg_neg_speed = np.mean(speed_change[speed_neg_inds]) 29 | if len(orient_fail_inds) == 0: 30 | avg_pos_orient = "N/A" 31 | avg_neg_orient = "N/A" 32 | else: 33 | avg_pos_orient = np.mean(orient_change[orient_pos_inds]) 34 | avg_neg_orient = np.mean(orient_change[orient_neg_inds]) 35 | 36 | stats["Avg pos speed fails"] = avg_pos_speed 37 | stats["Avg neg speed fails"] = avg_neg_speed 38 | stats["Avg pos_orient fails"] = avg_pos_orient 39 | stats["Avg neg_orient fails"] = avg_neg_orient 40 | 41 | return stats 42 | 43 | def process_perturbs(data): 44 | stats = {} 45 | num_angles, num_phases = data.shape 46 | angles = 360*np.linspace(0, 1, num_angles+1) 47 | 48 | stats["Avg Force"] = round(np.mean(data), 2) 49 | stats["Max Force"] = np.max(data) 50 | max_ind = np.unravel_index(np.argmax(data, axis=None), data.shape) 51 | stats["Max Location (angle, phase)"] = (str(round(angles[max_ind[0]], 2))+chr(176), max_ind[1]) 52 | angle_avg = np.mean(data, axis=1) 53 | phase_avg = np.mean(data, axis=0) 54 | 
stats["Most Robust Angle"] = angles[np.argmax(angle_avg)] 55 | stats["Most Robust Phase"] = np.argmax(phase_avg) 56 | 57 | return stats 58 | 59 | 60 | # Note that for the spacing of the multi_cells to work out, this function assumes that 61 | # pol1's name is at least longer than pol2's name 62 | def draw_headers(pdf, pol1, pol2, key_col_width, min_width): 63 | epw = pdf.w - 2*pdf.l_margin 64 | th = pdf.font_size 65 | pol1_width = max(pdf.get_string_width(pol1), min_width) + 0.1 66 | pol2_width = max(pdf.get_string_width(pol2), min_width) + 0.1 67 | pol2_split = False 68 | if pol1_width + pol2_width + key_col_width>= epw: 69 | pol1_width = (epw - key_col_width) / 2 70 | if pol2_width > pol1_width: 71 | pol2_split = True 72 | pol2_width = pol1_width 73 | 74 | start_x = pdf.get_x() 75 | start_y = pdf.get_y() 76 | pdf.set_x(start_x + key_col_width) 77 | 78 | # Draw pol1 and pol2 multicell first to figure out y height 79 | pdf.multi_cell(pol1_width, 2*th, pol1, border=1, align="C") 80 | pol1_height = pdf.get_y() - start_y 81 | 82 | pdf.set_xy(start_x+key_col_width+pol1_width, start_y) 83 | if pol2_split: 84 | pdf.multi_cell(pol2_width, 2*th, pol2, border=1, align="C") 85 | else: 86 | pdf.cell(pol2_width, pol1_height, pol2, border=1, align="C") 87 | pdf.set_xy(start_x, start_y) 88 | pdf.cell(key_col_width, pol1_height, "", border=1, align="C") 89 | pdf.set_xy(start_x, start_y + pol1_height) 90 | 91 | return pol1_width, pol2_width 92 | 93 | def compare_pols(pol1, pol2): 94 | pol1 = pol1.strip("/") 95 | pol2 = pol2.strip("/") 96 | # For spacing concerns later, need pol1 to be the "longer" (name wise) of the two 97 | if len(os.path.basename(pol2)) > len(os.path.basename(pol1)): 98 | temp = pol1 99 | pol1 = pol2 100 | pol2 = temp 101 | pol1_name = os.path.basename(pol1) 102 | pol2_name = os.path.basename(pol2) 103 | print("pol1: ", pol1_name) 104 | print("pol2: ", pol2_name) 105 | 106 | # Initial PDF setup 107 | pdf = fpdf.FPDF(format='letter', unit='in') 108 | pdf.add_page() 109 | pdf.set_font('Times','',10.0) 110 | # Effective page width, or just epw 111 | epw = pdf.w - 2*pdf.l_margin 112 | th = pdf.font_size 113 | # Set title 114 | pdf.cell(epw, 2*th, "Policy Robustness Comparison", 0, 1, "C") 115 | pdf.ln(2*th) 116 | 117 | # Print command test table 118 | pol1_command = np.load(os.path.join(pol1, "eval_commands.npy")) 119 | pol2_command = np.load(os.path.join(pol2, "eval_commands.npy")) 120 | pol1_command_stats = process_commands(pol1_command) 121 | pol2_command_stats = process_commands(pol2_command) 122 | 123 | pdf.cell(epw, 2*th, "Command Test", 0, 1, "L") 124 | pdf.ln(th) 125 | # Set column widths 126 | key_col_width = pdf.get_string_width(max(pol2_command_stats.keys(), key=len)) + .2 127 | 128 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(9.9999))) 129 | 130 | for key in pol2_command_stats.keys(): 131 | pdf.cell(key_col_width, 2*th, key, border=1, align="C") 132 | pdf.cell(pol1_width, 2*th, str(round(pol1_command_stats[key], 4)), border=1, align="C") 133 | pdf.cell(pol2_width, 2*th, str(round(pol2_command_stats[key], 4)), border=1, align="C") 134 | pdf.ln(2*th) 135 | 136 | # Print perturb test table 137 | pdf.ln(2*th) 138 | pdf.cell(epw, 2*th, "Perturbation Test", 0, 1, "L") 139 | pdf.ln(th) 140 | pol1_perturb = np.load(os.path.join(pol1, "eval_perturbs.npy")) 141 | pol2_perturb = np.load(os.path.join(pol2, "eval_perturbs.npy")) 142 | pol1_perturb_stats = process_perturbs(pol1_perturb) 143 | pol2_perturb_stats = 
process_perturbs(pol2_perturb) 144 | 145 | # Set column widths 146 | key_col_width = pdf.get_string_width(max(pol2_perturb_stats.keys(), key=len)) + .2 147 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(999.99))) 148 | 149 | for key in pol2_perturb_stats.keys(): 150 | pdf.cell(key_col_width, 2*th, key, border=1, align="C") 151 | pdf.cell(pol1_width, 2*th, str(pol1_perturb_stats[key]), border=1, align="C") 152 | pdf.cell(pol2_width, 2*th, str(pol2_perturb_stats[key]), border=1, align="C") 153 | pdf.ln(2*th) 154 | 155 | max_force = max(np.max(np.mean(pol1_perturb, axis=1)), np.max(np.mean(pol2_perturb, axis=1))) 156 | max_force = 50*np.ceil(max_force / 50) 157 | pol1_perturb_plot = os.path.join(pol1, "perturb_plot.png") 158 | pol2_perturb_plot = os.path.join(pol2, "perturb_plot.png") 159 | plot_perturb(os.path.join(pol1, "eval_perturbs.npy"), pol1_perturb_plot, max_force) 160 | plot_perturb(os.path.join(pol2, "eval_perturbs.npy"), pol2_perturb_plot, max_force) 161 | pdf.ln(2*th) 162 | 163 | pdf.cell(epw, 2*th, "Perturbation Plot", 0, 1, "L") 164 | pol2_split = False 165 | if pdf.get_string_width(pol2) > epw / 2: 166 | pol2_split = True 167 | start_x = pdf.get_x() 168 | start_y = pdf.get_y() 169 | pdf.multi_cell(epw/2, 2*th, pol1_name, border=0, align="C") 170 | pol1_height = pdf.get_y() - start_y 171 | pdf.set_xy(start_x+epw/2, start_y) 172 | if pol2_split: 173 | pdf.multi_cell(epw/2, 2*th, pol2_name, border=0, align="C") 174 | else: 175 | pdf.cell(epw/2, pol1_height, pol2_name, border=0, align="C") 176 | pdf.set_xy(start_x, start_y+pol1_height) 177 | pdf.image(pol1_perturb_plot, x=start_x, y=start_y+pol1_height, w = epw/2-.1) 178 | pdf.image(pol2_perturb_plot, x=start_x+epw/2, y = start_y+pol1_height, w = epw/2-.1) 179 | 180 | pdf.output("./policy_compare.pdf") 181 | 182 | 183 | -------------------------------------------------------------------------------- /tools/eval_mission.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append("..") # Adds higher directory to python modules path. 
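# Hypothetical usage sketch (an assumption, not taken from the repository's own scripts):
# eval_mission() below expects an environment that exposes a mission via env.command_traj
# (e.g. the CassiePlayground env imported elsewhere in this repo) and a policy callable on a
# state tensor. Something like:
#
#   import torch
#   from cassie import CassiePlayground            # import path used elsewhere in this repo
#   policy = torch.load("path/to/actor.pt")        # hypothetical checkpoint path
#   env = CassiePlayground()                       # constructor arguments assumed
#   save_data = eval_mission(env, policy, num_iters=2)
#   plot_mission_data([save_data], ["default"])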
3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib.colors as mcolors 7 | import matplotlib as mpl 8 | import torch 9 | import time 10 | import cmath 11 | import math 12 | import ray 13 | from functools import partial 14 | 15 | # from cassie import CassieEnv 16 | 17 | def quaternion2euler(quaternion): 18 | w = quaternion[0] 19 | x = quaternion[1] 20 | y = quaternion[2] 21 | z = quaternion[3] 22 | ysqr = y * y 23 | 24 | t0 = +2.0 * (w * x + y * z) 25 | t1 = +1.0 - 2.0 * (x * x + ysqr) 26 | X = math.degrees(math.atan2(t0, t1)) 27 | 28 | t2 = +2.0 * (w * y - z * x) 29 | t2 = +1.0 if t2 > +1.0 else t2 30 | t2 = -1.0 if t2 < -1.0 else t2 31 | Y = math.degrees(math.asin(t2)) 32 | 33 | t3 = +2.0 * (w * z + x * y) 34 | t4 = +1.0 - 2.0 * (ysqr + z * z) 35 | Z = math.degrees(math.atan2(t3, t4)) 36 | 37 | result = np.zeros(3) 38 | result[0] = X * np.pi / 180 39 | result[1] = Y * np.pi / 180 40 | result[2] = Z * np.pi / 180 41 | 42 | return result 43 | 44 | @torch.no_grad() 45 | def eval_mission(cassie_env, policy, num_iters=2): 46 | # save data holds deviation between robot xy pos, z orient, xy velocity and specified pos, orient, velocity from mission 47 | # if mission ends early (robot height fall over indicator) 48 | 49 | runs = [] 50 | pass_data = np.zeros(num_iters) # whether or not robot stayed alive during mission 51 | 52 | for j in range(num_iters): 53 | mission_len = cassie_env.command_traj.trajlen 54 | run_data = [] 55 | state = torch.Tensor(cassie_env.reset_for_test()) 56 | count, passed, done = 0, 1, False 57 | while count < mission_len and not done: 58 | # cassie_env.render() 59 | # Get action and act 60 | action = policy(state, True) 61 | action = action.data.numpy() 62 | state, reward, done, _ = cassie_env.step(action) 63 | state = torch.Tensor(state) 64 | # See if end state reached 65 | if done or cassie_env.sim.qpos()[2] < 0.4: 66 | passed = 0 67 | print("mission failed") 68 | # Get command info, robot info 69 | commanded_pos = cassie_env.command_traj.global_pos[:,0:2] 70 | commanded_speed = cassie_env.command_traj.speed_cmd 71 | commanded_orient = cassie_env.command_traj.orient 72 | qpos = cassie_env.sim.qpos() 73 | qvel = cassie_env.sim.qvel() 74 | actual_pos = qpos[0:2] # only care about x and y 75 | actual_speed = np.linalg.norm(qvel[0:2]) 76 | actual_orient = quaternion2euler(qpos[3:7])[2] # only care about yaw 77 | # Calculate pos,vel,orient deviation as vector difference 78 | pos_error = np.linalg.norm(actual_pos - commanded_pos) 79 | speed_error = np.linalg.norm(actual_speed - commanded_speed) 80 | orient_error = np.linalg.norm(actual_orient - commanded_orient) 81 | # Log info 82 | run_data.append(([count, pos_error, speed_error, orient_error])) 83 | count += 1 84 | if passed: 85 | print("mission passed") 86 | pass_data[j] = 1 87 | runs.append(np.array(run_data)) 88 | 89 | # summary stats 90 | run_lens = [len(run) for run in runs] 91 | print("longest / shortest / average steps : {} / {} / {}".format(max(run_lens), min(run_lens), sum(run_lens) / len(run_lens))) 92 | 93 | save_data = dict() 94 | save_data["runs"] = runs 95 | save_data["pass"] = pass_data 96 | 97 | return save_data 98 | 99 | 100 | def plot_mission_data(save_data, missions): 101 | num_missions = len(save_data) 102 | fig, axs = plt.subplots(num_missions, 3, figsize=(num_missions*5, 15)) 103 | for i in range(num_missions): 104 | mission_runs = save_data[i]["runs"] 105 | for run in mission_runs: 106 | axs[i][0].plot(run[:, 0], run[:, 1]) 107 | axs[i][1].plot(run[:, 0], run[:, 2]) 108 
| axs[i][2].plot(run[:, 0], run[:, 3]) 109 | axs[i][1].set_title(missions[i]) # only put title on middle plot 110 | [axs[i][j].set_xlabel("steps") for j in range(3)] 111 | [axs[i][j].set_ylabel("error") for j in range(3)] 112 | plt.tight_layout(pad=3.0) 113 | plt.show() -------------------------------------------------------------------------------- /tools/eval_sensitivity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import time 4 | import math 5 | 6 | #from eval_perturb import reset_to_phase 7 | 8 | @torch.no_grad() 9 | def sensitivity_sweep(cassie_env, policy, factor): 10 | # Pelvis: 0->5 11 | # Hips: 6->8 and 19->21 12 | # Achilles: 9->11 and 22->24 13 | # Knees: 12 and 25 14 | # Tarsus: 14 and 27 15 | # 16 | # Total number of parameters: 17 17 | 18 | #parameter_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 20, 21, 9, 10, 11, 22, 23, 19 | # 24, 12, 25, 14, 27] 20 | 21 | default_damp = cassie_env.default_damping 22 | parameter_ids = [(0, 5), (6, 8), (19, 21), (9, 11), (22, 24), (12), (25), 23 | (14), (27)] 24 | 25 | count = np.zeros(len(parameter_ids)) 26 | for i in range(9): 27 | damp_range = np.copy(default_damp) 28 | if type(parameter_ids[i]) is tuple: 29 | for j in range(parameter_ids[i][0], parameter_ids[i][1]+1): 30 | # Set damp sweep 31 | damp_range[j] = default_damp[j] * factor 32 | else: 33 | damp_id = parameter_ids[i] 34 | damp_range[damp_id] = default_damp[damp_id] * factor 35 | 36 | 37 | state = torch.Tensor(cassie_env.full_reset()) 38 | cassie_env.sim.set_dof_damping(np.clip(damp_range, 0, None)) 39 | cassie_env.speed = 1 40 | cassie_env.side_speed = 0 41 | cassie_env.phase_add = 1 42 | 43 | curr_time = time.time() 44 | curr_time = cassie_env.sim.time() 45 | start_t = curr_time 46 | while curr_time < start_t + 15: 47 | action = policy(state, True) 48 | action = action.data.numpy() 49 | state, reward, done, _ = cassie_env.step(action) 50 | state = torch.Tensor(state) 51 | curr_time = cassie_env.sim.time() 52 | if cassie_env.sim.qpos()[2] < 0.4: 53 | count[i] = 1 54 | break 55 | 56 | return count 57 | 58 | @torch.no_grad() 59 | def eval_sensitivity(cassie_env, policy, incr, hi_factor, lo_factor): 60 | # this is dumb 61 | lo = 1.0 62 | lo_cnt = 0 63 | while lo >= lo_factor: 64 | lo -= incr 65 | lo_cnt += 1 66 | 67 | num_iters = int(hi_factor / incr) + lo_cnt + 1 68 | 69 | counter = 0 70 | 71 | # Matrix with the num_iters rows, and 9 + 1 columns. the first column is 72 | # the value of damping. the next nine indicate the parameter, 1 is a 73 | # failure at the value, 0 means either no failure or default val. 
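    # A concrete made-up example of one such row (values are illustrative only):
    #     ret[k] == [1.3, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # would read: at a damping factor of 1.3, only the third parameter group
    # (the hip dampings 19-21 in sensitivity_sweep's parameter_ids) caused a fall;
    # every other group either survived or was left at its default value.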
74 | ret = np.zeros((num_iters, 10)) 75 | 76 | # Run the highs 77 | 78 | hi = 1.0 79 | 80 | while hi <= hi_factor: 81 | vals = sensitivity_sweep(cassie_env, policy, hi) 82 | ret[counter][0] = hi 83 | ret[counter][1:] = vals 84 | hi += incr 85 | counter += 1 86 | 87 | lo = 1.0 88 | 89 | # Run lo's 90 | for _ in range(lo_cnt): 91 | vals = sensitivity_sweep(cassie_env, policy, lo) 92 | ret[counter][0] = lo 93 | ret[counter][1:] = vals 94 | lo -= incr 95 | counter += 1 96 | 97 | # Report 98 | return ret 99 | -------------------------------------------------------------------------------- /tools/test_perturb_eval_phase.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/test_perturb_eval_phase.npy -------------------------------------------------------------------------------- /tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .elements import * -------------------------------------------------------------------------------- /tools/utils/elements.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | from pygame.locals import * 3 | 4 | import math 5 | import numpy as np 6 | 7 | 8 | class Mouse: 9 | def __init__(self, px_2_m): 10 | self.px = 0 11 | self.py = 0 12 | self.vx = 0 13 | self.vy = 0 14 | self.radius = 0 15 | self.color = (100,200,100) 16 | self.px_2_m = px_2_m 17 | 18 | def get_position(self): 19 | return (self.px, self.py) 20 | 21 | def get_m_position(self): 22 | return (self.px / self.px_2_m, self.py / self.px_2_m) 23 | 24 | def get_velocity(self): 25 | return (self.vx, self.vy) 26 | 27 | def update(self, time_passed): 28 | prev_p = self.get_position() 29 | self.px, self.py = pygame.mouse.get_pos() 30 | if time_passed > 0: 31 | self.vx = (self.px - prev_p[0]) / time_passed 32 | self.vy = (self.py - prev_p[1]) / time_passed 33 | 34 | def render(self, screen): 35 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius) 36 | 37 | class Robot: 38 | def __init__(self, trajectory, time_passed, frequency): 39 | 40 | # action space is forward velocity and heading 41 | self.positions = trajectory.positions 42 | self.velocities = trajectory.vels 43 | self.thetas = trajectory.thetas 44 | self.accels = trajectory.accels 45 | self.trajlen = len(trajectory.positions) 46 | 47 | # ground truth's position: 48 | self.t_px = int(self.positions[0][0]) 49 | self.t_py = int(self.positions[0][1]) 50 | 51 | # follower's pos 52 | self.f_px = int(self.positions[0][0]) 53 | self.f_py = int(self.positions[0][1]) 54 | 55 | self.radius = 10 56 | self.color = (50,50,200) # direct position tracker 57 | self.color2 = (200,50,50) # velocity + angle tracker 58 | 59 | self.frequency = frequency 60 | self.prev_time = self.prev_inc_time = time_passed 61 | self.counter = 0 62 | self.count_inc = 1 63 | 64 | def update(self,time_passed): 65 | 66 | curr_accel = self.accels[self.counter] 67 | curr_vel = self.velocities[self.counter] 68 | curr_theta = self.thetas[self.counter] 69 | track_pos = self.positions[self.counter] 70 | 71 | # print((curr_vel, curr_theta, np.cos(curr_theta), np.sin(curr_theta))) 72 | 73 | # ground truth's new position: 74 | self.t_px, self.t_py = track_pos[0], track_pos[1] 75 | 76 | # follower's new position: execute angle and velocity command for time passed 77 | t_diff = time_passed - self.prev_time 78 | vx, vy = curr_vel * np.cos(curr_theta), 
curr_vel * np.sin(curr_theta) 79 | ax, ay = curr_accel * np.cos(curr_theta), curr_accel * np.sin(curr_theta) 80 | # gotta subtract the y velocity add because pygame counts y from top down 81 | self.f_px, self.f_py = self.f_px + vx * t_diff + 0.5 * ax * t_diff**2, self.f_py - vy * t_diff + 0.5 * ay * t_diff**2 82 | # self.f_px, self.f_py = self.f_px + vx * t_diff, self.f_py - vy * t_diff 83 | 84 | # increment t_idx on 30 Hz cycle 85 | if time_passed - self.prev_inc_time > (1 / self.frequency): 86 | self.counter += 1 87 | self.prev_inc_time = time_passed 88 | 89 | self.prev_time = time_passed 90 | 91 | # check if we need to restart 92 | if self.counter == self.trajlen: 93 | self.counter = 0 94 | self.f_px, self.f_py = int(self.positions[0][0]),int(self.positions[0][1]) 95 | 96 | def return_info(self, px_2_m): 97 | 98 | # thetas are the yaw angle of the robot 99 | thetas_rotated = self.thetas # no rotation for now 100 | # center of mass position is x y position converted to meters, with constant z height 101 | positions_in_meters = np.array( [[self.trajectory[i][0] / px_2_m - self.trajectory[0][0] / px_2_m, self.trajectory[i][1] / px_2_m - self.trajectory[0][1] / px_2_m, 1.0] for i in range(len(self.trajectory))] ) 102 | velocities_in_meters = np.array( [self.velocities[i] / px_2_m for i in range(len(self.velocities))] ) 103 | 104 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(positions_in_meters, velocities_in_meters, thetas_rotated)) 105 | 106 | return positions_in_meters, velocities_in_meters, thetas_rotated 107 | 108 | def render(self,screen): 109 | pygame.draw.circle(screen,self.color,(int(self.t_px),int(self.t_py)),self.radius) 110 | pygame.draw.circle(screen,self.color2,(int(self.f_px),int(self.f_py)),self.radius) 111 | # pygame.transform.rotate(screen, np.radians(self.theta)) 112 | 113 | class Waypoint: 114 | def __init__(self, mouse_position): 115 | self.px = mouse_position[0] 116 | self.py = mouse_position[1] 117 | self.radius = 5 118 | self.color = (100,200,100) 119 | 120 | def get_position(self): 121 | return (self.px, self.py) 122 | 123 | def render(self, screen): 124 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius) 125 | 126 | class Trajectory: 127 | def __init__(self, t_new, positions, thetas, vels, accels): 128 | self.param = t_new 129 | self.positions = positions 130 | self.thetas = thetas 131 | self.vels = vels 132 | self.accels = accels 133 | self.width = 2 134 | self.color = (100,200,100) 135 | self.arrow_color = (200,200,200) 136 | self.arrow_length = 20.0 137 | 138 | def render(self, screen): 139 | scaled_vels = self.vels / np.max(self.vels) * self.arrow_length 140 | pygame_poses = [] 141 | for i in range(len(self.positions)): 142 | # pygame.draw.aaline(screen, self.color, self.positions[i-1], self.positions[i]) 143 | # print(self.positions[i]) 144 | pygame_poses.append((int(self.positions[i][0]), int(self.positions[i][1]))) 145 | # circle for pos 146 | pygame.draw.circle(screen, self.color, pygame_poses[-1], self.width) 147 | for i in range(len(self.thetas)): 148 | # calculate next pos 149 | pos2 = (pygame_poses[i][0] + scaled_vels[i] * np.cos(self.thetas[i]) , pygame_poses[i][1] - scaled_vels[i] * np.sin(self.thetas[i])) 150 | # arrow for angle and vel 151 | pygame.draw.line(screen, self.arrow_color, pygame_poses[i], pos2) 152 | 153 | def prepare_for_export(self, scale_factor, screen_height): 154 | 155 | self.positions = [[self.positions[i][0] / scale_factor, (screen_height - self.positions[i][1]) / scale_factor, 1.0] for i 
in range(len(self.positions))] 156 | self.positions = [[self.positions[i][0]-self.positions[0][0], self.positions[i][1]-self.positions[0][1], self.positions[i][2]] for i in range(len(self.positions))] 157 | 158 | self.vels = [self.vels[i] / scale_factor for i in range(len(self.vels))] 159 | 160 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.positions[:5], self.vels[:5], self.thetas[:5])) 161 | print("max vel: {}".format(np.max(self.vels))) 162 | 163 | class Grid: 164 | def __init__(self, screen_width, screen_height, px_2_m): 165 | self.px_2_m = px_2_m 166 | self.screen_height = screen_height 167 | self.screen_width = screen_width 168 | self.cell_height = px_2_m # approx height of 1m x 1m cell 169 | self.cell_width = px_2_m # approx width of 1m x 1m cell 170 | self.color = (90,90,90) 171 | 172 | def render(self, screen): 173 | # draw vertical lines 174 | for x in range(self.screen_height // self.px_2_m): 175 | pygame.draw.line(screen, self.color, (x * self.cell_width,0), (x * self.cell_width,self.screen_height)) 176 | # draw horizontal lines 177 | for y in range(self.screen_width // self.px_2_m): 178 | pygame.draw.line(screen, self.color, (0, y * self.cell_height), (self.screen_width, y * self.cell_height)) 179 | -------------------------------------------------------------------------------- /tools/vis_input_and_state.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | sys.path.append("..") 3 | 4 | from cassie import CassieEnv, CassiePlayground 5 | from rl.policies.actor import GaussianMLP_Actor 6 | 7 | import matplotlib.pyplot as plt 8 | 9 | import pickle 10 | import numpy as np 11 | import torch 12 | import time 13 | 14 | def set_axes_equal(ax): 15 | '''Make axes of 3D plot have equal scale so that spheres appear as spheres, 16 | cubes as cubes, etc.. This is one possible solution to Matplotlib's 17 | ax.set_aspect('equal') and ax.axis('equal') not working for 3D. 18 | 19 | Input 20 | ax: a matplotlib axis, e.g., as output from plt.gca(). 21 | ''' 22 | 23 | x_limits = ax.get_xlim3d() 24 | y_limits = ax.get_ylim3d() 25 | z_limits = ax.get_zlim3d() 26 | 27 | x_range = abs(x_limits[1] - x_limits[0]) 28 | x_middle = np.mean(x_limits) 29 | y_range = abs(y_limits[1] - y_limits[0]) 30 | y_middle = np.mean(y_limits) 31 | z_range = abs(z_limits[1] - z_limits[0]) 32 | z_middle = np.mean(z_limits) 33 | 34 | # The plot bounding box is a sphere in the sense of the infinity 35 | # norm, hence I call half the max range the plot radius. 
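    # Worked (hypothetical) numbers: with x_limits (0, 4), y_limits (0, 2) and
    # z_limits (0, 1) the ranges are 4, 2, 1, so plot_radius = 0.5 * 4 = 2 and
    # every axis below gets rescaled to span 4 units about its midpoint,
    # giving the 1:1:1 aspect ratio the docstring describes.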
36 | plot_radius = 0.5*max([x_range, y_range, z_range]) 37 | 38 | ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius]) 39 | ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius]) 40 | ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius]) 41 | 42 | 43 | def eval_policy(policy, args, run_args): 44 | 45 | aslip = True if run_args.traj == "aslip" else False 46 | 47 | cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history, reward=run_args.reward) 48 | cassie_env.debug = args.debug 49 | visualize = not args.no_viz 50 | traj_len = args.traj_len 51 | 52 | if aslip: 53 | traj_info = [] # 54 | traj_cmd_info = [] # what actually gets sent to robot as state 55 | robot_state_info = [] # robot's estimated state 56 | actual_state_info = [] # actual mujoco state of the robot 57 | 58 | state = torch.Tensor(cassie_env.reset_for_test()) 59 | cassie_env.update_speed(2.0) 60 | print(cassie_env.speed) 61 | count, passed, done = 0, 1, False 62 | while count < traj_len and not done: 63 | 64 | if visualize: 65 | cassie_env.render() 66 | 67 | # Get action and act 68 | action = policy(state, True) 69 | action = action.data.numpy() 70 | state, reward, done, _ = cassie_env.step(action) 71 | state = torch.Tensor(state) 72 | 73 | print(reward) 74 | 75 | # print(cassie_env.phase) 76 | 77 | # See if end state reached 78 | if done or cassie_env.sim.qpos()[2] < 0.4: 79 | print(done) 80 | passed = 0 81 | print("failed") 82 | 83 | # Get trajectory info and robot info 84 | if aslip: 85 | a, b, c, d = cassie_env.get_traj_and_state_info() 86 | traj_info.append(a) 87 | traj_cmd_info.append(b) 88 | else: 89 | c, d = cassie_env.get_state_info() 90 | robot_state_info.append(c) 91 | actual_state_info.append(d) 92 | 93 | count += 1 94 | 95 | robot_state_info = robot_state_info[:-1] 96 | actual_state_info = actual_state_info[:-1] 97 | 98 | if aslip: 99 | 100 | traj_info = traj_info[:-1] 101 | traj_cmd_info = traj_cmd_info[:-1] 102 | 103 | traj_info = np.array(traj_info) 104 | traj_cmd_info = np.array(traj_cmd_info) 105 | robot_state_info = np.array(robot_state_info) 106 | actual_state_info = np.array(actual_state_info) 107 | 108 | fig, axs = plt.subplots(2, 2, figsize=(10, 10)) 109 | 110 | # print(traj_info) 111 | 112 | print(traj_info.shape) 113 | axs[0][0].set_title("XZ plane of traj_info") 114 | axs[0][0].plot(traj_info[:,0,0], traj_info[:,0,2], 'o-', label='cpos') 115 | axs[0][0].plot(traj_info[:,1,0], traj_info[:,1,2], 'o-', label='lpos') 116 | axs[0][0].plot(traj_info[:,2,0], traj_info[:,2,2], 'o-', label='rpos') 117 | 118 | print(traj_cmd_info.shape) 119 | axs[0][1].set_title("XZ plane of traj_cmd_info") 120 | axs[0][1].plot(traj_cmd_info[:,0,0], traj_cmd_info[:,0,2], label='cpos') 121 | axs[0][1].plot(traj_cmd_info[:,1,0], traj_cmd_info[:,1,2], label='lpos') 122 | axs[0][1].plot(traj_cmd_info[:,2,0], traj_cmd_info[:,2,2], label='rpos') 123 | 124 | print(robot_state_info.shape) 125 | axs[1][0].set_title("XZ plane of robot_state_info") 126 | axs[1][0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos') 127 | axs[1][0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos') 128 | axs[1][0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos') 129 | 130 | print(actual_state_info.shape) 131 | axs[1][1].set_title("XZ plane of actual_state_info") 132 | axs[1][1].plot(actual_state_info[:,0,0], 
actual_state_info[:,0,2], label='cpos') 133 | axs[1][1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos') 134 | axs[1][1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos') 135 | 136 | plt.legend() 137 | plt.tight_layout() 138 | plt.show() 139 | 140 | else: 141 | 142 | robot_state_info = np.array(robot_state_info) 143 | actual_state_info = np.array(actual_state_info) 144 | 145 | fig, axs = plt.subplots(1, 2, figsize=(10, 10)) 146 | 147 | print(robot_state_info.shape) 148 | axs[0].set_title("XZ plane of robot_state_info") 149 | axs[0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos') 150 | axs[0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos') 151 | axs[0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos') 152 | 153 | print(actual_state_info.shape) 154 | axs[1].set_title("XZ plane of actual_state_info") 155 | axs[1].plot(actual_state_info[:,0,0], actual_state_info[:,0,2], label='cpos') 156 | axs[1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos') 157 | axs[1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos') 158 | 159 | plt.legend() 160 | plt.tight_layout() 161 | plt.show() 162 | 163 | 164 | parser = argparse.ArgumentParser() 165 | parser.add_argument("--path", type=str, default="../trained_models/ppo/Cassie-v0/IK_traj-aslip_aslip_old_2048_12288_seed-10/", help="path to folder containing policy and run details") 166 | parser.add_argument("--traj_len", default=30, type=int) 167 | parser.add_argument("--debug", default=False, action='store_true') 168 | parser.add_argument("--no_viz", default=False, action='store_true') 169 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not") 170 | 171 | args = parser.parse_args() 172 | 173 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) 174 | 175 | policy = torch.load(args.path + "actor.pt") 176 | 177 | if args.eval: 178 | policy.eval() # NOTE: for some reason the saved nodelta_neutral_stateest_symmetry policy needs this but it breaks all new policies... 179 | 180 | eval_policy(policy, args, run_args) -------------------------------------------------------------------------------- /tools/vis_perturb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") # Adds higher directory to python modules path.
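# Hypothetical invocation (the run directory name is a placeholder; --path is the only
# required argument, per the argparse setup further down in this script):
#
#   python vis_perturb.py --path ../trained_models/ppo/Cassie-v0/<run_name>/
#
# The trailing slash matters because the script concatenates "experiment.pkl" and
# "actor.pt" directly onto args.path.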
3 | 4 | import argparse 5 | import pickle 6 | 7 | import numpy as np 8 | import torch 9 | import time 10 | import copy 11 | 12 | from cassie import CassieEnv 13 | 14 | # Will reset the env to the given phase by reset_for_test, and then 15 | # simulating 2 cycle then to the given phase 16 | @torch.no_grad() 17 | def reset_to_phase(env, policy, phase): 18 | state = torch.Tensor(cassie_env.reset_for_test()) 19 | for i in range(2*(env.phaselen + 1)): 20 | action = policy.act(state, True) 21 | action = action.data.numpy() 22 | state, reward, done, _ = cassie_env.step(action) 23 | state = torch.Tensor(state) 24 | for i in range(phase): 25 | action = policy.act(state, True) 26 | action = action.data.numpy() 27 | state, reward, done, _ = cassie_env.step(action) 28 | state = torch.Tensor(state) 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("--path", type=str, default=None, help="path to folder containing policy and run details") 32 | args = parser.parse_args() 33 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) 34 | 35 | # RUN_NAME = "7b7e24-seed0" 36 | # POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt" 37 | 38 | # Load environment and policy 39 | # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True) 40 | cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 41 | policy = torch.load(args.path + "actor.pt") 42 | 43 | state = torch.Tensor(cassie_env.reset_for_test()) 44 | # cassie_env.sim.step_pd(self.u) 45 | cassie_env.speed = 0.5 46 | cassie_env.phase_add = 1 47 | num_steps = cassie_env.phaselen + 1 48 | # Simulate for "wait_time" first to stabilize 49 | for i in range(num_steps*2): 50 | action = policy(state, True) 51 | action = action.data.numpy() 52 | state, reward, done, _ = cassie_env.step(action) 53 | state = torch.Tensor(state) 54 | curr_time = cassie_env.sim.time() 55 | start_t = curr_time 56 | sim_t = time.time() 57 | while curr_time < start_t + 4: 58 | action = policy(state, True) 59 | action = action.data.numpy() 60 | state, reward, done, _ = cassie_env.step(action) 61 | state = torch.Tensor(state) 62 | curr_time = cassie_env.sim.time() 63 | print("sim time: ", time.time() - sim_t) 64 | exit() 65 | qpos_phase = np.zeros((35, num_steps)) 66 | qvel_phase = np.zeros((32, num_steps)) 67 | action_phase = np.zeros((10, num_steps)) 68 | cassie_state_phase = [copy.deepcopy(cassie_env.cassie_state)] 69 | # print("phase: ", cassie_env.phase) 70 | qpos_phase[:, 0] = cassie_env.sim.qpos() 71 | qvel_phase[:, 0] = cassie_env.sim.qvel() 72 | for i in range(num_steps-1): 73 | action = policy.act(state, True) 74 | action = action.data.numpy() 75 | action_phase[:, i] = action 76 | state, reward, done, _ = cassie_env.step(action) 77 | state = torch.Tensor(state) 78 | # print("phase: ", cassie_env.phase) 79 | qpos_phase[:, i+1] = cassie_env.sim.qpos() 80 | qvel_phase[:, i+1] = cassie_env.sim.qvel() 81 | cassie_state_phase.append(copy.deepcopy(cassie_env.cassie_state)) 82 | 83 | action = policy.act(state, True) 84 | action = action.data.numpy() 85 | action_phase[:, -1] = action 86 | state = torch.Tensor(cassie_env.reset_for_test()) 87 | 88 | cassie_env.speed = 0.5 89 | cassie_env.phase_add = 1 90 | wait_time = 4 91 | dt = 0.05 92 | speedup = 3 93 | perturb_time = 2 94 | perturb_duration = 0.2 95 | perturb_size = 170 96 | perturb_dir = -2*np.pi*np.linspace(0, 1, 5) # Angles from straight forward to 
apply force 97 | perturb_body = "cassie-pelvis" 98 | dir_idx = 0 99 | 100 | ###### Vis a single Perturbation for a given phase ###### 101 | test_phase = 0 102 | reset_to_phase(cassie_env, policy, test_phase) 103 | # cassie_env.sim.set_qpos(qpos_phase[:, test_phase]) 104 | # cassie_env.sim.set_qvel(qvel_phase[:, test_phase]) 105 | # cassie_env.cassie_state = cassie_state_phase[test_phase] 106 | # cassie_env.sim.set_cassie_state(cassie_state_phase[test_phase]) 107 | # cassie_env.phase = test_phase 108 | # state, reward, done, _ = cassie_env.step(action_phase[:, test_phase-1]) 109 | # state = torch.Tensor(state) 110 | render_state = cassie_env.render() 111 | force_x = perturb_size * np.cos(0) 112 | force_y = perturb_size * np.sin(0) 113 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 114 | # Apply perturb (if time) 115 | start_t = cassie_env.sim.time() 116 | while render_state: 117 | if (not cassie_env.vis.ispaused()): 118 | curr_time = cassie_env.sim.time() 119 | if curr_time < start_t+perturb_duration: 120 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body) 121 | # Done perturbing, reset perturb_time and xfrc_applied 122 | elif start_t+perturb_duration < curr_time < start_t+perturb_duration + wait_time: 123 | # print("curr time: ", curr_time) 124 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body) 125 | else: 126 | # pass 127 | print("passed") 128 | break 129 | 130 | # Get action 131 | action = policy.act(state, True) 132 | action = action.data.numpy() 133 | state, reward, done, _ = cassie_env.step(action) 134 | if cassie_env.sim.qpos()[2] < 0.4: 135 | print("failed") 136 | break 137 | else: 138 | state = torch.Tensor(state) 139 | render_state = cassie_env.render() 140 | time.sleep(dt / speedup) 141 | exit() 142 | 143 | ###### Vis all perturbations ###### 144 | render_state = cassie_env.render() 145 | force_x = perturb_size * np.cos(0) 146 | force_y = perturb_size * np.sin(0) 147 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 148 | while render_state: 149 | if (not cassie_env.vis.ispaused()): 150 | curr_time = cassie_env.sim.time() 151 | # Apply perturb (if time) 152 | if curr_time > perturb_time + wait_time: 153 | # Haven't perturbed for full time yet 154 | if curr_time < perturb_time + wait_time + perturb_duration: 155 | print("phase: ", cassie_env.phase) 156 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body) 157 | # Done perturbing, reset perturb_time and xfrc_applied 158 | else: 159 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body) 160 | dir_idx += 1 161 | # Skip last direction, 0 is the same as 2*pi 162 | if dir_idx >= len(perturb_dir) - 1: 163 | dir_idx = 0 164 | perturb_size += 50 165 | force_x = perturb_size * np.cos(perturb_dir[dir_idx]) 166 | force_y = perturb_size * np.sin(perturb_dir[dir_idx]) 167 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 168 | perturb_time = curr_time 169 | 170 | # Get action 171 | action = policy.act(state, True) 172 | action = action.data.numpy() 173 | state, reward, done, _ = cassie_env.step(action) 174 | if cassie_env.sim.qpos()[2] < 0.4: 175 | state = torch.Tensor(cassie_env.reset_for_test()) 176 | cassie_env.speed = 0.5 177 | cassie_env.phase_add = 1 178 | perturb_time = 0 179 | else: 180 | state = torch.Tensor(state) 181 | render_state = cassie_env.render() 182 | time.sleep(dt / speedup) 
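# A minimal sketch of the perturbation schedule the loop above implements (a condensed
# restatement of the same logic, with the 50 N increment taken from the code above):
# directions are swept at the current force magnitude, and once every direction has been
# tried the magnitude is increased.
#
#   def next_perturb(dir_idx, perturb_size, perturb_dir, increment=50):
#       dir_idx += 1
#       if dir_idx >= len(perturb_dir) - 1:      # last entry duplicates 0 == 2*pi
#           dir_idx = 0
#           perturb_size += increment
#       force_x = perturb_size * np.cos(perturb_dir[dir_idx])
#       force_y = perturb_size * np.sin(perturb_dir[dir_idx])
#       return dir_idx, perturb_size, force_x, force_y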
-------------------------------------------------------------------------------- /trained_models/5k_retrain/actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/actor.pt -------------------------------------------------------------------------------- /trained_models/5k_retrain/critic.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/critic.pt -------------------------------------------------------------------------------- /trained_models/5k_retrain/eval_commands.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_commands.npy -------------------------------------------------------------------------------- /trained_models/5k_retrain/eval_perturbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_perturbs.npy -------------------------------------------------------------------------------- /trained_models/5k_retrain/experiment.info: -------------------------------------------------------------------------------- 1 | algo_name: ppo 2 | clip: 0.2 3 | clock_based: True 4 | dyn_random: False 5 | entropy_coeff: 0.0 6 | env_name: Cassie-v0 7 | epochs: 5 8 | eps: 1e-05 9 | gamma: 0.99 10 | history: 0 11 | input_norm_steps: 100 12 | lam: 0.95 13 | lr: 0.0001 14 | max_grad_norm: 0.05 15 | max_traj_len: 300 16 | minibatch_size: 2048 17 | mirror: True 18 | n_itr: 20000 19 | name: model 20 | no_delta: True 21 | num_procs: 64 22 | num_steps: 187 23 | previous: None 24 | recurrent: False 25 | redis_address: None 26 | reward: 5k_speed_reward 27 | simrate: 60 28 | state_est: True 29 | traj: walking 30 | use_gae: False 31 | viz_port: 8097 32 | -------------------------------------------------------------------------------- /trained_models/5k_retrain/experiment.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/experiment.pkl -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info: -------------------------------------------------------------------------------- 1 | command_profile: clock 2 | dyn_random: False 3 | env_name: Cassie-v0 4 | history: 0 5 | ik_baseline: None 6 | input_profile: full 7 | learn_gains: False 8 | mirror: True 9 | no_delta: True 10 | recurrent: False 11 | reward: 5k_speed_reward 12 | simrate: 60 13 | traj: None 14 | -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl -------------------------------------------------------------------------------- /util/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | 6 | from cassie import CassieEnv, CassieTrajEnv, CassiePlayground, CassieStandingEnv 7 | 8 | def env_factory(path, command_profile="clock", input_profile="full", simrate=50, dynamics_randomization=True, mirror=False, learn_gains=False, reward=None, history=0, no_delta=True, traj=None, ik_baseline=False, **kwargs): 9 | from functools import partial 10 | 11 | """ 12 | Returns an *uninstantiated* environment constructor. 13 | 14 | Since environments containing cpointers (e.g. Mujoco envs) can't be serialized, 15 | this allows us to pass their constructors to Ray remote functions instead 16 | (since the gym registry isn't shared across ray subprocesses we can't simply 17 | pass gym.make() either) 18 | 19 | Note: env.unwrapped.spec is never set, if that matters for some reason. 
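    Hypothetical usage (argument values are only illustrative):

        env_fn = env_factory("Cassie-v0", command_profile="clock", input_profile="full",
                             dynamics_randomization=True, reward="5k_speed_reward")
        env = env_fn()   # instantiate inside the worker process, not before serializing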
20 | """ 21 | 22 | # Custom Cassie Environment 23 | if path in ['Cassie-v0', 'CassieTraj-v0', 'CassiePlayground-v0', 'CassieStandingEnv-v0']: 24 | 25 | if path == 'Cassie-v0': 26 | env_fn = partial(CassieEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 27 | elif path == 'CassieTraj-v0': 28 | env_fn = partial(CassieTrajEnv, traj=traj, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history) 29 | elif path == 'CassiePlayground-v0': 30 | env_fn = partial(CassiePlayground, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 31 | elif path == 'CassieStandingEnv-v0': 32 | env_fn = partial(CassieStandingEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 33 | 34 | if mirror: 35 | from rl.envs.wrappers import SymmetricEnv 36 | env_fn = partial(SymmetricEnv, env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=env_fn().mirrored_acts) 37 | 38 | print() 39 | print("Environment: {}".format(path)) 40 | print(" ├ reward: {}".format(reward)) 41 | print(" ├ input prof: {}".format(input_profile)) 42 | print(" ├ cmd prof: {}".format(command_profile)) 43 | print(" ├ learn gains: {}".format(learn_gains)) 44 | print(" ├ dyn_random: {}".format(dynamics_randomization)) 45 | print(" ├ mirror: {}".format(mirror)) 46 | if path == "CassieTraj-v0": 47 | print(" ├ traj: {}".format(traj)) 48 | print(" ├ ik baseline: {}".format(ik_baseline)) 49 | print(" ├ no_delta: {}".format(no_delta)) 50 | print(" └ obs_dim: {}".format(env_fn().observation_space.shape[0])) 51 | 52 | return env_fn 53 | 54 | # OpenAI Gym environment 55 | else: 56 | import gym 57 | spec = gym.envs.registry.spec(path) 58 | _kwargs = spec._kwargs.copy() 59 | _kwargs.update(kwargs) 60 | 61 | try: 62 | if callable(spec._entry_point): 63 | cls = spec._entry_point(**_kwargs) 64 | else: 65 | cls = gym.envs.registration.load(spec._entry_point) 66 | except AttributeError: 67 | if callable(spec.entry_point): 68 | cls = spec.entry_point(**_kwargs) 69 | else: 70 | cls = gym.envs.registration.load(spec.entry_point) 71 | 72 | return partial(cls, **_kwargs) 73 | -------------------------------------------------------------------------------- /util/log.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import hashlib, os, pickle 3 | 4 | class color: 5 | BOLD = '\033[1m\033[48m' 6 | END = '\033[0m' 7 | ORANGE = '\033[38;5;202m' 8 | BLACK = '\033[38;5;240m' 9 | 10 | # Logger stores in trained_models by default 11 | def create_logger(args): 12 | from torch.utils.tensorboard import SummaryWriter 13 | """Use hyperparms to set a directory to output diagnostic files.""" 14 | 15 | arg_dict = args.__dict__ 16 | assert "seed" in arg_dict, \ 17 | "You must provide a 'seed' key in your command line arguments" 18 | assert "logdir" in arg_dict, \ 19 | "You must provide a 'logdir' key in your command line arguments." 20 | assert "env_name" in arg_dict, \ 21 | "You must provide a 'env_name' key in your command line arguments." 
22 | 23 | # sort the keys so the same hyperparameters will always have the same hash 24 | arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0])) 25 | 26 | # remove seed so it doesn't get hashed, store value for filename 27 | # same for logging directory 28 | run_name = arg_dict.pop('run_name') 29 | seed = str(arg_dict.pop("seed")) 30 | logdir = str(arg_dict.pop('logdir')) 31 | env_name = str(arg_dict['env_name']) 32 | 33 | # see if this run has a unique name, if so then that is going to be the name of the folder, even if it overrirdes 34 | if run_name is not None: 35 | logdir = os.path.join(logdir, env_name) 36 | output_dir = os.path.join(logdir, run_name) 37 | else: 38 | # see if we are resuming a previous run, if we are mark as continued 39 | if args.previous is not None: 40 | if args.exchange_reward is not None: 41 | output_dir = args.previous[0:-1] + "_NEW-" + args.reward 42 | else: 43 | print(args.previous[0:-1]) 44 | output_dir = args.previous[0:-1] + '-cont' 45 | else: 46 | # get a unique hash for the hyperparameter settings, truncated at 10 chars 47 | arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed 48 | logdir = os.path.join(logdir, env_name) 49 | output_dir = os.path.join(logdir, arg_hash) 50 | 51 | # create a directory with the hyperparm hash as its name, if it doesn't 52 | # already exist. 53 | os.makedirs(output_dir, exist_ok=True) 54 | 55 | # Create a file with all the hyperparam settings in human-readable plaintext, 56 | # also pickle file for resuming training easily 57 | info_path = os.path.join(output_dir, "experiment.info") 58 | pkl_path = os.path.join(output_dir, "experiment.pkl") 59 | with open(pkl_path, 'wb') as file: 60 | pickle.dump(args, file) 61 | with open(info_path, 'w') as file: 62 | for key, val in arg_dict.items(): 63 | file.write("%s: %s" % (key, val)) 64 | file.write('\n') 65 | 66 | logger = SummaryWriter(output_dir, flush_secs=0.1) # flush_secs=0.1 actually slows down quite a bit, even on parallelized set ups 67 | print("Logging to " + color.BOLD + color.ORANGE + str(output_dir) + color.END) 68 | 69 | logger.dir = output_dir 70 | return logger 71 | 72 | # Rule for curriculum learning is that env observation space should be the same (so attributes like env.clock_based or env.state_est shouldn't be different and are forced to be same here) 73 | # deal with loading hyperparameters of previous run continuation 74 | def parse_previous(args): 75 | if args.previous is not None: 76 | run_args = pickle.load(open(args.previous + "experiment.pkl", "rb")) 77 | args.recurrent = run_args.recurrent 78 | args.env_name = run_args.env_name 79 | args.command_profile = run_args.command_profile 80 | args.input_profile = run_args.input_profile 81 | args.learn_gains = run_args.learn_gains 82 | args.traj = run_args.traj 83 | args.no_delta = run_args.no_delta 84 | args.ik_baseline = run_args.ik_baseline 85 | if args.exchange_reward is not None: 86 | args.reward = args.exchange_reward 87 | args.run_name = run_args.run_name + "_NEW-" + args.reward 88 | else: 89 | args.reward = run_args.reward 90 | args.run_name = run_args.run_name + "--cont" 91 | return args 92 | -------------------------------------------------------------------------------- /util/logo.py: -------------------------------------------------------------------------------- 1 | class color: 2 | BOLD = '\033[1m\033[48m' 3 | END = '\033[0m' 4 | ORANGE = '\033[38;5;202m' 5 | BLACK = '\033[38;5;240m' 6 | 7 | 8 | def print_logo(subtitle="", option=2): 9 | print() 10 | 
print(color.BOLD + color.ORANGE + " .8. " + color.BLACK + " 8 888888888o " + color.ORANGE + "8 8888888888 `8.`8888. ,8' ") 11 | print(color.BOLD + color.ORANGE + " .888. " + color.BLACK + " 8 8888 `88. " + color.ORANGE + "8 8888 `8.`8888. ,8' ") 12 | print(color.BOLD + color.ORANGE + " :88888. " + color.BLACK + " 8 8888 `88 " + color.ORANGE + "8 8888 `8.`8888. ,8' ") 13 | print(color.BOLD + color.ORANGE + " . `88888. " + color.BLACK + " 8 8888 ,88 " + color.ORANGE + "8 8888 `8.`8888.,8' ") 14 | print(color.BOLD + color.ORANGE + " .8. `88888. " + color.BLACK + " 8 8888. ,88' " + color.ORANGE + "8 888888888888 `8.`88888' ") 15 | print(color.BOLD + color.ORANGE + " .8`8. `88888. " + color.BLACK + " 8 888888888P' " + color.ORANGE + "8 8888 .88.`8888. ") 16 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8'`8.`8888. ") 17 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") 18 | print(color.BOLD + color.ORANGE + " .888888888. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") 19 | print(color.BOLD + color.ORANGE + ".8' `8. `88888." + color.BLACK + " 8 8888 " + color.ORANGE + "8 888888888888 .8' `8.`8888. " + color.END) 20 | print("\n") 21 | print(subtitle) 22 | print("\n") 23 | --------------------------------------------------------------------------------