├── .gitignore
├── 5k_test.py
├── LICENSE
├── README.md
├── apex-logo.png
├── apex.py
├── cassie
├── __init__.py
├── cassie.py
├── cassie_footdist_env.py
├── cassie_mininput_env.py
├── cassie_noaccel_footdist_env.py
├── cassie_noaccel_footdist_nojoint_env.py
├── cassie_noaccel_footdist_omniscient.py
├── cassie_novel_footdist_env.py
├── cassie_playground.py
├── cassie_standing_env.py
├── cassie_traj.py
├── cassiemujoco
│ ├── WhyteField.png
│ ├── __init__.py
│ ├── cassie-stl-meshes
│ │ ├── achilles-rod.stl
│ │ ├── bleachers.stl
│ │ ├── foot-crank.stl
│ │ ├── foot.stl
│ │ ├── heel-spring.stl
│ │ ├── hip-pitch.stl
│ │ ├── hip-roll.stl
│ │ ├── hip-yaw.stl
│ │ ├── knee-spring.stl
│ │ ├── knee.stl
│ │ ├── pelvis.stl
│ │ ├── plantar-rod.stl
│ │ ├── shin.stl
│ │ ├── tarsus.stl
│ │ └── terrains
│ │ │ ├── bowl.png
│ │ │ ├── radial_gradient.png
│ │ │ ├── side_hill.png
│ │ │ ├── side_slope.png
│ │ │ ├── slope.png
│ │ │ ├── step_pyramid.png
│ │ │ └── terrain_1.png
│ ├── cassie.xml
│ ├── cassie.xml.orig
│ ├── cassieUDP.py
│ ├── cassie_crown.xml
│ ├── cassie_drop_step.xml
│ ├── cassie_hfield.xml
│ ├── cassie_muTor.xml
│ ├── cassie_noise_terrain.xml
│ ├── cassie_playground.xml
│ ├── cassie_soft.xml
│ ├── cassie_stiff.xml
│ ├── cassie_track.xml
│ ├── cassie_waypoints.xml
│ ├── cassiemujoco.py
│ ├── cassiemujoco_ctypes.py
│ ├── include
│ │ ├── CassieCoreSim.h
│ │ ├── PdInput.h
│ │ ├── StateOutput.h
│ │ ├── cassie_in_t.h
│ │ ├── cassie_out_t.h
│ │ ├── cassie_user_in_t.h
│ │ ├── cassiemujoco.h
│ │ ├── pd_in_t.h
│ │ ├── state_out_t.h
│ │ └── udp.h
│ ├── libcassiemujoco.so
│ ├── terrain_noise.xml
│ ├── terrain_racetrack.xml
│ ├── terrain_random_hills.xml
│ ├── terrains
│ │ ├── crown.png
│ │ ├── drop_step.png
│ │ ├── hfield.png
│ │ ├── hfield2.png
│ │ ├── hills.png
│ │ ├── noise.png
│ │ ├── noise1.npy
│ │ ├── noise2.npy
│ │ ├── noise3.npy
│ │ ├── noisy.png
│ │ ├── racetrack1.png
│ │ ├── rand_hill1.npy
│ │ ├── rand_hill2.npy
│ │ ├── rand_hill3.npy
│ │ ├── slope.png
│ │ ├── utils
│ │ │ └── noise_generator.py
│ │ └── wavefield.png
│ ├── test_terrain_noise.xml
│ └── test_terrain_slope.xml
├── deprecated
│ ├── aslipik_env.py
│ ├── aslipik_unified_env.py
│ ├── aslipik_unified_no_delta_env.py
│ ├── cassie_env.py
│ ├── env_test.py
│ ├── ground_friction_env.py
│ ├── ik_env.py
│ ├── no_delta_env.py
│ ├── plotting.py
│ ├── slipik_env.py
│ ├── speed_double_freq_env.py
│ ├── speed_env.py
│ ├── speed_freq_env.py
│ ├── speed_freq_no_delta_env.py
│ ├── speed_no_delta_env.py
│ ├── speed_no_delta_neutral_foot_env.py
│ └── taskspace_env.py
├── missions
│ ├── 90_left
│ │ ├── command_trajectory_0.5.pkl
│ │ ├── command_trajectory_0.9.pkl
│ │ ├── command_trajectory_1.4.pkl
│ │ ├── command_trajectory_1.9.pkl
│ │ ├── command_trajectory_2.3.pkl
│ │ ├── command_trajectory_2.8.pkl
│ │ ├── waypoints_0.5.csv
│ │ ├── waypoints_0.9.csv
│ │ ├── waypoints_1.4.csv
│ │ ├── waypoints_1.9.csv
│ │ ├── waypoints_2.3.csv
│ │ └── waypoints_2.8.csv
│ ├── 90_right
│ │ ├── command_trajectory_0.5.pkl
│ │ ├── command_trajectory_0.9.pkl
│ │ ├── command_trajectory_1.4.pkl
│ │ ├── command_trajectory_1.9.pkl
│ │ ├── command_trajectory_2.3.pkl
│ │ ├── command_trajectory_2.8.pkl
│ │ ├── waypoints_0.5.csv
│ │ ├── waypoints_0.9.csv
│ │ ├── waypoints_1.4.csv
│ │ ├── waypoints_1.9.csv
│ │ ├── waypoints_2.3.csv
│ │ └── waypoints_2.8.csv
│ ├── __init__.py
│ ├── add_waypoints.py
│ ├── command_mission.py
│ ├── curvy
│ │ ├── command_trajectory_0.5.pkl
│ │ ├── command_trajectory_0.9.pkl
│ │ ├── command_trajectory_1.4.pkl
│ │ ├── command_trajectory_1.9.pkl
│ │ ├── command_trajectory_2.3.pkl
│ │ ├── command_trajectory_2.8.pkl
│ │ ├── waypoints_0.5.csv
│ │ ├── waypoints_0.9.csv
│ │ ├── waypoints_1.4.csv
│ │ ├── waypoints_1.9.csv
│ │ ├── waypoints_2.3.csv
│ │ └── waypoints_2.8.csv
│ ├── default
│ │ ├── command_trajectory.pkl
│ │ └── waypoints.csv
│ └── straight
│ │ ├── command_trajectory_0.5.pkl
│ │ ├── command_trajectory_0.9.pkl
│ │ ├── command_trajectory_1.4.pkl
│ │ ├── command_trajectory_1.9.pkl
│ │ ├── command_trajectory_2.3.pkl
│ │ ├── command_trajectory_2.8.pkl
│ │ ├── waypoints_0.5.csv
│ │ ├── waypoints_0.9.csv
│ │ ├── waypoints_1.4.csv
│ │ ├── waypoints_1.9.csv
│ │ ├── waypoints_2.3.csv
│ │ ├── waypoints_2.4.csv
│ │ └── waypoints_2.8.csv
├── outfile.npz
├── phase_function.py
├── plotting_ex.py
├── quaternion_function.py
├── rewards
│ ├── __init__.py
│ ├── aslip_rewards.py
│ ├── clock_rewards.py
│ ├── command_reward.py
│ ├── iros_paper_reward.py
│ ├── reward_clock_funcs
│ │ ├── incentive_clock_smooth.pkl
│ │ ├── incentive_clock_smooth_aerial.pkl
│ │ ├── incentive_clock_smooth_zero.pkl
│ │ ├── incentive_clock_strict0.1.pkl
│ │ ├── incentive_clock_strict0.1_aerial.pkl
│ │ ├── incentive_clock_strict0.1_zero.pkl
│ │ ├── incentive_clock_strict0.4.pkl
│ │ ├── incentive_clock_strict0.4_aerial.pkl
│ │ ├── incentive_clock_strict0.4_zero.pkl
│ │ ├── no_incentive_aslip_clock_strict0.3.pkl
│ │ ├── no_incentive_clock_smooth.pkl
│ │ ├── no_incentive_clock_smooth_aerial.pkl
│ │ ├── no_incentive_clock_smooth_zero.pkl
│ │ ├── no_incentive_clock_strict0.1.pkl
│ │ ├── no_incentive_clock_strict0.1_aerial.pkl
│ │ ├── no_incentive_clock_strict0.1_zero.pkl
│ │ ├── no_incentive_clock_strict0.4.pkl
│ │ ├── no_incentive_clock_strict0.4_aerial.pkl
│ │ └── no_incentive_clock_strict0.4_zero.pkl
│ ├── rnn_dyn_random_reward.py
│ ├── side_speedmatch_foottraj_reward.py
│ ├── side_speedmatch_heightvel_reward.py
│ ├── side_speedmatch_heuristic_reward.py
│ ├── side_speedmatch_rewards.py
│ ├── side_speedmatch_torquesmooth_reward.py
│ ├── speedmatch_footorient_joint_smooth_reward.py
│ ├── speedmatch_heuristic_reward.py
│ ├── speedmatch_rewards.py
│ ├── standing_rewards.py
│ └── trajmatch_reward.py
└── trajectory
│ ├── .DS_Store
│ ├── __init__.py
│ ├── aslipTrajsTaskSpace
│ ├── walkCycle_0.0.pkl
│ ├── walkCycle_0.1.pkl
│ ├── walkCycle_0.2.pkl
│ ├── walkCycle_0.3.pkl
│ ├── walkCycle_0.4.pkl
│ ├── walkCycle_0.5.pkl
│ ├── walkCycle_0.6.pkl
│ ├── walkCycle_0.7.pkl
│ ├── walkCycle_0.8.pkl
│ ├── walkCycle_0.9.pkl
│ ├── walkCycle_1.0.pkl
│ ├── walkCycle_1.1.pkl
│ ├── walkCycle_1.2.pkl
│ ├── walkCycle_1.3.pkl
│ ├── walkCycle_1.4.pkl
│ ├── walkCycle_1.5.pkl
│ ├── walkCycle_1.6.pkl
│ ├── walkCycle_1.7.pkl
│ ├── walkCycle_1.8.pkl
│ ├── walkCycle_1.9.pkl
│ └── walkCycle_2.0.pkl
│ ├── aslip_trajectory.py
│ ├── backward_trajectory_Nov
│ ├── ikNet_state_dict.pt
│ ├── more-poses-trial.bin
│ ├── spline_stepping_traj.pkl
│ ├── stepdata.bin
│ ├── stepping_trajectory_Nov
│ ├── test.py
│ ├── traj_from_ref_foot_data.pkl
│ ├── trajectory.py
│ └── walk-in-place-downsampled.bin
├── img
├── output.gif
└── output2.gif
├── mirror_policy_check.py
├── plot_policy.py
├── rl
├── __init__.py
├── algos
│ ├── __init__.py
│ ├── ars.py
│ ├── async_td3.py
│ ├── dpg.py
│ ├── ppo.py
│ └── sync_td3.py
├── config
│ └── monitor.ini
├── distributions
│ ├── __init__.py
│ ├── beta.py
│ └── gaussian.py
├── envs
│ ├── __init__.py
│ ├── monitor.py
│ ├── normalize.py
│ ├── vectorize.py
│ ├── wrapper.py
│ └── wrappers.py
├── policies
│ ├── __init__.py
│ ├── actor.py
│ ├── base.py
│ └── critic.py
└── utils
│ ├── __init__.py
│ ├── param_noise.py
│ ├── remote_replay.py
│ └── render.py
├── test_policy.py
├── tools
├── .DS_Store
├── aslip_tests
│ ├── GRF_2KHz.pkl
│ ├── GRF_compare.py
│ ├── foot_placement.py
│ ├── parallelized.py
│ ├── plots
│ │ └── footpos_err.png
│ └── taskspace_tracking.py
├── cassie_top_white.png
├── command_trajectory.pkl
├── compare_pols.py
├── eval_mission.py
├── eval_perturb.py
├── eval_sensitivity.py
├── test_commands.py
├── test_perturb_eval_phase.npy
├── utils
│ ├── __init__.py
│ └── elements.py
├── vis_input_and_state.py
├── vis_perturb.py
└── waypoint_trajectory.py
├── trained_models
├── 5k_retrain
│ ├── actor.pt
│ ├── critic.pt
│ ├── eval_commands.npy
│ ├── eval_perturbs.npy
│ ├── experiment.info
│ └── experiment.pkl
└── nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2
│ ├── 5k_test.pkl
│ ├── actor.pt
│ ├── critic.pt
│ ├── eval_commands.npy
│ ├── eval_perturbs.npy
│ ├── experiment.info
│ └── experiment.pkl
└── util
├── env.py
├── eval.py
├── log.py
└── logo.py
/.gitignore:
--------------------------------------------------------------------------------
1 | cassieXie/
2 | MUJOCO_LOG.TXT
3 | cassie/cassiemujoco/mjkey.txt
4 | cassie/cassiemujoco/mjpro150/
5 | cassie/pickled
6 | sim-to-real/
7 | cassie/cassieIK_SL.pt
8 | cassie/taskspace_to_jointpos.pt
9 | .vscode/
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 | *$py.class
15 |
16 | # C extensions
17 | *.so
18 | !libcassiemujoco.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 | MANIFEST
38 |
39 | # PyInstaller
40 | # Usually these files are written by a python script from a template
41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 |
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 |
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .coverage
53 | .coverage.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *.cover
58 | .hypothesis/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | .static_storage/
67 | .media/
68 | local_settings.py
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # celery beat schedule file
90 | celerybeat-schedule
91 |
92 | # SageMath parsed files
93 | *.sage.py
94 |
95 | # Environments
96 | .env
97 | .venv
98 | env/
99 | venv/
100 | ENV/
101 | env.bak/
102 | venv.bak/
103 |
104 | # Spyder project settings
105 | .spyderproject
106 | .spyproject
107 |
108 | # Rope project settings
109 | .ropeproject
110 |
111 | # mkdocs documentation
112 | /site
113 |
114 | # mypy
115 | .mypy_cache/
116 |
117 | # ray files
118 | /ray_tmp
119 | ray_timeline.json
120 |
121 | # trained models
122 | /trained_models/*
123 |
124 | # logging directory
125 | /logs
126 | ref_qposes.png
127 |
128 | # testing directory files
129 | /tools/aslip_pipeline/testTS_logs/*
130 | /tools/aslip_pipeline/testVaryVel_logs/*
131 | /tools/apex_plots/*
132 | /tools/waypoints.csv
133 |
134 | *.sh.e*
135 | *.sh.o*
136 |
137 |
138 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) 2017 Pedro Autran e Morais
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ----
4 |
5 | Apex is a small, modular library containing implementations of continuous-control reinforcement learning algorithms. It is fully compatible with OpenAI Gym.
6 |
7 |
8 |
9 |
10 | ## Running experiments
11 |
12 | ### Basics
13 | Any algorithm can be run from the apex.py entry point.
14 |
15 | To run PPO on a Cassie environment:
16 |
17 | ```bash
18 | python apex.py ppo --env_name Cassie-v0 --num_procs 12 --run_name experiment01
19 | ```
20 |
21 | To run asynchronous TD3 on the Gym environment Walker-v2:
22 |
23 | ```bash
24 | python apex.py td3_async --env_name Walker-v2 --num_procs 12 --run_name experiment02
25 | ```
26 |
27 | ## Logging details / Monitoring live training progress
28 | Tensorboard logging is enabled by default for all algorithms. The logger expects that you supply an argument named ```logdir```, containing the root directory you want to store your logfiles in, and an argument named ```seed```, which is used to seed the pseudorandom number generators.
29 |
30 | A basic command line script illustrating this is:
31 |
32 | ```bash
33 | python apex.py ars --logdir logs/ars --seed 1337
34 | ```
35 |
36 | The resulting directory tree would look something like this:
37 | ```
38 | trained_models/ # directory with all of the saved models and tensorboard logs
39 | └── ars # algorithm name
40 | └── Cassie-v0 # environment name
41 | └── 8b8b12-seed1 # unique run name created with hash of hyperparameters
42 | ├── actor.pt # actor network for algo
43 | ├── critic.pt # critic network for algo
44 | ├── events.out.tfevents # tensorboard binary file
45 | ├── experiment.info # readable hyperparameters for this run
46 | └── experiment.pkl # loadable pickle of hyperparameters
47 | ```
48 |
49 | Using tensorboard makes it easy to compare experiments and resume training later on.
50 |
51 | To see live training progress:
52 | 
53 | Run ```$ tensorboard --logdir logs/```, then navigate to ```http://localhost:6006/``` in your browser.
54 |
55 | ## Cassie Environments:
56 | * `Cassie-v0` : basic unified environment for walking/running policies
57 | * `CassieTraj-v0` : unified environment with reference trajectories
58 | * `CassiePlayground-v0` : environment for executing autonomous missions
59 | * `CassieStanding-v0` : environment for training standing policies
60 |
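A minimal interaction sketch for these environments, assuming the Gym-style `reset`/`step` interface implied above (the constructor arguments and the exact `step` return signature are assumptions, not taken from the environment code):

```python
import numpy as np
from cassie import CassieEnv  # exported from cassie/__init__.py

# Hypothetical usage: default construction and a 4-tuple step() return are assumptions.
env = CassieEnv()
obs = env.reset()
for _ in range(300):
    action = np.zeros(10)  # Cassie has 10 actuated motors (see cassie_user_in_t.h)
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()
```
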
61 | ## Algorithms:
62 | #### Currently implemented:
63 | * Parallelism with [Ray](https://github.com/ray-project/ray)
64 | * [GAE](https://arxiv.org/abs/1506.02438)/TD(lambda) estimators
65 | * [PPO](https://arxiv.org/abs/1707.06347), VPG with ratio objective and with log likelihood objective
66 | * [TD3](https://arxiv.org/abs/1802.09477) with [Parameter Noise Exploration](https://arxiv.org/abs/1706.01905)
67 | * [DDPG](https://arxiv.org/abs/1509.02971)
68 | * [RDPG](https://arxiv.org/abs/1512.04455)
69 | * [ARS](https://arxiv.org/abs/1803.07055)
70 | * Entropy based exploration bonus
71 | * advantage centering (observation normalization WIP)
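
For reference, the GAE(λ)/TD(λ) estimators listed above compute advantages from TD residuals δ_t = r_t + γV(s_{t+1}) − V(s_t) via A_t = Σ_{l≥0} (γλ)^l δ_{t+l}. A minimal NumPy sketch of that recurrence (general background, not this repository's implementation):

```python
import numpy as np

def gae_advantages(rewards, values, gamma=0.99, lam=0.95):
    """rewards: shape (T,); values: shape (T+1,), last entry is the bootstrap value."""
    deltas = rewards + gamma * values[1:] - values[:-1]  # TD residuals
    advantages = np.zeros_like(deltas)
    running = 0.0
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * lam * running  # backward accumulation
        advantages[t] = running
    return advantages
```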
72 |
73 | #### To be implemented long term:
74 | * [SAC](https://arxiv.org/abs/1801.01290)
75 | * [GPO](https://arxiv.org/abs/1711.01012)
76 | * [NAF](https://arxiv.org/abs/1603.00748)
77 | * [SVG](https://arxiv.org/abs/1510.09142)
78 | * [I2A](https://arxiv.org/abs/1707.06203)
79 | * [PGPE](http://ieeexplore.ieee.org/document/5708821/?reload=true)
80 | * [Value Distribution](https://arxiv.org/pdf/1707.06887.pdf)
81 | * Oracle methods (e.g. [GPS](https://arxiv.org/abs/1610.00529))
82 | * CUDA support (should be trivial but I don't have a GPU to test on currently)
83 |
84 | #### Maybe implemented in future:
85 |
86 | * [DXNN](https://arxiv.org/abs/1008.2412)
87 | * [ACER](https://arxiv.org/abs/1611.01224) and other off-policy methods
88 | * Model-based methods
89 |
90 | ## Acknowledgements
91 |
92 | Thanks to @ikostrikov, whose great implementations were used for debugging. Also thanks to @rll for rllab, which inspired a lot of the high-level interface and logging for this library, and to @OpenAI for the original PPO TensorFlow implementation. Thanks to @sfujim for the clean implementations of TD3 and DDPG in PyTorch, and to @modestyachts for the easy-to-understand ARS implementation.
93 |
--------------------------------------------------------------------------------
/apex-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/apex-logo.png
--------------------------------------------------------------------------------
/cassie/__init__.py:
--------------------------------------------------------------------------------
1 | # Unified
2 | from .cassie import CassieEnv
3 | from .cassie_traj import CassieTrajEnv
4 | from .cassie_playground import CassiePlayground
5 | from .cassie_standing_env import CassieStandingEnv # sorta old/unused
6 |
7 | # Proprietary
8 | from .cassie_noaccel_footdist_omniscient import CassieEnv_noaccel_footdist_omniscient
9 | from .cassie_footdist_env import CassieEnv_footdist
10 | from .cassie_noaccel_footdist_env import CassieEnv_noaccel_footdist
11 | from .cassie_noaccel_footdist_nojoint_env import CassieEnv_noaccel_footdist_nojoint
12 | from .cassie_novel_footdist_env import CassieEnv_novel_footdist
13 | from .cassie_mininput_env import CassieEnv_mininput
14 |
15 | # CassieMujocoSim
16 | from .cassiemujoco import *
17 |
18 |
19 | ##############
20 | # DEPRECATED #
21 | ##############
22 | # from .cassie_env import CassieEnv
23 | # from .taskspace_env import CassieTSEnv
24 | # from .aslipik_env import CassieIKEnv
25 | # from .aslipik_unified_env import UnifiedCassieIKEnv
26 | # from .aslipik_unified_no_delta_env import UnifiedCassieIKEnvNoDelta
27 | # from .no_delta_env import CassieEnv_nodelta
28 | # from .dynamics_random import CassieEnv_rand_dyn
29 | # from .speed_double_freq_env import CassieEnv_speed_dfreq
30 | # from .ground_friction_env import CassieGroundFrictionEnv
31 | # from .cassie_standing_env import CassieStandingEnv
32 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/WhyteField.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/WhyteField.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/__init__.py:
--------------------------------------------------------------------------------
1 | from .cassiemujoco import *
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/foot.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/knee.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/shin.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/shin.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/cassieUDP.py:
--------------------------------------------------------------------------------
1 | from .cassiemujoco_ctypes import *
2 | import os
3 | import ctypes
4 | import numpy as np
5 | 
6 | class CassieUdp:
7 |     def __init__(self, remote_addr='127.0.0.1', remote_port='25000',
8 |                  local_addr='0.0.0.0', local_port='25001'):
9 |         self.sock = udp_init_client(str.encode(remote_addr),
10 |                                     str.encode(remote_port),
11 |                                     str.encode(local_addr),
12 |                                     str.encode(local_port))
13 |         self.packet_header_info = packet_header_info_t()
14 |         self.recvlen = 2 + 697     # PACKET_HEADER_LEN + CASSIE_OUT_T_PACKED_LEN
15 |         self.sendlen = 2 + 58      # PACKET_HEADER_LEN + CASSIE_USER_IN_T_PACKED_LEN
16 |         self.recvlen_pd = 2 + 493  # PACKET_HEADER_LEN + STATE_OUT_T_PACKED_LEN
17 |         self.sendlen_pd = 2 + 476  # PACKET_HEADER_LEN + PD_IN_T_PACKED_LEN
18 |         self.recvbuf = (ctypes.c_ubyte * max(self.recvlen, self.recvlen_pd))()
19 |         self.sendbuf = (ctypes.c_ubyte * max(self.sendlen, self.sendlen_pd))()
20 |         self.inbuf = ctypes.cast(ctypes.byref(self.recvbuf, 2),
21 |                                  ctypes.POINTER(ctypes.c_ubyte))
22 |         self.outbuf = ctypes.cast(ctypes.byref(self.sendbuf, 2),
23 |                                   ctypes.POINTER(ctypes.c_ubyte))
24 | 
25 |     def send(self, u):
26 |         pack_cassie_user_in_t(u, self.outbuf)
27 |         send_packet(self.sock, self.sendbuf, self.sendlen, None, 0)
28 | 
29 |     def send_pd(self, u):
30 |         pack_pd_in_t(u, self.outbuf)
31 |         send_packet(self.sock, self.sendbuf, self.sendlen_pd, None, 0)
32 | 
33 |     def recv_wait(self):
34 |         nbytes = -1
35 |         while nbytes != self.recvlen:
36 |             nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen,
37 |                                        None, None)
38 |         process_packet_header(self.packet_header_info,
39 |                               self.recvbuf, self.sendbuf)
40 |         cassie_out = cassie_out_t()
41 |         unpack_cassie_out_t(self.inbuf, cassie_out)
42 |         return cassie_out
43 | 
44 |     def recv_wait_pd(self):
45 |         nbytes = -1
46 |         while nbytes != self.recvlen_pd:
47 |             nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd,
48 |                                        None, None)
49 |         process_packet_header(self.packet_header_info,
50 |                               self.recvbuf, self.sendbuf)
51 |         state_out = state_out_t()
52 |         unpack_state_out_t(self.inbuf, state_out)
53 |         return state_out
54 | 
55 |     def recv_newest(self):
56 |         nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen,
57 |                                    None, None)
58 |         if nbytes != self.recvlen:
59 |             return None
60 |         process_packet_header(self.packet_header_info,
61 |                               self.recvbuf, self.sendbuf)
62 |         cassie_out = cassie_out_t()
63 |         unpack_cassie_out_t(self.inbuf, cassie_out)
64 |         return cassie_out
65 | 
66 |     def recv_newest_pd(self):
67 |         nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd,
68 |                                    None, None)
69 |         if nbytes != self.recvlen_pd:
70 |             return None
71 |         process_packet_header(self.packet_header_info,
72 |                               self.recvbuf, self.sendbuf)
73 |         state_out = state_out_t()
74 |         unpack_state_out_t(self.inbuf, state_out)
75 |         return state_out
76 | 
77 |     def delay(self):
78 |         return ord(self.packet_header_info.delay)
79 | 
80 |     def seq_num_in_diff(self):
81 |         return ord(self.packet_header_info.seq_num_in_diff)
82 | 
83 |     def __del__(self):
84 |         udp_close(self.sock)
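
A minimal usage sketch for `CassieUdp` (hypothetical: it uses the default addresses/ports above and assumes `pd_in_t` is re-exported by the surrounding `cassiemujoco` package, as in the deprecated `env_test.py`):

```python
from cassie.cassiemujoco import pd_in_t          # assumed re-export of the ctypes struct
from cassie.cassiemujoco.cassieUDP import CassieUdp

cassie = CassieUdp()           # defaults: remote 127.0.0.1:25000, local 0.0.0.0:25001
u = pd_in_t()                  # zero-initialized PD command (torques, targets, gains)
cassie.send_pd(u)              # prime the link so the other end starts replying
state = cassie.recv_wait_pd()  # blocks until a full state_out_t packet arrives
print(state.pelvis.position[:], state.pelvis.orientation[:])
```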
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/CassieCoreSim.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef CASSIECORESIM_H
18 | #define CASSIECORESIM_H
19 |
20 | #include "cassie_user_in_t.h"
21 | #include "cassie_out_t.h"
22 | #include "cassie_in_t.h"
23 |
24 | typedef struct CassieCoreSim CassieCoreSim;
25 |
26 | #ifdef __cplusplus
27 | extern "C" {
28 | #endif
29 |
30 | CassieCoreSim* CassieCoreSim_alloc(void);
31 | void CassieCoreSim_copy(CassieCoreSim *dst, const CassieCoreSim *src);
32 | void CassieCoreSim_free(CassieCoreSim *sys);
33 | void CassieCoreSim_setup(CassieCoreSim *sys);
34 | void CassieCoreSim_step(CassieCoreSim *sys, const cassie_user_in_t *in1,
35 | const cassie_out_t *in2, cassie_in_t *out1);
36 |
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | #endif // CASSIECORESIM_H
41 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/PdInput.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef PDINPUT_H
18 | #define PDINPUT_H
19 |
20 | #include "pd_in_t.h"
21 | #include "cassie_out_t.h"
22 | #include "cassie_user_in_t.h"
23 |
24 | typedef struct PdInput PdInput;
25 |
26 | #ifdef __cplusplus
27 | extern "C" {
28 | #endif
29 |
30 | PdInput* PdInput_alloc(void);
31 | void PdInput_copy(PdInput *dst, const PdInput *src);
32 | void PdInput_free(PdInput *sys);
33 | void PdInput_setup(PdInput *sys);
34 | void PdInput_step(PdInput *sys, const pd_in_t *in1, const cassie_out_t
35 | *in2, cassie_user_in_t *out1);
36 |
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | #endif // PDINPUT_H
41 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/StateOutput.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef STATEOUTPUT_H
18 | #define STATEOUTPUT_H
19 |
20 | #include "cassie_out_t.h"
21 | #include "state_out_t.h"
22 |
23 | typedef struct StateOutput StateOutput;
24 |
25 | #ifdef __cplusplus
26 | extern "C" {
27 | #endif
28 |
29 | StateOutput* StateOutput_alloc(void);
30 | void StateOutput_copy(StateOutput *dst, const StateOutput *src);
31 | void StateOutput_free(StateOutput *sys);
32 | void StateOutput_setup(StateOutput *sys);
33 | void StateOutput_step(StateOutput *sys, const cassie_out_t *in1,
34 | state_out_t *out1);
35 |
36 | #ifdef __cplusplus
37 | }
38 | #endif
39 | #endif // STATEOUTPUT_H
40 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/cassie_in_t.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef CASSIE_IN_T_H
18 | #define CASSIE_IN_T_H
19 |
20 | #define CASSIE_IN_T_PACKED_LEN 91
21 |
22 | #include <stdbool.h>
23 |
24 | typedef struct {
25 | unsigned short controlWord;
26 | double torque;
27 | } elmo_in_t;
28 |
29 | typedef struct {
30 | elmo_in_t hipRollDrive;
31 | elmo_in_t hipYawDrive;
32 | elmo_in_t hipPitchDrive;
33 | elmo_in_t kneeDrive;
34 | elmo_in_t footDrive;
35 | } cassie_leg_in_t;
36 |
37 | typedef struct {
38 | short channel[14];
39 | } radio_in_t;
40 |
41 | typedef struct {
42 | radio_in_t radio;
43 | bool sto;
44 | bool piezoState;
45 | unsigned char piezoTone;
46 | } cassie_pelvis_in_t;
47 |
48 | typedef struct {
49 | cassie_pelvis_in_t pelvis;
50 | cassie_leg_in_t leftLeg;
51 | cassie_leg_in_t rightLeg;
52 | } cassie_in_t;
53 |
54 |
55 | #ifdef __cplusplus
56 | extern "C" {
57 | #endif
58 |
59 | void pack_cassie_in_t(const cassie_in_t *bus, unsigned char *bytes);
60 | void unpack_cassie_in_t(const unsigned char *bytes, cassie_in_t *bus);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 | #endif // CASSIE_IN_T_H
66 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/cassie_out_t.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef CASSIE_OUT_T_H
18 | #define CASSIE_OUT_T_H
19 |
20 | #define CASSIE_OUT_T_PACKED_LEN 697
21 |
22 | #include <stdbool.h>
23 |
24 | typedef short DiagnosticCodes;
25 |
26 |
27 | typedef struct {
28 | bool dataGood;
29 | double stateOfCharge;
30 | double voltage[12];
31 | double current;
32 | double temperature[4];
33 | } battery_out_t;
34 |
35 | typedef struct {
36 | double position;
37 | double velocity;
38 | } cassie_joint_out_t;
39 |
40 | typedef struct {
41 | unsigned short statusWord;
42 | double position;
43 | double velocity;
44 | double torque;
45 | double driveTemperature;
46 | double dcLinkVoltage;
47 | double torqueLimit;
48 | double gearRatio;
49 | } elmo_out_t;
50 |
51 | typedef struct {
52 | elmo_out_t hipRollDrive;
53 | elmo_out_t hipYawDrive;
54 | elmo_out_t hipPitchDrive;
55 | elmo_out_t kneeDrive;
56 | elmo_out_t footDrive;
57 | cassie_joint_out_t shinJoint;
58 | cassie_joint_out_t tarsusJoint;
59 | cassie_joint_out_t footJoint;
60 | unsigned char medullaCounter;
61 | unsigned short medullaCpuLoad;
62 | bool reedSwitchState;
63 | } cassie_leg_out_t;
64 |
65 | typedef struct {
66 | bool radioReceiverSignalGood;
67 | bool receiverMedullaSignalGood;
68 | double channel[16];
69 | } radio_out_t;
70 |
71 | typedef struct {
72 | int etherCatStatus[6];
73 | int etherCatNotifications[21];
74 | double taskExecutionTime;
75 | unsigned int overloadCounter;
76 | double cpuTemperature;
77 | } target_pc_out_t;
78 |
79 | typedef struct {
80 | bool dataGood;
81 | unsigned short vpeStatus;
82 | double pressure;
83 | double temperature;
84 | double magneticField[3];
85 | double angularVelocity[3];
86 | double linearAcceleration[3];
87 | double orientation[4];
88 | } vectornav_out_t;
89 |
90 | typedef struct {
91 | target_pc_out_t targetPc;
92 | battery_out_t battery;
93 | radio_out_t radio;
94 | vectornav_out_t vectorNav;
95 | unsigned char medullaCounter;
96 | unsigned short medullaCpuLoad;
97 | bool bleederState;
98 | bool leftReedSwitchState;
99 | bool rightReedSwitchState;
100 | double vtmTemperature;
101 | } cassie_pelvis_out_t;
102 |
103 | typedef struct {
104 | cassie_pelvis_out_t pelvis;
105 | cassie_leg_out_t leftLeg;
106 | cassie_leg_out_t rightLeg;
107 | bool isCalibrated;
108 | DiagnosticCodes messages[4];
109 | } cassie_out_t;
110 |
111 | #define EMPTY ((DiagnosticCodes)0)
112 | #define LEFT_HIP_NOT_CALIB ((DiagnosticCodes)5)
113 | #define LEFT_KNEE_NOT_CALIB ((DiagnosticCodes)6)
114 | #define RIGHT_HIP_NOT_CALIB ((DiagnosticCodes)7)
115 | #define RIGHT_KNEE_NOT_CALIB ((DiagnosticCodes)8)
116 | #define LOW_BATTERY_CHARGE ((DiagnosticCodes)200)
117 | #define HIGH_CPU_TEMP ((DiagnosticCodes)205)
118 | #define HIGH_VTM_TEMP ((DiagnosticCodes)210)
119 | #define HIGH_ELMO_DRIVE_TEMP ((DiagnosticCodes)215)
120 | #define HIGH_STATOR_TEMP ((DiagnosticCodes)220)
121 | #define LOW_ELMO_LINK_VOLTAGE ((DiagnosticCodes)221)
122 | #define HIGH_BATTERY_TEMP ((DiagnosticCodes)225)
123 | #define RADIO_DATA_BAD ((DiagnosticCodes)230)
124 | #define RADIO_SIGNAL_BAD ((DiagnosticCodes)231)
125 | #define BMS_DATA_BAD ((DiagnosticCodes)235)
126 | #define VECTORNAV_DATA_BAD ((DiagnosticCodes)236)
127 | #define VPE_GYRO_SATURATION ((DiagnosticCodes)240)
128 | #define VPE_MAG_SATURATION ((DiagnosticCodes)241)
129 | #define VPE_ACC_SATURATION ((DiagnosticCodes)242)
130 | #define VPE_ATTITUDE_BAD ((DiagnosticCodes)245)
131 | #define VPE_ATTITUDE_NOT_TRACKING ((DiagnosticCodes)246)
132 | #define ETHERCAT_DC_ERROR ((DiagnosticCodes)400)
133 | #define ETHERCAT_ERROR ((DiagnosticCodes)410)
134 | #define LOAD_CALIB_DATA_ERROR ((DiagnosticCodes)590)
135 | #define CRITICAL_BATTERY_CHARGE ((DiagnosticCodes)600)
136 | #define CRITICAL_CPU_TEMP ((DiagnosticCodes)605)
137 | #define CRITICAL_VTM_TEMP ((DiagnosticCodes)610)
138 | #define CRITICAL_ELMO_DRIVE_TEMP ((DiagnosticCodes)615)
139 | #define CRITICAL_STATOR_TEMP ((DiagnosticCodes)620)
140 | #define CRITICAL_BATTERY_TEMP ((DiagnosticCodes)625)
141 | #define TORQUE_LIMIT_REACHED ((DiagnosticCodes)630)
142 | #define JOINT_LIMIT_REACHED ((DiagnosticCodes)635)
143 | #define ENCODER_FAILURE ((DiagnosticCodes)640)
144 | #define SPRING_FAILURE ((DiagnosticCodes)645)
145 | #define LEFT_LEG_MEDULLA_HANG ((DiagnosticCodes)700)
146 | #define RIGHT_LEG_MEDULLA_HANG ((DiagnosticCodes)701)
147 | #define PELVIS_MEDULLA_HANG ((DiagnosticCodes)703)
148 | #define CPU_OVERLOAD ((DiagnosticCodes)704)
149 |
150 | #ifdef __cplusplus
151 | extern "C" {
152 | #endif
153 |
154 | void pack_cassie_out_t(const cassie_out_t *bus, unsigned char *bytes);
155 | void unpack_cassie_out_t(const unsigned char *bytes, cassie_out_t *bus);
156 |
157 | #ifdef __cplusplus
158 | }
159 | #endif
160 | #endif // CASSIE_OUT_T_H
161 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/cassie_user_in_t.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef CASSIE_USER_IN_T_H
18 | #define CASSIE_USER_IN_T_H
19 |
20 | #define CASSIE_USER_IN_T_PACKED_LEN 58
21 |
22 | #include <stdbool.h>
23 |
24 | typedef struct {
25 | double torque[10];
26 | short telemetry[9];
27 | } cassie_user_in_t;
28 |
29 |
30 | #ifdef __cplusplus
31 | extern "C" {
32 | #endif
33 |
34 | void pack_cassie_user_in_t(const cassie_user_in_t *bus, unsigned char *bytes);
35 | void unpack_cassie_user_in_t(const unsigned char *bytes, cassie_user_in_t *bus);
36 |
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | #endif // CASSIE_USER_IN_T_H
41 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/pd_in_t.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef PD_IN_T_H
18 | #define PD_IN_T_H
19 |
20 | #define PD_IN_T_PACKED_LEN 476
21 |
22 | #include <stdbool.h>
23 |
24 | typedef struct {
25 | double torque[5];
26 | double pTarget[5];
27 | double dTarget[5];
28 | double pGain[5];
29 | double dGain[5];
30 | } pd_motor_in_t;
31 |
32 | typedef struct {
33 | double torque[6];
34 | double pTarget[6];
35 | double dTarget[6];
36 | double pGain[6];
37 | double dGain[6];
38 | } pd_task_in_t;
39 |
40 | typedef struct {
41 | pd_task_in_t taskPd;
42 | pd_motor_in_t motorPd;
43 | } pd_leg_in_t;
44 |
45 | typedef struct {
46 | pd_leg_in_t leftLeg;
47 | pd_leg_in_t rightLeg;
48 | double telemetry[9];
49 | } pd_in_t;
50 |
51 |
52 | #ifdef __cplusplus
53 | extern "C" {
54 | #endif
55 |
56 | void pack_pd_in_t(const pd_in_t *bus, unsigned char *bytes);
57 | void unpack_pd_in_t(const unsigned char *bytes, pd_in_t *bus);
58 |
59 | #ifdef __cplusplus
60 | }
61 | #endif
62 | #endif // PD_IN_T_H
63 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/state_out_t.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef STATE_OUT_T_H
18 | #define STATE_OUT_T_H
19 |
20 | #define STATE_OUT_T_PACKED_LEN 493
21 |
22 | #include <stdbool.h>
23 |
24 | typedef struct {
25 | double stateOfCharge;
26 | double current;
27 | } state_battery_out_t;
28 |
29 | typedef struct {
30 | double position[3];
31 | double orientation[4];
32 | double footRotationalVelocity[3];
33 | double footTranslationalVelocity[3];
34 | double toeForce[3];
35 | double heelForce[3];
36 | } state_foot_out_t;
37 |
38 | typedef struct {
39 | double position[6];
40 | double velocity[6];
41 | } state_joint_out_t;
42 |
43 | typedef struct {
44 | double position[10];
45 | double velocity[10];
46 | double torque[10];
47 | } state_motor_out_t;
48 |
49 | typedef struct {
50 | double position[3];
51 | double orientation[4];
52 | double rotationalVelocity[3];
53 | double translationalVelocity[3];
54 | double translationalAcceleration[3];
55 | double externalMoment[3];
56 | double externalForce[3];
57 | } state_pelvis_out_t;
58 |
59 | typedef struct {
60 | double channel[16];
61 | bool signalGood;
62 | } state_radio_out_t;
63 |
64 | typedef struct {
65 | double height;
66 | double slope[2];
67 | } state_terrain_out_t;
68 |
69 | typedef struct {
70 | state_pelvis_out_t pelvis;
71 | state_foot_out_t leftFoot;
72 | state_foot_out_t rightFoot;
73 | state_terrain_out_t terrain;
74 | state_motor_out_t motor;
75 | state_joint_out_t joint;
76 | state_radio_out_t radio;
77 | state_battery_out_t battery;
78 | } state_out_t;
79 |
80 |
81 | #ifdef __cplusplus
82 | extern "C" {
83 | #endif
84 |
85 | void pack_state_out_t(const state_out_t *bus, unsigned char *bytes);
86 | void unpack_state_out_t(const unsigned char *bytes, state_out_t *bus);
87 |
88 | #ifdef __cplusplus
89 | }
90 | #endif
91 | #endif // STATE_OUT_T_H
92 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/include/udp.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Agility Robotics
3 | *
4 | * Permission to use, copy, modify, and distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #ifndef UDP_H
18 | #define UDP_H
19 |
20 | #define PACKET_HEADER_LEN 2
21 |
22 | // Data and results for processing packet header
23 | typedef struct {
24 | char seq_num_out;
25 | char seq_num_in_last;
26 | char delay;
27 | char seq_num_in_diff;
28 | } packet_header_info_t;
29 |
30 |
31 | // Process packet header used to measure delay and skipped packets
32 | void process_packet_header(packet_header_info_t *info,
33 | const unsigned char *header_in,
34 | unsigned char *header_out);
35 |
36 | #ifndef _WIN32
37 | #include <sys/socket.h>
38 |
39 | // Create a UDP socket listening at a specific address/port
40 | int udp_init_host(const char *addr_str, const char *port_str);
41 |
42 | // Create a UDP socket connected and listening to specific addresses/ports
43 | int udp_init_client(const char *remote_addr_str, const char *remote_port_str,
44 | const char *local_addr_str, const char *local_port_str);
45 |
46 | // Close a UDP socket
47 | void udp_close(int sock);
48 |
49 | // Get newest valid packet in RX buffer
50 | ssize_t get_newest_packet(int sock, void *recvbuf, size_t recvlen,
51 | struct sockaddr *src_addr, socklen_t *addrlen);
52 |
53 | // Wait for a new valid packet
54 | ssize_t wait_for_packet(int sock, void *recvbuf, size_t recvlen,
55 | struct sockaddr *src_addr, socklen_t *addrlen);
56 |
57 | // Send a packet
58 | ssize_t send_packet(int sock, void *sendbuf, size_t sendlen,
59 | struct sockaddr *dst_addr, socklen_t addrlen);
60 |
61 | #endif // _WIN32
62 | #endif // UDP_H
63 |
--------------------------------------------------------------------------------
/cassie/cassiemujoco/libcassiemujoco.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/libcassiemujoco.so
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/crown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/crown.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/drop_step.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/drop_step.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/hfield.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/hfield2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield2.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/hills.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hills.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/noise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/noise1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise1.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/noise2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise2.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/noise3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise3.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/noisy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noisy.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/racetrack1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/racetrack1.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/rand_hill1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill1.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/rand_hill2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill2.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/rand_hill3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill3.npy
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/slope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/slope.png
--------------------------------------------------------------------------------
/cassie/cassiemujoco/terrains/utils/noise_generator.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 |
5 | if __name__ == '__main__':
6 | parser = argparse.ArgumentParser(description='PNG Noise Generator for MuJoCo height fields')
7 | parser.add_argument('--filename', '-f', action='store', default='noise',
8 | help='Name of file output. '
9 | 'File will be saved as a PNG file outside of the folder this is located in'
10 | '(usage: -f )')
11 | parser.add_argument('--dimension', '-d', type=int, nargs='+', default=(32, 32),
12 | help='Size of the 2D array (usage: -d )')
13 | parser.add_argument('--granularity', '-g', type=int, default=100,
14 | help='How fine or coarse the noise is. '
15 | 'The larger the number, the finer the noise (usage: -g )')
16 | parser.add_argument('--start_size', '-s', type=int, default=2,
17 | help='The middle of the map will always be flat for starting. '
18 | 'Choose how big this block size will be (usage: -s )')
19 | parser.add_argument('--seed', type=int, default=None,
20 | help='Set seed for reproducible maps (usage: --seed )')
21 |
22 | args = parser.parse_args()
23 |
24 | if args.seed is not None:
25 | np.random.seed(args.seed)
26 |
27 | midpoint = (int(args.dimension[0] / 2), int(args.dimension[1] / 2))
28 |
29 | # build noisy array
30 | terrain = np.random.randint(args.granularity, size=args.dimension)
31 |
32 | terrain[midpoint[0] - args.start_size:midpoint[0] + args.start_size,
33 | midpoint[1] - args.start_size:midpoint[1] + args.start_size] = 0
34 |
35 | # save as png file
36 | plt.imsave('../{}.png'.format(args.filename), terrain, cmap='gray')
37 |
--------------------------------------------------------------------------------
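A note on usage: the generator above is driven entirely by its argparse flags (--filename/-f, --dimension/-d, --granularity/-g, --start_size/-s, --seed) and writes the PNG one level above the current working directory. As a rough, self-contained sketch of the same recipe with the values spelled out (the seed and output name below are arbitrary examples, not values used anywhere in the repo):

import numpy as np
import matplotlib.pyplot as plt

# Same steps as noise_generator.py, without argparse: integer noise plus a flat starting block.
dim, granularity, start_size, seed = (32, 32), 100, 2, 7    # example values only
np.random.seed(seed)
terrain = np.random.randint(granularity, size=dim)          # heights in [0, granularity)
mid = (dim[0] // 2, dim[1] // 2)
terrain[mid[0] - start_size:mid[0] + start_size,
        mid[1] - start_size:mid[1] + start_size] = 0        # keep the centre flat for the robot to start on
plt.imsave('noise_example.png', terrain, cmap='gray')        # grayscale PNG usable as a MuJoCo height field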
/cassie/cassiemujoco/terrains/wavefield.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/wavefield.png
--------------------------------------------------------------------------------
/cassie/deprecated/env_test.py:
--------------------------------------------------------------------------------
1 | # import numpy as np
2 |
3 | # from cassie_env import CassieEnv
4 |
5 | # from mujoco.cassiemujoco import *
6 | # from trajectory.trajectory import CassieTrajectory
7 |
8 |
9 | # traj = CassieTrajectory("trajectory/stepdata.bin")
10 |
11 |
12 | # env = CassieEnv("trajectory/stepdata.bin")
13 | # csim = CassieSim()
14 |
15 | # u = pd_in_t()
16 |
17 | # test actual trajectory
18 |
19 | # for i in range(len(traj.qpos)):
20 | # qpos = traj.qpos[i]
21 | # qvel = traj.qvel[i]
22 |
23 | # csim.set_qpos(qpos)
24 | # csim.set_qvel(qvel)
25 |
26 | # y = csim.step_pd(u)
27 |
28 | # cvis.draw(csim)
29 |
30 | # print(i, end='\r')
31 |
32 |
33 | # test trajectory wrap-around
34 |
35 | # env.render()
36 | # env.reset()
37 |
38 | # u = pd_in_t()
39 | # while True:
40 | # # start = t.time()
41 | # # while True:
42 | # # stop = t.time()
43 | # # #print(stop-start)
44 | # # #print("stop")
45 | # # if stop - start > 0.033:
46 | # # break
47 |
48 | # pos, vel = env.get_ref_state()
49 |
50 | # '''env.phase = env.phase + 14
51 | # pos2, vel2 = env.get_kin_state()
52 | # print(pos[7:21]-pos2[21:35])
53 | # env.phase = env.phase - 14'''
54 |
55 | # env.phase += 1
56 | # # #print(env.speed)
57 | # if env.phase >= 28:
58 | # env.phase = 0
59 | # env.counter += 1
60 | # #break
61 | # env.sim.set_qpos(pos)
62 | # env.sim.set_qvel(vel)
63 | # y = env.sim.step_pd(u)
64 | # env.render()
--------------------------------------------------------------------------------
/cassie/deprecated/plotting.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from tempfile import TemporaryFile
4 |
5 | # from cassie_env import CassieEnv
6 | from trajectory.trajectory import CassieTrajectory
7 | #from mujoco.cassiemujoco import *
8 | import time as t
9 | traj = CassieTrajectory("/home/robot/Desktop/apex/cassie/trajectory/stepdata.bin")
10 | # env = CassieEnv("walking")
11 |
12 | qpos_traj = traj.qpos
13 | time_traj = traj.time
14 |
15 | tt = traj.time
16 | #u = pd_in_t()
17 |
18 | # load your data
19 | data = np.load('cassie/outfile.npz')
20 | motor = data['motor']
21 | joint = data['joint']
22 | qpos = data['qpos_replay']
23 | time = data['time']
24 |
25 | delt_t = time[4] - time[3]
26 | delt_t_traj = time_traj[4] - time_traj[3]
27 | same_time = delt_t / delt_t_traj
28 | time_traj = time_traj * same_time
29 |
30 | #time = time * (60/2000)
31 | numStates = len(qpos)
32 |
33 | # np.savetxt("test_arr.txt", qpos[0:1000, 34])
34 | print("Made it")
35 | # test actual trajectory
36 |
37 | rand = np.random.randint(1, 101, 1000)
38 |
39 | #log data
40 | plt.subplot(2,2,1)
41 | plt.plot(time[0:500], motor[0:500,4], 'r')
42 | plt.plot(time[0:500], motor[0:500, 9], 'k')
43 |
44 | ax2 = plt.subplot(2,2,2)
45 | ax2.plot(time[1200:1300], joint[1200:1300,2], 'r')
46 | ax2.plot(time[1200:1300], joint[1200:1300, 5], 'k')
47 |
48 | ax3 = plt.subplot(2,2,3)
49 | ax3.plot(time[1200:1300], qpos[1200:1300,20], 'r')
50 | ax3.plot(time[1200:1300], qpos[1200:1300, 34], 'k')
51 |
52 | ax2.get_shared_x_axes().join(ax2, ax3)
53 | ax2.set_xticklabels([])
54 |
55 |
56 | #trajectory data
57 | plt.subplot(2,2,4)
58 | plt.plot(time_traj[:], qpos_traj[:,20], 'r')
59 | plt.plot(time_traj[:], qpos_traj[:, 34], 'k')
60 | plt.show()
61 |
62 | #trajectory data
63 |
64 | plt.plot(tt[:], qpos_traj[:,32] + qpos_traj[:, 33], 'r')
65 | # plt.plot(tt[:], qpos_traj[:,19], 'b')
66 | # plt.plot(tt[:], qpos_traj[:, 20], 'k')
67 | plt.show()
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_0.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.5.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_0.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_1.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.4.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_1.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_2.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.3.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_left/command_trajectory_2.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.8.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_0.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.5.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_0.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_1.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.4.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_1.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_2.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.3.pkl
--------------------------------------------------------------------------------
/cassie/missions/90_right/command_trajectory_2.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.8.pkl
--------------------------------------------------------------------------------
/cassie/missions/__init__.py:
--------------------------------------------------------------------------------
1 | # We use this directory for storing missions -- high-level commands to policies
2 |
3 | from .command_mission import *
4 | from .add_waypoints import add_waypoints
--------------------------------------------------------------------------------
/cassie/missions/add_waypoints.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import sys
5 | import argparse
6 | import pandas as pd
7 | import lxml.etree as ET
8 |
9 | def read_xml(file):
10 | return ET.parse(file, ET.XMLParser(remove_blank_text=True))
11 |
12 | space=10
13 | color='1 0.9 0 0.7'
14 |
15 |
16 | def add_waypoints(input_file, output_file, waypoints_file):
17 |
18 | try:
19 | # create trajectory data frame
20 | traj_df = pd.read_csv(waypoints_file, header=None, usecols=[0, 1], names=['X', 'Y'])
21 |
22 | # read xml file
23 | tree = read_xml(input_file)
24 |
25 | except TypeError:
26 | if not input_file:
27 | print('No XML file provided...\n')
28 | else:
29 | print(str(input_file) + ' not found. Check XML file path.')
30 | sys.exit(0)
31 |
32 | # get root of xml tree
33 | root = tree.getroot()
34 |
35 | # get worldbody subelement from root
36 | worldbody = root.find('worldbody')
37 |
38 | for idx, pos in enumerate(traj_df.values[20::int(space)], start=1):
39 | # create a waypoint subelement
40 | ET.SubElement(worldbody, 'geom', {'name': 'waypoint{}'.format(idx),
41 | 'pos': '{} {} 1.01 '.format(pos[0], pos[1]),
42 | 'size': '0.03 0.03 0.03',
43 | 'type': 'sphere',
44 | 'contype': '0',
45 | 'rgba': color})
46 |
47 | # add to root
48 | tree.write(output_file, encoding='utf-8', pretty_print=True, xml_declaration=True)
49 |
50 |
51 | if __name__ == "__main__":
52 | add_waypoints("default")  # NOTE: broken stub -- add_waypoints() requires (input_file, output_file, waypoints_file)
53 |
--------------------------------------------------------------------------------
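add_waypoints() reads X/Y pairs from a waypoints CSV (skipping the first 20 rows, then taking every tenth) and adds a small yellow sphere geom for each one to the worldbody of a MuJoCo XML. A minimal calling sketch; the paths below are chosen only for illustration and are not a documented workflow:

from cassie.missions.add_waypoints import add_waypoints

add_waypoints(
    input_file='cassie/cassiemujoco/cassie.xml',                   # model to annotate
    output_file='cassie/cassiemujoco/cassie_waypoints.xml',        # annotated copy written here
    waypoints_file='cassie/missions/90_left/waypoints_0.5.csv')    # CSV of X, Y positions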
/cassie/missions/command_mission.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import os
4 |
5 | class CommandTrajectory:
6 | def __init__(self, mission_name):
7 | mission_path = os.path.join(mission_name, "command_trajectory.pkl")
8 | with open(mission_path, "rb") as f:
9 | trajectory = pickle.load(f)
10 |
11 | self.global_pos = np.copy(trajectory["compos"])
12 | self.speed_cmd = np.copy(trajectory["speed"])
13 |
14 | # NOTE: still need to rotate translational velocity and acceleration
15 | self.orient = np.copy(trajectory["orient"])
16 | self.prev_orient = 0
17 |
18 | self.trajlen = len(self.speed_cmd)
19 |
20 | # print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.global_pos[:5], self.speed_cmd[:5], self.orient[:5]))
21 | # print(self.speed_cmd.shape)
22 | # print(self.orient.shape)
23 | # print(np.max(self.speed_cmd))
24 | # input()
--------------------------------------------------------------------------------
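CommandTrajectory expects a mission directory containing a command_trajectory.pkl whose pickled dict holds 'compos', 'speed' and 'orient' arrays; of the shipped missions, only missions/default uses that unsuffixed file name directly. A minimal loading sketch (path assumed relative to the repo root):

from cassie.missions.command_mission import CommandTrajectory

traj = CommandTrajectory('cassie/missions/default')
print(traj.trajlen)              # number of commanded steps
print(traj.global_pos.shape)     # commanded centre-of-mass positions
print(traj.speed_cmd[:5])        # first few commanded speeds
print(traj.orient[:5])           # first few commanded heading values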
/cassie/missions/curvy/command_trajectory_0.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.5.pkl
--------------------------------------------------------------------------------
/cassie/missions/curvy/command_trajectory_0.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/curvy/command_trajectory_1.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.4.pkl
--------------------------------------------------------------------------------
/cassie/missions/curvy/command_trajectory_1.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/curvy/command_trajectory_2.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.3.pkl
--------------------------------------------------------------------------------
/cassie/missions/curvy/command_trajectory_2.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.8.pkl
--------------------------------------------------------------------------------
/cassie/missions/default/command_trajectory.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/default/command_trajectory.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_0.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.5.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_0.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_1.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.4.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_1.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.9.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_2.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.3.pkl
--------------------------------------------------------------------------------
/cassie/missions/straight/command_trajectory_2.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.8.pkl
--------------------------------------------------------------------------------
/cassie/outfile.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/outfile.npz
--------------------------------------------------------------------------------
/cassie/plotting_ex.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import time
3 | import math
4 |
5 | # from cassie_env import CassieEnv
6 |
7 | from cassiemujoco import *
8 | from trajectory.trajectory import CassieTrajectory
9 | import matplotlib.pyplot as plt
10 | from matplotlib import style
11 | from matplotlib.animation import FuncAnimation
12 | import matplotlib.animation as animation
13 | from mpl_toolkits.mplot3d import Axes3D
14 | from IPython import display
15 |
16 | def visualise_sim_graph(file_path, freq_of_sim):
17 | traj = np.load(file_path)
18 | # env = CassieEnv("walking")
19 | # csim = CassieSim("./cassie/cassiemujoco/cassie.xml")
20 | # vis = CassieVis(csim, "./cassie/cassiemujoco/cassie.xml")
21 | u = pd_in_t()
22 |
23 | # pelvisXYZ = traj.f.qpos_replay[:, 0:3]
24 | # render_state = vis.draw(csim)
25 | # saved_time = traj.f.time[:]
26 |
27 | #################Graphing###########
28 | log_time = traj.f.time[:]
29 | y_val = traj.f.qpos_replay[:,2] #z - height
30 | x_data= log_time
31 | y_data = y_val
32 |
33 | delt_x = (x_data[1] - x_data[0]) * 1000 #convert seconds to ms
34 |
35 | num_frames = math.ceil(len(x_data) / 10)
36 |
37 |
38 |
39 | Writer = animation.writers['ffmpeg']
40 | writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
41 |
42 | output = plt.plot([])
43 | plt.close()
44 | print(output[0])
45 |
46 | x = np.linspace(0,2*np.pi, 100)
47 |
48 | fig = plt.figure()
49 |
50 | lines = plt.plot([])
51 | line = lines[0]
52 |
53 | #other setup //set x and y lims
54 | plt.xlim(x_data.min(), x_data.max())
55 | plt.ylim(y_data.min(), y_data.max())
56 | def animate(frame):
57 | #update
58 | x = x_data[:frame*10]
59 | y = y_data[:frame*10]
60 | # y = np.sin(x + 2*np.pi * frame/100)
61 | line.set_data((x,y))
62 |
63 | anim = FuncAnimation(fig, animate, frames=num_frames, interval=(1/freq_of_sim * 1000 + (10 * delt_x))) #20 is 50 fps
64 |
65 | anim.save('lines.mp4', writer=writer)
66 | # html = display.HTML(video)
67 | # display.display(html)
68 |
69 | plt.close()
70 |
71 | visualise_sim_graph("./outfile8.npz", 30)
--------------------------------------------------------------------------------
/cassie/quaternion_function.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 |
4 | def inverse_quaternion(quaternion):
5 | result = np.copy(quaternion)
6 | result[1:4] = -result[1:4]
7 | return result
8 |
9 | def quaternion_product(q1, q2):
10 | result = np.zeros(4)
11 | result[0] = q1[0]*q2[0]-q1[1]*q2[1]-q1[2]*q2[2]-q1[3]*q2[3]
12 | result[1] = q1[0]*q2[1]+q2[0]*q1[1]+q1[2]*q2[3]-q1[3]*q2[2]
13 | result[2] = q1[0]*q2[2]-q1[1]*q2[3]+q1[2]*q2[0]+q1[3]*q2[1]
14 | result[3] = q1[0]*q2[3]+q1[1]*q2[2]-q1[2]*q2[1]+q1[3]*q2[0]
15 | return result
16 |
17 | def rotate_by_quaternion(vector, quaternion):
18 | q1 = np.copy(quaternion)
19 | q2 = np.zeros(4)
20 | q2[1:4] = np.copy(vector)
21 | q3 = inverse_quaternion(quaternion)
22 | q = quaternion_product(q2, q3)
23 | q = quaternion_product(q1, q)
24 | result = q[1:4]
25 | return result
26 |
27 | def quaternion2euler(quaternion):
28 | w = quaternion[0]
29 | x = quaternion[1]
30 | y = quaternion[2]
31 | z = quaternion[3]
32 | ysqr = y * y
33 |
34 | t0 = +2.0 * (w * x + y * z)
35 | t1 = +1.0 - 2.0 * (x * x + ysqr)
36 | X = math.degrees(math.atan2(t0, t1))
37 |
38 | t2 = +2.0 * (w * y - z * x)
39 | t2 = +1.0 if t2 > +1.0 else t2
40 | t2 = -1.0 if t2 < -1.0 else t2
41 | Y = math.degrees(math.asin(t2))
42 |
43 | t3 = +2.0 * (w * z + x * y)
44 | t4 = +1.0 - 2.0 * (ysqr + z * z)
45 | Z = math.degrees(math.atan2(t3, t4))
46 |
47 | result = np.zeros(3)
48 | result[0] = X * np.pi / 180
49 | result[1] = Y * np.pi / 180
50 | result[2] = Z * np.pi / 180
51 |
52 | return result
53 |
54 | def euler2quat(z=0, y=0, x=0):
55 |
56 | z = z/2.0
57 | y = y/2.0
58 | x = x/2.0
59 | cz = math.cos(z)
60 | sz = math.sin(z)
61 | cy = math.cos(y)
62 | sy = math.sin(y)
63 | cx = math.cos(x)
64 | sx = math.sin(x)
65 | result = np.array([
66 | cx*cy*cz - sx*sy*sz,
67 | cx*sy*sz + cy*cz*sx,
68 | cx*cz*sy - sx*cy*sz,
69 | cx*cy*sz + sx*cz*sy])
70 | if result[0] < 0:
71 | result = -result
72 | return result
--------------------------------------------------------------------------------
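These helpers use (w, x, y, z) quaternion ordering and radians for the Euler angles (quaternion2euler converts through degrees and straight back, so the result is still radians). A small self-check, restricted to a pure-yaw rotation since that is how the reward code below mostly uses them; the import path assumes the module is reachable as cassie.quaternion_function:

import numpy as np
from cassie.quaternion_function import euler2quat, quaternion2euler, rotate_by_quaternion

q = euler2quat(z=0.3, y=0.0, x=0.0)                          # yaw of 0.3 rad as (w, x, y, z)
print(quaternion2euler(q))                                   # ~ [0.0, 0.0, 0.3]  (roll, pitch, yaw)
print(rotate_by_quaternion(np.array([1.0, 0.0, 0.0]), q))    # ~ [0.955, 0.296, 0.0]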
/cassie/rewards/__init__.py:
--------------------------------------------------------------------------------
1 | # We use this directory for keeping track of reward functions. Each reward function operates on an object of CassieEnv_v2, passed as 'self'
2 |
3 | from .clock_rewards import *
4 | from .aslip_rewards import *
5 | from .rnn_dyn_random_reward import *
6 | from .iros_paper_reward import *
7 | from .command_reward import *
8 |
9 | # from .speedmatch_footorient_joint_smooth_reward import *
10 | from .speedmatch_rewards import *
11 | from .trajmatch_reward import *
12 | from .standing_rewards import *
13 | # from .speedmatch_heuristic_reward import *
14 | from .side_speedmatch_rewards import *
15 | # from .side_speedmatch_foottraj_reward import *
16 | # from .side_speedmatch_heightvel_reward import *
17 | # from .side_speedmatch_heuristic_reward import *
18 | # from .side_speedmatch_torquesmooth_reward import *
--------------------------------------------------------------------------------
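As the comment at the top of this __init__ says, every reward here is written as a plain function whose single argument is the environment instance, so it can be called directly or bound as a method. A minimal sketch; make_cassie_env() is a placeholder for however the environment is constructed, not a real API in this repo:

import types
from cassie.rewards.iros_paper_reward import iros_paper_reward

env = make_cassie_env()   # placeholder: any object exposing sim.qpos()/qvel(), phase, pos_idx, get_ref_state()

r = iros_paper_reward(env)                                       # call it like a free function...
env.compute_reward = types.MethodType(iros_paper_reward, env)    # ...or bind it as a method
r = env.compute_reward()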
/cassie/rewards/command_reward.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 |
4 | def quaternion2euler(quaternion):
5 | w = quaternion[0]
6 | x = quaternion[1]
7 | y = quaternion[2]
8 | z = quaternion[3]
9 | ysqr = y * y
10 |
11 | t0 = +2.0 * (w * x + y * z)
12 | t1 = +1.0 - 2.0 * (x * x + ysqr)
13 | X = math.degrees(math.atan2(t0, t1))
14 |
15 | t2 = +2.0 * (w * y - z * x)
16 | t2 = +1.0 if t2 > +1.0 else t2
17 | t2 = -1.0 if t2 < -1.0 else t2
18 | Y = math.degrees(math.asin(t2))
19 |
20 | t3 = +2.0 * (w * z + x * y)
21 | t4 = +1.0 - 2.0 * (ysqr + z * z)
22 | Z = math.degrees(math.atan2(t3, t4))
23 |
24 | result = np.zeros(3)
25 | result[0] = X * np.pi / 180
26 | result[1] = Y * np.pi / 180
27 | result[2] = Z * np.pi / 180
28 |
29 | return result
30 |
31 | def euler2quat(z=0, y=0, x=0):
32 |
33 | z = z/2.0
34 | y = y/2.0
35 | x = x/2.0
36 | cz = math.cos(z)
37 | sz = math.sin(z)
38 | cy = math.cos(y)
39 | sy = math.sin(y)
40 | cx = math.cos(x)
41 | sx = math.sin(x)
42 | result = np.array([
43 | cx*cy*cz - sx*sy*sz,
44 | cx*sy*sz + cy*cz*sx,
45 | cx*cz*sy - sx*cy*sz,
46 | cx*cy*sz + sx*cz*sy])
47 | if result[0] < 0:
48 | result = -result
49 | return result
50 |
51 | def command_reward(self):
52 | qpos = np.copy(self.sim.qpos())
53 | qvel = np.copy(self.sim.qvel())
54 |
55 | # get current speed and orientation
56 | curr_pos = qpos[0:3]
57 | curr_speed = qvel[0]
58 | curr_orient = quaternion2euler(qpos[3:7])[2]
59 |
60 | # desired speed and orientation
61 | desired_pos = self.command_traj.global_pos[self.command_counter] + self.last_position
62 | desired_speed = self.command_traj.speed_cmd[self.command_counter]
63 | desired_orient = self.command_traj.orient[self.command_counter]
64 |
65 | compos_error = np.linalg.norm(curr_pos - desired_pos)
66 | speed_error = np.linalg.norm(curr_speed - desired_speed)
67 | orientation_error = np.linalg.norm(curr_orient - desired_orient)
68 |
69 | reward = 0.2 * np.exp(-speed_error) + \
70 | 0.3 * np.exp(-compos_error) + \
71 | 0.5 * np.exp(-orientation_error)
72 |
73 | if self.debug:
74 | print("reward: {6}\nspeed:\t{0:.2f}, % = {1:.2f}\ncompos:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\n\n".format(
75 | 0.2 * np.exp(-speed_error), 0.2 * np.exp(-speed_error) / reward * 100,
76 | 0.3 * np.exp(-compos_error), 0.3 * np.exp(-compos_error) / reward * 100,
77 | 0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
78 | reward
79 | )
80 | )
81 | print(self.command_counter)
82 | print("actual speed: {}\tdesired_speed: {}".format(curr_speed, self.speed))
83 | print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
84 | print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
85 | return reward
86 |
87 | def command_reward_no_pos(self):
88 | qpos = np.copy(self.sim.qpos())
89 | qvel = np.copy(self.sim.qvel())
90 |
91 | # get current speed and orientation
92 | # curr_pos = qpos[0:3]
93 | curr_speed = qvel[0]
94 | curr_orient = quaternion2euler(qpos[3:7])[2]
95 |
96 | # desired speed and orientation
97 | desired_speed = self.command_traj.speed_cmd[self.command_counter]
98 | desired_orient = self.command_traj.orient[self.command_counter]
99 |
100 | # compos_error = np.linalg.norm(curr_pos - desired_pos)
101 | speed_error = np.linalg.norm(curr_speed - desired_speed)
102 | orientation_error = np.linalg.norm(curr_orient - desired_orient)
103 |
104 | reward = 0.5 * np.exp(-speed_error) + \
105 | 0.5 * np.exp(-orientation_error)
106 |
107 | if self.debug:
108 | print("reward: {4}\nspeed:\t{0:.2f}, % = {1:.2f}\norient:\t{2:.2f}, % = {3:.2f}\n\n".format(
109 | 0.5 * np.exp(-speed_error), 0.5 * np.exp(-speed_error) / reward * 100,
110 | 0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
111 | reward
112 | )
113 | )
114 | print(self.command_counter)
115 | print("actual speed: {}\tdesired_speed: {}".format(curr_speed, self.speed))
116 | # print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
117 | print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
118 | return reward
119 |
120 | def command_reward_keepalive(self):
121 | reward = 1.0
122 | if self.debug:
123 | print("reward = 1.0\tcounter={}".format(self.command_counter))
124 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/iros_paper_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def iros_paper_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | ref_pos, ref_vel = self.get_ref_state(self.phase)
8 |
9 | # TODO: should be variable; where do these come from?
10 | # TODO: see magnitude of state variables to gauge contribution to reward
11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05]
12 |
13 | joint_error = 0
14 | com_error = 0
15 | orientation_error = 0
16 | spring_error = 0
17 |
18 | # each joint pos
19 | for i, j in enumerate(self.pos_idx):
20 | target = ref_pos[j]
21 | actual = qpos[j]
22 |
23 | joint_error += 30 * weight[i] * (target - actual) ** 2
24 |
25 | # center of mass: x, y, z
26 | for j in [0, 1, 2]:
27 | target = ref_pos[j]
28 | actual = qpos[j]
29 |
30 | # NOTE: in Xie et al y target is 0
31 |
32 | com_error += (target - actual) ** 2
33 |
34 | # COM orientation: qx, qy, qz
35 | for j in [4, 5, 6]:
36 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0
37 | actual = qpos[j]
38 |
39 | orientation_error += (target - actual) ** 2
40 |
41 | # left and right shin springs
42 | for i in [15, 29]:
43 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0
44 | actual = qpos[i]
45 |
46 | spring_error += 1000 * (target - actual) ** 2
47 |
48 | reward = 0.5 * np.exp(-joint_error) + \
49 | 0.3 * np.exp(-com_error) + \
50 | 0.1 * np.exp(-orientation_error) + \
51 | 0.1 * np.exp(-spring_error)
52 |
53 | # reward = np.sign(qvel[0])*qvel[0]**2
54 | # desired_speed = 3.0
55 | # speed_diff = np.abs(qvel[0] - desired_speed)
56 | # if speed_diff > 1:
57 | # speed_diff = speed_diff**2
58 | # reward = 20 - speed_diff
59 |
60 | return reward
--------------------------------------------------------------------------------
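Like the other tracking rewards in this directory, iros_paper_reward is a weighted sum of exponential kernels over squared tracking errors, so each term is smooth, strictly positive, and saturates at 1 when its error vanishes; with weights summing to one the total reward is bounded above by 1:

    r = 0.5 e^{-E_{joint}} + 0.3 e^{-E_{com}} + 0.1 e^{-E_{orient}} + 0.1 e^{-E_{spring}}

where each E term is the scaled sum-of-squares error accumulated in the loops above.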
/cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl
--------------------------------------------------------------------------------
/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl
--------------------------------------------------------------------------------
/cassie/rewards/rnn_dyn_random_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def jonah_RNN_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | ref_pos, ref_vel = self.get_ref_state(self.phase)
8 |
9 | # TODO: should be variable; where do these come from?
10 | # TODO: see magnitude of state variables to gauge contribution to reward
11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05]
12 |
13 | joint_error = 0
14 | com_error = 0
15 | orientation_error = 0
16 | spring_error = 0
17 |
18 | # each joint pos
19 | for i, j in enumerate(self.pos_idx):
20 | target = ref_pos[j]
21 | actual = qpos[j]
22 |
23 | joint_error += 50 * weight[i] * (target - actual) ** 2
24 |
25 | # center of mass: x, y, z
26 | for j in [0, 1, 2]:
27 | target = ref_pos[j]
28 | actual = qpos[j]
29 |
30 | # NOTE: in Xie et al y target is 0
31 |
32 | com_error += 10 * (target - actual) ** 2
33 |
34 | actual_q = qpos[3:7]
35 | target_q = ref_pos[3:7]
36 | #target_q = [1, 0, 0, 0]
37 | orientation_error = 5 * (1 - np.inner(actual_q, target_q) ** 2)
38 |
39 | # left and right shin springs
40 | for i in [15, 29]:
41 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0
42 | actual = qpos[i]
43 |
44 | spring_error += 1000 * (target - actual) ** 2
45 |
46 | reward = 0.200 * np.exp(-joint_error) + \
47 | 0.450 * np.exp(-com_error) + \
48 | 0.300 * np.exp(-orientation_error) + \
49 | 0.050 * np.exp(-spring_error)
50 |
51 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/side_speedmatch_foottraj_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def side_speedmatch_foottraj_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | forward_diff = np.abs(qvel[0] -self.speed)
8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
9 | side_diff = np.abs(qvel[1] - self.side_speed)
10 | if forward_diff < 0.05:
11 | forward_diff = 0
12 | if side_diff < 0.05:
13 | side_diff = 0
14 |
15 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
16 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \
17 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \
18 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost)
19 |
20 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/side_speedmatch_heightvel_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def side_speedmatch_heightvel_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | forward_diff = np.abs(qvel[0] -self.speed)
8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
9 | side_diff = np.abs(qvel[1] - self.side_speed)
10 | if forward_diff < 0.05:
11 | forward_diff = 0
12 | if side_diff < 0.05:
13 | side_diff = 0
14 |
15 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
16 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \
17 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \
18 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost)
19 |
20 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/side_speedmatch_heuristic_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def side_speedmatch_heuristic_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | forward_diff = np.abs(qvel[0] -self.speed)
8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
9 | side_diff = np.abs(qvel[1] - self.side_speed)
10 | if forward_diff < 0.05:
11 | forward_diff = 0
12 | if side_diff < 0.05:
13 | side_diff = 0
14 |
15 | ######## Foot position penalty ########
16 | foot_pos = np.zeros(6)
17 | self.sim.foot_pos(foot_pos)
18 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5])
19 | foot_penalty = 0
20 | if foot_dist < 0.22:
21 | foot_penalty = 0.2
22 | ######## Foot force penalty ########
23 | foot_forces = self.sim.get_foot_forces()
24 | lforce = max((foot_forces[0] - 700)/1000, 0)
25 | rforce = max((foot_forces[1] - 700)/1000, 0)
26 | ######## Torque penalty ########
27 | torque = np.linalg.norm(self.cassie_state.motor.torque[:])
28 | ######## Pelvis z accel penalty #########
29 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2])
30 | pelaccel_penalty = 0
31 | if pelaccel > 6:
32 | pelaccel_penalty = (pelaccel - 6) / 30
33 | ####### Prev action penalty ########
34 | if self.prev_action is not None:
35 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate)
36 | else:
37 | prev_penalty = 0
38 | print("prev_penalty: ", prev_penalty)
39 | ######## Foot height bonus ########
40 | footheight_penalty = 0
41 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0):
42 | # print("adding foot height penalty")
43 | footheight_penalty = 0.2
44 |
45 |
46 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
47 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \
48 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \
49 | - pelaccel_penalty \
50 | - foot_penalty \
51 | - lforce - rforce \
52 | - footheight_penalty
53 |
54 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/side_speedmatch_rewards.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def side_speedmatch_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | forward_diff = np.abs(qvel[0] -self.speed)
8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
9 | side_diff = np.abs(qvel[1] - self.side_speed)
10 | if forward_diff < 0.05:
11 | forward_diff = 0
12 | if side_diff < 0.05:
13 | side_diff = 0
14 |
15 | reward = .4*np.exp(-forward_diff) + .4*np.exp(-side_diff) + .2*np.exp(-orient_diff)
16 |
17 | return reward
18 |
19 | def side_speedmatch_torquesmooth_reward(self):
20 | qpos = np.copy(self.sim.qpos())
21 | qvel = np.copy(self.sim.qvel())
22 |
23 | forward_diff = np.abs(qvel[0] -self.speed)
24 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
25 | side_diff = np.abs(qvel[1] - self.side_speed)
26 | if forward_diff < 0.05:
27 | forward_diff = 0
28 | if side_diff < 0.05:
29 | side_diff = 0
30 |
31 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \
32 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost)
33 |
34 | return reward
35 |
36 | def side_speedmatch_foottraj_reward(self):
37 | qpos = np.copy(self.sim.qpos())
38 | qvel = np.copy(self.sim.qvel())
39 |
40 | forward_diff = np.abs(qvel[0] -self.speed)
41 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
42 | side_diff = np.abs(qvel[1] - self.side_speed)
43 | if forward_diff < 0.05:
44 | forward_diff = 0
45 | if side_diff < 0.05:
46 | side_diff = 0
47 |
48 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
49 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \
50 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \
51 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost)
52 |
53 | return reward
54 |
55 | def side_speedmatch_heightvel_reward(self):
56 | qpos = np.copy(self.sim.qpos())
57 | qvel = np.copy(self.sim.qvel())
58 |
59 | forward_diff = np.abs(qvel[0] -self.speed)
60 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
61 | side_diff = np.abs(qvel[1] - self.side_speed)
62 | if forward_diff < 0.05:
63 | forward_diff = 0
64 | if side_diff < 0.05:
65 | side_diff = 0
66 |
67 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
68 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \
69 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \
70 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost)
71 |
72 | return reward
73 |
74 | def side_speedmatch_heuristic_reward(self):
75 | qpos = np.copy(self.sim.qpos())
76 | qvel = np.copy(self.sim.qvel())
77 |
78 | forward_diff = np.abs(qvel[0] -self.speed)
79 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
80 | side_diff = np.abs(qvel[1] - self.side_speed)
81 | if forward_diff < 0.05:
82 | forward_diff = 0
83 | if side_diff < 0.05:
84 | side_diff = 0
85 |
86 | ######## Foot position penalty ########
87 | foot_pos = np.zeros(6)
88 | self.sim.foot_pos(foot_pos)
89 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5])
90 | foot_penalty = 0
91 | if foot_dist < 0.22:
92 | foot_penalty = 0.2
93 | ######## Foot force penalty ########
94 | foot_forces = self.sim.get_foot_forces()
95 | lforce = max((foot_forces[0] - 700)/1000, 0)
96 | rforce = max((foot_forces[1] - 700)/1000, 0)
97 | ######## Torque penalty ########
98 | torque = np.linalg.norm(self.cassie_state.motor.torque[:])
99 | ######## Pelvis z accel penalty #########
100 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2])
101 | pelaccel_penalty = 0
102 | if pelaccel > 6:
103 | pelaccel_penalty = (pelaccel - 6) / 30
104 | ####### Prev action penalty ########
105 | if self.prev_action is not None:
106 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate)
107 | else:
108 | prev_penalty = 0
109 | print("prev_penalty: ", prev_penalty)
110 | ######## Foot height bonus ########
111 | footheight_penalty = 0
112 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0):
113 | # print("adding foot height penalty")
114 | footheight_penalty = 0.2
115 |
116 |
117 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \
118 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \
119 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \
120 | - pelaccel_penalty \
121 | - foot_penalty \
122 | - lforce - rforce \
123 | - footheight_penalty
124 |
125 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/side_speedmatch_torquesmooth_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def side_speedmatch_torquesmooth_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | forward_diff = np.abs(qvel[0] -self.speed)
8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
9 | side_diff = np.abs(qvel[1] - self.side_speed)
10 | if forward_diff < 0.05:
11 | forward_diff = 0
12 | if side_diff < 0.05:
13 | side_diff = 0
14 |
15 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \
16 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost)
17 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/speedmatch_footorient_joint_smooth_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from cassie.quaternion_function import euler2quat, inverse_quaternion, quaternion_product, rotate_by_quaternion
3 | def speedmatch_footorient_joint_smooth_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | orient_targ = np.array([1, 0, 0, 0])
8 | speed_targ = np.array([self.speed, 0, 0])
9 | if self.time >= self.orient_time:
10 | orient_targ = euler2quat(z=self.orient_add, y=0, x=0)
11 | iquaternion = inverse_quaternion(orient_targ)
12 | speed_targ = rotate_by_quaternion(speed_targ, iquaternion)
13 | new_orient = quaternion_product(iquaternion, self.cassie_state.pelvis.orientation[:])
14 | if new_orient[0] < 0:
15 | new_orient = -new_orient
16 | forward_diff = np.abs(qvel[0] - speed_targ[0])
17 | orient_diff = 1 - np.inner(orient_targ, qpos[3:7]) ** 2
18 | # orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0]))
19 | y_vel = np.abs(qvel[1] - speed_targ[1])
20 | if forward_diff < 0.05:
21 | forward_diff = 0
22 | if y_vel < 0.05:
23 | y_vel = 0
24 | straight_diff = 8*np.abs(qpos[1] - self.y_offset)
25 | if np.abs(qpos[1] - self.y_offset) < 0.05:
26 | straight_diff = 0
27 | if orient_diff < 5e-3:
28 | orient_diff = 0
29 | else:
30 | orient_diff *= 30
31 |
32 | reward = .25*np.exp(-forward_diff) + .1*np.exp(-orient_diff) \
33 | + .1*np.exp(-straight_diff) + .1*np.exp(-y_vel) \
34 | + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \
35 | + .1*np.exp(-self.smooth_cost) \
36 | + .15*np.exp(-self.joint_error)
37 |
38 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/speedmatch_heuristic_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | def speedmatch_heuristic_reward(self):
3 | qpos = np.copy(self.sim.qpos())
4 | ######## Pelvis z accel penalty #########
5 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2])
6 | pelaccel_penalty = 0
7 | if pelaccel > 5:
8 | pelaccel_penalty = (pelaccel - 5) / 10
9 | pelbonus = 0
10 | if 8 < pelaccel < 10:
11 | pelbonus = 0.2
12 | ######## Foot position penalty ########
13 | foot_pos = np.zeros(6)
14 | self.sim.foot_pos(foot_pos)
15 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5])
16 | foot_penalty = 0
17 | if foot_dist < 0.14:
18 | foot_penalty = 0.2
19 | ######## Foot force penalty ########
20 | foot_forces = self.sim.get_foot_forces()
21 | lforce = max((foot_forces[0] - 350)/1000, 0)
22 | rforce = max((foot_forces[1] - 350)/1000, 0)
23 | forcebonus = 0
24 | # print("foot force: ", lforce, rforce)
25 | # lbonus = max((800 - foot_forces[0])/1000, 0)
26 | if foot_forces[0] <= 1000 and foot_forces[1] <= 1000:
27 | forcebonus = foot_forces[0] / 5000 + foot_forces[1] / 5000
28 | ######## Foot velocity penalty ########
29 | lfoot_vel_bonus = 0
30 | rfoot_vel_bonus = 0
31 | # if self.prev_foot is not None and foot_pos[2] < 0.3 and foot_pos[5] < 0.3:
32 | # lfoot_vel = np.abs(foot_pos[2] - self.prev_foot[2]) / 0.03 * 0.03
33 | # rfoot_vel = np.abs(foot_pos[5] - self.prev_foot[5]) / 0.03 * 0.03
34 | # if self.l_high:
35 | # lfoot_vel_bonus = self.lfoot_vel * 0.3
36 | # if self.r_high:
37 | # rfoot_vel_bonus = self.rfoot_vel * 0.3
38 | ######## Foot orientation ########
39 | lfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.leftFoot.orientation[:]) ** 2
40 | rfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.rightFoot.orientation[:]) ** 2
41 | ####### Hip yaw ########
42 | rhipyaw = np.abs(qpos[22])
43 | lhipyaw = np.abs(qpos[8])
44 | if lhipyaw < 0.05:
45 | lhipyaw = 0
46 | if rhipyaw < 0.05:
47 | rhipyaw = 0
48 | ####### Hip roll penalty #########
49 | lhiproll = np.abs(qpos[7])
50 | rhiproll = np.abs(qpos[21])
51 | if lhiproll < 0.05:
52 | lhiproll = 0
53 | if rhiproll < 0.05:
54 | rhiproll = 0
55 | ####### Prev action penalty ########
56 | if self.prev_action is not None:
57 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate)
58 | else:
59 | prev_penalty = 0
60 |
61 | reward = .2*np.exp(-self.com_vel_error) + .1*np.exp(-self.com_error) + .1*np.exp(-self.orientation_error) \
62 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-5*self.l_footvel_diff) \
63 | + .1*np.exp(-20*self.r_foot_diff) + .1*np.exp(-5*self.r_footvel_diff) \
64 | + .1*np.exp(-lfoot_orient) + .1*np.exp(-rfoot_orient)
65 | # reward = .4*np.exp(-forward_diff) + .3*np.exp(-orient_diff) \
66 | # + .15*np.exp(-straight_diff) + .15*np.exp(-y_vel) \
67 | # + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \
68 | # + .1*np.exp(-self.smooth_cost) \
69 | # + .15*np.exp(-self.joint_error)
70 | # + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) #\
71 | #
72 | # + .075*np.exp(-10*lhipyaw) + .075*np.exp(-10*rhipyaw) + .075*np.exp(-10*lhiproll) + .075*np.exp(-10*rhiproll)
73 | # + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \
74 | # + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff)
75 | # - lfoot_vel_bonus - rfoot_vel_bonus - foot_penalty
76 | # - lforce - rforce
77 | #+ pelbonus- pelaccel_penalty - foot_penalty
78 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/standing_rewards.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def stand_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 |
7 | com_vel = np.linalg.norm(qvel[0:3])
8 | com_height = (0.9 - qpos[2]) ** 2
9 |
10 | reward = 0.5*np.exp(-com_vel) + 0.5*np.exp(-com_height)
11 |
12 | return reward
13 |
14 | def step_even_reward(self):
15 | qpos = np.copy(self.sim.qpos())
16 | qvel = np.copy(self.sim.qvel())
17 |
18 | com_vel = np.linalg.norm(qvel[0:3])
19 | com_height = (0.9 - qpos[2]) ** 2
20 |
21 | reward = 0.2*np.exp(-com_vel) + 0.2*np.exp(-com_height) \
22 | + 0.3*np.exp(-self.l_foot_cost_even) + 0.3*np.exp(-self.r_foot_cost_even)
23 |
24 | return reward
25 |
26 | def step_even_pelheight_reward(self):
27 | qpos = np.copy(self.sim.qpos())
28 | qvel = np.copy(self.sim.qvel())
29 |
30 | com_height = (0.9 - qpos[2]) ** 2
31 | if qpos[2] > 0.8:
32 | com_height = 0
33 |
34 | reward = 0.2*np.exp(-com_height) \
35 | + 0.4*np.exp(-self.l_foot_cost_even) + 0.4*np.exp(-self.r_foot_cost_even)
36 |
37 | return reward
38 |
39 | def step_smooth_pelheight_reward(self):
40 | qpos = np.copy(self.sim.qpos())
41 | qvel = np.copy(self.sim.qvel())
42 |
43 | com_height = (0.9 - qpos[2]) ** 2
44 | if qpos[2] > 0.8:
45 | com_height = 0
46 |
47 | reward = 0.2*np.exp(-com_height) \
48 | + 0.4*np.exp(-self.l_foot_cost_smooth) + 0.4*np.exp(-self.r_foot_cost_smooth)
49 |
50 | return reward
--------------------------------------------------------------------------------
/cassie/rewards/trajmatch_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def trajmatch_reward(self):
4 | qpos = np.copy(self.sim.qpos())
5 | qvel = np.copy(self.sim.qvel())
6 | phase_diff = self.phase - np.floor(self.phase)
7 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase)))
8 | if phase_diff != 0:
9 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase)))
10 | ref_pos_diff = ref_pos_next - ref_pos_prev
11 | ref_vel_diff = ref_vel_next - ref_vel_prev
12 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff
13 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff
14 | else:
15 | ref_pos = ref_pos_prev
16 | ref_vel = ref_vel_prev
17 |
18 | ref_pos, ref_vel = self.get_ref_state(self.phase)  # NOTE: this overwrites the phase-interpolated reference computed above
19 |
20 | # TODO: should be variable; where do these come from?
21 | # TODO: see magnitude of state variables to gauge contribution to reward
22 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05]
23 |
24 | joint_error = 0
25 | com_error = 0
26 | orientation_error = 0
27 | spring_error = 0
28 |
29 | # each joint pos
30 | for i, j in enumerate(self.pos_idx):
31 | target = ref_pos[j]
32 | actual = qpos[j]
33 |
34 | joint_error += 30 * weight[i] * (target - actual) ** 2
35 |
36 | # center of mass: x, y, z
37 | for j in [0, 1, 2]:
38 | target = ref_pos[j]
39 | actual = qpos[j]
40 |
41 | # NOTE: in Xie et al y target is 0
42 |
43 | com_error += (target - actual) ** 2
44 |
45 | # COM orientation: qx, qy, qz
46 | for j in [4, 5, 6]:
47 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0
48 | actual = qpos[j]
49 |
50 | orientation_error += (target - actual) ** 2
51 |
52 | # left and right shin springs
53 | for i in [15, 29]:
54 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0
55 | actual = qpos[i]
56 |
57 | spring_error += 1000 * (target - actual) ** 2
58 |
59 | reward = 0.5 * np.exp(-joint_error) + \
60 | 0.3 * np.exp(-com_error) + \
61 | 0.1 * np.exp(-orientation_error) + \
62 | 0.1 * np.exp(-spring_error)
63 |
64 | # orientation error does not look informative
65 | # maybe because it's comparing euclidean distance on quaternions
66 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format(
67 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100,
68 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100,
69 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100,
70 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100,
71 | # reward
72 | # )
73 | # )
74 |
75 | return reward
76 |
77 | def trajmatch_footorient_hiprollvelact_reward(self):
78 | qpos = np.copy(self.sim.qpos())
79 | qvel = np.copy(self.sim.qvel())
80 | phase_diff = self.phase - np.floor(self.phase)
81 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase)))
82 | if phase_diff != 0:
83 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase)))
84 | ref_pos_diff = ref_pos_next - ref_pos_prev
85 | ref_vel_diff = ref_vel_next - ref_vel_prev
86 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff
87 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff
88 | else:
89 | ref_pos = ref_pos_prev
90 | ref_vel = ref_vel_prev
91 |
92 | # ref_pos, ref_vel = self.get_ref_state(self.phase)  # redundant lookup; disabled so the interpolated ref_pos/ref_vel above are actually used
93 |
94 | # TODO: should be variable; where do these come from?
95 | # TODO: see magnitude of state variables to gauge contribution to reward
96 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05]
97 |
98 | joint_error = 0
99 | com_error = 0
100 | orientation_error = 0
101 | spring_error = 0
102 |
103 | # each joint pos
104 | for i, j in enumerate(self.pos_idx):
105 | target = ref_pos[j]
106 | actual = qpos[j]
107 |
108 | joint_error += 30 * weight[i] * (target - actual) ** 2
109 |
110 | # center of mass: x, y, z
111 | for j in [0, 1, 2]:
112 | target = ref_pos[j]
113 | actual = qpos[j]
114 |
115 | # NOTE: in Xie et al y target is 0
116 |
117 | com_error += (target - actual) ** 2
118 |
119 | # COM orientation: qx, qy, qz
120 | for j in [4, 5, 6]:
121 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0
122 | actual = qpos[j]
123 |
124 | orientation_error += (target - actual) ** 2
125 |
126 | # left and right shin springs
127 | for i in [15, 29]:
128 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0
129 | actual = qpos[i]
130 |
131 | spring_error += 1000 * (target - actual) ** 2
132 |
133 | reward = 0.3 * np.exp(-joint_error) + \
134 | 0.2 * np.exp(-com_error) + \
135 | 0.1 * np.exp(-orientation_error) + \
136 | 0.1 * np.exp(-spring_error) \
137 | + .075*np.exp(-self.l_foot_orient_cost) + .075*np.exp(-self.r_foot_orient_cost) \
138 | + .1*np.exp(-self.hiproll_cost) + 0.05*np.exp(-self.hiproll_act)
139 |
140 | # orientation error does not look informative
141 | # maybe because it's comparing euclidean distance on quaternions
142 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format(
143 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100,
144 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100,
145 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100,
146 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100,
147 | # reward
148 | # )
149 | # )
150 |
151 | return reward
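
Both reward functions above hinge on the fractional-phase interpolation at the top of each function; here is a minimal standalone sketch of that step (plain numpy, with a hypothetical 1-D reference table standing in for the environment's `get_ref_state`):

```python
import numpy as np

ref_table = np.linspace(0.0, 1.0, 28)   # hypothetical 1-D reference trajectory, one value per phase

def get_ref_state(phase):
    # stand-in for the environment's get_ref_state, which returns full qpos/qvel references
    return ref_table[phase]

phase = 3.25
phase_diff = phase - np.floor(phase)                  # 0.25
ref_prev = get_ref_state(int(np.floor(phase)))        # reference at phase 3
ref_next = get_ref_state(int(np.ceil(phase)))         # reference at phase 4
ref = ref_prev + phase_diff * (ref_next - ref_prev)   # 25% of the way from phase 3 to phase 4
```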
--------------------------------------------------------------------------------
/cassie/trajectory/__init__.py:
--------------------------------------------------------------------------------
1 | from .trajectory import *
2 | from .aslip_trajectory import *
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/backward_trajectory_Nov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/backward_trajectory_Nov
--------------------------------------------------------------------------------
/cassie/trajectory/ikNet_state_dict.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/ikNet_state_dict.pt
--------------------------------------------------------------------------------
/cassie/trajectory/more-poses-trial.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/more-poses-trial.bin
--------------------------------------------------------------------------------
/cassie/trajectory/spline_stepping_traj.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/spline_stepping_traj.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/stepdata.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepdata.bin
--------------------------------------------------------------------------------
/cassie/trajectory/stepping_trajectory_Nov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepping_trajectory_Nov
--------------------------------------------------------------------------------
/cassie/trajectory/test.py:
--------------------------------------------------------------------------------
1 | # $ ipython -i test.py
2 |
3 | from trajectory import CassieTrajectory
4 |
5 | traj = CassieTrajectory("stepdata.bin")
6 |
7 | print(len(traj.qpos[0]))
--------------------------------------------------------------------------------
/cassie/trajectory/traj_from_ref_foot_data.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/traj_from_ref_foot_data.pkl
--------------------------------------------------------------------------------
/cassie/trajectory/trajectory.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 | """
5 | Agility 2 kHz trajectory
6 | """
7 | class CassieTrajectory:
8 | def __init__(self, filepath):
9 | n = 1 + 35 + 32 + 10 + 10 + 10
10 | data = np.fromfile(filepath, dtype=np.double).reshape((-1, n))
11 |
12 | # states
13 | self.time = data[:, 0]
14 | self.qpos = data[:, 1:36]
15 | self.qvel = data[:, 36:68]
16 |
17 | # actions
18 | self.torque = data[:, 68:78]
19 | self.mpos = data[:, 78:88]
20 | self.mvel = data[:, 88:98]
21 |
22 | def state(self, t):
23 | tmax = self.time[-1]
24 |
25 | i = int((t % tmax) / tmax * len(self.time))
26 |
27 | return (self.qpos[i], self.qvel[i])
28 |
29 | def action(self, t):
30 | tmax = self.time[-1]
31 | i = int((t % tmax) / tmax * len(self.time))
32 | return (self.mpos[i], self.mvel[i], self.torque[i])
33 |
34 | def sample(self):
35 | i = random.randrange(len(self.time))
36 | return (self.time[i], self.qpos[i], self.qvel[i])
37 |
38 | def __len__(self):
39 | return len(self.time)
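
A minimal usage sketch for this class (assuming the `cassie` package is importable and the path to `stepdata.bin` below is adjusted to wherever the file actually lives):

```python
from cassie.trajectory import CassieTrajectory

traj = CassieTrajectory("cassie/trajectory/stepdata.bin")   # hypothetical relative path

qpos, qvel = traj.state(0.42)            # reference state at time 0.42, wrapped modulo the final timestamp
mpos, mvel, torque = traj.action(0.42)   # motor position/velocity targets and torques at the same time
t, qpos, qvel = traj.sample()            # a random (time, qpos, qvel) sample from the cycle
print(len(traj), traj.time[-1])          # number of 2 kHz samples and the final timestamp
```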
--------------------------------------------------------------------------------
/cassie/trajectory/walk-in-place-downsampled.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/walk-in-place-downsampled.bin
--------------------------------------------------------------------------------
/img/output.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output.gif
--------------------------------------------------------------------------------
/img/output2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output2.gif
--------------------------------------------------------------------------------
/mirror_policy_check.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import hashlib, os, pickle
3 | import sys, time
4 | from cassie.quaternion_function import *
5 | import tty
6 | import termios
7 | import select
8 | import numpy as np
9 | from functools import partial
10 | from rl.envs.wrappers import SymmetricEnv
11 | from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_noaccel_footdist
12 |
13 | def isData():
14 | return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])
15 |
16 | env = CassieEnv(state_est=True, dynamics_randomization=False, history=0)
17 | env_fn = partial(CassieEnv, state_est=True, dynamics_randomization=False, history=0)
18 | # env = CassieEnv_noaccel_footdist(state_est=True, dynamics_randomization=False, history=0)
19 | # env_fn = partial(CassieEnv_noaccel_footdist, state_est=True, dynamics_randomization=False, history=0)
20 |
21 | sym_env = SymmetricEnv(env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4])
22 | # obs = env.get_full_state()
23 | # print("obs len: ", len(obs))
24 | # exit()
25 |
26 | path = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/"
27 | # path = "./logs/footdist/CassieNoaccelFootDist/noaccel_footdist_speedmatch_seed10/"
28 | policy = torch.load(path + "actor.pt")
29 | policy.eval()
30 |
31 | old_settings = termios.tcgetattr(sys.stdin)
32 |
33 | orient_add = 0
34 |
35 | env.render()
36 | render_state = True
37 | try:
38 | tty.setcbreak(sys.stdin.fileno())
39 |
40 | state = env.reset_for_test()
41 | done = False
42 | timesteps = 0
43 | eval_reward = 0
44 | speed = 0.0
45 |
46 | while render_state:
47 |
48 | if isData():
49 | c = sys.stdin.read(1)
50 | if c == 'w':
51 | speed += 0.1
52 | elif c == 's':
53 | speed -= 0.1
54 | elif c == 'j':
55 | env.phase_add += .1
56 | print("Increasing frequency to: {:.1f}".format(env.phase_add))
57 | elif c == 'h':
58 | env.phase_add -= .1
59 | print("Decreasing frequency to: {:.1f}".format(env.phase_add))
60 | elif c == 'l':
61 | orient_add += .1
62 | print("Increasing orient_add to: ", orient_add)
63 | elif c == 'k':
64 | orient_add -= .1
65 | print("Decreasing orient_add to: ", orient_add)
66 | elif c == 'p':
67 | push = 100
68 | push_dir = 2
69 | force_arr = np.zeros(6)
70 | force_arr[push_dir] = push
71 | env.sim.apply_force(force_arr)
72 |
73 | env.update_speed(speed)
74 | print("speed: ", env.speed)
75 |
76 | if hasattr(env, 'simrate'):
77 | start = time.time()
78 |
79 | if (not env.vis.ispaused()):
80 | # Update Orientation
81 | quaternion = euler2quat(z=orient_add, y=0, x=0)
82 | iquaternion = inverse_quaternion(quaternion)
83 |
84 | if env.state_est:
85 | curr_orient = state[1:5]
86 | curr_transvel = state[15:18]
87 | # curr_orient = state[6:10]
88 | # curr_transvel = state[20:23]
89 | else:
90 | curr_orient = state[2:6]
91 | curr_transvel = state[20:23]
92 |
93 | new_orient = quaternion_product(iquaternion, curr_orient)
94 |
95 | if new_orient[0] < 0:
96 | new_orient = -new_orient
97 |
98 | new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
99 |
100 | if env.state_est:
101 | state[1:5] = torch.FloatTensor(new_orient)
102 | state[15:18] = torch.FloatTensor(new_translationalVelocity)
103 | # state[6:10] = torch.FloatTensor(new_orient)
104 | # state[20:23] = torch.FloatTensor(new_translationalVelocity)
105 | # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware.
106 | else:
107 | state[2:6] = torch.FloatTensor(new_orient)
108 | state[20:23] = torch.FloatTensor(new_translationalVelocity)
109 |
110 | state = torch.Tensor(state)
111 | # Calculate mirror state and mirror action
112 | with torch.no_grad():
113 | mirror_state = sym_env.mirror_clock_observation(state.unsqueeze(0), env.clock_inds)[0]
114 | # Mirror pelvis orientation and velocity
115 | # mir_quat = inverse_quaternion(mirror_state[1:5])
116 | # mir_quat[2] *= -1
117 | # mirror_state[1:5] = torch.Tensor(mir_quat)
118 | # mirror_state[16] *= -1 # y trans vel
119 | # mir_rot_vel = -mirror_state[18:21]
120 | # mir_rot_vel[1] *= -1
121 | # mirror_state[18:21] = mir_rot_vel
122 | # mirror_state[32] *= -1 # y trans accel
123 | mir_action = policy.forward(mirror_state, deterministic=True)
124 | mir_mir_action = sym_env.mirror_action(mir_action.unsqueeze(0)).detach().numpy()[0]
125 | action = policy.forward(state, deterministic=True).detach().numpy()
126 | # print("mirror action diff: ", np.linalg.norm(mir_mir_action - action))
127 | state, reward, done, _ = env.step(mir_mir_action)
128 |
129 | eval_reward += reward
130 | timesteps += 1
131 |
132 |
133 | render_state = env.render()
134 | if hasattr(env, 'simrate'):
135 | # assume 30hz (hack)
136 | end = time.time()
137 | delaytime = max(0, 1000 / 30000 - (end-start))
138 | time.sleep(delaytime)
139 |
140 | print("Eval reward: ", eval_reward)
141 |
142 | finally:
143 | termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
--------------------------------------------------------------------------------
/rl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/rl/__init__.py
--------------------------------------------------------------------------------
/rl/algos/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rl/config/monitor.ini:
--------------------------------------------------------------------------------
1 | [monitor]
2 | # Options: Timesteps, Iterations, (walltime to be included in future)
3 | xlabel=Iterations
4 |
5 | # Options: Fixed, Variable
6 | xlim=Variable
--------------------------------------------------------------------------------
/rl/distributions/__init__.py:
--------------------------------------------------------------------------------
1 | from .gaussian import DiagonalGaussian
2 | from .beta import Beta, Beta2
--------------------------------------------------------------------------------
/rl/distributions/beta.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | # TODO: extend these for arbitrary bounds
8 |
9 | """A beta distribution, but where the pdf is scaled to (-1, 1)"""
10 | class BoundedBeta(torch.distributions.Beta):
11 | def log_prob(self, x):
12 | return super().log_prob((x + 1) / 2)
13 |
14 | class Beta(nn.Module):
15 | def __init__(self, action_dim):
16 | super(Beta, self).__init__()
17 |
18 | self.action_dim = action_dim
19 |
20 | def forward(self, alpha_beta):
21 | alpha = 1 + F.softplus(alpha_beta[:, :self.action_dim])
22 | beta = 1 + F.softplus(alpha_beta[:, self.action_dim:])
23 | return alpha, beta
24 |
25 | def sample(self, x, deterministic):
26 | if deterministic is False:
27 | action = self.evaluate(x).sample()
28 | else:
29 | # E = alpha / (alpha + beta)
30 | return self.evaluate(x).mean
31 |
32 | return 2 * action - 1
33 |
34 | def evaluate(self, x):
35 | alpha, beta = self(x)
36 | return BoundedBeta(alpha, beta)
37 |
38 |
39 | # TODO: think of a better name for this
40 | """Beta distribution parameterized by mean and variance."""
41 | class Beta2(nn.Module):
42 | def __init__(self, action_dim, init_std=0.25, learn_std=False):
43 | super(Beta2, self).__init__()
44 |
45 | assert init_std < 0.5, "Beta distribution has a max std dev of 0.5"
46 |
47 | self.action_dim = action_dim
48 |
49 | self.logstd = nn.Parameter(
50 | torch.ones(1, action_dim) * np.log(init_std),
51 | requires_grad=learn_std
52 | )
53 |
54 | self.learn_std = learn_std
55 |
56 |
57 | def forward(self, x):
58 | mean = torch.sigmoid(x)
59 |
60 | var = self.logstd.exp().pow(2)
61 |
62 | """
63 | alpha = ((1 - mu) / sigma^2 - 1 / mu) * mu^2
64 | beta = alpha * (1 / mu - 1)
65 |
66 | Implemented slightly differently for numerical stability.
67 | """
68 | alpha = ((1 - mean) / var) * mean.pow(2) - mean
69 | beta = ((1 - mean) / var) * mean - 1 - alpha
70 |
71 | # PROBLEM: if alpha or beta < 1, that's not good
72 |
73 | #assert np.allclose(alpha, ((1 - mean) / var - 1 / mean) * mean.pow(2))
74 | #assert np.allclose(beta, ((1 - mean) / var - 1 / mean) * mean.pow(2) * (1 / mean - 1))
75 |
76 | #alpha = 1 + F.softplus(alpha)
77 | #beta = 1 + F.softplus(beta)
78 |
79 | # print("alpha",alpha)
80 | # print("beta",beta)
81 |
82 | # #print(alpha / (alpha + beta))
83 | # print("mu",mean)
84 |
85 | # #print(torch.sqrt(alpha * beta / ((alpha+beta)**2 * (alpha + beta + 1))))
86 | # print("var", var)
87 |
88 | # import pdb
89 | # pdb.set_trace()
90 |
91 | return alpha, beta
92 |
93 | def sample(self, x, deterministic):
94 | if deterministic is False:
95 | action = self.evaluate(x).sample()
96 | else:
97 | # E = alpha / (alpha + beta)
98 | return self.evaluate(x).mean
99 |
100 | # 2 * a - 1 puts a in (-1, 1)
101 | return 2 * action - 1
102 |
103 | def evaluate(self, x):
104 | alpha, beta = self(x)
105 | return BoundedBeta(alpha, beta)
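
A quick standalone check (plain Python floats, not part of the module) that the rearranged moment-matching in `Beta2.forward` agrees with the textbook formulas quoted in the docstring and recovers the requested mean and variance:

```python
mu, sigma = 0.6, 0.25
var = sigma ** 2

# textbook form from the docstring
alpha_ref = ((1 - mu) / var - 1 / mu) * mu ** 2
beta_ref = alpha_ref * (1 / mu - 1)

# rearranged form used in Beta2.forward
alpha = ((1 - mu) / var) * mu ** 2 - mu
beta = ((1 - mu) / var) * mu - 1 - alpha

assert abs(alpha - alpha_ref) < 1e-12 and abs(beta - beta_ref) < 1e-12

mean = alpha / (alpha + beta)                                         # 0.6
variance = alpha * beta / ((alpha + beta) ** 2 * (alpha + beta + 1))  # 0.0625
print(alpha, beta, mean, variance)
```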
--------------------------------------------------------------------------------
/rl/distributions/gaussian.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 |
7 | # TODO: look at change of variables function for enforcing
8 | # action bounds correctly
9 | class DiagonalGaussian(nn.Module):
10 | def __init__(self, num_outputs, init_std=1, learn_std=True):
11 | super(DiagonalGaussian, self).__init__()
12 |
13 | self.logstd = nn.Parameter(
14 | torch.ones(1, num_outputs) * np.log(init_std),
15 | requires_grad=learn_std
16 | )
17 |
18 | self.learn_std = learn_std
19 |
20 | def forward(self, x):
21 | mean = x
22 |
23 | std = self.logstd.exp()
24 |
25 | return mean, std
26 |
27 | def sample(self, x, deterministic):
28 | if deterministic is False:
29 | action = self.evaluate(x).sample()
30 | else:
31 | action, _ = self(x)
32 |
33 | return action
34 |
35 | def evaluate(self, x):
36 | mean, std = self(x)
37 | return torch.distributions.Normal(mean, std)
38 |
--------------------------------------------------------------------------------
/rl/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from .vectorize import *
2 | from .normalize import *
3 | from .wrappers import *
--------------------------------------------------------------------------------
/rl/envs/monitor.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/bench/monitor.py
2 | import gym
3 | import time
4 | from glob import glob
5 | import csv
6 | import os.path as osp
7 | import json
8 | from gym.core import Wrapper
9 | class Monitor(Wrapper):
10 | EXT = "monitor.csv"
11 | f = None
12 |
13 | def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()):
14 | Wrapper.__init__(self, env=env)
15 | self.tstart = time.time()
16 | if filename is None:
17 | self.f = None
18 | self.logger = None
19 | else:
20 | if not filename.endswith(Monitor.EXT):
21 | if osp.isdir(filename):
22 | filename = osp.join(filename, Monitor.EXT)
23 | else:
24 | filename = filename + "." + Monitor.EXT
25 | self.f = open(filename, "wt")
26 | self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, "gym_version": gym.__version__,
27 | "env_id": env.spec.id if env.spec else 'Unknown'}))
28 | self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+reset_keywords)
29 | self.logger.writeheader()
30 |
31 | self.reset_keywords = reset_keywords
32 | self.allow_early_resets = allow_early_resets
33 | self.rewards = None
34 | self.needs_reset = True
35 | self.episode_rewards = []
36 | self.episode_lengths = []
37 | self.total_steps = 0
38 | self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
39 |
40 | def _reset(self, **kwargs):
41 | if not self.allow_early_resets and not self.needs_reset:
42 | raise RuntimeError("Tried to reset an environment before done. If you want to allow early resets, wrap your env with Monitor(env, path, allow_early_resets=True)")
43 | self.rewards = []
44 | self.needs_reset = False
45 | for k in self.reset_keywords:
46 | v = kwargs.get(k)
47 | if v is None:
48 | raise ValueError('Expected you to pass kwarg %s into reset'%k)
49 | self.current_reset_info[k] = v
50 | return self.env.reset(**kwargs)
51 |
52 | def _step(self, action):
53 | if self.needs_reset:
54 | raise RuntimeError("Tried to step environment that needs reset")
55 | ob, rew, done, info = self.env.step(action)
56 | self.rewards.append(rew)
57 | if done:
58 | self.needs_reset = True
59 | eprew = sum(self.rewards)
60 | eplen = len(self.rewards)
61 | epinfo = {"r": round(eprew, 6), "l": eplen, "t": round(time.time() - self.tstart, 6)}
62 | epinfo.update(self.current_reset_info)
63 | if self.logger:
64 | self.logger.writerow(epinfo)
65 | self.f.flush()
66 | self.episode_rewards.append(eprew)
67 | self.episode_lengths.append(eplen)
68 | info['episode'] = epinfo
69 | self.total_steps += 1
70 | return (ob, rew, done, info)
71 |
72 | def close(self):
73 | if self.f is not None:
74 | self.f.close()
75 |
76 | def get_total_steps(self):
77 | return self.total_steps
78 |
79 | def get_episode_rewards(self):
80 | return self.episode_rewards
81 |
82 | def get_episode_lengths(self):
83 | return self.episode_lengths
84 |
85 | class LoadMonitorResultsError(Exception):
86 | pass
87 |
88 | def get_monitor_files(dir):
89 | return glob(osp.join(dir, "*" + Monitor.EXT))
90 |
91 | def load_results(dir):
92 | import pandas
93 | monitor_files = glob(osp.join(dir, "*monitor.*")) # get both csv and (old) json files
94 | if not monitor_files:
95 | raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir))
96 | dfs = []
97 | headers = []
98 | for fname in monitor_files:
99 | with open(fname, 'rt') as fh:
100 | if fname.endswith('csv'):
101 | firstline = fh.readline()
102 | assert firstline[0] == '#'
103 | header = json.loads(firstline[1:])
104 | df = pandas.read_csv(fh, index_col=None)
105 | headers.append(header)
106 | elif fname.endswith('json'): # Deprecated json format
107 | episodes = []
108 | lines = fh.readlines()
109 | header = json.loads(lines[0])
110 | headers.append(header)
111 | for line in lines[1:]:
112 | episode = json.loads(line)
113 | episodes.append(episode)
114 | df = pandas.DataFrame(episodes)
115 | df['t'] += header['t_start']
116 | dfs.append(df)
117 | df = pandas.concat(dfs)
118 | df.sort_values('t', inplace=True)
119 | df['t'] -= min(header['t_start'] for header in headers)
120 | df.headers = headers # HACK to preserve backwards compatibility
121 | return df
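
A hedged usage sketch for reading these files back (the `./logs` directory is hypothetical; a monitor file is a `#`-prefixed JSON header followed by CSV rows with columns `r` = episode reward, `l` = episode length, `t` = seconds since `t_start`):

```python
from rl.envs.monitor import load_results

df = load_results("./logs")          # pandas DataFrame with one row per logged episode
print(df[["r", "l", "t"]].head())
```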
--------------------------------------------------------------------------------
/rl/envs/normalize.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py
2 | # Thanks to the authors + OpenAI for the code
3 |
4 | import numpy as np
5 | import functools
6 | import torch
7 | import ray
8 |
9 | from .wrapper import WrapEnv
10 |
11 | @ray.remote
12 | def _run_random_actions(iter, policy, env_fn, noise_std):
13 |
14 | env = WrapEnv(env_fn)
15 | states = np.zeros((iter, env.observation_space.shape[0]))
16 |
17 | state = env.reset()
18 | for t in range(iter):
19 | states[t, :] = state
20 |
21 | state = torch.Tensor(state)
22 |
23 | action = policy(state)
24 |
25 | # add gaussian noise to deterministic action
26 | action = action + torch.randn(action.size()) * noise_std
27 |
28 | state, _, done, _ = env.step(action.data.numpy())
29 |
30 | if done:
31 | state = env.reset()
32 |
33 | return states
34 |
35 | def get_normalization_params(iter, policy, env_fn, noise_std, procs=4):
36 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std))
37 |
38 | states_ids = [_run_random_actions.remote(iter // procs, policy, env_fn, noise_std) for _ in range(procs)]
39 |
40 | states = []
41 | for _ in range(procs):
42 | ready_ids, _ = ray.wait(states_ids, num_returns=1)
43 | states.extend(ray.get(ready_ids[0]))
44 | states_ids.remove(ready_ids[0])
45 |
46 | print("Done gathering input normalization data.")
47 |
48 | return np.mean(states, axis=0), np.sqrt(np.var(states, axis=0) + 1e-8)
49 |
50 |
51 | # returns a function that creates a normalized environment, then pre-normalizes it
52 | # using states sampled from a deterministic policy with some added noise
53 | def PreNormalizer(iter, noise_std, policy, *args, **kwargs):
54 |
55 | # noise is gaussian noise
56 | @torch.no_grad()
57 | def pre_normalize(env, policy, num_iter, noise_std):
58 | # save whether or not the environment is configured to do online normalization
59 | online_val = env.online
60 | env.online = True
61 |
62 | state = env.reset()
63 |
64 | for t in range(num_iter):
65 | state = torch.Tensor(state)
66 |
67 | _, action = policy(state)
68 |
69 | # add gaussian noise to deterministic action
70 | action = action + torch.randn(action.size()) * noise_std
71 |
72 | state, _, done, _ = env.step(action.data.numpy())
73 |
74 | if done:
75 | state = env.reset()
76 |
77 | env.online = online_val
78 |
79 | def _Normalizer(venv):
80 | venv = Normalize(venv, *args, **kwargs)
81 |
82 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std))
83 | pre_normalize(venv, policy, iter, noise_std)
84 | print("Done gathering input normalization data.")
85 |
86 | return venv
87 |
88 | return _Normalizer
89 |
90 | # returns a function that creates a normalized environment
91 | def Normalizer(*args, **kwargs):
92 | def _Normalizer(venv):
93 | return Normalize(venv, *args, **kwargs)
94 |
95 | return _Normalizer
96 |
97 | class Normalize:
98 | """
99 | Vectorized environment base class
100 | """
101 | def __init__(self,
102 | venv,
103 | ob_rms=None,
104 | ob=True,
105 | ret=False,
106 | clipob=10.,
107 | cliprew=10.,
108 | online=True,
109 | gamma=1.0,
110 | epsilon=1e-8):
111 |
112 | self.venv = venv
113 | self._observation_space = venv.observation_space
114 | self._action_space = venv.action_space
115 |
116 | if ob_rms is not None:
117 | self.ob_rms = ob_rms
118 | else:
119 | self.ob_rms = RunningMeanStd(shape=self._observation_space.shape) if ob else None
120 |
121 | self.ret_rms = RunningMeanStd(shape=()) if ret else None
122 | self.clipob = clipob
123 | self.cliprew = cliprew
124 | self.ret = np.zeros(self.num_envs)
125 | self.gamma = gamma
126 | self.epsilon = epsilon
127 |
128 | self.online = online
129 |
130 | def step(self, vac):
131 | obs, rews, news, infos = self.venv.step(vac)
132 |
133 | #self.ret = self.ret * self.gamma + rews
134 | obs = self._obfilt(obs)
135 |
136 | # NOTE: shifting mean of reward seems bad; qualitatively changes MDP
137 | if self.ret_rms:
138 | if self.online:
139 | self.ret_rms.update(self.ret)
140 |
141 | rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
142 |
143 | return obs, rews, news, infos
144 |
145 | def _obfilt(self, obs):
146 | if self.ob_rms:
147 | if self.online:
148 | self.ob_rms.update(obs)
149 |
150 | obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob)
151 | return obs
152 | else:
153 | return obs
154 |
155 | def reset(self):
156 | """
157 | Reset all environments
158 | """
159 | obs = self.venv.reset()
160 | return self._obfilt(obs)
161 |
162 | @property
163 | def action_space(self):
164 | return self._action_space
165 |
166 | @property
167 | def observation_space(self):
168 | return self._observation_space
169 |
170 | def close(self):
171 | self.venv.close()
172 |
173 | def render(self):
174 | self.venv.render()
175 |
176 | @property
177 | def num_envs(self):
178 | return self.venv.num_envs
179 |
180 |
181 |
182 | class RunningMeanStd(object):
183 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
184 | def __init__(self, epsilon=1e-4, shape=()):
185 | self.mean = np.zeros(shape, 'float64')
186 | self.var = np.zeros(shape, 'float64')
187 | self.count = epsilon
188 |
189 |
190 | def update(self, x):
191 | batch_mean = np.mean(x, axis=0)
192 | batch_var = np.var(x, axis=0)
193 | batch_count = x.shape[0]
194 |
195 | delta = batch_mean - self.mean
196 | tot_count = self.count + batch_count
197 |
198 | new_mean = self.mean + delta * batch_count / tot_count
199 | m_a = self.var * (self.count)
200 | m_b = batch_var * (batch_count)
201 | M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
202 | new_var = M2 / (self.count + batch_count)
203 |
204 | new_count = batch_count + self.count
205 |
206 | self.mean = new_mean
207 | self.var = new_var
208 | self.count = new_count
209 |
210 | def test_runningmeanstd():
211 | for (x1, x2, x3) in [
212 | (np.random.randn(3), np.random.randn(4), np.random.randn(5)),
213 | (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),
214 | ]:
215 |
216 | rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:])
217 |
218 | x = np.concatenate([x1, x2, x3], axis=0)
219 | ms1 = [x.mean(axis=0), x.var(axis=0)]
220 | rms.update(x1)
221 | rms.update(x2)
222 | rms.update(x3)
223 | ms2 = [rms.mean, rms.var]
224 |
225 | assert np.allclose(ms1, ms2)
226 |
--------------------------------------------------------------------------------
/rl/envs/vectorize.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/dummy_vec_env.py
2 | # Thanks to the authors + OpenAI for the code
3 |
4 | import numpy as np
5 |
6 | class Vectorize:
7 | def __init__(self, env_fns):
8 | self.envs = [fn() for fn in env_fns]
9 | env = self.envs[0]
10 |
11 | self._observation_space = env.observation_space
12 | self._action_space = env.action_space
13 |
14 | self.ts = np.zeros(len(self.envs), dtype='int')
15 |
16 | def step(self, action_n):
17 | results = [env.step(a) for (a,env) in zip(action_n, self.envs)]
18 | obs, rews, dones, infos = map(np.array, zip(*results))
19 |
20 | # TODO: decide whether to uncomment this
21 | self.ts += 1
22 | # for (i, done) in enumerate(dones):
23 | # if done:
24 | # obs[i] = self.envs[i].reset()
25 | # self.ts[i] = 0
26 |
27 | return np.array(obs), np.array(rews), np.array(dones), infos
28 |
29 | def reset(self):
30 | results = [env.reset() for env in self.envs]
31 | return np.array(results)
32 |
33 | def render(self):
34 | self.envs[0].render()
35 |
36 | @property
37 | def num_envs(self):
38 | return len(self.envs)
39 |
40 | @property
41 | def action_space(self):
42 | return self._action_space
43 |
44 | @property
45 | def observation_space(self):
46 | return self._observation_space
47 |
48 |
--------------------------------------------------------------------------------
/rl/envs/wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # Gives a vectorized interface to a single environment
4 | class WrapEnv:
5 | def __init__(self, env_fn):
6 | self.env = env_fn()
7 |
8 | def __getattr__(self, attr):
9 | return getattr(self.env, attr)
10 |
11 | def step(self, action):
12 | state, reward, done, info = self.env.step(action[0])
13 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info])
14 |
15 | def render(self):
16 | self.env.render()
17 |
18 | def reset(self):
19 | return np.array([self.env.reset()])
--------------------------------------------------------------------------------
/rl/envs/wrappers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | # Gives a vectorized interface to a single environment
5 | class WrapEnv:
6 | def __init__(self, env_fn):
7 | self.env = env_fn()
8 |
9 | def __getattr__(self, attr):
10 | return getattr(self.env, attr)
11 |
12 | def step(self, action, term_thresh=0):
13 | state, reward, done, info = self.env.step(action[0], f_term=term_thresh)
14 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info])
15 |
16 | def render(self):
17 | self.env.render()
18 |
19 | def reset(self):
20 | return np.array([self.env.reset()])
21 |
22 | # TODO: this is probably a better case for inheritance than for a wrapper
23 | # Gives an interface to exploit mirror symmetry
24 | class SymmetricEnv:
25 | def __init__(self, env_fn, mirrored_obs=None, mirrored_act=None, obs_fn=None, act_fn=None):
26 |
27 | assert (bool(mirrored_act) ^ bool(act_fn)) and (bool(mirrored_obs) ^ bool(obs_fn)), \
28 | "You must provide either mirror indices or a mirror function, but not both, for \
29 | observation and action."
30 |
31 | if mirrored_act:
32 | self.act_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_act))
33 |
34 | elif act_fn:
35 | assert callable(act_fn), "Action mirror function must be callable"
36 | self.mirror_action = act_fn
37 |
38 | if mirrored_obs:
39 | self.obs_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_obs))
40 |
41 | elif obs_fn:
42 | assert callable(obs_fn), "Observation mirror function must be callable"
43 | self.mirror_observation = obs_fn
44 |
45 | self.env = env_fn()
46 |
47 | def __getattr__(self, attr):
48 | return getattr(self.env, attr)
49 |
50 | def mirror_action(self, action):
51 | return action @ self.act_mirror_matrix
52 |
53 | def mirror_observation(self, obs):
54 | return obs @ self.obs_mirror_matrix
55 |
56 | # To be used when there is a clock in the observation. In this case, the mirrored_obs vector passed in
57 | # when the SymmetricEnv is created should not reorder the clock inputs. The indices of the obs vector
58 | # where the clocks are located need to be passed in.
59 | def mirror_clock_observation(self, obs, clock_inds):
60 | # print("obs.shape = ", obs.shape)
61 | # print("obs_mirror_matrix.shape = ", self.obs_mirror_matrix.shape)
62 | mirror_obs = obs @ self.obs_mirror_matrix
63 | clock = mirror_obs[:, clock_inds]
64 | # print("clock: ", clock)
65 | for i in range(np.shape(clock)[1]):
66 | mirror_obs[:, clock_inds[i]] = np.sin(np.arcsin(clock[:, i]) + np.pi)
67 | return mirror_obs
68 |
69 |
70 | def _get_symmetry_matrix(mirrored):
71 | numel = len(mirrored)
72 | mat = np.zeros((numel, numel))
73 |
74 | for (i, j) in zip(np.arange(numel), np.abs(np.array(mirrored).astype(int))):
75 | mat[i, j] = np.sign(mirrored[i])
76 |
77 | return mat
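
A standalone check of the mirror-index convention used by `SymmetricEnv` (the helper is copied from above so the snippet runs on its own): a fractional entry such as `-0.1` encodes "index 0, negated", since `astype(int)` recovers column 0 and `np.sign` recovers the flip.

```python
import numpy as np
import torch

def _get_symmetry_matrix(mirrored):   # copy of the helper above
    numel = len(mirrored)
    mat = np.zeros((numel, numel))
    for (i, j) in zip(np.arange(numel), np.abs(np.array(mirrored).astype(int))):
        mat[i, j] = np.sign(mirrored[i])
    return mat

mirrored_act = [-2, 3, -0.1, 1]                       # tiny 4-dim example
mat = torch.Tensor(_get_symmetry_matrix(mirrored_act))

action = torch.Tensor([[10., 20., 30., 40.]])
print(action @ mat)                                   # tensor([[-30., 40., -10., 20.]])
```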
--------------------------------------------------------------------------------
/rl/policies/__init__.py:
--------------------------------------------------------------------------------
1 | # from .actor import Gaussian_FF_Actor as GaussianMLP_Actor # for legacy code
2 | from .actor import Gaussian_FF_Actor
3 |
4 | # from .actor_release import GaussianMLP_Actor
5 |
6 | #from .linear import LinearMLP
7 | #from .recurrent import RecurrentNet
--------------------------------------------------------------------------------
/rl/policies/base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from torch import sqrt
6 |
7 | def normc_fn(m):
8 | classname = m.__class__.__name__
9 | if classname.find('Linear') != -1:
10 | m.weight.data.normal_(0, 1)
11 | m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True))
12 | if m.bias is not None:
13 | m.bias.data.fill_(0)
14 |
15 | # The base class for an actor. Includes functions for normalizing state (optional)
16 | class Net(nn.Module):
17 | def __init__(self):
18 | super(Net, self).__init__()
19 | self.is_recurrent = False
20 |
21 | self.welford_state_mean = torch.zeros(1)
22 | self.welford_state_mean_diff = torch.ones(1)
23 | self.welford_state_n = 1
24 |
25 | self.env_name = None
26 |
27 | def forward(self):
28 | raise NotImplementedError
29 |
30 | def normalize_state(self, state, update=True):
31 | state = torch.Tensor(state)
32 |
33 | if self.welford_state_n == 1:
34 | self.welford_state_mean = torch.zeros(state.size(-1))
35 | self.welford_state_mean_diff = torch.ones(state.size(-1))
36 |
37 | if update:
38 | if len(state.size()) == 1: # If we get a single state vector
39 | state_old = self.welford_state_mean
40 | self.welford_state_mean += (state - state_old) / self.welford_state_n
41 | self.welford_state_mean_diff += (state - state_old) * (state - state_old)
42 | self.welford_state_n += 1
43 | elif len(state.size()) == 2: # If we get a batch
44 | print("NORMALIZING 2D TENSOR (this should not be happening)")
45 | for state_n in state:
46 | state_old = self.welford_state_mean
47 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n
48 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old)
49 | self.welford_state_n += 1
50 | elif len(state.size()) == 3: # If we get a batch of sequences
51 | print("NORMALIZING 3D TENSOR (this really should not be happening)")
52 | for state_t in state:
53 | for state_n in state_t:
54 | state_old = self.welford_state_mean
55 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n
56 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old)
57 | self.welford_state_n += 1
58 | return (state - self.welford_state_mean) / sqrt(self.welford_state_mean_diff / self.welford_state_n)
59 |
60 | def copy_normalizer_stats(self, net):
61 | self.welford_state_mean = net.welford_state_mean
62 | self.welford_state_mean_diff = net.welford_state_mean_diff
63 | self.welford_state_n = net.welford_state_n
64 |
65 | def initialize_parameters(self):
66 | self.apply(normc_fn)
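
A standalone sketch of the Welford-style running normalization above (plain torch, mirroring the same update rule) showing that the running statistics converge to the batch statistics:

```python
import torch

data = torch.randn(1000, 4) * 3.0 + 5.0   # synthetic states with mean ~5, std ~3

mean = torch.zeros(4)
mean_diff = torch.ones(4)                  # same non-zero initialization as welford_state_mean_diff
n = 1

for state in data:
    old = mean
    mean = mean + (state - old) / n
    mean_diff = mean_diff + (state - old) * (state - old)
    n += 1

print(mean)                       # approaches ~5 in every dimension
print(torch.sqrt(mean_diff / n))  # roughly the per-dimension std (~3), the scale used on states
```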
--------------------------------------------------------------------------------
/rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .render import *
2 | from .param_noise import *
3 | from .remote_replay import *
4 | import sys
5 |
6 | class ProgBar():
7 | def __init__(self, total, bar_len=40):
8 | self.total = total
9 | self.count = 0
10 | self.bar_len = bar_len
11 |
12 | def next(self, msg=''):
13 | self.count += 1
14 |
15 | fill_len = int(round(self.bar_len * self.count / float(self.total)))
16 | bar = '=' * fill_len + '-' * (self.bar_len - fill_len)
17 |
18 | percent = round(100.0 * self.count / float(self.total), 1)
19 |
20 | msg = msg.ljust(len(msg) + 2)
21 |
22 | sys.stdout.write('[%s] %s%s ... %s\r' % (bar, percent, '%', msg))
23 | sys.stdout.flush()
24 |
--------------------------------------------------------------------------------
/rl/utils/param_noise.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import gym
4 |
5 | """
6 | From OpenAI Baselines:
7 | https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py
8 | """
9 | # For parameter noise
10 | class AdaptiveParamNoiseSpec(object):
11 | def __init__(self, initial_stddev=0.1, desired_action_stddev=0.2, adaptation_coefficient=1.01):
12 | """
13 | Note that initial_stddev and current_stddev refer to std of parameter noise,
14 | but desired_action_stddev refers to (as name notes) desired std in action space
15 | """
16 | self.initial_stddev = initial_stddev
17 | self.desired_action_stddev = desired_action_stddev
18 | self.adaptation_coefficient = adaptation_coefficient
19 |
20 | self.current_stddev = initial_stddev
21 |
22 | def adapt(self, distance):
23 | if distance > self.desired_action_stddev:
24 | # Decrease stddev.
25 | self.current_stddev /= self.adaptation_coefficient
26 | else:
27 | # Increase stddev.
28 | self.current_stddev *= self.adaptation_coefficient
29 |
30 | def get_stats(self):
31 | stats = {
32 | 'param_noise_stddev': self.current_stddev,
33 | }
34 | return stats
35 |
36 | def __repr__(self):
37 | fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adaptation_coefficient={})'
38 | return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adaptation_coefficient)
39 |
40 | def distance_metric(actions1, actions2):
41 | """
42 | Compute "distance" between actions taken by two policies at the same states
43 | Expects numpy arrays
44 | """
45 | diff = actions1-actions2
46 | mean_diff = np.mean(np.square(diff), axis=0)
47 | dist = np.sqrt(np.mean(mean_diff))
48 | return dist
49 |
50 | def perturb_actor_parameters(perturbed_policy, unperturbed_policy, param_noise, device):
51 | """Apply parameter noise to actor model, for exploration"""
52 | perturbed_policy.load_state_dict(unperturbed_policy.state_dict())
53 | params = perturbed_policy.state_dict()
54 | for name in params:
55 | if 'ln' in name:
56 | continue # skip LayerNorm parameters; perturb everything else
57 | param = params[name]
58 | param += torch.randn(param.shape).to(device) * param_noise.current_stddev
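
A hedged usage sketch of the adapt loop these pieces support (it assumes the three definitions above are in scope; the linear "policies" are throwaway stand-ins, not real actors):

```python
import copy
import torch
import torch.nn as nn

policy = nn.Linear(4, 2)                         # stand-in for a real actor network
perturbed_policy = copy.deepcopy(policy)
param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.2)

states = torch.randn(64, 4)
perturb_actor_parameters(perturbed_policy, policy, param_noise, device=torch.device("cpu"))

with torch.no_grad():
    dist = distance_metric(policy(states).numpy(), perturbed_policy(states).numpy())

param_noise.adapt(dist)          # shrink the stddev if actions drifted too far, grow it otherwise
print(param_noise.get_stats())
```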
--------------------------------------------------------------------------------
/rl/utils/remote_replay.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import ray
4 |
5 | # tensorboard
6 | from datetime import datetime
7 | from torch.utils.tensorboard import SummaryWriter
8 | from colorama import Fore, Style
9 |
10 | # more efficient replay memory?
11 | from collections import deque
12 |
13 | # Code based on:
14 | # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
15 |
16 | # Expects tuples of (state, next_state, action, reward, done)
17 |
18 | @ray.remote
19 | class ReplayBuffer_remote(object):
20 | def __init__(self, size, experiment_name, args):
21 | """Create Replay buffer.
22 | Parameters
23 | ----------
24 | size: int
25 | Max number of transitions to store in the buffer. When the buffer
26 | overflows the old memories are dropped.
27 | """
28 | self.storage = deque(maxlen=int(size))
29 | self.max_size = size
30 |
31 | print("Created replay buffer with size {}".format(self.max_size))
32 |
33 | def __len__(self):
34 | return len(self.storage)
35 |
36 | def storage_size(self):
37 | return len(self.storage)
38 |
39 | def add(self, data):
40 | self.storage.append(data)
41 |
42 | def add_bulk(self, data):
43 | for i in range(len(data)):
44 | self.storage.append(data[i])
45 |
46 | def print_size(self):
47 | print("size = {}".format(len(self.storage)))
48 |
49 | def sample(self, batch_size):
50 | ind = np.random.randint(0, len(self.storage), size=batch_size)
51 | x, y, u, r, d = [], [], [], [], []
52 |
53 | for i in ind:
54 | X, Y, U, R, D = self.storage[i]
55 | x.append(np.array(X, copy=False))
56 | y.append(np.array(Y, copy=False))
57 | u.append(np.array(U, copy=False))
58 | r.append(np.array(R, copy=False))
59 | d.append(np.array(D, copy=False))
60 |
61 | # print("Sampled experience from replay buffer.")
62 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1)
63 |
64 | # Non-ray actor for replay buffer
65 | class ReplayBuffer(object):
66 | def __init__(self, max_size=1e7):
67 | self.storage = []
68 | self.max_size = max_size
69 | self.ptr = 0
70 |
71 | def add(self, data):
72 | if len(self.storage) < self.max_size:
73 | self.storage.append(data)
74 | self.storage[int(self.ptr)] = data
75 | self.ptr = (self.ptr + 1) % self.max_size
76 |
77 |
78 | def sample(self, batch_size):
79 | ind = np.random.randint(0, len(self.storage), size=batch_size)
80 | x, y, u, r, d = [], [], [], [], []
81 |
82 | for i in ind:
83 | X, Y, U, R, D = self.storage[i]
84 | x.append(np.array(X, copy=False))
85 | y.append(np.array(Y, copy=False))
86 | u.append(np.array(U, copy=False))
87 | r.append(np.array(R, copy=False))
88 | d.append(np.array(D, copy=False))
89 |
90 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1)
91 |
92 | def get_transitions_from_range(self, start, end):
93 | ind = np.arange(int(start), int(end))
94 | x, u = [], []
95 | for i in ind:
96 | X, Y, U, R, D = self.storage[i]
97 | x.append(np.array(X, copy=False))
98 | u.append(np.array(U, copy=False))
99 |
100 | return np.array(x), np.array(u)
101 |
102 | def get_all_transitions(self):
103 | # list of transition tuples
104 | return self.storage
105 |
106 | def add_parallel(self, data):
107 | for i in range(len(data)):
108 | self.add(data[i])
--------------------------------------------------------------------------------
/test_policy.py:
--------------------------------------------------------------------------------
1 | from cassie import CassiePlayground
2 | from tools.test_commands import *
3 | from tools.eval_perturb import *
4 | from tools.eval_mission import *
5 | from tools.compare_pols import *
6 | from tools.eval_sensitivity import *
7 | from collections import OrderedDict
8 | from util.env import env_factory
9 |
10 | import torch
11 | import pickle
12 | import os, sys, argparse
13 | import numpy as np
14 |
15 | # Get policy to test from args, load policy and env
16 | parser = argparse.ArgumentParser()
17 | # General args
18 | parser.add_argument("--path", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing policy and run details")
19 | parser.add_argument("--path2", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing 2nd policy to compare against")
20 | parser.add_argument("--n_procs", type=int, default=4, help="Number of procs to use for multi-processing")
21 | parser.add_argument("--test", type=str, default="full", help="Test to run (options: \"full\", \"commands\", and \"perturb\", and \"compare\")")
22 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not")
23 | # Test Commands args
24 | parser.add_argument("--n_steps", type=int, default=200, help="Number of steps to for a full command cycle (1 speed change and 1 orientation change)")
25 | parser.add_argument("--n_commands", type=int, default=6, help="Number of commands in a single test iteration")
26 | parser.add_argument("--max_speed", type=float, default=3.0, help="Maximum allowable speed to test")
27 | parser.add_argument("--min_speed", type=float, default=0.0, help="Minimum allowable speed to test")
28 | parser.add_argument("--n_iter", type=int, default=10000, help="Number of command cycles to test")
29 | # Test Perturbs args
30 | parser.add_argument("--wait_time", type=float, default=3.0, help="How long to wait after perturb to count as success")
31 | parser.add_argument("--pert_dur", type=float, default=0.2, help="How long to apply perturbation")
32 | parser.add_argument("--pert_size", type=float, default=50, help="Size of perturbation to start sweep from")
33 | parser.add_argument("--pert_incr", type=float, default=10.0, help="How much to increment the perturbation size after each success")
34 | parser.add_argument("--pert_body", type=str, default="cassie-pelvis", help="Body to apply perturbation to")
35 | parser.add_argument("--num_angles", type=int, default=100, help="How many angles to test (angles are evenly divided into 2*pi)")
36 | # Test Mission args
37 | parser.add_argument("--viz", default=False, action='store_true')
38 | # Test parameter sensitivity args
39 | parser.add_argument("--sens_incr", type=float, default=0.05, help="Size of increments for the sensityivity sweep")
40 | parser.add_argument("--hi_factor", type=float, default=15, help="High factor")
41 | parser.add_argument("--lo_factor", type=float, default=0, help="Low factor")
42 |
43 | args = parser.parse_args()
44 | run_args = pickle.load(open(os.path.join(args.path, "experiment.pkl"), "rb"))
45 | # cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
46 | # env_fn = partial(CassieEnv, traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
47 | # Note: mirror should be False here so that env_factory returns a regular env constructor that can be
48 | # called to create a cassie environment (a symmetric env constructor cannot be called to make another env)
49 | env_fn = env = env_factory(
50 | run_args.env_name,
51 | command_profile=run_args.command_profile,
52 | input_profile=run_args.input_profile,
53 | simrate=run_args.simrate,
54 | dynamics_randomization=run_args.dyn_random,
55 | mirror=run_args.mirror,
56 | learn_gains=run_args.learn_gains,
57 | reward=run_args.reward,
58 | history=run_args.history,
59 | no_delta=run_args.no_delta,
60 | traj=run_args.traj,
61 | ik_baseline=run_args.ik_baseline
62 | )
63 | cassie_env = env_fn()
64 | policy = torch.load(os.path.join(args.path, "actor.pt"))
65 | if args.eval:
66 | policy.eval()
67 | if hasattr(policy, 'init_hidden_state'):
68 | policy.init_hidden_state()
69 |
70 | # TODO: make how data is returned/saved to file inside the function consistent across all testing functions
71 | def test_commands(cassie_env, policy, args):
72 | print("Testing speed and orient commands")
73 | if args.n_procs == 1:
74 | save_data = eval_commands(cassie_env, policy, num_steps=args.n_steps, num_commands=args.n_commands,
75 | max_speed=args.max_speed, min_speed=args.min_speed, num_iters=args.n_iter)
76 | np.save(os.path.join(args.path, "eval_commands.npy"), save_data)
77 | else:
78 | eval_commands_multi(env_fn, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed,
79 | min_speed=args.min_speed, num_iters=args.n_iter, num_procs=args.n_procs, filename=os.path.join(args.path, "eval_commands.npy"))
80 |
81 | def test_perturbs(cassie_env, policy, args):
82 | print("Testing perturbations")
83 | if args.n_procs == 1:
84 | save_data = compute_perturbs(cassie_env, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size,
85 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles)
86 | else:
87 | save_data = compute_perturbs_multi(env_fn, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size,
88 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles, num_procs=args.n_procs)
89 | np.save(os.path.join(args.path, "eval_perturbs.npy"), save_data)
90 |
91 | # If no specific test is given on the command line (default "full"), run all tests
92 | if args.test == "full":
93 | print("Running full test")
94 | test_commands(cassie_env, policy, args)
95 | test_perturbs(cassie_env, policy, args)
96 | elif args.test == "commands":
97 | test_commands(cassie_env, policy, args)
98 | elif args.test == "perturb":
99 | test_perturbs(cassie_env, policy, args)
100 | elif args.test == "mission":
101 | missions = ["straight", "curvy", "90_left", "90_right"]
102 | if not args.viz:
103 | print("Testing missions")
104 | save_data = []
105 |
106 | for mission in missions:
107 | print(mission + " mission:")
108 | cassie_env = CassiePlayground(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mission=mission)
109 | save_data.append(eval_mission(cassie_env, policy))
110 | np.save(os.path.join(args.path, "eval_missions.npy"), save_data)
111 | else:
112 | save_data = np.load(os.path.join(args.path, "eval_missions.npy"), allow_pickle=True)
113 | plot_mission_data(save_data, missions)
114 | elif args.test == "sensitivity":
115 | print("Testing sensitivity")
116 | save_data = eval_sensitivity(cassie_env, policy, incr=args.sens_incr, hi_factor=args.hi_factor, lo_factor=args.lo_factor)
117 | print(save_data)
118 | np.save(os.path.join(args.path, "eval_sensitivity.npy"), save_data)
119 | elif args.test == "compare":
120 | print("running compare")
121 | compare_pols(args.path, args.path2)
122 |
123 | # vis_commands(cassie_env, policy, num_steps=200, num_commands=6, max_speed=3, min_speed=0)
124 | # save_data = eval_commands(cassie_env, policy, num_steps=200, num_commands=2, max_speed=3, min_speed=0, num_iters=1)
125 | # np.save("./test_eval_commands.npy", save_data)
126 | # eval_commands_multi(env_fn, policy, num_steps=200, num_commands=4, max_speed=3, min_speed=0, num_iters=4, num_procs=4)
127 |
128 | # report_stats("./test_eval_commands.npy")
129 |
--------------------------------------------------------------------------------
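
A minimal sketch of reading back the result files the evaluation script above writes, assuming the corresponding tests have already been run for that policy directory (the directory name is a placeholder; the array layouts are whatever the eval functions produce):

# Sketch: load the .npy results saved by the evaluation script above.
# eval_missions.npy stores a list of dicts, so allow_pickle is required there.
import os
import numpy as np

pol_dir = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2"
commands = np.load(os.path.join(pol_dir, "eval_commands.npy"))
perturbs = np.load(os.path.join(pol_dir, "eval_perturbs.npy"))
missions = np.load(os.path.join(pol_dir, "eval_missions.npy"), allow_pickle=True)
print(commands.shape, perturbs.shape, len(missions))
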
/tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/.DS_Store
--------------------------------------------------------------------------------
/tools/aslip_tests/GRF_2KHz.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/GRF_2KHz.pkl
--------------------------------------------------------------------------------
/tools/aslip_tests/plots/footpos_err.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/plots/footpos_err.png
--------------------------------------------------------------------------------
/tools/cassie_top_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/cassie_top_white.png
--------------------------------------------------------------------------------
/tools/command_trajectory.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/command_trajectory.pkl
--------------------------------------------------------------------------------
/tools/compare_pols.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import fpdf
4 | from .eval_perturb import plot_perturb
5 |
6 | def process_commands(data):
7 | stats = {}
8 | num_iters = data.shape[0]
9 | pass_rate = np.sum(data[:, 0]) / num_iters
10 | stats["Pass Rate"] = pass_rate
11 | success_inds = np.where(data[:, 0] == 1)[0]
12 | speed_fail_inds = np.where(data[:, 1] == 0)[0]
13 | orient_fail_inds = np.where(data[:, 1] == 1)[0]
14 |
15 | speed_change = data[speed_fail_inds, 4]
16 | orient_change = data[orient_fail_inds, 5]
17 | speed_neg_inds = np.where(speed_change < 0)
18 | speed_pos_inds = np.where(speed_change > 0)
19 | orient_neg_inds = np.where(orient_change < 0)
20 | orient_pos_inds = np.where(orient_change > 0)
21 | stats["Number of speed fails"] = len(speed_fail_inds)
22 | stats["Number of orient fails"] = len(orient_fail_inds)
23 | if len(speed_fail_inds) == 0:
24 | avg_pos_speed = "N/A"
25 | avg_neg_speed = "N/A"
26 | else:
27 | avg_pos_speed = np.mean(speed_change[speed_pos_inds])
28 | avg_neg_speed = np.mean(speed_change[speed_neg_inds])
29 | if len(orient_fail_inds) == 0:
30 | avg_pos_orient = "N/A"
31 | avg_neg_orient = "N/A"
32 | else:
33 | avg_pos_orient = np.mean(orient_change[orient_pos_inds])
34 | avg_neg_orient = np.mean(orient_change[orient_neg_inds])
35 |
36 | stats["Avg pos speed fails"] = avg_pos_speed
37 | stats["Avg neg speed fails"] = avg_neg_speed
38 | stats["Avg pos_orient fails"] = avg_pos_orient
39 | stats["Avg neg_orient fails"] = avg_neg_orient
40 |
41 | return stats
42 |
43 | def process_perturbs(data):
44 | stats = {}
45 | num_angles, num_phases = data.shape
46 | angles = 360*np.linspace(0, 1, num_angles+1)
47 |
48 | stats["Avg Force"] = round(np.mean(data), 2)
49 | stats["Max Force"] = np.max(data)
50 | max_ind = np.unravel_index(np.argmax(data, axis=None), data.shape)
51 | stats["Max Location (angle, phase)"] = (str(round(angles[max_ind[0]], 2))+chr(176), max_ind[1])
52 | angle_avg = np.mean(data, axis=1)
53 | phase_avg = np.mean(data, axis=0)
54 | stats["Most Robust Angle"] = angles[np.argmax(angle_avg)]
55 | stats["Most Robust Phase"] = np.argmax(phase_avg)
56 |
57 | return stats
58 |
59 |
60 | # Note that for the spacing of the multi_cells to work out, this function assumes that
61 | # pol1's name is at least as long as pol2's name
62 | def draw_headers(pdf, pol1, pol2, key_col_width, min_width):
63 | epw = pdf.w - 2*pdf.l_margin
64 | th = pdf.font_size
65 | pol1_width = max(pdf.get_string_width(pol1), min_width) + 0.1
66 | pol2_width = max(pdf.get_string_width(pol2), min_width) + 0.1
67 | pol2_split = False
68 |     if pol1_width + pol2_width + key_col_width >= epw:
69 | pol1_width = (epw - key_col_width) / 2
70 | if pol2_width > pol1_width:
71 | pol2_split = True
72 | pol2_width = pol1_width
73 |
74 | start_x = pdf.get_x()
75 | start_y = pdf.get_y()
76 | pdf.set_x(start_x + key_col_width)
77 |
78 | # Draw pol1 and pol2 multicell first to figure out y height
79 | pdf.multi_cell(pol1_width, 2*th, pol1, border=1, align="C")
80 | pol1_height = pdf.get_y() - start_y
81 |
82 | pdf.set_xy(start_x+key_col_width+pol1_width, start_y)
83 | if pol2_split:
84 | pdf.multi_cell(pol2_width, 2*th, pol2, border=1, align="C")
85 | else:
86 | pdf.cell(pol2_width, pol1_height, pol2, border=1, align="C")
87 | pdf.set_xy(start_x, start_y)
88 | pdf.cell(key_col_width, pol1_height, "", border=1, align="C")
89 | pdf.set_xy(start_x, start_y + pol1_height)
90 |
91 | return pol1_width, pol2_width
92 |
93 | def compare_pols(pol1, pol2):
94 | pol1 = pol1.strip("/")
95 | pol2 = pol2.strip("/")
96 | # For spacing concerns later, need pol1 to be the "longer" (name wise) of the two
97 | if len(os.path.basename(pol2)) > len(os.path.basename(pol1)):
98 | temp = pol1
99 | pol1 = pol2
100 | pol2 = temp
101 | pol1_name = os.path.basename(pol1)
102 | pol2_name = os.path.basename(pol2)
103 | print("pol1: ", pol1_name)
104 | print("pol2: ", pol2_name)
105 |
106 | # Initial PDF setup
107 | pdf = fpdf.FPDF(format='letter', unit='in')
108 | pdf.add_page()
109 | pdf.set_font('Times','',10.0)
110 | # Effective page width, or just epw
111 | epw = pdf.w - 2*pdf.l_margin
112 | th = pdf.font_size
113 | # Set title
114 | pdf.cell(epw, 2*th, "Policy Robustness Comparison", 0, 1, "C")
115 | pdf.ln(2*th)
116 |
117 | # Print command test table
118 | pol1_command = np.load(os.path.join(pol1, "eval_commands.npy"))
119 | pol2_command = np.load(os.path.join(pol2, "eval_commands.npy"))
120 | pol1_command_stats = process_commands(pol1_command)
121 | pol2_command_stats = process_commands(pol2_command)
122 |
123 | pdf.cell(epw, 2*th, "Command Test", 0, 1, "L")
124 | pdf.ln(th)
125 | # Set column widths
126 | key_col_width = pdf.get_string_width(max(pol2_command_stats.keys(), key=len)) + .2
127 |
128 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(9.9999)))
129 |
130 | for key in pol2_command_stats.keys():
131 | pdf.cell(key_col_width, 2*th, key, border=1, align="C")
132 |         pdf.cell(pol1_width, 2*th, str(round(pol1_command_stats[key], 4)) if not isinstance(pol1_command_stats[key], str) else pol1_command_stats[key], border=1, align="C")  # "N/A" entries can't be rounded
133 |         pdf.cell(pol2_width, 2*th, str(round(pol2_command_stats[key], 4)) if not isinstance(pol2_command_stats[key], str) else pol2_command_stats[key], border=1, align="C")
134 | pdf.ln(2*th)
135 |
136 | # Print perturb test table
137 | pdf.ln(2*th)
138 | pdf.cell(epw, 2*th, "Perturbation Test", 0, 1, "L")
139 | pdf.ln(th)
140 | pol1_perturb = np.load(os.path.join(pol1, "eval_perturbs.npy"))
141 | pol2_perturb = np.load(os.path.join(pol2, "eval_perturbs.npy"))
142 | pol1_perturb_stats = process_perturbs(pol1_perturb)
143 | pol2_perturb_stats = process_perturbs(pol2_perturb)
144 |
145 | # Set column widths
146 | key_col_width = pdf.get_string_width(max(pol2_perturb_stats.keys(), key=len)) + .2
147 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(999.99)))
148 |
149 | for key in pol2_perturb_stats.keys():
150 | pdf.cell(key_col_width, 2*th, key, border=1, align="C")
151 | pdf.cell(pol1_width, 2*th, str(pol1_perturb_stats[key]), border=1, align="C")
152 | pdf.cell(pol2_width, 2*th, str(pol2_perturb_stats[key]), border=1, align="C")
153 | pdf.ln(2*th)
154 |
155 | max_force = max(np.max(np.mean(pol1_perturb, axis=1)), np.max(np.mean(pol2_perturb, axis=1)))
156 | max_force = 50*np.ceil(max_force / 50)
157 | pol1_perturb_plot = os.path.join(pol1, "perturb_plot.png")
158 | pol2_perturb_plot = os.path.join(pol2, "perturb_plot.png")
159 | plot_perturb(os.path.join(pol1, "eval_perturbs.npy"), pol1_perturb_plot, max_force)
160 | plot_perturb(os.path.join(pol2, "eval_perturbs.npy"), pol2_perturb_plot, max_force)
161 | pdf.ln(2*th)
162 |
163 | pdf.cell(epw, 2*th, "Perturbation Plot", 0, 1, "L")
164 | pol2_split = False
165 | if pdf.get_string_width(pol2) > epw / 2:
166 | pol2_split = True
167 | start_x = pdf.get_x()
168 | start_y = pdf.get_y()
169 | pdf.multi_cell(epw/2, 2*th, pol1_name, border=0, align="C")
170 | pol1_height = pdf.get_y() - start_y
171 | pdf.set_xy(start_x+epw/2, start_y)
172 | if pol2_split:
173 | pdf.multi_cell(epw/2, 2*th, pol2_name, border=0, align="C")
174 | else:
175 | pdf.cell(epw/2, pol1_height, pol2_name, border=0, align="C")
176 | pdf.set_xy(start_x, start_y+pol1_height)
177 | pdf.image(pol1_perturb_plot, x=start_x, y=start_y+pol1_height, w = epw/2-.1)
178 | pdf.image(pol2_perturb_plot, x=start_x+epw/2, y = start_y+pol1_height, w = epw/2-.1)
179 |
180 | pdf.output("./policy_compare.pdf")
181 |
182 |
183 |
--------------------------------------------------------------------------------
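
A usage sketch for compare_pols, assuming both policy folders already contain eval_commands.npy and eval_perturbs.npy from the tests above (the folder names below are placeholders):

# Hypothetical policy folders; compare_pols loads their eval_*.npy files and writes ./policy_compare.pdf.
from tools.compare_pols import compare_pols

compare_pols("./trained_models/policy_A", "./trained_models/policy_B")
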
/tools/eval_mission.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | sys.path.append("..") # Adds higher directory to python modules path.
3 |
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | import matplotlib.colors as mcolors
7 | import matplotlib as mpl
8 | import torch
9 | import time
10 | import cmath
11 | import math
12 | import ray
13 | from functools import partial
14 |
15 | # from cassie import CassieEnv
16 |
17 | def quaternion2euler(quaternion):
18 | w = quaternion[0]
19 | x = quaternion[1]
20 | y = quaternion[2]
21 | z = quaternion[3]
22 | ysqr = y * y
23 |
24 | t0 = +2.0 * (w * x + y * z)
25 | t1 = +1.0 - 2.0 * (x * x + ysqr)
26 | X = math.degrees(math.atan2(t0, t1))
27 |
28 | t2 = +2.0 * (w * y - z * x)
29 | t2 = +1.0 if t2 > +1.0 else t2
30 | t2 = -1.0 if t2 < -1.0 else t2
31 | Y = math.degrees(math.asin(t2))
32 |
33 | t3 = +2.0 * (w * z + x * y)
34 | t4 = +1.0 - 2.0 * (ysqr + z * z)
35 | Z = math.degrees(math.atan2(t3, t4))
36 |
37 | result = np.zeros(3)
38 | result[0] = X * np.pi / 180
39 | result[1] = Y * np.pi / 180
40 | result[2] = Z * np.pi / 180
41 |
42 | return result
43 |
44 | @torch.no_grad()
45 | def eval_mission(cassie_env, policy, num_iters=2):
46 |     # save_data holds the deviation between the robot's xy position, yaw, and xy speed and the
47 |     # position, orientation, and speed commanded by the mission, plus whether the mission ended early (pelvis height used as a fall indicator)
48 |
49 | runs = []
50 | pass_data = np.zeros(num_iters) # whether or not robot stayed alive during mission
51 |
52 | for j in range(num_iters):
53 | mission_len = cassie_env.command_traj.trajlen
54 | run_data = []
55 | state = torch.Tensor(cassie_env.reset_for_test())
56 | count, passed, done = 0, 1, False
57 | while count < mission_len and not done:
58 | # cassie_env.render()
59 | # Get action and act
60 | action = policy(state, True)
61 | action = action.data.numpy()
62 | state, reward, done, _ = cassie_env.step(action)
63 | state = torch.Tensor(state)
64 | # See if end state reached
65 | if done or cassie_env.sim.qpos()[2] < 0.4:
66 | passed = 0
67 | print("mission failed")
68 | # Get command info, robot info
69 | commanded_pos = cassie_env.command_traj.global_pos[:,0:2]
70 | commanded_speed = cassie_env.command_traj.speed_cmd
71 | commanded_orient = cassie_env.command_traj.orient
72 | qpos = cassie_env.sim.qpos()
73 | qvel = cassie_env.sim.qvel()
74 | actual_pos = qpos[0:2] # only care about x and y
75 | actual_speed = np.linalg.norm(qvel[0:2])
76 | actual_orient = quaternion2euler(qpos[3:7])[2] # only care about yaw
77 | # Calculate pos,vel,orient deviation as vector difference
78 | pos_error = np.linalg.norm(actual_pos - commanded_pos)
79 | speed_error = np.linalg.norm(actual_speed - commanded_speed)
80 | orient_error = np.linalg.norm(actual_orient - commanded_orient)
81 | # Log info
82 | run_data.append(([count, pos_error, speed_error, orient_error]))
83 | count += 1
84 | if passed:
85 | print("mission passed")
86 | pass_data[j] = 1
87 | runs.append(np.array(run_data))
88 |
89 | # summary stats
90 | run_lens = [len(run) for run in runs]
91 | print("longest / shortest / average steps : {} / {} / {}".format(max(run_lens), min(run_lens), sum(run_lens) / len(run_lens)))
92 |
93 | save_data = dict()
94 | save_data["runs"] = runs
95 | save_data["pass"] = pass_data
96 |
97 | return save_data
98 |
99 |
100 | def plot_mission_data(save_data, missions):
101 | num_missions = len(save_data)
102 | fig, axs = plt.subplots(num_missions, 3, figsize=(num_missions*5, 15))
103 | for i in range(num_missions):
104 | mission_runs = save_data[i]["runs"]
105 | for run in mission_runs:
106 | axs[i][0].plot(run[:, 0], run[:, 1])
107 | axs[i][1].plot(run[:, 0], run[:, 2])
108 | axs[i][2].plot(run[:, 0], run[:, 3])
109 | axs[i][1].set_title(missions[i]) # only put title on middle plot
110 | [axs[i][j].set_xlabel("steps") for j in range(3)]
111 | [axs[i][j].set_ylabel("error") for j in range(3)]
112 | plt.tight_layout(pad=3.0)
113 | plt.show()
--------------------------------------------------------------------------------
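
A quick sanity check of quaternion2euler (quaternions are ordered (w, x, y, z); angles come back in radians). The import assumes the repo root is on sys.path:

# Identity quaternion -> zero roll/pitch/yaw; a pure 90-degree yaw -> roughly [0, 0, pi/2].
import numpy as np
from tools.eval_mission import quaternion2euler

print(quaternion2euler(np.array([1.0, 0.0, 0.0, 0.0])))
yaw90 = np.array([np.cos(np.pi/4), 0.0, 0.0, np.sin(np.pi/4)])
print(quaternion2euler(yaw90))
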
/tools/eval_sensitivity.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import time
4 | import math
5 |
6 | #from eval_perturb import reset_to_phase
7 |
8 | @torch.no_grad()
9 | def sensitivity_sweep(cassie_env, policy, factor):
10 | # Pelvis: 0->5
11 | # Hips: 6->8 and 19->21
12 | # Achilles: 9->11 and 22->24
13 | # Knees: 12 and 25
14 | # Tarsus: 14 and 27
15 | #
16 |     # Total number of parameter groups: 9
17 |
18 | #parameter_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 20, 21, 9, 10, 11, 22, 23,
19 | # 24, 12, 25, 14, 27]
20 |
21 | default_damp = cassie_env.default_damping
22 | parameter_ids = [(0, 5), (6, 8), (19, 21), (9, 11), (22, 24), (12), (25),
23 | (14), (27)]
24 |
25 | count = np.zeros(len(parameter_ids))
26 | for i in range(9):
27 | damp_range = np.copy(default_damp)
28 | if type(parameter_ids[i]) is tuple:
29 | for j in range(parameter_ids[i][0], parameter_ids[i][1]+1):
30 | # Set damp sweep
31 | damp_range[j] = default_damp[j] * factor
32 | else:
33 | damp_id = parameter_ids[i]
34 | damp_range[damp_id] = default_damp[damp_id] * factor
35 |
36 |
37 | state = torch.Tensor(cassie_env.full_reset())
38 | cassie_env.sim.set_dof_damping(np.clip(damp_range, 0, None))
39 | cassie_env.speed = 1
40 | cassie_env.side_speed = 0
41 | cassie_env.phase_add = 1
42 |
43 | curr_time = time.time()
44 | curr_time = cassie_env.sim.time()
45 | start_t = curr_time
46 | while curr_time < start_t + 15:
47 | action = policy(state, True)
48 | action = action.data.numpy()
49 | state, reward, done, _ = cassie_env.step(action)
50 | state = torch.Tensor(state)
51 | curr_time = cassie_env.sim.time()
52 | if cassie_env.sim.qpos()[2] < 0.4:
53 | count[i] = 1
54 | break
55 |
56 | return count
57 |
58 | @torch.no_grad()
59 | def eval_sensitivity(cassie_env, policy, incr, hi_factor, lo_factor):
60 |     # count how many factors the low-end sweep will cover
61 | lo = 1.0
62 | lo_cnt = 0
63 | while lo >= lo_factor:
64 | lo -= incr
65 | lo_cnt += 1
66 |
67 | num_iters = int(hi_factor / incr) + lo_cnt + 1
68 |
69 | counter = 0
70 |
71 |     # Matrix with num_iters rows and 10 (1 + 9) columns. The first column is the
72 |     # damping factor; the next nine correspond to the parameter groups, where 1 marks
73 |     # a failure at that factor and 0 means no failure (or the default value).
74 | ret = np.zeros((num_iters, 10))
75 |
76 | # Run the highs
77 |
78 | hi = 1.0
79 |
80 | while hi <= hi_factor:
81 | vals = sensitivity_sweep(cassie_env, policy, hi)
82 | ret[counter][0] = hi
83 | ret[counter][1:] = vals
84 | hi += incr
85 | counter += 1
86 |
87 | lo = 1.0
88 |
89 | # Run lo's
90 | for _ in range(lo_cnt):
91 | vals = sensitivity_sweep(cassie_env, policy, lo)
92 | ret[counter][0] = lo
93 | ret[counter][1:] = vals
94 | lo -= incr
95 | counter += 1
96 |
97 | # Report
98 | return ret
99 |
--------------------------------------------------------------------------------
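
A small sketch of the damping multipliers eval_sensitivity ends up sweeping for given --sens_incr / --hi_factor / --lo_factor values; it mirrors the two loops above, modulo floating-point edge effects at the boundaries:

# The high sweep applies factors 1.0, 1.0+incr, ... up to hi_factor to the default damping;
# the low sweep applies 1.0, 1.0-incr, ... down to roughly lo_factor.
def sweep_factors(incr=0.05, hi_factor=15.0, lo_factor=0.0):
    his, hi = [], 1.0
    while hi <= hi_factor:
        his.append(round(hi, 3))
        hi += incr
    los, lo = [], 1.0
    while lo >= lo_factor:
        los.append(round(lo, 3))
        lo -= incr
    return his, los

his, los = sweep_factors()
print(len(his), his[:3], his[-1])
print(len(los), los[:3], los[-1])
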
/tools/test_perturb_eval_phase.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/test_perturb_eval_phase.npy
--------------------------------------------------------------------------------
/tools/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .elements import *
--------------------------------------------------------------------------------
/tools/utils/elements.py:
--------------------------------------------------------------------------------
1 | import pygame
2 | from pygame.locals import *
3 |
4 | import math
5 | import numpy as np
6 |
7 |
8 | class Mouse:
9 | def __init__(self, px_2_m):
10 | self.px = 0
11 | self.py = 0
12 | self.vx = 0
13 | self.vy = 0
14 | self.radius = 0
15 | self.color = (100,200,100)
16 | self.px_2_m = px_2_m
17 |
18 | def get_position(self):
19 | return (self.px, self.py)
20 |
21 | def get_m_position(self):
22 | return (self.px / self.px_2_m, self.py / self.px_2_m)
23 |
24 | def get_velocity(self):
25 | return (self.vx, self.vy)
26 |
27 | def update(self, time_passed):
28 | prev_p = self.get_position()
29 | self.px, self.py = pygame.mouse.get_pos()
30 | if time_passed > 0:
31 | self.vx = (self.px - prev_p[0]) / time_passed
32 | self.vy = (self.py - prev_p[1]) / time_passed
33 |
34 | def render(self, screen):
35 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius)
36 |
37 | class Robot:
38 | def __init__(self, trajectory, time_passed, frequency):
39 |
40 | # action space is forward velocity and heading
41 | self.positions = trajectory.positions
42 | self.velocities = trajectory.vels
43 | self.thetas = trajectory.thetas
44 | self.accels = trajectory.accels
45 | self.trajlen = len(trajectory.positions)
46 |
47 | # ground truth's position:
48 | self.t_px = int(self.positions[0][0])
49 | self.t_py = int(self.positions[0][1])
50 |
51 | # follower's pos
52 | self.f_px = int(self.positions[0][0])
53 | self.f_py = int(self.positions[0][1])
54 |
55 | self.radius = 10
56 | self.color = (50,50,200) # direct position tracker
57 | self.color2 = (200,50,50) # velocity + angle tracker
58 |
59 | self.frequency = frequency
60 | self.prev_time = self.prev_inc_time = time_passed
61 | self.counter = 0
62 | self.count_inc = 1
63 |
64 | def update(self,time_passed):
65 |
66 | curr_accel = self.accels[self.counter]
67 | curr_vel = self.velocities[self.counter]
68 | curr_theta = self.thetas[self.counter]
69 | track_pos = self.positions[self.counter]
70 |
71 | # print((curr_vel, curr_theta, np.cos(curr_theta), np.sin(curr_theta)))
72 |
73 | # ground truth's new position:
74 | self.t_px, self.t_py = track_pos[0], track_pos[1]
75 |
76 | # follower's new position: execute angle and velocity command for time passed
77 | t_diff = time_passed - self.prev_time
78 | vx, vy = curr_vel * np.cos(curr_theta), curr_vel * np.sin(curr_theta)
79 | ax, ay = curr_accel * np.cos(curr_theta), curr_accel * np.sin(curr_theta)
80 |         # subtract the y velocity term because pygame's y axis points down from the top of the screen
81 | self.f_px, self.f_py = self.f_px + vx * t_diff + 0.5 * ax * t_diff**2, self.f_py - vy * t_diff + 0.5 * ay * t_diff**2
82 | # self.f_px, self.f_py = self.f_px + vx * t_diff, self.f_py - vy * t_diff
83 |
84 |         # increment the trajectory index at the tracking frequency (e.g. 30 Hz)
85 | if time_passed - self.prev_inc_time > (1 / self.frequency):
86 | self.counter += 1
87 | self.prev_inc_time = time_passed
88 |
89 | self.prev_time = time_passed
90 |
91 | # check if we need to restart
92 | if self.counter == self.trajlen:
93 | self.counter = 0
94 | self.f_px, self.f_py = int(self.positions[0][0]),int(self.positions[0][1])
95 |
96 | def return_info(self, px_2_m):
97 |
98 | # thetas are the yaw angle of the robot
99 | thetas_rotated = self.thetas # no rotation for now
100 | # center of mass position is x y position converted to meters, with constant z height
101 |         positions_in_meters = np.array( [[self.positions[i][0] / px_2_m - self.positions[0][0] / px_2_m, self.positions[i][1] / px_2_m - self.positions[0][1] / px_2_m, 1.0] for i in range(self.trajlen)] )  # self.trajectory is never set; use self.positions
102 | velocities_in_meters = np.array( [self.velocities[i] / px_2_m for i in range(len(self.velocities))] )
103 |
104 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(positions_in_meters, velocities_in_meters, thetas_rotated))
105 |
106 | return positions_in_meters, velocities_in_meters, thetas_rotated
107 |
108 | def render(self,screen):
109 | pygame.draw.circle(screen,self.color,(int(self.t_px),int(self.t_py)),self.radius)
110 | pygame.draw.circle(screen,self.color2,(int(self.f_px),int(self.f_py)),self.radius)
111 | # pygame.transform.rotate(screen, np.radians(self.theta))
112 |
113 | class Waypoint:
114 | def __init__(self, mouse_position):
115 | self.px = mouse_position[0]
116 | self.py = mouse_position[1]
117 | self.radius = 5
118 | self.color = (100,200,100)
119 |
120 | def get_position(self):
121 | return (self.px, self.py)
122 |
123 | def render(self, screen):
124 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius)
125 |
126 | class Trajectory:
127 | def __init__(self, t_new, positions, thetas, vels, accels):
128 | self.param = t_new
129 | self.positions = positions
130 | self.thetas = thetas
131 | self.vels = vels
132 | self.accels = accels
133 | self.width = 2
134 | self.color = (100,200,100)
135 | self.arrow_color = (200,200,200)
136 | self.arrow_length = 20.0
137 |
138 | def render(self, screen):
139 | scaled_vels = self.vels / np.max(self.vels) * self.arrow_length
140 | pygame_poses = []
141 | for i in range(len(self.positions)):
142 | # pygame.draw.aaline(screen, self.color, self.positions[i-1], self.positions[i])
143 | # print(self.positions[i])
144 | pygame_poses.append((int(self.positions[i][0]), int(self.positions[i][1])))
145 | # circle for pos
146 | pygame.draw.circle(screen, self.color, pygame_poses[-1], self.width)
147 | for i in range(len(self.thetas)):
148 | # calculate next pos
149 | pos2 = (pygame_poses[i][0] + scaled_vels[i] * np.cos(self.thetas[i]) , pygame_poses[i][1] - scaled_vels[i] * np.sin(self.thetas[i]))
150 | # arrow for angle and vel
151 | pygame.draw.line(screen, self.arrow_color, pygame_poses[i], pos2)
152 |
153 | def prepare_for_export(self, scale_factor, screen_height):
154 |
155 | self.positions = [[self.positions[i][0] / scale_factor, (screen_height - self.positions[i][1]) / scale_factor, 1.0] for i in range(len(self.positions))]
156 | self.positions = [[self.positions[i][0]-self.positions[0][0], self.positions[i][1]-self.positions[0][1], self.positions[i][2]] for i in range(len(self.positions))]
157 |
158 | self.vels = [self.vels[i] / scale_factor for i in range(len(self.vels))]
159 |
160 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.positions[:5], self.vels[:5], self.thetas[:5]))
161 | print("max vel: {}".format(np.max(self.vels)))
162 |
163 | class Grid:
164 | def __init__(self, screen_width, screen_height, px_2_m):
165 | self.px_2_m = px_2_m
166 | self.screen_height = screen_height
167 | self.screen_width = screen_width
168 | self.cell_height = px_2_m # approx height of 1m x 1m cell
169 | self.cell_width = px_2_m # approx width of 1m x 1m cell
170 | self.color = (90,90,90)
171 |
172 | def render(self, screen):
173 | # draw vertical lines
174 |         for x in range(self.screen_width // self.px_2_m):
175 |             pygame.draw.line(screen, self.color, (x * self.cell_width,0), (x * self.cell_width,self.screen_height))
176 |         # draw horizontal lines
177 |         for y in range(self.screen_height // self.px_2_m):
178 | pygame.draw.line(screen, self.color, (0, y * self.cell_height), (self.screen_width, y * self.cell_height))
179 |
--------------------------------------------------------------------------------
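
A short sketch of the pixel-to-meter convention used by Trajectory.prepare_for_export and Robot.return_info: x is scaled by px_2_m, y is flipped (pygame's y axis grows downward) and scaled, and everything is shifted so the first waypoint is the origin. The px_2_m and screen_height values below are just examples:

# With 100 px per meter and a 600 px tall window, a point 200 px right of and 100 px above
# the start maps to (2.0 m, 1.0 m), with the constant 1.0 m height appended.
px_2_m = 100
screen_height = 600

def px_to_m(p, origin):
    x = p[0] / px_2_m - origin[0] / px_2_m
    y = (screen_height - p[1]) / px_2_m - (screen_height - origin[1]) / px_2_m
    return [x, y, 1.0]

print(px_to_m((300, 400), origin=(100, 500)))
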
/tools/vis_input_and_state.py:
--------------------------------------------------------------------------------
1 | import os, sys, argparse
2 | sys.path.append("..")
3 |
4 | from cassie import CassieEnv, CassiePlayground
5 | from rl.policies.actor import GaussianMLP_Actor
6 |
7 | import matplotlib.pyplot as plt
8 |
9 | import pickle
10 | import numpy as np
11 | import torch
12 | import time
13 |
14 | def set_axes_equal(ax):
15 | '''Make axes of 3D plot have equal scale so that spheres appear as spheres,
16 | cubes as cubes, etc.. This is one possible solution to Matplotlib's
17 | ax.set_aspect('equal') and ax.axis('equal') not working for 3D.
18 |
19 | Input
20 | ax: a matplotlib axis, e.g., as output from plt.gca().
21 | '''
22 |
23 | x_limits = ax.get_xlim3d()
24 | y_limits = ax.get_ylim3d()
25 | z_limits = ax.get_zlim3d()
26 |
27 | x_range = abs(x_limits[1] - x_limits[0])
28 | x_middle = np.mean(x_limits)
29 | y_range = abs(y_limits[1] - y_limits[0])
30 | y_middle = np.mean(y_limits)
31 | z_range = abs(z_limits[1] - z_limits[0])
32 | z_middle = np.mean(z_limits)
33 |
34 | # The plot bounding box is a sphere in the sense of the infinity
35 | # norm, hence I call half the max range the plot radius.
36 | plot_radius = 0.5*max([x_range, y_range, z_range])
37 |
38 | ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
39 | ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
40 | ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])
41 |
42 |
43 | def eval_policy(policy, args, run_args):
44 |
45 | aslip = True if run_args.traj == "aslip" else False
46 |
47 | cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history, reward=run_args.reward)
48 | cassie_env.debug = args.debug
49 | visualize = not args.no_viz
50 | traj_len = args.traj_len
51 |
52 | if aslip:
53 |         traj_info = []        # reference trajectory info
54 | traj_cmd_info = [] # what actually gets sent to robot as state
55 | robot_state_info = [] # robot's estimated state
56 | actual_state_info = [] # actual mujoco state of the robot
57 |
58 | state = torch.Tensor(cassie_env.reset_for_test())
59 | cassie_env.update_speed(2.0)
60 | print(cassie_env.speed)
61 | count, passed, done = 0, 1, False
62 | while count < traj_len and not done:
63 |
64 | if visualize:
65 | cassie_env.render()
66 |
67 | # Get action and act
68 | action = policy(state, True)
69 | action = action.data.numpy()
70 | state, reward, done, _ = cassie_env.step(action)
71 | state = torch.Tensor(state)
72 |
73 | print(reward)
74 |
75 | # print(cassie_env.phase)
76 |
77 | # See if end state reached
78 | if done or cassie_env.sim.qpos()[2] < 0.4:
79 | print(done)
80 | passed = 0
81 | print("failed")
82 |
83 | # Get trajectory info and robot info
84 | if aslip:
85 | a, b, c, d = cassie_env.get_traj_and_state_info()
86 | traj_info.append(a)
87 | traj_cmd_info.append(b)
88 | else:
89 | c, d = cassie_env.get_state_info()
90 | robot_state_info.append(c)
91 | actual_state_info.append(d)
92 |
93 | count += 1
94 |
95 | robot_state_info = robot_state_info[:-1]
96 | actual_state_info = actual_state_info[:-1]
97 |
98 | if aslip:
99 |
100 | traj_info = traj_info[:-1]
101 | traj_cmd_info = traj_cmd_info[:-1]
102 |
103 | traj_info = np.array(traj_info)
104 | traj_cmd_info = np.array(traj_cmd_info)
105 | robot_state_info = np.array(robot_state_info)
106 | actual_state_info = np.array(actual_state_info)
107 |
108 | fig, axs = plt.subplots(2, 2, figsize=(10, 10))
109 |
110 | # print(traj_info)
111 |
112 | print(traj_info.shape)
113 | axs[0][0].set_title("XZ plane of traj_info")
114 | axs[0][0].plot(traj_info[:,0,0], traj_info[:,0,2], 'o-', label='cpos')
115 | axs[0][0].plot(traj_info[:,1,0], traj_info[:,1,2], 'o-', label='lpos')
116 | axs[0][0].plot(traj_info[:,2,0], traj_info[:,2,2], 'o-', label='rpos')
117 |
118 | print(traj_cmd_info.shape)
119 | axs[0][1].set_title("XZ plane of traj_cmd_info")
120 | axs[0][1].plot(traj_cmd_info[:,0,0], traj_cmd_info[:,0,2], label='cpos')
121 | axs[0][1].plot(traj_cmd_info[:,1,0], traj_cmd_info[:,1,2], label='lpos')
122 | axs[0][1].plot(traj_cmd_info[:,2,0], traj_cmd_info[:,2,2], label='rpos')
123 |
124 | print(robot_state_info.shape)
125 | axs[1][0].set_title("XZ plane of robot_state_info")
126 | axs[1][0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos')
127 | axs[1][0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos')
128 | axs[1][0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos')
129 |
130 | print(actual_state_info.shape)
131 | axs[1][1].set_title("XZ plane of actual_state_info")
132 | axs[1][1].plot(actual_state_info[:,0,0], actual_state_info[:,0,2], label='cpos')
133 | axs[1][1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos')
134 | axs[1][1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos')
135 |
136 | plt.legend()
137 | plt.tight_layout()
138 | plt.show()
139 |
140 | else:
141 |
142 | robot_state_info = np.array(robot_state_info)
143 | actual_state_info = np.array(actual_state_info)
144 |
145 | fig, axs = plt.subplots(1, 2, figsize=(10, 10))
146 |
147 | print(robot_state_info.shape)
148 | axs[0].set_title("XZ plane of robot_state_info")
149 | axs[0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos')
150 | axs[0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos')
151 | axs[0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos')
152 |
153 | print(actual_state_info.shape)
154 | axs[1].set_title("XZ plane of actual_state_info")
155 | axs[1].plot(actual_state_info[:,0,0], actual_state_info[:,0,2], label='cpos')
156 | axs[1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos')
157 | axs[1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos')
158 |
159 | plt.legend()
160 | plt.tight_layout()
161 | plt.show()
162 |
163 |
164 | parser = argparse.ArgumentParser()
165 | parser.add_argument("--path", type=str, default="../trained_models/ppo/Cassie-v0/IK_traj-aslip_aslip_old_2048_12288_seed-10/", help="path to folder containing policy and run details")
166 | parser.add_argument("--traj_len", default=30, type=str)
167 | parser.add_argument("--debug", default=False, action='store_true')
168 | parser.add_argument("--no_viz", default=False, action='store_true')
169 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not")
170 |
171 | args = parser.parse_args()
172 |
173 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))
174 |
175 | policy = torch.load(args.path + "actor.pt")
176 |
177 | if args.eval:
178 | policy.eval() # NOTE: for some reason the saved nodelta_neutral_stateest_symmetry policy needs this but it breaks all new policies...
179 |
180 | eval_policy(policy, args, run_args)
--------------------------------------------------------------------------------
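
A minimal sketch of set_axes_equal on a 3D scatter with deliberately unequal ranges; it assumes set_axes_equal from the file above has been copied into scope (importing the module directly would also run its argument parsing), and the data is made up:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, only needed on older matplotlib

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
pts = np.random.rand(50, 3) * [4.0, 1.0, 0.5]   # unequal ranges on purpose
ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2])
set_axes_equal(ax)                              # copied from vis_input_and_state.py above
plt.show()
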
/tools/vis_perturb.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("..") # Adds higher directory to python modules path.
3 |
4 | import argparse
5 | import pickle
6 |
7 | import numpy as np
8 | import torch
9 | import time
10 | import copy
11 |
12 | from cassie import CassieEnv
13 |
14 | # Resets the env with reset_for_test, then simulates two full cycles plus the given
15 | # number of phase steps to land on the desired phase
16 | @torch.no_grad()
17 | def reset_to_phase(env, policy, phase):
18 |     state = torch.Tensor(env.reset_for_test())
19 |     for i in range(2*(env.phaselen + 1)):
20 |         action = policy.act(state, True)
21 |         action = action.data.numpy()
22 |         state, reward, done, _ = env.step(action)
23 |         state = torch.Tensor(state)
24 |     for i in range(phase):
25 |         action = policy.act(state, True)
26 |         action = action.data.numpy()
27 |         state, reward, done, _ = env.step(action)
28 |         state = torch.Tensor(state)
29 |
30 | parser = argparse.ArgumentParser()
31 | parser.add_argument("--path", type=str, default=None, help="path to folder containing policy and run details")
32 | args = parser.parse_args()
33 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))
34 |
35 | # RUN_NAME = "7b7e24-seed0"
36 | # POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt"
37 |
38 | # Load environment and policy
39 | # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
40 | cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
41 | policy = torch.load(args.path + "actor.pt")
42 |
43 | state = torch.Tensor(cassie_env.reset_for_test())
44 | # cassie_env.sim.step_pd(self.u)
45 | cassie_env.speed = 0.5
46 | cassie_env.phase_add = 1
47 | num_steps = cassie_env.phaselen + 1
48 | # Simulate for "wait_time" first to stabilize
49 | for i in range(num_steps*2):
50 | action = policy(state, True)
51 | action = action.data.numpy()
52 | state, reward, done, _ = cassie_env.step(action)
53 | state = torch.Tensor(state)
54 | curr_time = cassie_env.sim.time()
55 | start_t = curr_time
56 | sim_t = time.time()
57 | while curr_time < start_t + 4:
58 | action = policy(state, True)
59 | action = action.data.numpy()
60 | state, reward, done, _ = cassie_env.step(action)
61 | state = torch.Tensor(state)
62 | curr_time = cassie_env.sim.time()
63 | print("sim time: ", time.time() - sim_t)
64 | exit()
65 | qpos_phase = np.zeros((35, num_steps))
66 | qvel_phase = np.zeros((32, num_steps))
67 | action_phase = np.zeros((10, num_steps))
68 | cassie_state_phase = [copy.deepcopy(cassie_env.cassie_state)]
69 | # print("phase: ", cassie_env.phase)
70 | qpos_phase[:, 0] = cassie_env.sim.qpos()
71 | qvel_phase[:, 0] = cassie_env.sim.qvel()
72 | for i in range(num_steps-1):
73 | action = policy.act(state, True)
74 | action = action.data.numpy()
75 | action_phase[:, i] = action
76 | state, reward, done, _ = cassie_env.step(action)
77 | state = torch.Tensor(state)
78 | # print("phase: ", cassie_env.phase)
79 | qpos_phase[:, i+1] = cassie_env.sim.qpos()
80 | qvel_phase[:, i+1] = cassie_env.sim.qvel()
81 | cassie_state_phase.append(copy.deepcopy(cassie_env.cassie_state))
82 |
83 | action = policy.act(state, True)
84 | action = action.data.numpy()
85 | action_phase[:, -1] = action
86 | state = torch.Tensor(cassie_env.reset_for_test())
87 |
88 | cassie_env.speed = 0.5
89 | cassie_env.phase_add = 1
90 | wait_time = 4
91 | dt = 0.05
92 | speedup = 3
93 | perturb_time = 2
94 | perturb_duration = 0.2
95 | perturb_size = 170
96 | perturb_dir = -2*np.pi*np.linspace(0, 1, 5) # Angles from straight forward to apply force
97 | perturb_body = "cassie-pelvis"
98 | dir_idx = 0
99 |
100 | ###### Vis a single Perturbation for a given phase ######
101 | test_phase = 0
102 | reset_to_phase(cassie_env, policy, test_phase)
103 | # cassie_env.sim.set_qpos(qpos_phase[:, test_phase])
104 | # cassie_env.sim.set_qvel(qvel_phase[:, test_phase])
105 | # cassie_env.cassie_state = cassie_state_phase[test_phase]
106 | # cassie_env.sim.set_cassie_state(cassie_state_phase[test_phase])
107 | # cassie_env.phase = test_phase
108 | # state, reward, done, _ = cassie_env.step(action_phase[:, test_phase-1])
109 | # state = torch.Tensor(state)
110 | render_state = cassie_env.render()
111 | force_x = perturb_size * np.cos(0)
112 | force_y = perturb_size * np.sin(0)
113 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size))
114 | # Apply perturb (if time)
115 | start_t = cassie_env.sim.time()
116 | while render_state:
117 | if (not cassie_env.vis.ispaused()):
118 | curr_time = cassie_env.sim.time()
119 | if curr_time < start_t+perturb_duration:
120 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body)
121 | # Done perturbing, reset perturb_time and xfrc_applied
122 | elif start_t+perturb_duration < curr_time < start_t+perturb_duration + wait_time:
123 | # print("curr time: ", curr_time)
124 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body)
125 | else:
126 | # pass
127 | print("passed")
128 | break
129 |
130 | # Get action
131 | action = policy.act(state, True)
132 | action = action.data.numpy()
133 | state, reward, done, _ = cassie_env.step(action)
134 | if cassie_env.sim.qpos()[2] < 0.4:
135 | print("failed")
136 | break
137 | else:
138 | state = torch.Tensor(state)
139 | render_state = cassie_env.render()
140 | time.sleep(dt / speedup)
141 | exit()
142 |
143 | ###### Vis all perturbations ######
144 | render_state = cassie_env.render()
145 | force_x = perturb_size * np.cos(0)
146 | force_y = perturb_size * np.sin(0)
147 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size))
148 | while render_state:
149 | if (not cassie_env.vis.ispaused()):
150 | curr_time = cassie_env.sim.time()
151 | # Apply perturb (if time)
152 | if curr_time > perturb_time + wait_time:
153 | # Haven't perturbed for full time yet
154 | if curr_time < perturb_time + wait_time + perturb_duration:
155 | print("phase: ", cassie_env.phase)
156 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body)
157 | # Done perturbing, reset perturb_time and xfrc_applied
158 | else:
159 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body)
160 | dir_idx += 1
161 | # Skip last direction, 0 is the same as 2*pi
162 | if dir_idx >= len(perturb_dir) - 1:
163 | dir_idx = 0
164 | perturb_size += 50
165 | force_x = perturb_size * np.cos(perturb_dir[dir_idx])
166 | force_y = perturb_size * np.sin(perturb_dir[dir_idx])
167 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size))
168 | perturb_time = curr_time
169 |
170 | # Get action
171 | action = policy.act(state, True)
172 | action = action.data.numpy()
173 | state, reward, done, _ = cassie_env.step(action)
174 | if cassie_env.sim.qpos()[2] < 0.4:
175 | state = torch.Tensor(cassie_env.reset_for_test())
176 | cassie_env.speed = 0.5
177 | cassie_env.phase_add = 1
178 | perturb_time = 0
179 | else:
180 | state = torch.Tensor(state)
181 | render_state = cassie_env.render()
182 | time.sleep(dt / speedup)
--------------------------------------------------------------------------------
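
As a small worked example of the force applied in the loops above: the perturbation is perturb_size decomposed along the chosen angle and packed into a 6-vector (presumably force x/y/z then torque x/y/z) for vis.apply_force. The numbers below match the script's defaults:

import numpy as np

perturb_size = 170                                # Newtons
perturb_dir = -2 * np.pi * np.linspace(0, 1, 5)   # angles from straight ahead, as above
angle = perturb_dir[1]                            # -pi/2
force = [perturb_size * np.cos(angle), perturb_size * np.sin(angle), 0, 0, 0, 0]
print(np.degrees(-angle), force)                  # 90.0 deg -> roughly [0, -170, 0, 0, 0, 0]
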
/trained_models/5k_retrain/actor.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/actor.pt
--------------------------------------------------------------------------------
/trained_models/5k_retrain/critic.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/critic.pt
--------------------------------------------------------------------------------
/trained_models/5k_retrain/eval_commands.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_commands.npy
--------------------------------------------------------------------------------
/trained_models/5k_retrain/eval_perturbs.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_perturbs.npy
--------------------------------------------------------------------------------
/trained_models/5k_retrain/experiment.info:
--------------------------------------------------------------------------------
1 | algo_name: ppo
2 | clip: 0.2
3 | clock_based: True
4 | dyn_random: False
5 | entropy_coeff: 0.0
6 | env_name: Cassie-v0
7 | epochs: 5
8 | eps: 1e-05
9 | gamma: 0.99
10 | history: 0
11 | input_norm_steps: 100
12 | lam: 0.95
13 | lr: 0.0001
14 | max_grad_norm: 0.05
15 | max_traj_len: 300
16 | minibatch_size: 2048
17 | mirror: True
18 | n_itr: 20000
19 | name: model
20 | no_delta: True
21 | num_procs: 64
22 | num_steps: 187
23 | previous: None
24 | recurrent: False
25 | redis_address: None
26 | reward: 5k_speed_reward
27 | simrate: 60
28 | state_est: True
29 | traj: walking
30 | use_gae: False
31 | viz_port: 8097
32 |
--------------------------------------------------------------------------------
/trained_models/5k_retrain/experiment.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/experiment.pkl
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info:
--------------------------------------------------------------------------------
1 | command_profile: clock
2 | dyn_random: False
3 | env_name: Cassie-v0
4 | history: 0
5 | ik_baseline: None
6 | input_profile: full
7 | learn_gains: False
8 | mirror: True
9 | no_delta: True
10 | recurrent: False
11 | reward: 5k_speed_reward
12 | simrate: 60
13 | traj: None
14 |
--------------------------------------------------------------------------------
/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl
--------------------------------------------------------------------------------
/util/env.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import torch
4 | import numpy as np
5 |
6 | from cassie import CassieEnv, CassieTrajEnv, CassiePlayground, CassieStandingEnv
7 |
8 | def env_factory(path, command_profile="clock", input_profile="full", simrate=50, dynamics_randomization=True, mirror=False, learn_gains=False, reward=None, history=0, no_delta=True, traj=None, ik_baseline=False, **kwargs):
9 |     """
10 |     Returns an *uninstantiated* environment constructor.
11 |
12 |     Since environments containing cpointers (e.g. Mujoco envs) can't be serialized,
13 |     this allows us to pass their constructors to Ray remote functions instead
14 |     (since the gym registry isn't shared across ray subprocesses we can't simply
15 |     pass gym.make() either).
16 |
17 |     Note: env.unwrapped.spec is never set, if that matters for some reason.
18 |     """
19 |     from functools import partial
20 |
21 |
22 | # Custom Cassie Environment
23 | if path in ['Cassie-v0', 'CassieTraj-v0', 'CassiePlayground-v0', 'CassieStandingEnv-v0']:
24 |
25 | if path == 'Cassie-v0':
26 | env_fn = partial(CassieEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history)
27 | elif path == 'CassieTraj-v0':
28 | env_fn = partial(CassieTrajEnv, traj=traj, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history)
29 | elif path == 'CassiePlayground-v0':
30 | env_fn = partial(CassiePlayground, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history)
31 | elif path == 'CassieStandingEnv-v0':
32 | env_fn = partial(CassieStandingEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history)
33 |
34 | if mirror:
35 | from rl.envs.wrappers import SymmetricEnv
36 | env_fn = partial(SymmetricEnv, env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=env_fn().mirrored_acts)
37 |
38 | print()
39 | print("Environment: {}".format(path))
40 | print(" ├ reward: {}".format(reward))
41 | print(" ├ input prof: {}".format(input_profile))
42 | print(" ├ cmd prof: {}".format(command_profile))
43 | print(" ├ learn gains: {}".format(learn_gains))
44 | print(" ├ dyn_random: {}".format(dynamics_randomization))
45 | print(" ├ mirror: {}".format(mirror))
46 | if path == "CassieTraj-v0":
47 | print(" ├ traj: {}".format(traj))
48 | print(" ├ ik baseline: {}".format(ik_baseline))
49 | print(" ├ no_delta: {}".format(no_delta))
50 | print(" └ obs_dim: {}".format(env_fn().observation_space.shape[0]))
51 |
52 | return env_fn
53 |
54 | # OpenAI Gym environment
55 | else:
56 | import gym
57 | spec = gym.envs.registry.spec(path)
58 | _kwargs = spec._kwargs.copy()
59 | _kwargs.update(kwargs)
60 |
61 | try:
62 | if callable(spec._entry_point):
63 | cls = spec._entry_point(**_kwargs)
64 | else:
65 | cls = gym.envs.registration.load(spec._entry_point)
66 | except AttributeError:
67 | if callable(spec.entry_point):
68 | cls = spec.entry_point(**_kwargs)
69 | else:
70 | cls = gym.envs.registration.load(spec.entry_point)
71 |
72 | return partial(cls, **_kwargs)
73 |
--------------------------------------------------------------------------------
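
A minimal usage sketch of env_factory, mirroring how the evaluation script above builds its environment (the argument values are examples):

from util.env import env_factory

env_fn = env_factory("Cassie-v0", command_profile="clock", input_profile="full",
                     simrate=50, dynamics_randomization=False, mirror=False,
                     reward="5k_speed_reward", history=0)
env = env_fn()                    # the partial, not the env, is what gets shipped to Ray workers
state = env.reset_for_test()      # reset_for_test is what the tools above call before rolling out
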
/util/log.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import hashlib, os, pickle
3 |
4 | class color:
5 | BOLD = '\033[1m\033[48m'
6 | END = '\033[0m'
7 | ORANGE = '\033[38;5;202m'
8 | BLACK = '\033[38;5;240m'
9 |
10 | # Logger stores in trained_models by default
11 | def create_logger(args):
12 |     """Use hyperparams to set a directory to output diagnostic files."""
13 |     from torch.utils.tensorboard import SummaryWriter
14 |
15 | arg_dict = args.__dict__
16 | assert "seed" in arg_dict, \
17 | "You must provide a 'seed' key in your command line arguments"
18 | assert "logdir" in arg_dict, \
19 | "You must provide a 'logdir' key in your command line arguments."
20 | assert "env_name" in arg_dict, \
21 | "You must provide a 'env_name' key in your command line arguments."
22 |
23 | # sort the keys so the same hyperparameters will always have the same hash
24 | arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0]))
25 |
26 | # remove seed so it doesn't get hashed, store value for filename
27 | # same for logging directory
28 | run_name = arg_dict.pop('run_name')
29 | seed = str(arg_dict.pop("seed"))
30 | logdir = str(arg_dict.pop('logdir'))
31 | env_name = str(arg_dict['env_name'])
32 |
33 |     # see if this run has a unique name; if so, use it as the folder name, even if it overrides an existing run
34 | if run_name is not None:
35 | logdir = os.path.join(logdir, env_name)
36 | output_dir = os.path.join(logdir, run_name)
37 | else:
38 | # see if we are resuming a previous run, if we are mark as continued
39 | if args.previous is not None:
40 | if args.exchange_reward is not None:
41 | output_dir = args.previous[0:-1] + "_NEW-" + args.reward
42 | else:
43 | print(args.previous[0:-1])
44 | output_dir = args.previous[0:-1] + '-cont'
45 | else:
46 | # get a unique hash for the hyperparameter settings, truncated at 10 chars
47 | arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed
48 | logdir = os.path.join(logdir, env_name)
49 | output_dir = os.path.join(logdir, arg_hash)
50 |
51 | # create a directory with the hyperparm hash as its name, if it doesn't
52 | # already exist.
53 | os.makedirs(output_dir, exist_ok=True)
54 |
55 | # Create a file with all the hyperparam settings in human-readable plaintext,
56 | # also pickle file for resuming training easily
57 | info_path = os.path.join(output_dir, "experiment.info")
58 | pkl_path = os.path.join(output_dir, "experiment.pkl")
59 | with open(pkl_path, 'wb') as file:
60 | pickle.dump(args, file)
61 | with open(info_path, 'w') as file:
62 | for key, val in arg_dict.items():
63 | file.write("%s: %s" % (key, val))
64 | file.write('\n')
65 |
66 | logger = SummaryWriter(output_dir, flush_secs=0.1) # flush_secs=0.1 actually slows down quite a bit, even on parallelized set ups
67 | print("Logging to " + color.BOLD + color.ORANGE + str(output_dir) + color.END)
68 |
69 | logger.dir = output_dir
70 | return logger
71 |
72 | # The rule for curriculum learning is that the env observation space must stay the same (so attributes like env.clock_based or env.state_est can't differ and are forced to match here)
73 | # Load hyperparameters from the previous run when continuing training
74 | def parse_previous(args):
75 | if args.previous is not None:
76 | run_args = pickle.load(open(args.previous + "experiment.pkl", "rb"))
77 | args.recurrent = run_args.recurrent
78 | args.env_name = run_args.env_name
79 | args.command_profile = run_args.command_profile
80 | args.input_profile = run_args.input_profile
81 | args.learn_gains = run_args.learn_gains
82 | args.traj = run_args.traj
83 | args.no_delta = run_args.no_delta
84 | args.ik_baseline = run_args.ik_baseline
85 | if args.exchange_reward is not None:
86 | args.reward = args.exchange_reward
87 | args.run_name = run_args.run_name + "_NEW-" + args.reward
88 | else:
89 | args.reward = run_args.reward
90 | args.run_name = run_args.run_name + "--cont"
91 | return args
92 |
--------------------------------------------------------------------------------
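
A minimal sketch of calling create_logger with a hand-built Namespace; normally the training script's parsed args are passed in, so the fields below are just the ones the function reads, and the values are examples:

import argparse
from util.log import create_logger

args = argparse.Namespace(env_name="Cassie-v0", logdir="./trained_models/", seed=0,
                          run_name="demo_run", previous=None)
logger = create_logger(args)   # writes experiment.info / experiment.pkl, returns a SummaryWriter
print(logger.dir)              # ./trained_models/Cassie-v0/demo_run
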
/util/logo.py:
--------------------------------------------------------------------------------
1 | class color:
2 | BOLD = '\033[1m\033[48m'
3 | END = '\033[0m'
4 | ORANGE = '\033[38;5;202m'
5 | BLACK = '\033[38;5;240m'
6 |
7 |
8 | def print_logo(subtitle="", option=2):
9 | print()
10 | print(color.BOLD + color.ORANGE + " .8. " + color.BLACK + " 8 888888888o " + color.ORANGE + "8 8888888888 `8.`8888. ,8' ")
11 | print(color.BOLD + color.ORANGE + " .888. " + color.BLACK + " 8 8888 `88. " + color.ORANGE + "8 8888 `8.`8888. ,8' ")
12 | print(color.BOLD + color.ORANGE + " :88888. " + color.BLACK + " 8 8888 `88 " + color.ORANGE + "8 8888 `8.`8888. ,8' ")
13 | print(color.BOLD + color.ORANGE + " . `88888. " + color.BLACK + " 8 8888 ,88 " + color.ORANGE + "8 8888 `8.`8888.,8' ")
14 | print(color.BOLD + color.ORANGE + " .8. `88888. " + color.BLACK + " 8 8888. ,88' " + color.ORANGE + "8 888888888888 `8.`88888' ")
15 | print(color.BOLD + color.ORANGE + " .8`8. `88888. " + color.BLACK + " 8 888888888P' " + color.ORANGE + "8 8888 .88.`8888. ")
16 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8'`8.`8888. ")
17 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ")
18 | print(color.BOLD + color.ORANGE + " .888888888. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ")
19 | print(color.BOLD + color.ORANGE + ".8' `8. `88888." + color.BLACK + " 8 8888 " + color.ORANGE + "8 888888888888 .8' `8.`8888. " + color.END)
20 | print("\n")
21 | print(subtitle)
22 | print("\n")
23 |
--------------------------------------------------------------------------------