├── .gitignore ├── 5k_test.py ├── LICENSE ├── README.md ├── apex-logo.png ├── apex.py ├── cassie ├── __init__.py ├── cassie.py ├── cassie_footdist_env.py ├── cassie_mininput_env.py ├── cassie_noaccel_footdist_env.py ├── cassie_noaccel_footdist_nojoint_env.py ├── cassie_noaccel_footdist_omniscient.py ├── cassie_novel_footdist_env.py ├── cassie_playground.py ├── cassie_standing_env.py ├── cassie_traj.py ├── cassiemujoco │ ├── WhyteField.png │ ├── __init__.py │ ├── cassie-stl-meshes │ │ ├── achilles-rod.stl │ │ ├── bleachers.stl │ │ ├── foot-crank.stl │ │ ├── foot.stl │ │ ├── heel-spring.stl │ │ ├── hip-pitch.stl │ │ ├── hip-roll.stl │ │ ├── hip-yaw.stl │ │ ├── knee-spring.stl │ │ ├── knee.stl │ │ ├── pelvis.stl │ │ ├── plantar-rod.stl │ │ ├── shin.stl │ │ ├── tarsus.stl │ │ └── terrains │ │ │ ├── bowl.png │ │ │ ├── radial_gradient.png │ │ │ ├── side_hill.png │ │ │ ├── side_slope.png │ │ │ ├── slope.png │ │ │ ├── step_pyramid.png │ │ │ └── terrain_1.png │ ├── cassie.xml │ ├── cassie.xml.orig │ ├── cassieUDP.py │ ├── cassie_crown.xml │ ├── cassie_drop_step.xml │ ├── cassie_hfield.xml │ ├── cassie_muTor.xml │ ├── cassie_noise_terrain.xml │ ├── cassie_playground.xml │ ├── cassie_soft.xml │ ├── cassie_stiff.xml │ ├── cassie_track.xml │ ├── cassie_waypoints.xml │ ├── cassiemujoco.py │ ├── cassiemujoco_ctypes.py │ ├── include │ │ ├── CassieCoreSim.h │ │ ├── PdInput.h │ │ ├── StateOutput.h │ │ ├── cassie_in_t.h │ │ ├── cassie_out_t.h │ │ ├── cassie_user_in_t.h │ │ ├── cassiemujoco.h │ │ ├── pd_in_t.h │ │ ├── state_out_t.h │ │ └── udp.h │ ├── libcassiemujoco.so │ ├── terrain_noise.xml │ ├── terrain_racetrack.xml │ ├── terrain_random_hills.xml │ ├── terrains │ │ ├── crown.png │ │ ├── drop_step.png │ │ ├── hfield.png │ │ ├── hfield2.png │ │ ├── hills.png │ │ ├── noise.png │ │ ├── noise1.npy │ │ ├── noise2.npy │ │ ├── noise3.npy │ │ ├── noisy.png │ │ ├── racetrack1.png │ │ ├── rand_hill1.npy │ │ ├── rand_hill2.npy │ │ ├── rand_hill3.npy │ │ ├── slope.png │ │ ├── utils │ │ │ └── noise_generator.py │ │ └── wavefield.png │ ├── test_terrain_noise.xml │ └── test_terrain_slope.xml ├── deprecated │ ├── aslipik_env.py │ ├── aslipik_unified_env.py │ ├── aslipik_unified_no_delta_env.py │ ├── cassie_env.py │ ├── env_test.py │ ├── ground_friction_env.py │ ├── ik_env.py │ ├── no_delta_env.py │ ├── plotting.py │ ├── slipik_env.py │ ├── speed_double_freq_env.py │ ├── speed_env.py │ ├── speed_freq_env.py │ ├── speed_freq_no_delta_env.py │ ├── speed_no_delta_env.py │ ├── speed_no_delta_neutral_foot_env.py │ └── taskspace_env.py ├── missions │ ├── 90_left │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── 90_right │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── __init__.py │ ├── add_waypoints.py │ ├── command_mission.py │ ├── curvy │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── 
command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ └── waypoints_2.8.csv │ ├── default │ │ ├── command_trajectory.pkl │ │ └── waypoints.csv │ └── straight │ │ ├── command_trajectory_0.5.pkl │ │ ├── command_trajectory_0.9.pkl │ │ ├── command_trajectory_1.4.pkl │ │ ├── command_trajectory_1.9.pkl │ │ ├── command_trajectory_2.3.pkl │ │ ├── command_trajectory_2.8.pkl │ │ ├── waypoints_0.5.csv │ │ ├── waypoints_0.9.csv │ │ ├── waypoints_1.4.csv │ │ ├── waypoints_1.9.csv │ │ ├── waypoints_2.3.csv │ │ ├── waypoints_2.4.csv │ │ └── waypoints_2.8.csv ├── outfile.npz ├── phase_function.py ├── plotting_ex.py ├── quaternion_function.py ├── rewards │ ├── __init__.py │ ├── aslip_rewards.py │ ├── clock_rewards.py │ ├── command_reward.py │ ├── iros_paper_reward.py │ ├── reward_clock_funcs │ │ ├── incentive_clock_smooth.pkl │ │ ├── incentive_clock_smooth_aerial.pkl │ │ ├── incentive_clock_smooth_zero.pkl │ │ ├── incentive_clock_strict0.1.pkl │ │ ├── incentive_clock_strict0.1_aerial.pkl │ │ ├── incentive_clock_strict0.1_zero.pkl │ │ ├── incentive_clock_strict0.4.pkl │ │ ├── incentive_clock_strict0.4_aerial.pkl │ │ ├── incentive_clock_strict0.4_zero.pkl │ │ ├── no_incentive_aslip_clock_strict0.3.pkl │ │ ├── no_incentive_clock_smooth.pkl │ │ ├── no_incentive_clock_smooth_aerial.pkl │ │ ├── no_incentive_clock_smooth_zero.pkl │ │ ├── no_incentive_clock_strict0.1.pkl │ │ ├── no_incentive_clock_strict0.1_aerial.pkl │ │ ├── no_incentive_clock_strict0.1_zero.pkl │ │ ├── no_incentive_clock_strict0.4.pkl │ │ ├── no_incentive_clock_strict0.4_aerial.pkl │ │ └── no_incentive_clock_strict0.4_zero.pkl │ ├── rnn_dyn_random_reward.py │ ├── side_speedmatch_foottraj_reward.py │ ├── side_speedmatch_heightvel_reward.py │ ├── side_speedmatch_heuristic_reward.py │ ├── side_speedmatch_rewards.py │ ├── side_speedmatch_torquesmooth_reward.py │ ├── speedmatch_footorient_joint_smooth_reward.py │ ├── speedmatch_heuristic_reward.py │ ├── speedmatch_rewards.py │ ├── standing_rewards.py │ └── trajmatch_reward.py └── trajectory │ ├── .DS_Store │ ├── __init__.py │ ├── aslipTrajsTaskSpace │ ├── walkCycle_0.0.pkl │ ├── walkCycle_0.1.pkl │ ├── walkCycle_0.2.pkl │ ├── walkCycle_0.3.pkl │ ├── walkCycle_0.4.pkl │ ├── walkCycle_0.5.pkl │ ├── walkCycle_0.6.pkl │ ├── walkCycle_0.7.pkl │ ├── walkCycle_0.8.pkl │ ├── walkCycle_0.9.pkl │ ├── walkCycle_1.0.pkl │ ├── walkCycle_1.1.pkl │ ├── walkCycle_1.2.pkl │ ├── walkCycle_1.3.pkl │ ├── walkCycle_1.4.pkl │ ├── walkCycle_1.5.pkl │ ├── walkCycle_1.6.pkl │ ├── walkCycle_1.7.pkl │ ├── walkCycle_1.8.pkl │ ├── walkCycle_1.9.pkl │ └── walkCycle_2.0.pkl │ ├── aslip_trajectory.py │ ├── backward_trajectory_Nov │ ├── ikNet_state_dict.pt │ ├── more-poses-trial.bin │ ├── spline_stepping_traj.pkl │ ├── stepdata.bin │ ├── stepping_trajectory_Nov │ ├── test.py │ ├── traj_from_ref_foot_data.pkl │ ├── trajectory.py │ └── walk-in-place-downsampled.bin ├── img ├── output.gif └── output2.gif ├── mirror_policy_check.py ├── plot_policy.py ├── rl ├── __init__.py ├── algos │ ├── __init__.py │ ├── ars.py │ ├── async_td3.py │ ├── dpg.py │ ├── ppo.py │ └── sync_td3.py ├── config │ └── monitor.ini ├── distributions │ ├── __init__.py │ ├── beta.py │ └── gaussian.py ├── envs │ ├── __init__.py │ ├── monitor.py │ ├── normalize.py │ ├── vectorize.py │ ├── wrapper.py │ └── wrappers.py ├── policies │ ├── __init__.py │ ├── actor.py │ ├── base.py │ └── critic.py └── utils │ ├── __init__.py │ ├── 
param_noise.py │ ├── remote_replay.py │ └── render.py ├── test_policy.py ├── tools ├── .DS_Store ├── aslip_tests │ ├── GRF_2KHz.pkl │ ├── GRF_compare.py │ ├── foot_placement.py │ ├── parallelized.py │ ├── plots │ │ └── footpos_err.png │ └── taskspace_tracking.py ├── cassie_top_white.png ├── command_trajectory.pkl ├── compare_pols.py ├── eval_mission.py ├── eval_perturb.py ├── eval_sensitivity.py ├── test_commands.py ├── test_perturb_eval_phase.npy ├── utils │ ├── __init__.py │ └── elements.py ├── vis_input_and_state.py ├── vis_perturb.py └── waypoint_trajectory.py ├── trained_models ├── 5k_retrain │ ├── actor.pt │ ├── critic.pt │ ├── eval_commands.npy │ ├── eval_perturbs.npy │ ├── experiment.info │ └── experiment.pkl └── nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2 │ ├── 5k_test.pkl │ ├── actor.pt │ ├── critic.pt │ ├── eval_commands.npy │ ├── eval_perturbs.npy │ ├── experiment.info │ └── experiment.pkl └── util ├── env.py ├── eval.py ├── log.py └── logo.py /.gitignore: -------------------------------------------------------------------------------- 1 | cassieXie/ 2 | MUJOCO_LOG.TXT 3 | cassie/cassiemujoco/mjkey.txt 4 | cassie/cassiemujoco/mjpro150/ 5 | cassie/pickled 6 | sim-to-real/ 7 | cassie/cassieIK_SL.pt 8 | cassie/taskspace_to_jointpos.pt 9 | .vscode/ 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | !libcassiemujoco.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | .static_storage/ 67 | .media/ 68 | local_settings.py 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | .spyproject 107 | 108 | # Rope project settings 109 | .ropeproject 110 | 111 | # mkdocs documentation 112 | /site 113 | 114 | # mypy 115 | .mypy_cache/ 116 | 117 | # ray files 118 | /ray_tmp 119 | ray_timeline.json 120 | 121 | # trained models 122 | /trained_models/* 123 | 124 | # logging directory 125 | /logs 126 | ref_qposes.png 127 | 128 | # testing directory files 129 | /tools/aslip_pipeline/testTS_logs/* 130 | /tools/aslip_pipeline/testVaryVel_logs/* 131 | /tools/apex_plots/* 132 | /tools/waypoints.csv 133 | 134 | *.sh.e* 135 | *.sh.o* 136 | 137 | 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Pedro Autran e Morais 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | apex 2 | 3 | ---- 4 | 5 | Apex is a small, modular library that contains some implementations of continuous reinforcement learning algorithms. Fully compatible with OpenAI gym. 6 | 7 | running1 8 | running2 9 | 10 | ## Running experiments 11 | 12 | ### Basics 13 | Any algorithm can be run from the apex.py entry point. 
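The Cassie environments listed later in this README (`Cassie-v0`, `CassieTraj-v0`, etc.) follow a gym-style interface, so they can also be driven directly from Python without going through `apex.py`. A minimal sketch, assuming default constructor arguments and a standard `reset`/`step` signature (an illustration, not a verified API reference):

```python
import numpy as np
from cassie import CassieEnv

env = CassieEnv()                    # constructor arguments, if any, assumed to have defaults
state = env.reset()

for _ in range(300):
    action = np.zeros(10)            # Cassie has 10 actuated motors; a trained policy would produce this
    state, reward, done, _ = env.step(action)   # gym-style 4-tuple assumed
    if done:
        state = env.reset()
```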
14 | 15 | To run PPO on a cassie environment, 16 | 17 | ```bash 18 | python apex.py ppo --env_name Cassie-v0 --num_procs 12 --run_name experiment01 19 | ``` 20 | 21 | To run TD3 on the gym environment Walker-v2, 22 | 23 | ```bash 24 | python apex.py td3_async --env_name Walker-v2 --num_procs 12 --run_name experiment02 25 | ``` 26 | 27 | ## Logging details / Monitoring live training progress 28 | Tensorboard logging is enabled by default for all algorithms. The logger expects that you supply an argument named ```logdir```, containing the root directory you want to store your logfiles in, and an argument named ```seed```, which is used to seed the pseudorandom number generators. 29 | 30 | A basic command line script illustrating this is: 31 | 32 | ```bash 33 | python apex.py ars --logdir logs/ars --seed 1337 34 | ``` 35 | 36 | The resulting directory tree would look something like this: 37 | ``` 38 | trained_models/ # directory with all of the saved models and tensorboard logs 39 | └── ars # algorithm name 40 | └── Cassie-v0 # environment name 41 | └── 8b8b12-seed1 # unique run name created with hash of hyperparameters 42 | ├── actor.pt # actor network for algo 43 | ├── critic.pt # critic network for algo 44 | ├── events.out.tfevents # tensorboard binary file 45 | ├── experiment.info # readable hyperparameters for this run 46 | └── experiment.pkl # loadable pickle of hyperparameters 47 | ``` 48 | 49 | Using tensorboard makes it easy to compare experiments and resume training later on. 50 | 51 | To see live training progress 52 | 53 | Run ```$ tensorboard --logdir logs/``` then navigate to ```http://localhost:6006/``` in your browser 54 | 55 | ## Cassie Environments: 56 | * `Cassie-v0` : basic unified environment for walking/running policies 57 | * `CassieTraj-v0` : unified environment with reference trajectories 58 | * `CassiePlayground-v0` : environment for executing autonomous missions 59 | * `CassieStanding-v0` : environment for training standing policies 60 | 61 | ## Algorithms: 62 | #### Currently implemented: 63 | * Parallelism with [Ray](https://github.com/ray-project/ray) 64 | * [GAE](https://arxiv.org/abs/1506.02438)/TD(lambda) estimators 65 | * [PPO](https://arxiv.org/abs/1707.06347), VPG with ratio objective and with log likelihood objective 66 | * [TD3](https://arxiv.org/abs/1802.09477) with [Parameter Noise Exploration](https://arxiv.org/abs/1706.01905) 67 | * [DDPG](https://arxiv.org/abs/1509.02971) 68 | * [RDPG](https://arxiv.org/abs/1512.04455) 69 | * [ARS](https://arxiv.org/abs/1803.07055) 70 | * Entropy based exploration bonus 71 | * advantage centering (observation normalization WIP) 72 | 73 | #### To be implemented long term: 74 | * [SAC](https://arxiv.org/abs/1801.01290) 75 | * [GPO](https://arxiv.org/abs/1711.01012) 76 | * [NAF](https://arxiv.org/abs/1603.00748) 77 | * [SVG](https://arxiv.org/abs/1510.09142) 78 | * [I2A](https://arxiv.org/abs/1707.06203) 79 | * [PGPE](http://ieeexplore.ieee.org/document/5708821/?reload=true) 80 | * [Value Distribution](https://arxiv.org/pdf/1707.06887.pdf) 81 | * Oracle methods (e.g. [GPS](https://arxiv.org/abs/1610.00529)) 82 | * CUDA support (should be trivial but I don't have a GPU to test on currently) 83 | 84 | #### Maybe implemented in future: 85 | 86 | * [DXNN](https://arxiv.org/abs/1008.2412) 87 | * [ACER](https://arxiv.org/abs/1611.01224) and other off-policy methods 88 | * Model-based methods 89 | 90 | ## Acknowledgements 91 | 92 | Thanks to @ikostrikov's whose great implementations were used for debugging. 
Also thanks to @rll for rllab, which inspired a lot of the high level interface and logging for this library, and to @OpenAI for the original PPO tensorflow implementation. Thanks to @sfujim for the clean implementations of TD3 and DDPG in PyTorch. Thanks @modestyachts for the easy to understand ARS implementation. 93 | -------------------------------------------------------------------------------- /apex-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/apex-logo.png -------------------------------------------------------------------------------- /cassie/__init__.py: -------------------------------------------------------------------------------- 1 | # Unified 2 | from .cassie import CassieEnv 3 | from .cassie_traj import CassieTrajEnv 4 | from .cassie_playground import CassiePlayground 5 | from .cassie_standing_env import CassieStandingEnv # sorta old/unused 6 | 7 | # Proprietary 8 | from .cassie_noaccel_footdist_omniscient import CassieEnv_noaccel_footdist_omniscient 9 | from .cassie_footdist_env import CassieEnv_footdist 10 | from .cassie_noaccel_footdist_env import CassieEnv_noaccel_footdist 11 | from .cassie_noaccel_footdist_nojoint_env import CassieEnv_noaccel_footdist_nojoint 12 | from .cassie_novel_footdist_env import CassieEnv_novel_footdist 13 | from .cassie_mininput_env import CassieEnv_mininput 14 | 15 | # CassieMujocoSim 16 | from .cassiemujoco import * 17 | 18 | 19 | ############## 20 | # DEPRECATED # 21 | ############## 22 | # from .cassie_env import CassieEnv 23 | # from .taskspace_env import CassieTSEnv 24 | # from .aslipik_env import CassieIKEnv 25 | # from .aslipik_unified_env import UnifiedCassieIKEnv 26 | # from .aslipik_unified_no_delta_env import UnifiedCassieIKEnvNoDelta 27 | # from .no_delta_env import CassieEnv_nodelta 28 | # from .dynamics_random import CassieEnv_rand_dyn 29 | # from .speed_double_freq_env import CassieEnv_speed_dfreq 30 | # from .ground_friction_env import CassieGroundFrictionEnv 31 | # from .cassie_standing_env import CassieStandingEnv 32 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/WhyteField.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/WhyteField.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from .cassiemujoco import * -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/achilles-rod.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/bleachers.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot-crank.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/foot.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/foot.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/heel-spring.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-pitch.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-roll.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/hip-yaw.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee-spring.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/knee.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/knee.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/pelvis.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/plantar-rod.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/shin.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/shin.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/tarsus.stl -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/bowl.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/radial_gradient.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_hill.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/side_slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/step_pyramid.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/cassie-stl-meshes/terrains/terrain_1.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/cassieUDP.py: -------------------------------------------------------------------------------- 1 | from .cassiemujoco_ctypes import * 2 | import os 3 | import ctypes 4 | import numpy as np 5 | 6 | class CassieUdp: 7 | def __init__(self, remote_addr='127.0.0.1', 
remote_port='25000', 8 | local_addr='0.0.0.0', local_port='25001'): 9 | self.sock = udp_init_client(str.encode(remote_addr), 10 | str.encode(remote_port), 11 | str.encode(local_addr), 12 | str.encode(local_port)) 13 | self.packet_header_info = packet_header_info_t() 14 | self.recvlen = 2 + 697 15 | self.sendlen = 2 + 58 16 | self.recvlen_pd = 2 + 493 17 | self.sendlen_pd = 2 + 476 18 | self.recvbuf = (ctypes.c_ubyte * max(self.recvlen, self.recvlen_pd))() 19 | self.sendbuf = (ctypes.c_ubyte * max(self.sendlen, self.sendlen_pd))() 20 | self.inbuf = ctypes.cast(ctypes.byref(self.recvbuf, 2), 21 | ctypes.POINTER(ctypes.c_ubyte)) 22 | self.outbuf = ctypes.cast(ctypes.byref(self.sendbuf, 2), 23 | ctypes.POINTER(ctypes.c_ubyte)) 24 | 25 | def send(self, u): 26 | pack_cassie_user_in_t(u, self.outbuf) 27 | send_packet(self.sock, self.sendbuf, self.sendlen, None, 0) 28 | 29 | def send_pd(self, u): 30 | pack_pd_in_t(u, self.outbuf) 31 | send_packet(self.sock, self.sendbuf, self.sendlen_pd, None, 0) 32 | 33 | def recv_wait(self): 34 | nbytes = -1 35 | while nbytes != self.recvlen: 36 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen, 37 | None, None) 38 | process_packet_header(self.packet_header_info, 39 | self.recvbuf, self.sendbuf) 40 | cassie_out = cassie_out_t() 41 | unpack_cassie_out_t(self.inbuf, cassie_out) 42 | return cassie_out 43 | 44 | def recv_wait_pd(self): 45 | nbytes = -1 46 | while nbytes != self.recvlen_pd: 47 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd, 48 | None, None) 49 | process_packet_header(self.packet_header_info, 50 | self.recvbuf, self.sendbuf) 51 | state_out = state_out_t() 52 | unpack_state_out_t(self.inbuf, state_out) 53 | return state_out 54 | 55 | def recv_newest(self): 56 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen, 57 | None, None) 58 | if nbytes != self.recvlen: 59 | return None 60 | process_packet_header(self.packet_header_info, 61 | self.recvbuf, self.sendbuf) 62 | cassie_out = cassie_out_t() 63 | unpack_cassie_out_t(self.inbuf, cassie_out) 64 | return cassie_out 65 | 66 | def recv_newest_pd(self): 67 | nbytes = get_newest_packet(self.sock, self.recvbuf, self.recvlen_pd, 68 | None, None) 69 | if nbytes != self.recvlen_pd: 70 | return None 71 | process_packet_header(self.packet_header_info, 72 | self.recvbuf, self.sendbuf) 73 | state_out = state_out_t() 74 | unpack_state_out_t(self.inbuf, state_out) 75 | return state_out 76 | 77 | def delay(self): 78 | return ord(self.packet_header_info.delay) 79 | 80 | def seq_num_in_diff(self): 81 | return ord(self.packet_header_info.seq_num_in_diff) 82 | 83 | def __del__(self): 84 | udp_close(self.sock) -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/CassieCoreSim.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef CASSIECORESIM_H 18 | #define CASSIECORESIM_H 19 | 20 | #include "cassie_user_in_t.h" 21 | #include "cassie_out_t.h" 22 | #include "cassie_in_t.h" 23 | 24 | typedef struct CassieCoreSim CassieCoreSim; 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | CassieCoreSim* CassieCoreSim_alloc(void); 31 | void CassieCoreSim_copy(CassieCoreSim *dst, const CassieCoreSim *src); 32 | void CassieCoreSim_free(CassieCoreSim *sys); 33 | void CassieCoreSim_setup(CassieCoreSim *sys); 34 | void CassieCoreSim_step(CassieCoreSim *sys, const cassie_user_in_t *in1, 35 | const cassie_out_t *in2, cassie_in_t *out1); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // CASSIECORESIM_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/PdInput.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef PDINPUT_H 18 | #define PDINPUT_H 19 | 20 | #include "pd_in_t.h" 21 | #include "cassie_out_t.h" 22 | #include "cassie_user_in_t.h" 23 | 24 | typedef struct PdInput PdInput; 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | PdInput* PdInput_alloc(void); 31 | void PdInput_copy(PdInput *dst, const PdInput *src); 32 | void PdInput_free(PdInput *sys); 33 | void PdInput_setup(PdInput *sys); 34 | void PdInput_step(PdInput *sys, const pd_in_t *in1, const cassie_out_t 35 | *in2, cassie_user_in_t *out1); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // PDINPUT_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/StateOutput.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef STATEOUTPUT_H 18 | #define STATEOUTPUT_H 19 | 20 | #include "cassie_out_t.h" 21 | #include "state_out_t.h" 22 | 23 | typedef struct StateOutput StateOutput; 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | StateOutput* StateOutput_alloc(void); 30 | void StateOutput_copy(StateOutput *dst, const StateOutput *src); 31 | void StateOutput_free(StateOutput *sys); 32 | void StateOutput_setup(StateOutput *sys); 33 | void StateOutput_step(StateOutput *sys, const cassie_out_t *in1, 34 | state_out_t *out1); 35 | 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | #endif // STATEOUTPUT_H 40 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef CASSIE_IN_T_H 18 | #define CASSIE_IN_T_H 19 | 20 | #define CASSIE_IN_T_PACKED_LEN 91 21 | 22 | #include 23 | 24 | typedef struct { 25 | unsigned short controlWord; 26 | double torque; 27 | } elmo_in_t; 28 | 29 | typedef struct { 30 | elmo_in_t hipRollDrive; 31 | elmo_in_t hipYawDrive; 32 | elmo_in_t hipPitchDrive; 33 | elmo_in_t kneeDrive; 34 | elmo_in_t footDrive; 35 | } cassie_leg_in_t; 36 | 37 | typedef struct { 38 | short channel[14]; 39 | } radio_in_t; 40 | 41 | typedef struct { 42 | radio_in_t radio; 43 | bool sto; 44 | bool piezoState; 45 | unsigned char piezoTone; 46 | } cassie_pelvis_in_t; 47 | 48 | typedef struct { 49 | cassie_pelvis_in_t pelvis; 50 | cassie_leg_in_t leftLeg; 51 | cassie_leg_in_t rightLeg; 52 | } cassie_in_t; 53 | 54 | 55 | #ifdef __cplusplus 56 | extern "C" { 57 | #endif 58 | 59 | void pack_cassie_in_t(const cassie_in_t *bus, unsigned char *bytes); 60 | void unpack_cassie_in_t(const unsigned char *bytes, cassie_in_t *bus); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | #endif // CASSIE_IN_T_H 66 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_out_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef CASSIE_OUT_T_H 18 | #define CASSIE_OUT_T_H 19 | 20 | #define CASSIE_OUT_T_PACKED_LEN 697 21 | 22 | #include 23 | 24 | typedef short DiagnosticCodes; 25 | 26 | 27 | typedef struct { 28 | bool dataGood; 29 | double stateOfCharge; 30 | double voltage[12]; 31 | double current; 32 | double temperature[4]; 33 | } battery_out_t; 34 | 35 | typedef struct { 36 | double position; 37 | double velocity; 38 | } cassie_joint_out_t; 39 | 40 | typedef struct { 41 | unsigned short statusWord; 42 | double position; 43 | double velocity; 44 | double torque; 45 | double driveTemperature; 46 | double dcLinkVoltage; 47 | double torqueLimit; 48 | double gearRatio; 49 | } elmo_out_t; 50 | 51 | typedef struct { 52 | elmo_out_t hipRollDrive; 53 | elmo_out_t hipYawDrive; 54 | elmo_out_t hipPitchDrive; 55 | elmo_out_t kneeDrive; 56 | elmo_out_t footDrive; 57 | cassie_joint_out_t shinJoint; 58 | cassie_joint_out_t tarsusJoint; 59 | cassie_joint_out_t footJoint; 60 | unsigned char medullaCounter; 61 | unsigned short medullaCpuLoad; 62 | bool reedSwitchState; 63 | } cassie_leg_out_t; 64 | 65 | typedef struct { 66 | bool radioReceiverSignalGood; 67 | bool receiverMedullaSignalGood; 68 | double channel[16]; 69 | } radio_out_t; 70 | 71 | typedef struct { 72 | int etherCatStatus[6]; 73 | int etherCatNotifications[21]; 74 | double taskExecutionTime; 75 | unsigned int overloadCounter; 76 | double cpuTemperature; 77 | } target_pc_out_t; 78 | 79 | typedef struct { 80 | bool dataGood; 81 | unsigned short vpeStatus; 82 | double pressure; 83 | double temperature; 84 | double magneticField[3]; 85 | double angularVelocity[3]; 86 | double linearAcceleration[3]; 87 | double orientation[4]; 88 | } vectornav_out_t; 89 | 90 | typedef struct { 91 | target_pc_out_t targetPc; 92 | battery_out_t battery; 93 | radio_out_t radio; 94 | vectornav_out_t vectorNav; 95 | unsigned char medullaCounter; 96 | unsigned short medullaCpuLoad; 97 | bool bleederState; 98 | bool leftReedSwitchState; 99 | bool rightReedSwitchState; 100 | double vtmTemperature; 101 | } cassie_pelvis_out_t; 102 | 103 | typedef struct { 104 | cassie_pelvis_out_t pelvis; 105 | cassie_leg_out_t leftLeg; 106 | cassie_leg_out_t rightLeg; 107 | bool isCalibrated; 108 | DiagnosticCodes messages[4]; 109 | } cassie_out_t; 110 | 111 | #define EMPTY ((DiagnosticCodes)0) 112 | #define LEFT_HIP_NOT_CALIB ((DiagnosticCodes)5) 113 | #define LEFT_KNEE_NOT_CALIB ((DiagnosticCodes)6) 114 | #define RIGHT_HIP_NOT_CALIB ((DiagnosticCodes)7) 115 | #define RIGHT_KNEE_NOT_CALIB ((DiagnosticCodes)8) 116 | #define LOW_BATTERY_CHARGE ((DiagnosticCodes)200) 117 | #define HIGH_CPU_TEMP ((DiagnosticCodes)205) 118 | #define HIGH_VTM_TEMP ((DiagnosticCodes)210) 119 | #define HIGH_ELMO_DRIVE_TEMP ((DiagnosticCodes)215) 120 | #define HIGH_STATOR_TEMP ((DiagnosticCodes)220) 121 | #define LOW_ELMO_LINK_VOLTAGE ((DiagnosticCodes)221) 122 | #define HIGH_BATTERY_TEMP ((DiagnosticCodes)225) 123 | #define RADIO_DATA_BAD ((DiagnosticCodes)230) 124 | #define RADIO_SIGNAL_BAD ((DiagnosticCodes)231) 125 | #define BMS_DATA_BAD ((DiagnosticCodes)235) 126 | #define VECTORNAV_DATA_BAD ((DiagnosticCodes)236) 127 | #define VPE_GYRO_SATURATION ((DiagnosticCodes)240) 128 | #define VPE_MAG_SATURATION ((DiagnosticCodes)241) 129 | #define VPE_ACC_SATURATION ((DiagnosticCodes)242) 130 | #define VPE_ATTITUDE_BAD ((DiagnosticCodes)245) 131 | #define VPE_ATTITUDE_NOT_TRACKING ((DiagnosticCodes)246) 132 | #define ETHERCAT_DC_ERROR ((DiagnosticCodes)400) 133 | #define ETHERCAT_ERROR ((DiagnosticCodes)410) 134 | 
#define LOAD_CALIB_DATA_ERROR ((DiagnosticCodes)590) 135 | #define CRITICAL_BATTERY_CHARGE ((DiagnosticCodes)600) 136 | #define CRITICAL_CPU_TEMP ((DiagnosticCodes)605) 137 | #define CRITICAL_VTM_TEMP ((DiagnosticCodes)610) 138 | #define CRITICAL_ELMO_DRIVE_TEMP ((DiagnosticCodes)615) 139 | #define CRITICAL_STATOR_TEMP ((DiagnosticCodes)620) 140 | #define CRITICAL_BATTERY_TEMP ((DiagnosticCodes)625) 141 | #define TORQUE_LIMIT_REACHED ((DiagnosticCodes)630) 142 | #define JOINT_LIMIT_REACHED ((DiagnosticCodes)635) 143 | #define ENCODER_FAILURE ((DiagnosticCodes)640) 144 | #define SPRING_FAILURE ((DiagnosticCodes)645) 145 | #define LEFT_LEG_MEDULLA_HANG ((DiagnosticCodes)700) 146 | #define RIGHT_LEG_MEDULLA_HANG ((DiagnosticCodes)701) 147 | #define PELVIS_MEDULLA_HANG ((DiagnosticCodes)703) 148 | #define CPU_OVERLOAD ((DiagnosticCodes)704) 149 | 150 | #ifdef __cplusplus 151 | extern "C" { 152 | #endif 153 | 154 | void pack_cassie_out_t(const cassie_out_t *bus, unsigned char *bytes); 155 | void unpack_cassie_out_t(const unsigned char *bytes, cassie_out_t *bus); 156 | 157 | #ifdef __cplusplus 158 | } 159 | #endif 160 | #endif // CASSIE_OUT_T_H 161 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/cassie_user_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef CASSIE_USER_IN_T_H 18 | #define CASSIE_USER_IN_T_H 19 | 20 | #define CASSIE_USER_IN_T_PACKED_LEN 58 21 | 22 | #include 23 | 24 | typedef struct { 25 | double torque[10]; 26 | short telemetry[9]; 27 | } cassie_user_in_t; 28 | 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | void pack_cassie_user_in_t(const cassie_user_in_t *bus, unsigned char *bytes); 35 | void unpack_cassie_user_in_t(const unsigned char *bytes, cassie_user_in_t *bus); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif // CASSIE_USER_IN_T_H 41 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/pd_in_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef PD_IN_T_H 18 | #define PD_IN_T_H 19 | 20 | #define PD_IN_T_PACKED_LEN 476 21 | 22 | #include 23 | 24 | typedef struct { 25 | double torque[5]; 26 | double pTarget[5]; 27 | double dTarget[5]; 28 | double pGain[5]; 29 | double dGain[5]; 30 | } pd_motor_in_t; 31 | 32 | typedef struct { 33 | double torque[6]; 34 | double pTarget[6]; 35 | double dTarget[6]; 36 | double pGain[6]; 37 | double dGain[6]; 38 | } pd_task_in_t; 39 | 40 | typedef struct { 41 | pd_task_in_t taskPd; 42 | pd_motor_in_t motorPd; 43 | } pd_leg_in_t; 44 | 45 | typedef struct { 46 | pd_leg_in_t leftLeg; 47 | pd_leg_in_t rightLeg; 48 | double telemetry[9]; 49 | } pd_in_t; 50 | 51 | 52 | #ifdef __cplusplus 53 | extern "C" { 54 | #endif 55 | 56 | void pack_pd_in_t(const pd_in_t *bus, unsigned char *bytes); 57 | void unpack_pd_in_t(const unsigned char *bytes, pd_in_t *bus); 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | #endif // PD_IN_T_H 63 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/state_out_t.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef STATE_OUT_T_H 18 | #define STATE_OUT_T_H 19 | 20 | #define STATE_OUT_T_PACKED_LEN 493 21 | 22 | #include 23 | 24 | typedef struct { 25 | double stateOfCharge; 26 | double current; 27 | } state_battery_out_t; 28 | 29 | typedef struct { 30 | double position[3]; 31 | double orientation[4]; 32 | double footRotationalVelocity[3]; 33 | double footTranslationalVelocity[3]; 34 | double toeForce[3]; 35 | double heelForce[3]; 36 | } state_foot_out_t; 37 | 38 | typedef struct { 39 | double position[6]; 40 | double velocity[6]; 41 | } state_joint_out_t; 42 | 43 | typedef struct { 44 | double position[10]; 45 | double velocity[10]; 46 | double torque[10]; 47 | } state_motor_out_t; 48 | 49 | typedef struct { 50 | double position[3]; 51 | double orientation[4]; 52 | double rotationalVelocity[3]; 53 | double translationalVelocity[3]; 54 | double translationalAcceleration[3]; 55 | double externalMoment[3]; 56 | double externalForce[3]; 57 | } state_pelvis_out_t; 58 | 59 | typedef struct { 60 | double channel[16]; 61 | bool signalGood; 62 | } state_radio_out_t; 63 | 64 | typedef struct { 65 | double height; 66 | double slope[2]; 67 | } state_terrain_out_t; 68 | 69 | typedef struct { 70 | state_pelvis_out_t pelvis; 71 | state_foot_out_t leftFoot; 72 | state_foot_out_t rightFoot; 73 | state_terrain_out_t terrain; 74 | state_motor_out_t motor; 75 | state_joint_out_t joint; 76 | state_radio_out_t radio; 77 | state_battery_out_t battery; 78 | } state_out_t; 79 | 80 | 81 | #ifdef __cplusplus 82 | extern "C" { 83 | #endif 84 | 85 | void pack_state_out_t(const state_out_t *bus, unsigned char *bytes); 86 | void unpack_state_out_t(const unsigned char *bytes, state_out_t *bus); 87 | 88 | #ifdef __cplusplus 89 | } 90 | #endif 91 | #endif // STATE_OUT_T_H 92 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/include/udp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Agility Robotics 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef UDP_H 18 | #define UDP_H 19 | 20 | #define PACKET_HEADER_LEN 2 21 | 22 | // Data and results for processing packet header 23 | typedef struct { 24 | char seq_num_out; 25 | char seq_num_in_last; 26 | char delay; 27 | char seq_num_in_diff; 28 | } packet_header_info_t; 29 | 30 | 31 | // Process packet header used to measure delay and skipped packets 32 | void process_packet_header(packet_header_info_t *info, 33 | const unsigned char *header_in, 34 | unsigned char *header_out); 35 | 36 | #ifndef _WIN32 37 | #include 38 | 39 | // Create a UDP socket listening at a specific address/port 40 | int udp_init_host(const char *addr_str, const char *port_str); 41 | 42 | // Create a UDP socket connected and listening to specific addresses/ports 43 | int udp_init_client(const char *remote_addr_str, const char *remote_port_str, 44 | const char *local_addr_str, const char *local_port_str); 45 | 46 | // Close a UDP socket 47 | void udp_close(int sock); 48 | 49 | // Get newest valid packet in RX buffer 50 | ssize_t get_newest_packet(int sock, void *recvbuf, size_t recvlen, 51 | struct sockaddr *src_addr, socklen_t *addrlen); 52 | 53 | // Wait for a new valid packet 54 | ssize_t wait_for_packet(int sock, void *recvbuf, size_t recvlen, 55 | struct sockaddr *src_addr, socklen_t *addrlen); 56 | 57 | // Send a packet 58 | ssize_t send_packet(int sock, void *sendbuf, size_t sendlen, 59 | struct sockaddr *dst_addr, socklen_t addrlen); 60 | 61 | #endif // _WIN32 62 | #endif // UDP_H 63 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/libcassiemujoco.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/libcassiemujoco.so -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/crown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/crown.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/drop_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/drop_step.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hfield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hfield2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hfield2.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/hills.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/hills.png 
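The UDP layer shown above (udp.h, the packed-struct headers, and the CassieUdp wrapper in cassie/cassiemujoco/cassieUDP.py) is how a policy talks to the robot or to a remote simulator: pd_in_t packets carry PD gains and targets out, and state_out_t packets carry the estimated robot state back. A rough sketch of such a loop follows; the import paths, gain values, and the idea of deriving targets from a policy are illustrative assumptions, not a verified control script.

```python
from cassie.cassiemujoco.cassieUDP import CassieUdp
from cassie.cassiemujoco.cassiemujoco_ctypes import pd_in_t   # ctypes mirror of pd_in_t.h (import path assumed)

cassie = CassieUdp()                 # defaults: remote 127.0.0.1:25000, local 0.0.0.0:25001

u = pd_in_t()
for i in range(5):                   # 5 motors per leg, matching pd_motor_in_t
    u.leftLeg.motorPd.pGain[i] = 100.0    # placeholder gains, not tuned values
    u.leftLeg.motorPd.dGain[i] = 10.0
    u.rightLeg.motorPd.pGain[i] = 100.0
    u.rightLeg.motorPd.dGain[i] = 10.0

# Keep sending until the first state packet arrives, then run the control loop.
state = None
while state is None:
    cassie.send_pd(u)
    state = cassie.recv_newest_pd()

for _ in range(1000):
    state = cassie.recv_wait_pd()              # blocks until a fresh state_out_t arrives
    pelvis_height = state.pelvis.position[2]   # fields follow state_out_t.h
    # ... a policy would map `state` to new pTarget values here ...
    for i in range(5):
        u.leftLeg.motorPd.pTarget[i] = 0.0     # placeholder targets
        u.rightLeg.motorPd.pTarget[i] = 0.0
    cassie.send_pd(u)
```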
-------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise1.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise2.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noise3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noise3.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/noisy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/noisy.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/racetrack1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/racetrack1.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill1.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill2.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/rand_hill3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/rand_hill3.npy -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/slope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/slope.png -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/utils/noise_generator.py: -------------------------------------------------------------------------------- 
1 | import argparse 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='PNG Noise Generator for MuJoCo height fields') 7 | parser.add_argument('--filename', '-f', action='store', default='noise', 8 | help='Name of file output. ' 9 | 'File will be saved as a PNG file outside of the folder this is located in' 10 | '(usage: -f )') 11 | parser.add_argument('--dimension', '-d', type=int, nargs='+', default=(32, 32), 12 | help='Size of the 2D array (usage: -d )') 13 | parser.add_argument('--granularity', '-g', type=int, default=100, 14 | help='How fine or course the noise is. ' 15 | 'The larger the number, the finer the noise (usage: -g )') 16 | parser.add_argument('--start_size', '-s', type=int, default=2, 17 | help='The middle of the map will be always flat for starting.' 18 | 'Choose how big this block size will be (usage: -s )') 19 | parser.add_argument('--seed', type=int, default=None, 20 | help='Set seed for reproducible maps (usage: --seed )') 21 | 22 | args = parser.parse_args() 23 | 24 | if args.seed: 25 | np.random.seed(args.seed) 26 | 27 | midpoint = (int(args.dimension[0] / 2), int(args.dimension[1] / 2)) 28 | 29 | # build noisy array 30 | terrain = np.random.randint(args.granularity, size=args.dimension) 31 | 32 | terrain[midpoint[0] - args.start_size:midpoint[0] + args.start_size, 33 | midpoint[1] - args.start_size:midpoint[1] + args.start_size] = 0 34 | 35 | # save as png file 36 | plt.imsave('../{}.png'.format(args.filename), terrain, cmap='gray') 37 | -------------------------------------------------------------------------------- /cassie/cassiemujoco/terrains/wavefield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/cassiemujoco/terrains/wavefield.png -------------------------------------------------------------------------------- /cassie/deprecated/env_test.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | 3 | # from cassie_env import CassieEnv 4 | 5 | # from mujoco.cassiemujoco import * 6 | # from trajectory.trajectory import CassieTrajectory 7 | 8 | 9 | # traj = CassieTrajectory("trajectory/stepdata.bin") 10 | 11 | 12 | # env = CassieEnv("trajectory/stepdata.bin") 13 | # csim = CassieSim() 14 | 15 | # u = pd_in_t() 16 | 17 | # test actual trajectory 18 | 19 | # for i in range(len(traj.qpos)): 20 | # qpos = traj.qpos[i] 21 | # qvel = traj.qvel[i] 22 | 23 | # csim.set_qpos(qpos) 24 | # csim.set_qvel(qvel) 25 | 26 | # y = csim.step_pd(u) 27 | 28 | # cvis.draw(csim) 29 | 30 | # print(i, end='\r') 31 | 32 | 33 | # test trajectory wrap-around 34 | 35 | # env.render() 36 | # env.reset() 37 | 38 | # u = pd_in_t() 39 | # while True: 40 | # # start = t.time() 41 | # # while True: 42 | # # stop = t.time() 43 | # # #print(stop-start) 44 | # # #print("stop") 45 | # # if stop - start > 0.033: 46 | # # break 47 | 48 | # pos, vel = env.get_ref_state() 49 | 50 | # '''env.phase = env.phase + 14 51 | # pos2, vel2 = env.get_kin_state() 52 | # print(pos[7:21]-pos2[21:35]) 53 | # env.phase = env.phase - 14''' 54 | 55 | # env.phase += 1 56 | # # #print(env.speed) 57 | # if env.phase >= 28: 58 | # env.phase = 0 59 | # env.counter += 1 60 | # #break 61 | # env.sim.set_qpos(pos) 62 | # env.sim.set_qvel(vel) 63 | # y = env.sim.step_pd(u) 64 | # env.render() 
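The deprecated script above is entirely commented out; condensed into a runnable form, the trajectory-replay idea it sketches would look roughly like the following (module paths relative to the repository root are assumed, and since the original never shows how the visualizer is constructed, this sketch only steps the simulator):

```python
from cassie.cassiemujoco import CassieSim, pd_in_t           # import path assumed from the repo layout
from cassie.trajectory.trajectory import CassieTrajectory    # import path assumed from the repo layout

traj = CassieTrajectory("cassie/trajectory/stepdata.bin")    # reference walking trajectory
sim = CassieSim()
u = pd_in_t()                                                # zero PD command

# Replay the reference trajectory through the simulator state, as the commented code above does.
for qpos, qvel in zip(traj.qpos, traj.qvel):
    sim.set_qpos(qpos)
    sim.set_qvel(qvel)
    sim.step_pd(u)
```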
-------------------------------------------------------------------------------- /cassie/deprecated/plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from tempfile import TemporaryFile 4 | 5 | # from cassie_env import CassieEnv 6 | from trajectory.trajectory import CassieTrajectory 7 | #from mujoco.cassiemujoco import * 8 | import time as t 9 | traj = CassieTrajectory("/home/robot/Desktop/apex/cassie/trajectory/stepdata.bin") 10 | # env = CassieEnv("walking") 11 | 12 | qpos_traj = traj.qpos 13 | time_traj = traj.time 14 | 15 | tt = traj.time 16 | #u = pd_in_t() 17 | 18 | # load your data 19 | data = np.load('cassie/outfile.npz') 20 | motor = data['motor'] 21 | joint = data['joint'] 22 | qpos = data['qpos_replay'] 23 | time = data['time'] 24 | 25 | delt_t = time[4] - time[3] 26 | delt_t_traj = time_traj[4] - time_traj[3] 27 | same_time = delt_t / delt_t_traj 28 | time_traj = time_traj * same_time 29 | 30 | #time = time * (60/2000) 31 | numStates = len(qpos) 32 | 33 | # np.savetxt("test_arr.txt", qpos[0:1000, 34]) 34 | print("Made it") 35 | # test actual trajectory 36 | 37 | rand = np.random.randint(1, 101, 1000) 38 | 39 | #log data 40 | plt.subplot(2,2,1) 41 | plt.plot(time[0:500], motor[0:500,4], 'r') 42 | plt.plot(time[0:500], motor[0:500, 9], 'k') 43 | 44 | ax2 = plt.subplot(2,2,2) 45 | ax2.plot(time[1200:1300], joint[1200:1300,2], 'r') 46 | ax2.plot(time[1200:1300], joint[1200:1300, 5], 'k') 47 | 48 | ax3 = plt.subplot(2,2,3) 49 | ax3.plot(time[1200:1300], qpos[1200:1300,20], 'r') 50 | ax3.plot(time[1200:1300], qpos[1200:1300, 34], 'k') 51 | 52 | ax2.get_shared_x_axes().join(ax2, ax3) 53 | ax2.set_xticklabels([]) 54 | 55 | 56 | #trajectory data 57 | plt.subplot(2,2,4) 58 | plt.plot(time_traj[:], qpos_traj[:,20], 'r') 59 | plt.plot(time_traj[:], qpos_traj[:, 34], 'k') 60 | plt.show() 61 | 62 | #trajectory data 63 | 64 | plt.plot(tt[:], qpos_traj[:,32] + qpos_traj[:, 33], 'r') 65 | # plt.plot(tt[:], qpos_traj[:,19], 'b') 66 | # plt.plot(tt[:], qpos_traj[:, 20], 'k') 67 | plt.show() -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_1.9.pkl 
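The command_trajectory_*.pkl files in these mission folders are pickled dicts with "compos", "speed", and "orient" arrays; they are consumed by the CommandTrajectory loader in cassie/missions/command_mission.py further below, which expects a plain command_trajectory.pkl inside the mission directory it is given. A minimal sketch for inspecting one of the per-speed files directly (the path is illustrative):

import pickle
import numpy as np

with open("cassie/missions/90_left/command_trajectory_1.9.pkl", "rb") as f:
    trajectory = pickle.load(f)

# each array is indexed by the mission step counter used by the command rewards
print(np.asarray(trajectory["compos"]).shape)
print(np.asarray(trajectory["speed"]).shape)
print(np.asarray(trajectory["orient"]).shape)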
-------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/90_left/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_left/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/90_right/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/90_right/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/__init__.py: -------------------------------------------------------------------------------- 1 | # We use this directory for storing missions -- high-level commands to policies 2 | 3 | from .command_mission import * 4 | from .add_waypoints import add_waypoints -------------------------------------------------------------------------------- /cassie/missions/add_waypoints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import lxml.etree as ET 8 | 9 | def read_xml(file): 
10 | return ET.parse(file, ET.XMLParser(remove_blank_text=True)) 11 | 12 | space=10 13 | color='1 0.9 0 0.7' 14 | 15 | 16 | def add_waypoints(input_file, output_file, waypoints_file): 17 | 18 | try: 19 | # create trajectory data frame 20 | traj_df = pd.read_csv(waypoints_file, header=None, usecols=[0, 1], names=['X', 'Y']) 21 | 22 | # read xml file 23 | tree = read_xml(input_file) 24 | 25 | except TypeError: 26 | if not input_file: 27 | print('No XML file provided...\n') 28 | else: 29 | print(str(input_file) + ' not found. Check XML file path.') 30 | sys.exit(0) 31 | 32 | # get root of xml tree 33 | root = tree.getroot() 34 | 35 | # get worldbody subelement from root 36 | worldbody = root.find('worldbody') 37 | 38 | for idx, pos in enumerate(traj_df.values[20::int(space)], start=1): 39 | # create a waypoint subelement 40 | ET.SubElement(worldbody, 'geom', {'name': 'waypoint{}'.format(idx), 41 | 'pos': '{} {} 1.01 '.format(pos[0], pos[1]), 42 | 'size': '0.03 0.03 0.03', 43 | 'type': 'sphere', 44 | 'contype': '0', 45 | 'rgba': color}) 46 | 47 | # add to root 48 | tree.write(output_file, encoding='utf-8', pretty_print=True, xml_declaration=True) 49 | 50 | 51 | if __name__ == "__main__": 52 | add_waypoints("default") 53 | -------------------------------------------------------------------------------- /cassie/missions/command_mission.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import os 4 | 5 | class CommandTrajectory: 6 | def __init__(self, mission_name): 7 | mission_path = os.path.join(mission_name, "command_trajectory.pkl") 8 | with open(mission_path, "rb") as f: 9 | trajectory = pickle.load(f) 10 | 11 | self.global_pos = np.copy(trajectory["compos"]) 12 | self.speed_cmd = np.copy(trajectory["speed"]) 13 | 14 | # NOTE: still need to rotate translational velocity and accleration 15 | self.orient = np.copy(trajectory["orient"]) 16 | self.prev_orient = 0 17 | 18 | self.trajlen = len(self.speed_cmd) 19 | 20 | # print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.global_pos[:5], self.speed_cmd[:5], self.orient[:5])) 21 | # print(self.speed_cmd.shape) 22 | # print(self.orient.shape) 23 | # print(np.max(self.speed_cmd)) 24 | # input() -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/curvy/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/curvy/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/missions/default/command_trajectory.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/default/command_trajectory.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.5.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_0.9.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.4.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_1.9.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_2.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.3.pkl -------------------------------------------------------------------------------- /cassie/missions/straight/command_trajectory_2.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/missions/straight/command_trajectory_2.8.pkl -------------------------------------------------------------------------------- /cassie/outfile.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/outfile.npz -------------------------------------------------------------------------------- /cassie/plotting_ex.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import math 4 | 5 | # from cassie_env import CassieEnv 6 | 7 | from cassiemujoco import * 8 | from trajectory.trajectory import CassieTrajectory 9 | import matplotlib.pyplot as plt 10 | from matplotlib import style 11 | from matplotlib.animation import FuncAnimation 12 | import matplotlib.animation as animation 13 | from mpl_toolkits.mplot3d import Axes3D 14 | from IPython import display 15 | 16 | def visualise_sim_graph(file_path, freq_of_sim): 17 | traj = np.load(file_path) 18 | # env = CassieEnv("walking") 19 | # csim = CassieSim("./cassie/cassiemujoco/cassie.xml") 20 | # vis = CassieVis(csim, "./cassie/cassiemujoco/cassie.xml") 21 | u = pd_in_t() 22 | 23 | # pelvisXYZ = traj.f.qpos_replay[:, 0:3] 24 | # render_state = vis.draw(csim) 25 | # saved_time = traj.f.time[:] 26 | 27 | #################Graphing########### 28 | log_time = traj.f.time[:] 29 | y_val = traj.f.qpos_replay[:,2] #z - height 30 | x_data= log_time 31 | y_data = y_val 32 | 33 | delt_x = (x_data[1] - x_data[0]) * 1000 #convert seconds to ms 34 | 35 | num_frames = math.ceil(len(x_data) / 10) 36 | 37 | 38 | 39 | Writer = animation.writers['ffmpeg'] 40 | writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) 41 | 42 | output = plt.plot([]) 43 | plt.close() 44 | print(output[0]) 45 | 46 | x = np.linspace(0,2*np.pi, 100) 47 | 48 | fig = plt.figure() 49 | 50 | lines = plt.plot([]) 51 | line = lines[0] 52 | 53 | #other setup //set x and y lims 54 | plt.xlim(x_data.min(), x_data.max()) 55 | plt.ylim(y_data.min(), y_data.max()) 56 | def animate(frame): 57 | #update 58 | x = x_data[:frame*10] 59 | y = y_data[:frame*10] 60 | # y = np.sin(x + 2*np.pi * frame/100) 61 | line.set_data((x,y)) 62 | 63 | anim = FuncAnimation(fig, animate, frames=num_frames, interval=(1/freq_of_sim * 1000 + (10 * delt_x))) #20 is 50 fps 64 | 65 | anim.save('lines.mp4', writer=writer) 66 | # html = display.HTML(video) 67 | # display.display(html) 68 | 69 | plt.close() 70 | 71 | visualise_sim_graph("./outfile8.npz", 30) -------------------------------------------------------------------------------- /cassie/quaternion_function.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def inverse_quaternion(quaternion): 5 | result = np.copy(quaternion) 6 | result[1:4] = -result[1:4] 7 | return result 8 | 9 | def quaternion_product(q1, q2): 10 | result = np.zeros(4) 11 | result[0] = q1[0]*q2[0]-q1[1]*q2[1]-q1[2]*q2[2]-q1[3]*q2[3] 12 | result[1] = q1[0]*q2[1]+q2[0]*q1[1]+q1[2]*q2[3]-q1[3]*q2[2] 13 | result[2] = q1[0]*q2[2]-q1[1]*q2[3]+q1[2]*q2[0]+q1[3]*q2[1] 14 | result[3] = q1[0]*q2[3]+q1[1]*q2[2]-q1[2]*q2[1]+q1[3]*q2[0] 15 | return result 16 | 17 | def rotate_by_quaternion(vector, quaternion): 18 | q1 = np.copy(quaternion) 19 | q2 = np.zeros(4) 20 | q2[1:4] = np.copy(vector) 21 | q3 = inverse_quaternion(quaternion) 22 | q = quaternion_product(q2, q3) 23 | q = quaternion_product(q1, q) 24 | result = q[1:4] 25 | return result 26 | 27 | def quaternion2euler(quaternion): 28 | w = quaternion[0] 29 | x = quaternion[1] 30 | y = quaternion[2] 31 | z = quaternion[3] 32 | ysqr = y * y 33 | 34 | t0 = +2.0 * (w * x + y * z) 35 | t1 = +1.0 - 2.0 * (x * x + 
ysqr) 36 | X = math.degrees(math.atan2(t0, t1)) 37 | 38 | t2 = +2.0 * (w * y - z * x) 39 | t2 = +1.0 if t2 > +1.0 else t2 40 | t2 = -1.0 if t2 < -1.0 else t2 41 | Y = math.degrees(math.asin(t2)) 42 | 43 | t3 = +2.0 * (w * z + x * y) 44 | t4 = +1.0 - 2.0 * (ysqr + z * z) 45 | Z = math.degrees(math.atan2(t3, t4)) 46 | 47 | result = np.zeros(3) 48 | result[0] = X * np.pi / 180 49 | result[1] = Y * np.pi / 180 50 | result[2] = Z * np.pi / 180 51 | 52 | return result 53 | 54 | def euler2quat(z=0, y=0, x=0): 55 | 56 | z = z/2.0 57 | y = y/2.0 58 | x = x/2.0 59 | cz = math.cos(z) 60 | sz = math.sin(z) 61 | cy = math.cos(y) 62 | sy = math.sin(y) 63 | cx = math.cos(x) 64 | sx = math.sin(x) 65 | result = np.array([ 66 | cx*cy*cz - sx*sy*sz, 67 | cx*sy*sz + cy*cz*sx, 68 | cx*cz*sy - sx*cy*sz, 69 | cx*cy*sz + sx*cz*sy]) 70 | if result[0] < 0: 71 | result = -result 72 | return result -------------------------------------------------------------------------------- /cassie/rewards/__init__.py: -------------------------------------------------------------------------------- 1 | # We use this directory for keeping track of reward functions. Each reward function operates on an object of CassieEnv_v2, passed as 'self' 2 | 3 | from .clock_rewards import * 4 | from .aslip_rewards import * 5 | from .rnn_dyn_random_reward import * 6 | from .iros_paper_reward import * 7 | from .command_reward import * 8 | 9 | # from .speedmatch_footorient_joint_smooth_reward import * 10 | from .speedmatch_rewards import * 11 | from .trajmatch_reward import * 12 | from .standing_rewards import * 13 | # from .speedmatch_heuristic_reward import * 14 | from .side_speedmatch_rewards import * 15 | # from .side_speedmatch_foottraj_reward import * 16 | # from .side_speedmatch_heightvel_reward import * 17 | # from .side_speedmatch_heuristic_reward import * 18 | # from .side_speedmatch_torquesmooth_reward import * -------------------------------------------------------------------------------- /cassie/rewards/command_reward.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def quaternion2euler(quaternion): 5 | w = quaternion[0] 6 | x = quaternion[1] 7 | y = quaternion[2] 8 | z = quaternion[3] 9 | ysqr = y * y 10 | 11 | t0 = +2.0 * (w * x + y * z) 12 | t1 = +1.0 - 2.0 * (x * x + ysqr) 13 | X = math.degrees(math.atan2(t0, t1)) 14 | 15 | t2 = +2.0 * (w * y - z * x) 16 | t2 = +1.0 if t2 > +1.0 else t2 17 | t2 = -1.0 if t2 < -1.0 else t2 18 | Y = math.degrees(math.asin(t2)) 19 | 20 | t3 = +2.0 * (w * z + x * y) 21 | t4 = +1.0 - 2.0 * (ysqr + z * z) 22 | Z = math.degrees(math.atan2(t3, t4)) 23 | 24 | result = np.zeros(3) 25 | result[0] = X * np.pi / 180 26 | result[1] = Y * np.pi / 180 27 | result[2] = Z * np.pi / 180 28 | 29 | return result 30 | 31 | def euler2quat(z=0, y=0, x=0): 32 | 33 | z = z/2.0 34 | y = y/2.0 35 | x = x/2.0 36 | cz = math.cos(z) 37 | sz = math.sin(z) 38 | cy = math.cos(y) 39 | sy = math.sin(y) 40 | cx = math.cos(x) 41 | sx = math.sin(x) 42 | result = np.array([ 43 | cx*cy*cz - sx*sy*sz, 44 | cx*sy*sz + cy*cz*sx, 45 | cx*cz*sy - sx*cy*sz, 46 | cx*cy*sz + sx*cz*sy]) 47 | if result[0] < 0: 48 | result = -result 49 | return result 50 | 51 | def command_reward(self): 52 | qpos = np.copy(self.sim.qpos()) 53 | qvel = np.copy(self.sim.qvel()) 54 | 55 | # get current speed and orientation 56 | curr_pos = qpos[0:3] 57 | curr_speed = qvel[0] 58 | curr_orient = quaternion2euler(qpos[3:7])[2] 59 | 60 | # desired speed and orientation 61 | desired_pos = 
self.command_traj.global_pos[self.command_counter] + self.last_position
    desired_speed = self.command_traj.speed_cmd[self.command_counter]
    desired_orient = self.command_traj.orient[self.command_counter]

    compos_error = np.linalg.norm(curr_pos - desired_pos)
    speed_error = np.linalg.norm(curr_speed - desired_speed)
    orientation_error = np.linalg.norm(curr_orient - desired_orient)

    reward = 0.2 * np.exp(-speed_error) + \
             0.3 * np.exp(-compos_error) + \
             0.5 * np.exp(-orientation_error)

    if self.debug:
        # report each term with the same weights used in the reward above
        print("reward: {6}\nspeed:\t{0:.2f}, % = {1:.2f}\ncompos:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\n\n".format(
            0.2 * np.exp(-speed_error), 0.2 * np.exp(-speed_error) / reward * 100,
            0.3 * np.exp(-compos_error), 0.3 * np.exp(-compos_error) / reward * 100,
            0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
            reward
            )
        )
        print(self.command_counter)
        print("actual speed: {}\tdesired_speed: {}".format(curr_speed, desired_speed))
        print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
        print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
    return reward

def command_reward_no_pos(self):
    qpos = np.copy(self.sim.qpos())
    qvel = np.copy(self.sim.qvel())

    # get current speed and orientation
    # curr_pos = qpos[0:3]
    curr_speed = qvel[0]
    curr_orient = quaternion2euler(qpos[3:7])[2]

    # desired speed and orientation
    desired_speed = self.command_traj.speed_cmd[self.command_counter]
    desired_orient = self.command_traj.orient[self.command_counter]

    # compos_error = np.linalg.norm(curr_pos - desired_pos)
    speed_error = np.linalg.norm(curr_speed - desired_speed)
    orientation_error = np.linalg.norm(curr_orient - desired_orient)

    reward = 0.5 * np.exp(-speed_error) + \
             0.5 * np.exp(-orientation_error)

    if self.debug:
        print("reward: {4}\nspeed:\t{0:.2f}, % = {1:.2f}\norient:\t{2:.2f}, % = {3:.2f}\n\n".format(
            0.5 * np.exp(-speed_error), 0.5 * np.exp(-speed_error) / reward * 100,
            0.5 * np.exp(-orientation_error), 0.5 * np.exp(-orientation_error) / reward * 100,
            reward
            )
        )
        print(self.command_counter)
        print("actual speed: {}\tdesired_speed: {}".format(curr_speed, desired_speed))
        # print("actual compos: {}\tdesired_pos: {}".format(curr_pos[0:2], desired_pos[0:2]))
        print("actual orient: {}\tdesired_orient: {}".format(curr_orient, desired_orient))
    return reward

def command_reward_keepalive(self):
    reward = 1.0
    if self.debug:
        print("reward = 1.0\tcounter={}".format(self.command_counter))
    return reward
--------------------------------------------------------------------------------
/cassie/rewards/iros_paper_reward.py:
--------------------------------------------------------------------------------
1 | import numpy as np 2 | 3 | def iros_paper_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | ref_pos, ref_vel = self.get_ref_state(self.phase) 8 | 9 | # TODO: should be variable; where do these come from?
10 | # TODO: see magnitude of state variables to gauge contribution to reward 11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 12 | 13 | joint_error = 0 14 | com_error = 0 15 | orientation_error = 0 16 | spring_error = 0 17 | 18 | # each joint pos 19 | for i, j in enumerate(self.pos_idx): 20 | target = ref_pos[j] 21 | actual = qpos[j] 22 | 23 | joint_error += 30 * weight[i] * (target - actual) ** 2 24 | 25 | # center of mass: x, y, z 26 | for j in [0, 1, 2]: 27 | target = ref_pos[j] 28 | actual = qpos[j] 29 | 30 | # NOTE: in Xie et al y target is 0 31 | 32 | com_error += (target - actual) ** 2 33 | 34 | # COM orientation: qx, qy, qz 35 | for j in [4, 5, 6]: 36 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 37 | actual = qpos[j] 38 | 39 | orientation_error += (target - actual) ** 2 40 | 41 | # left and right shin springs 42 | for i in [15, 29]: 43 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 44 | actual = qpos[i] 45 | 46 | spring_error += 1000 * (target - actual) ** 2 47 | 48 | reward = 0.5 * np.exp(-joint_error) + \ 49 | 0.3 * np.exp(-com_error) + \ 50 | 0.1 * np.exp(-orientation_error) + \ 51 | 0.1 * np.exp(-spring_error) 52 | 53 | # reward = np.sign(qvel[0])*qvel[0]**2 54 | # desired_speed = 3.0 55 | # speed_diff = np.abs(qvel[0] - desired_speed) 56 | # if speed_diff > 1: 57 | # speed_diff = speed_diff**2 58 | # reward = 20 - speed_diff 59 | 60 | return reward -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_smooth_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.1_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/incentive_clock_strict0.4_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_aslip_clock_strict0.3.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_smooth_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.1_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_aerial.pkl -------------------------------------------------------------------------------- /cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/rewards/reward_clock_funcs/no_incentive_clock_strict0.4_zero.pkl -------------------------------------------------------------------------------- /cassie/rewards/rnn_dyn_random_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def jonah_RNN_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | ref_pos, ref_vel = self.get_ref_state(self.phase) 8 | 9 | # TODO: should be variable; where do these come from? 
10 | # TODO: see magnitude of state variables to gauge contribution to reward 11 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 12 | 13 | joint_error = 0 14 | com_error = 0 15 | orientation_error = 0 16 | spring_error = 0 17 | 18 | # each joint pos 19 | for i, j in enumerate(self.pos_idx): 20 | target = ref_pos[j] 21 | actual = qpos[j] 22 | 23 | joint_error += 50 * weight[i] * (target - actual) ** 2 24 | 25 | # center of mass: x, y, z 26 | for j in [0, 1, 2]: 27 | target = ref_pos[j] 28 | actual = qpos[j] 29 | 30 | # NOTE: in Xie et al y target is 0 31 | 32 | com_error += 10 * (target - actual) ** 2 33 | 34 | actual_q = qpos[3:7] 35 | target_q = ref_pos[3:7] 36 | #target_q = [1, 0, 0, 0] 37 | orientation_error = 5 * (1 - np.inner(actual_q, target_q) ** 2) 38 | 39 | # left and right shin springs 40 | for i in [15, 29]: 41 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 42 | actual = qpos[i] 43 | 44 | spring_error += 1000 * (target - actual) ** 2 45 | 46 | reward = 0.200 * np.exp(-joint_error) + \ 47 | 0.450 * np.exp(-com_error) + \ 48 | 0.300 * np.exp(-orientation_error) + \ 49 | 0.050 * np.exp(-spring_error) 50 | 51 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_foottraj_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_foottraj_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 16 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 17 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 18 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) 19 | 20 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_heightvel_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_heightvel_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 16 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 17 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \ 18 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost) 19 | 20 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_heuristic_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_heuristic_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 
| orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | ######## Foot position penalty ######## 16 | foot_pos = np.zeros(6) 17 | self.sim.foot_pos(foot_pos) 18 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 19 | foot_penalty = 0 20 | if foot_dist < 0.22: 21 | foot_penalty = 0.2 22 | ######## Foot force penalty ######## 23 | foot_forces = self.sim.get_foot_forces() 24 | lforce = max((foot_forces[0] - 700)/1000, 0) 25 | rforce = max((foot_forces[1] - 700)/1000, 0) 26 | ######## Torque penalty ######## 27 | torque = np.linalg.norm(self.cassie_state.motor.torque[:]) 28 | ######## Pelvis z accel penalty ######### 29 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 30 | pelaccel_penalty = 0 31 | if pelaccel > 6: 32 | pelaccel_penalty = (pelaccel - 6) / 30 33 | ####### Prev action penalty ######## 34 | if self.prev_action is not None: 35 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 36 | else: 37 | prev_penalty = 0 38 | print("prev_penalty: ", prev_penalty) 39 | ######## Foot height bonus ######## 40 | footheight_penalty = 0 41 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0): 42 | # print("adding foot height penalty") 43 | footheight_penalty = 0.2 44 | 45 | 46 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 47 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \ 48 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 49 | - pelaccel_penalty \ 50 | - foot_penalty \ 51 | - lforce - rforce \ 52 | - footheight_penalty 53 | 54 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def side_speedmatch_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .4*np.exp(-forward_diff) + .4*np.exp(-side_diff) + .2*np.exp(-orient_diff) 16 | 17 | return reward 18 | 19 | def side_speedmatch_torquesmooth_reward(self): 20 | qpos = np.copy(self.sim.qpos()) 21 | qvel = np.copy(self.sim.qvel()) 22 | 23 | forward_diff = np.abs(qvel[0] -self.speed) 24 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 25 | side_diff = np.abs(qvel[1] - self.side_speed) 26 | if forward_diff < 0.05: 27 | forward_diff = 0 28 | if side_diff < 0.05: 29 | side_diff = 0 30 | 31 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \ 32 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost) 33 | 34 | return reward 35 | 36 | def side_speedmatch_foottraj_reward(self): 37 | qpos = np.copy(self.sim.qpos()) 38 | qvel = np.copy(self.sim.qvel()) 39 | 40 | forward_diff = np.abs(qvel[0] -self.speed) 41 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 42 | side_diff = np.abs(qvel[1] - self.side_speed) 43 | if forward_diff < 0.05: 44 
| forward_diff = 0 45 | if side_diff < 0.05: 46 | side_diff = 0 47 | 48 | reward = .15*np.exp(-forward_diff) + .15*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 49 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 50 | + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 51 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) 52 | 53 | return reward 54 | 55 | def side_speedmatch_heightvel_reward(self): 56 | qpos = np.copy(self.sim.qpos()) 57 | qvel = np.copy(self.sim.qvel()) 58 | 59 | forward_diff = np.abs(qvel[0] -self.speed) 60 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 61 | side_diff = np.abs(qvel[1] - self.side_speed) 62 | if forward_diff < 0.05: 63 | forward_diff = 0 64 | if side_diff < 0.05: 65 | side_diff = 0 66 | 67 | reward = .2*np.exp(-forward_diff) + .2*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 68 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 69 | + .15*np.exp(-self.lf_heightvel) + .15*np.exp(-self.rf_heightvel) \ 70 | # + .1*np.exp(-self.ltdvel_cost) * .1*np.exp(-self.rtdvel_cost) 71 | 72 | return reward 73 | 74 | def side_speedmatch_heuristic_reward(self): 75 | qpos = np.copy(self.sim.qpos()) 76 | qvel = np.copy(self.sim.qvel()) 77 | 78 | forward_diff = np.abs(qvel[0] -self.speed) 79 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 80 | side_diff = np.abs(qvel[1] - self.side_speed) 81 | if forward_diff < 0.05: 82 | forward_diff = 0 83 | if side_diff < 0.05: 84 | side_diff = 0 85 | 86 | ######## Foot position penalty ######## 87 | foot_pos = np.zeros(6) 88 | self.sim.foot_pos(foot_pos) 89 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 90 | foot_penalty = 0 91 | if foot_dist < 0.22: 92 | foot_penalty = 0.2 93 | ######## Foot force penalty ######## 94 | foot_forces = self.sim.get_foot_forces() 95 | lforce = max((foot_forces[0] - 700)/1000, 0) 96 | rforce = max((foot_forces[1] - 700)/1000, 0) 97 | ######## Torque penalty ######## 98 | torque = np.linalg.norm(self.cassie_state.motor.torque[:]) 99 | ######## Pelvis z accel penalty ######### 100 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 101 | pelaccel_penalty = 0 102 | if pelaccel > 6: 103 | pelaccel_penalty = (pelaccel - 6) / 30 104 | ####### Prev action penalty ######## 105 | if self.prev_action is not None: 106 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 107 | else: 108 | prev_penalty = 0 109 | print("prev_penalty: ", prev_penalty) 110 | ######## Foot height bonus ######## 111 | footheight_penalty = 0 112 | if (np.abs(self.lfoot_vel) < 0.05 and foot_pos[2] < 0.2 and foot_forces[0] == 0) or (np.abs(self.rfoot_vel) < 0.05 and foot_pos[5] < 0.2 and foot_forces[1] == 0): 113 | # print("adding foot height penalty") 114 | footheight_penalty = 0.2 115 | 116 | 117 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .1*np.exp(-orient_diff) \ 118 | + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) \ 119 | + .1*np.exp(-self.lfoot_orient_cost) + .1*np.exp(-self.rfoot_orient_cost) \ 120 | - pelaccel_penalty \ 121 | - foot_penalty \ 122 | - lforce - rforce \ 123 | - footheight_penalty 124 | 125 | return reward -------------------------------------------------------------------------------- /cassie/rewards/side_speedmatch_torquesmooth_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def 
side_speedmatch_torquesmooth_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | forward_diff = np.abs(qvel[0] -self.speed) 8 | orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 9 | side_diff = np.abs(qvel[1] - self.side_speed) 10 | if forward_diff < 0.05: 11 | forward_diff = 0 12 | if side_diff < 0.05: 13 | side_diff = 0 14 | 15 | reward = .25*np.exp(-forward_diff) + .25*np.exp(-side_diff) + .2*np.exp(-orient_diff) \ 16 | + .1*np.exp(-self.torque_cost) + .2*np.exp(-self.smooth_cost) 17 | return reward -------------------------------------------------------------------------------- /cassie/rewards/speedmatch_footorient_joint_smooth_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def speedmatch_footorient_joint_smooth_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | orient_targ = np.array([1, 0, 0, 0]) 8 | speed_targ = np.array([self.speed, 0, 0]) 9 | if self.time >= self.orient_time: 10 | orient_targ = euler2quat(z=self.orient_add, y=0, x=0) 11 | iquaternion = inverse_quaternion(orient_targ) 12 | speed_targ = rotate_by_quaternion(speed_targ, iquaternion) 13 | new_orient = quaternion_product(iquaternion, self.cassie_state.pelvis.orientation[:]) 14 | if new_orient[0] < 0: 15 | new_orient = -new_orient 16 | forward_diff = np.abs(qvel[0] - speed_targ[0]) 17 | orient_diff = 1 - np.inner(orient_targ, qpos[3:7]) ** 2 18 | # orient_diff = np.linalg.norm(qpos[3:7] - np.array([1, 0, 0, 0])) 19 | y_vel = np.abs(qvel[1] - speed_targ[1]) 20 | if forward_diff < 0.05: 21 | forward_diff = 0 22 | if y_vel < 0.05: 23 | y_vel = 0 24 | straight_diff = 8*np.abs(qpos[1] - self.y_offset) 25 | if np.abs(qpos[1] - self.y_offset) < 0.05: 26 | straight_diff = 0 27 | if orient_diff < 5e-3: 28 | orient_diff = 0 29 | else: 30 | orient_diff *= 30 31 | 32 | reward = .25*np.exp(-forward_diff) + .1*np.exp(-orient_diff) \ 33 | + .1*np.exp(-straight_diff) + .1*np.exp(-y_vel) \ 34 | + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \ 35 | + .1*np.exp(-self.smooth_cost) \ 36 | + .15*np.exp(-self.joint_error) 37 | 38 | return reward -------------------------------------------------------------------------------- /cassie/rewards/speedmatch_heuristic_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def speedmatch_heuristic_reward(self): 4 | ######## Pelvis z accel penalty ######### 5 | pelaccel = np.abs(self.cassie_state.pelvis.translationalAcceleration[2]) 6 | pelaccel_penalty = 0 7 | if pelaccel > 5: 8 | pelaccel_penalty = (pelaccel - 5) / 10 9 | pelbonus = 0 10 | if 8 < pelaccel < 10: 11 | pelbonus = 0.2 12 | ######## Foot position penalty ######## 13 | foot_pos = np.zeros(6) 14 | self.sim.foot_pos(foot_pos) 15 | foot_dist = np.linalg.norm(foot_pos[0:2]-foot_pos[3:5]) 16 | foot_penalty = 0 17 | if foot_dist < 0.14: 18 | foot_penalty = 0.2 19 | ######## Foot force penalty ######## 20 | foot_forces = self.sim.get_foot_forces() 21 | lforce = max((foot_forces[0] - 350)/1000, 0) 22 | rforce = max((foot_forces[1] - 350)/1000, 0) 23 | forcebonus = 0 24 | # print("foot force: ", lforce, rforce) 25 | # lbonus = max((800 - foot_forces[0])/1000, 0) 26 | if foot_forces[0] <= 1000 and foot_forces[1] <= 1000: 27 | forcebonus = foot_forces[0] / 5000 + foot_forces[1] / 5000 28 | ######## Foot velocity penalty ######## 29 | lfoot_vel_bonus = 0 30 | rfoot_vel_bonus = 0 31 | # if 
self.prev_foot is not None and foot_pos[2] < 0.3 and foot_pos[5] < 0.3: 32 | # lfoot_vel = np.abs(foot_pos[2] - self.prev_foot[2]) / 0.03 * 0.03 33 | # rfoot_vel = np.abs(foot_pos[5] - self.prev_foot[5]) / 0.03 * 0.03 34 | # if self.l_high: 35 | # lfoot_vel_bonus = self.lfoot_vel * 0.3 36 | # if self.r_high: 37 | # rfoot_vel_bonus = self.rfoot_vel * 0.3 38 | ######## Foot orientation ######## 39 | lfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.leftFoot.orientation[:]) ** 2 40 | rfoot_orient = 1 - np.inner(np.array([1, 0, 0, 0]), self.cassie_state.rightFoot.orientation[:]) ** 2 41 | ####### Hip yaw ######## 42 | rhipyaw = np.abs(qpos[22]) 43 | lhipyaw = np.abs(qpos[8]) 44 | if lhipyaw < 0.05: 45 | lhipyaw = 0 46 | if rhipyaw < 0.05: 47 | rhipyaw = 0 48 | ####### Hip roll penalty ######### 49 | lhiproll = np.abs(qpos[7]) 50 | rhiproll = np.abs(qpos[21]) 51 | if lhiproll < 0.05: 52 | lhiproll = 0 53 | if rhiproll < 0.05: 54 | rhiproll = 0 55 | ####### Prev action penalty ######## 56 | if self.prev_action is not None: 57 | prev_penalty = np.linalg.norm(self.curr_action - self.prev_action) / 10 #* (30/self.simrate) 58 | else: 59 | prev_penalty = 0 60 | 61 | reward = .2*np.exp(-self.com_vel_error) + .1*np.exp(-self.com_error) + .1*np.exp(-self.orientation_error) \ 62 | + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-5*self.l_footvel_diff) \ 63 | + .1*np.exp(-20*self.r_foot_diff) + .1*np.exp(-5*self.r_footvel_diff) \ 64 | + .1*np.exp(-lfoot_orient) + .1*np.exp(-rfoot_orient) 65 | # reward = .4*np.exp(-forward_diff) + .3*np.exp(-orient_diff) \ 66 | # + .15*np.exp(-straight_diff) + .15*np.exp(-y_vel) \ 67 | # + .1*np.exp(-self.l_foot_orient) + .1*np.exp(-self.r_foot_orient) \ 68 | # + .1*np.exp(-self.smooth_cost) \ 69 | # + .15*np.exp(-self.joint_error) 70 | # + .1*np.exp(-self.torque_cost) + .1*np.exp(-self.smooth_cost) #\ 71 | # 72 | # + .075*np.exp(-10*lhipyaw) + .075*np.exp(-10*rhipyaw) + .075*np.exp(-10*lhiproll) + .075*np.exp(-10*rhiproll) 73 | # + .1*np.exp(-20*self.l_foot_diff) + .1*np.exp(-20*self.r_foot_diff) \ 74 | # + .1*np.exp(-5*self.l_footvel_diff) + .1*np.exp(-5*self.r_footvel_diff) 75 | # - lfoot_vel_bonus - rfoot_vel_bonus - foot_penalty 76 | # - lforce - rforce 77 | #+ pelbonus- pelaccel_penalty - foot_penalty 78 | -------------------------------------------------------------------------------- /cassie/rewards/standing_rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def stand_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | 7 | com_vel = np.linalg.norm(qvel[0:3]) 8 | com_height = (0.9 - qpos[2]) ** 2 9 | 10 | reward = 0.5*np.exp(-com_vel) + 0.5*np.exp(-com_height) 11 | 12 | return reward 13 | 14 | def step_even_reward(self): 15 | qpos = np.copy(self.sim.qpos()) 16 | qvel = np.copy(self.sim.qvel()) 17 | 18 | com_vel = np.linalg.norm(qvel[0:3]) 19 | com_height = (0.9 - qpos[2]) ** 2 20 | 21 | reward = 0.2*np.exp(-com_vel) + 0.2*np.exp(-com_height) \ 22 | + 0.3*np.exp(-self.l_foot_cost_even) + 0.3*np.exp(-self.r_foot_cost_even) 23 | 24 | return reward 25 | 26 | def step_even_pelheight_reward(self): 27 | qpos = np.copy(self.sim.qpos()) 28 | qvel = np.copy(self.sim.qvel()) 29 | 30 | com_height = (0.9 - qpos[2]) ** 2 31 | if qpos[2] > 0.8: 32 | com_height = 0 33 | 34 | reward = 0.2*np.exp(-com_height) \ 35 | + 0.4*np.exp(-self.l_foot_cost_even) + 0.4*np.exp(-self.r_foot_cost_even) 36 | 37 | return reward 38 | 39 | def 
step_smooth_pelheight_reward(self): 40 | qpos = np.copy(self.sim.qpos()) 41 | qvel = np.copy(self.sim.qvel()) 42 | 43 | com_height = (0.9 - qpos[2]) ** 2 44 | if qpos[2] > 0.8: 45 | com_height = 0 46 | 47 | reward = 0.2*np.exp(-com_height) \ 48 | + 0.4*np.exp(-self.l_foot_cost_smooth) + 0.4*np.exp(-self.r_foot_cost_smooth) 49 | 50 | return reward -------------------------------------------------------------------------------- /cassie/rewards/trajmatch_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def trajmatch_reward(self): 4 | qpos = np.copy(self.sim.qpos()) 5 | qvel = np.copy(self.sim.qvel()) 6 | phase_diff = self.phase - np.floor(self.phase) 7 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase))) 8 | if phase_diff != 0: 9 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase))) 10 | ref_pos_diff = ref_pos_next - ref_pos_prev 11 | ref_vel_diff = ref_vel_next - ref_vel_prev 12 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff 13 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff 14 | else: 15 | ref_pos = ref_pos_prev 16 | ref_vel = ref_vel_prev 17 | 18 | ref_pos, ref_vel = self.get_ref_state(self.phase) 19 | 20 | # TODO: should be variable; where do these come from? 21 | # TODO: see magnitude of state variables to gauge contribution to reward 22 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 23 | 24 | joint_error = 0 25 | com_error = 0 26 | orientation_error = 0 27 | spring_error = 0 28 | 29 | # each joint pos 30 | for i, j in enumerate(self.pos_idx): 31 | target = ref_pos[j] 32 | actual = qpos[j] 33 | 34 | joint_error += 30 * weight[i] * (target - actual) ** 2 35 | 36 | # center of mass: x, y, z 37 | for j in [0, 1, 2]: 38 | target = ref_pos[j] 39 | actual = qpos[j] 40 | 41 | # NOTE: in Xie et al y target is 0 42 | 43 | com_error += (target - actual) ** 2 44 | 45 | # COM orientation: qx, qy, qz 46 | for j in [4, 5, 6]: 47 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 48 | actual = qpos[j] 49 | 50 | orientation_error += (target - actual) ** 2 51 | 52 | # left and right shin springs 53 | for i in [15, 29]: 54 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 55 | actual = qpos[i] 56 | 57 | spring_error += 1000 * (target - actual) ** 2 58 | 59 | reward = 0.5 * np.exp(-joint_error) + \ 60 | 0.3 * np.exp(-com_error) + \ 61 | 0.1 * np.exp(-orientation_error) + \ 62 | 0.1 * np.exp(-spring_error) 63 | 64 | # orientation error does not look informative 65 | # maybe because it's comparing euclidean distance on quaternions 66 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format( 67 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100, 68 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100, 69 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100, 70 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100, 71 | # reward 72 | # ) 73 | # ) 74 | 75 | return reward 76 | 77 | def trajmatch_footorient_hiprollvelact_reward(self): 78 | qpos = np.copy(self.sim.qpos()) 79 | qvel = np.copy(self.sim.qvel()) 80 | phase_diff = self.phase - np.floor(self.phase) 81 | ref_pos_prev, ref_vel_prev = self.get_ref_state(int(np.floor(self.phase))) 82 | if phase_diff != 0: 83 | ref_pos_next, ref_vel_next = self.get_ref_state(int(np.ceil(self.phase))) 84 | 
ref_pos_diff = ref_pos_next - ref_pos_prev 85 | ref_vel_diff = ref_vel_next - ref_vel_prev 86 | ref_pos = ref_pos_prev + phase_diff*ref_pos_diff 87 | ref_vel = ref_vel_prev + phase_diff*ref_vel_diff 88 | else: 89 | ref_pos = ref_pos_prev 90 | ref_vel = ref_vel_prev 91 | 92 | ref_pos, ref_vel = self.get_ref_state(self.phase) 93 | 94 | # TODO: should be variable; where do these come from? 95 | # TODO: see magnitude of state variables to gauge contribution to reward 96 | weight = [0.15, 0.15, 0.1, 0.05, 0.05, 0.15, 0.15, 0.1, 0.05, 0.05] 97 | 98 | joint_error = 0 99 | com_error = 0 100 | orientation_error = 0 101 | spring_error = 0 102 | 103 | # each joint pos 104 | for i, j in enumerate(self.pos_idx): 105 | target = ref_pos[j] 106 | actual = qpos[j] 107 | 108 | joint_error += 30 * weight[i] * (target - actual) ** 2 109 | 110 | # center of mass: x, y, z 111 | for j in [0, 1, 2]: 112 | target = ref_pos[j] 113 | actual = qpos[j] 114 | 115 | # NOTE: in Xie et al y target is 0 116 | 117 | com_error += (target - actual) ** 2 118 | 119 | # COM orientation: qx, qy, qz 120 | for j in [4, 5, 6]: 121 | target = ref_pos[j] # NOTE: in Xie et al orientation target is 0 122 | actual = qpos[j] 123 | 124 | orientation_error += (target - actual) ** 2 125 | 126 | # left and right shin springs 127 | for i in [15, 29]: 128 | target = ref_pos[i] # NOTE: in Xie et al spring target is 0 129 | actual = qpos[i] 130 | 131 | spring_error += 1000 * (target - actual) ** 2 132 | 133 | reward = 0.3 * np.exp(-joint_error) + \ 134 | 0.2 * np.exp(-com_error) + \ 135 | 0.1 * np.exp(-orientation_error) + \ 136 | 0.1 * np.exp(-spring_error) \ 137 | + .075*np.exp(-self.l_foot_orient_cost) + .075*np.exp(-self.r_foot_orient_cost) \ 138 | + .1*np.exp(-self.hiproll_cost) + 0.05*np.exp(-self.hiproll_act) 139 | 140 | # orientation error does not look informative 141 | # maybe because it's comparing euclidean distance on quaternions 142 | # print("reward: {8}\njoint:\t{0:.2f}, % = {1:.2f}\ncom:\t{2:.2f}, % = {3:.2f}\norient:\t{4:.2f}, % = {5:.2f}\nspring:\t{6:.2f}, % = {7:.2f}\n\n".format( 143 | # 0.5 * np.exp(-joint_error), 0.5 * np.exp(-joint_error) / reward * 100, 144 | # 0.3 * np.exp(-com_error), 0.3 * np.exp(-com_error) / reward * 100, 145 | # 0.1 * np.exp(-orientation_error), 0.1 * np.exp(-orientation_error) / reward * 100, 146 | # 0.1 * np.exp(-spring_error), 0.1 * np.exp(-spring_error) / reward * 100, 147 | # reward 148 | # ) 149 | # ) 150 | 151 | return reward -------------------------------------------------------------------------------- /cassie/trajectory/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/.DS_Store -------------------------------------------------------------------------------- /cassie/trajectory/__init__.py: -------------------------------------------------------------------------------- 1 | from .trajectory import * 2 | from .aslip_trajectory import * -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.0.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.1.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.2.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.3.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.4.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.5.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.6.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.7.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.8.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_0.9.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.0.pkl 
-------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.1.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.2.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.3.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.4.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.5.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.6.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.7.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.8.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_1.9.pkl -------------------------------------------------------------------------------- /cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/aslipTrajsTaskSpace/walkCycle_2.0.pkl -------------------------------------------------------------------------------- /cassie/trajectory/backward_trajectory_Nov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/backward_trajectory_Nov -------------------------------------------------------------------------------- /cassie/trajectory/ikNet_state_dict.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/ikNet_state_dict.pt -------------------------------------------------------------------------------- /cassie/trajectory/more-poses-trial.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/more-poses-trial.bin -------------------------------------------------------------------------------- /cassie/trajectory/spline_stepping_traj.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/spline_stepping_traj.pkl -------------------------------------------------------------------------------- /cassie/trajectory/stepdata.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepdata.bin -------------------------------------------------------------------------------- /cassie/trajectory/stepping_trajectory_Nov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/stepping_trajectory_Nov -------------------------------------------------------------------------------- /cassie/trajectory/test.py: -------------------------------------------------------------------------------- 1 | # $ ipython -i test.py 2 | 3 | from trajectory import CassieTrajectory 4 | 5 | traj = CassieTrajectory("stepdata.bin") 6 | 7 | print(len(traj.qpos[0])) -------------------------------------------------------------------------------- /cassie/trajectory/traj_from_ref_foot_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/traj_from_ref_foot_data.pkl -------------------------------------------------------------------------------- /cassie/trajectory/trajectory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | """ 5 | Agility 2 kHz trajectory 6 | """ 7 | class CassieTrajectory: 8 | def __init__(self, filepath): 9 | n = 1 + 35 + 32 + 10 + 10 + 10 10 | data = np.fromfile(filepath, dtype=np.double).reshape((-1, n)) 11 | 12 | # states 13 | self.time = data[:, 0] 14 | self.qpos = data[:, 1:36] 15 | self.qvel = data[:, 36:68] 16 | 17 | # actions 18 | self.torque = data[:, 68:78] 19 | self.mpos = data[:, 78:88] 20 | self.mvel = data[:, 88:98] 21 | 22 | def state(self, t): 23 | tmax = self.time[-1] 24 | 25 | i 
= int((t % tmax) / tmax * len(self.time)) 26 | 27 | return (self.qpos[i], self.qvel[i]) 28 | 29 | def action(self, t): 30 | tmax = self.time[-1] 31 | i = int((t % tmax) / tmax * len(self.time)) 32 | return (self.mpos[i], self.mvel[i], self.torque[i]) 33 | 34 | def sample(self): 35 | i = random.randrange(len(self.time)) 36 | return (self.time[i], self.qpos[i], self.qvel[i]) 37 | 38 | def __len__(self): 39 | return len(self.time) -------------------------------------------------------------------------------- /cassie/trajectory/walk-in-place-downsampled.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/cassie/trajectory/walk-in-place-downsampled.bin -------------------------------------------------------------------------------- /img/output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output.gif -------------------------------------------------------------------------------- /img/output2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/img/output2.gif -------------------------------------------------------------------------------- /mirror_policy_check.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hashlib, os, pickle 3 | import sys, time 4 | from cassie.quaternion_function import * 5 | import tty 6 | import termios 7 | import select 8 | import numpy as np 9 | from functools import partial 10 | from rl.envs.wrappers import SymmetricEnv 11 | from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_noaccel_footdist 12 | 13 | def isData(): 14 | return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) 15 | 16 | env = CassieEnv(state_est=True, dynamics_randomization=False, history=0) 17 | env_fn = partial(CassieEnv, state_est=True, dynamics_randomization=False, history=0) 18 | # env = CassieEnv_noaccel_footdist(state_est=True, dynamics_randomization=False, history=0) 19 | # env_fn = partial(CassieEnv_noaccel_footdist, state_est=True, dynamics_randomization=False, history=0) 20 | 21 | sym_env = SymmetricEnv(env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4]) 22 | # obs = env.get_full_state() 23 | # print("obs len: ", len(obs)) 24 | # exit() 25 | 26 | path = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/" 27 | # path = "./logs/footdist/CassieNoaccelFootDist/noaccel_footdist_speedmatch_seed10/" 28 | policy = torch.load(path + "actor.pt") 29 | policy.eval() 30 | 31 | old_settings = termios.tcgetattr(sys.stdin) 32 | 33 | orient_add = 0 34 | 35 | env.render() 36 | render_state = True 37 | try: 38 | tty.setcbreak(sys.stdin.fileno()) 39 | 40 | state = env.reset_for_test() 41 | done = False 42 | timesteps = 0 43 | eval_reward = 0 44 | speed = 0.0 45 | 46 | while render_state: 47 | 48 | if isData(): 49 | c = sys.stdin.read(1) 50 | if c == 'w': 51 | speed += 0.1 52 | elif c == 's': 53 | speed -= 0.1 54 | elif c == 'j': 55 | env.phase_add += .1 56 | print("Increasing frequency to: {:.1f}".format(env.phase_add)) 57 | elif c == 'h': 58 | env.phase_add -= .1 59 | print("Decreasing frequency to: {:.1f}".format(env.phase_add)) 60 | elif c == 
'l': 61 | orient_add += .1 62 | print("Increasing orient_add to: ", orient_add) 63 | elif c == 'k': 64 | orient_add -= .1 65 | print("Decreasing orient_add to: ", orient_add) 66 | elif c == 'p': 67 | push = 100 68 | push_dir = 2 69 | force_arr = np.zeros(6) 70 | force_arr[push_dir] = push 71 | env.sim.apply_force(force_arr) 72 | 73 | env.update_speed(speed) 74 | print("speed: ", env.speed) 75 | 76 | if hasattr(env, 'simrate'): 77 | start = time.time() 78 | 79 | if (not env.vis.ispaused()): 80 | # Update Orientation 81 | quaternion = euler2quat(z=orient_add, y=0, x=0) 82 | iquaternion = inverse_quaternion(quaternion) 83 | 84 | if env.state_est: 85 | curr_orient = state[1:5] 86 | curr_transvel = state[15:18] 87 | # curr_orient = state[6:10] 88 | # curr_transvel = state[20:23] 89 | else: 90 | curr_orient = state[2:6] 91 | curr_transvel = state[20:23] 92 | 93 | new_orient = quaternion_product(iquaternion, curr_orient) 94 | 95 | if new_orient[0] < 0: 96 | new_orient = -new_orient 97 | 98 | new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion) 99 | 100 | if env.state_est: 101 | state[1:5] = torch.FloatTensor(new_orient) 102 | state[15:18] = torch.FloatTensor(new_translationalVelocity) 103 | # state[6:10] = torch.FloatTensor(new_orient) 104 | # state[20:23] = torch.FloatTensor(new_translationalVelocity) 105 | # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware. 106 | else: 107 | state[2:6] = torch.FloatTensor(new_orient) 108 | state[20:23] = torch.FloatTensor(new_translationalVelocity) 109 | 110 | state = torch.Tensor(state) 111 | # Calculate mirror state and mirror action 112 | with torch.no_grad(): 113 | mirror_state = sym_env.mirror_clock_observation(state.unsqueeze(0), env.clock_inds)[0] 114 | # Mirror pelvis orientation and velocity 115 | # mir_quat = inverse_quaternion(mirror_state[1:5]) 116 | # mir_quat[2] *= -1 117 | # mirror_state[1:5] = torch.Tensor(mir_quat) 118 | # mirror_state[16] *= -1 # y trans vel 119 | # mir_rot_vel = -mirror_state[18:21] 120 | # mir_rot_vel[1] *= -1 121 | # mirror_state[18:21] = mir_rot_vel 122 | # mirror_state[32] *= -1 # y trans accel 123 | mir_action = policy.forward(mirror_state, deterministic=True) 124 | mir_mir_action = sym_env.mirror_action(mir_action.unsqueeze(0)).detach().numpy()[0] 125 | action = policy.forward(state, deterministic=True).detach().numpy() 126 | # print("mirror action diff: ", np.linalg.norm(mir_mir_action - action)) 127 | state, reward, done, _ = env.step(mir_mir_action) 128 | 129 | eval_reward += reward 130 | timesteps += 1 131 | 132 | 133 | render_state = env.render() 134 | if hasattr(env, 'simrate'): 135 | # assume 30hz (hack) 136 | end = time.time() 137 | delaytime = max(0, 1000 / 30000 - (end-start)) 138 | time.sleep(delaytime) 139 | 140 | print("Eval reward: ", eval_reward) 141 | 142 | finally: 143 | termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) -------------------------------------------------------------------------------- /rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/rl/__init__.py -------------------------------------------------------------------------------- /rl/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rl/config/monitor.ini: 
-------------------------------------------------------------------------------- 1 | [monitor] 2 | # Options: Timesteps, Iterations, (walltime to be included in future) 3 | xlabel=Iterations 4 | 5 | # Options: Fixed, Variable 6 | xlim=Variable -------------------------------------------------------------------------------- /rl/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaussian import DiagonalGaussian 2 | from .beta import Beta, Beta2 -------------------------------------------------------------------------------- /rl/distributions/beta.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # TODO: extend these for arbitrary bounds 8 | 9 | """A beta distribution, but where the pdf is scaled to (-1, 1)""" 10 | class BoundedBeta(torch.distributions.Beta): 11 | def log_prob(self, x): 12 | return super().log_prob((x + 1) / 2) 13 | 14 | class Beta(nn.Module): 15 | def __init__(self, action_dim): 16 | super(Beta, self).__init__() 17 | 18 | self.action_dim = action_dim 19 | 20 | def forward(self, alpha_beta): 21 | alpha = 1 + F.softplus(alpha_beta[:, :self.action_dim]) 22 | beta = 1 + F.softplus(alpha_beta[:, self.action_dim:]) 23 | return alpha, beta 24 | 25 | def sample(self, x, deterministic): 26 | if deterministic is False: 27 | action = self.evaluate(x).sample() 28 | else: 29 | # E = alpha / (alpha + beta) 30 | return self.evaluate(x).mean 31 | 32 | return 2 * action - 1 33 | 34 | def evaluate(self, x): 35 | alpha, beta = self(x) 36 | return BoundedBeta(alpha, beta) 37 | 38 | 39 | # TODO: think of a better name for this 40 | """Beta distribution parameterized by mean and variance.""" 41 | class Beta2(nn.Module): 42 | def __init__(self, action_dim, init_std=0.25, learn_std=False): 43 | super(Beta2, self).__init__() 44 | 45 | assert init_std < 0.5, "Beta distribution has a max std dev of 0.5" 46 | 47 | self.action_dim = action_dim 48 | 49 | self.logstd = nn.Parameter( 50 | torch.ones(1, action_dim) * np.log(init_std), 51 | requires_grad=learn_std 52 | ) 53 | 54 | self.learn_std = learn_std 55 | 56 | 57 | def forward(self, x): 58 | mean = torch.sigmoid(x) 59 | 60 | var = self.logstd.exp().pow(2) 61 | 62 | """ 63 | alpha = ((1 - mu) / sigma^2 - 1 / mu) * mu^2 64 | beta = alpha * (1 / mu - 1) 65 | 66 | Implemented slightly differently for numerical stability. 
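        Expanding those two identities with mu = mean and sigma^2 = var, and
        rearranging so that no division by mu is needed (the "numerical
        stability" mentioned above), gives the form computed below:

            alpha = (1 - mu) * mu^2 / sigma^2 - mu
            beta  = (1 - mu) * mu   / sigma^2 - 1 - alpha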
67 | """ 68 | alpha = ((1 - mean) / var) * mean.pow(2) - mean 69 | beta = ((1 - mean) / var) * mean - 1 - alpha 70 | 71 | # PROBLEM: if alpha or beta < 1 thats not good 72 | 73 | #assert np.allclose(alpha, ((1 - mean) / var - 1 / mean) * mean.pow(2)) 74 | #assert np.allclose(beta, ((1 - mean) / var - 1 / mean) * mean.pow(2) * (1 / mean - 1)) 75 | 76 | #alpha = 1 + F.softplus(alpha) 77 | #beta = 1 + F.softplus(beta) 78 | 79 | # print("alpha",alpha) 80 | # print("beta",beta) 81 | 82 | # #print(alpha / (alpha + beta)) 83 | # print("mu",mean) 84 | 85 | # #print(torch.sqrt(alpha * beta / ((alpha+beta)**2 * (alpha + beta + 1)))) 86 | # print("var", var) 87 | 88 | # import pdb 89 | # pdb.set_trace() 90 | 91 | return alpha, beta 92 | 93 | def sample(self, x, deterministic): 94 | if deterministic is False: 95 | action = self.evaluate(x).sample() 96 | else: 97 | # E = alpha / (alpha + beta) 98 | return self.evaluate(x).mean 99 | 100 | # 2 * a - 1 puts a in (-1, 1) 101 | return 2 * action - 1 102 | 103 | def evaluate(self, x): 104 | alpha, beta = self(x) 105 | return BoundedBeta(alpha, beta) -------------------------------------------------------------------------------- /rl/distributions/gaussian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | # TODO: look at change of variables function for enforcing 8 | # action bounds correctly 9 | class DiagonalGaussian(nn.Module): 10 | def __init__(self, num_outputs, init_std=1, learn_std=True): 11 | super(DiagonalGaussian, self).__init__() 12 | 13 | self.logstd = nn.Parameter( 14 | torch.ones(1, num_outputs) * np.log(init_std), 15 | requires_grad=learn_std 16 | ) 17 | 18 | self.learn_std = learn_std 19 | 20 | def forward(self, x): 21 | mean = x 22 | 23 | std = self.logstd.exp() 24 | 25 | return mean, std 26 | 27 | def sample(self, x, deterministic): 28 | if deterministic is False: 29 | action = self.evaluate(x).sample() 30 | else: 31 | action, _ = self(x) 32 | 33 | return action 34 | 35 | def evaluate(self, x): 36 | mean, std = self(x) 37 | return torch.distributions.Normal(mean, std) 38 | -------------------------------------------------------------------------------- /rl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .vectorize import * 2 | from .normalize import * 3 | from .wrappers import * -------------------------------------------------------------------------------- /rl/envs/monitor.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/bench/monitor.py 2 | 3 | import time 4 | from glob import glob 5 | import csv 6 | import os.path as osp 7 | import json 8 | 9 | class Monitor: 10 | EXT = "monitor.csv" 11 | f = None 12 | 13 | def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()): 14 | Wrapper.__init__(self, env=env) 15 | self.tstart = time.time() 16 | if filename is None: 17 | self.f = None 18 | self.logger = None 19 | else: 20 | if not filename.endswith(Monitor.EXT): 21 | if osp.isdir(filename): 22 | filename = osp.join(filename, Monitor.EXT) 23 | else: 24 | filename = filename + "." 
+ Monitor.EXT 25 | self.f = open(filename, "wt") 26 | self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, "gym_version": gym.__version__, 27 | "env_id": env.spec.id if env.spec else 'Unknown'})) 28 | self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+reset_keywords) 29 | self.logger.writeheader() 30 | 31 | self.reset_keywords = reset_keywords 32 | self.allow_early_resets = allow_early_resets 33 | self.rewards = None 34 | self.needs_reset = True 35 | self.episode_rewards = [] 36 | self.episode_lengths = [] 37 | self.total_steps = 0 38 | self.current_reset_info = {} # extra info about the current episode, that was passed in during reset() 39 | 40 | def _reset(self, **kwargs): 41 | if not self.allow_early_resets and not self.needs_reset: 42 | raise RuntimeError("Tried to reset an environment before done. If you want to allow early resets, wrap your env with Monitor(env, path, allow_early_resets=True)") 43 | self.rewards = [] 44 | self.needs_reset = False 45 | for k in self.reset_keywords: 46 | v = kwargs.get(k) 47 | if v is None: 48 | raise ValueError('Expected you to pass kwarg %s into reset'%k) 49 | self.current_reset_info[k] = v 50 | return self.env.reset(**kwargs) 51 | 52 | def _step(self, action): 53 | if self.needs_reset: 54 | raise RuntimeError("Tried to step environment that needs reset") 55 | ob, rew, done, info = self.env.step(action) 56 | self.rewards.append(rew) 57 | if done: 58 | self.needs_reset = True 59 | eprew = sum(self.rewards) 60 | eplen = len(self.rewards) 61 | epinfo = {"r": round(eprew, 6), "l": eplen, "t": round(time.time() - self.tstart, 6)} 62 | epinfo.update(self.current_reset_info) 63 | if self.logger: 64 | self.logger.writerow(epinfo) 65 | self.f.flush() 66 | self.episode_rewards.append(eprew) 67 | self.episode_lengths.append(eplen) 68 | info['episode'] = epinfo 69 | self.total_steps += 1 70 | return (ob, rew, done, info) 71 | 72 | def close(self): 73 | if self.f is not None: 74 | self.f.close() 75 | 76 | def get_total_steps(self): 77 | return self.total_steps 78 | 79 | def get_episode_rewards(self): 80 | return self.episode_rewards 81 | 82 | def get_episode_lengths(self): 83 | return self.episode_lengths 84 | 85 | class LoadMonitorResultsError(Exception): 86 | pass 87 | 88 | def get_monitor_files(dir): 89 | return glob(osp.join(dir, "*" + Monitor.EXT)) 90 | 91 | def load_results(dir): 92 | import pandas 93 | monitor_files = glob(osp.join(dir, "*monitor.*")) # get both csv and (old) json files 94 | if not monitor_files: 95 | raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir)) 96 | dfs = [] 97 | headers = [] 98 | for fname in monitor_files: 99 | with open(fname, 'rt') as fh: 100 | if fname.endswith('csv'): 101 | firstline = fh.readline() 102 | assert firstline[0] == '#' 103 | header = json.loads(firstline[1:]) 104 | df = pandas.read_csv(fh, index_col=None) 105 | headers.append(header) 106 | elif fname.endswith('json'): # Deprecated json format 107 | episodes = [] 108 | lines = fh.readlines() 109 | header = json.loads(lines[0]) 110 | headers.append(header) 111 | for line in lines[1:]: 112 | episode = json.loads(line) 113 | episodes.append(episode) 114 | df = pandas.DataFrame(episodes) 115 | df['t'] += header['t_start'] 116 | dfs.append(df) 117 | df = pandas.concat(dfs) 118 | df.sort_values('t', inplace=True) 119 | df['t'] -= min(header['t_start'] for header in headers) 120 | df.headers = headers # HACK to preserve backwards compatibility 121 | return df 
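A minimal usage sketch for the Monitor wrapper above. Note that, as written, this module never imports gym or Wrapper even though __init__ references both, so the sketch assumes those imports have been added to the module (e.g. import gym and from gym import Wrapper); it also calls the _reset/_step hooks directly rather than relying on old-gym Wrapper dispatch, and the log path is hypothetical.

import os
import gym

from rl.envs.monitor import Monitor, load_results

os.makedirs("./logs", exist_ok=True)

# Wrap any gym env; per-episode reward/length/time get appended to ./logs/run0.monitor.csv
env = Monitor(gym.make("CartPole-v1"), "./logs/run0", allow_early_resets=True)

obs = env._reset()
for _ in range(500):
    obs, rew, done, info = env._step(env.env.action_space.sample())
    if done:
        obs = env._reset()
env.close()

df = load_results("./logs")  # pandas DataFrame with columns r (reward), l (length), t (time)
print(len(df), df["r"].mean())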
-------------------------------------------------------------------------------- /rl/envs/normalize.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py 2 | # Thanks to the authors + OpenAI for the code 3 | 4 | import numpy as np 5 | import functools 6 | import torch 7 | import ray 8 | 9 | from .wrapper import WrapEnv 10 | 11 | @ray.remote 12 | def _run_random_actions(iter, policy, env_fn, noise_std): 13 | 14 | env = WrapEnv(env_fn) 15 | states = np.zeros((iter, env.observation_space.shape[0])) 16 | 17 | state = env.reset() 18 | for t in range(iter): 19 | states[t, :] = state 20 | 21 | state = torch.Tensor(state) 22 | 23 | action = policy(state) 24 | 25 | # add gaussian noise to deterministic action 26 | action = action + torch.randn(action.size()) * noise_std 27 | 28 | state, _, done, _ = env.step(action.data.numpy()) 29 | 30 | if done: 31 | state = env.reset() 32 | 33 | return states 34 | 35 | def get_normalization_params(iter, policy, env_fn, noise_std, procs=4): 36 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std)) 37 | 38 | states_ids = [_run_random_actions.remote(iter // procs, policy, env_fn, noise_std) for _ in range(procs)] 39 | 40 | states = [] 41 | for _ in range(procs): 42 | ready_ids, _ = ray.wait(states_ids, num_returns=1) 43 | states.extend(ray.get(ready_ids[0])) 44 | states_ids.remove(ready_ids[0]) 45 | 46 | print("Done gathering input normalization data.") 47 | 48 | return np.mean(states, axis=0), np.sqrt(np.var(states, axis=0) + 1e-8) 49 | 50 | 51 | # returns a function that creates a normalized environment, then pre-normalizes it 52 | # using states sampled from a deterministic policy with some added noise 53 | def PreNormalizer(iter, noise_std, policy, *args, **kwargs): 54 | 55 | # noise is gaussian noise 56 | @torch.no_grad() 57 | def pre_normalize(env, policy, num_iter, noise_std): 58 | # save whether or not the environment is configured to do online normalization 59 | online_val = env.online 60 | env.online = True 61 | 62 | state = env.reset() 63 | 64 | for t in range(num_iter): 65 | state = torch.Tensor(state) 66 | 67 | _, action = policy(state) 68 | 69 | # add gaussian noise to deterministic action 70 | action = action + torch.randn(action.size()) * noise_std 71 | 72 | state, _, done, _ = env.step(action.data.numpy()) 73 | 74 | if done: 75 | state = env.reset() 76 | 77 | env.online = online_val 78 | 79 | def _Normalizer(venv): 80 | venv = Normalize(venv, *args, **kwargs) 81 | 82 | print("Gathering input normalization data using {0} steps, noise = {1}...".format(iter, noise_std)) 83 | pre_normalize(venv, policy, iter, noise_std) 84 | print("Done gathering input normalization data.") 85 | 86 | return venv 87 | 88 | return _Normalizer 89 | 90 | # returns a function that creates a normalized environment 91 | def Normalizer(*args, **kwargs): 92 | def _Normalizer(venv): 93 | return Normalize(venv, *args, **kwargs) 94 | 95 | return _Normalizer 96 | 97 | class Normalize: 98 | """ 99 | Vectorized environment base class 100 | """ 101 | def __init__(self, 102 | venv, 103 | ob_rms=None, 104 | ob=True, 105 | ret=False, 106 | clipob=10., 107 | cliprew=10., 108 | online=True, 109 | gamma=1.0, 110 | epsilon=1e-8): 111 | 112 | self.venv = venv 113 | self._observation_space = venv.observation_space 114 | self._action_space = venv.action_space 115 | 116 | if ob_rms is not None: 117 | 
self.ob_rms = ob_rms 118 | else: 119 | self.ob_rms = RunningMeanStd(shape=self._observation_space.shape) if ob else None 120 | 121 | self.ret_rms = RunningMeanStd(shape=()) if ret else None 122 | self.clipob = clipob 123 | self.cliprew = cliprew 124 | self.ret = np.zeros(self.num_envs) 125 | self.gamma = gamma 126 | self.epsilon = epsilon 127 | 128 | self.online = online 129 | 130 | def step(self, vac): 131 | obs, rews, news, infos = self.venv.step(vac) 132 | 133 | #self.ret = self.ret * self.gamma + rews 134 | obs = self._obfilt(obs) 135 | 136 | # NOTE: shifting mean of reward seems bad; qualitatively changes MDP 137 | if self.ret_rms: 138 | if self.online: 139 | self.ret_rms.update(self.ret) 140 | 141 | rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew) 142 | 143 | return obs, rews, news, infos 144 | 145 | def _obfilt(self, obs): 146 | if self.ob_rms: 147 | if self.online: 148 | self.ob_rms.update(obs) 149 | 150 | obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob) 151 | return obs 152 | else: 153 | return obs 154 | 155 | def reset(self): 156 | """ 157 | Reset all environments 158 | """ 159 | obs = self.venv.reset() 160 | return self._obfilt(obs) 161 | 162 | @property 163 | def action_space(self): 164 | return self._action_space 165 | 166 | @property 167 | def observation_space(self): 168 | return self._observation_space 169 | 170 | def close(self): 171 | self.venv.close() 172 | 173 | def render(self): 174 | self.venv.render() 175 | 176 | @property 177 | def num_envs(self): 178 | return self.venv.num_envs 179 | 180 | 181 | 182 | class RunningMeanStd(object): 183 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 184 | def __init__(self, epsilon=1e-4, shape=()): 185 | self.mean = np.zeros(shape, 'float64') 186 | self.var = np.zeros(shape, 'float64') 187 | self.count = epsilon 188 | 189 | 190 | def update(self, x): 191 | batch_mean = np.mean(x, axis=0) 192 | batch_var = np.var(x, axis=0) 193 | batch_count = x.shape[0] 194 | 195 | delta = batch_mean - self.mean 196 | tot_count = self.count + batch_count 197 | 198 | new_mean = self.mean + delta * batch_count / tot_count 199 | m_a = self.var * (self.count) 200 | m_b = batch_var * (batch_count) 201 | M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) 202 | new_var = M2 / (self.count + batch_count) 203 | 204 | new_count = batch_count + self.count 205 | 206 | self.mean = new_mean 207 | self.var = new_var 208 | self.count = new_count 209 | 210 | def test_runningmeanstd(): 211 | for (x1, x2, x3) in [ 212 | (np.random.randn(3), np.random.randn(4), np.random.randn(5)), 213 | (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)), 214 | ]: 215 | 216 | rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:]) 217 | 218 | x = np.concatenate([x1, x2, x3], axis=0) 219 | ms1 = [x.mean(axis=0), x.var(axis=0)] 220 | rms.update(x1) 221 | rms.update(x2) 222 | rms.update(x3) 223 | ms2 = [rms.mean, rms.var] 224 | 225 | assert np.allclose(ms1, ms2) 226 | -------------------------------------------------------------------------------- /rl/envs/vectorize.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/dummy_vec_env.py 2 | # Thanks to the authors + OpenAI for the code 3 | 4 | import numpy as np 5 | 6 | class Vectorize: 7 | def __init__(self, env_fns): 8 | 
self.envs = [fn() for fn in env_fns] 9 | env = self.envs[0] 10 | 11 | self._observation_space = env.observation_space 12 | self._action_space = env.action_space 13 | 14 | self.ts = np.zeros(len(self.envs), dtype='int') 15 | 16 | def step(self, action_n): 17 | results = [env.step(a) for (a,env) in zip(action_n, self.envs)] 18 | obs, rews, dones, infos = map(np.array, zip(*results)) 19 | 20 | # TODO: decide whether to uncomment this 21 | self.ts += 1 22 | # for (i, done) in enumerate(dones): 23 | # if done: 24 | # obs[i] = self.envs[i].reset() 25 | # self.ts[i] = 0 26 | 27 | return np.array(obs), np.array(rews), np.array(dones), infos 28 | 29 | def reset(self): 30 | results = [env.reset() for env in self.envs] 31 | return np.array(results) 32 | 33 | def render(self): 34 | self.envs[0].render() 35 | 36 | @property 37 | def num_envs(self): 38 | return len(self.envs) 39 | 40 | @property 41 | def action_space(self): 42 | return self._action_space 43 | 44 | @property 45 | def observation_space(self): 46 | return self._observation_space 47 | 48 | -------------------------------------------------------------------------------- /rl/envs/wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Gives a vectorized interface to a single environment 4 | class WrapEnv: 5 | def __init__(self, env_fn): 6 | self.env = env_fn() 7 | 8 | def __getattr__(self, attr): 9 | return getattr(self.env, attr) 10 | 11 | def step(self, action): 12 | state, reward, done, info = self.env.step(action[0]) 13 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info]) 14 | 15 | def render(self): 16 | self.env.render() 17 | 18 | def reset(self): 19 | return np.array([self.env.reset()]) -------------------------------------------------------------------------------- /rl/envs/wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | # Gives a vectorized interface to a single environment 5 | class WrapEnv: 6 | def __init__(self, env_fn): 7 | self.env = env_fn() 8 | 9 | def __getattr__(self, attr): 10 | return getattr(self.env, attr) 11 | 12 | def step(self, action, term_thresh=0): 13 | state, reward, done, info = self.env.step(action[0], f_term=term_thresh) 14 | return np.array([state]), np.array([reward]), np.array([done]), np.array([info]) 15 | 16 | def render(self): 17 | self.env.render() 18 | 19 | def reset(self): 20 | return np.array([self.env.reset()]) 21 | 22 | # TODO: this is probably a better case for inheritance than for a wrapper 23 | # Gives an interface to exploit mirror symmetry 24 | class SymmetricEnv: 25 | def __init__(self, env_fn, mirrored_obs=None, mirrored_act=None, obs_fn=None, act_fn=None): 26 | 27 | assert (bool(mirrored_act) ^ bool(act_fn)) and (bool(mirrored_obs) ^ bool(obs_fn)), \ 28 | "You must provide either mirror indices or a mirror function, but not both, for \ 29 | observation and action." 
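        # Index-list convention (inferred from _get_symmetry_matrix at the bottom of this
        # file): the magnitude of entry i gives the index that position i is paired with
        # under the left/right mirror, and a negative sign means that value is also negated.
        # Since -0 cannot carry a sign, -0.1 is the conventional way to write "index 0, negated".
        # e.g. mirrored_act = [-1, -0.1] maps an action [a0, a1] to [-a1, -a0].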
30 | 31 | if mirrored_act: 32 | self.act_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_act)) 33 | 34 | elif act_fn: 35 | assert callable(act_fn), "Action mirror function must be callable" 36 | self.mirror_action = act_fn 37 | 38 | if mirrored_obs: 39 | self.obs_mirror_matrix = torch.Tensor(_get_symmetry_matrix(mirrored_obs)) 40 | 41 | elif obs_fn: 42 | assert callable(obs_fn), "Observation mirror function must be callable" 43 | self.mirror_observation = obs_fn 44 | 45 | self.env = env_fn() 46 | 47 | def __getattr__(self, attr): 48 | return getattr(self.env, attr) 49 | 50 | def mirror_action(self, action): 51 | return action @ self.act_mirror_matrix 52 | 53 | def mirror_observation(self, obs): 54 | return obs @ self.obs_mirror_matrix 55 | 56 | # To be used when there is a clock in the observation. In this case, the mirrored_obs vector inputted 57 | # when the SymmeticEnv is created should not move the clock input order. The indices of the obs vector 58 | # where the clocks are located need to be inputted. 59 | def mirror_clock_observation(self, obs, clock_inds): 60 | # print("obs.shape = ", obs.shape) 61 | # print("obs_mirror_matrix.shape = ", self.obs_mirror_matrix.shape) 62 | mirror_obs = obs @ self.obs_mirror_matrix 63 | clock = mirror_obs[:, self.clock_inds] 64 | # print("clock: ", clock) 65 | for i in range(np.shape(clock)[1]): 66 | mirror_obs[:, clock_inds[i]] = np.sin(np.arcsin(clock[:, i]) + np.pi) 67 | return mirror_obs 68 | 69 | 70 | def _get_symmetry_matrix(mirrored): 71 | numel = len(mirrored) 72 | mat = np.zeros((numel, numel)) 73 | 74 | for (i, j) in zip(np.arange(numel), np.abs(np.array(mirrored).astype(int))): 75 | mat[i, j] = np.sign(mirrored[i]) 76 | 77 | return mat -------------------------------------------------------------------------------- /rl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | # from .actor import Gaussian_FF_Actor as GaussianMLP_Actor # for legacy code 2 | from .actor import Gaussian_FF_Actor 3 | 4 | # from .actor_release import GaussianMLP_Actor 5 | 6 | #from .linear import LinearMLP 7 | #from .recurrent import RecurrentNet -------------------------------------------------------------------------------- /rl/policies/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from torch import sqrt 6 | 7 | def normc_fn(m): 8 | classname = m.__class__.__name__ 9 | if classname.find('Linear') != -1: 10 | m.weight.data.normal_(0, 1) 11 | m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True)) 12 | if m.bias is not None: 13 | m.bias.data.fill_(0) 14 | 15 | # The base class for an actor. 
Includes functions for normalizing state (optional) 16 | class Net(nn.Module): 17 | def __init__(self): 18 | super(Net, self).__init__() 19 | self.is_recurrent = False 20 | 21 | self.welford_state_mean = torch.zeros(1) 22 | self.welford_state_mean_diff = torch.ones(1) 23 | self.welford_state_n = 1 24 | 25 | self.env_name = None 26 | 27 | def forward(self): 28 | raise NotImplementedError 29 | 30 | def normalize_state(self, state, update=True): 31 | state = torch.Tensor(state) 32 | 33 | if self.welford_state_n == 1: 34 | self.welford_state_mean = torch.zeros(state.size(-1)) 35 | self.welford_state_mean_diff = torch.ones(state.size(-1)) 36 | 37 | if update: 38 | if len(state.size()) == 1: # If we get a single state vector 39 | state_old = self.welford_state_mean 40 | self.welford_state_mean += (state - state_old) / self.welford_state_n 41 | self.welford_state_mean_diff += (state - state_old) * (state - state_old) 42 | self.welford_state_n += 1 43 | elif len(state.size()) == 2: # If we get a batch 44 | print("NORMALIZING 2D TENSOR (this should not be happening)") 45 | for r_n in r: 46 | state_old = self.welford_state_mean 47 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n 48 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old) 49 | self.welford_state_n += 1 50 | elif len(state.size()) == 3: # If we get a batch of sequences 51 | print("NORMALIZING 3D TENSOR (this really should not be happening)") 52 | for r_t in r: 53 | for r_n in r_t: 54 | state_old = self.welford_state_mean 55 | self.welford_state_mean += (state_n - state_old) / self.welford_state_n 56 | self.welford_state_mean_diff += (state_n - state_old) * (state_n - state_old) 57 | self.welford_state_n += 1 58 | return (state - self.welford_state_mean) / sqrt(self.welford_state_mean_diff / self.welford_state_n) 59 | 60 | def copy_normalizer_stats(self, net): 61 | self.welford_state_mean = net.self_state_mean 62 | self.welford_state_mean_diff = net.welford_state_mean_diff 63 | self.welford_state_n = net.welford_state_n 64 | 65 | def initialize_parameters(self): 66 | self.apply(normc_fn) -------------------------------------------------------------------------------- /rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .render import * 2 | from .param_noise import * 3 | from .remote_replay import * 4 | import sys 5 | 6 | class ProgBar(): 7 | def __init__(self, total, bar_len=40): 8 | self.total = total 9 | self.count = 0 10 | self.bar_len = bar_len 11 | 12 | def next(self, msg=''): 13 | self.count += 1 14 | 15 | fill_len = int(round(self.bar_len * self.count / float(self.total))) 16 | bar = '=' * fill_len + '-' * (self.bar_len - fill_len) 17 | 18 | percent = round(100.0 * self.count / float(self.total), 1) 19 | 20 | msg = msg.ljust(len(msg) + 2) 21 | 22 | sys.stdout.write('[%s] %s%s ... 
%s\r' % (bar, percent, '%', msg)) 23 | sys.stdout.flush() 24 | -------------------------------------------------------------------------------- /rl/utils/param_noise.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import gym 4 | 5 | """ 6 | From OpenAI Baselines: 7 | https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py 8 | """ 9 | # For parameter noise 10 | class AdaptiveParamNoiseSpec(object): 11 | def __init__(self, initial_stddev=0.1, desired_action_stddev=0.2, adaptation_coefficient=1.01): 12 | """ 13 | Note that initial_stddev and current_stddev refer to std of parameter noise, 14 | but desired_action_stddev refers to (as name notes) desired std in action space 15 | """ 16 | self.initial_stddev = initial_stddev 17 | self.desired_action_stddev = desired_action_stddev 18 | self.adaptation_coefficient = adaptation_coefficient 19 | 20 | self.current_stddev = initial_stddev 21 | 22 | def adapt(self, distance): 23 | if distance > self.desired_action_stddev: 24 | # Decrease stddev. 25 | self.current_stddev /= self.adaptation_coefficient 26 | else: 27 | # Increase stddev. 28 | self.current_stddev *= self.adaptation_coefficient 29 | 30 | def get_stats(self): 31 | stats = { 32 | 'param_noise_stddev': self.current_stddev, 33 | } 34 | return stats 35 | 36 | def __repr__(self): 37 | fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adaptation_coefficient={})' 38 | return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adaptation_coefficient) 39 | 40 | def distance_metric(actions1, actions2): 41 | """ 42 | Compute "distance" between actions taken by two policies at the same states 43 | Expects numpy arrays 44 | """ 45 | diff = actions1-actions2 46 | mean_diff = np.mean(np.square(diff), axis=0) 47 | dist = np.sqrt(np.mean(mean_diff)) 48 | return dist 49 | 50 | def perturb_actor_parameters(perturbed_policy, unperturbed_policy, param_noise, device): 51 | """Apply parameter noise to actor model, for exploration""" 52 | perturbed_policy.load_state_dict(unperturbed_policy.state_dict()) 53 | params = perturbed_policy.state_dict() 54 | for name in params: 55 | if 'ln' in name: 56 | pass 57 | param = params[name] 58 | param += torch.randn(param.shape).to(device) * param_noise.current_stddev -------------------------------------------------------------------------------- /rl/utils/remote_replay.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import ray 4 | 5 | # tensorboard 6 | from datetime import datetime 7 | from torch.utils.tensorboard import SummaryWriter 8 | from colorama import Fore, Style 9 | 10 | # more efficient replay memory? 11 | from collections import deque 12 | 13 | # Code based on: 14 | # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py 15 | 16 | # Expects tuples of (state, next_state, action, reward, done) 17 | 18 | @ray.remote 19 | class ReplayBuffer_remote(object): 20 | def __init__(self, size, experiment_name, args): 21 | """Create Replay buffer. 22 | Parameters 23 | ---------- 24 | size: int 25 | Max number of transitions to store in the buffer. When the buffer 26 | overflows the old memories are dropped. 
27 | """ 28 | self.storage = deque(maxlen=int(size)) 29 | self.max_size = size 30 | 31 | print("Created replay buffer with size {}".format(self.max_size)) 32 | 33 | def __len__(self): 34 | return len(self.storage) 35 | 36 | def storage_size(self): 37 | return len(self.storage) 38 | 39 | def add(self, data): 40 | self.storage.append(data) 41 | 42 | def add_bulk(self, data): 43 | for i in range(len(data)): 44 | self.storage.append(data[i]) 45 | 46 | def print_size(self): 47 | print("size = {}".format(len(self.storage))) 48 | 49 | def sample(self, batch_size): 50 | ind = np.random.randint(0, len(self.storage), size=batch_size) 51 | x, y, u, r, d = [], [], [], [], [] 52 | 53 | for i in ind: 54 | X, Y, U, R, D = self.storage[i] 55 | x.append(np.array(X, copy=False)) 56 | y.append(np.array(Y, copy=False)) 57 | u.append(np.array(U, copy=False)) 58 | r.append(np.array(R, copy=False)) 59 | d.append(np.array(D, copy=False)) 60 | 61 | # print("Sampled experience from replay buffer.") 62 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 63 | 64 | # Non-ray actor for replay buffer 65 | class ReplayBuffer(object): 66 | def __init__(self, max_size=1e7): 67 | self.storage = [] 68 | self.max_size = max_size 69 | self.ptr = 0 70 | 71 | def add(self, data): 72 | if len(self.storage) < self.max_size: 73 | self.storage.append(data) 74 | self.storage[int(self.ptr)] = data 75 | self.ptr = (self.ptr + 1) % self.max_size 76 | 77 | 78 | def sample(self, batch_size): 79 | ind = np.random.randint(0, len(self.storage), size=batch_size) 80 | x, y, u, r, d = [], [], [], [], [] 81 | 82 | for i in ind: 83 | X, Y, U, R, D = self.storage[i] 84 | x.append(np.array(X, copy=False)) 85 | y.append(np.array(Y, copy=False)) 86 | u.append(np.array(U, copy=False)) 87 | r.append(np.array(R, copy=False)) 88 | d.append(np.array(D, copy=False)) 89 | 90 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 91 | 92 | def get_transitions_from_range(self, start, end): 93 | ind = np.arange(int(start), int(end)) 94 | x, u = [], [] 95 | for i in ind: 96 | X, Y, U, R, D = self.storage[i] 97 | x.append(np.array(X, copy=False)) 98 | u.append(np.array(U, copy=False)) 99 | 100 | return np.array(x), np.array(u) 101 | 102 | def get_all_transitions(self): 103 | # list of transition tuples 104 | return self.storage 105 | 106 | def add_parallel(self, data): 107 | for i in range(len(data)): 108 | self.add(data[i]) -------------------------------------------------------------------------------- /test_policy.py: -------------------------------------------------------------------------------- 1 | from cassie import CassiePlayground 2 | from tools.test_commands import * 3 | from tools.eval_perturb import * 4 | from tools.eval_mission import * 5 | from tools.compare_pols import * 6 | from tools.eval_sensitivity import * 7 | from collections import OrderedDict 8 | from util.env import env_factory 9 | 10 | import torch 11 | import pickle 12 | import os, sys, argparse 13 | import numpy as np 14 | 15 | # Get policy to test from args, load policy and env 16 | parser = argparse.ArgumentParser() 17 | # General args 18 | parser.add_argument("--path", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing policy and run details") 19 | parser.add_argument("--path2", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2", help="path to folder containing 2nd policy to 
compare against") 20 | parser.add_argument("--n_procs", type=int, default=4, help="Number of procs to use for multi-processing") 21 | parser.add_argument("--test", type=str, default="full", help="Test to run (options: \"full\", \"commands\", and \"perturb\", and \"compare\")") 22 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not") 23 | # Test Commands args 24 | parser.add_argument("--n_steps", type=int, default=200, help="Number of steps to for a full command cycle (1 speed change and 1 orientation change)") 25 | parser.add_argument("--n_commands", type=int, default=6, help="Number of commands in a single test iteration") 26 | parser.add_argument("--max_speed", type=float, default=3.0, help="Maximum allowable speed to test") 27 | parser.add_argument("--min_speed", type=float, default=0.0, help="Minimum allowable speed to test") 28 | parser.add_argument("--n_iter", type=int, default=10000, help="Number of command cycles to test") 29 | # Test Perturbs args 30 | parser.add_argument("--wait_time", type=float, default=3.0, help="How long to wait after perturb to count as success") 31 | parser.add_argument("--pert_dur", type=float, default=0.2, help="How long to apply perturbation") 32 | parser.add_argument("--pert_size", type=float, default=50, help="Size of perturbation to start sweep from") 33 | parser.add_argument("--pert_incr", type=float, default=10.0, help="How much to increment the perturbation size after each success") 34 | parser.add_argument("--pert_body", type=str, default="cassie-pelvis", help="Body to apply perturbation to") 35 | parser.add_argument("--num_angles", type=int, default=100, help="How many angles to test (angles are evenly divided into 2*pi)") 36 | # Test Mission args 37 | parser.add_argument("--viz", default=False, action='store_true') 38 | # Test parameter sensitivity args 39 | parser.add_argument("--sens_incr", type=float, default=0.05, help="Size of increments for the sensityivity sweep") 40 | parser.add_argument("--hi_factor", type=float, default=15, help="High factor") 41 | parser.add_argument("--lo_factor", type=float, default=0, help="Low factor") 42 | 43 | args = parser.parse_args() 44 | run_args = pickle.load(open(os.path.join(args.path, "experiment.pkl"), "rb")) 45 | # cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 46 | # env_fn = partial(CassieEnv, traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 47 | # Make mirror False so that env_factory returns a regular wrap env function and not a symmetric env function that can be called to return 48 | # a cassie environment (symmetric env cannot be called to make another env) 49 | env_fn = env = env_factory( 50 | run_args.env_name, 51 | command_profile=run_args.command_profile, 52 | input_profile=run_args.input_profile, 53 | simrate=run_args.simrate, 54 | dynamics_randomization=run_args.dyn_random, 55 | mirror=run_args.mirror, 56 | learn_gains=run_args.learn_gains, 57 | reward=run_args.reward, 58 | history=run_args.history, 59 | no_delta=run_args.no_delta, 60 | traj=run_args.traj, 61 | ik_baseline=run_args.ik_baseline 62 | ) 63 | cassie_env = env_fn() 64 | policy = torch.load(os.path.join(args.path, "actor.pt")) 65 | if args.eval: 66 | policy.eval() 67 | if hasattr(policy, 'init_hidden_state'): 68 | policy.init_hidden_state() 69 | 70 | # TODO: make returning/save data in file 
inside function consist for all testing functions 71 | def test_commands(cassie_env, policy, args): 72 | print("Testing speed and orient commands") 73 | if args.n_procs == 1: 74 | save_data = eval_commands(cassie_env, policy, num_steps=args.n_steps, num_commands=args.n_commands, 75 | max_speed=args.max_speed, min_speed=args.min_speed, num_iters=args.n_iter) 76 | np.save(os.path.join(args.path, "eval_commands.npy"), save_data) 77 | else: 78 | eval_commands_multi(env_fn, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed, 79 | min_speed=args.min_speed, num_iters=args.n_iter, num_procs=args.n_procs, filename=os.path.join(args.path, "eval_commands.npy")) 80 | 81 | def test_perturbs(cassie_env, policy, args): 82 | print("Testing perturbations") 83 | if args.n_procs == 1: 84 | save_data = compute_perturbs(cassie_env, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, 85 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles) 86 | else: 87 | save_data = compute_perturbs_multi(env_fn, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, 88 | perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles, num_procs=args.n_procs) 89 | np.save(os.path.join(args.path, "eval_perturbs.npy"), save_data) 90 | 91 | # If not command line arg, assume run all tests 92 | if args.test == "full": 93 | print("Running full test") 94 | test_commands(cassie_env, policy, args) 95 | test_perturbs(cassie_env, policy, args) 96 | elif args.test == "commands": 97 | test_commands(cassie_env, policy, args) 98 | elif args.test == "perturb": 99 | test_perturbs(cassie_env, policy, args) 100 | elif args.test == "mission": 101 | missions = ["straight", "curvy", "90_left", "90_right"] 102 | if not args.viz: 103 | print("Testing missions") 104 | save_data = [] 105 | 106 | for mission in missions: 107 | print(mission + " mission:") 108 | cassie_env = CassiePlayground(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mission=mission) 109 | save_data.append(eval_mission(cassie_env, policy)) 110 | np.save(os.path.join(args.path, "eval_missions.npy"), save_data) 111 | else: 112 | save_data = np.load(os.path.join(args.path, "eval_missions.npy"), allow_pickle=True) 113 | plot_mission_data(save_data, missions) 114 | elif args.test == "sensitivity": 115 | print("Testing sensitivity") 116 | save_data = eval_sensitivity(cassie_env, policy, incr=args.sens_incr, hi_factor=args.hi_factor, lo_factor=args.lo_factor) 117 | print(save_data) 118 | np.save(os.path.join(args.path, "eval_sensitivity.npy"), save_data) 119 | elif args.test == "compare": 120 | print("running compare") 121 | compare_pols(args.path, args.path2) 122 | 123 | # vis_commands(cassie_env, policy, num_steps=200, num_commands=6, max_speed=3, min_speed=0) 124 | # save_data = eval_commands(cassie_env, policy, num_steps=200, num_commands=2, max_speed=3, min_speed=0, num_iters=1) 125 | # np.save("./test_eval_commands.npy", save_data) 126 | # eval_commands_multi(env_fn, policy, num_steps=200, num_commands=4, max_speed=3, min_speed=0, num_iters=4, num_procs=4) 127 | 128 | # report_stats("./test_eval_commands.npy") 129 | -------------------------------------------------------------------------------- /tools/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/.DS_Store -------------------------------------------------------------------------------- /tools/aslip_tests/GRF_2KHz.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/GRF_2KHz.pkl -------------------------------------------------------------------------------- /tools/aslip_tests/plots/footpos_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/aslip_tests/plots/footpos_err.png -------------------------------------------------------------------------------- /tools/cassie_top_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/cassie_top_white.png -------------------------------------------------------------------------------- /tools/command_trajectory.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/command_trajectory.pkl -------------------------------------------------------------------------------- /tools/compare_pols.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import fpdf 4 | from .eval_perturb import plot_perturb 5 | 6 | def process_commands(data): 7 | stats = {} 8 | num_iters = data.shape[0] 9 | pass_rate = np.sum(data[:, 0]) / num_iters 10 | stats["Pass Rate"] = pass_rate 11 | success_inds = np.where(data[:, 0] == 1)[0] 12 | speed_fail_inds = np.where(data[:, 1] == 0)[0] 13 | orient_fail_inds = np.where(data[:, 1] == 1)[0] 14 | 15 | speed_change = data[speed_fail_inds, 4] 16 | orient_change = data[orient_fail_inds, 5] 17 | speed_neg_inds = np.where(speed_change < 0) 18 | speed_pos_inds = np.where(speed_change > 0) 19 | orient_neg_inds = np.where(orient_change < 0) 20 | orient_pos_inds = np.where(orient_change > 0) 21 | stats["Number of speed fails"] = len(speed_fail_inds) 22 | stats["Number of orient fails"] = len(orient_fail_inds) 23 | if len(speed_fail_inds) == 0: 24 | avg_pos_speed = "N/A" 25 | avg_neg_speed = "N/A" 26 | else: 27 | avg_pos_speed = np.mean(speed_change[speed_pos_inds]) 28 | avg_neg_speed = np.mean(speed_change[speed_neg_inds]) 29 | if len(orient_fail_inds) == 0: 30 | avg_pos_orient = "N/A" 31 | avg_neg_orient = "N/A" 32 | else: 33 | avg_pos_orient = np.mean(orient_change[orient_pos_inds]) 34 | avg_neg_orient = np.mean(orient_change[orient_neg_inds]) 35 | 36 | stats["Avg pos speed fails"] = avg_pos_speed 37 | stats["Avg neg speed fails"] = avg_neg_speed 38 | stats["Avg pos_orient fails"] = avg_pos_orient 39 | stats["Avg neg_orient fails"] = avg_neg_orient 40 | 41 | return stats 42 | 43 | def process_perturbs(data): 44 | stats = {} 45 | num_angles, num_phases = data.shape 46 | angles = 360*np.linspace(0, 1, num_angles+1) 47 | 48 | stats["Avg Force"] = round(np.mean(data), 2) 49 | stats["Max Force"] = np.max(data) 50 | max_ind = np.unravel_index(np.argmax(data, axis=None), data.shape) 51 | stats["Max Location (angle, phase)"] = (str(round(angles[max_ind[0]], 2))+chr(176), max_ind[1]) 52 | angle_avg = np.mean(data, axis=1) 53 | phase_avg = np.mean(data, axis=0) 54 | 
stats["Most Robust Angle"] = angles[np.argmax(angle_avg)] 55 | stats["Most Robust Phase"] = np.argmax(phase_avg) 56 | 57 | return stats 58 | 59 | 60 | # Note that for the spacing of the multi_cells to work out, this function assumes that 61 | # pol1's name is at least longer than pol2's name 62 | def draw_headers(pdf, pol1, pol2, key_col_width, min_width): 63 | epw = pdf.w - 2*pdf.l_margin 64 | th = pdf.font_size 65 | pol1_width = max(pdf.get_string_width(pol1), min_width) + 0.1 66 | pol2_width = max(pdf.get_string_width(pol2), min_width) + 0.1 67 | pol2_split = False 68 | if pol1_width + pol2_width + key_col_width>= epw: 69 | pol1_width = (epw - key_col_width) / 2 70 | if pol2_width > pol1_width: 71 | pol2_split = True 72 | pol2_width = pol1_width 73 | 74 | start_x = pdf.get_x() 75 | start_y = pdf.get_y() 76 | pdf.set_x(start_x + key_col_width) 77 | 78 | # Draw pol1 and pol2 multicell first to figure out y height 79 | pdf.multi_cell(pol1_width, 2*th, pol1, border=1, align="C") 80 | pol1_height = pdf.get_y() - start_y 81 | 82 | pdf.set_xy(start_x+key_col_width+pol1_width, start_y) 83 | if pol2_split: 84 | pdf.multi_cell(pol2_width, 2*th, pol2, border=1, align="C") 85 | else: 86 | pdf.cell(pol2_width, pol1_height, pol2, border=1, align="C") 87 | pdf.set_xy(start_x, start_y) 88 | pdf.cell(key_col_width, pol1_height, "", border=1, align="C") 89 | pdf.set_xy(start_x, start_y + pol1_height) 90 | 91 | return pol1_width, pol2_width 92 | 93 | def compare_pols(pol1, pol2): 94 | pol1 = pol1.strip("/") 95 | pol2 = pol2.strip("/") 96 | # For spacing concerns later, need pol1 to be the "longer" (name wise) of the two 97 | if len(os.path.basename(pol2)) > len(os.path.basename(pol1)): 98 | temp = pol1 99 | pol1 = pol2 100 | pol2 = temp 101 | pol1_name = os.path.basename(pol1) 102 | pol2_name = os.path.basename(pol2) 103 | print("pol1: ", pol1_name) 104 | print("pol2: ", pol2_name) 105 | 106 | # Initial PDF setup 107 | pdf = fpdf.FPDF(format='letter', unit='in') 108 | pdf.add_page() 109 | pdf.set_font('Times','',10.0) 110 | # Effective page width, or just epw 111 | epw = pdf.w - 2*pdf.l_margin 112 | th = pdf.font_size 113 | # Set title 114 | pdf.cell(epw, 2*th, "Policy Robustness Comparison", 0, 1, "C") 115 | pdf.ln(2*th) 116 | 117 | # Print command test table 118 | pol1_command = np.load(os.path.join(pol1, "eval_commands.npy")) 119 | pol2_command = np.load(os.path.join(pol2, "eval_commands.npy")) 120 | pol1_command_stats = process_commands(pol1_command) 121 | pol2_command_stats = process_commands(pol2_command) 122 | 123 | pdf.cell(epw, 2*th, "Command Test", 0, 1, "L") 124 | pdf.ln(th) 125 | # Set column widths 126 | key_col_width = pdf.get_string_width(max(pol2_command_stats.keys(), key=len)) + .2 127 | 128 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(9.9999))) 129 | 130 | for key in pol2_command_stats.keys(): 131 | pdf.cell(key_col_width, 2*th, key, border=1, align="C") 132 | pdf.cell(pol1_width, 2*th, str(round(pol1_command_stats[key], 4)), border=1, align="C") 133 | pdf.cell(pol2_width, 2*th, str(round(pol2_command_stats[key], 4)), border=1, align="C") 134 | pdf.ln(2*th) 135 | 136 | # Print perturb test table 137 | pdf.ln(2*th) 138 | pdf.cell(epw, 2*th, "Perturbation Test", 0, 1, "L") 139 | pdf.ln(th) 140 | pol1_perturb = np.load(os.path.join(pol1, "eval_perturbs.npy")) 141 | pol2_perturb = np.load(os.path.join(pol2, "eval_perturbs.npy")) 142 | pol1_perturb_stats = process_perturbs(pol1_perturb) 143 | pol2_perturb_stats = 
process_perturbs(pol2_perturb) 144 | 145 | # Set column widths 146 | key_col_width = pdf.get_string_width(max(pol2_perturb_stats.keys(), key=len)) + .2 147 | pol1_width, pol2_width = draw_headers(pdf, pol1_name, pol2_name, key_col_width, pdf.get_string_width(str(999.99))) 148 | 149 | for key in pol2_perturb_stats.keys(): 150 | pdf.cell(key_col_width, 2*th, key, border=1, align="C") 151 | pdf.cell(pol1_width, 2*th, str(pol1_perturb_stats[key]), border=1, align="C") 152 | pdf.cell(pol2_width, 2*th, str(pol2_perturb_stats[key]), border=1, align="C") 153 | pdf.ln(2*th) 154 | 155 | max_force = max(np.max(np.mean(pol1_perturb, axis=1)), np.max(np.mean(pol2_perturb, axis=1))) 156 | max_force = 50*np.ceil(max_force / 50) 157 | pol1_perturb_plot = os.path.join(pol1, "perturb_plot.png") 158 | pol2_perturb_plot = os.path.join(pol2, "perturb_plot.png") 159 | plot_perturb(os.path.join(pol1, "eval_perturbs.npy"), pol1_perturb_plot, max_force) 160 | plot_perturb(os.path.join(pol2, "eval_perturbs.npy"), pol2_perturb_plot, max_force) 161 | pdf.ln(2*th) 162 | 163 | pdf.cell(epw, 2*th, "Perturbation Plot", 0, 1, "L") 164 | pol2_split = False 165 | if pdf.get_string_width(pol2) > epw / 2: 166 | pol2_split = True 167 | start_x = pdf.get_x() 168 | start_y = pdf.get_y() 169 | pdf.multi_cell(epw/2, 2*th, pol1_name, border=0, align="C") 170 | pol1_height = pdf.get_y() - start_y 171 | pdf.set_xy(start_x+epw/2, start_y) 172 | if pol2_split: 173 | pdf.multi_cell(epw/2, 2*th, pol2_name, border=0, align="C") 174 | else: 175 | pdf.cell(epw/2, pol1_height, pol2_name, border=0, align="C") 176 | pdf.set_xy(start_x, start_y+pol1_height) 177 | pdf.image(pol1_perturb_plot, x=start_x, y=start_y+pol1_height, w = epw/2-.1) 178 | pdf.image(pol2_perturb_plot, x=start_x+epw/2, y = start_y+pol1_height, w = epw/2-.1) 179 | 180 | pdf.output("./policy_compare.pdf") 181 | 182 | 183 | -------------------------------------------------------------------------------- /tools/eval_mission.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append("..") # Adds higher directory to python modules path. 
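# Hypothetical usage sketch (an assumption, not taken from the repository's own scripts):
# eval_mission() below expects an environment that exposes a mission via env.command_traj
# (e.g. the CassiePlayground env imported elsewhere in this repo) and a policy callable on a
# state tensor. Something like:
#
#   import torch
#   from cassie import CassiePlayground            # import path used elsewhere in this repo
#   policy = torch.load("path/to/actor.pt")        # hypothetical checkpoint path
#   env = CassiePlayground()                       # constructor arguments assumed
#   save_data = eval_mission(env, policy, num_iters=2)
#   plot_mission_data([save_data], ["default"])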
3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib.colors as mcolors 7 | import matplotlib as mpl 8 | import torch 9 | import time 10 | import cmath 11 | import math 12 | import ray 13 | from functools import partial 14 | 15 | # from cassie import CassieEnv 16 | 17 | def quaternion2euler(quaternion): 18 | w = quaternion[0] 19 | x = quaternion[1] 20 | y = quaternion[2] 21 | z = quaternion[3] 22 | ysqr = y * y 23 | 24 | t0 = +2.0 * (w * x + y * z) 25 | t1 = +1.0 - 2.0 * (x * x + ysqr) 26 | X = math.degrees(math.atan2(t0, t1)) 27 | 28 | t2 = +2.0 * (w * y - z * x) 29 | t2 = +1.0 if t2 > +1.0 else t2 30 | t2 = -1.0 if t2 < -1.0 else t2 31 | Y = math.degrees(math.asin(t2)) 32 | 33 | t3 = +2.0 * (w * z + x * y) 34 | t4 = +1.0 - 2.0 * (ysqr + z * z) 35 | Z = math.degrees(math.atan2(t3, t4)) 36 | 37 | result = np.zeros(3) 38 | result[0] = X * np.pi / 180 39 | result[1] = Y * np.pi / 180 40 | result[2] = Z * np.pi / 180 41 | 42 | return result 43 | 44 | @torch.no_grad() 45 | def eval_mission(cassie_env, policy, num_iters=2): 46 | # save data holds deviation between robot xy pos, z orient, xy velocity and specified pos, orient, velocity from mission 47 | # if mission ends early (robot height fall over indicator) 48 | 49 | runs = [] 50 | pass_data = np.zeros(num_iters) # whether or not robot stayed alive during mission 51 | 52 | for j in range(num_iters): 53 | mission_len = cassie_env.command_traj.trajlen 54 | run_data = [] 55 | state = torch.Tensor(cassie_env.reset_for_test()) 56 | count, passed, done = 0, 1, False 57 | while count < mission_len and not done: 58 | # cassie_env.render() 59 | # Get action and act 60 | action = policy(state, True) 61 | action = action.data.numpy() 62 | state, reward, done, _ = cassie_env.step(action) 63 | state = torch.Tensor(state) 64 | # See if end state reached 65 | if done or cassie_env.sim.qpos()[2] < 0.4: 66 | passed = 0 67 | print("mission failed") 68 | # Get command info, robot info 69 | commanded_pos = cassie_env.command_traj.global_pos[:,0:2] 70 | commanded_speed = cassie_env.command_traj.speed_cmd 71 | commanded_orient = cassie_env.command_traj.orient 72 | qpos = cassie_env.sim.qpos() 73 | qvel = cassie_env.sim.qvel() 74 | actual_pos = qpos[0:2] # only care about x and y 75 | actual_speed = np.linalg.norm(qvel[0:2]) 76 | actual_orient = quaternion2euler(qpos[3:7])[2] # only care about yaw 77 | # Calculate pos,vel,orient deviation as vector difference 78 | pos_error = np.linalg.norm(actual_pos - commanded_pos) 79 | speed_error = np.linalg.norm(actual_speed - commanded_speed) 80 | orient_error = np.linalg.norm(actual_orient - commanded_orient) 81 | # Log info 82 | run_data.append(([count, pos_error, speed_error, orient_error])) 83 | count += 1 84 | if passed: 85 | print("mission passed") 86 | pass_data[j] = 1 87 | runs.append(np.array(run_data)) 88 | 89 | # summary stats 90 | run_lens = [len(run) for run in runs] 91 | print("longest / shortest / average steps : {} / {} / {}".format(max(run_lens), min(run_lens), sum(run_lens) / len(run_lens))) 92 | 93 | save_data = dict() 94 | save_data["runs"] = runs 95 | save_data["pass"] = pass_data 96 | 97 | return save_data 98 | 99 | 100 | def plot_mission_data(save_data, missions): 101 | num_missions = len(save_data) 102 | fig, axs = plt.subplots(num_missions, 3, figsize=(num_missions*5, 15)) 103 | for i in range(num_missions): 104 | mission_runs = save_data[i]["runs"] 105 | for run in mission_runs: 106 | axs[i][0].plot(run[:, 0], run[:, 1]) 107 | axs[i][1].plot(run[:, 0], run[:, 2]) 108 
| axs[i][2].plot(run[:, 0], run[:, 3]) 109 | axs[i][1].set_title(missions[i]) # only put title on middle plot 110 | [axs[i][j].set_xlabel("steps") for j in range(3)] 111 | [axs[i][j].set_ylabel("error") for j in range(3)] 112 | plt.tight_layout(pad=3.0) 113 | plt.show() -------------------------------------------------------------------------------- /tools/eval_sensitivity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import time 4 | import math 5 | 6 | #from eval_perturb import reset_to_phase 7 | 8 | @torch.no_grad() 9 | def sensitivity_sweep(cassie_env, policy, factor): 10 | # Pelvis: 0->5 11 | # Hips: 6->8 and 19->21 12 | # Achilles: 9->11 and 22->24 13 | # Knees: 12 and 25 14 | # Tarsus: 14 and 27 15 | # 16 | # Total number of parameters: 17 17 | 18 | #parameter_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 20, 21, 9, 10, 11, 22, 23, 19 | # 24, 12, 25, 14, 27] 20 | 21 | default_damp = cassie_env.default_damping 22 | parameter_ids = [(0, 5), (6, 8), (19, 21), (9, 11), (22, 24), (12), (25), 23 | (14), (27)] 24 | 25 | count = np.zeros(len(parameter_ids)) 26 | for i in range(9): 27 | damp_range = np.copy(default_damp) 28 | if type(parameter_ids[i]) is tuple: 29 | for j in range(parameter_ids[i][0], parameter_ids[i][1]+1): 30 | # Set damp sweep 31 | damp_range[j] = default_damp[j] * factor 32 | else: 33 | damp_id = parameter_ids[i] 34 | damp_range[damp_id] = default_damp[damp_id] * factor 35 | 36 | 37 | state = torch.Tensor(cassie_env.full_reset()) 38 | cassie_env.sim.set_dof_damping(np.clip(damp_range, 0, None)) 39 | cassie_env.speed = 1 40 | cassie_env.side_speed = 0 41 | cassie_env.phase_add = 1 42 | 43 | curr_time = time.time() 44 | curr_time = cassie_env.sim.time() 45 | start_t = curr_time 46 | while curr_time < start_t + 15: 47 | action = policy(state, True) 48 | action = action.data.numpy() 49 | state, reward, done, _ = cassie_env.step(action) 50 | state = torch.Tensor(state) 51 | curr_time = cassie_env.sim.time() 52 | if cassie_env.sim.qpos()[2] < 0.4: 53 | count[i] = 1 54 | break 55 | 56 | return count 57 | 58 | @torch.no_grad() 59 | def eval_sensitivity(cassie_env, policy, incr, hi_factor, lo_factor): 60 | # this is dumb 61 | lo = 1.0 62 | lo_cnt = 0 63 | while lo >= lo_factor: 64 | lo -= incr 65 | lo_cnt += 1 66 | 67 | num_iters = int(hi_factor / incr) + lo_cnt + 1 68 | 69 | counter = 0 70 | 71 | # Matrix with the num_iters rows, and 9 + 1 columns. the first column is 72 | # the value of damping. the next nine indicate the parameter, 1 is a 73 | # failure at the value, 0 means either no failure or default val. 
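    # A concrete made-up example of one such row (values are illustrative only):
    #     ret[k] == [1.3, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # would read: at a damping factor of 1.3, only the third parameter group
    # (the hip dampings 19-21 in sensitivity_sweep's parameter_ids) caused a fall;
    # every other group either survived or was left at its default value.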
74 | ret = np.zeros((num_iters, 10)) 75 | 76 | # Run the highs 77 | 78 | hi = 1.0 79 | 80 | while hi <= hi_factor: 81 | vals = sensitivity_sweep(cassie_env, policy, hi) 82 | ret[counter][0] = hi 83 | ret[counter][1:] = vals 84 | hi += incr 85 | counter += 1 86 | 87 | lo = 1.0 88 | 89 | # Run lo's 90 | for _ in range(lo_cnt): 91 | vals = sensitivity_sweep(cassie_env, policy, lo) 92 | ret[counter][0] = lo 93 | ret[counter][1:] = vals 94 | lo -= incr 95 | counter += 1 96 | 97 | # Report 98 | return ret 99 | -------------------------------------------------------------------------------- /tools/test_perturb_eval_phase.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/tools/test_perturb_eval_phase.npy -------------------------------------------------------------------------------- /tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .elements import * -------------------------------------------------------------------------------- /tools/utils/elements.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | from pygame.locals import * 3 | 4 | import math 5 | import numpy as np 6 | 7 | 8 | class Mouse: 9 | def __init__(self, px_2_m): 10 | self.px = 0 11 | self.py = 0 12 | self.vx = 0 13 | self.vy = 0 14 | self.radius = 0 15 | self.color = (100,200,100) 16 | self.px_2_m = px_2_m 17 | 18 | def get_position(self): 19 | return (self.px, self.py) 20 | 21 | def get_m_position(self): 22 | return (self.px / self.px_2_m, self.py / self.px_2_m) 23 | 24 | def get_velocity(self): 25 | return (self.vx, self.vy) 26 | 27 | def update(self, time_passed): 28 | prev_p = self.get_position() 29 | self.px, self.py = pygame.mouse.get_pos() 30 | if time_passed > 0: 31 | self.vx = (self.px - prev_p[0]) / time_passed 32 | self.vy = (self.py - prev_p[1]) / time_passed 33 | 34 | def render(self, screen): 35 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius) 36 | 37 | class Robot: 38 | def __init__(self, trajectory, time_passed, frequency): 39 | 40 | # action space is forward velocity and heading 41 | self.positions = trajectory.positions 42 | self.velocities = trajectory.vels 43 | self.thetas = trajectory.thetas 44 | self.accels = trajectory.accels 45 | self.trajlen = len(trajectory.positions) 46 | 47 | # ground truth's position: 48 | self.t_px = int(self.positions[0][0]) 49 | self.t_py = int(self.positions[0][1]) 50 | 51 | # follower's pos 52 | self.f_px = int(self.positions[0][0]) 53 | self.f_py = int(self.positions[0][1]) 54 | 55 | self.radius = 10 56 | self.color = (50,50,200) # direct position tracker 57 | self.color2 = (200,50,50) # velocity + angle tracker 58 | 59 | self.frequency = frequency 60 | self.prev_time = self.prev_inc_time = time_passed 61 | self.counter = 0 62 | self.count_inc = 1 63 | 64 | def update(self,time_passed): 65 | 66 | curr_accel = self.accels[self.counter] 67 | curr_vel = self.velocities[self.counter] 68 | curr_theta = self.thetas[self.counter] 69 | track_pos = self.positions[self.counter] 70 | 71 | # print((curr_vel, curr_theta, np.cos(curr_theta), np.sin(curr_theta))) 72 | 73 | # ground truth's new position: 74 | self.t_px, self.t_py = track_pos[0], track_pos[1] 75 | 76 | # follower's new position: execute angle and velocity command for time passed 77 | t_diff = time_passed - self.prev_time 78 | vx, vy = curr_vel * np.cos(curr_theta), 
curr_vel * np.sin(curr_theta) 79 | ax, ay = curr_accel * np.cos(curr_theta), curr_accel * np.sin(curr_theta) 80 | # gotta subtract the y velocity add because pygame counts y from top down 81 | self.f_px, self.f_py = self.f_px + vx * t_diff + 0.5 * ax * t_diff**2, self.f_py - vy * t_diff + 0.5 * ay * t_diff**2 82 | # self.f_px, self.f_py = self.f_px + vx * t_diff, self.f_py - vy * t_diff 83 | 84 | # increment t_idx on 30 Hz cycle 85 | if time_passed - self.prev_inc_time > (1 / self.frequency): 86 | self.counter += 1 87 | self.prev_inc_time = time_passed 88 | 89 | self.prev_time = time_passed 90 | 91 | # check if we need to restart 92 | if self.counter == self.trajlen: 93 | self.counter = 0 94 | self.f_px, self.f_py = int(self.positions[0][0]),int(self.positions[0][1]) 95 | 96 | def return_info(self, px_2_m): 97 | 98 | # thetas are the yaw angle of the robot 99 | thetas_rotated = self.thetas # no rotation for now 100 | # center of mass position is x y position converted to meters, with constant z height 101 | positions_in_meters = np.array( [[self.trajectory[i][0] / px_2_m - self.trajectory[0][0] / px_2_m, self.trajectory[i][1] / px_2_m - self.trajectory[0][1] / px_2_m, 1.0] for i in range(len(self.trajectory))] ) 102 | velocities_in_meters = np.array( [self.velocities[i] / px_2_m for i in range(len(self.velocities))] ) 103 | 104 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(positions_in_meters, velocities_in_meters, thetas_rotated)) 105 | 106 | return positions_in_meters, velocities_in_meters, thetas_rotated 107 | 108 | def render(self,screen): 109 | pygame.draw.circle(screen,self.color,(int(self.t_px),int(self.t_py)),self.radius) 110 | pygame.draw.circle(screen,self.color2,(int(self.f_px),int(self.f_py)),self.radius) 111 | # pygame.transform.rotate(screen, np.radians(self.theta)) 112 | 113 | class Waypoint: 114 | def __init__(self, mouse_position): 115 | self.px = mouse_position[0] 116 | self.py = mouse_position[1] 117 | self.radius = 5 118 | self.color = (100,200,100) 119 | 120 | def get_position(self): 121 | return (self.px, self.py) 122 | 123 | def render(self, screen): 124 | pygame.draw.circle(screen, self.color, (self.px, self.py), self.radius) 125 | 126 | class Trajectory: 127 | def __init__(self, t_new, positions, thetas, vels, accels): 128 | self.param = t_new 129 | self.positions = positions 130 | self.thetas = thetas 131 | self.vels = vels 132 | self.accels = accels 133 | self.width = 2 134 | self.color = (100,200,100) 135 | self.arrow_color = (200,200,200) 136 | self.arrow_length = 20.0 137 | 138 | def render(self, screen): 139 | scaled_vels = self.vels / np.max(self.vels) * self.arrow_length 140 | pygame_poses = [] 141 | for i in range(len(self.positions)): 142 | # pygame.draw.aaline(screen, self.color, self.positions[i-1], self.positions[i]) 143 | # print(self.positions[i]) 144 | pygame_poses.append((int(self.positions[i][0]), int(self.positions[i][1]))) 145 | # circle for pos 146 | pygame.draw.circle(screen, self.color, pygame_poses[-1], self.width) 147 | for i in range(len(self.thetas)): 148 | # calculate next pos 149 | pos2 = (pygame_poses[i][0] + scaled_vels[i] * np.cos(self.thetas[i]) , pygame_poses[i][1] - scaled_vels[i] * np.sin(self.thetas[i])) 150 | # arrow for angle and vel 151 | pygame.draw.line(screen, self.arrow_color, pygame_poses[i], pos2) 152 | 153 | def prepare_for_export(self, scale_factor, screen_height): 154 | 155 | self.positions = [[self.positions[i][0] / scale_factor, (screen_height - self.positions[i][1]) / scale_factor, 1.0] for i 
in range(len(self.positions))] 156 | self.positions = [[self.positions[i][0]-self.positions[0][0], self.positions[i][1]-self.positions[0][1], self.positions[i][2]] for i in range(len(self.positions))] 157 | 158 | self.vels = [self.vels[i] / scale_factor for i in range(len(self.vels))] 159 | 160 | print("positions:\n{}\n\nvelocities:\n{}\n\norient:\n{}\n".format(self.positions[:5], self.vels[:5], self.thetas[:5])) 161 | print("max vel: {}".format(np.max(self.vels))) 162 | 163 | class Grid: 164 | def __init__(self, screen_width, screen_height, px_2_m): 165 | self.px_2_m = px_2_m 166 | self.screen_height = screen_height 167 | self.screen_width = screen_width 168 | self.cell_height = px_2_m # approx height of 1m x 1m cell 169 | self.cell_width = px_2_m # approx width of 1m x 1m cell 170 | self.color = (90,90,90) 171 | 172 | def render(self, screen): 173 | # draw vertical lines 174 | for x in range(self.screen_height // self.px_2_m): 175 | pygame.draw.line(screen, self.color, (x * self.cell_width,0), (x * self.cell_width,self.screen_height)) 176 | # draw horizontal lines 177 | for y in range(self.screen_width // self.px_2_m): 178 | pygame.draw.line(screen, self.color, (0, y * self.cell_height), (self.screen_width, y * self.cell_height)) 179 | -------------------------------------------------------------------------------- /tools/vis_input_and_state.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse 2 | sys.path.append("..") 3 | 4 | from cassie import CassieEnv, CassiePlayground 5 | from rl.policies.actor import GaussianMLP_Actor 6 | 7 | import matplotlib.pyplot as plt 8 | 9 | import pickle 10 | import numpy as np 11 | import torch 12 | import time 13 | 14 | def set_axes_equal(ax): 15 | '''Make axes of 3D plot have equal scale so that spheres appear as spheres, 16 | cubes as cubes, etc.. This is one possible solution to Matplotlib's 17 | ax.set_aspect('equal') and ax.axis('equal') not working for 3D. 18 | 19 | Input 20 | ax: a matplotlib axis, e.g., as output from plt.gca(). 21 | ''' 22 | 23 | x_limits = ax.get_xlim3d() 24 | y_limits = ax.get_ylim3d() 25 | z_limits = ax.get_zlim3d() 26 | 27 | x_range = abs(x_limits[1] - x_limits[0]) 28 | x_middle = np.mean(x_limits) 29 | y_range = abs(y_limits[1] - y_limits[0]) 30 | y_middle = np.mean(y_limits) 31 | z_range = abs(z_limits[1] - z_limits[0]) 32 | z_middle = np.mean(z_limits) 33 | 34 | # The plot bounding box is a sphere in the sense of the infinity 35 | # norm, hence I call half the max range the plot radius. 
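    # Worked (hypothetical) numbers: with x_limits (0, 4), y_limits (0, 2) and
    # z_limits (0, 1) the ranges are 4, 2, 1, so plot_radius = 0.5 * 4 = 2 and
    # every axis below gets rescaled to span 4 units about its midpoint,
    # giving the 1:1:1 aspect ratio the docstring describes.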
36 | plot_radius = 0.5*max([x_range, y_range, z_range]) 37 | 38 | ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius]) 39 | ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius]) 40 | ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius]) 41 | 42 | 43 | def eval_policy(policy, args, run_args): 44 | 45 | aslip = True if run_args.traj == "aslip" else False 46 | 47 | cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history, reward=run_args.reward) 48 | cassie_env.debug = args.debug 49 | visualize = not args.no_viz 50 | traj_len = args.traj_len 51 | 52 | if aslip: 53 | traj_info = [] # 54 | traj_cmd_info = [] # what actually gets sent to robot as state 55 | robot_state_info = [] # robot's estimated state 56 | actual_state_info = [] # actual mujoco state of the robot 57 | 58 | state = torch.Tensor(cassie_env.reset_for_test()) 59 | cassie_env.update_speed(2.0) 60 | print(cassie_env.speed) 61 | count, passed, done = 0, 1, False 62 | while count < traj_len and not done: 63 | 64 | if visualize: 65 | cassie_env.render() 66 | 67 | # Get action and act 68 | action = policy(state, True) 69 | action = action.data.numpy() 70 | state, reward, done, _ = cassie_env.step(action) 71 | state = torch.Tensor(state) 72 | 73 | print(reward) 74 | 75 | # print(cassie_env.phase) 76 | 77 | # See if end state reached 78 | if done or cassie_env.sim.qpos()[2] < 0.4: 79 | print(done) 80 | passed = 0 81 | print("failed") 82 | 83 | # Get trajectory info and robot info 84 | if aslip: 85 | a, b, c, d = cassie_env.get_traj_and_state_info() 86 | traj_info.append(a) 87 | traj_cmd_info.append(b) 88 | else: 89 | c, d = cassie_env.get_state_info() 90 | robot_state_info.append(c) 91 | actual_state_info.append(d) 92 | 93 | count += 1 94 | 95 | robot_state_info = robot_state_info[:-1] 96 | actual_state_info = actual_state_info[:-1] 97 | 98 | if aslip: 99 | 100 | traj_info = traj_info[:-1] 101 | traj_cmd_info = traj_cmd_info[:-1] 102 | 103 | traj_info = np.array(traj_info) 104 | traj_cmd_info = np.array(traj_cmd_info) 105 | robot_state_info = np.array(robot_state_info) 106 | actual_state_info = np.array(actual_state_info) 107 | 108 | fig, axs = plt.subplots(2, 2, figsize=(10, 10)) 109 | 110 | # print(traj_info) 111 | 112 | print(traj_info.shape) 113 | axs[0][0].set_title("XZ plane of traj_info") 114 | axs[0][0].plot(traj_info[:,0,0], traj_info[:,0,2], 'o-', label='cpos') 115 | axs[0][0].plot(traj_info[:,1,0], traj_info[:,1,2], 'o-', label='lpos') 116 | axs[0][0].plot(traj_info[:,2,0], traj_info[:,2,2], 'o-', label='rpos') 117 | 118 | print(traj_cmd_info.shape) 119 | axs[0][1].set_title("XZ plane of traj_cmd_info") 120 | axs[0][1].plot(traj_cmd_info[:,0,0], traj_cmd_info[:,0,2], label='cpos') 121 | axs[0][1].plot(traj_cmd_info[:,1,0], traj_cmd_info[:,1,2], label='lpos') 122 | axs[0][1].plot(traj_cmd_info[:,2,0], traj_cmd_info[:,2,2], label='rpos') 123 | 124 | print(robot_state_info.shape) 125 | axs[1][0].set_title("XZ plane of robot_state_info") 126 | axs[1][0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos') 127 | axs[1][0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos') 128 | axs[1][0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos') 129 | 130 | print(actual_state_info.shape) 131 | axs[1][1].set_title("XZ plane of actual_state_info") 132 | axs[1][1].plot(actual_state_info[:,0,0], 
actual_state_info[:,0,2], label='cpos') 133 | axs[1][1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos') 134 | axs[1][1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos') 135 | 136 | plt.legend() 137 | plt.tight_layout() 138 | plt.show() 139 | 140 | else: 141 | 142 | robot_state_info = np.array(robot_state_info) 143 | actual_state_info = np.array(actual_state_info) 144 | 145 | fig, axs = plt.subplots(1, 2, figsize=(10, 10)) 146 | 147 | print(robot_state_info.shape) 148 | axs[0].set_title("XZ plane of robot_state_info") 149 | axs[0].plot(robot_state_info[:,0,0], robot_state_info[:,0,2], label='cpos') 150 | axs[0].plot(robot_state_info[:,1,0], robot_state_info[:,1,2], label='lpos') 151 | axs[0].plot(robot_state_info[:,2,0], robot_state_info[:,2,2], label='rpos') 152 | 153 | print(actual_state_info.shape) 154 | axs[1].set_title("XZ plane of actual_state_info") 155 | axs[1].plot(actual_state_info[:,0,0], actual_state_info[:,0,2], label='cpos') 156 | axs[1].plot(actual_state_info[:,1,0], actual_state_info[:,1,2], label='lpos') 157 | axs[1].plot(actual_state_info[:,2,0], actual_state_info[:,2,2], label='rpos') 158 | 159 | plt.legend() 160 | plt.tight_layout() 161 | plt.show() 162 | 163 | 164 | parser = argparse.ArgumentParser() 165 | parser.add_argument("--path", type=str, default="../trained_models/ppo/Cassie-v0/IK_traj-aslip_aslip_old_2048_12288_seed-10/", help="path to folder containing policy and run details") 166 | parser.add_argument("--traj_len", default=30, type=int) 167 | parser.add_argument("--debug", default=False, action='store_true') 168 | parser.add_argument("--no_viz", default=False, action='store_true') 169 | parser.add_argument("--eval", default=True, action="store_false", help="Whether to call policy.eval() or not") 170 | 171 | args = parser.parse_args() 172 | 173 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) 174 | 175 | policy = torch.load(args.path + "actor.pt") 176 | 177 | if args.eval: 178 | policy.eval() # NOTE: for some reason the saved nodelta_neutral_stateest_symmetry policy needs this but it breaks all new policies... 179 | 180 | eval_policy(policy, args, run_args) -------------------------------------------------------------------------------- /tools/vis_perturb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") # Adds higher directory to python modules path.
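# Hypothetical invocation (the run directory name is a placeholder; --path is the only
# required argument, per the argparse setup further down in this script):
#
#   python vis_perturb.py --path ../trained_models/ppo/Cassie-v0/<run_name>/
#
# The trailing slash matters because the script concatenates "experiment.pkl" and
# "actor.pt" directly onto args.path.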
3 | 4 | import argparse 5 | import pickle 6 | 7 | import numpy as np 8 | import torch 9 | import time 10 | import copy 11 | 12 | from cassie import CassieEnv 13 | 14 | # Will reset the env to the given phase by reset_for_test, and then 15 | # simulating 2 cycle then to the given phase 16 | @torch.no_grad() 17 | def reset_to_phase(env, policy, phase): 18 | state = torch.Tensor(cassie_env.reset_for_test()) 19 | for i in range(2*(env.phaselen + 1)): 20 | action = policy.act(state, True) 21 | action = action.data.numpy() 22 | state, reward, done, _ = cassie_env.step(action) 23 | state = torch.Tensor(state) 24 | for i in range(phase): 25 | action = policy.act(state, True) 26 | action = action.data.numpy() 27 | state, reward, done, _ = cassie_env.step(action) 28 | state = torch.Tensor(state) 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("--path", type=str, default=None, help="path to folder containing policy and run details") 32 | args = parser.parse_args() 33 | run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) 34 | 35 | # RUN_NAME = "7b7e24-seed0" 36 | # POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt" 37 | 38 | # Load environment and policy 39 | # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True) 40 | cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) 41 | policy = torch.load(args.path + "actor.pt") 42 | 43 | state = torch.Tensor(cassie_env.reset_for_test()) 44 | # cassie_env.sim.step_pd(self.u) 45 | cassie_env.speed = 0.5 46 | cassie_env.phase_add = 1 47 | num_steps = cassie_env.phaselen + 1 48 | # Simulate for "wait_time" first to stabilize 49 | for i in range(num_steps*2): 50 | action = policy(state, True) 51 | action = action.data.numpy() 52 | state, reward, done, _ = cassie_env.step(action) 53 | state = torch.Tensor(state) 54 | curr_time = cassie_env.sim.time() 55 | start_t = curr_time 56 | sim_t = time.time() 57 | while curr_time < start_t + 4: 58 | action = policy(state, True) 59 | action = action.data.numpy() 60 | state, reward, done, _ = cassie_env.step(action) 61 | state = torch.Tensor(state) 62 | curr_time = cassie_env.sim.time() 63 | print("sim time: ", time.time() - sim_t) 64 | exit() 65 | qpos_phase = np.zeros((35, num_steps)) 66 | qvel_phase = np.zeros((32, num_steps)) 67 | action_phase = np.zeros((10, num_steps)) 68 | cassie_state_phase = [copy.deepcopy(cassie_env.cassie_state)] 69 | # print("phase: ", cassie_env.phase) 70 | qpos_phase[:, 0] = cassie_env.sim.qpos() 71 | qvel_phase[:, 0] = cassie_env.sim.qvel() 72 | for i in range(num_steps-1): 73 | action = policy.act(state, True) 74 | action = action.data.numpy() 75 | action_phase[:, i] = action 76 | state, reward, done, _ = cassie_env.step(action) 77 | state = torch.Tensor(state) 78 | # print("phase: ", cassie_env.phase) 79 | qpos_phase[:, i+1] = cassie_env.sim.qpos() 80 | qvel_phase[:, i+1] = cassie_env.sim.qvel() 81 | cassie_state_phase.append(copy.deepcopy(cassie_env.cassie_state)) 82 | 83 | action = policy.act(state, True) 84 | action = action.data.numpy() 85 | action_phase[:, -1] = action 86 | state = torch.Tensor(cassie_env.reset_for_test()) 87 | 88 | cassie_env.speed = 0.5 89 | cassie_env.phase_add = 1 90 | wait_time = 4 91 | dt = 0.05 92 | speedup = 3 93 | perturb_time = 2 94 | perturb_duration = 0.2 95 | perturb_size = 170 96 | perturb_dir = -2*np.pi*np.linspace(0, 1, 5) # Angles from straight forward to 
apply force 97 | perturb_body = "cassie-pelvis" 98 | dir_idx = 0 99 | 100 | ###### Vis a single Perturbation for a given phase ###### 101 | test_phase = 0 102 | reset_to_phase(cassie_env, policy, test_phase) 103 | # cassie_env.sim.set_qpos(qpos_phase[:, test_phase]) 104 | # cassie_env.sim.set_qvel(qvel_phase[:, test_phase]) 105 | # cassie_env.cassie_state = cassie_state_phase[test_phase] 106 | # cassie_env.sim.set_cassie_state(cassie_state_phase[test_phase]) 107 | # cassie_env.phase = test_phase 108 | # state, reward, done, _ = cassie_env.step(action_phase[:, test_phase-1]) 109 | # state = torch.Tensor(state) 110 | render_state = cassie_env.render() 111 | force_x = perturb_size * np.cos(0) 112 | force_y = perturb_size * np.sin(0) 113 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 114 | # Apply perturb (if time) 115 | start_t = cassie_env.sim.time() 116 | while render_state: 117 | if (not cassie_env.vis.ispaused()): 118 | curr_time = cassie_env.sim.time() 119 | if curr_time < start_t+perturb_duration: 120 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body) 121 | # Done perturbing, reset perturb_time and xfrc_applied 122 | elif start_t+perturb_duration < curr_time < start_t+perturb_duration + wait_time: 123 | # print("curr time: ", curr_time) 124 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body) 125 | else: 126 | # pass 127 | print("passed") 128 | break 129 | 130 | # Get action 131 | action = policy.act(state, True) 132 | action = action.data.numpy() 133 | state, reward, done, _ = cassie_env.step(action) 134 | if cassie_env.sim.qpos()[2] < 0.4: 135 | print("failed") 136 | break 137 | else: 138 | state = torch.Tensor(state) 139 | render_state = cassie_env.render() 140 | time.sleep(dt / speedup) 141 | exit() 142 | 143 | ###### Vis all perturbations ###### 144 | render_state = cassie_env.render() 145 | force_x = perturb_size * np.cos(0) 146 | force_y = perturb_size * np.sin(0) 147 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 148 | while render_state: 149 | if (not cassie_env.vis.ispaused()): 150 | curr_time = cassie_env.sim.time() 151 | # Apply perturb (if time) 152 | if curr_time > perturb_time + wait_time: 153 | # Haven't perturbed for full time yet 154 | if curr_time < perturb_time + wait_time + perturb_duration: 155 | print("phase: ", cassie_env.phase) 156 | cassie_env.vis.apply_force([force_x, force_y, 0, 0, 0, 0], perturb_body) 157 | # Done perturbing, reset perturb_time and xfrc_applied 158 | else: 159 | cassie_env.vis.apply_force([0, 0, 0, 0, 0, 0], perturb_body) 160 | dir_idx += 1 161 | # Skip last direction, 0 is the same as 2*pi 162 | if dir_idx >= len(perturb_dir) - 1: 163 | dir_idx = 0 164 | perturb_size += 50 165 | force_x = perturb_size * np.cos(perturb_dir[dir_idx]) 166 | force_y = perturb_size * np.sin(perturb_dir[dir_idx]) 167 | print("Perturb angle: {}\t Perturb size: {} N".format(np.degrees(-perturb_dir[dir_idx]), perturb_size)) 168 | perturb_time = curr_time 169 | 170 | # Get action 171 | action = policy.act(state, True) 172 | action = action.data.numpy() 173 | state, reward, done, _ = cassie_env.step(action) 174 | if cassie_env.sim.qpos()[2] < 0.4: 175 | state = torch.Tensor(cassie_env.reset_for_test()) 176 | cassie_env.speed = 0.5 177 | cassie_env.phase_add = 1 178 | perturb_time = 0 179 | else: 180 | state = torch.Tensor(state) 181 | render_state = cassie_env.render() 182 | time.sleep(dt / speedup) 
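# A minimal sketch of the perturbation schedule the loop above implements (a condensed
# restatement of the same logic, with the 50 N increment taken from the code above):
# directions are swept at the current force magnitude, and once every direction has been
# tried the magnitude is increased.
#
#   def next_perturb(dir_idx, perturb_size, perturb_dir, increment=50):
#       dir_idx += 1
#       if dir_idx >= len(perturb_dir) - 1:      # last entry duplicates 0 == 2*pi
#           dir_idx = 0
#           perturb_size += increment
#       force_x = perturb_size * np.cos(perturb_dir[dir_idx])
#       force_y = perturb_size * np.sin(perturb_dir[dir_idx])
#       return dir_idx, perturb_size, force_x, force_y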
-------------------------------------------------------------------------------- /trained_models/5k_retrain/actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/actor.pt -------------------------------------------------------------------------------- /trained_models/5k_retrain/critic.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/critic.pt -------------------------------------------------------------------------------- /trained_models/5k_retrain/eval_commands.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_commands.npy -------------------------------------------------------------------------------- /trained_models/5k_retrain/eval_perturbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/eval_perturbs.npy -------------------------------------------------------------------------------- /trained_models/5k_retrain/experiment.info: -------------------------------------------------------------------------------- 1 | algo_name: ppo 2 | clip: 0.2 3 | clock_based: True 4 | dyn_random: False 5 | entropy_coeff: 0.0 6 | env_name: Cassie-v0 7 | epochs: 5 8 | eps: 1e-05 9 | gamma: 0.99 10 | history: 0 11 | input_norm_steps: 100 12 | lam: 0.95 13 | lr: 0.0001 14 | max_grad_norm: 0.05 15 | max_traj_len: 300 16 | minibatch_size: 2048 17 | mirror: True 18 | n_itr: 20000 19 | name: model 20 | no_delta: True 21 | num_procs: 64 22 | num_steps: 187 23 | previous: None 24 | recurrent: False 25 | redis_address: None 26 | reward: 5k_speed_reward 27 | simrate: 60 28 | state_est: True 29 | traj: walking 30 | use_gae: False 31 | viz_port: 8097 32 | -------------------------------------------------------------------------------- /trained_models/5k_retrain/experiment.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/5k_retrain/experiment.pkl -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/5k_test.pkl -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/actor.pt -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/critic.pt -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_commands.npy -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/eval_perturbs.npy -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info: -------------------------------------------------------------------------------- 1 | command_profile: clock 2 | dyn_random: False 3 | env_name: Cassie-v0 4 | history: 0 5 | ik_baseline: None 6 | input_profile: full 7 | learn_gains: False 8 | mirror: True 9 | no_delta: True 10 | recurrent: False 11 | reward: 5k_speed_reward 12 | simrate: 60 13 | traj: None 14 | -------------------------------------------------------------------------------- /trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osudrl/apex/82c44af2b5d7bffe884fd2f69856bb1b3c9948e0/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl -------------------------------------------------------------------------------- /util/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | 6 | from cassie import CassieEnv, CassieTrajEnv, CassiePlayground, CassieStandingEnv 7 | 8 | def env_factory(path, command_profile="clock", input_profile="full", simrate=50, dynamics_randomization=True, mirror=False, learn_gains=False, reward=None, history=0, no_delta=True, traj=None, ik_baseline=False, **kwargs): 9 | from functools import partial 10 | 11 | """ 12 | Returns an *uninstantiated* environment constructor. 13 | 14 | Since environments containing cpointers (e.g. Mujoco envs) can't be serialized, 15 | this allows us to pass their constructors to Ray remote functions instead 16 | (since the gym registry isn't shared across ray subprocesses we can't simply 17 | pass gym.make() either) 18 | 19 | Note: env.unwrapped.spec is never set, if that matters for some reason. 
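    Hypothetical usage (argument values are only illustrative):

        env_fn = env_factory("Cassie-v0", command_profile="clock", input_profile="full",
                             dynamics_randomization=True, reward="5k_speed_reward")
        env = env_fn()   # instantiate inside the worker process, not before serializing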
20 | """ 21 | 22 | # Custom Cassie Environment 23 | if path in ['Cassie-v0', 'CassieTraj-v0', 'CassiePlayground-v0', 'CassieStandingEnv-v0']: 24 | 25 | if path == 'Cassie-v0': 26 | env_fn = partial(CassieEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 27 | elif path == 'CassieTraj-v0': 28 | env_fn = partial(CassieTrajEnv, traj=traj, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history) 29 | elif path == 'CassiePlayground-v0': 30 | env_fn = partial(CassiePlayground, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 31 | elif path == 'CassieStandingEnv-v0': 32 | env_fn = partial(CassieStandingEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) 33 | 34 | if mirror: 35 | from rl.envs.wrappers import SymmetricEnv 36 | env_fn = partial(SymmetricEnv, env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=env_fn().mirrored_acts) 37 | 38 | print() 39 | print("Environment: {}".format(path)) 40 | print(" ├ reward: {}".format(reward)) 41 | print(" ├ input prof: {}".format(input_profile)) 42 | print(" ├ cmd prof: {}".format(command_profile)) 43 | print(" ├ learn gains: {}".format(learn_gains)) 44 | print(" ├ dyn_random: {}".format(dynamics_randomization)) 45 | print(" ├ mirror: {}".format(mirror)) 46 | if path == "CassieTraj-v0": 47 | print(" ├ traj: {}".format(traj)) 48 | print(" ├ ik baseline: {}".format(ik_baseline)) 49 | print(" ├ no_delta: {}".format(no_delta)) 50 | print(" └ obs_dim: {}".format(env_fn().observation_space.shape[0])) 51 | 52 | return env_fn 53 | 54 | # OpenAI Gym environment 55 | else: 56 | import gym 57 | spec = gym.envs.registry.spec(path) 58 | _kwargs = spec._kwargs.copy() 59 | _kwargs.update(kwargs) 60 | 61 | try: 62 | if callable(spec._entry_point): 63 | cls = spec._entry_point(**_kwargs) 64 | else: 65 | cls = gym.envs.registration.load(spec._entry_point) 66 | except AttributeError: 67 | if callable(spec.entry_point): 68 | cls = spec.entry_point(**_kwargs) 69 | else: 70 | cls = gym.envs.registration.load(spec.entry_point) 71 | 72 | return partial(cls, **_kwargs) 73 | -------------------------------------------------------------------------------- /util/log.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import hashlib, os, pickle 3 | 4 | class color: 5 | BOLD = '\033[1m\033[48m' 6 | END = '\033[0m' 7 | ORANGE = '\033[38;5;202m' 8 | BLACK = '\033[38;5;240m' 9 | 10 | # Logger stores in trained_models by default 11 | def create_logger(args): 12 | from torch.utils.tensorboard import SummaryWriter 13 | """Use hyperparms to set a directory to output diagnostic files.""" 14 | 15 | arg_dict = args.__dict__ 16 | assert "seed" in arg_dict, \ 17 | "You must provide a 'seed' key in your command line arguments" 18 | assert "logdir" in arg_dict, \ 19 | "You must provide a 'logdir' key in your command line arguments." 20 | assert "env_name" in arg_dict, \ 21 | "You must provide a 'env_name' key in your command line arguments." 
22 | 23 | # sort the keys so the same hyperparameters will always have the same hash 24 | arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0])) 25 | 26 | # remove seed so it doesn't get hashed, store value for filename 27 | # same for logging directory 28 | run_name = arg_dict.pop('run_name') 29 | seed = str(arg_dict.pop("seed")) 30 | logdir = str(arg_dict.pop('logdir')) 31 | env_name = str(arg_dict['env_name']) 32 | 33 | # see if this run has a unique name, if so then that is going to be the name of the folder, even if it overrirdes 34 | if run_name is not None: 35 | logdir = os.path.join(logdir, env_name) 36 | output_dir = os.path.join(logdir, run_name) 37 | else: 38 | # see if we are resuming a previous run, if we are mark as continued 39 | if args.previous is not None: 40 | if args.exchange_reward is not None: 41 | output_dir = args.previous[0:-1] + "_NEW-" + args.reward 42 | else: 43 | print(args.previous[0:-1]) 44 | output_dir = args.previous[0:-1] + '-cont' 45 | else: 46 | # get a unique hash for the hyperparameter settings, truncated at 10 chars 47 | arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed 48 | logdir = os.path.join(logdir, env_name) 49 | output_dir = os.path.join(logdir, arg_hash) 50 | 51 | # create a directory with the hyperparm hash as its name, if it doesn't 52 | # already exist. 53 | os.makedirs(output_dir, exist_ok=True) 54 | 55 | # Create a file with all the hyperparam settings in human-readable plaintext, 56 | # also pickle file for resuming training easily 57 | info_path = os.path.join(output_dir, "experiment.info") 58 | pkl_path = os.path.join(output_dir, "experiment.pkl") 59 | with open(pkl_path, 'wb') as file: 60 | pickle.dump(args, file) 61 | with open(info_path, 'w') as file: 62 | for key, val in arg_dict.items(): 63 | file.write("%s: %s" % (key, val)) 64 | file.write('\n') 65 | 66 | logger = SummaryWriter(output_dir, flush_secs=0.1) # flush_secs=0.1 actually slows down quite a bit, even on parallelized set ups 67 | print("Logging to " + color.BOLD + color.ORANGE + str(output_dir) + color.END) 68 | 69 | logger.dir = output_dir 70 | return logger 71 | 72 | # Rule for curriculum learning is that env observation space should be the same (so attributes like env.clock_based or env.state_est shouldn't be different and are forced to be same here) 73 | # deal with loading hyperparameters of previous run continuation 74 | def parse_previous(args): 75 | if args.previous is not None: 76 | run_args = pickle.load(open(args.previous + "experiment.pkl", "rb")) 77 | args.recurrent = run_args.recurrent 78 | args.env_name = run_args.env_name 79 | args.command_profile = run_args.command_profile 80 | args.input_profile = run_args.input_profile 81 | args.learn_gains = run_args.learn_gains 82 | args.traj = run_args.traj 83 | args.no_delta = run_args.no_delta 84 | args.ik_baseline = run_args.ik_baseline 85 | if args.exchange_reward is not None: 86 | args.reward = args.exchange_reward 87 | args.run_name = run_args.run_name + "_NEW-" + args.reward 88 | else: 89 | args.reward = run_args.reward 90 | args.run_name = run_args.run_name + "--cont" 91 | return args 92 | -------------------------------------------------------------------------------- /util/logo.py: -------------------------------------------------------------------------------- 1 | class color: 2 | BOLD = '\033[1m\033[48m' 3 | END = '\033[0m' 4 | ORANGE = '\033[38;5;202m' 5 | BLACK = '\033[38;5;240m' 6 | 7 | 8 | def print_logo(subtitle="", option=2): 9 | print() 10 | 
print(color.BOLD + color.ORANGE + " .8. " + color.BLACK + " 8 888888888o " + color.ORANGE + "8 8888888888 `8.`8888. ,8' ") 11 | print(color.BOLD + color.ORANGE + " .888. " + color.BLACK + " 8 8888 `88. " + color.ORANGE + "8 8888 `8.`8888. ,8' ") 12 | print(color.BOLD + color.ORANGE + " :88888. " + color.BLACK + " 8 8888 `88 " + color.ORANGE + "8 8888 `8.`8888. ,8' ") 13 | print(color.BOLD + color.ORANGE + " . `88888. " + color.BLACK + " 8 8888 ,88 " + color.ORANGE + "8 8888 `8.`8888.,8' ") 14 | print(color.BOLD + color.ORANGE + " .8. `88888. " + color.BLACK + " 8 8888. ,88' " + color.ORANGE + "8 888888888888 `8.`88888' ") 15 | print(color.BOLD + color.ORANGE + " .8`8. `88888. " + color.BLACK + " 8 888888888P' " + color.ORANGE + "8 8888 .88.`8888. ") 16 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8'`8.`8888. ") 17 | print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") 18 | print(color.BOLD + color.ORANGE + " .888888888. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") 19 | print(color.BOLD + color.ORANGE + ".8' `8. `88888." + color.BLACK + " 8 8888 " + color.ORANGE + "8 888888888888 .8' `8.`8888. " + color.END) 20 | print("\n") 21 | print(subtitle) 22 | print("\n") 23 | --------------------------------------------------------------------------------