├── .gitignore ├── LICENSE ├── README.md ├── clean.sh ├── examples ├── bspilqr │ ├── car.py │ └── lightdark.py ├── elqr │ ├── lqr.py │ └── pendulum.py ├── gps │ ├── analytical │ │ ├── mpc │ │ │ ├── mb_cartpole.py │ │ │ ├── mb_cartpole_parallel.py │ │ │ ├── mb_double_cartpole.py │ │ │ ├── mb_lqr.py │ │ │ ├── mb_pendulum.py │ │ │ └── mb_pendulum_parallel.py │ │ └── topt │ │ │ ├── mb_cartpole.py │ │ │ ├── mb_double_cartpole.py │ │ │ ├── mb_double_pendulum.py │ │ │ ├── mb_lqr.py │ │ │ ├── mb_pendulum.py │ │ │ ├── mb_pendulum_parallel.py │ │ │ ├── mb_quad_pendulum.py │ │ │ └── mbgps_riccati.py │ └── learned │ │ ├── mf_cartpole.py │ │ ├── mf_double_pendulum.py │ │ ├── mf_lqr.py │ │ └── mf_pendulum.py ├── ilqr │ └── mpc │ │ ├── cartpole.py │ │ ├── doublecartpole.py │ │ ├── lqr.py │ │ ├── pendulum.py │ │ └── pendulum_parallel.py ├── rgps │ ├── mb_lqr.py │ └── mb_robot.py └── riccati │ └── lqr.py ├── setup.py └── trajopt ├── __init__.py ├── bspilqr ├── CMakeLists.txt ├── __init__.py ├── bspilqr.py ├── objects.py └── src │ └── util.cpp ├── elqr ├── __init__.py ├── elqr.py └── objects.py ├── envs ├── __init__.py ├── car │ └── car.py ├── cartpole │ └── cartpole.py ├── double_cartpole │ └── double_cartpole.py ├── double_pendulum │ └── double_pendulum.py ├── lightdark │ └── lightdark.py ├── lqr │ ├── lqr_v0.py │ ├── lqr_v1.py │ └── lqr_v2.py ├── pendulum │ └── pendulum.py ├── quad_pendulum │ └── quad_pendulum.py └── robot │ └── robot.py ├── gps ├── CMakeLists.txt ├── __init__.py ├── mbgps.py ├── mfgps.py ├── objects.py └── src │ └── util.cpp ├── ilqr ├── CMakeLists.txt ├── __init__.py ├── ilqr.py ├── objects.py └── src │ └── util.cpp ├── rgps ├── CMakeLists.txt ├── __init__.py ├── lrgps.py ├── mbrgps.py ├── mfrgps.py ├── objects.py └── src │ └── util.cpp └── riccati ├── __init__.py ├── objects.py └── riccati.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pkl 2 | .idea/ 3 | .vscode/ 4 | .history/ 5 | 6 | ## Core latex/pdflatex auxiliary files: 7 | *.aux 8 | *.lof 9 | *.log 10 | *.lot 11 | *.fls 12 | *.out 13 | *.toc 14 | *.fmt 15 | *.fot 16 | *.cb 17 | *.cb2 18 | .*.lb 19 | 20 | ## Intermediate documents: 21 | *.dvi 22 | *.xdv 23 | *-converted-to.* 24 | # these rules might exclude image files for figures etc. 
25 | # *.ps 26 | # *.eps 27 | # *.pdf 28 | 29 | ## Generated if empty string is given at "Please type another file name for output:" 30 | .pdf 31 | 32 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 33 | *.bbl 34 | *.bcf 35 | *.blg 36 | *-blx.aux 37 | *-blx.bib 38 | *.run.xml 39 | 40 | ## Build tool auxiliary files: 41 | *.fdb_latexmk 42 | *.synctex 43 | *.synctex(busy) 44 | *.synctex.gz 45 | *.synctex.gz(busy) 46 | *.pdfsync 47 | 48 | ## Build tool directories for auxiliary files 49 | # latexrun 50 | latex.out/ 51 | 52 | ## Auxiliary and intermediate files from other packages: 53 | # algorithms 54 | *.alg 55 | *.loa 56 | 57 | # achemso 58 | acs-*.bib 59 | 60 | # amsthm 61 | *.thm 62 | 63 | # beamer 64 | *.nav 65 | *.pre 66 | *.snm 67 | *.vrb 68 | 69 | # changes 70 | *.soc 71 | 72 | # comment 73 | *.cut 74 | 75 | # cprotect 76 | *.cpt 77 | 78 | # elsarticle (documentclass of Elsevier journals) 79 | *.spl 80 | 81 | # endnotes 82 | *.ent 83 | 84 | # fixme 85 | *.lox 86 | 87 | # feynmf/feynmp 88 | *.mf 89 | *.mp 90 | *.t[1-9] 91 | *.t[1-9][0-9] 92 | *.tfm 93 | 94 | #(r)(e)ledmac/(r)(e)ledpar 95 | *.end 96 | *.?end 97 | *.[1-9] 98 | *.[1-9][0-9] 99 | *.[1-9][0-9][0-9] 100 | *.[1-9]R 101 | *.[1-9][0-9]R 102 | *.[1-9][0-9][0-9]R 103 | *.eledsec[1-9] 104 | *.eledsec[1-9]R 105 | *.eledsec[1-9][0-9] 106 | *.eledsec[1-9][0-9]R 107 | *.eledsec[1-9][0-9][0-9] 108 | *.eledsec[1-9][0-9][0-9]R 109 | 110 | # glossaries 111 | *.acn 112 | *.acr 113 | *.glg 114 | *.glo 115 | *.gls 116 | *.glsdefs 117 | 118 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 119 | # *.ist 120 | 121 | # gnuplottex 122 | *-gnuplottex-* 123 | 124 | # gregoriotex 125 | *.gaux 126 | *.gtex 127 | 128 | # htlatex 129 | *.4ct 130 | *.4tc 131 | *.idv 132 | *.lg 133 | *.trc 134 | *.xref 135 | 136 | # hyperref 137 | *.brf 138 | 139 | # knitr 140 | *-concordance.tex 141 | # TODO Comment the next line if you want to keep your tikz graphics files 142 | *.tikz 143 | *-tikzDictionary 144 | 145 | # listings 146 | *.lol 147 | 148 | # luatexja-ruby 149 | *.ltjruby 150 | 151 | # makeidx 152 | *.idx 153 | *.ilg 154 | *.ind 155 | 156 | # minitoc 157 | *.maf 158 | *.mlf 159 | *.mlt 160 | *.mtc[0-9]* 161 | *.slf[0-9]* 162 | *.slt[0-9]* 163 | *.stc[0-9]* 164 | 165 | # minted 166 | _minted* 167 | *.pyg 168 | 169 | # morewrites 170 | *.mw 171 | 172 | # nomencl 173 | *.nlg 174 | *.nlo 175 | *.nls 176 | 177 | # pax 178 | *.pax 179 | 180 | # pdfpcnotes 181 | *.pdfpc 182 | 183 | # sagetex 184 | *.sagetex.sage 185 | *.sagetex.py 186 | *.sagetex.scmd 187 | 188 | # scrwfile 189 | *.wrt 190 | 191 | # sympy 192 | *.sout 193 | *.sympy 194 | sympy-plots-for-*.tex/ 195 | 196 | # pdfcomment 197 | *.upa 198 | *.upb 199 | 200 | # pythontex 201 | *.pytxcode 202 | pythontex-files-*/ 203 | 204 | # tcolorbox 205 | *.listing 206 | 207 | # thmtools 208 | *.loe 209 | 210 | # TikZ & PGF 211 | *.dpth 212 | *.md5 213 | *.auxlock 214 | 215 | # todonotes 216 | *.tdo 217 | 218 | # vhistory 219 | *.hst 220 | *.ver 221 | 222 | # easy-todo 223 | *.lod 224 | 225 | # xcolor 226 | *.xcp 227 | 228 | # xmpincl 229 | *.xmpi 230 | 231 | # xindy 232 | *.xdy 233 | 234 | # xypic precompiled matrices 235 | *.xyc 236 | 237 | # endfloat 238 | *.ttt 239 | *.fff 240 | 241 | # Latexian 242 | TSWLatexianTemp* 243 | 244 | ## Editors: 245 | # WinEdt 246 | *.bak 247 | *.sav 248 | 249 | # Texpad 250 | .texpadtmp 251 | 252 | # LyX 253 | *.lyx~ 254 | 255 | # Kile 256 | *.backup 257 | 258 | # KBibTeX 259 | *~[0-9]* 260 | 261 | # auto folder when using emacs and auctex 
262 | ./auto/* 263 | *.el 264 | 265 | # expex forward references with \gathertags 266 | *-tags.tex 267 | 268 | # standalone packages 269 | *.sta 270 | 271 | CMakeLists.txt.user 272 | CMakeCache.txt 273 | CMakeFiles 274 | CMakeScripts 275 | Testing 276 | Makefile 277 | cmake_install.cmake 278 | install_manifest.txt 279 | compile_commands.json 280 | CTestTestfile.cmake 281 | _deps 282 | 283 | # Prerequisites 284 | *.d 285 | 286 | # Object files 287 | *.o 288 | *.ko 289 | *.obj 290 | *.elf 291 | 292 | # Linker output 293 | *.ilk 294 | *.map 295 | *.exp 296 | 297 | # Precompiled Headers 298 | *.gch 299 | *.pch 300 | 301 | # Libraries 302 | *.lib 303 | *.a 304 | *.la 305 | *.lo 306 | 307 | # Shared objects (inc. Windows DLLs) 308 | *.dll 309 | *.so 310 | *.so.* 311 | *.dylib 312 | 313 | # Executables 314 | *.exe 315 | *.out 316 | *.app 317 | *.i*86 318 | *.x86_64 319 | *.hex 320 | 321 | # Debug files 322 | *.dSYM/ 323 | *.su 324 | *.idb 325 | *.pdb 326 | 327 | # Kernel Module Compile Results 328 | *.mod* 329 | *.cmd 330 | .tmp_versions/ 331 | modules.order 332 | Module.symvers 333 | Mkfile.old 334 | dkms.conf 335 | 336 | # Byte-compiled / optimized / DLL files 337 | __pycache__/ 338 | *.py[cod] 339 | *$py.class 340 | 341 | # C extensions 342 | *.so 343 | 344 | # Distribution / packaging 345 | .Python 346 | build/ 347 | develop-eggs/ 348 | dist/ 349 | downloads/ 350 | eggs/ 351 | .eggs/ 352 | lib/ 353 | lib64/ 354 | parts/ 355 | sdist/ 356 | var/ 357 | wheels/ 358 | pip-wheel-metadata/ 359 | share/python-wheels/ 360 | *.egg-info/ 361 | .installed.cfg 362 | *.egg 363 | MANIFEST 364 | 365 | # PyInstaller 366 | # Usually these files are written by a python script from a template 367 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 368 | *.manifest 369 | *.spec 370 | 371 | # Installer logs 372 | pip-log.txt 373 | pip-delete-this-directory.txt 374 | 375 | # Unit test / coverage reports 376 | htmlcov/ 377 | .tox/ 378 | .nox/ 379 | .coverage 380 | .coverage.* 381 | .cache 382 | nosetests.xml 383 | coverage.xml 384 | *.cover 385 | .hypothesis/ 386 | .pytest_cache/ 387 | 388 | # Translations 389 | *.mo 390 | *.pot 391 | 392 | # Django stuff: 393 | *.log 394 | local_settings.py 395 | db.sqlite3 396 | db.sqlite3-journal 397 | 398 | # Flask stuff: 399 | instance/ 400 | .webassets-cache 401 | 402 | # Scrapy stuff: 403 | .scrapy 404 | 405 | # Sphinx documentation 406 | docs/_build/ 407 | 408 | # PyBuilder 409 | target/ 410 | 411 | # Jupyter Notebook 412 | .ipynb_checkpoints 413 | 414 | # IPython 415 | profile_default/ 416 | ipython_config.py 417 | 418 | # pyenv 419 | .python-version 420 | 421 | # pipenv 422 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 423 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 424 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 425 | # install all needed dependencies. 
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Hany Abdulsamad

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Trajectory Optimization

A toolbox for trajectory optimization of dynamical systems

## Installation

This toolbox uses pybind11 to compile C++ code and use it from Python.
The following setup has only been tested inside Conda environments.

- Install pybind11:
  ```shell
  conda install pybind11
  ```

- Compile OpenBLAS:
  * Prerequisites: libpthread, libgfortran, gcc-fortran
  * https://github.com/xianyi/OpenBLAS.git
  * Change USE_THREAD to (de-)activate multi-threading
  ```shell
  USE_THREAD=0 CC=gcc FC=gfortran NO_AFFINITY=1 NO_SHARED=1 COMMON_OPT=" -O2 -march=native " make
  ```
  * Edit the local CMakeLists.txt to reflect the path of OpenBLAS

- Configure Armadillo:
  * http://arma.sourceforge.net/download.html
  * Only configure, do not make
  ```shell
  ./configure
  ```
  * Edit the local CMakeLists.txt to reflect the path of Armadillo

- Install the Python package:
  ```shell
  pip install -e .
  ```
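- Verify the installation (a quick sanity check, assuming the C++ extensions compiled without errors):
  ```shell
  python -c "import trajopt"
  ```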
--------------------------------------------------------------------------------
/clean.sh:
--------------------------------------------------------------------------------
#!/bin/bash

rm trajopt/bspilqr/*.so
rm -r trajopt/bspilqr/build

rm trajopt/ilqr/*.so
rm -r trajopt/ilqr/build

rm trajopt/gps/*.so
rm -r trajopt/gps/build

rm trajopt/rgps/*.so
rm -r trajopt/rgps/build
--------------------------------------------------------------------------------
/examples/bspilqr/car.py:
--------------------------------------------------------------------------------
import numpy as np

import gym
from trajopt.bspilqr import BSPiLQR

from trajopt.bspilqr.objects import EKF


# car task
env = gym.make('Car-TO-v0')
env._max_episode_steps = 10000

nb_steps = 25

state_dim = env.state_dim
belief_dim = env.belief_dim
obs_dim = env.obs_dim
act_dim = env.act_dim

mu_b = np.zeros((belief_dim, nb_steps + 1))
sigma_b = np.zeros((belief_dim, belief_dim, nb_steps + 1))

state = np.zeros((state_dim, nb_steps + 1))
obs = np.zeros((obs_dim, nb_steps + 1))
act = np.zeros((act_dim, nb_steps))

obs[:, 0] = env.reset()
state[:, 0] = env.state

_mu_b, _sigma_b = env.init()
ekf = EKF(env)
mu_b[:, 0], sigma_b[..., 0] = ekf.innovate(_mu_b, _sigma_b, obs[:, 0])

for t in range(nb_steps):
    solver = BSPiLQR(env=env, init_belief=(mu_b[:, t], sigma_b[..., t]), nb_steps=10)
    trace = solver.run(nb_iter=25, verbose=False)

    act[:, t] = solver.uref[:, 0]
    obs[:, t + 1], _, _, _ = env.step(act[:, t])
    state[:, t + 1] = env.state

    mu_b[:, t + 1], sigma_b[..., t + 1] = ekf.inference(mu_b[:, t], sigma_b[..., t],
                                                        act[:, t], obs[:, t + 1])

    print('Time Step:', t, 'Cost:', trace[-1])
--------------------------------------------------------------------------------
/examples/bspilqr/lightdark.py:
--------------------------------------------------------------------------------
import numpy as np

import gym
from trajopt.bspilqr import BSPiLQR

from trajopt.bspilqr.objects import EKF


# light dark task
env = gym.make('LightDark-TO-v0')
env._max_episode_steps = 10000

nb_steps = 25

state_dim = env.state_dim
belief_dim = env.belief_dim
obs_dim = env.obs_dim
act_dim = env.act_dim

mu_b = np.zeros((belief_dim, nb_steps + 1))
sigma_b = np.zeros((belief_dim, belief_dim, nb_steps + 1))

state = np.zeros((state_dim, nb_steps + 1))
obs = np.zeros((obs_dim, nb_steps + 1))
act = np.zeros((act_dim, nb_steps))

obs[:, 0] = env.reset()
state[:, 0] = env.state

_mu_b, _sigma_b = env.init()
ekf = EKF(env)
mu_b[:, 0], sigma_b[..., 0] = ekf.innovate(_mu_b, _sigma_b, obs[:, 0])

for t in range(nb_steps):
    solver = BSPiLQR(env=env, init_belief=(mu_b[:, t], sigma_b[..., t]), nb_steps=10)
    trace = solver.run(nb_iter=25, verbose=False)

    act[:, t] = solver.uref[:, 0]
    obs[:, t + 1], _, _, _ = env.step(act[:, t])
    state[:, t + 1] = env.state

    mu_b[:, t + 1], sigma_b[..., t + 1] = ekf.inference(mu_b[:, t], sigma_b[..., t],
                                                        act[:, t], obs[:, t + 1])

    print('Time Step:', t, 'Cost:', trace[-1])
--------------------------------------------------------------------------------
/examples/elqr/lqr.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from trajopt.elqr import eLQR 3 | 4 | # lqr task 5 | env = gym.make('LQR-TO-v0') 6 | env._max_episode_steps = 100 7 | 8 | state = env.reset() 9 | alg = eLQR(env, nb_steps=60, 10 | init_state=state) 11 | 12 | # run eLQR 13 | trace = alg.run() 14 | 15 | # plot forward pass 16 | alg.plot() 17 | 18 | # plot objective 19 | import matplotlib.pyplot as plt 20 | 21 | plt.figure() 22 | plt.plot(trace) 23 | plt.show() 24 | -------------------------------------------------------------------------------- /examples/elqr/pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from trajopt.elqr import eLQR 3 | 4 | # pendulum task 5 | env = gym.make('Pendulum-TO-v0') 6 | env._max_episode_steps = 100 7 | 8 | state = env.reset() 9 | alg = eLQR(env, nb_steps=100, 10 | init_state=state) 11 | 12 | # run eLQR 13 | trace = alg.run() 14 | 15 | # plot forward pass 16 | alg.plot() 17 | 18 | # plot objective 19 | import matplotlib.pyplot as plt 20 | 21 | plt.figure() 22 | plt.plot(trace) 23 | plt.show() 24 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_cartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # cartpole env 14 | env = gym.make('Cartpole-TO-v0') 15 | env._max_episode_steps = 10000 16 | env.unwrapped.dt = 0.05 17 | env.unwrapped.umax = np.array([5.]) 18 | env.unwrapped.periodic = True 19 | 20 | env.seed(1337) 21 | 22 | dm_state = env.observation_space.shape[0] 23 | dm_act = env.action_space.shape[0] 24 | 25 | horizon, nb_steps = 20, 100 26 | 27 | env_sigma = env.unwrapped.sigma 28 | 29 | state = np.zeros((dm_state, nb_steps + 1)) 30 | action = np.zeros((dm_act, nb_steps)) 31 | 32 | state[:, 0] = env.reset() 33 | for t in range(nb_steps): 34 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 35 | init_action_sigma=1., nb_steps=horizon, 36 | kl_bound=2., action_penalty=1e-5) 37 | trace = solver.run(nb_iter=10, verbose=False) 38 | 39 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 40 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 41 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 42 | 43 | print('Time Step:', t, 'Cost:', trace[-1]) 44 | 45 | 46 | plt.figure() 47 | 48 | plt.subplot(5, 1, 1) 49 | plt.plot(state[0, :], '-b') 50 | plt.subplot(5, 1, 2) 51 | plt.plot(state[1, :], '-b') 52 | 53 | plt.subplot(5, 1, 3) 54 | plt.plot(state[2, :], '-r') 55 | plt.subplot(5, 1, 4) 56 | plt.plot(state[3, :], '-r') 57 | 58 | plt.subplot(5, 1, 5) 59 | plt.plot(action[0, :], '-g') 60 | 61 | plt.show() 62 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_cartpole_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | from joblib import Parallel, delayed 9 | 10 | import multiprocessing 11 | nb_cores = multiprocessing.cpu_count() 12 | 13 | 14 | def create_job(kwargs): 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | # cartpole env 19 | 
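    # (each worker builds its own env inside the job, presumably because gym
    # environments don't pickle reliably across loky processes; the other
    # parallel examples in this repo follow the same pattern)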
    env = gym.make('Cartpole-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.05
    env.unwrapped.umax = np.array([5.])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 20, 100

    env_sigma = env.unwrapped.sigma

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]),
                       init_action_sigma=1., nb_steps=horizon,
                       kl_bound=2., action_penalty=1e-5)
        trace = solver.run(nb_iter=10, verbose=False)

        _act = solver.ctl.sample(state[:, t], 0, stoch=False)
        action[:, t] = np.clip(_act, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T


def parallel_gps(nb_jobs=50):
    kwargs_list = [{} for _ in range(nb_jobs)]
    results = Parallel(n_jobs=min(nb_jobs, nb_cores),
                       verbose=10, backend='loky')(map(delayed(create_job), kwargs_list))
    obs, act = list(map(list, zip(*results)))
    return obs, act


obs, act = parallel_gps(nb_jobs=12)

fig, ax = plt.subplots(nrows=5, ncols=1, figsize=(12, 4))
for _obs, _act in zip(obs, act):
    ax[0].plot(_obs[:, 0])
    ax[1].plot(_obs[:, 1])
    ax[2].plot(_obs[:, 2])
    ax[3].plot(_obs[:, 3])
    ax[4].plot(_act)
plt.show()

# import pickle
# data = {'obs': obs, 'act': act}
# pickle.dump(data, open("gps_cartpole_cart.pkl", "wb"))
--------------------------------------------------------------------------------
/examples/gps/analytical/mpc/mb_double_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# double cartpole env
env = gym.make('DoubleCartpole-TO-v0')
env._max_episode_steps = 10000
env.unwrapped.dt = 0.05
env.unwrapped.umax = np.array([5.])
env.unwrapped.periodic = True

env.seed(1337)

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 20, 100

env_sigma = env.unwrapped.sigma

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]),
                   init_action_sigma=1., nb_steps=horizon,
                   kl_bound=2., action_penalty=1e-5)
    trace = solver.run(nb_iter=10, verbose=False)

    _act = solver.ctl.sample(state[:, t], 0, stoch=False)
    action[:, t] = np.clip(_act, -env.ulim, env.ulim)
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])


plt.figure()

plt.subplot(7, 1, 1)
plt.plot(state[0, :], '-b')
plt.subplot(7, 1, 2)
plt.plot(state[1, :], '-b')
plt.subplot(7, 1, 3)
plt.plot(state[2, :], '-b')

plt.subplot(7, 1, 4)
plt.plot(state[3, :], '-r')
plt.subplot(7, 1, 5)
plt.plot(state[4, :], '-r')
59 | plt.subplot(7, 1, 6) 60 | plt.plot(state[5, :], '-r') 61 | 62 | plt.subplot(7, 1, 7) 63 | plt.plot(action[0, :], '-g') 64 | 65 | plt.show() 66 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | 12 | # lqr task 13 | env = gym.make('LQR-TO-v0') 14 | env._max_episode_steps = 100000 15 | 16 | dm_state = env.observation_space.shape[0] 17 | dm_act = env.action_space.shape[0] 18 | 19 | horizon, nb_steps = 25, 60 20 | 21 | env_sigma = env.unwrapped.sigma 22 | 23 | state = np.zeros((dm_state, nb_steps + 1)) 24 | action = np.zeros((dm_act, nb_steps)) 25 | 26 | state[:, 0] = env.reset() 27 | for t in range(nb_steps): 28 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 29 | init_action_sigma=100., nb_steps=horizon, kl_bound=5.) 30 | trace = solver.run(nb_iter=10, verbose=False) 31 | 32 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 33 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 34 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 35 | 36 | print('Time Step:', t, 'Cost:', trace[-1]) 37 | 38 | 39 | plt.figure() 40 | 41 | plt.subplot(3, 1, 1) 42 | plt.plot(state[0, :], '-b') 43 | plt.subplot(3, 1, 2) 44 | plt.plot(state[1, :], '-b') 45 | 46 | plt.subplot(3, 1, 3) 47 | plt.plot(action[0, :], '-g') 48 | 49 | plt.show() 50 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | # pendulum env 10 | env = gym.make('Pendulum-TO-v0') 11 | env._max_episode_steps = 10000 12 | env.unwrapped.dt = 0.05 13 | env.unwrapped.umax = np.array([2.5]) 14 | env.unwrapped.periodic = True 15 | 16 | dm_state = env.observation_space.shape[0] 17 | dm_act = env.action_space.shape[0] 18 | 19 | horizon, nb_steps = 25, 100 20 | 21 | env_sigma = env.unwrapped.sigma 22 | 23 | state = np.zeros((dm_state, nb_steps + 1)) 24 | action = np.zeros((dm_act, nb_steps)) 25 | 26 | state[:, 0] = env.reset() 27 | for t in range(nb_steps): 28 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 29 | init_action_sigma=25., nb_steps=horizon, 30 | kl_bound=1., action_penalty=1e-5) 31 | trace = solver.run(nb_iter=10, verbose=False) 32 | 33 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 34 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 35 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 36 | 37 | print('Time Step:', t, 'Cost:', trace[-1]) 38 | 39 | 40 | import matplotlib.pyplot as plt 41 | 42 | plt.figure() 43 | 44 | plt.subplot(3, 1, 1) 45 | plt.plot(state[0, :], '-b') 46 | plt.subplot(3, 1, 2) 47 | plt.plot(state[1, :], '-b') 48 | 49 | plt.subplot(3, 1, 3) 50 | plt.plot(action[0, :], '-g') 51 | 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import 
matplotlib.pyplot as plt

from joblib import Parallel, delayed

import multiprocessing
nb_cores = multiprocessing.cpu_count()


def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.02
    env.unwrapped.umax = np.array([2.5])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 15, 100

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        init_state = tuple([state[:, t], 1e-4 * np.eye(dm_state)])
        solver = MBGPS(env, init_state=init_state,
                       init_action_sigma=2.5, nb_steps=horizon,
                       kl_bound=1., action_penalty=1e-3)
        trace = solver.run(nb_iter=5, verbose=False)

        solver.ctl.sigma = np.dstack([1e-2 * np.eye(dm_act)] * horizon)
        u = solver.ctl.sample(state[:, t], 0, stoch=True)
        action[:, t] = np.clip(u, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        # print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T


def parallel_gps(nb_jobs=50):
    kwargs_list = [{} for _ in range(nb_jobs)]
    results = Parallel(n_jobs=min(nb_jobs, 12),
                       verbose=10, backend='loky')(map(delayed(create_job), kwargs_list))
    obs, act = list(map(list, zip(*results)))
    return obs, act


obs, act = parallel_gps(nb_jobs=50)

fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 4))
for _obs, _act in zip(obs, act):
    ax[0].plot(_obs[:, :-1])
    ax[1].plot(_obs[:, -1])
    ax[2].plot(_act)
plt.show()

import pickle
data = {'obs': obs, 'act': act}
pickle.dump(data, open("gps_pendulum.pkl", "wb"))
--------------------------------------------------------------------------------
/examples/gps/analytical/topt/mb_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# cartpole env
env = gym.make('Cartpole-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

env.seed(1337)

solver = MBGPS(env, nb_steps=100,
               init_state=env.init(),
               init_action_sigma=10.0,
               kl_bound=1e0,
               slew_rate=False,
               action_penalty=1e-5,
               activation={'mult': 1., 'shift': 80})

trace = solver.run(nb_iter=25, verbose=True)

# plot dists
solver.plot()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/gps/analytical/topt/mb_double_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# double cartpole env
env = gym.make('DoubleCartpole-TO-v0')
env._max_episode_steps = 10000
env.unwrapped.dt = 0.05

env.seed(1337)

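# A note on the solver settings below, inferred from the sibling examples
# rather than from library docs: kl_bound caps the KL trust region of each
# policy update, action_penalty regularizes control effort, and activation
# with shift=80 appears to up-weight the state cost over the final stretch
# of the 100-step horizon.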
solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=5.0, 23 | kl_bound=1e0, 24 | slew_rate=False, 25 | action_penalty=1e-5, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=50, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum env 14 | env = gym.make('DoublePendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=10.0, 23 | kl_bound=1e1, 24 | slew_rate=False, 25 | action_penalty=1, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=50, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # lqr task 14 | env = gym.make('LQR-TO-v0') 15 | env._max_episode_steps = 60 16 | 17 | env.seed(1337) 18 | 19 | solver = MBGPS(env, nb_steps=60, 20 | init_state=env.init(), 21 | init_action_sigma=100., 22 | kl_bound=5.) 
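# Since this task is linear-quadratic, the GPS solution should coincide with
# the exact Riccati solution; mbgps_riccati.py below makes that comparison
# explicit. run() returns the per-iteration trace of the objective, which is
# plotted at the end.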
23 | 24 | trace = solver.run(nb_iter=10, verbose=True) 25 | 26 | # plot dists 27 | solver.plot() 28 | 29 | # plot objective 30 | plt.figure() 31 | plt.plot(trace) 32 | plt.show() 33 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum env 14 | env = gym.make('Pendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=5.0, 23 | kl_bound=1e1, 24 | slew_rate=False, 25 | action_penalty=1, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=25, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | from joblib import Parallel, delayed 9 | 10 | import multiprocessing 11 | nb_cores = multiprocessing.cpu_count() 12 | 13 | 14 | def create_job(kwargs): 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | # pendulum env 19 | env = gym.make('Pendulum-TO-v0') 20 | env._max_episode_steps = 10000 21 | env.unwrapped.dt = 0.02 22 | env.unwrapped.umax = np.array([2.5]) 23 | env.unwrapped.periodic = False 24 | 25 | dm_state = env.observation_space.shape[0] 26 | dm_act = env.action_space.shape[0] 27 | 28 | state = env.reset() 29 | init_state = tuple([state, 1e-4 * np.eye(dm_state)]) 30 | solver = MBGPS(env, init_state=init_state, 31 | init_action_sigma=25., nb_steps=300, 32 | kl_bound=.1, action_penalty=1e-3, 33 | activation={'shift': 250, 'mult': 0.5}) 34 | 35 | solver.run(nb_iter=100, verbose=False) 36 | 37 | solver.ctl.sigma = np.dstack([1e-1 * np.eye(dm_act)] * 300) 38 | data = solver.rollout(nb_episodes=1, stoch=True, init=state) 39 | 40 | obs, act = np.squeeze(data['x'], axis=-1).T, np.squeeze(data['u'], axis=-1).T 41 | return obs, act 42 | 43 | 44 | def parallel_gps(nb_jobs=50): 45 | kwargs_list = [{} for _ in range(nb_jobs)] 46 | results = Parallel(n_jobs=min(nb_jobs, 20), 47 | verbose=10, backend='loky')(map(delayed(create_job), kwargs_list)) 48 | obs, act = list(map(list, zip(*results))) 49 | return obs, act 50 | 51 | 52 | obs, act = parallel_gps(nb_jobs=50) 53 | 54 | plt.figure() 55 | fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 4)) 56 | for _obs, _act in zip(obs, act): 57 | ax[0].plot(_obs[:, :-1]) 58 | ax[1].plot(_obs[:, -1]) 59 | ax[2].plot(_act) 60 | plt.show() 61 | 62 | import pickle 63 | data = {'obs': obs, 'act': act} 64 | pickle.dump(data, open("gps_pendulum_other.pkl", "wb")) 65 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_quad_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | 
import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | np.random.seed(1337) 10 | 11 | # pendulum env 12 | env = gym.make('QuadPendulum-TO-v0') 13 | env._max_episode_steps = 100 14 | env.unwrapped.dt = 0.05 15 | 16 | env.seed(1337) 17 | 18 | solver = MBGPS(env, nb_steps=100, 19 | init_state=env.init(), 20 | init_action_sigma=5.0, 21 | kl_bound=10., 22 | slew_rate=False, 23 | action_penalty=1e-5) 24 | 25 | trace = solver.run(nb_iter=50, verbose=True) 26 | 27 | # plot dists 28 | solver.plot() 29 | 30 | # plot objective 31 | import matplotlib.pyplot as plt 32 | 33 | plt.figure() 34 | plt.plot(trace) 35 | plt.show() 36 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mbgps_riccati.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | 5 | from trajopt.gps import MBGPS 6 | from trajopt.riccati import Riccati 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | import warnings 11 | warnings.filterwarnings("ignore") 12 | 13 | # lqr task 14 | env = gym.make('LQR-TO-v0') 15 | env.env._max_episode_steps = 100 16 | 17 | dm_state = env.unwrapped.dm_state 18 | dm_act = env.unwrapped.dm_act 19 | 20 | mbgps = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=100., 23 | kl_bound=5.) 24 | 25 | mbgps.run(nb_iter=15, verbose=True) 26 | 27 | riccati = Riccati(env, nb_steps=100, 28 | init_state=env.init()) 29 | 30 | riccati.run() 31 | 32 | np.random.seed(1337) 33 | env.seed(1337) 34 | gps_data = mbgps.rollout(250, stoch=False) 35 | 36 | np.random.seed(1337) 37 | env.seed(1337) 38 | riccati_data = riccati.rollout(250) 39 | 40 | print('GPS Cost: ', np.mean(np.sum(gps_data['c'], axis=0)), 41 | ', Riccati Cost', np.mean(np.sum(riccati_data['c'], axis=0))) 42 | 43 | plt.figure(figsize=(6, 12)) 44 | plt.suptitle("LQR Mean Traj.: Riccati vs GPS") 45 | 46 | for i in range(dm_state): 47 | plt.subplot(dm_state + dm_act, 1, i + 1) 48 | plt.plot(riccati.xref[i, ...], color='k') 49 | plt.plot(mbgps.xdist.mu[i, ...], color='r', linestyle='None', marker='o', markersize=3) 50 | 51 | for i in range(dm_act): 52 | plt.subplot(dm_state + dm_act, 1, dm_state + i + 1) 53 | plt.plot(riccati.uref[i, ...], color='k') 54 | plt.plot(mbgps.udist.mu[i, ...], color='r', linestyle='None', marker='o', markersize=3) 55 | 56 | plt.show() 57 | 58 | plt.figure(figsize=(6, 12)) 59 | plt.suptitle("LQR Controller: Riccati vs GPS") 60 | 61 | for i in range(dm_state): 62 | plt.subplot(dm_state + dm_act, 1, i + 1) 63 | plt.plot(mbgps.ctl.K[0, i, ...], color='r', linestyle='None', marker='o', markersize=3) 64 | plt.plot(riccati.ctl.K[0, i, ...], color='k') 65 | 66 | for i in range(dm_act): 67 | plt.subplot(dm_state + dm_act, 1, dm_state + i + 1) 68 | plt.plot(mbgps.ctl.kff[i, ...], color='r', linestyle='None', marker='o', markersize=3) 69 | plt.plot(riccati.ctl.kff[i, ...], color='k') 70 | 71 | plt.show() 72 | 73 | plt.figure(figsize=(12, 12)) 74 | plt.suptitle("LQR Sample Traj.: Riccati vs GPS") 75 | 76 | for i, j in zip(range(dm_state), range(1, 2 * dm_state, 2)): 77 | plt.subplot(dm_state + dm_act, 2, j) 78 | plt.plot(riccati_data['x'][i, ...]) 79 | 80 | plt.subplot(dm_state + dm_act, 2, j + 1) 81 | plt.plot(gps_data['x'][i, ...]) 82 | 83 | for i, j in zip(range(dm_act), range(2 * dm_state + 1, 2 * dm_state + 2 * dm_act, 2)): 84 | plt.subplot(dm_state + dm_act, 2, j) 85 | plt.plot(riccati_data['u'][i, ...]) 86 | 87 | plt.subplot(dm_state + dm_act, 2, j + 1) 88 | 
plt.plot(gps_data['u'][i, ...]) 89 | 90 | plt.show() 91 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_cartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # cartpole task 14 | env = gym.make('Cartpole-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1} 21 | # prior = {'K': 1e-6} 22 | 23 | solver = MFGPS(env, nb_steps=100, 24 | init_state=env.init(), 25 | init_action_sigma=0.1, 26 | kl_bound=1e0, 27 | action_penalty=1e-3, 28 | prior=prior, 29 | activation={'mult': 1., 'shift': 80}) 30 | 31 | # run gps 32 | trace = solver.run(nb_learning_episodes=25, 33 | nb_iter=50, verbose=True) 34 | 35 | # execute and plot 36 | data = solver.rollout(25, stoch=True) 37 | 38 | plt.figure() 39 | for k in range(solver.dm_state): 40 | plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1) 41 | plt.plot(data['x'][k, ...]) 42 | 43 | for k in range(solver.dm_act): 44 | plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1) 45 | plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...]) 46 | 47 | plt.show() 48 | 49 | # plot objective 50 | plt.figure() 51 | plt.plot(trace) 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum task 14 | env = gym.make('DoublePendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | # prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1} 21 | prior = {'K': 1e-6} 22 | 23 | solver = MFGPS(env, nb_steps=100, 24 | init_state=env.init(), 25 | init_action_sigma=10., 26 | kl_bound=1e1, 27 | slew_rate=False, 28 | action_penalty=1, 29 | prior=prior, 30 | activation={'mult': 1., 'shift': 80}) 31 | 32 | # run gps 33 | trace = solver.run(nb_learning_episodes=25, 34 | nb_evaluation_episodes=25, 35 | nb_iter=50, verbose=True) 36 | 37 | # execute and plot 38 | data = solver.rollout(25, stoch=True) 39 | 40 | plt.figure() 41 | for k in range(solver.dm_state): 42 | plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1) 43 | plt.plot(data['x'][k, ...]) 44 | 45 | for k in range(solver.dm_act): 46 | plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1) 47 | plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...]) 48 | 49 | plt.show() 50 | 51 | # plot objective 52 | plt.figure() 53 | plt.plot(trace) 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # 
lqr task
env = gym.make('LQR-TO-v1')
env._max_episode_steps = 100000

env.seed(1337)

prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1}

alg = MFGPS(env, nb_steps=60,
            init_state=env.init(),
            init_action_sigma=100.,
            kl_bound=1e0,
            prior=prior)

# run gps
trace = alg.run(nb_learning_episodes=25,
                nb_evaluation_episodes=25,
                nb_iter=25, verbose=True)

# plot dists
alg.plot_distributions()

# execute and plot
data = alg.rollout(25, stoch=True)

plt.figure()
for k in range(alg.dm_state):
    plt.subplot(alg.dm_state + alg.dm_act, 1, k + 1)
    plt.plot(data['x'][k, ...])

for k in range(alg.dm_act):
    plt.subplot(alg.dm_state + alg.dm_act, 1, alg.dm_state + k + 1)
    plt.plot(data['u'][k, ...])

plt.show()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/gps/learned/mf_pendulum.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MFGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# pendulum task
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

env.seed(1337)

# prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1}
prior = {'K': 1e-6}

solver = MFGPS(env, nb_steps=100,
               init_state=env.init(),
               init_action_sigma=5.,
               kl_bound=1e1,
               slew_rate=False,
               action_penalty=1,
               prior=prior,
               activation={'mult': 1., 'shift': 80})

# run gps
trace = solver.run(nb_learning_episodes=25,
                   nb_evaluation_episodes=25,
                   nb_iter=25, verbose=True)

# execute and plot
data = solver.rollout(25, stoch=True)

plt.figure()
for k in range(solver.dm_state):
    plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1)
    plt.plot(data['x'][k, ...])

for k in range(solver.dm_act):
    plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1)
    plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...])

plt.show()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/ilqr/mpc/cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.ilqr import iLQR

import warnings
warnings.filterwarnings("ignore")


# cartpole env
env = gym.make('Cartpole-TO-v1')
env._max_episode_steps = 100000
env.unwrapped.dt = 0.02

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 50, 250

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = iLQR(env, init_state=state[:, t],
                  nb_steps=horizon, action_penalty=np.array([1e-5]))
    trace = solver.run(nb_iter=10, verbose=False)

    action[:, t] = solver.uref[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:',
trace[-1]) 33 | 34 | 35 | import matplotlib.pyplot as plt 36 | 37 | plt.figure() 38 | 39 | plt.subplot(5, 1, 1) 40 | plt.plot(state[0, :], '-b') 41 | plt.subplot(5, 1, 2) 42 | plt.plot(state[1, :], '-b') 43 | 44 | plt.subplot(5, 1, 3) 45 | plt.plot(state[2, :], '-r') 46 | plt.subplot(5, 1, 4) 47 | plt.plot(state[3, :], '-r') 48 | 49 | plt.subplot(5, 1, 5) 50 | plt.plot(action[0, :], '-g') 51 | 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/doublecartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # double cartpole env 11 | env = gym.make('DoubleCartpole-TO-v0') 12 | env._max_episode_steps = 100000 13 | env.unwrapped.dt = 0.01 14 | 15 | dm_state = env.observation_space.shape[0] 16 | dm_act = env.action_space.shape[0] 17 | 18 | horizon, nb_steps = 100, 500 19 | 20 | state = np.zeros((dm_state, nb_steps + 1)) 21 | action = np.zeros((dm_act, nb_steps)) 22 | 23 | state[:, 0] = env.reset() 24 | for t in range(nb_steps): 25 | solver = iLQR(env, init_state=state[:, t], 26 | nb_steps=horizon, action_penalty=np.array([1e-5])) 27 | trace = solver.run(nb_iter=10, verbose=False) 28 | 29 | action[:, t] = solver.uref[:, 0] 30 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 31 | 32 | print('Time Step:', t, 'Cost:', trace[-1]) 33 | 34 | 35 | import matplotlib.pyplot as plt 36 | 37 | plt.figure() 38 | 39 | plt.subplot(7, 1, 1) 40 | plt.plot(state[0, :], '-b') 41 | plt.subplot(7, 1, 2) 42 | plt.plot(state[1, :], '-b') 43 | plt.subplot(7, 1, 3) 44 | plt.plot(state[2, :], '-b') 45 | 46 | plt.subplot(7, 1, 4) 47 | plt.plot(state[3, :], '-r') 48 | plt.subplot(7, 1, 5) 49 | plt.plot(state[4, :], '-r') 50 | plt.subplot(7, 1, 6) 51 | plt.plot(state[5, :], '-r') 52 | 53 | plt.subplot(7, 1, 7) 54 | plt.plot(action[0, :], '-g') 55 | 56 | plt.show() 57 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # lqr task 11 | env = gym.make('LQR-TO-v0') 12 | env._max_episode_steps = 100000 13 | 14 | dm_state = env.observation_space.shape[0] 15 | dm_act = env.action_space.shape[0] 16 | 17 | horizon, nb_steps = 25, 100 18 | state = np.zeros((dm_state, nb_steps + 1)) 19 | action = np.zeros((dm_act, nb_steps)) 20 | 21 | state[:, 0] = env.reset() 22 | for t in range(nb_steps): 23 | solver = iLQR(env, init_state=state[:, t], 24 | nb_steps=horizon) 25 | trace = solver.run(nb_iter=5, verbose=False) 26 | 27 | action[:, t] = solver.uref[:, 0] 28 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 29 | 30 | print('Time Step:', t, 'Cost:', trace[-1]) 31 | 32 | 33 | import matplotlib.pyplot as plt 34 | 35 | plt.figure() 36 | 37 | plt.subplot(3, 1, 1) 38 | plt.plot(state[0, :], '-b') 39 | plt.subplot(3, 1, 2) 40 | plt.plot(state[1, :], '-b') 41 | 42 | plt.subplot(3, 1, 3) 43 | plt.plot(action[0, :], '-g') 44 | 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 
4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # pendulum env 11 | env = gym.make('Pendulum-TO-v0') 12 | env._max_episode_steps = 100000 13 | env.unwrapped.dt = 0.05 14 | 15 | dm_state = env.observation_space.shape[0] 16 | dm_act = env.action_space.shape[0] 17 | 18 | horizon, nb_steps = 25, 100 19 | state = np.zeros((dm_state, nb_steps + 1)) 20 | action = np.zeros((dm_act, nb_steps)) 21 | 22 | state[:, 0] = env.reset() 23 | for t in range(nb_steps): 24 | solver = iLQR(env, init_state=state[:, t], 25 | nb_steps=horizon, action_penalty=np.array([1e-5])) 26 | trace = solver.run(nb_iter=10, verbose=False) 27 | 28 | action[:, t] = solver.uref[:, 0] 29 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 30 | 31 | print('Time Step:', t, 'Cost:', trace[-1]) 32 | 33 | import matplotlib.pyplot as plt 34 | 35 | plt.figure() 36 | 37 | plt.subplot(3, 1, 1) 38 | plt.plot(state[0, :], '-b') 39 | plt.subplot(3, 1, 2) 40 | plt.plot(state[1, :], '-b') 41 | 42 | plt.subplot(3, 1, 3) 43 | plt.plot(action[0, :], '-g') 44 | 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | from joblib import Parallel, delayed 7 | 8 | import multiprocessing 9 | nb_cores = multiprocessing.cpu_count() 10 | 11 | 12 | def create_job(kwargs): 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | 16 | # pendulum env 17 | env = gym.make('Pendulum-TO-v1') 18 | env._max_episode_steps = 100000 19 | env.unwrapped.dt = 0.05 20 | 21 | dm_state = env.observation_space.shape[0] 22 | dm_act = env.action_space.shape[0] 23 | 24 | horizon, nb_steps = 25, 100 25 | state = np.zeros((dm_state, nb_steps + 1)) 26 | action = np.zeros((dm_act, nb_steps)) 27 | 28 | state[:, 0] = env.reset() 29 | for t in range(nb_steps): 30 | solver = iLQR(env, init_state=state[:, t], 31 | nb_steps=horizon, action_penalty=np.array([1e-5])) 32 | solver.run(nb_iter=10, verbose=False) 33 | 34 | action[:, t] = solver.uref[:, 0] 35 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 36 | 37 | return state[:, :-1].T, action.T 38 | 39 | 40 | def parallel_ilqr(nb_jobs=50): 41 | kwargs_list = [{} for _ in range(nb_jobs)] 42 | results = Parallel(n_jobs=min(nb_jobs, nb_cores), 43 | verbose=10, backend='loky')(map(delayed(create_job), kwargs_list)) 44 | obs, act = list(map(list, zip(*results))) 45 | return obs, act 46 | 47 | 48 | obs, act = parallel_ilqr(nb_jobs=50) 49 | 50 | import matplotlib.pyplot as plt 51 | 52 | fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 4)) 53 | for _obs, _act in zip(obs, act): 54 | ax[0].plot(_obs[:, 0]) 55 | ax[1].plot(_obs[:, 1]) 56 | ax[2].plot(_act[:, 0]) 57 | plt.show() 58 | 59 | import pickle 60 | data = {'obs': obs, 'act': act} 61 | pickle.dump(data, open("ilqr_pendulum_polar.pkl", "wb")) 62 | -------------------------------------------------------------------------------- /examples/rgps/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | 5 | from trajopt.rgps import LRGPS 6 | 7 | from matplotlib import rc 8 | import matplotlib.pyplot as plt 9 | 10 | import tikzplotlib 11 | 12 | import warnings 13 | warnings.filterwarnings("ignore") 14 | 15 | rc('lines', **{'linewidth': 1}) 16 | rc('text', usetex=True) 17 | 18 | 19 | def 
beautify(ax):
    ax.set_frame_on(True)
    ax.minorticks_on()

    ax.grid(True)
    ax.grid(linestyle='--')

    ax.tick_params(which='both', direction='in',
                   bottom=True, labelbottom=True,
                   top=True, labeltop=False,
                   right=True, labelright=False,
                   left=True, labelleft=True)

    ax.tick_params(which='major', length=6)
    ax.tick_params(which='minor', length=3)

    # ax.autoscale(tight=True)
    # ax.set_aspect('equal')

    if ax.get_legend():
        ax.legend(loc='best')

    return ax


# lqr task
env = gym.make('LQR-TO-v1')
env.env._max_episode_steps = 75
env.env.sigma0 = 1e-2 * np.eye(2)
env.env.sigma = 1e-4 * np.eye(2)

np.random.seed(1337)
env.seed(1337)

rgps = LRGPS(env, nb_steps=75,
             policy_kl_bound=0.25,
             param_nominal_kl_bound=75e1,
             param_regularizer_kl_bound=10,
             init_state=env.init(),
             init_action_sigma=100.)
rgps.run(nb_iter=100, verbose=True)

np.random.seed(1337)
env.seed(1337)

gps = LRGPS(env, nb_steps=75,
            policy_kl_bound=0.25,
            param_nominal_kl_bound=75e1,
            param_regularizer_kl_bound=10,
            init_state=env.init(),
            init_action_sigma=100.)
gps.run(nb_iter=100, verbose=True,
        optimize_adversary=False)

# compute attack on final standard controller
gps.param, gps.eta = gps.reguarlized_parameter_optimization(gps.ctl)
print("Disturbance KL:", gps.parameter_nominal_kldiv(gps.param).sum())

fig = plt.figure(figsize=(6, 12))
plt.suptitle("Standard vs Robust Ctl: Feedback Controller")
for i in range(rgps.dm_state):
    plt.subplot(rgps.dm_state + rgps.dm_act, 1, i + 1)
    plt.plot(rgps.ctl.K[0, i, ...], color='r', marker='x', markersize=2)
    plt.plot(gps.ctl.K[0, i, ...], color='b', marker='o', markersize=2)

for i in range(rgps.dm_act):
    plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + i + 1)
    plt.plot(rgps.ctl.kff[i, ...], color='r', marker='x', markersize=2)
    plt.plot(gps.ctl.kff[i, ...], color='b', marker='o', markersize=2)

axs = fig.get_axes()
axs = [beautify(ax) for ax in axs]
plt.show()

tikzplotlib.save("linear_feedback_gains.tex")

std_xdist, std_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.nominal)
robust_xdist, robust_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.nominal)

cost_nom_env_std_ctl = gps.cost.evaluate(std_xdist, std_udist)
cost_nom_env_rbst_ctl = rgps.cost.evaluate(robust_xdist, robust_udist)

print("Expected Cost of Standard and Robust Control on Nominal Env")
print("Std. Ctl.: ", cost_nom_env_std_ctl, "Rbst. Ctl.", cost_nom_env_rbst_ctl)

std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.param)
robust_worst_xdist, robust_worst_udist, _ = rgps.cubature_forward_pass(rgps.ctl, gps.param)

cost_adv_env_std_ctl = gps.cost.evaluate(std_worst_xdist, std_worst_udist)
cost_adv_env_rbst_ctl = rgps.cost.evaluate(robust_worst_xdist, robust_worst_udist)

print("Expected Cost of Standard and Robust Control on Adversarial Env")
print("Std. Ctl.: ", cost_adv_env_std_ctl, "Rbst.
Ctl.", cost_adv_env_rbst_ctl) 112 | 113 | fig = plt.figure() 114 | plt.suptitle('Standard and Robust Control Without Adversary') 115 | for k in range(rgps.dm_state): 116 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, k + 1) 117 | 118 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 119 | 120 | plt.plot(t, std_xdist.mu[k, :], '-b') 121 | lb = std_xdist.mu[k, :] - 2. * np.sqrt(std_xdist.sigma[k, k, :]) 122 | ub = std_xdist.mu[k, :] + 2. * np.sqrt(std_xdist.sigma[k, k, :]) 123 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 124 | 125 | for k in range(rgps.dm_act): 126 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + k + 1) 127 | 128 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 129 | 130 | plt.plot(t, std_udist.mu[k, :], '-b') 131 | lb = std_udist.mu[k, :] - 2. * np.sqrt(std_udist.sigma[k, k, :]) 132 | ub = std_udist.mu[k, :] + 2. * np.sqrt(std_udist.sigma[k, k, :]) 133 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 134 | 135 | for k in range(gps.dm_state): 136 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 137 | 138 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 139 | 140 | plt.plot(t, robust_xdist.mu[k, :], '-r') 141 | lb = robust_xdist.mu[k, :] - 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 142 | ub = robust_xdist.mu[k, :] + 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 143 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 144 | 145 | for k in range(rgps.dm_act): 146 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 147 | 148 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 149 | 150 | plt.plot(t, robust_udist.mu[k, :], '-r') 151 | lb = robust_udist.mu[k, :] - 2. * np.sqrt(robust_udist.sigma[k, k, :]) 152 | ub = robust_udist.mu[k, :] + 2. * np.sqrt(robust_udist.sigma[k, k, :]) 153 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 154 | 155 | axs = fig.get_axes() 156 | axs = [beautify(ax) for ax in axs] 157 | plt.show() 158 | 159 | # tikzplotlib.save("linear_trajectories_nominal.tex") 160 | 161 | fig = plt.figure() 162 | plt.suptitle('Standard and Robust Control With Adversary') 163 | for k in range(gps.dm_state): 164 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 165 | 166 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 167 | 168 | plt.plot(t, std_worst_xdist.mu[k, :], '-g') 169 | lb = std_worst_xdist.mu[k, :] - 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 170 | ub = std_worst_xdist.mu[k, :] + 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 171 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 172 | 173 | for k in range(rgps.dm_act): 174 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 175 | 176 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 177 | 178 | plt.plot(t, std_worst_udist.mu[k, :], '-g') 179 | lb = std_worst_udist.mu[k, :] - 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 180 | ub = std_worst_udist.mu[k, :] + 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 181 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 182 | 183 | for k in range(gps.dm_state): 184 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 185 | 186 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 187 | 188 | plt.plot(t, robust_worst_xdist.mu[k, :], '-m') 189 | lb = robust_worst_xdist.mu[k, :] - 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 190 | ub = robust_worst_xdist.mu[k, :] + 2. 
* np.sqrt(robust_worst_xdist.sigma[k, k, :]) 191 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 192 | 193 | for k in range(rgps.dm_act): 194 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 195 | 196 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 197 | 198 | plt.plot(t, robust_worst_udist.mu[k, :], '-m') 199 | lb = robust_worst_udist.mu[k, :] - 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 200 | ub = robust_worst_udist.mu[k, :] + 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 201 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 202 | 203 | axs = fig.get_axes() 204 | axs = [beautify(ax) for ax in axs] 205 | plt.show() 206 | 207 | # tikzplotlib.save("linear_trajectories_adversarial.tex") 208 | 209 | from trajopt.rgps.objects import MatrixNormalParameters 210 | interp = MatrixNormalParameters(rgps.dm_state, rgps.dm_act, rgps.nb_steps) 211 | 212 | alphas = np.linspace(0., 2., 21) 213 | 214 | cost_adv_env_std_ctl = [] 215 | cost_adv_env_rbst_ctl = [] 216 | kl_distance = [] 217 | 218 | for alpha in alphas: 219 | print('Alpha:', alpha) 220 | 221 | interp.mu, interp.sigma = gps.interp_gauss_kl(gps.nominal.mu, gps.nominal.sigma, 222 | gps.param.mu, gps.param.sigma, alpha) 223 | 224 | kl_distance.append(np.sum(gps.parameter_nominal_kldiv(interp))) 225 | 226 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, interp) 227 | robust_worst_xdist, robust_worst_udist, _ = gps.cubature_forward_pass(rgps.ctl, interp) 228 | 229 | cost_adv_env_std_ctl.append(gps.cost.evaluate(std_worst_xdist, std_worst_udist)) 230 | cost_adv_env_rbst_ctl.append(gps.cost.evaluate(robust_worst_xdist, robust_worst_udist)) 231 | 232 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 233 | print("Std. Ctl.: ", cost_adv_env_std_ctl[-1], "Rbst. Ctl.", cost_adv_env_rbst_ctl[-1]) 234 | 235 | fig = plt.figure() 236 | plt.plot(kl_distance, cost_adv_env_std_ctl, 'b', marker='o') 237 | plt.xscale('log') 238 | plt.yscale('log') 239 | plt.plot(kl_distance, cost_adv_env_rbst_ctl, 'r', marker='*') 240 | plt.xscale('log') 241 | plt.yscale('log') 242 | 243 | axs = fig.gca() 244 | axs = beautify(axs) 245 | plt.show() 246 | 247 | # tikzplotlib.save("linear_cost_over_distance.tex") 248 | 249 | kl_over_time = rgps.parameter_nominal_kldiv(gps.param) 250 | 251 | fig = plt.figure() 252 | plt.plot(kl_over_time, 'k', marker='.') 253 | plt.yscale('log') 254 | 255 | axs = fig.gca() 256 | axs = beautify(axs) 257 | plt.show() 258 | 259 | # tikzplotlib.save("linear_kl_over_time.tex") 260 | 
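The `alphas` sweep above evaluates both controllers on dynamics parameters interpolated between the nominal model (`alpha = 0`) and the optimized adversarial model (`alpha = 1`), with `alpha > 1` extrapolating beyond the attack budget; `parameter_nominal_kldiv` then supplies the x-axis of the cost-over-distance plot. The interpolant behind `interp_gauss_kl` is defined elsewhere in the package, so the sketch below only illustrates the idea with a plain moment-space interpolation of two Gaussians and a closed-form KL divergence; the function names and the interpolation rule here are assumptions, not the library's implementation.

```python
import numpy as np

def interp_gauss_moments(mu0, sigma0, mu1, sigma1, alpha):
    # hypothetical stand-in for interp_gauss_kl: blend mean and covariance
    # in moment space, alpha = 0 gives the nominal model, alpha = 1 the
    # adversarial one (extrapolating past 1 may lose positive definiteness)
    mu = (1. - alpha) * mu0 + alpha * mu1
    sigma = (1. - alpha) * sigma0 + alpha * sigma1
    return mu, sigma

def gauss_kl(mu0, sigma0, mu1, sigma1):
    # KL(N(mu0, sigma0) || N(mu1, sigma1)) between multivariate Gaussians
    d = mu0.shape[0]
    inv1 = np.linalg.inv(sigma1)
    diff = mu1 - mu0
    return 0.5 * (np.trace(inv1 @ sigma0) + diff @ inv1 @ diff - d
                  + np.log(np.linalg.det(sigma1) / np.linalg.det(sigma0)))

mu0, sigma0 = np.zeros(2), np.eye(2)        # nominal parameters
mu1, sigma1 = np.ones(2), 2. * np.eye(2)    # adversarial parameters
for alpha in np.linspace(0., 1., 5):
    mu, sigma = interp_gauss_moments(mu0, sigma0, mu1, sigma1, alpha)
    print(alpha, gauss_kl(mu, sigma, mu0, sigma0))
```

The printed KL grows with `alpha` in this toy case, which mirrors how the script sums `parameter_nominal_kldiv` over time steps to measure how far an interpolated model sits from the nominal one.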
-------------------------------------------------------------------------------- /examples/rgps/mb_robot.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.rgps import MBRGPS 5 | 6 | from matplotlib import rc 7 | import matplotlib.pyplot as plt 8 | 9 | import tikzplotlib 10 | 11 | import warnings 12 | warnings.filterwarnings("ignore") 13 | 14 | rc('lines', **{'linewidth': 1}) 15 | rc('text', usetex=True) 16 | 17 | 18 | def beautify(ax): 19 | ax.set_frame_on(True) 20 | ax.minorticks_on() 21 | 22 | ax.grid(True) 23 | ax.grid(linestyle=':') 24 | 25 | ax.tick_params(which='both', direction='in', 26 | bottom=True, labelbottom=True, 27 | top=True, labeltop=False, 28 | right=True, labelright=False, 29 | left=True, labelleft=True) 30 | 31 | ax.tick_params(which='major', length=6) 32 | ax.tick_params(which='minor', length=3) 33 | 34 | # ax.autoscale(tight=True) 35 | # ax.set_aspect('equal') 36 | 37 | if ax.get_legend(): 38 | ax.legend(loc='best') 39 | 40 | return ax 41 | 42 | 43 | # robot task 44 | env = gym.make('Robot-TO-v0') 45 | env._max_episode_steps = 100 46 | 47 | np.random.seed(1337) 48 | env.seed(1337) 49 | 50 | rgps = MBRGPS(env, nb_steps=100, 51 | init_state=env.init(), 52 | policy_kl_bound=0.25, 53 | param_nominal_kl_bound=5e2, 54 | param_regularizer_kl_bound=10, 55 | init_action_sigma=0.1, 56 | action_penalty=1e-1, 57 | nominal_variance=1e-6) 58 | rgps.run(nb_iter=900, verbose=True) 59 | 60 | np.random.seed(1337) 61 | env.seed(1337) 62 | 63 | gps = MBRGPS(env, nb_steps=100, 64 | init_state=env.init(), 65 | policy_kl_bound=0.25, 66 | param_nominal_kl_bound=5e2, 67 | param_regularizer_kl_bound=10, 68 | init_action_sigma=0.1, 69 | action_penalty=1e-1, 70 | nominal_variance=1e-6) 71 | gps.run(nb_iter=50, verbose=True, 72 | optimize_adversary=False) 73 | 74 | # compute attack on final standard controller 75 | gps.param_nominal_kl_bound = np.array([1e3]) 76 | gps.param, gps.eta = gps.reguarlized_parameter_optimization(gps.ctl) 77 | print("Disturbance KL:", gps.parameter_nominal_kldiv(gps.param).sum()) 78 | 79 | fig = plt.figure(figsize=(6, 12)) 80 | plt.suptitle("Standard vs Robust Ctl: Feedback Controller") 81 | for i in range(gps.dm_state): 82 | plt.subplot(gps.dm_state + gps.dm_act, 1, i + 1) 83 | plt.plot(gps.ctl.K[0, i, ...], color='b', marker='o', markersize=2) 84 | plt.plot(rgps.ctl.K[0, i, ...], color='r', marker='x', markersize=2) 85 | 86 | for i in range(gps.dm_act): 87 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + i + 1) 88 | plt.plot(gps.ctl.kff[i, ...], color='b', marker='o', markersize=2) 89 | plt.plot(rgps.ctl.kff[i, ...], color='r', marker='x', markersize=2) 90 | 91 | axs = fig.get_axes() 92 | axs = [beautify(ax) for ax in axs] 93 | 
plt.show() 94 | 95 | # tikzplotlib.save("robot_feedback_gains.tex") 96 | 97 | std_xdist, std_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.nominal) 98 | robust_xdist, robust_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.nominal) 99 | 100 | cost_nom_env_std_ctl = gps.cost.evaluate(std_xdist, std_udist) 101 | cost_nom_env_rbst_ctl = rgps.cost.evaluate(robust_xdist, robust_udist) 102 | 103 | print("Expected Cost of Standard and Robust Control on Nominal Env") 104 | print("Std. Ctl.: ", cost_nom_env_std_ctl, "Rbst. Ctl.", cost_nom_env_rbst_ctl) 105 | 106 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.param) 107 | robust_worst_xdist, robust_worst_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.param) 108 | 109 | cost_adv_env_std_ctl = gps.cost.evaluate(std_worst_xdist, std_worst_udist) 110 | cost_adv_env_rbst_ctl = rgps.cost.evaluate(robust_worst_xdist, robust_worst_udist) 111 | 112 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 113 | print("Std. Ctl.: ", cost_adv_env_std_ctl, "Rbst. Ctl.", cost_adv_env_rbst_ctl) 114 | 115 | 116 | fig = plt.figure() 117 | plt.suptitle('Standard and Robust Control Without Adversary') 118 | for k in range(rgps.dm_state): 119 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, k + 1) 120 | 121 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 122 | 123 | plt.plot(t, std_xdist.mu[k, :], '-b') 124 | lb = std_xdist.mu[k, :] - 2. * np.sqrt(std_xdist.sigma[k, k, :]) 125 | ub = std_xdist.mu[k, :] + 2. * np.sqrt(std_xdist.sigma[k, k, :]) 126 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 127 | 128 | for k in range(rgps.dm_act): 129 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + k + 1) 130 | 131 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 132 | 133 | plt.plot(t, std_udist.mu[k, :], '-b') 134 | lb = std_udist.mu[k, :] - 2. * np.sqrt(std_udist.sigma[k, k, :]) 135 | ub = std_udist.mu[k, :] + 2. * np.sqrt(std_udist.sigma[k, k, :]) 136 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 137 | 138 | for k in range(gps.dm_state): 139 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 140 | 141 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 142 | 143 | plt.plot(t, robust_xdist.mu[k, :], '-r') 144 | lb = robust_xdist.mu[k, :] - 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 145 | ub = robust_xdist.mu[k, :] + 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 146 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 147 | 148 | for k in range(rgps.dm_act): 149 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 150 | 151 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 152 | 153 | plt.plot(t, robust_udist.mu[k, :], '-r') 154 | lb = robust_udist.mu[k, :] - 2. * np.sqrt(robust_udist.sigma[k, k, :]) 155 | ub = robust_udist.mu[k, :] + 2. * np.sqrt(robust_udist.sigma[k, k, :]) 156 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 157 | 158 | axs = fig.get_axes() 159 | axs = [beautify(ax) for ax in axs] 160 | plt.show() 161 | 162 | # tikzplotlib.save("robot_trajectories_nominal.tex") 163 | 164 | fig = plt.figure() 165 | plt.suptitle('Standard and Robust Control With Adversary') 166 | for k in range(gps.dm_state): 167 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 168 | 169 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 170 | 171 | plt.plot(t, std_worst_xdist.mu[k, :], '-g') 172 | lb = std_worst_xdist.mu[k, :] - 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 173 | ub = std_worst_xdist.mu[k, :] + 2. 
* np.sqrt(std_worst_xdist.sigma[k, k, :]) 174 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 175 | 176 | for k in range(rgps.dm_act): 177 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 178 | 179 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 180 | 181 | plt.plot(t, std_worst_udist.mu[k, :], '-g') 182 | lb = std_worst_udist.mu[k, :] - 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 183 | ub = std_worst_udist.mu[k, :] + 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 184 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 185 | 186 | for k in range(gps.dm_state): 187 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 188 | 189 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 190 | 191 | plt.plot(t, robust_worst_xdist.mu[k, :], '-m') 192 | lb = robust_worst_xdist.mu[k, :] - 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 193 | ub = robust_worst_xdist.mu[k, :] + 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 194 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 195 | 196 | for k in range(rgps.dm_act): 197 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 198 | 199 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 200 | 201 | plt.plot(t, robust_worst_udist.mu[k, :], '-m') 202 | lb = robust_worst_udist.mu[k, :] - 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 203 | ub = robust_worst_udist.mu[k, :] + 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 204 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 205 | 206 | axs = fig.get_axes() 207 | axs = [beautify(ax) for ax in axs] 208 | plt.show() 209 | 210 | # tikzplotlib.save("robot_trajectories_adversarial.tex") 211 | 212 | from trajopt.rgps.objects import MatrixNormalParameters 213 | interp = MatrixNormalParameters(rgps.dm_state, rgps.dm_act, rgps.nb_steps) 214 | 215 | alphas = np.linspace(0., 2., 21) 216 | 217 | cost_adv_env_std_ctl = [] 218 | cost_adv_env_rbst_ctl = [] 219 | kl_distance = [] 220 | 221 | for alpha in alphas: 222 | print('Alpha:', alpha) 223 | 224 | interp.mu, interp.sigma = gps.interp_gauss_kl(gps.nominal.mu, gps.nominal.sigma, 225 | gps.param.mu, gps.param.sigma, alpha) 226 | 227 | kl_distance.append(np.sum(gps.parameter_nominal_kldiv(interp))) 228 | 229 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, interp) 230 | robust_worst_xdist, robust_worst_udist, _ = gps.cubature_forward_pass(rgps.ctl, interp) 231 | 232 | cost_adv_env_std_ctl.append(gps.cost.evaluate(std_worst_xdist, std_worst_udist)) 233 | cost_adv_env_rbst_ctl.append(gps.cost.evaluate(robust_worst_xdist, robust_worst_udist)) 234 | 235 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 236 | print("Std. Ctl.: ", cost_adv_env_std_ctl[-1], "Rbst. 
Ctl.", cost_adv_env_rbst_ctl[-1]) 237 | 238 | fig = plt.figure() 239 | plt.plot(kl_distance, cost_adv_env_std_ctl, 'b', marker='o') 240 | # plt.xscale('log') 241 | plt.yscale('log') 242 | plt.plot(kl_distance, cost_adv_env_rbst_ctl, 'r', marker='*') 243 | # plt.xscale('log') 244 | plt.yscale('log') 245 | 246 | axs = fig.gca() 247 | axs = beautify(axs) 248 | plt.show() 249 | 250 | # tikzplotlib.save("robot_cost_over_distance.tex") 251 | 252 | kl_over_time = gps.parameter_nominal_kldiv(gps.param) 253 | 254 | fig = plt.figure() 255 | plt.plot(kl_over_time, 'k', marker='.') 256 | plt.yscale('log') 257 | 258 | axs = fig.gca() 259 | axs = beautify(axs) 260 | plt.show() 261 | 262 | # tikzplotlib.save("robot_kl_over_time.tex") 263 | -------------------------------------------------------------------------------- /examples/riccati/lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.riccati import Riccati 5 | 6 | np.random.seed(1337) 7 | 8 | # lqr task 9 | env = gym.make('LQR-TO-v0') 10 | env._max_episode_steps = 100 11 | 12 | env.seed(1337) 13 | 14 | alg = Riccati(env, nb_steps=60, 15 | init_state=env.init()) 16 | 17 | # run Riccati 18 | trace = alg.run() 19 | 20 | alg.plot() 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | 5 | from setuptools import setup, Extension 6 | from setuptools.command.build_ext import build_ext 7 | 8 | 9 | class CMakeExtension(Extension): 10 | def __init__(self, name, sourcedir=''): 11 | Extension.__init__(self, name, sources=[]) 12 | self.sourcedir = os.path.abspath(sourcedir) 13 | 14 | 15 | class CMakeBuild(build_ext): 16 | def run(self): 17 | try: 18 | subprocess.check_output(['cmake', '--version']) 19 | except OSError: 20 | raise RuntimeError("CMake must be installed to build the following extensions: " + 21 | ", ".join(e.name for e in self.extensions)) 22 | 23 | for ext in self.extensions: 24 | self.build_extension(ext) 25 | 26 | def build_extension(self, ext): 27 | cmake_args = ['-DPYTHON_EXECUTABLE=' + sys.executable] 28 | 29 | cfg = 'Debug' if self.debug else 'Release' 30 | build_args = ['--config', cfg] 31 | 32 | cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] 33 | build_args += ['--', '-j4'] 34 | 35 | env = os.environ.copy() 36 | env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), 37 | self.distribution.get_version()) 38 | if not os.path.exists(os.path.join(ext.sourcedir, self.build_temp)): 39 | os.makedirs(os.path.join(ext.sourcedir, self.build_temp)) 40 | subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=os.path.join(ext.sourcedir, self.build_temp), env=env) 41 | subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=os.path.join(ext.sourcedir, self.build_temp)) 42 | 43 | 44 | setup( 45 | name='trajopt', 46 | version='0.0.1', 47 | author='Hany Abdulsamad', 48 | author_email='hany@robot-learning.de', 49 | description='A toolbox for trajectory optimization', 50 | install_requires=['numpy', 'scipy', 'matplotlib', 51 | 'scikit-learn', 'autograd', 'gym'], 52 | ext_modules=[CMakeExtension('gps', './trajopt/gps/'), 53 | CMakeExtension('rgps', './trajopt/rgps/'), 54 | CMakeExtension('ilqr', './trajopt/ilqr/'), 55 | CMakeExtension('bspilqr', './trajopt/bspilqr/')], 56 | cmdclass=dict(build_ext=CMakeBuild), 57 | zip_safe=False, 58 | ) 59 
| -------------------------------------------------------------------------------- /trajopt/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='LQR-TO-v0', 5 | entry_point='trajopt.envs:LQRv0', 6 | max_episode_steps=1000, 7 | ) 8 | 9 | register( 10 | id='LQR-TO-v1', 11 | entry_point='trajopt.envs:LQRv1', 12 | max_episode_steps=1000, 13 | ) 14 | 15 | register( 16 | id='LQR-TO-v2', 17 | entry_point='trajopt.envs:LQRv2', 18 | max_episode_steps=1000, 19 | ) 20 | 21 | register( 22 | id='Robot-TO-v0', 23 | entry_point='trajopt.envs:Robot', 24 | max_episode_steps=1000, 25 | ) 26 | 27 | register( 28 | id='Pendulum-TO-v0', 29 | entry_point='trajopt.envs:Pendulum', 30 | max_episode_steps=1000, 31 | ) 32 | 33 | register( 34 | id='Pendulum-TO-v1', 35 | entry_point='trajopt.envs:PendulumWithCartesianCost', 36 | max_episode_steps=1000, 37 | ) 38 | 39 | register( 40 | id='DoublePendulum-TO-v0', 41 | entry_point='trajopt.envs:DoublePendulum', 42 | max_episode_steps=1000, 43 | ) 44 | 45 | register( 46 | id='DoublePendulum-TO-v1', 47 | entry_point='trajopt.envs:DoublePendulumWithCartesianCost', 48 | max_episode_steps=1000, 49 | ) 50 | 51 | register( 52 | id='QuadPendulum-TO-v0', 53 | entry_point='trajopt.envs:QuadPendulum', 54 | max_episode_steps=1000, 55 | ) 56 | 57 | register( 58 | id='QuadPendulum-TO-v1', 59 | entry_point='trajopt.envs:QuadPendulumWithCartesianCost', 60 | max_episode_steps=1000, 61 | ) 62 | 63 | register( 64 | id='Cartpole-TO-v0', 65 | entry_point='trajopt.envs:Cartpole', 66 | max_episode_steps=1000, 67 | ) 68 | 69 | register( 70 | id='Cartpole-TO-v1', 71 | entry_point='trajopt.envs:CartpoleWithCartesianCost', 72 | max_episode_steps=1000, 73 | ) 74 | 75 | register( 76 | id='DoubleCartpole-TO-v0', 77 | entry_point='trajopt.envs:DoubleCartpole', 78 | max_episode_steps=1000, 79 | ) 80 | 81 | register( 82 | id='DoubleCartpole-TO-v1', 83 | entry_point='trajopt.envs:DoubleCartpoleWithCartesianCost', 84 | max_episode_steps=1000, 85 | ) 86 | 87 | register( 88 | id='LightDark-TO-v0', 89 | entry_point='trajopt.envs:LightDark', 90 | max_episode_steps=1000, 91 | ) 92 | 93 | register( 94 | id='Car-TO-v0', 95 | entry_point='trajopt.envs:Car', 96 | max_episode_steps=1000, 97 | ) 98 | -------------------------------------------------------------------------------- /trajopt/bspilqr/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/bspilqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .bspilqr import BSPiLQR 2 | -------------------------------------------------------------------------------- /trajopt/bspilqr/bspilqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.bspilqr.objects import Gaussian 4 | from trajopt.bspilqr.objects import 
AnalyticalLinearBeliefDynamics, AnalyticalQuadraticCost 5 | from trajopt.bspilqr.objects import QuadraticBeliefValue 6 | from trajopt.bspilqr.objects import LinearControl 7 | 8 | from trajopt.bspilqr.core import backward_pass 9 | 10 | 11 | class BSPiLQR: 12 | 13 | def __init__(self, env, nb_steps, init_belief, 14 | alphas=np.power(10., np.linspace(0, -3, 11)), 15 | lmbda=1., dlmbda=1., min_lmbda=1e-6, 16 | max_lmbda=1e6, mult_lmbda=1.6, tolfun=1e-8, 17 | tolgrad=1e-6, min_imp=0., reg=1): 18 | 19 | self.env = env 20 | 21 | # expose necessary functions 22 | self.env_dyn = self.env.unwrapped.dynamics 23 | self.env_obs = self.env.unwrapped.observe 24 | self.env_cost = self.env.unwrapped.cost 25 | self.env_init = init_belief 26 | 27 | self.env_dyn_noise = self.env.unwrapped.dyn_noise 28 | self.env_obs_noise = self.env.unwrapped.obs_noise 29 | 30 | self.ulim = self.env.action_space.high 31 | 32 | self.belief_dim = self.env.unwrapped.state_space.shape[0] 33 | self.obs_dim = self.env.unwrapped.observation_space.shape[0] 34 | self.act_dim = self.env.action_space.shape[0] 35 | self.nb_steps = nb_steps 36 | 37 | # backtracking 38 | self.alphas = alphas 39 | self.alpha = None 40 | 41 | self.lmbda = lmbda 42 | self.dlmbda = dlmbda 43 | self.min_lmbda = min_lmbda 44 | self.max_lmbda = max_lmbda 45 | self.mult_lmbda = mult_lmbda 46 | 47 | # regularization type 48 | self.reg = reg 49 | 50 | # minimum relative improvement 51 | self.min_imp = min_imp 52 | 53 | # stopping criterion 54 | self.tolfun = tolfun 55 | self.tolgrad = tolgrad 56 | 57 | # reference belief trajectory 58 | self.bref = Gaussian(self.belief_dim, self.nb_steps + 1) 59 | self.bref.mu[..., 0], self.bref.sigma[..., 0] = self.env_init 60 | 61 | self.uref = np.zeros((self.act_dim, self.nb_steps)) 62 | 63 | self.vfunc = QuadraticBeliefValue(self.belief_dim, self.nb_steps + 1) 64 | 65 | self.dyn = AnalyticalLinearBeliefDynamics(self.env_dyn, self.env_obs, 66 | self.env_dyn_noise, self.env_obs_noise, 67 | self.belief_dim, self.obs_dim, self.act_dim, self.nb_steps) 68 | 69 | self.ctl = LinearControl(self.belief_dim, self.act_dim, self.nb_steps) 70 | self.ctl.kff = 1e-8 * np.random.randn(self.act_dim, self.nb_steps) 71 | 72 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.belief_dim, self.act_dim, self.nb_steps + 1) 73 | 74 | self.last_return = - np.inf 75 | 76 | def forward_pass(self, ctl, alpha): 77 | belief = Gaussian(self.belief_dim, self.nb_steps + 1) 78 | action = np.zeros((self.act_dim, self.nb_steps)) 79 | cost = np.zeros((self.nb_steps + 1, )) 80 | 81 | belief.mu[..., 0], belief.sigma[..., 0] = self.env_init 82 | for t in range(self.nb_steps): 83 | action[..., t] = ctl.action(belief, alpha, self.bref, self.uref, t) 84 | cost[..., t] = self.env_cost(belief.mu[..., t], belief.sigma[..., t], action[..., t]) 85 | belief.mu[..., t + 1], belief.sigma[..., t + 1] = self.dyn.forward(belief, action, t) 86 | 87 | cost[..., -1] = self.env_cost(belief.mu[..., -1], belief.sigma[..., -1], np.zeros((self.act_dim, ))) 88 | return belief, action, cost 89 | 90 | def backward_pass(self): 91 | lc = LinearControl(self.belief_dim, self.act_dim, self.nb_steps) 92 | bvalue = QuadraticBeliefValue(self.belief_dim, self.nb_steps + 1) 93 | 94 | bvalue.S, bvalue.s, bvalue.tau,\ 95 | dS, lc.K, lc.kff, diverge = backward_pass(self.cost.Q, self.cost.q, 96 | self.cost.R, self.cost.r, 97 | self.cost.P, self.cost.p, 98 | self.dyn.F, self.dyn.G, 99 | self.dyn.T, self.dyn.U, 100 | self.dyn.V, self.dyn.X, 101 | self.dyn.Y, self.dyn.Z, 102 | self.lmbda, self.reg, 103 | 
self.belief_dim, self.act_dim, self.nb_steps) 104 | return lc, bvalue, dS, diverge 105 | 106 | def plot(self): 107 | import matplotlib.pyplot as plt 108 | 109 | plt.figure() 110 | 111 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 112 | for k in range(self.belief_dim): 113 | plt.subplot(self.belief_dim + self.act_dim, 1, k + 1) 114 | plt.plot(t, self.bref.mu[k, :], '-b') 115 | lb = self.bref.mu[k, :] - 2. * np.sqrt(self.bref.sigma[k, k, :]) 116 | ub = self.bref.mu[k, :] + 2. * np.sqrt(self.bref.sigma[k, k, :]) 117 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 118 | 119 | t = np.linspace(0, self.nb_steps, self.nb_steps) 120 | for k in range(self.act_dim): 121 | plt.subplot(self.belief_dim + self.act_dim, 1, self.belief_dim + k + 1) 122 | plt.plot(t, self.uref[k, :], '-g') 123 | 124 | plt.show() 125 | 126 | def run(self, nb_iter=25, verbose=False): 127 | _trace = [] 128 | # init trajectory 129 | for alpha in self.alphas: 130 | _belief, _action, _cost = self.forward_pass(self.ctl, alpha) 131 | if np.all(_belief.mu < 1e8): 132 | self.bref = _belief 133 | self.uref = _action 134 | self.last_return = np.sum(_cost) 135 | break 136 | else: 137 | print("Initial trajectory diverges") 138 | 139 | _trace.append(self.last_return) 140 | 141 | for iter in range(nb_iter): 142 | # get linear system dynamics around ref traj. 143 | self.dyn.taylor_expansion(self.bref, self.uref) 144 | 145 | # get quadratic cost around ref traj. 146 | self.cost.taylor_expansion(self.bref, self.uref) 147 | 148 | bvalue = None 149 | lc, dvalue = None, None 150 | # execute a backward pass 151 | backpass_done = False 152 | while not backpass_done: 153 | lc, bvalue, dvalue, diverge = self.backward_pass() 154 | if np.any(diverge): 155 | # increase lmbda 156 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 157 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 158 | if self.lmbda > self.max_lmbda: 159 | break 160 | else: 161 | continue 162 | else: 163 | backpass_done = True 164 | 165 | # terminate if gradient too small 166 | _g_norm = np.mean(np.max(np.abs(lc.kff) / (np.abs(self.uref) + 1.), axis=1)) 167 | if _g_norm < self.tolgrad and self.lmbda < 1e-5: 168 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. / self.mult_lmbda) 169 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 170 | break 171 | 172 | _belief, _action = None, None 173 | _return, _dreturn = None, None 174 | # execute a forward pass 175 | fwdpass_done = False 176 | if backpass_done: 177 | for alpha in self.alphas: 178 | # apply on actual system 179 | _belief, _action, _cost = self.forward_pass(ctl=lc, alpha=alpha) 180 | 181 | # summed mean return 182 | _return = np.sum(_cost) 183 | 184 | # check return improvement 185 | _dreturn = self.last_return - _return 186 | _expected = - 1. * alpha * (dvalue[0] + alpha * dvalue[1]) 187 | _imp = _dreturn / _expected 188 | if _imp > self.min_imp: 189 | fwdpass_done = True 190 | break 191 | 192 | # accept or reject 193 | if fwdpass_done: 194 | # decrease lmbda 195 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. 
/ self.mult_lmbda) 196 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 197 | 198 | self.bref = _belief 199 | self.uref = _action 200 | self.last_return = _return 201 | 202 | self.vfunc = bvalue 203 | 204 | self.ctl = lc 205 | 206 | _trace.append(self.last_return) 207 | 208 | # terminate if reached objective tolerance 209 | if _dreturn < self.tolfun: 210 | break 211 | else: 212 | # increase lmbda 213 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 214 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 215 | if self.lmbda > self.max_lmbda: 216 | break 217 | else: 218 | continue 219 | 220 | if verbose: 221 | print("iter: ", iter, 222 | " return: ", _return) 223 | 224 | return _trace 225 | -------------------------------------------------------------------------------- /trajopt/bspilqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | from autograd.misc import flatten 4 | 5 | 6 | class Gaussian: 7 | def __init__(self, nb_dim, nb_steps): 8 | self.nb_dim = nb_dim 9 | self.nb_steps = nb_steps 10 | 11 | self.mu = np.zeros((self.nb_dim, self.nb_steps)) 12 | self.sigma = np.zeros((self.nb_dim, self.nb_dim, self.nb_steps)) 13 | for t in range(self.nb_steps): 14 | self.sigma[..., t] = np.eye(self.nb_dim) 15 | 16 | @property 17 | def params(self): 18 | return self.mu, self.sigma 19 | 20 | @params.setter 21 | def params(self, values): 22 | self.mu, self.sigma = values 23 | 24 | class EKF: 25 | 26 | def __init__(self, env): 27 | 28 | self.f = env.unwrapped.dynamics 29 | self.h = env.unwrapped.observe 30 | 31 | self.dfdx = jacobian(self.f, 0) 32 | self.dhdx = jacobian(self.h, 0) 33 | 34 | self.dyn_noise = env.unwrapped.dyn_noise 35 | self.obs_noise = env.unwrapped.obs_noise 36 | 37 | self.belief_dim = env.belief_dim 38 | self.obs_dim = env.obs_dim 39 | self.act_dim = env.act_dim 40 | 41 | def evalf(self, mu_b, u): 42 | return self.f(mu_b, u) 43 | 44 | def evalh(self, mu_b): 45 | return self.h(mu_b) 46 | 47 | def predict(self, mu_b, sigma_b, u): 48 | A = self.dfdx(mu_b, u) 49 | sigma_dyn = self.dyn_noise(mu_b, u) 50 | 51 | sigma_b_pr = A @ sigma_b @ A.T + sigma_dyn 52 | sigma_b_pr = 0.5 * (sigma_b_pr + sigma_b_pr.T) 53 | 54 | mu_b_pr = self.evalf(mu_b, u) 55 | 56 | return mu_b_pr, sigma_b_pr 57 | 58 | def innovate(self, mu_b, sigma_b, z): 59 | H = self.dhdx(mu_b) 60 | sigma_obs = self.obs_noise(mu_b) 61 | 62 | K = sigma_b @ H.T @ np.linalg.inv(H @ sigma_b @ H.T + sigma_obs) 63 | 64 | mu_b_in = mu_b + K @ (z - self.evalh(mu_b)) 65 | sigma_b_in = sigma_b - K @ H @ sigma_b 66 | sigma_b_in = 0.5 * (sigma_b_in + sigma_b_in.T) 67 | 68 | return mu_b_in, sigma_b_in 69 | 70 | def inference(self, mu_b, sigma_b, u, z): 71 | mu_b, sigma_b = self.predict(mu_b, sigma_b, u) 72 | mu_b, sigma_b = self.innovate(mu_b, sigma_b, z) 73 | return mu_b, sigma_b 74 | 75 | 76 | class QuadraticBeliefValue: 77 | def __init__(self, belief_dim, nb_steps): 78 | self.belief_dim = belief_dim 79 | self.nb_steps = nb_steps 80 | 81 | self.S = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 82 | self.s = np.zeros((self.belief_dim, self.nb_steps, )) 83 | self.tau = np.zeros((self.belief_dim, self.nb_steps, )) 84 | 85 | 86 | class QuadraticCost: 87 | def __init__(self, belief_dim, act_dim, nb_steps): 88 | self.belief_dim = belief_dim 89 | self.act_dim = act_dim 90 | 91 | self.nb_steps = nb_steps 92 | 93 | self.Q = np.zeros((self.belief_dim, 
self.belief_dim, self.nb_steps)) 94 | self.q = np.zeros((self.belief_dim, self.nb_steps)) 95 | 96 | self.R = np.zeros((self.act_dim, self.act_dim, self.nb_steps)) 97 | self.r = np.zeros((self.act_dim, self.nb_steps)) 98 | 99 | self.P = np.zeros((self.belief_dim, self.act_dim, self.nb_steps)) 100 | self.p = np.zeros((self.belief_dim * self.belief_dim, self.nb_steps)) 101 | 102 | @property 103 | def params(self): 104 | return self.Q, self.q, self.R, self.r, self.P, self.p 105 | 106 | @params.setter 107 | def params(self, values): 108 | self.Q, self.q, self.R, self.r, self.P, self.p = values 109 | 110 | 111 | class AnalyticalQuadraticCost(QuadraticCost): 112 | def __init__(self, f_cost, belief_dim, act_dim, nb_steps): 113 | super(AnalyticalQuadraticCost, self).__init__(belief_dim, act_dim, nb_steps) 114 | 115 | self.f = f_cost 116 | 117 | self.fQ = hessian(self.f, 0) 118 | self.fq = jacobian(self.f, 0) 119 | 120 | self.fR = hessian(self.f, 2) 121 | self.fr = jacobian(self.f, 2) 122 | 123 | self.fP = jacobian(jacobian(self.f, 0), 2) 124 | self.fp = jacobian(self.f, 1) 125 | 126 | def evalf(self, mu_b, sigma_b, u): 127 | return self.f(mu_b, sigma_b, u) 128 | 129 | def taylor_expansion(self, b, u): 130 | # pad last time step of action traj. 131 | _u = np.hstack((u, np.zeros((self.act_dim, 1)))) 132 | 133 | for t in range(self.nb_steps): 134 | _in = tuple([b.mu[..., t], b.sigma[..., t], _u[..., t]]) 135 | 136 | self.Q[..., t] = self.fQ(*_in) 137 | self.q[..., t] = self.fq(*_in) 138 | 139 | self.R[..., t] = self.fR(*_in) 140 | self.r[..., t] = self.fr(*_in) 141 | 142 | self.P[..., t] = self.fP(*_in) 143 | self.p[..., t] = np.reshape(self.fp(*_in), 144 | (self.belief_dim * self.belief_dim), order='F') 145 | 146 | 147 | class LinearBeliefDynamics: 148 | def __init__(self, belief_dim, obs_dim, act_dim, nb_steps): 149 | self.belief_dim = belief_dim 150 | self.obs_dim = obs_dim 151 | self.act_dim = act_dim 152 | 153 | self.nb_steps = nb_steps 154 | 155 | # Linearization of dynamics 156 | self.A = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 157 | self.H = np.zeros((self.obs_dim, self.obs_dim, self.nb_steps)) 158 | 159 | # EKF matrices 160 | self.K = np.zeros((self.belief_dim, self.obs_dim, self.nb_steps)) 161 | self.D = np.zeros((self.belief_dim, self.obs_dim, self.nb_steps)) 162 | 163 | # Linearization of belief dynamics 164 | self.F = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 165 | self.G = np.zeros((self.belief_dim, self.act_dim, self.nb_steps)) 166 | 167 | self.T = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim, self.nb_steps)) 168 | self.U = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim * self.belief_dim, self.nb_steps)) 169 | self.V = np.zeros((self.belief_dim * self.belief_dim, self.act_dim, self.nb_steps)) 170 | 171 | self.X = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim, self.nb_steps)) 172 | self.Y = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim * self.belief_dim, self.nb_steps)) 173 | self.Z = np.zeros((self.belief_dim * self.belief_dim, self.act_dim, self.nb_steps)) 174 | 175 | self.sigma_x = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 176 | self.sigma_z = np.zeros((self.obs_dim, self.obs_dim, self.nb_steps)) 177 | 178 | @property 179 | def params(self): 180 | return self.A, self.H, self.K, self.D,\ 181 | self.F, self.G, self.T, self.U,\ 182 | self.V, self.X, self.Y, self.Z 183 | 184 | @params.setter 185 | def params(self, values): 186 | self.A, self.H, self.K, self.D,\ 187 | self.F, self.G, self.T, self.U,\ 188 | self.V, self.X, self.Y, self.Z = values 189 | 190 | 
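The `taylor_expansion` in the class below flattens the belief map `(mu_b, sigma_b, u) -> (f, W, phi)` into a single vector so that one Jacobian call produces all linearization blocks at once: the rows for the mean `f` yield `F` and `G`, the rows for the noise term `W` yield `X`, `Y`, `Z`, and the rows for the covariance `phi` yield `T`, `U`, `V`, each split column-wise by `mu`, `sigma`, and `u`. A minimal sketch of that flatten-and-slice pattern on a toy map (the stand-in dynamics and all names here are illustrative only, not part of the package):

```python
import autograd.numpy as np
from autograd import jacobian
from autograd.misc import flatten

n, m = 2, 1  # toy belief and action dimensions

def belief_map(mu, sigma, u):
    # stand-in for AnalyticalLinearBeliefDynamics.dynamics: a mean vector
    # and two covariance-sized terms, in the same order as (f, W, phi)
    mean = mu + 0.1 * np.ones(n) * u[0]
    W = 0.5 * sigma
    phi = sigma + np.outer(mu, mu)
    return mean, W, phi

_in = (np.zeros(n), np.eye(n), np.zeros(m))
_in_flat, _unflatten = flatten(_in)

def _flat(v):
    return flatten(belief_map(*_unflatten(v)))[0]

J = jacobian(_flat)(_in_flat)  # shape (n + 2 * n * n, n + n * n + m)
F = J[:n, :n]                  # d mean / d mu
G = J[:n, -m:]                 # d mean / d u
X = J[n:n + n * n, :n]         # d vec(W) / d mu
T = J[n + n * n:, :n]          # d vec(phi) / d mu
print(F.shape, G.shape, X.shape, T.shape)
```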
191 | class AnalyticalLinearBeliefDynamics(LinearBeliefDynamics): 192 | def __init__(self, f_dyn, f_obs, 193 | dyn_noise, obs_noise, 194 | belief_dim, obs_dim, act_dim, nb_steps): 195 | super(AnalyticalLinearBeliefDynamics, self).__init__(belief_dim, obs_dim, act_dim, nb_steps) 196 | 197 | self.f = f_dyn 198 | self.h = f_obs 199 | 200 | self.dyn_noise = dyn_noise 201 | self.obs_noise = obs_noise 202 | 203 | self.dfdx = jacobian(self.f, 0) 204 | self.dhdx = jacobian(self.h, 0) 205 | 206 | def evalf(self, mu_b, u): 207 | return self.f(mu_b, u) 208 | 209 | def evalh(self, mu_b): 210 | return self.h(mu_b) 211 | 212 | # belief state dynamics 213 | def dynamics(self, mu_b, sigma_b, u): 214 | A = self.dfdx(mu_b, u) 215 | H = self.dhdx(self.evalf(mu_b, u)) 216 | 217 | sigma_dyn = self.dyn_noise(mu_b, u) 218 | sigma_obs = self.obs_noise(self.evalf(mu_b, u)) 219 | 220 | D = A @ sigma_b @ A.T + sigma_dyn 221 | D = 0.5 * (D + D.T) 222 | 223 | K = D @ H.T @ np.linalg.inv(H @ D @ H.T + sigma_obs) 224 | 225 | # stochastic mean dynamics 226 | f = self.evalf(mu_b, u) 227 | W = K @ H @ D 228 | 229 | # covariance dynamics 230 | phi = D - K @ H @ D 231 | phi = 0.5 * (phi + phi.T) 232 | 233 | return f, W, phi 234 | 235 | def taylor_expansion(self, b, u): 236 | for t in range(self.nb_steps): 237 | _in = tuple([b.mu[..., t], b.sigma[..., t], u[..., t]]) 238 | 239 | _in_flat, _unflatten = flatten(_in) 240 | 241 | def _dyn_flat(_in_flat): 242 | return flatten(self.dynamics(*_unflatten(_in_flat)))[0] 243 | 244 | _dyn_jac = jacobian(_dyn_flat) 245 | 246 | _grads = _dyn_jac(_in_flat) 247 | self.F[..., t] = _grads[:self.belief_dim, :self.belief_dim] 248 | self.G[..., t] = _grads[:self.belief_dim, -self.act_dim:] 249 | 250 | self.X[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, :self.belief_dim] 251 | self.Y[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim] 252 | self.Z[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, -self.act_dim:] 253 | 254 | self.T[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, :self.belief_dim] 255 | self.U[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim] 256 | self.V[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, -self.act_dim:] 257 | 258 | def forward(self, b, u, t): 259 | mu_bn, _, sigma_bn = self.dynamics(b.mu[..., t], b.sigma[..., t], u[..., t]) 260 | return mu_bn, sigma_bn 261 | 262 | 263 | class LinearControl: 264 | def __init__(self, belief_dim, act_dim, nb_steps): 265 | self.belief_dim = belief_dim 266 | self.act_dim = act_dim 267 | self.nb_steps = nb_steps 268 | 269 | self.K = np.zeros((self.act_dim, self.belief_dim, self.nb_steps)) 270 | self.kff = np.zeros((self.act_dim, self.nb_steps)) 271 | 272 | @property 273 | def params(self): 274 | return self.K, self.kff 275 | 276 | @params.setter 277 | def params(self, values): 278 | self.K, self.kff = values 279 | 280 | def action(self, b, alpha, bref, uref, t): 281 | db = b.mu[:, t] - bref.mu[:, t] 282 | return uref[:, t] + alpha * self.kff[..., t] + self.K[..., t] @ db 283 | 
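`LinearControl.action` above is a time-varying affine law, `u_t = uref_t + alpha * kff_t + K_t @ (mu_t - muref_t)`, where `alpha` scales the feedforward correction during the backtracking line search. A quick usage sketch, assuming the package is importable as laid out in this file:

```python
import autograd.numpy as np
from trajopt.bspilqr.objects import Gaussian, LinearControl

belief_dim, act_dim, nb_steps = 2, 1, 10

ctl = LinearControl(belief_dim, act_dim, nb_steps)
ctl.kff = 0.1 * np.random.randn(act_dim, nb_steps)

# current and reference beliefs coincide here, so the feedback term
# K_t @ db vanishes and only the scaled feedforward remains
b = Gaussian(belief_dim, nb_steps + 1)
bref = Gaussian(belief_dim, nb_steps + 1)
uref = np.zeros((act_dim, nb_steps))

u0 = ctl.action(b, 0.5, bref, uref, 0)
print(u0)  # equals 0.5 * ctl.kff[:, 0]
```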
-------------------------------------------------------------------------------- /trajopt/bspilqr/src/util.cpp: -------------------------------------------------------------------------------- 1 | #include <armadillo> 2 | #include <pybind11/pybind11.h> 3 | #include <pybind11/numpy.h> 4 | 5 | namespace py = pybind11; 6 | 7 | using namespace arma; 8 | 9 | 10 | typedef py::array_t<double, py::array::f_style | py::array::forcecast> array_tf; 11 | typedef py::array_t<double, py::array::c_style | py::array::forcecast> array_tc; 12 | 13 | 14 | cube array_to_cube(array_tf m) { 15 | 16 | py::buffer_info _m_buff = m.request(); 17 | int n_rows = _m_buff.shape[0]; 18 | int n_cols = _m_buff.shape[1]; 19 | int n_slices = _m_buff.shape[2]; 20 | 21 | cube _m_arma((double *)_m_buff.ptr, n_rows, n_cols, n_slices); 22 | 23 | return _m_arma; 24 | } 25 | 26 | 27 | mat array_to_mat(array_tf m) { 28 | 29 | py::buffer_info _m_buff = m.request(); 30 | int n_rows = _m_buff.shape[0]; 31 | int n_cols = _m_buff.shape[1]; 32 | 33 | mat _m_arma((double *)_m_buff.ptr, n_rows, n_cols); 34 | 35 | return _m_arma; 36 | } 37 | 38 | 39 | vec array_to_vec(array_tf m) { 40 | 41 | py::buffer_info _m_buff = m.request(); 42 | int n_rows = _m_buff.shape[0]; 43 | 44 | vec _m_vec((double *)_m_buff.ptr, n_rows); 45 | 46 | return _m_vec; 47 | } 48 | 49 | 50 | array_tf cube_to_array(cube m) { 51 | 52 | auto _m_array = array_tf({m.n_rows, m.n_cols, m.n_slices}); 53 | 54 | py::buffer_info _m_buff = _m_array.request(); 55 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols * m.n_slices); 56 | 57 | return _m_array; 58 | } 59 | 60 | 61 | array_tf mat_to_array(mat m) { 62 | 63 | auto _m_array = array_tf({m.n_rows, m.n_cols}); 64 | 65 | py::buffer_info _m_buff = _m_array.request(); 66 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols); 67 | 68 | return _m_array; 69 | } 70 | 71 | 72 | array_tf vec_to_array(vec m) { 73 | 74 | auto _m_array = array_tf({m.n_rows}); 75 | 76 | py::buffer_info _m_buff = _m_array.request(); 77 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows); 78 | 79 | return _m_array; 80 | } 81 | 82 | 83 | py::tuple backward_pass(array_tf _Q, array_tf _q, 84 | array_tf _R, array_tf _r, 85 | array_tf _P, array_tf _p, 86 | array_tf _F, array_tf _G, 87 | array_tf _T, array_tf _U, 88 | array_tf _V, array_tf _X, 89 | array_tf _Y, array_tf _Z, 90 | double lmbda, int reg, 91 | int dm_belief, int dm_act, int nb_steps) { 92 | 93 | // inputs 94 | cube Q = array_to_cube(_Q); 95 | mat q = array_to_mat(_q); 96 | 97 | cube R = array_to_cube(_R); 98 | mat r = array_to_mat(_r); 99 | 100 | cube P = array_to_cube(_P); 101 | mat p = array_to_mat(_p); 102 | 103 | cube F = array_to_cube(_F); 104 | cube G = array_to_cube(_G); 105 | 106 | cube T = array_to_cube(_T); 107 | cube U = array_to_cube(_U); 108 | cube V = array_to_cube(_V); 109 | 110 | cube X = array_to_cube(_X); 111 | cube Y = array_to_cube(_Y); 112 | cube Z = array_to_cube(_Z); 113 | 114 | // outputs 115 | cube C(dm_belief, dm_belief, nb_steps); 116 | mat c(dm_belief, nb_steps); 117 | 118 | cube D(dm_act, dm_act, nb_steps); 119 | mat d(dm_act, nb_steps); 120 | 121 | cube E(dm_act, dm_belief, nb_steps); 122 | mat e(dm_belief * dm_belief, nb_steps); 123 | 124 | cube Ereg(dm_act, dm_belief, nb_steps); 125 | cube Dreg(dm_act, dm_act, nb_steps); 126 | cube Dinv(dm_act, dm_act, nb_steps); 127 | 128 | cube S(dm_belief, dm_belief, nb_steps + 1); 129 | mat s(dm_belief, nb_steps + 1); 130 | mat tau(dm_belief * dm_belief, nb_steps + 1); 131 | 132 | vec dS(2); 133 | 134 | cube Sreg(dm_belief, dm_belief, nb_steps + 1); 135 | 136 | cube K(dm_act, dm_belief, nb_steps); 137 | mat kff(dm_act, nb_steps); 138 | 139 | int _diverge = 0; 140 | 141 | // init last time step 142 | S.slice(nb_steps) = 
Q.slice(nb_steps); 143 | s.col(nb_steps) = q.col(nb_steps); 144 | tau.col(nb_steps) = p.col(nb_steps); 145 | 146 | for(int i = nb_steps - 1; i>= 0; --i) 147 | { 148 | C.slice(i) = Q.slice(i) + F.slice(i).t() * S.slice(i+1) * F.slice(i); 149 | D.slice(i) = R.slice(i) + G.slice(i).t() * S.slice(i+1) * G.slice(i); 150 | E.slice(i) = (P.slice(i) + F.slice(i).t() * S.slice(i+1) * G.slice(i)).t(); 151 | 152 | c.col(i) = q.col(i) + F.slice(i).t() * s.col(i+1) + T.slice(i).t() * tau.col(i+1) 153 | + 0.5 * X.slice(i).t() * vectorise(S.slice(i+1)); 154 | 155 | d.col(i) = r.col(i) + G.slice(i).t() * s.col(i+1) + V.slice(i).t() * tau.col(i+1) 156 | + 0.5 * Z.slice(i).t() * vectorise(S.slice(i+1)); 157 | 158 | e.col(i) = p.col(i) + U.slice(i).t() * tau.col(i) + 0.5 * Y.slice(i).t() * vectorise(S.slice(i+1)); 159 | 160 | Sreg.slice(i+1) = S.slice(i+1); 161 | if (reg==2) 162 | Sreg.slice(i+1) += lmbda * eye(dm_belief, dm_belief); 163 | 164 | Ereg.slice(i) = (P.slice(i) + F.slice(i).t() * Sreg.slice(i+1) * G.slice(i)).t(); 165 | 166 | Dreg.slice(i) = R.slice(i) + G.slice(i).t() * Sreg.slice(i+1) * G.slice(i); 167 | if (reg==1) 168 | Dreg.slice(i) += lmbda * eye(dm_act, dm_act); 169 | 170 | if (!(Dreg.slice(i)).is_sympd()) { 171 | _diverge = i; 172 | break; 173 | } 174 | 175 | Dinv.slice(i) = inv(Dreg.slice(i)); 176 | K.slice(i) = - Dinv.slice(i) * Ereg.slice(i); 177 | kff.col(i) = - Dinv.slice(i) * d.col(i); 178 | 179 | dS += join_vert(kff.col(i).t() * d.col(i), 0.5 * kff.col(i).t() * D.slice(i) * kff.col(i)); 180 | 181 | tau.col(i) = e.col(i); 182 | 183 | s.col(i) = c.col(i) + K.slice(i).t() * D.slice(i) * kff.col(i) + 184 | K.slice(i).t() * d.col(i) + E.slice(i).t() * kff.col(i); 185 | 186 | S.slice(i) = C.slice(i) + K.slice(i).t() * D.slice(i) * K.slice(i) + 187 | K.slice(i).t() * E.slice(i) + E.slice(i).t() * K.slice(i); 188 | S.slice(i) = 0.5 * (S.slice(i) + S.slice(i).t()); 189 | } 190 | 191 | // transform outputs to numpy 192 | array_tf _S = cube_to_array(S); 193 | array_tf _s = mat_to_array(s); 194 | array_tf _tau = mat_to_array(tau); 195 | 196 | array_tf _dS = vec_to_array(dS); 197 | 198 | array_tf _K = cube_to_array(K); 199 | array_tf _kff = mat_to_array(kff); 200 | 201 | py::tuple output = py::make_tuple(_S, _s, _tau, 202 | _dS, _K, _kff, _diverge); 203 | return output; 204 | } 205 | 206 | 207 | PYBIND11_MODULE(core, m) 208 | { 209 | m.def("backward_pass", &backward_pass); 210 | } 211 | -------------------------------------------------------------------------------- /trajopt/elqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .elqr import eLQR 2 | -------------------------------------------------------------------------------- /trajopt/elqr/elqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.elqr.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.elqr.objects import QuadraticStateValue 5 | from trajopt.elqr.objects import LinearControl 6 | 7 | 8 | class eLQR: 9 | 10 | def __init__(self, env, nb_steps, 11 | init_state): 12 | 13 | self.env = env 14 | 15 | # expose necessary functions 16 | self.env_dyn = self.env.unwrapped.dynamics 17 | self.env_inv_dyn = self.env.unwrapped.inverse_dynamics 18 | self.env_cost = self.env.unwrapped.cost 19 | self.env_goal = self.env.unwrapped.g 20 | self.env_init = init_state 21 | 22 | self.ulim = self.env.action_space.high 23 | 24 | self.dm_state = self.env.observation_space.shape[0] 25 | 
self.dm_act = self.env.action_space.shape[0] 26 | self.nb_steps = nb_steps 27 | 28 | # reference trajectory 29 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 30 | self.xref[..., 0] = self.env_init 31 | 32 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 33 | 34 | self.gocost = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 35 | self.comecost = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 36 | 37 | self.gocost.V[..., 0] += np.eye(self.dm_state) * 1e-16 38 | self.comecost.V[..., 0] += np.eye(self.dm_state) * 1e-16 39 | 40 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 41 | self.idyn = AnalyticalLinearDynamics(self.env_inv_dyn, self.dm_state, self.dm_act, self.nb_steps) 42 | 43 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 44 | self.ctl.kff = np.random.randn(self.dm_act, self.nb_steps) 45 | 46 | self.ictl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 47 | self.ictl.kff = 1e-2 * np.random.randn(self.dm_act, self.nb_steps) 48 | 49 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 50 | 51 | self.last_objective = - np.inf 52 | 53 | def forward_pass(self, ctl): 54 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 55 | action = np.zeros((self.dm_act, self.nb_steps)) 56 | cost = np.zeros((self.nb_steps + 1,)) 57 | 58 | state[..., 0] = self.env_init 59 | for t in range(self.nb_steps): 60 | action[..., t] = ctl.action(state[..., t], t) 61 | cost[..., t] = self.cost.evalf(state[..., t], action[..., t]) 62 | state[..., t + 1] = self.dyn.evalf(state[..., t], action[..., t]) 63 | 64 | cost[..., -1] = self.cost.evalf(state[..., -1], np.zeros((self.dm_act, ))) 65 | return state, action, cost 66 | 67 | def forward_lqr(self, state): 68 | for t in range(self.nb_steps): 69 | _action = self.ctl.action(state, t) 70 | 71 | _state_n = self.dyn.evalf(state, _action) 72 | 73 | # linearize inverse discrete dynamics 74 | _A, _B, _c = self.idyn.taylor_expansion(_state_n, _action) 75 | 76 | # quadratize cost 77 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 = self.cost.taylor_expansion(state, _action) 78 | 79 | # forward value 80 | _Qxx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _A 81 | _Quu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _B + _B.T @ _Cxu + _Cxu.T @ _B + _Cuu 82 | _Qux = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _A + _Cxu.T @ _A 83 | 84 | _qx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _c + _A.T @ (_cx + self.comecost.v[..., t]) 85 | _qu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _c + _Cxu.T @ _c + _B.T @ (_cx + self.comecost.v[..., t]) + _cu 86 | 87 | _q0 = 0.5 * _c.T @ (_Cxx + self.comecost.V[..., t]) @ _c +\ 88 | _c.T @ (_cx + self.comecost.v[..., t]) + _c0 + self.comecost.v0[..., t] 89 | 90 | # backward value 91 | _Qxx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _A 92 | _Quu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _B + _B.T @ _Cxu + _Cxu.T @ _B + _Cuu 93 | _Qux = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _A + _Cxu.T @ _A 94 | 95 | self.ictl.K[..., t] = - np.linalg.inv(_Quu) @ _Qux 96 | self.ictl.kff[..., t] = - np.linalg.inv(_Quu) @ _qu 97 | 98 | self.comecost.V[..., t + 1] = _Qxx - _Qux.T @ np.linalg.inv(_Quu) @ _Qux 99 | self.comecost.v[..., t + 1] = _qx - _Qux.T @ np.linalg.inv(_Quu) @ _qu 100 | self.comecost.v0[..., t + 1] = _q0 - 0.5 * _qu.T @ np.linalg.inv(_Quu) @ _qu 101 | 102 | # store matrices 103 | self.idyn.A[..., t] = _A 104 | self.idyn.B[..., t] = _B 105 | self.idyn.c[..., t] = _c 106 | 107 | state = - 
np.linalg.inv(self.gocost.V[..., t + 1] + self.comecost.V[..., t + 1]) @\ 108 | (self.gocost.v[..., t + 1] + self.comecost.v[..., t + 1]) 109 | 110 | return state 111 | 112 | def backward_lqr(self, state): 113 | # quadratize last cost 114 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 =\ 115 | self.cost.taylor_expansion(state, np.zeros((self.dm_act, ))) 116 | 117 | self.gocost.V[..., -1] = _Cxx 118 | self.gocost.v[..., -1] = _cx 119 | self.gocost.v0[..., -1] = _c0 120 | 121 | state = - np.linalg.inv(self.gocost.V[..., -1] + self.comecost.V[..., -1]) @\ 122 | (self.gocost.v[..., -1] + self.comecost.v[..., -1]) 123 | 124 | for t in range(self.nb_steps - 1, -1, -1): 125 | _action = self.ictl.action(state, t) 126 | 127 | _state_n = self.idyn.evalf(state, _action) 128 | # linearize discrete dynamics 129 | _A, _B, _c = self.dyn.taylor_expansion(_state_n, _action) 130 | 131 | # quadratize cost 132 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 = self.cost.taylor_expansion(_state_n, _action) 133 | 134 | # backward value 135 | _Qxx = _Cxx + _A.T @ self.gocost.V[..., t + 1] @ _A 136 | _Quu = _Cuu + _B.T @ self.gocost.V[..., t + 1] @ _B 137 | _Qux = _Cxu.T + _B.T @ self.gocost.V[..., t + 1] @ _A 138 | 139 | _qx = _cx + _A.T @ self.gocost.V[..., t + 1] @ _c + _A.T @ self.gocost.v[..., t + 1] 140 | _qu = _cu + _B.T @ self.gocost.V[..., t + 1] @ _c + _B.T @ self.gocost.v[..., t + 1] 141 | 142 | _q0 = _c0 + self.gocost.v0[..., t + 1] + 0.5 * _c.T @ self.gocost.V[..., t + 1] @ _c +\ 143 | _c.T @ self.gocost.v[..., t + 1] 144 | 145 | self.ctl.K[..., t] = - np.linalg.inv(_Quu) @ _Qux 146 | self.ctl.kff[..., t] = - np.linalg.inv(_Quu) @ _qu 147 | 148 | self.gocost.V[..., t] = _Qxx - _Qux.T @ np.linalg.inv(_Quu) @ _Qux 149 | self.gocost.v[..., t] = _qx - _Qux.T @ np.linalg.inv(_Quu) @ _qu 150 | self.gocost.v0[..., t] = _q0 - 0.5 * _qu.T @ np.linalg.inv(_Quu) @ _qu 151 | 152 | # store matrices 153 | self.dyn.A[..., t] = _A 154 | self.dyn.B[..., t] = _B 155 | self.dyn.c[..., t] = _c 156 | 157 | state = - np.linalg.inv(self.gocost.V[..., t] + self.comecost.V[..., t]) @\ 158 | (self.gocost.v[..., t] + self.comecost.v[..., t]) 159 | 160 | return state 161 | 162 | def plot(self): 163 | import matplotlib.pyplot as plt 164 | 165 | plt.figure() 166 | 167 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 168 | for k in range(self.dm_state): 169 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 170 | plt.plot(t, self.xref[k, :], '-b') 171 | 172 | t = np.linspace(0, self.nb_steps, self.nb_steps) 173 | for k in range(self.dm_act): 174 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 175 | plt.plot(t, self.uref[k, :], '-g') 176 | 177 | plt.show() 178 | 179 | def run(self, nb_iter=10): 180 | _trace = [] 181 | 182 | # forward pass to get ref traj. 183 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 184 | # return around current traj. 185 | _trace.append(np.sum(_cost)) 186 | 187 | _state = self.env_init 188 | for _ in range(nb_iter): 189 | # forward lqr 190 | _state = self.forward_lqr(_state) 191 | 192 | # backward lqr 193 | _state = self.backward_lqr(_state) 194 | 195 | # forward pass to get ref traj. 196 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 197 | 198 | # return around current traj. 
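# note: the state handed back and forth between forward_lqr and backward_lqr
# is the running estimate of the optimal state,
# x* = -(V_go + V_come)^{-1} (v_go + v_come),
# so each sweep re-expands dynamics and cost around the latest optimum
# before the rollout above scores the updated controller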
199 | _trace.append(np.sum(_cost)) 200 | 201 | return _trace 202 | -------------------------------------------------------------------------------- /trajopt/elqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | self.v0 = np.zeros((self.nb_steps, )) 13 | 14 | 15 | class QuadraticCost: 16 | def __init__(self, dm_state, dm_act, nb_steps): 17 | self.dm_state = dm_state 18 | self.dm_act = dm_act 19 | 20 | self.nb_steps = nb_steps 21 | 22 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 23 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 24 | 25 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 26 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 27 | 28 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 29 | self.c0 = np.zeros((self.nb_steps, )) 30 | 31 | @property 32 | def params(self): 33 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 34 | 35 | @params.setter 36 | def params(self, values): 37 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 = values 38 | 39 | 40 | class AnalyticalQuadraticCost(QuadraticCost): 41 | def __init__(self, f, dm_state, dm_act, nb_steps): 42 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 43 | 44 | self.f = f 45 | 46 | self.dcdxx = hessian(self.f, 0) 47 | self.dcduu = hessian(self.f, 1) 48 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 49 | 50 | self.dcdx = jacobian(self.f, 0) 51 | self.dcdu = jacobian(self.f, 1) 52 | 53 | def evalf(self, x, u): 54 | return self.f(x, u, 0., 1) 55 | 56 | def taylor_expansion(self, x, u): 57 | _in = tuple([x, u, 0., 1.]) 58 | _Cxx = 0.5 * self.dcdxx(*_in) 59 | _Cuu = 0.5 * self.dcduu(*_in) 60 | _Cxu = self.dcdxu(*_in) 61 | 62 | _cx = self.dcdx(*_in) - self.dcdxx(*_in) @ x - self.dcdxu(*_in) @ u 63 | _cu = self.dcdu(*_in) - self.dcduu(*_in) @ u - x.T @ self.dcdxu(*_in) 64 | 65 | # residual of taylor expansion 66 | _c0 = self.f(*_in)\ 67 | - x.T @ _Cxx @ x\ 68 | - u.T @ _Cuu @ u\ 69 | - x.T @ _Cxu @ u\ 70 | - _cx.T @ x\ 71 | - _cu.T @ u 72 | 73 | return _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 74 | 75 | 76 | class LinearDynamics: 77 | def __init__(self, dm_state, dm_act, nb_steps): 78 | self.dm_state = dm_state 79 | self.dm_act = dm_act 80 | self.nb_steps = nb_steps 81 | 82 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 83 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 84 | self.c = np.zeros((self.dm_state, self.nb_steps)) 85 | 86 | @property 87 | def params(self): 88 | return self.A, self.B, self.c 89 | 90 | @params.setter 91 | def params(self, values): 92 | self.A, self.B, self.c = values 93 | 94 | def sample(self, x, u): 95 | pass 96 | 97 | 98 | class AnalyticalLinearDynamics(LinearDynamics): 99 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 100 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 101 | 102 | self.f = f_dyn 103 | 104 | self.dfdx = jacobian(self.f, 0) 105 | self.dfdu = jacobian(self.f, 1) 106 | 107 | def evalf(self, x, u): 108 | return self.f(x, u) 109 | 110 | def taylor_expansion(self, x, u): 111 | _A = self.dfdx(x, u) 112 | _B = self.dfdu(x, u) 113 | # 
residual of taylor expansion 114 | _c = self.evalf(x, u) - _A @ x - _B @ u 115 | 116 | return _A, _B, _c 117 | 118 | 119 | class LinearControl: 120 | def __init__(self, dm_state, dm_act, nb_steps): 121 | self.dm_state = dm_state 122 | self.dm_act = dm_act 123 | self.nb_steps = nb_steps 124 | 125 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 126 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 127 | 128 | @property 129 | def params(self): 130 | return self.K, self.kff 131 | 132 | @params.setter 133 | def params(self, values): 134 | self.K, self.kff = values 135 | 136 | def action(self, x, t): 137 | return self.kff[..., t] + self.K[..., t] @ x 138 | -------------------------------------------------------------------------------- /trajopt/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .lqr.lqr_v0 import LQRv0 2 | from .lqr.lqr_v1 import LQRv1 3 | from .lqr.lqr_v2 import LQRv2 4 | 5 | from .pendulum.pendulum import Pendulum 6 | from .pendulum.pendulum import PendulumWithCartesianCost 7 | 8 | from .double_pendulum.double_pendulum import DoublePendulum 9 | from .double_pendulum.double_pendulum import DoublePendulumWithCartesianCost 10 | 11 | from .quad_pendulum.quad_pendulum import QuadPendulum 12 | from .quad_pendulum.quad_pendulum import QuadPendulumWithCartesianCost 13 | 14 | from .cartpole.cartpole import Cartpole 15 | from .cartpole.cartpole import CartpoleWithCartesianCost 16 | 17 | from .double_cartpole.double_cartpole import DoubleCartpole 18 | from .double_cartpole.double_cartpole import DoubleCartpoleWithCartesianCost 19 | 20 | from .lightdark.lightdark import LightDark 21 | from .car.car import Car 22 | 23 | from .robot.robot import Robot 24 | -------------------------------------------------------------------------------- /trajopt/envs/car/car.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class Car(gym.Env): 9 | 10 | def __init__(self): 11 | self.state_dim = 4 12 | self.belief_dim = 4 13 | self.act_dim = 2 14 | self.obs_dim = 2 15 | 16 | self.dt = 0.5 17 | 18 | self.dyn_sigma = 1e-8 * np.eye(self.state_dim) 19 | self.obs_sigma = 1e-8 * np.eye(self.obs_dim) 20 | 21 | self.goal = np.array([0., 0., 0., 0.]) 22 | 23 | # car length 24 | self.l = 0.1 25 | 26 | # belief cost weights 27 | self.mu_w = 100. * np.array([1., 1., 1., 1.]) 28 | self.sigma_w = 100. 
* np.array([1., 1., 1., 1.]) 29 | 30 | # action cost weights 31 | self.act_w = np.array([1., 1.]) 32 | 33 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf]) 34 | self.zmax = np.array([np.inf, np.inf]) 35 | self.umax = np.array([np.inf, np.inf]) 36 | 37 | self.state_space = spaces.Box(low=-self.xmax, 38 | high=self.xmax) 39 | 40 | self.observation_space = spaces.Box(low=-self.zmax, 41 | high=self.zmax) 42 | 43 | self.action_space = spaces.Box(low=-self.umax, 44 | high=self.umax) 45 | 46 | self.state = None 47 | self.np_random = None 48 | 49 | self.seed() 50 | 51 | @property 52 | def xlim(self): 53 | return self.xmax 54 | 55 | @property 56 | def ulim(self): 57 | return self.umax 58 | 59 | def dynamics(self, x, u): 60 | _u = np.clip(u, -self.ulim, self.ulim) 61 | 62 | def f(x, u): 63 | return np.hstack([x[3] * np.cos(x[2]), 64 | x[3] * np.sin(x[2]), 65 | x[3] * np.tan(u[1]) / self.l, 66 | u[0]]) 67 | 68 | k1 = f(x, _u) 69 | k2 = f(x + 0.5 * self.dt * k1, _u) 70 | k3 = f(x + 0.5 * self.dt * k2, _u) 71 | k4 = f(x + self.dt * k3, _u) 72 | 73 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 74 | xn = np.clip(xn, -self.xlim, self.xlim) 75 | 76 | return xn 77 | 78 | def observe(self, x): 79 | return np.array([x[0], x[1]]) 80 | 81 | def dyn_noise(self, x=None, u=None): 82 | _u = np.clip(u, -self.ulim, self.ulim) 83 | _x = np.clip(x, -self.xlim, self.xlim) 84 | return self.dyn_sigma 85 | 86 | def obs_noise(self, x=None): 87 | return self.obs_sigma +\ 88 | np.array([[0.5 * (5. - x[0])**2, 0.], 89 | [0., 0.]]) 90 | 91 | # cost over belief 92 | def cost(self, mu_b, sigma_b, u): 93 | return (mu_b - self.goal).T @ np.diag(self.mu_w) @ (mu_b - self.goal) +\ 94 | np.trace(np.diag(self.sigma_w) @ sigma_b) + \ 95 | u.T @ np.diag(self.act_w) @ u 96 | 97 | def seed(self, seed=None): 98 | self.np_random, seed = seeding.np_random(seed) 99 | return [seed] 100 | 101 | def step(self, u): 102 | # state-action dependent dynamics noise 103 | _sigma_dyn = self.dyn_noise(self.state, u) 104 | # evolve deterministic dynamics 105 | self.state = self.dynamics(self.state, u) 106 | # add dynamics noise 107 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma_dyn) 108 | 109 | # state dependent observation noise 110 | _sigma_obs = self.obs_noise(self.state) 111 | # observe state 112 | _z = self.observe(self.state) 113 | # add observation noise 114 | obs = self.np_random.multivariate_normal(mean=_z, cov=_sigma_obs) 115 | 116 | return obs, [], False, {} 117 | 118 | # initial belief 119 | def init(self): 120 | mu = np.array([2., 2., 0., 0.]) 121 | sigma = 1. * np.eye(self.belief_dim) 122 | return mu, sigma 123 | 124 | def reset(self): 125 | self.state = np.array([0., 4., 0., 0.]) 126 | return self.observe(self.state) 127 | -------------------------------------------------------------------------------- /trajopt/envs/cartpole/cartpole.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. 
* np.pi)) - np.pi 13 | 14 | 15 | class Cartpole(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 4 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [x, th, dx, dth] 26 | self.g = np.array([0., 0., 27 | 0., 0.]) 28 | self.gw = np.array([1e1, 1e4, 29 | 1e0, 1e0]) 30 | 31 | # x = [x, th, dx, dth] 32 | self.xmax = np.array([10., np.inf, 33 | np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 10.0 * np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(1,)) 42 | 43 | self.x0 = np.array([0., np.pi, 44 | 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Equations: http://coneural.org/florian/papers/05_cart_pole.pdf 66 | # x = [x, th, dx, dth] 67 | g = 9.81 68 | Mc = 0.37 69 | Mp = 0.127 70 | Mt = Mc + Mp 71 | l = 0.3365 72 | fr = 0.005 73 | 74 | def f(x, u): 75 | q, th, dq, dth = x 76 | 77 | sth = np.sin(th) 78 | cth = np.cos(th) 79 | 80 | # This friction model is not exactly right 81 | # It neglects the influence of the pole 82 | num = g * sth + cth * (- (u - fr * dq) - Mp * l * dth**2 * sth) / Mt 83 | denom = l * ((4. / 3.) - Mp * cth**2 / Mt) 84 | ddth = num / denom 85 | 86 | ddx = (u + Mp * l * (dth**2 * sth - ddth * cth)) / Mt 87 | return np.hstack((dq, dth, ddx, ddth)) 88 | 89 | c1 = f(x, _u) 90 | c2 = f(x + 0.5 * self.dt * c1, _u) 91 | c3 = f(x + 0.5 * self.dt * c2, _u) 92 | c4 = f(x + self.dt * c3, _u) 93 | 94 | xn = x + self.dt / 6. * (c1 + 2. * c2 + 2. * c3 + c4) 95 | xn = np.clip(xn, -self.xlim, self.xlim) 96 | 97 | return xn 98 | 99 | def features(self, x): 100 | return x 101 | 102 | def features_jacobian(self, x): 103 | J = jacobian(self.features, 0) 104 | j = self.features(x) - J(x) @ x 105 | return J, j 106 | 107 | def noise(self, x=None, u=None): 108 | _u = np.clip(u, -self.ulim, self.ulim) 109 | _x = np.clip(x, -self.xlim, self.xlim) 110 | return self.sigma 111 | 112 | def cost(self, x, u, u_last, a): 113 | c = 0. 
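# A sketch of what the branches below assemble (all names as defined above):
#   c(x, u) = du.T @ diag(uw) @ du + a * (z - g).T @ diag(gw) @ (z - g),
# where du is either u itself (absolute action penalty) or the slew-rate
# difference u - u_last, and z is the feature vector linearized around the
# current state.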
114 | 115 | if self.slew_rate: 116 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 117 | else: 118 | c += u.T @ np.diag(self.uw) @ u 119 | 120 | if a: 121 | y = np.hstack((x[0], wrap_angle(x[1]), 122 | x[2], x[3])) if self.periodic else x 123 | J, j = self.features_jacobian(getval(y)) 124 | z = J(getval(y)) @ y + j 125 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 126 | 127 | return c 128 | 129 | def seed(self, seed=None): 130 | self.np_random, seed = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def step(self, u): 134 | # state-action dependent noise 135 | _sigma = self.noise(self.state, u) 136 | # evolve deterministic dynamics 137 | self.state = self.dynamics(self.state, u) 138 | # add noise 139 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 140 | return self.state, [], False, {} 141 | 142 | def init(self): 143 | return self.x0, self.sigma0 144 | 145 | def reset(self): 146 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 147 | return self.state 148 | 149 | 150 | class CartpoleWithCartesianCost(Cartpole): 151 | 152 | def __init__(self): 153 | super(CartpoleWithCartesianCost, self).__init__() 154 | 155 | # g = [x, cs_th, sn_th, dx, dth] 156 | self.g = np.array([0., 157 | 1., 0., 158 | 0., 0.]) 159 | self.gw = np.array([1e1, 160 | 1e4, 1e4, 161 | 1e0, 1e0]) 162 | 163 | def features(self, x): 164 | return np.array([x[0], 165 | np.cos(x[1]), np.sin(x[1]), 166 | x[2], x[3]]) 167 | -------------------------------------------------------------------------------- /trajopt/envs/double_cartpole/double_cartpole.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class DoubleCartpole(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 6 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # x = [x, th1, th2, dx, dth1, dth2] 26 | self.g = np.array([0., 0., 0., 27 | 0., 0., 0.]) 28 | self.gw = np.array([1e1, 1e4, 1e4, 29 | 1e0, 1e0, 1e0]) 30 | 31 | self.xmax = np.array([10., np.inf, np.inf, 32 | np.inf, np.inf, np.inf]) 33 | self.observation_space = spaces.Box(low=-self.xmax, 34 | high=self.xmax) 35 | 36 | self.slew_rate = False 37 | self.uw = 1e-5 * np.ones((self.dm_act, )) 38 | self.umax = 5.0 * np.ones((self.dm_act, )) 39 | self.action_space = spaces.Box(low=-self.umax, 40 | high=self.umax, shape=(1,)) 41 | 42 | self.x0 = np.array([0., np.pi, np.pi, 43 | 0., 0., 0.]) 44 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 45 | 46 | self.periodic = False 47 | 48 | self.state = None 49 | self.np_random = None 50 | 51 | self.seed() 52 | 53 | @property 54 | def xlim(self): 55 | return self.xmax 56 | 57 | @property 58 | def ulim(self): 59 | return self.umax 60 | 61 | def dynamics(self, x, u): 62 | _u = np.clip(u, -self.ulim, self.ulim) 63 | 64 | # import from: https://github.com/JoeMWatson/input-inference-for-control/ 65 | """ 66 | http://www.lirmm.fr/~chemori/Temp/Wafa/double%20pendule%20inverse.pdf 67 | """ 68 | 69 | # x = [x, th1, th2, dx, dth1, dth2] 70 | 71 | g = 9.81 72 | Mc = 0.37 73 | Mp1 = 0.127 74 | Mp2 = 0.127 75 | Mt = Mc + Mp1 + Mp2 76 | L1 = 0.3365 77 | L2 = 0.3365 78 | l1 = L1 / 2. 
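# The nested f below assembles the standard manipulator equation,
#   M(q) @ ddq + C(q, dq) @ dq + G(q) = B @ u,
# and solves it for the accelerations ddq; the RK4 stages further down
# then integrate the resulting ODE over one step of length dt.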
79 | l2 = L2 / 2. 80 | J1 = Mp1 * L1 / 12. 81 | J2 = Mp2 * L2 / 12. 82 | 83 | def f(x, u): 84 | 85 | q = x[0] 86 | th1 = x[1] 87 | th2 = x[2] 88 | dq = x[3] 89 | dth1 = x[4] 90 | dth2 = x[5] 91 | 92 | s1 = np.sin(th1) 93 | c1 = np.cos(th1) 94 | s2 = np.sin(th2) 95 | c2 = np.cos(th2) 96 | sdth = np.sin(th1 - th2) 97 | cdth = np.cos(th1 - th2) 98 | 99 | # helpers 100 | l1_mp1_mp2 = Mp1 * l1 + Mp2 * L1 101 | l1_mp1_mp2_c1 = l1_mp1_mp2 * c1 102 | Mp2_l2 = Mp2 * l2 103 | Mp2_l2_c2 = Mp2_l2 * c2 104 | l1_l2_Mp2 = L1 * l2 * Mp2 105 | l1_l2_Mp2_cdth = l1_l2_Mp2 * cdth 106 | 107 | # inertia 108 | M11 = Mt 109 | M12 = l1_mp1_mp2_c1 110 | M13 = Mp2_l2_c2 111 | M21 = l1_mp1_mp2_c1 112 | M22 = (l1 ** 2) * Mp1 + (L1 ** 2) * Mp2 + J1 113 | M23 = l1_l2_Mp2_cdth 114 | M31 = Mp2_l2_c2 115 | M32 = l1_l2_Mp2_cdth 116 | M33 = (l2 ** 2) * Mp2 + J2 117 | 118 | # Coriolis 119 | C11 = 0. 120 | C12 = - l1_mp1_mp2 * dth1 * s1 121 | C13 = - Mp2_l2 * dth2 * s2 122 | C21 = 0. 123 | C22 = 0. 124 | C23 = l1_l2_Mp2 * dth2 * sdth 125 | C31 = 0. 126 | C32 = - l1_l2_Mp2 * dth1 * sdth 127 | C33 = 0. 128 | 129 | # gravity 130 | G11 = 0. 131 | G21 = - (Mp1 * l1 + Mp2 * L1) * g * s1 132 | G31 = - Mp2 * l2 * g * s2 133 | 134 | # make matrices 135 | M = np.vstack((np.hstack((M11, M12, M13)), np.hstack((M21, M22, M23)), np.hstack((M31, M32, M33)))) 136 | C = np.vstack((np.hstack((C11, C12, C13)), np.hstack((C21, C22, C23)), np.hstack((C31, C32, C33)))) 137 | G = np.vstack((G11, G21, G31)) 138 | 139 | action = np.vstack((u, 0.0, 0.0)) 140 | 141 | M_inv = np.linalg.inv(M) 142 | C_dx = np.dot(C, x[3:].reshape((-1, 1))) 143 | ddx = np.dot(M_inv, action - C_dx - G).squeeze() 144 | 145 | return np.hstack((dq, dth1, dth2, ddx)) 146 | 147 | k1 = f(x, _u) 148 | k2 = f(x + 0.5 * self.dt * k1, _u) 149 | k3 = f(x + 0.5 * self.dt * k2, _u) 150 | k4 = f(x + self.dt * k3, _u) 151 | 152 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 153 | xn = np.clip(xn, -self.xlim, self.xlim) 154 | 155 | return xn 156 | 157 | def features(self, x): 158 | return x 159 | 160 | def features_jacobian(self, x): 161 | J = jacobian(self.features, 0) 162 | j = self.features(x) - J(x) @ x 163 | return J, j 164 | 165 | def noise(self, x=None, u=None): 166 | _u = np.clip(u, -self.ulim, self.ulim) 167 | _x = np.clip(x, -self.xlim, self.xlim) 168 | return self.sigma 169 | 170 | def cost(self, x, u, u_last, a): 171 | c = 0.
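# The scalar a acts as a time-varying activation supplied by the solver:
# when it is zero, the `if a:` guard below skips the state-goal term
# entirely, leaving only the action penalty at that time step.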
172 | 173 | if self.slew_rate: 174 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 175 | else: 176 | c += u.T @ np.diag(self.uw) @ u 177 | 178 | if a: 179 | y = np.hstack((x[0], 180 | wrap_angle(x[1]), 181 | wrap_angle(x[2]), 182 | x[3], x[4], x[5])) if self.periodic else x 183 | J, j = self.features_jacobian(getval(y)) 184 | z = J(getval(y)) @ y + j 185 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 186 | 187 | return c 188 | 189 | def seed(self, seed=None): 190 | self.np_random, seed = seeding.np_random(seed) 191 | return [seed] 192 | 193 | def step(self, u): 194 | # state-action dependent noise 195 | _sigma = self.noise(self.state, u) 196 | # evolve deterministic dynamics 197 | self.state = self.dynamics(self.state, u) 198 | # add noise 199 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 200 | return self.state, [], False, {} 201 | 202 | def init(self): 203 | return self.x0, self.sigma0 204 | 205 | def reset(self): 206 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 207 | return self.state 208 | 209 | 210 | class DoubleCartpoleWithCartesianCost(DoubleCartpole): 211 | 212 | def __init__(self): 213 | super(DoubleCartpoleWithCartesianCost, self).__init__() 214 | 215 | # g = [x, cs_th1, sn_th1, cs_th2, sn_th2, dx, dth1, dth2] 216 | self.g = np.array([0., 217 | 1., 0., 218 | 1., 0., 219 | 0., 0., 0.]) 220 | 221 | self.gw = np.array([1e1, 222 | 1e4, 1e4, 223 | 1e4, 1e4, 224 | 1e0, 1e0, 1e0]) 225 | 226 | def features(self, x): 227 | return np.array([x[0], 228 | np.cos(x[1]), np.sin(x[1]), 229 | np.cos(x[2]), np.sin(x[2]), 230 | x[3], x[4], x[5]]) 231 | -------------------------------------------------------------------------------- /trajopt/envs/double_pendulum/double_pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class DoublePendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 4 19 | self.dm_act = 2 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [th1, th2, dth1, dth2] 26 | self.g = np.array([0., 0., 27 | 0., 0.]) 28 | self.gw = np.array([1e4, 1e4, 29 | 1e0, 1e0]) 30 | 31 | # x = [th1, th2, dth1, dth2] 32 | self.xmax = np.array([np.inf, np.inf, 33 | np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 10.
* np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(2,)) 42 | 43 | self.x0 = np.array([np.pi, 0., 44 | 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Code from PolicySearchToolbox 66 | 67 | masses = np.array([1., 1.]) 68 | lengths = np.array([1., 1.]) 69 | friction = 0.025 * np.array([1., 1.]) 70 | g = 9.81 71 | 72 | inertias = masses * (lengths ** 2 + 1e-4) / 3.0 73 | 74 | def f(x, u): 75 | th1, th2, dth1, dth2 = x 76 | th1 = th1 + np.pi # downward position = PI 77 | 78 | u1, u2 = u 79 | 80 | I1, I2 = inertias 81 | l1, l2 = lengths 82 | m1, m2 = masses 83 | k1, k2 = friction 84 | 85 | l1CM = l1 / 2. 86 | l2CM = l2 / 2. 87 | 88 | s1, c1 = np.sin(th1), np.cos(th1) 89 | s2, c2 = np.sin(th2), np.cos(th2) 90 | 91 | h11 = I1 + I2 + l1CM * l1CM * m1 + l1 * l1 * m2\ 92 | + l2CM * l2CM * m2 + 2. * l1 * l2CM * m2 * c2 93 | h12 = I2 + l2CM * l2CM * m2 + l1 * l2CM * m2 * c2 94 | 95 | b1 = g * l1CM * m1 * s1 + g * l1 * m2 * s1\ 96 | + g * l2CM * m2 * c2 * s1\ 97 | - 2. * dth1 * dth2 * l1 * l2CM * m2 * s2\ 98 | - dth2 * dth2 * l1 * l2CM * m2 * s2\ 99 | + g * l2CM * m2 * c1 * s2 100 | 101 | h21 = I2 + l2CM * l2CM * m2 + l1 * l2CM * m2 * c2 102 | h22 = I2 + l2CM * l2CM * m2 103 | 104 | b2 = g * l2CM * m2 * c2 * s1\ 105 | + dth1 * dth1 * l1 * l2CM * m2 * s2\ 106 | + g * l2CM * m2 * c1 * s2 107 | 108 | u1 = u1 - k1 * dth1 109 | u2 = u2 - k2 * dth2 110 | 111 | det = h11 * h22 - h12 * h21 112 | 113 | ddth1 = (h22 * (u1 - b1) - h12 * (u2 - b2)) / det 114 | ddth2 = (h11 * (u2 - b2) - h21 * (u1 - b1)) / det 115 | 116 | return np.array([dth1, dth2, ddth1, ddth2]) 117 | 118 | k1 = f(x, _u) 119 | k2 = f(x + 0.5 * self.dt * k1, _u) 120 | k3 = f(x + 0.5 * self.dt * k2, _u) 121 | k4 = f(x + self.dt * k3, _u) 122 | 123 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 124 | xn = np.clip(xn, -self.xlim, self.xlim) 125 | 126 | return xn 127 | 128 | def features(self, x): 129 | return x 130 | 131 | def features_jacobian(self, x): 132 | J = jacobian(self.features, 0) 133 | j = self.features(x) - J(x) @ x 134 | return J, j 135 | 136 | def noise(self, x=None, u=None): 137 | _u = np.clip(u, -self.ulim, self.ulim) 138 | _x = np.clip(x, -self.xlim, self.xlim) 139 | return self.sigma 140 | 141 | def cost(self, x, u, u_last, a): 142 | c = 0. 
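# With periodic enabled, both joint angles are wrapped to [-pi, pi) below,
# so the goal term penalizes the shortest angular distance to the target
# rather than the raw, unwrapped joint values.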
143 | 144 | if self.slew_rate: 145 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 146 | else: 147 | c += u.T @ np.diag(self.uw) @ u 148 | 149 | if a: 150 | y = np.hstack((wrap_angle(x[0]), wrap_angle(x[1]), x[2], x[3])) if self.periodic else x 151 | J, j = self.features_jacobian(getval(y)) 152 | z = J(getval(y)) @ y + j 153 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 154 | 155 | return c 156 | 157 | def seed(self, seed=None): 158 | self.np_random, seed = seeding.np_random(seed) 159 | return [seed] 160 | 161 | def step(self, u): 162 | # state-action dependent noise 163 | _sigma = self.noise(self.state, u) 164 | # evolve deterministic dynamics 165 | self.state = self.dynamics(self.state, u) 166 | # add noise 167 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 168 | return self.state, [], False, {} 169 | 170 | def init(self): 171 | return self.x0, self.sigma0 172 | 173 | def reset(self): 174 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 175 | return self.state 176 | 177 | 178 | class DoublePendulumWithCartesianCost(DoublePendulum): 179 | 180 | def __init__(self): 181 | super(DoublePendulumWithCartesianCost, self).__init__() 182 | 183 | # g = [cs_th1, sn_th1, cs_th2, sn_th2, dth1, dth2] 184 | self.g = np.array([1., 0., 185 | 1., 0., 186 | 0., 0.]) 187 | self.gw = np.array([1e4, 1e4, 188 | 1e4, 1e4, 189 | 1e0, 1e0]) 190 | 191 | def features(self, x): 192 | return np.array([np.cos(x[0]), np.sin(x[0]), 193 | np.cos(x[1]), np.sin(x[1]), 194 | x[2], x[3]]) 195 | -------------------------------------------------------------------------------- /trajopt/envs/lightdark/lightdark.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LightDark(gym.Env): 9 | 10 | def __init__(self): 11 | self.state_dim = 2 12 | self.belief_dim = 2 13 | self.act_dim = 2 14 | self.obs_dim = 2 15 | 16 | self.dt = 1. 17 | 18 | self.dyn_sigma = 1e-8 * np.eye(self.state_dim) 19 | self.obs_sigma = 1e-4 * np.eye(self.obs_dim) 20 | 21 | self.goal = np.array([0., 0.]) 22 | 23 | # belief cost weights 24 | self.mu_w = np.array([0.5, 0.5]) 25 | self.sigma_w = np.array([200., 0.]) 26 | 27 | # action cost weights 28 | self.act_w = np.array([0.5, 0.5]) 29 | 30 | self.xmax = np.array([7., 4.]) 31 | self.zmax = np.array([7., 4.]) 32 | self.umax = np.array([np.inf, np.inf]) 33 | 34 | self.state_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.observation_space = spaces.Box(low=-self.zmax, 38 | high=self.zmax) 39 | 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax) 42 | 43 | self.state = None 44 | self.np_random = None 45 | 46 | self.seed() 47 | 48 | @property 49 | def xlim(self): 50 | return self.xmax 51 | 52 | @property 53 | def ulim(self): 54 | return self.umax 55 | 56 | def dynamics(self, x, u): 57 | _u = np.clip(u, -self.ulim, self.ulim) 58 | xn = x + self.dt * _u 59 | xn = np.clip(xn, -self.xlim, self.xlim) 60 | return xn 61 | 62 | def observe(self, x): 63 | return x 64 | 65 | def dyn_noise(self, x=None, u=None): 66 | _u = np.clip(u, -self.ulim, self.ulim) 67 | _x = np.clip(x, -self.xlim, self.xlim) 68 | return self.dyn_sigma 69 | 70 | def obs_noise(self, x=None): 71 | return self.obs_sigma +\ 72 | np.array([[0.5 * (5.
- x[0])**2, 0.], 73 | [0., 0.]]) 74 | 75 | # cost over belief 76 | def cost(self, mu_b, sigma_b, u): 77 | return (mu_b - self.goal).T @ np.diag(self.mu_w) @ (mu_b - self.goal) +\ 78 | np.trace(np.diag(self.sigma_w) @ sigma_b) + \ 79 | u.T @ np.diag(self.act_w) @ u 80 | 81 | def seed(self, seed=None): 82 | self.np_random, seed = seeding.np_random(seed) 83 | return [seed] 84 | 85 | def step(self, u): 86 | # state-action dependent dynamics noise 87 | _sigma_dyn = self.dyn_noise(self.state, u) 88 | # evolve deterministic dynamics 89 | self.state = self.dynamics(self.state, u) 90 | # add dynamics noise 91 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma_dyn) 92 | 93 | # state dependent observation noise 94 | _sigma_obs = self.obs_noise(self.state) 95 | # observe state 96 | _z = self.observe(self.state) 97 | # add observation noise 98 | obs = self.np_random.multivariate_normal(mean=_z, cov=_sigma_obs) 99 | 100 | return obs, [], False, {} 101 | 102 | # initial belief 103 | def init(self): 104 | mu = np.array([2., 2.]) 105 | sigma = np.array([[5., 0.], 106 | [0., 1e-8]]) 107 | return mu, sigma 108 | 109 | def reset(self): 110 | self.state = np.array([2.5, 0.]) 111 | return self.observe(self.state) 112 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v0.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv0(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.1 15 | 16 | self.x0 = np.array([5., 5.]) 17 | self.g = np.array([10., 10.]) 18 | 19 | self.gw = np.array([1e1, 1e1]) 20 | self.uw = np.array([1e0]) 21 | 22 | self.A = np.array([[1.1, 0.], [1.0, 1.0]]) 23 | self.B = np.array([[1.], [0.]]) 24 | self.c = - self.A @ self.g 25 | 26 | self.umax = np.inf 27 | self.action_space = spaces.Box(low=-self.umax, 28 | high=self.umax, shape=(1,)) 29 | 30 | self.xmax = np.array([np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.sigma = 1e-8 * np.eye(self.dm_state) 35 | self.sigma0 = 1e-2 * np.eye(self.dm_state) 36 | 37 | self.state = None 38 | self.np_random = None 39 | 40 | self.seed() 41 | 42 | @property 43 | def xlim(self): 44 | return self.xmax 45 | 46 | @property 47 | def ulim(self): 48 | return self.umax 49 | 50 | def dynamics(self, x, u): 51 | _u = np.clip(u, -self.ulim, self.ulim) 52 | 53 | xn = self.A @ x + self.B @ _u + self.c 54 | 55 | # def f(x, u): 56 | # return self.A @ x + self.B @ u + self.c 57 | # 58 | # k1 = f(x, _u) 59 | # k2 = f(x + 0.5 * self.dt * k1, _u) 60 | # k3 = f(x + 0.5 * self.dt * k2, _u) 61 | # k4 = f(x + self.dt * k3, _u) 62 | # 63 | # xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 64 | # xn = np.clip(xn, -self.xlim, self.xlim) 65 | 66 | return xn 67 | 68 | def inverse_dynamics(self, x, u): 69 | _u = np.clip(u, -self.ulim, self.ulim) 70 | 71 | def f(x, u): 72 | return self.A @ x + self.B @ u + self.c 73 | 74 | k1 = f(x, _u) 75 | k2 = f(x - 0.5 * self.dt * k1, _u) 76 | k3 = f(x - 0.5 * self.dt * k2, _u) 77 | k4 = f(x - self.dt * k3, _u) 78 | 79 | xn = x - self.dt / 6. * (k1 + 2. * k2 + 2.
* k3 + k4) 80 | xn = np.clip(xn, -self.xlim, self.xlim) 81 | 82 | return xn 83 | 84 | def noise(self, x=None, u=None): 85 | _u = np.clip(u, -self.ulim, self.ulim) 86 | _x = np.clip(x, -self.xlim, self.xlim) 87 | return self.sigma 88 | 89 | def cost(self, x, u, u_last, a): 90 | c = u.T @ np.diag(self.uw) @ u 91 | if a: 92 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 93 | 94 | return self.dt * c 95 | 96 | def seed(self, seed=None): 97 | self.np_random, seed = seeding.np_random(seed) 98 | return [seed] 99 | 100 | def step(self, u): 101 | # state-action dependent noise 102 | _sigma = self.noise(self.state, u) 103 | # evolve deterministic dynamics 104 | self.state = self.dynamics(self.state, u) 105 | # add noise 106 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 107 | return self.state, [], False, {} 108 | 109 | def init(self): 110 | return self.x0, self.sigma0 111 | 112 | def reset(self): 113 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 114 | return self.state 115 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v1.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv1(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.01 15 | 16 | self.x0 = np.array([0., 0.]) 17 | self.g = np.array([1., 0.]) 18 | 19 | self.gw = np.array([1e2, 1e0]) 20 | self.uw = np.array([1e-3]) 21 | 22 | m, k, d = 1., 1e-2, 1e-1 23 | self.A = np.array([[0. , 1e0 ], 24 | [- k / m, - d / m]]) * self.dt + np.eye(self.dm_state) 25 | 26 | self.B = np.array([[0.], [1. 
/ m]]) * self.dt 27 | self.c = np.array([0., 0.]) * self.dt 28 | 29 | self.umax = np.inf 30 | self.action_space = spaces.Box(low=-self.umax, 31 | high=self.umax, shape=(1,)) 32 | 33 | self.xmax = np.array([np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 38 | self.sigma = 1e-8 * np.eye(self.dm_state) 39 | 40 | self.state = None 41 | self.np_random = None 42 | 43 | self.seed() 44 | self.reset() 45 | 46 | @property 47 | def xlim(self): 48 | return self.xmax 49 | 50 | @property 51 | def ulim(self): 52 | return self.umax 53 | 54 | def dynamics(self, x, u, dist=None): 55 | _u = np.clip(u, -self.ulim, self.ulim) 56 | 57 | if dist is None: 58 | xn = self.A @ x + self.B @ _u + self.c 59 | else: 60 | mu, sigma = dist['mu'], dist['sigma'] 61 | params = self.np_random.multivariate_normal(mean=mu, cov=sigma) 62 | 63 | A = np.reshape(params[:self.dm_state**2], (self.dm_state, self.dm_state), order='F') 64 | B = np.reshape(params[self.dm_state ** 2:self.dm_state**2 + self.dm_state * self.dm_act], (self.dm_state, self.dm_act), order='F') 65 | c = np.reshape(params[-self.dm_state:], (self.dm_state, ), order='F') 66 | 67 | xn = A @ x + B @ _u + c 68 | return xn 69 | 70 | def noise(self, x=None, u=None): 71 | _u = np.clip(u, -self.ulim, self.ulim) 72 | _x = np.clip(x, -self.xlim, self.xlim) 73 | return self.sigma 74 | 75 | def cost(self, x, u, u_last, a): 76 | c = u.T @ np.diag(self.uw) @ u 77 | if a: 78 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 79 | 80 | return self.dt * c 81 | 82 | def seed(self, seed=None): 83 | self.np_random, seed = seeding.np_random(seed) 84 | return [seed] 85 | 86 | def step(self, u): 87 | # state-action dependent noise 88 | _sigma = self.noise(self.state, u) 89 | # evolve deterministic dynamics 90 | self.state = self.dynamics(self.state, u) 91 | # add noise 92 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 93 | return self.state, [], False, {} 94 | 95 | def evolve(self, u, dist): 96 | # state-action dependent noise 97 | _sigma = self.noise(self.state, u) 98 | # evolve deterministic dynamics 99 | self.state = self.dynamics(self.state, u, dist) 100 | # add noise 101 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 102 | return self.state, [], False, {} 103 | 104 | def init(self): 105 | return self.x0, self.sigma0 106 | 107 | def reset(self): 108 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 109 | return self.state 110 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv2(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.01 15 | 16 | self.x0 = np.array([5., 5.]) 17 | self.g = np.array([1., 0.]) 18 | 19 | self.gw = np.array([1e1, 1e0]) 20 | self.uw = np.array([1e-3]) 21 | 22 | self.A = np.array([[1.0, 0.], 23 | [0.1, 1.1]]) 24 | self.B = np.array([[0.05], [0.]]) 25 | self.c = np.array([0., 0.]) 26 | 27 | self.umax = np.inf 28 | self.action_space = spaces.Box(low=-self.umax, 29 | high=self.umax, shape=(1,)) 30 | 31 | self.xmax = np.array([np.inf, np.inf]) 32 | self.observation_space = spaces.Box(low=-self.xmax, 33 | high=self.xmax) 34 | 35 | self.sigma0 
= 1e-2 * np.eye(self.dm_state) 36 | self.sigma = 1e-8 * np.eye(self.dm_state) 37 | 38 | self.state = None 39 | self.np_random = None 40 | 41 | self.seed() 42 | 43 | @property 44 | def xlim(self): 45 | return self.xmax 46 | 47 | @property 48 | def ulim(self): 49 | return self.umax 50 | 51 | def dynamics(self, x, u, dist=None): 52 | _u = np.clip(u, -self.ulim, self.ulim) 53 | 54 | if dist is None: 55 | xn = self.A @ x + self.B @ _u + self.c 56 | else: 57 | mu, sigma = dist['mu'], dist['sigma'] 58 | params = self.np_random.multivariate_normal(mean=mu, cov=sigma) 59 | 60 | A = np.reshape(params[:self.dm_state**2], (self.dm_state, self.dm_state), order='F') 61 | B = np.reshape(params[self.dm_state ** 2:self.dm_state**2 + self.dm_state * self.dm_act], (self.dm_state, self.dm_act), order='F') 62 | c = np.reshape(params[-self.dm_state:], (self.dm_state, ), order='F') 63 | 64 | xn = A @ x + B @ _u + c 65 | return xn 66 | 67 | def noise(self, x=None, u=None): 68 | _u = np.clip(u, -self.ulim, self.ulim) 69 | _x = np.clip(x, -self.xlim, self.xlim) 70 | return self.sigma 71 | 72 | def cost(self, x, u, u_last, a): 73 | c = u.T @ np.diag(self.uw) @ u 74 | if a: 75 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 76 | 77 | return self.dt * c 78 | 79 | def seed(self, seed=None): 80 | self.np_random, seed = seeding.np_random(seed) 81 | return [seed] 82 | 83 | def step(self, u): 84 | # state-action dependent noise 85 | _sigma = self.noise(self.state, u) 86 | # evolve deterministic dynamics 87 | self.state = self.dynamics(self.state, u) 88 | # add noise 89 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 90 | return self.state, [], False, {} 91 | 92 | def evolve(self, u, dist): 93 | # state-action dependent noise 94 | _sigma = self.noise(self.state, u) 95 | # evolve deterministic dynamics 96 | self.state = self.dynamics(self.state, u, dist) 97 | # add noise 98 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 99 | return self.state, [], False, {} 100 | 101 | def init(self): 102 | return self.x0, self.sigma0 103 | 104 | def reset(self): 105 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 106 | return self.state 107 | -------------------------------------------------------------------------------- /trajopt/envs/pendulum/pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class Pendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 2 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-4 * np.eye(self.dm_state) 24 | 25 | # g = [th, thd] 26 | self.g = np.array([0., 0.]) 27 | self.gw = np.array([1e0, 1e-1]) 28 | 29 | # x = [th, thd] 30 | self.xmax = np.array([np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.slew_rate = False 35 | self.uw = 1e-3 * np.ones((self.dm_act, )) 36 | self.umax = 10. 
* np.ones((self.dm_act, )) 37 | self.action_space = spaces.Box(low=-self.umax, 38 | high=self.umax, shape=(1,)) 39 | 40 | self.x0 = np.array([np.pi, 0.]) 41 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 42 | 43 | self.periodic = False 44 | 45 | self.state = None 46 | self.np_random = None 47 | 48 | self.seed() 49 | 50 | @property 51 | def xlim(self): 52 | return self.xmax 53 | 54 | @property 55 | def ulim(self): 56 | return self.umax 57 | 58 | def dynamics(self, x, u): 59 | _u = np.clip(u, -self.ulim, self.ulim) 60 | 61 | g, m, l, k = 9.81, 1., 1., 1e-3 62 | 63 | def f(x, u): 64 | th, dth = x 65 | return np.hstack((dth, - 3. * g / (2. * l) * np.sin(th + np.pi) + 66 | 3. / (m * l ** 2) * (u - k * dth))) 67 | 68 | k1 = f(x, _u) 69 | k2 = f(x + 0.5 * self.dt * k1, _u) 70 | k3 = f(x + 0.5 * self.dt * k2, _u) 71 | k4 = f(x + self.dt * k3, _u) 72 | 73 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 74 | xn = np.clip(xn, -self.xlim, self.xlim) 75 | 76 | return xn 77 | 78 | def inverse_dynamics(self, x, u): 79 | _u = np.clip(u, -self.ulim, self.ulim) 80 | 81 | g, m, l, k = 9.81, 1., 1., 1e-3 82 | 83 | def f(x, u): 84 | th, dth = x 85 | return np.hstack((dth, - 3. * g / (2. * l) * np.sin(th + np.pi) + 86 | 3. / (m * l ** 2) * (u - k * dth))) 87 | 88 | k1 = f(x, _u) 89 | k2 = f(x - 0.5 * self.dt * k1, _u) 90 | k3 = f(x - 0.5 * self.dt * k2, _u) 91 | k4 = f(x - self.dt * k3, _u) 92 | 93 | xn = x - self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 94 | xn = np.clip(xn, -self.xlim, self.xlim) 95 | 96 | return xn 97 | 98 | def features(self, x): 99 | return x 100 | 101 | def features_jacobian(self, x): 102 | J = jacobian(self.features, 0) 103 | j = self.features(x) - J(x) @ x 104 | return J, j 105 | 106 | def noise(self, x=None, u=None): 107 | _u = np.clip(u, -self.ulim, self.ulim) 108 | _x = np.clip(x, -self.xlim, self.xlim) 109 | return self.sigma 110 | 111 | def cost(self, x, u, u_last, a): 112 | c = 0. 
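# getval strips the autograd tracer, so the Jacobian J and offset j below
# are treated as constants of the expansion point; z is then linear in y
# and the goal penalty exactly quadratic, which is presumably what keeps
# the autograd Hessians of this cost well behaved for the Gauss-Newton
# style solvers in this repo.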
113 | 114 | if self.slew_rate: 115 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 116 | else: 117 | c += u.T @ np.diag(self.uw) @ u 118 | 119 | if a: 120 | y = np.hstack((wrap_angle(x[0]), x[1])) if self.periodic else x 121 | J, j = self.features_jacobian(getval(y)) 122 | z = J(getval(y)) @ y + j 123 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 124 | 125 | return c 126 | 127 | def seed(self, seed=None): 128 | self.np_random, seed = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def step(self, u): 132 | # state-action dependent noise 133 | _sigma = self.noise(self.state, u) 134 | # evolve deterministic dynamics 135 | self.state = self.dynamics(self.state, u) 136 | # add noise 137 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 138 | return self.state, [], False, {} 139 | 140 | def init(self): 141 | return self.x0, self.sigma0 142 | 143 | def reset(self): 144 | # self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 145 | low, high = np.array([np.pi - np.pi / 10., -0.75]), \ 146 | np.array([np.pi + np.pi / 10., 0.75]) 147 | x0 = self.np_random.uniform(low=low, high=high) 148 | self.state = np.hstack((wrap_angle(x0[0]), x0[1])) 149 | return self.state 150 | 151 | 152 | class PendulumWithCartesianCost(Pendulum): 153 | 154 | def __init__(self): 155 | super(PendulumWithCartesianCost, self).__init__() 156 | 157 | # g = [cs_th, sn_th, dth] 158 | self.g = np.array([1., 0., 0.]) 159 | self.gw = np.array([1e4, 1e4, 1e0]) 160 | 161 | def features(self, x): 162 | return np.array([np.cos(x[0]), np.sin(x[0]), x[1]]) 163 | -------------------------------------------------------------------------------- /trajopt/envs/quad_pendulum/quad_pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class QuadPendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 8 19 | self.dm_act = 4 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [th1, th2, th3, th4, dth1, dth2, dth3, dth4] 26 | self.g = np.array([0., 0., 0., 0., 27 | 0., 0., 0., 0.]) 28 | self.gw = np.array([1e4, 1e4, 1e4, 1e4, 29 | 1e0, 1e0, 1e0, 1e0]) 30 | 31 | # x = [th, dth] 32 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf, 33 | np.inf, np.inf, np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 25. 
* np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(4,)) 42 | 43 | self.x0 = np.array([np.pi, 0., 0., 0., 44 | 0., 0., 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Code from PolicySearchToolbox 66 | # Code from Chris Atkeson's (http://www.cs.cmu.edu/~cga/kdc/dynamics-2d/dynamics4.c) 67 | 68 | masses = np.array([1., 1., 1., 1.]) 69 | lengths = np.array([1., 1., 1., 1.]) 70 | friction = 0.025 * np.array([1., 1., 1., 1.]) 71 | G = 9.81 72 | 73 | inertias = masses * (lengths ** 2 + 1e-4) / 3.0 74 | 75 | def f(x, u): 76 | th1, th2, th3, th4,\ 77 | dth1, dth2, dth3, dth4 = x 78 | th1 = th1 + np.pi # downward position = PI 79 | 80 | u1, u2, u3, u4 = u 81 | 82 | I1, I2, I3, I4 = inertias 83 | l1, l2, l3, l4 = lengths 84 | m1, m2, m3, m4 = masses 85 | fr1, fr2, fr3, fr4 = friction 86 | 87 | l1cm = l1 / 2. 88 | l2cm = l2 / 2. 89 | l3cm = l3 / 2. 90 | l4cm = l4 / 2. 91 | 92 | s1, c1 = np.sin(th1), np.cos(th1) 93 | s2, c2 = np.sin(th2), np.cos(th2) 94 | s3, c3 = np.sin(th3), np.cos(th3) 95 | s4, c4 = np.sin(th4), np.cos(th4) 96 | 97 | s12 = s1 * c2 + c1 * s2 98 | c12 = c1 * c2 - s1 * s2 99 | s23 = s2 * c3 + c2 * s3 100 | c23 = c2 * c3 - s2 * s3 101 | s34 = s3 * c4 + c3 * s4 102 | c34 = c3 * c4 - s3 * s4 103 | 104 | s1234 = s12 * c34 + c12 * s34 105 | s123 = s12 * c3 + c12 * s3 106 | s234 = s2 * c34 + c2 * s34 107 | c234 = c2 * c34 - s2 * s34 108 | 109 | dth1_dth1 = dth1 * dth1 110 | dth2_dth2 = dth2 * dth2 111 | dth3_dth3 = dth3 * dth3 112 | dth4_dth4 = dth4 * dth4 113 | dth1_p_dth2_2 = (dth1 + dth2) * (dth1 + dth2) 114 | 115 | l4cm_m4 = l4cm * m4 116 | l3_l4cm_m4 = l3 * l4cm_m4 117 | l2_l4cm_m4 = l2 * l4cm_m4 118 | l2_l4cm_m4_c34 = l2_l4cm_m4 * c34 119 | l1_l4cm_m4 = l1 * l4cm_m4 120 | l3_m4 = l3 * m4 121 | l3cm_m3 = l3cm * m3 122 | l3cm_m3_l3_m4 = l3cm_m3 + l3_m4 123 | l2cm_m2 = l2cm * m2 124 | l2cm_m2_p_l2_m3_p_m4 = l2cm_m2 + l2 * (m3 + m4) 125 | l2_l3cm_m3_l3_m4 = l2 * l3cm_m3_l3_m4 126 | l1_l3cm_m3_l3_m4 = l1 * l3cm_m3_l3_m4 127 | a123d = dth1 + dth2 + dth3 128 | l1_l3cm_m3_l3_m4_s23 = l1_l3cm_m3_l3_m4 * s23 129 | l2_l4cm_m4_s34 = l2_l4cm_m4 * s34 130 | 131 | expr1 = G * (s123 * l3cm_m3_l3_m4 + s1234 * l4cm_m4) 132 | expr2 = (2 * a123d + dth4) * dth4 * l3_l4cm_m4 * s4 133 | expr3 = G * l2cm_m2_p_l2_m3_p_m4 * s12 134 | expr4a = 2 * dth1 * dth4 + 2 * dth2 * dth4 + 2 * dth3 * dth4 + dth4_dth4 135 | expr4b = 2 * dth1 * dth3 + 2 * dth2 * dth3 + dth3_dth3 136 | expr4 = (expr4b + expr4a) * l2_l4cm_m4_s34 137 | expr5a = dth1_dth1 * l1 * s234 138 | expr5 = l4cm_m4 * expr5a 139 | expr6 = expr4b * l2_l3cm_m3_l3_m4 * s3 140 | expr7 = l1 * l2cm_m2_p_l2_m3_p_m4 141 | expr8 = l1 * (m2 + m3 + m4) 142 | expr9a = 2 * dth1 * dth2 + dth2_dth2 143 | expr9 = (expr9a + expr4b) 144 | 145 | p = I4 + l4cm * l4cm_m4 146 | o = p + l3_l4cm_m4 * c4 147 | n = o + l2_l4cm_m4_c34 148 | m = n + l1_l4cm_m4 * c234 149 | 150 | t = u4 - fr4 * dth4 \ 151 | - (l4cm_m4 * (a123d * a123d * l3 * s4 + 152 | dth1_p_dth2_2 * l2 * s34 + 153 | expr5a + G * s1234)) 154 | 155 | l = o 156 | k = I3 + o + l3cm * l3cm_m3 + l3 * l3_m4 + l3_l4cm_m4 * c4 157 | j = k + l2_l3cm_m3_l3_m4 * c3 + l2_l4cm_m4_c34 158 | i = j + l1_l3cm_m3_l3_m4 * c23 + l1_l4cm_m4 * c234 159 
| 160 | s = u3 - fr3 * dth3 \ 161 | - ((dth1_p_dth2_2 * l2_l3cm_m3_l3_m4 * s3 + dth1_dth1 * l1_l3cm_m3_l3_m4_s23) 162 | - expr2 + dth1_p_dth2_2 * l2_l4cm_m4_s34 + expr5 + expr1) 163 | 164 | h = n 165 | g = j 166 | 167 | f = j + I2 + l2cm * l2cm_m2 + (l2 * l2) * (m3 + m4)\ 168 | + l2_l3cm_m3_l3_m4 * c3 + l2_l4cm_m4_c34 169 | 170 | e = f + i - j + expr7 * c2 171 | 172 | r = u2 - fr2 * dth2\ 173 | - (dth1_dth1 * expr7 * s2 174 | - expr6 + dth1_dth1 * l1_l3cm_m3_l3_m4_s23 175 | - expr2 - expr4 + expr5 + expr3 + expr1) 176 | 177 | d = m 178 | c = i 179 | b = e 180 | a = 2 * e + I1 - f + (l1cm * l1cm) * m1 + l1 * expr8 181 | 182 | q = u1 - fr1 * dth1\ 183 | - (- expr9a * expr7 * s2 - expr6 - expr9 * l1_l3cm_m3_l3_m4_s23 184 | - expr2 - expr4 - (expr9 + expr4a) * l1_l4cm_m4 * s234 185 | + expr3 + G * (l1cm * m1 + expr8) * s1 + expr1) 186 | 187 | det = (d * g * j * m - c * h * j * m - d * f * k * m + b * h * k * m + c * f * l * m - b * g * l * m - d * g * i * n + 188 | c * h * i * n + d * e * k * n - a * h * k * n - c * e * l * n + a * g * l * n + d * f * i * o - b * h * i * o - 189 | d * e * j * o + a * h * j * o + b * e * l * o - a * f * l * o - c * f * i * p + b * g * i * p + c * e * j * p - 190 | a * g * j * p - b * e * k * p + a * f * k * p) 191 | 192 | ddth1 = q * (-(h * k * n) + g * l * n + h * j * o - f * l * o - g * j * p + f * k * p)\ 193 | + r * (d * k * n - c * l * n - d * j * o + b * l * o + c * j * p - b * k * p)\ 194 | + s * (-(d * g * n) + c * h * n + d * f * o - b * h * o - c * f * p + b * g * p)\ 195 | + t * (d * g * j - c * h * j - d * f * k + b * h * k + c * f * l - b * g * l) 196 | 197 | ddth2 = q * (h * k * m - g * l * m - h * i * o + e * l * o + g * i * p - e * k * p)\ 198 | + r * (-(d * k * m) + c * l * m + d * i * o - a * l * o - c * i * p + a * k * p)\ 199 | + s * (d * g * m - c * h * m - d * e * o + a * h * o + c * e * p - a * g * p)\ 200 | + t * (-(d * g * i) + c * h * i + d * e * k - a * h * k - c * e * l + a * g * l) 201 | 202 | ddth3 = q * (-(h * j * m) + f * l * m + h * i * n - e * l * n - f * i * p + e * j * p)\ 203 | + r * (d * j * m - b * l * m - d * i * n + a * l * n + b * i * p - a * j * p)\ 204 | + s * (-(d * f * m) + b * h * m + d * e * n - a * h * n - b * e * p + a * f * p)\ 205 | + t * (d * f * i - b * h * i - d * e * j + a * h * j + b * e * l - a * f * l) 206 | 207 | ddth4 = q * (g * j * m - f * k * m - g * i * n + e * k * n + f * i * o - e * j * o)\ 208 | + r * (-(c * j * m) + b * k * m + c * i * n - a * k * n - b * i * o + a * j * o)\ 209 | + s * (c * f * m - b * g * m - c * e * n + a * g * n + b * e * o - a * f * o)\ 210 | + t * (-(c * f * i) + b * g * i + c * e * j - a * g * j - b * e * k + a * f * k) 211 | 212 | ddth1 = ddth1 / det 213 | ddth2 = ddth2 / det 214 | ddth3 = ddth3 / det 215 | ddth4 = ddth4 / det 216 | 217 | return np.array([dth1, dth2, dth3, dth4, 218 | ddth1, ddth2, ddth3, ddth4]) 219 | 220 | k1 = f(x, _u) 221 | k2 = f(x + 0.5 * self.dt * k1, _u) 222 | k3 = f(x + 0.5 * self.dt * k2, _u) 223 | k4 = f(x + self.dt * k3, _u) 224 | 225 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. 
* k3 + k4) 226 | xn = np.clip(xn, -self.xlim, self.xlim) 227 | 228 | return xn 229 | 230 | def features(self, x): 231 | return x 232 | 233 | def features_jacobian(self, x): 234 | J = jacobian(self.features, 0) 235 | j = self.features(x) - J(x) @ x 236 | return J, j 237 | 238 | def noise(self, x=None, u=None): 239 | _u = np.clip(u, -self.ulim, self.ulim) 240 | _x = np.clip(x, -self.xlim, self.xlim) 241 | return self.sigma 242 | 243 | def cost(self, x, u, u_last, a): 244 | c = 0. 245 | 246 | if self.slew_rate: 247 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 248 | else: 249 | c += u.T @ np.diag(self.uw) @ u 250 | 251 | if a: 252 | y = np.hstack((wrap_angle(x[0]), wrap_angle(x[1]), 253 | wrap_angle(x[2]), wrap_angle(x[3]), 254 | x[4], x[5], x[6], x[7])) if self.periodic else x 255 | J, j = self.features_jacobian(getval(y)) 256 | z = J(getval(y)) @ y + j 257 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 258 | 259 | return c 260 | 261 | def seed(self, seed=None): 262 | self.np_random, seed = seeding.np_random(seed) 263 | return [seed] 264 | 265 | def step(self, u): 266 | # state-action dependent noise 267 | _sigma = self.noise(self.state, u) 268 | # evolve deterministic dynamics 269 | self.state = self.dynamics(self.state, u) 270 | # add noise 271 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 272 | return self.state, [], False, {} 273 | 274 | def init(self): 275 | return self.x0, self.sigma0 276 | 277 | def reset(self): 278 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 279 | return self.state 280 | 281 | 282 | class QuadPendulumWithCartesianCost(QuadPendulum): 283 | 284 | def __init__(self): 285 | super(QuadPendulumWithCartesianCost, self).__init__() 286 | 287 | self.g = np.array([1., 0., 288 | 1., 0., 289 | 1., 0., 290 | 1., 0., 291 | 0., 0., 0., 0.]) 292 | self.gw = np.array([1e4, 1e4, 293 | 1e4, 1e4, 294 | 1e4, 1e4, 295 | 1e4, 1e4, 296 | 1e0, 1e0, 1e0, 1e0]) 297 | 298 | def features(self, x): 299 | return np.array([np.cos(x[0]), np.sin(x[0]), 300 | np.cos(x[1]), np.sin(x[1]), 301 | np.cos(x[2]), np.sin(x[2]), 302 | np.cos(x[3]), np.sin(x[3]), 303 | x[4], x[5], x[6], x[7]]) 304 | -------------------------------------------------------------------------------- /trajopt/envs/robot/robot.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class Robot(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 4 12 | self.dm_act = 2 13 | 14 | self.dt = 0.025 15 | 16 | # car length 17 | self.l = 0.1 18 | 19 | self.x0 = np.array([5., 5., 0., 0.]) 20 | self.g = np.array([0., 0., 0., 0.]) 21 | 22 | self.gw = np.array([1e1, 1e1, 1., 1.]) 23 | self.uw = np.array([1e-3, 1e-3]) 24 | 25 | self.slew_rate = False 26 | self.umax = np.array([np.inf, np.inf]) 27 | self.action_space = spaces.Box(low=-self.umax, 28 | high=self.umax) 29 | 30 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 35 | self.sigma = 1e-8 * np.eye(self.dm_state) 36 | 37 | self.state = None 38 | self.np_random = None 39 | 40 | self.seed() 41 | self.reset() 42 | 43 | @property 44 | def xlim(self): 45 | return self.xmax 46 | 47 | @property 48 | def ulim(self): 49 | return self.umax 50 | 51 | def dynamics(self, x, u): 52 | _u = np.clip(u, -self.ulim, self.ulim) 
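# f below is the standard kinematic car model: state (x, y, th, v),
# controls (a, phi) for acceleration and steering angle, and turn rate
# v * tan(phi) / l with wheelbase l.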
53 | 54 | def f(x, u): 55 | # x, y, th, v 56 | x, y, th, v = x 57 | a, phi = u 58 | return np.array([v * np.cos(th), 59 | v * np.sin(th), 60 | v * np.tan(phi) / self.l, 61 | a]) 62 | k1 = f(x, _u) 63 | k2 = f(x + 0.5 * self.dt * k1, _u) 64 | k3 = f(x + 0.5 * self.dt * k2, _u) 65 | k4 = f(x + self.dt * k3, _u) 66 | 67 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 68 | xn = np.clip(xn, -self.xlim, self.xlim) 69 | 70 | return xn 71 | 72 | def noise(self, x=None, u=None): 73 | _u = np.clip(u, -self.ulim, self.ulim) 74 | _x = np.clip(x, -self.xlim, self.xlim) 75 | return self.sigma 76 | 77 | def cost(self, x, u, u_last, a): 78 | c = u.T @ np.diag(self.uw) @ u 79 | if a: 80 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 81 | 82 | return self.dt * c 83 | 84 | def seed(self, seed=None): 85 | self.np_random, seed = seeding.np_random(seed) 86 | return [seed] 87 | 88 | def step(self, u): 89 | # state-action dependent noise 90 | _sigma = self.noise(self.state, u) 91 | # evolve deterministic dynamics 92 | self.state = self.dynamics(self.state, u) 93 | # add noise 94 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 95 | return self.state, [], False, {} 96 | 97 | def init(self): 98 | return self.x0, self.sigma0 99 | 100 | def reset(self): 101 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 102 | return self.state 103 | -------------------------------------------------------------------------------- /trajopt/gps/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/gps/__init__.py: -------------------------------------------------------------------------------- 1 | from .mbgps import MBGPS 2 | from .mfgps import MFGPS 3 | -------------------------------------------------------------------------------- /trajopt/gps/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | from mimo.distributions import MatrixNormalWishart 5 | from mimo.distributions import LinearGaussianWithMatrixNormalWishart 6 | from mimo.distributions import MatrixNormalWithKnownPrecision 7 | from mimo.distributions import LinearGaussianWithMatrixNormal 8 | from mimo.distributions import LinearGaussianWithKnownPrecision 9 | 10 | 11 | class Gaussian: 12 | def __init__(self, nb_dim, nb_steps): 13 | self.nb_dim = nb_dim 14 | self.nb_steps = nb_steps 15 | 16 | self.mu = np.zeros((self.nb_dim, self.nb_steps)) 17 | self.sigma = np.zeros((self.nb_dim, self.nb_dim, self.nb_steps)) 18 | for t in range(self.nb_steps): 19 | self.sigma[..., t] = np.eye(self.nb_dim) 20 | 21 | @property 22 | def params(self): 23 | return self.mu, self.sigma 24 | 25 | @params.setter 26 | def params(self, values): 27 | self.mu, self.sigma = values 28 | 29 | def sample(self, x): 30 | pass 31 | 32 | 33 | class QuadraticStateValue: 34 | def __init__(self, dm_state, nb_steps):
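# Containers for a time-indexed quadratic state-value function, stored as
#   V_t(x) = x.T @ V[..., t] @ x + v[..., t].T @ x + v0[t],
# with one slice per time step of the backward pass.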
35 | self.dm_state = dm_state 36 | self.nb_steps = nb_steps 37 | 38 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 39 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 40 | self.v0 = np.zeros((self.nb_steps, )) 41 | 42 | 43 | class QuadraticStateActionValue: 44 | def __init__(self, dm_state, dm_act, nb_steps): 45 | self.dm_state = dm_state 46 | self.dm_act = dm_act 47 | self.nb_steps = nb_steps 48 | 49 | self.Qxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 50 | self.Quu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 51 | self.Qux = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 52 | 53 | self.qx = np.zeros((self.dm_state, self.nb_steps, )) 54 | self.qu = np.zeros((self.dm_act, self.nb_steps, )) 55 | 56 | self.q0 = np.zeros((self.nb_steps, )) 57 | 58 | 59 | class QuadraticCost: 60 | def __init__(self, dm_state, dm_act, nb_steps): 61 | self.dm_state = dm_state 62 | self.dm_act = dm_act 63 | 64 | self.nb_steps = nb_steps 65 | 66 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 67 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 68 | 69 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 70 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 71 | 72 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 73 | self.c0 = np.zeros((self.nb_steps, )) 74 | 75 | @property 76 | def params(self): 77 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 78 | 79 | @params.setter 80 | def params(self, values): 81 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 = values 82 | 83 | def evaluate(self, x, u): 84 | _ret = 0. 85 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 86 | for t in range(self.nb_steps): 87 | _ret += x[..., t].T @ self.Cxx[..., t] @ x[..., t] +\ 88 | _u[..., t].T @ self.Cuu[..., t] @ _u[..., t] +\ 89 | x[..., t].T @ self.Cxu[..., t] @ _u[..., t] +\ 90 | self.cx[..., t].T @ x[..., t] +\ 91 | self.cu[..., t].T @ _u[..., t] + self.c0[..., t] 92 | return _ret 93 | 94 | 95 | class AnalyticalQuadraticCost(QuadraticCost): 96 | def __init__(self, f, dm_state, dm_act, nb_steps): 97 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 98 | 99 | self.f = f 100 | 101 | self.dcdxx = hessian(self.f, 0) 102 | self.dcduu = hessian(self.f, 1) 103 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 104 | 105 | self.dcdx = jacobian(self.f, 0) 106 | self.dcdu = jacobian(self.f, 1) 107 | 108 | def evalf(self, x, u, u_last, a): 109 | return self.f(x, u, u_last, a) 110 | 111 | def taylor_expansion(self, x, u, a): 112 | # pad last time step of action traj. 113 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 114 | 115 | for t in range(self.nb_steps): 116 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 117 | self.Cxx[..., t] = 0.5 * self.dcdxx(*_in) 118 | self.Cuu[..., t] = 0.5 * self.dcduu(*_in) 119 | self.Cxu[..., t] = 0.5 * self.dcdxu(*_in) 120 | 121 | self.cx[..., t] = self.dcdx(*_in) - self.dcdxx(*_in) @ x[..., t] - 2. * self.dcdxu(*_in) @ _u[..., t] 122 | self.cu[..., t] = self.dcdu(*_in) - self.dcduu(*_in) @ _u[..., t] - 2. * x[..., t].T @ self.dcdxu(*_in) 123 | 124 | # residual of taylor expansion 125 | self.c0[..., t] = self.f(*_in)\ 126 | - x[..., t].T @ self.Cxx[..., t] @ x[..., t]\ 127 | - _u[..., t].T @ self.Cuu[..., t] @ _u[..., t]\ 128 | - 2.
* x[..., t].T @ self.Cxu[..., t] @ _u[..., t]\ 129 | - self.cx[..., t].T @ x[..., t]\ 130 | - self.cu[..., t].T @ _u[..., t] 131 | 132 | 133 | class LinearGaussianDynamics: 134 | def __init__(self, dm_state, dm_act, nb_steps): 135 | self.dm_state = dm_state 136 | self.dm_act = dm_act 137 | self.nb_steps = nb_steps 138 | 139 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 140 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 141 | self.c = np.zeros((self.dm_state, self.nb_steps)) 142 | self.sigma = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 143 | for t in range(self.nb_steps): 144 | self.sigma[..., t] = 1e-8 * np.eye(self.dm_state) 145 | 146 | @property 147 | def params(self): 148 | return self.A, self.B, self.c, self.sigma 149 | 150 | @params.setter 151 | def params(self, values): 152 | self.A, self.B, self.c, self.sigma = values 153 | 154 | def sample(self, x, u): 155 | pass 156 | 157 | 158 | class AnalyticalLinearGaussianDynamics(LinearGaussianDynamics): 159 | def __init__(self, f_dyn, noise, dm_state, dm_act, nb_steps): 160 | super(AnalyticalLinearGaussianDynamics, self).__init__(dm_state, dm_act, nb_steps) 161 | 162 | self.f = f_dyn 163 | self.noise = noise 164 | 165 | self.dfdx = jacobian(self.f, 0) 166 | self.dfdu = jacobian(self.f, 1) 167 | 168 | def evalf(self, x, u): 169 | return self.f(x, u) 170 | 171 | def taylor_expansion(self, x, u): 172 | A = self.dfdx(x, u) 173 | B = self.dfdu(x, u) 174 | # residual of taylor expansion 175 | c = self.evalf(x, u) - A @ x - B @ u 176 | sigma = self.noise(x, u) 177 | return A, B, c, sigma 178 | 179 | def extended_kalman(self, init_state, lgc, ulim): 180 | lgd = LinearGaussianDynamics(self.dm_state, self.dm_act, self.nb_steps) 181 | 182 | xdist = Gaussian(self.dm_state, self.nb_steps + 1) 183 | udist = Gaussian(self.dm_act, self.nb_steps) 184 | 185 | # forward propagation of mean dynamics 186 | xdist.mu[..., 0], xdist.sigma[..., 0] = init_state 187 | for t in range(self.nb_steps): 188 | udist.mu[..., t] = np.clip(lgc.K[..., t] @ xdist.mu[..., t] + lgc.kff[..., t], -ulim, ulim) 189 | xdist.mu[..., t + 1] = self.evalf(xdist.mu[..., t], udist.mu[..., t]) 190 | 191 | for t in range(self.nb_steps): 192 | lgd.A[..., t], lgd.B[..., t], lgd.c[..., t], lgd.sigma[..., t] =\ 193 | self.taylor_expansion(xdist.mu[..., t], udist.mu[..., t]) 194 | 195 | # construct variance of the next time step with extended Kalman filtering 196 | mu_x, sigma_x = xdist.mu[..., t], xdist.sigma[..., t] 197 | K, kff, ctl_sigma = lgc.K[..., t], lgc.kff[..., t], lgc.sigma[..., t] 198 | 199 | # propagate variance of action dist.
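# For u = K @ x + kff + eps with eps ~ N(0, ctl_sigma), the action marginal
# is Cov(u) = K @ sigma_x @ K.T + ctl_sigma; the joint state-action
# covariance sigma_xu below stacks sigma_x with the cross term
# sigma_x @ K.T before it is pushed through [A B].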
200 | u_sigma = ctl_sigma + K @ sigma_x @ K.T 201 | u_sigma = 0.5 * (u_sigma + u_sigma.T) 202 | udist.sigma[..., t] = u_sigma 203 | 204 | AB = np.hstack((lgd.A[..., t], lgd.B[..., t])) 205 | sigma_xu = np.vstack((np.hstack((sigma_x, sigma_x @ K.T)), 206 | np.hstack((K @ sigma_x, u_sigma)))) 207 | 208 | sigma_xn = lgd.sigma[..., t] + AB @ sigma_xu @ AB.T 209 | sigma_xn = 0.5 * (sigma_xn + sigma_xn.T) 210 | xdist.sigma[..., t + 1] = sigma_xn 211 | 212 | return xdist, udist, lgd 213 | 214 | 215 | class LearnedLinearGaussianDynamics(LinearGaussianDynamics): 216 | def __init__(self, dm_state, dm_act, nb_steps, prior): 217 | super(LearnedLinearGaussianDynamics, self).__init__(dm_state, dm_act, nb_steps) 218 | 219 | hypparams = dict(M=np.zeros((self.dm_state, self.dm_state + self.dm_act + 1)), 220 | K=prior['K'] * np.eye(self.dm_state + self.dm_act + 1), 221 | psi=prior['psi'] * np.eye(self.dm_state), 222 | nu=self.dm_state + prior['nu']) 223 | self.prior = MatrixNormalWishart(**hypparams) 224 | 225 | def learn(self, data): 226 | for t in range(self.nb_steps): 227 | input = np.hstack((data['x'][:, t, :].T, data['u'][:, t, :].T)) 228 | target = data['xn'][:, t, :].T 229 | 230 | model = LinearGaussianWithMatrixNormalWishart(self.prior, affine=True) 231 | model = model.max_aposteriori(y=target, x=input) 232 | 233 | self.A[..., t] = model.likelihood.A[:, :self.dm_state] 234 | self.B[..., t] = model.likelihood.A[:, self.dm_state:self.dm_state + self.dm_act] 235 | self.c[..., t] = model.likelihood.A[:, -1] 236 | self.sigma[..., t] = model.likelihood.sigma 237 | 238 | 239 | class LearnedLinearGaussianDynamicsWithKnownNoise(LinearGaussianDynamics): 240 | def __init__(self, dm_state, dm_act, nb_steps, noise, prior): 241 | super(LearnedLinearGaussianDynamicsWithKnownNoise, self).__init__(dm_state, dm_act, nb_steps) 242 | 243 | hypparams = dict(M=np.zeros((self.dm_state, self.dm_state + self.dm_act + 1)), 244 | K=prior['K'] * np.eye(self.dm_state + self.dm_act + 1), 245 | V=np.linalg.inv(noise)) 246 | self.prior = MatrixNormalWithKnownPrecision(**hypparams) 247 | self.noise = noise # assumed stationary over all time steps 248 | 249 | def learn(self, data): 250 | for t in range(self.nb_steps): 251 | input = np.hstack((data['x'][:, t, :].T, data['u'][:, t, :].T)) 252 | target = data['xn'][:, t, :].T 253 | 254 | likelihood = LinearGaussianWithKnownPrecision(lmbda=np.linalg.inv(self.noise), affine=True) 255 | model = LinearGaussianWithMatrixNormal(self.prior, likelihood=likelihood, affine=True) 256 | model = model.max_aposteriori(y=target, x=input) 257 | 258 | self.A[..., t] = model.likelihood.A[:, :self.dm_state] 259 | self.B[..., t] = model.likelihood.A[:, self.dm_state:self.dm_state + self.dm_act] 260 | self.c[..., t] = model.likelihood.A[:, -1] 261 | self.sigma[..., t] = model.likelihood.sigma 262 | 263 | 264 | class LinearGaussianControl: 265 | def __init__(self, dm_state, dm_act, nb_steps, init_ctl_sigma=1.): 266 | self.dm_state = dm_state 267 | self.dm_act = dm_act 268 | self.nb_steps = nb_steps 269 | 270 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 271 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 272 | 273 | self.sigma = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 274 | for t in range(self.nb_steps): 275 | self.sigma[..., t] = init_ctl_sigma * np.eye(self.dm_act) 276 | 277 | @property 278 | def params(self): 279 | return self.K, self.kff, self.sigma 280 | 281 | @params.setter 282 | def params(self, values): 283 | self.K, self.kff, self.sigma = values 284 | 285 | def 
mean(self, x, t): 286 | return np.einsum('kh,h->k', self.K[..., t], x) + self.kff[..., t] 287 | 288 | def sample(self, x, t, stoch=True): 289 | mu = self.mean(x, t) 290 | if stoch: 291 | return np.random.multivariate_normal(mean=mu, cov=self.sigma[..., t]) 292 | else: 293 | return mu 294 | 295 | def forward(self, xdist, t): 296 | x_mu, x_sigma = xdist.mu[..., t], xdist.sigma[..., t] 297 | K, kff, ctl_sigma = self.K[..., t], self.kff[..., t], self.sigma[..., t] 298 | 299 | u_mu = K @ x_mu + kff 300 | u_sigma = ctl_sigma + K @ x_sigma @ K.T 301 | u_sigma = 0.5 * (u_sigma + u_sigma.T) 302 | 303 | return u_mu, u_sigma 304 | 305 | 306 | def pass_alpha_as_vector(f): 307 | def wrapper(self, alpha, *args): 308 | assert alpha is not None 309 | 310 | if alpha.shape[0] == 1: 311 | alpha = alpha * np.ones((self.nb_steps, )) 312 | 313 | return f(self, alpha, *args) 314 | return wrapper 315 | -------------------------------------------------------------------------------- /trajopt/ilqr/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/ilqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .ilqr import iLQR 2 | -------------------------------------------------------------------------------- /trajopt/ilqr/ilqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.ilqr.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.ilqr.objects import QuadraticStateValue, QuadraticStateActionValue 5 | from trajopt.ilqr.objects import LinearControl 6 | 7 | from trajopt.ilqr.core import backward_pass 8 | 9 | 10 | class iLQR: 11 | 12 | def __init__(self, env, nb_steps, init_state, 13 | activation=None, slew_rate=False, 14 | action_penalty=None, 15 | alphas=np.power(10., np.linspace(0, -3, 11)), 16 | lmbda=1., dlmbda=1., min_lmbda=1e-6, 17 | max_lmbda=1e6, mult_lmbda=1.6, tolfun=1e-6, 18 | tolgrad=1e-4, min_imp=0., reg=1): 19 | 20 | self.env = env 21 | 22 | # expose necessary functions 23 | self.env_dyn = self.env.unwrapped.dynamics 24 | self.env_cost = self.env.unwrapped.cost 25 | self.env_init = init_state 26 | 27 | self.ulim = self.env.action_space.high 28 | 29 | self.dm_state = self.env.observation_space.shape[0] 30 | self.dm_act = self.env.action_space.shape[0] 31 | self.nb_steps = nb_steps 32 | 33 | # use slew rate penalty or not 34 | self.env.unwrapped.slew_rate = slew_rate 35 | if action_penalty is not None: 36 | self.env.unwrapped.uw = action_penalty * np.ones((self.dm_act, )) 37 | 38 | # backtracking 39 | self.alphas = alphas 40 | self.alpha = None 41 | 42 | self.lmbda = lmbda 43 | self.dlmbda = dlmbda 44 | self.min_lmbda = min_lmbda 45 | self.max_lmbda = max_lmbda 46 | self.mult_lmbda = mult_lmbda 47 | 48 | # regularization type 49 | self.reg = reg 50 | 51 | # minimum relative improvement 52 | self.min_imp = min_imp 53 | 54 | # stopping criterion
55 | self.tolfun = tolfun 56 | self.tolgrad = tolgrad 57 | 58 | # reference trajectory 59 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 60 | self.xref[..., 0] = self.env_init 61 | 62 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 63 | 64 | self.vfunc = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 65 | self.qfunc = QuadraticStateActionValue(self.dm_state, self.dm_act, self.nb_steps) 66 | 67 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 68 | 69 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 70 | self.ctl.kff = 1e-4 * np.random.randn(self.dm_act, self.nb_steps) 71 | 72 | # temporal activation of the cost function, e.g. sigmoid shaped or discounted 73 | if activation is None: 74 | self.weighting = np.ones((self.nb_steps + 1, )) 75 | elif "mult" in activation and "shift" in activation: 76 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 77 | self.weighting = 1. / (1. + np.exp(- activation['mult'] * (t - activation['shift']))) 78 | elif "discount" in activation: 79 | self.weighting = np.ones((self.nb_steps + 1,)) 80 | gamma = activation["discount"] * np.ones((self.nb_steps, )) 81 | self.weighting[1:] = np.cumprod(gamma) 82 | else: 83 | raise NotImplementedError 84 | 85 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 86 | 87 | self.last_return = - np.inf 88 | 89 | def forward_pass(self, ctl, alpha): 90 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 91 | action = np.zeros((self.dm_act, self.nb_steps)) 92 | cost = np.zeros((self.nb_steps + 1, )) 93 | 94 | state[..., 0] = self.env_init 95 | for t in range(self.nb_steps): 96 | _act = ctl.action(state, alpha, self.xref, self.uref, t) 97 | action[..., t] = np.clip(_act, -self.ulim, self.ulim) 98 | cost[..., t] = self.env_cost(state[..., t], action[..., t], action[..., t - 1], self.weighting[t]) 99 | state[..., t + 1] = self.env_dyn(state[..., t], action[..., t]) 100 | 101 | cost[..., -1] = self.env_cost(state[..., -1], np.zeros((self.dm_act, )), np.zeros((self.dm_act, )), self.weighting[-1]) 102 | return state, action, cost 103 | 104 | def backward_pass(self): 105 | lc = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 106 | xvalue = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 107 | xuvalue = QuadraticStateActionValue(self.dm_state, self.dm_act, self.nb_steps) 108 | 109 | xuvalue.Qxx, xuvalue.Qux, xuvalue.Quu,\ 110 | xuvalue.qx, xuvalue.qu,\ 111 | xvalue.V, xvalue.v, dV,\ 112 | lc.K, lc.kff, diverge = backward_pass(self.cost.Cxx, self.cost.cx, self.cost.Cuu, 113 | self.cost.cu, self.cost.Cxu, 114 | self.dyn.A, self.dyn.B, 115 | self.lmbda, self.reg, 116 | self.dm_state, self.dm_act, self.nb_steps) 117 | return lc, xvalue, xuvalue, dV, diverge 118 | 119 | def plot(self): 120 | import matplotlib.pyplot as plt 121 | 122 | plt.figure() 123 | 124 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 125 | for k in range(self.dm_state): 126 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 127 | plt.plot(t, self.xref[k, :], '-b') 128 | 129 | t = np.linspace(0, self.nb_steps, self.nb_steps) 130 | for k in range(self.dm_act): 131 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 132 | plt.plot(t, self.uref[k, :], '-g') 133 | 134 | plt.show() 135 | 136 | def run(self, nb_iter=25, verbose=False): 137 | _trace = [] 138 | # init trajectory 139 | for alpha in self.alphas: 140 | _state, _action, _cost = self.forward_pass(self.ctl, alpha) 141 | if np.all(_state < 1e8): 142 | self.xref = _state 143 | self.uref
= _action 144 | self.last_return = np.sum(_cost) 145 | break 146 | else: 147 | print("Initial trajectory diverges") 148 | 149 | _trace.append(self.last_return) 150 | 151 | for iter in range(nb_iter): 152 | # get linear system dynamics around ref traj. 153 | self.dyn.taylor_expansion(self.xref, self.uref) 154 | 155 | # get quadratic cost around ref traj. 156 | self.cost.taylor_expansion(self.xref, self.uref, self.weighting) 157 | 158 | xvalue, xuvalue = None, None 159 | lc, dvalue = None, None 160 | # execute a backward pass 161 | backpass_done = False 162 | while not backpass_done: 163 | lc, xvalue, xuvalue, dvalue, diverge = self.backward_pass() 164 | if np.any(diverge): 165 | # increase lmbda 166 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 167 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 168 | if self.lmbda > self.max_lmbda: 169 | break 170 | else: 171 | continue 172 | else: 173 | backpass_done = True 174 | 175 | # terminate if gradient too small 176 | _g_norm = np.mean(np.max(np.abs(lc.kff) / (np.abs(self.uref) + 1.), axis=1)) 177 | if _g_norm < self.tolgrad and self.lmbda < 1e-5: 178 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. / self.mult_lmbda) 179 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 180 | break 181 | 182 | _state, _action = None, None 183 | _return, _dreturn = None, None 184 | # execute a forward pass 185 | fwdpass_done = False 186 | if backpass_done: 187 | for alpha in self.alphas: 188 | self.alpha = alpha 189 | 190 | # apply on actual system 191 | _state, _action, _cost = self.forward_pass(ctl=lc, alpha=self.alpha) 192 | 193 | # summed mean return 194 | _return = np.sum(_cost) 195 | 196 | # check return improvement 197 | _dreturn = self.last_return - _return 198 | _expected = - 1. * alpha * (dvalue[0] + alpha * dvalue[1]) 199 | _imp = _dreturn / _expected 200 | if _imp >= self.min_imp: 201 | fwdpass_done = True 202 | break 203 | 204 | # accept or reject 205 | if fwdpass_done: 206 | # decrease lmbda 207 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. 
/ self.mult_lmbda) 208 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 209 | 210 | self.xref = _state 211 | self.uref = _action 212 | self.last_return = _return 213 | 214 | self.vfunc = xvalue 215 | self.qfunc = xuvalue 216 | 217 | self.ctl = lc 218 | 219 | _trace.append(self.last_return) 220 | 221 | # terminate if objective tolerance is reached 222 | if _dreturn < self.tolfun: 223 | break 224 | else: 225 | # increase lmbda 226 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 227 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 228 | if self.lmbda > self.max_lmbda: 229 | break 230 | else: 231 | continue 232 | 233 | if verbose: 234 | print("iter: ", iter, 235 | " return: ", _return) 236 | 237 | return _trace 238 | -------------------------------------------------------------------------------- /trajopt/ilqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | 13 | 14 | class QuadraticStateActionValue: 15 | def __init__(self, dm_state, dm_act, nb_steps): 16 | self.dm_state = dm_state 17 | self.dm_act = dm_act 18 | self.nb_steps = nb_steps 19 | 20 | self.Qxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 21 | self.Quu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 22 | self.Qux = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 23 | 24 | self.qx = np.zeros((self.dm_state, self.nb_steps, )) 25 | self.qu = np.zeros((self.dm_act, self.nb_steps, )) 26 | 27 | 28 | class QuadraticCost: 29 | def __init__(self, dm_state, dm_act, nb_steps): 30 | self.dm_state = dm_state 31 | self.dm_act = dm_act 32 | 33 | self.nb_steps = nb_steps 34 | 35 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 36 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 37 | 38 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 39 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 40 | 41 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 42 | 43 | @property 44 | def params(self): 45 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu 46 | 47 | @params.setter 48 | def params(self, values): 49 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu = values 50 | 51 | 52 | class AnalyticalQuadraticCost(QuadraticCost): 53 | def __init__(self, f, dm_state, dm_act, nb_steps): 54 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 55 | 56 | self.f = f 57 | 58 | self.dcdxx = hessian(self.f, 0) 59 | self.dcduu = hessian(self.f, 1) 60 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 61 | 62 | self.dcdx = jacobian(self.f, 0) 63 | self.dcdu = jacobian(self.f, 1) 64 | 65 | def evalf(self, x, u, u_last, a): 66 | return self.f(x, u, u_last, a) 67 | 68 | def taylor_expansion(self, x, u, a): 69 | # pad last time step of action traj.
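# the cost signature is (x, u, u_last, a): the appended zero column stands in for
# the non-existent action at the terminal time step, and through _u[..., t - 1]
# it also acts as the "previous action" at t = 0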
70 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 71 | 72 | for t in range(self.nb_steps): 73 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 74 | self.Cxx[..., t] = self.dcdxx(*_in) 75 | self.Cuu[..., t] = self.dcduu(*_in) 76 | self.Cxu[..., t] = self.dcdxu(*_in) 77 | self.cx[..., t] = self.dcdx(*_in) 78 | self.cu[..., t] = self.dcdu(*_in) 79 | 80 | 81 | class LinearDynamics: 82 | def __init__(self, dm_state, dm_act, nb_steps): 83 | self.dm_state = dm_state 84 | self.dm_act = dm_act 85 | self.nb_steps = nb_steps 86 | 87 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 88 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 89 | 90 | @property 91 | def params(self): 92 | return self.A, self.B 93 | 94 | @params.setter 95 | def params(self, values): 96 | self.A, self.B = values 97 | 98 | def sample(self, x, u): 99 | pass 100 | 101 | 102 | class AnalyticalLinearDynamics(LinearDynamics): 103 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 104 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 105 | 106 | self.f = f_dyn 107 | 108 | self.dfdx = jacobian(self.f, 0) 109 | self.dfdu = jacobian(self.f, 1) 110 | 111 | def evalf(self, x, u): 112 | return self.f(x, u) 113 | 114 | def taylor_expansion(self, x, u): 115 | for t in range(self.nb_steps): 116 | self.A[..., t] = self.dfdx(x[..., t], u[..., t]) 117 | self.B[..., t] = self.dfdu(x[..., t], u[..., t]) 118 | 119 | 120 | class LinearControl: 121 | def __init__(self, dm_state, dm_act, nb_steps): 122 | self.dm_state = dm_state 123 | self.dm_act = dm_act 124 | self.nb_steps = nb_steps 125 | 126 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 127 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 128 | 129 | @property 130 | def params(self): 131 | return self.K, self.kff 132 | 133 | @params.setter 134 | def params(self, values): 135 | self.K, self.kff = values 136 | 137 | def action(self, x, alpha, xref, uref, t): 138 | dx = x[..., t] - xref[:, t] 139 | return uref[:, t] + alpha * self.kff[..., t] + self.K[..., t] @ dx 140 | -------------------------------------------------------------------------------- /trajopt/ilqr/src/util.cpp: -------------------------------------------------------------------------------- 1 | #include <armadillo> 2 | #include <pybind11/pybind11.h> 3 | #include <pybind11/numpy.h> 4 | 5 | namespace py = pybind11; 6 | 7 | using namespace arma; 8 | 9 | 10 | typedef py::array_t<double, py::array::f_style | py::array::forcecast> array_tf; 11 | typedef py::array_t<double, py::array::c_style | py::array::forcecast> array_tc; 12 | 13 | 14 | cube array_to_cube(array_tf m) { 15 | 16 | py::buffer_info _m_buff = m.request(); 17 | int n_rows = _m_buff.shape[0]; 18 | int n_cols = _m_buff.shape[1]; 19 | int n_slices = _m_buff.shape[2]; 20 | 21 | cube _m_arma((double *)_m_buff.ptr, n_rows, n_cols, n_slices); 22 | 23 | return _m_arma; 24 | } 25 | 26 | 27 | mat array_to_mat(array_tf m) { 28 | 29 | py::buffer_info _m_buff = m.request(); 30 | int n_rows = _m_buff.shape[0]; 31 | int n_cols = _m_buff.shape[1]; 32 | 33 | mat _m_arma((double *)_m_buff.ptr, n_rows, n_cols); 34 | 35 | return _m_arma; 36 | } 37 | 38 | 39 | vec array_to_vec(array_tf m) { 40 | 41 | py::buffer_info _m_buff = m.request(); 42 | int n_rows = _m_buff.shape[0]; 43 | 44 | vec _m_vec((double *)_m_buff.ptr, n_rows); 45 | 46 | return _m_vec; 47 | } 48 | 49 | 50 | array_tf cube_to_array(cube m) { 51 | 52 | auto _m_array = array_tf({m.n_rows, m.n_cols, m.n_slices}); 53 | 54 | py::buffer_info _m_buff = _m_array.request(); 55 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols * m.n_slices); 56 | 57 | return _m_array; 58 | }
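// armadillo stores data column-major (Fortran order); the f_style flag on the
// array typedefs above matches that layout, so the raw memcpy calls in these
// converters round-trip between numpy and armadillo without any reordering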
59 | 60 | 61 | array_tf mat_to_array(mat m) { 62 | 63 | auto _m_array = array_tf({m.n_rows, m.n_cols}); 64 | 65 | py::buffer_info _m_buff = _m_array.request(); 66 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols); 67 | 68 | return _m_array; 69 | } 70 | 71 | 72 | array_tf vec_to_array(vec m) { 73 | 74 | auto _m_array = array_tf({m.n_rows}); 75 | 76 | py::buffer_info _m_buff = _m_array.request(); 77 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows); 78 | 79 | return _m_array; 80 | } 81 | 82 | 83 | py::tuple backward_pass(array_tf _Cxx, array_tf _cx, array_tf _Cuu, 84 | array_tf _cu, array_tf _Cxu, 85 | array_tf _A, array_tf _B, 86 | double lmbda, int reg, 87 | int dm_state, int dm_act, int nb_steps) { 88 | 89 | // inputs 90 | cube Cxx = array_to_cube(_Cxx); 91 | mat cx = array_to_mat(_cx); 92 | cube Cuu = array_to_cube(_Cuu); 93 | mat cu = array_to_mat(_cu); 94 | cube Cxu = array_to_cube(_Cxu); 95 | 96 | cube A = array_to_cube(_A); 97 | cube B = array_to_cube(_B); 98 | 99 | // outputs 100 | cube Q(dm_state + dm_act, dm_state + dm_act, nb_steps); 101 | cube Qxx(dm_state, dm_state, nb_steps); 102 | cube Qux(dm_act, dm_state, nb_steps); 103 | cube Quu(dm_act, dm_act, nb_steps); 104 | mat qx(dm_state, nb_steps); 105 | mat qu(dm_act, nb_steps); 106 | 107 | cube Qux_reg(dm_act, dm_state, nb_steps); 108 | cube Quu_reg(dm_act, dm_act, nb_steps); 109 | cube Quu_inv(dm_act, dm_act, nb_steps); 110 | 111 | cube V(dm_state, dm_state, nb_steps + 1); 112 | mat v(dm_state, nb_steps + 1); 113 | vec dV(2, fill::zeros); 114 | 115 | cube V_reg(dm_state, dm_state, nb_steps + 1); 116 | 117 | cube K(dm_act, dm_state, nb_steps); 118 | mat kff(dm_act, nb_steps); 119 | 120 | int _diverge = 0; 121 | 122 | // last time step 123 | V.slice(nb_steps) = Cxx.slice(nb_steps); 124 | v.col(nb_steps) = cx.col(nb_steps); 125 | 126 | for(int i = nb_steps - 1; i >= 0; --i) 127 | { 128 | Qxx.slice(i) = Cxx.slice(i) + A.slice(i).t() * V.slice(i+1) * A.slice(i); 129 | Quu.slice(i) = Cuu.slice(i) + B.slice(i).t() * V.slice(i+1) * B.slice(i); 130 | Qux.slice(i) = (Cxu.slice(i) + A.slice(i).t() * V.slice(i+1) * B.slice(i)).t(); 131 | 132 | qu.col(i) = cu.col(i) + B.slice(i).t() * v.col(i+1); 133 | qx.col(i) = cx.col(i) + A.slice(i).t() * v.col(i+1); 134 | 135 | V_reg.slice(i+1) = V.slice(i+1); 136 | if (reg==2) 137 | V_reg.slice(i+1) += lmbda * eye(dm_state, dm_state); 138 | 139 | Qux_reg.slice(i) = (Cxu.slice(i) + A.slice(i).t() * V_reg.slice(i+1) * B.slice(i)).t(); 140 | 141 | Quu_reg.slice(i) = Cuu.slice(i) + B.slice(i).t() * V_reg.slice(i+1) * B.slice(i); 142 | if (reg==1) 143 | Quu_reg.slice(i) += lmbda * eye(dm_act, dm_act); 144 | 145 | if (!(Quu_reg.slice(i)).is_sympd()) { 146 | _diverge = i; 147 | break; 148 | } 149 | 150 | Quu_inv.slice(i) = inv(Quu_reg.slice(i)); 151 | K.slice(i) = - Quu_inv.slice(i) * Qux_reg.slice(i); 152 | kff.col(i) = - Quu_inv.slice(i) * qu.col(i); 153 | 154 | dV += join_vert(kff.col(i).t() * qu.col(i), 0.5 * kff.col(i).t() * Quu.slice(i) * kff.col(i)); 155 | 156 | v.col(i) = qx.col(i) + K.slice(i).t() * Quu.slice(i) * kff.col(i) + 157 | K.slice(i).t() * qu.col(i) + Qux.slice(i).t() * kff.col(i); 158 | 159 | V.slice(i) = Qxx.slice(i) + K.slice(i).t() * Quu.slice(i) * K.slice(i) + 160 | K.slice(i).t() * Qux.slice(i) + Qux.slice(i).t() * K.slice(i); 161 | V.slice(i) = 0.5 * (V.slice(i) + V.slice(i).t()); 162 | } 163 | 164 | // transform outputs to numpy 165 | array_tf _Qxx = cube_to_array(Qxx); 166 | array_tf _Qux = cube_to_array(Qux); 167 | array_tf _Quu =
cube_to_array(Quu); 168 | 169 | array_tf _qx = mat_to_array(qx); 170 | array_tf _qu = mat_to_array(qu); 171 | 172 | array_tf _V = cube_to_array(V); 173 | array_tf _v = mat_to_array(v); 174 | array_tf _dV = vec_to_array(dV); 175 | 176 | array_tf _K = cube_to_array(K); 177 | array_tf _kff = mat_to_array(kff); 178 | 179 | py::tuple output = py::make_tuple(_Qxx, _Qux, _Quu, _qx, _qu, 180 | _V, _v, _dV, _K, _kff, _diverge); 181 | return output; 182 | } 183 | 184 | 185 | PYBIND11_MODULE(core, m) 186 | { 187 | m.def("backward_pass", &backward_pass); 188 | } 189 | -------------------------------------------------------------------------------- /trajopt/rgps/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) 14 | -------------------------------------------------------------------------------- /trajopt/rgps/__init__.py: -------------------------------------------------------------------------------- 1 | from .mfrgps import MFRGPS 2 | from .mbrgps import MBRGPS 3 | from .lrgps import LRGPS 4 | -------------------------------------------------------------------------------- /trajopt/riccati/__init__.py: -------------------------------------------------------------------------------- 1 | from .riccati import Riccati 2 | -------------------------------------------------------------------------------- /trajopt/riccati/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | 13 | 14 | class QuadraticCost: 15 | def __init__(self, dm_state, dm_act, nb_steps): 16 | self.dm_state = dm_state 17 | self.dm_act = dm_act 18 | 19 | self.nb_steps = nb_steps 20 | 21 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 22 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 23 | 24 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 25 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 26 | 27 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 28 | 29 | @property 30 | def params(self): 31 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu 32 | 33 | @params.setter 34 | def params(self, values): 35 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu = values 36 | 37 | 38 | class AnalyticalQuadraticCost(QuadraticCost): 39 | def __init__(self, f, dm_state, dm_act, nb_steps): 40 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 41 | 42 | self.f = f 43 | 44 | self.dcdxx = hessian(self.f, 0) 45 | self.dcduu = hessian(self.f, 1) 46 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 47 | 48 | self.dcdx = jacobian(self.f, 0) 49 | self.dcdu = jacobian(self.f, 1) 50 | 51 | def evalf(self, x, u, u_last, a): 52 | return self.f(x, u, u_last, a) 53 | 54 | def 
taylor_expansion(self, x, u, a): 55 | # pad last time step of action traj. 56 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 57 | 58 | for t in range(self.nb_steps): 59 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 60 | self.Cxx[..., t] = 0.5 * self.dcdxx(*_in) 61 | self.Cuu[..., t] = 0.5 * self.dcduu(*_in) 62 | self.Cxu[..., t] = 0.5 * self.dcdxu(*_in) 63 | 64 | self.cx[..., t] = self.dcdx(*_in) - self.dcdxx(*_in) @ x[..., t] - 2. * self.dcdxu(*_in) @ _u[..., t] 65 | self.cu[..., t] = self.dcdu(*_in) - self.dcduu(*_in) @ _u[..., t] - 2. * x[..., t].T @ self.dcdxu(*_in) 66 | 67 | 68 | class LinearDynamics: 69 | def __init__(self, dm_state, dm_act, nb_steps): 70 | self.dm_state = dm_state 71 | self.dm_act = dm_act 72 | self.nb_steps = nb_steps 73 | 74 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 75 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 76 | self.c = np.zeros((self.dm_state, self.nb_steps)) 77 | 78 | @property 79 | def params(self): 80 | return self.A, self.B, self.c 81 | 82 | @params.setter 83 | def params(self, values): 84 | self.A, self.B, self.c = values 85 | 86 | def sample(self, x, u): 87 | pass 88 | 89 | 90 | class AnalyticalLinearDynamics(LinearDynamics): 91 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 92 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 93 | 94 | self.f = f_dyn 95 | 96 | self.dfdx = jacobian(self.f, 0) 97 | self.dfdu = jacobian(self.f, 1) 98 | 99 | def evalf(self, x, u): 100 | return self.f(x, u) 101 | 102 | def taylor_expansion(self, x, u): 103 | for t in range(self.nb_steps): 104 | self.A[..., t] = self.dfdx(x[..., t], u[..., t]) 105 | self.B[..., t] = self.dfdu(x[..., t], u[..., t]) 106 | # residual of the Taylor expansion 107 | self.c[..., t] = self.evalf(x[..., t], u[..., t]) -\ 108 | self.A[..., t] @ x[..., t] - self.B[..., t] @ u[..., t] 109 | 110 | 111 | class LinearControl: 112 | def __init__(self, dm_state, dm_act, nb_steps): 113 | self.dm_state = dm_state 114 | self.dm_act = dm_act 115 | self.nb_steps = nb_steps 116 | 117 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 118 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 119 | 120 | @property 121 | def params(self): 122 | return self.K, self.kff 123 | 124 | @params.setter 125 | def params(self, values): 126 | self.K, self.kff = values 127 | 128 | def action(self, x, t): 129 | return self.kff[..., t] + self.K[..., t] @ x 130 | -------------------------------------------------------------------------------- /trajopt/riccati/riccati.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.riccati.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.riccati.objects import QuadraticStateValue 5 | from trajopt.riccati.objects import LinearControl 6 | 7 | 8 | class Riccati: 9 | 10 | def __init__(self, env, nb_steps, 11 | init_state, activation=None): 12 | 13 | self.env = env 14 | 15 | # expose necessary functions 16 | self.env_dyn = self.env.unwrapped.dynamics 17 | self.env_noise = self.env.unwrapped.sigma 18 | self.env_cost = self.env.unwrapped.cost 19 | self.env_init = init_state 20 | 21 | self.ulim = self.env.action_space.high 22 | 23 | self.dm_state = self.env.observation_space.shape[0] 24 | self.dm_act = self.env.action_space.shape[0] 25 | self.nb_steps = nb_steps 26 | 27 | # reference trajectory 28 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 29 | self.xref[...,
0] = self.env_init[0] 30 | 31 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 32 | 33 | self.vfunc = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 34 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 35 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 36 | 37 | # temporal activation of the cost function, e.g. sigmoid shaped or discounted 38 | if activation is None: 39 | self.weighting = np.ones((self.nb_steps + 1, )) 40 | elif "mult" in activation and "shift" in activation: 41 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 42 | self.weighting = 1. / (1. + np.exp(- activation['mult'] * (t - activation['shift']))) 43 | elif "discount" in activation: 44 | self.weighting = np.ones((self.nb_steps + 1,)) 45 | gamma = activation["discount"] * np.ones((self.nb_steps, )) 46 | self.weighting[1:] = np.cumprod(gamma) 47 | else: 48 | raise NotImplementedError 49 | 50 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 51 | 52 | def rollout(self, nb_episodes, env=None): 53 | if env is None: 54 | env = self.env 55 | env_cost = self.env_cost 56 | else: 57 | env = env 58 | env_cost = env.unwrapped.cost 59 | 60 | data = {'x': np.zeros((self.dm_state, self.nb_steps, nb_episodes)), 61 | 'u': np.zeros((self.dm_act, self.nb_steps, nb_episodes)), 62 | 'xn': np.zeros((self.dm_state, self.nb_steps, nb_episodes)), 63 | 'c': np.zeros((self.nb_steps + 1, nb_episodes))} 64 | 65 | for n in range(nb_episodes): 66 | x = env.reset() 67 | 68 | for t in range(self.nb_steps): 69 | u = self.ctl.action(x, t) 70 | data['u'][..., t, n] = u 71 | 72 | # evaluate the true cost of the rollout env 73 | c = env_cost(x, u, data['u'][..., t - 1, n], self.weighting[t]) 74 | data['c'][t, n] = c 75 | 76 | data['x'][..., t, n] = x 77 | x, _, _, _ = env.step(u) 78 | data['xn'][..., t, n] = x 79 | 80 | c = env_cost(x, np.zeros((self.dm_act, )), np.zeros((self.dm_act, )), self.weighting[-1]) 81 | data['c'][-1, n] = c 82 | 83 | return data 84 | 85 | def forward_pass(self, ctl): 86 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 87 | action = np.zeros((self.dm_act, self.nb_steps)) 88 | cost = np.zeros((self.nb_steps + 1, )) 89 | 90 | state[..., 0] = self.env.reset() 91 | for t in range(self.nb_steps): 92 | action[..., t] = ctl.action(state[..., t], t) 93 | cost[..., t] = self.env_cost(state[..., t], action[..., t], action[..., t - 1], self.weighting[t]) 94 | state[..., t + 1], _, _, _ = self.env.step(action[..., t]) 95 | 96 | cost[..., -1] = self.env_cost(state[..., -1], np.zeros((self.dm_act, )), 97 | np.zeros((self.dm_act, )), self.weighting[-1]) 98 | return state, action, cost 99 | 100 | def backward_pass(self): 101 | lc = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 102 | xvalue = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 103 | 104 | xvalue.V[..., -1] = self.cost.Cxx[..., -1] 105 | xvalue.v[..., -1] = self.cost.cx[..., -1] 106 | 107 | for t in range(self.nb_steps - 1, -1, -1): 108 | Qxx = self.cost.Cxx[..., t] + self.dyn.A[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.A[..., t] 109 | Quu = self.cost.Cuu[..., t] + self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.B[..., t] 110 | Qux = self.cost.Cxu[..., t].T + self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.A[..., t] 111 | 112 | qx = self.cost.cx[..., t] + 2.0 * self.dyn.A[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.c[..., t] +\ 113 | self.dyn.A[..., t].T @ xvalue.v[..., t + 1] 114 | 115 | qu = self.cost.cu[..., t] + 2.0 * self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @
self.dyn.c[..., t] +\ 116 | self.dyn.B[..., t].T @ xvalue.v[..., t + 1] 117 | 118 | Quu_inv = np.linalg.inv(Quu) 119 | 120 | lc.K[..., t] = - Quu_inv @ Qux 121 | lc.kff[..., t] = - 0.5 * Quu_inv @ qu 122 | 123 | xvalue.V[..., t] = Qxx + Qux.T @ lc.K[..., t] 124 | xvalue.v[..., t] = qx + 2. * lc.kff[..., t].T @ Qux 125 | 126 | return lc, xvalue 127 | 128 | def plot(self, xref=None, uref=None): 129 | xref = self.xref if xref is None else xref 130 | uref = self.uref if uref is None else uref 131 | 132 | import matplotlib.pyplot as plt 133 | 134 | plt.figure() 135 | 136 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 137 | for k in range(self.dm_state): 138 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 139 | plt.plot(t, xref[k, :], '-b') 140 | 141 | t = np.linspace(0, self.nb_steps, self.nb_steps) 142 | for k in range(self.dm_act): 143 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 144 | plt.plot(t, uref[k, :], '-g') 145 | 146 | plt.show() 147 | 148 | def run(self): 149 | # get linear system dynamics around ref traj. 150 | self.dyn.taylor_expansion(self.xref, self.uref) 151 | 152 | # get quadratic cost around ref traj. 153 | self.cost.taylor_expansion(self.xref, self.uref, self.weighting) 154 | 155 | # backward pass to get ctrl. 156 | self.ctl, self.vfunc = self.backward_pass() 157 | 158 | # forward pass to get cost and traj. 159 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 160 | 161 | return np.sum(_cost) 162 | --------------------------------------------------------------------------------
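A minimal driver sketch for the Riccati solver above — an illustration, not a file from the repository. The environment id 'LQR-TO-v0' is hypothetical, and importing trajopt is assumed to register the custom environments (see trajopt/envs, outside this excerpt); the wrapped env must expose .dynamics, .sigma and .cost on env.unwrapped, as read in Riccati.__init__.

import gym
import autograd.numpy as np

import trajopt  # assumed to register the custom environments on import

from trajopt.riccati import Riccati

# hypothetical id; the actual registered names live in trajopt/envs/__init__.py
env = gym.make('LQR-TO-v0')

dm_state = env.observation_space.shape[0]

# Riccati reads the mean of the initial state from init_state[0]
init_state = (np.zeros((dm_state, )), 1e-2 * np.eye(dm_state))

solver = Riccati(env, nb_steps=60, init_state=init_state)
total_cost = solver.run()  # linearize, backward pass, forward pass on the env
solver.plot()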