├── .gitignore ├── LICENSE ├── README.md ├── clean.sh ├── examples ├── bspilqr │ ├── car.py │ └── lightdark.py ├── elqr │ ├── lqr.py │ └── pendulum.py ├── gps │ ├── analytical │ │ ├── mpc │ │ │ ├── mb_cartpole.py │ │ │ ├── mb_cartpole_parallel.py │ │ │ ├── mb_double_cartpole.py │ │ │ ├── mb_lqr.py │ │ │ ├── mb_pendulum.py │ │ │ └── mb_pendulum_parallel.py │ │ └── topt │ │ │ ├── mb_cartpole.py │ │ │ ├── mb_double_cartpole.py │ │ │ ├── mb_double_pendulum.py │ │ │ ├── mb_lqr.py │ │ │ ├── mb_pendulum.py │ │ │ ├── mb_pendulum_parallel.py │ │ │ ├── mb_quad_pendulum.py │ │ │ └── mbgps_riccati.py │ └── learned │ │ ├── mf_cartpole.py │ │ ├── mf_double_pendulum.py │ │ ├── mf_lqr.py │ │ └── mf_pendulum.py ├── ilqr │ └── mpc │ │ ├── cartpole.py │ │ ├── doublecartpole.py │ │ ├── lqr.py │ │ ├── pendulum.py │ │ └── pendulum_parallel.py ├── rgps │ ├── mb_lqr.py │ └── mb_robot.py └── riccati │ └── lqr.py ├── setup.py └── trajopt ├── __init__.py ├── bspilqr ├── CMakeLists.txt ├── __init__.py ├── bspilqr.py ├── objects.py └── src │ └── util.cpp ├── elqr ├── __init__.py ├── elqr.py └── objects.py ├── envs ├── __init__.py ├── car │ └── car.py ├── cartpole │ └── cartpole.py ├── double_cartpole │ └── double_cartpole.py ├── double_pendulum │ └── double_pendulum.py ├── lightdark │ └── lightdark.py ├── lqr │ ├── lqr_v0.py │ ├── lqr_v1.py │ └── lqr_v2.py ├── pendulum │ └── pendulum.py ├── quad_pendulum │ └── quad_pendulum.py └── robot │ └── robot.py ├── gps ├── CMakeLists.txt ├── __init__.py ├── mbgps.py ├── mfgps.py ├── objects.py └── src │ └── util.cpp ├── ilqr ├── CMakeLists.txt ├── __init__.py ├── ilqr.py ├── objects.py └── src │ └── util.cpp ├── rgps ├── CMakeLists.txt ├── __init__.py ├── lrgps.py ├── mbrgps.py ├── mfrgps.py ├── objects.py └── src │ └── util.cpp └── riccati ├── __init__.py ├── objects.py └── riccati.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pkl 2 | .idea/ 3 | .vscode/ 4 | .history/ 5 | 6 | ## Core latex/pdflatex auxiliary files: 7 | *.aux 8 | *.lof 9 | *.log 10 | *.lot 11 | *.fls 12 | *.out 13 | *.toc 14 | *.fmt 15 | *.fot 16 | *.cb 17 | *.cb2 18 | .*.lb 19 | 20 | ## Intermediate documents: 21 | *.dvi 22 | *.xdv 23 | *-converted-to.* 24 | # these rules might exclude image files for figures etc. 
25 | # *.ps 26 | # *.eps 27 | # *.pdf 28 | 29 | ## Generated if empty string is given at "Please type another file name for output:" 30 | .pdf 31 | 32 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 33 | *.bbl 34 | *.bcf 35 | *.blg 36 | *-blx.aux 37 | *-blx.bib 38 | *.run.xml 39 | 40 | ## Build tool auxiliary files: 41 | *.fdb_latexmk 42 | *.synctex 43 | *.synctex(busy) 44 | *.synctex.gz 45 | *.synctex.gz(busy) 46 | *.pdfsync 47 | 48 | ## Build tool directories for auxiliary files 49 | # latexrun 50 | latex.out/ 51 | 52 | ## Auxiliary and intermediate files from other packages: 53 | # algorithms 54 | *.alg 55 | *.loa 56 | 57 | # achemso 58 | acs-*.bib 59 | 60 | # amsthm 61 | *.thm 62 | 63 | # beamer 64 | *.nav 65 | *.pre 66 | *.snm 67 | *.vrb 68 | 69 | # changes 70 | *.soc 71 | 72 | # comment 73 | *.cut 74 | 75 | # cprotect 76 | *.cpt 77 | 78 | # elsarticle (documentclass of Elsevier journals) 79 | *.spl 80 | 81 | # endnotes 82 | *.ent 83 | 84 | # fixme 85 | *.lox 86 | 87 | # feynmf/feynmp 88 | *.mf 89 | *.mp 90 | *.t[1-9] 91 | *.t[1-9][0-9] 92 | *.tfm 93 | 94 | #(r)(e)ledmac/(r)(e)ledpar 95 | *.end 96 | *.?end 97 | *.[1-9] 98 | *.[1-9][0-9] 99 | *.[1-9][0-9][0-9] 100 | *.[1-9]R 101 | *.[1-9][0-9]R 102 | *.[1-9][0-9][0-9]R 103 | *.eledsec[1-9] 104 | *.eledsec[1-9]R 105 | *.eledsec[1-9][0-9] 106 | *.eledsec[1-9][0-9]R 107 | *.eledsec[1-9][0-9][0-9] 108 | *.eledsec[1-9][0-9][0-9]R 109 | 110 | # glossaries 111 | *.acn 112 | *.acr 113 | *.glg 114 | *.glo 115 | *.gls 116 | *.glsdefs 117 | 118 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 119 | # *.ist 120 | 121 | # gnuplottex 122 | *-gnuplottex-* 123 | 124 | # gregoriotex 125 | *.gaux 126 | *.gtex 127 | 128 | # htlatex 129 | *.4ct 130 | *.4tc 131 | *.idv 132 | *.lg 133 | *.trc 134 | *.xref 135 | 136 | # hyperref 137 | *.brf 138 | 139 | # knitr 140 | *-concordance.tex 141 | # TODO Comment the next line if you want to keep your tikz graphics files 142 | *.tikz 143 | *-tikzDictionary 144 | 145 | # listings 146 | *.lol 147 | 148 | # luatexja-ruby 149 | *.ltjruby 150 | 151 | # makeidx 152 | *.idx 153 | *.ilg 154 | *.ind 155 | 156 | # minitoc 157 | *.maf 158 | *.mlf 159 | *.mlt 160 | *.mtc[0-9]* 161 | *.slf[0-9]* 162 | *.slt[0-9]* 163 | *.stc[0-9]* 164 | 165 | # minted 166 | _minted* 167 | *.pyg 168 | 169 | # morewrites 170 | *.mw 171 | 172 | # nomencl 173 | *.nlg 174 | *.nlo 175 | *.nls 176 | 177 | # pax 178 | *.pax 179 | 180 | # pdfpcnotes 181 | *.pdfpc 182 | 183 | # sagetex 184 | *.sagetex.sage 185 | *.sagetex.py 186 | *.sagetex.scmd 187 | 188 | # scrwfile 189 | *.wrt 190 | 191 | # sympy 192 | *.sout 193 | *.sympy 194 | sympy-plots-for-*.tex/ 195 | 196 | # pdfcomment 197 | *.upa 198 | *.upb 199 | 200 | # pythontex 201 | *.pytxcode 202 | pythontex-files-*/ 203 | 204 | # tcolorbox 205 | *.listing 206 | 207 | # thmtools 208 | *.loe 209 | 210 | # TikZ & PGF 211 | *.dpth 212 | *.md5 213 | *.auxlock 214 | 215 | # todonotes 216 | *.tdo 217 | 218 | # vhistory 219 | *.hst 220 | *.ver 221 | 222 | # easy-todo 223 | *.lod 224 | 225 | # xcolor 226 | *.xcp 227 | 228 | # xmpincl 229 | *.xmpi 230 | 231 | # xindy 232 | *.xdy 233 | 234 | # xypic precompiled matrices 235 | *.xyc 236 | 237 | # endfloat 238 | *.ttt 239 | *.fff 240 | 241 | # Latexian 242 | TSWLatexianTemp* 243 | 244 | ## Editors: 245 | # WinEdt 246 | *.bak 247 | *.sav 248 | 249 | # Texpad 250 | .texpadtmp 251 | 252 | # LyX 253 | *.lyx~ 254 | 255 | # Kile 256 | *.backup 257 | 258 | # KBibTeX 259 | *~[0-9]* 260 | 261 | # auto folder when using emacs and auctex 
262 | ./auto/* 263 | *.el 264 | 265 | # expex forward references with \gathertags 266 | *-tags.tex 267 | 268 | # standalone packages 269 | *.sta 270 | 271 | CMakeLists.txt.user 272 | CMakeCache.txt 273 | CMakeFiles 274 | CMakeScripts 275 | Testing 276 | Makefile 277 | cmake_install.cmake 278 | install_manifest.txt 279 | compile_commands.json 280 | CTestTestfile.cmake 281 | _deps 282 | 283 | # Prerequisites 284 | *.d 285 | 286 | # Object files 287 | *.o 288 | *.ko 289 | *.obj 290 | *.elf 291 | 292 | # Linker output 293 | *.ilk 294 | *.map 295 | *.exp 296 | 297 | # Precompiled Headers 298 | *.gch 299 | *.pch 300 | 301 | # Libraries 302 | *.lib 303 | *.a 304 | *.la 305 | *.lo 306 | 307 | # Shared objects (inc. Windows DLLs) 308 | *.dll 309 | *.so 310 | *.so.* 311 | *.dylib 312 | 313 | # Executables 314 | *.exe 315 | *.out 316 | *.app 317 | *.i*86 318 | *.x86_64 319 | *.hex 320 | 321 | # Debug files 322 | *.dSYM/ 323 | *.su 324 | *.idb 325 | *.pdb 326 | 327 | # Kernel Module Compile Results 328 | *.mod* 329 | *.cmd 330 | .tmp_versions/ 331 | modules.order 332 | Module.symvers 333 | Mkfile.old 334 | dkms.conf 335 | 336 | # Byte-compiled / optimized / DLL files 337 | __pycache__/ 338 | *.py[cod] 339 | *$py.class 340 | 341 | # C extensions 342 | *.so 343 | 344 | # Distribution / packaging 345 | .Python 346 | build/ 347 | develop-eggs/ 348 | dist/ 349 | downloads/ 350 | eggs/ 351 | .eggs/ 352 | lib/ 353 | lib64/ 354 | parts/ 355 | sdist/ 356 | var/ 357 | wheels/ 358 | pip-wheel-metadata/ 359 | share/python-wheels/ 360 | *.egg-info/ 361 | .installed.cfg 362 | *.egg 363 | MANIFEST 364 | 365 | # PyInstaller 366 | # Usually these files are written by a python script from a template 367 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 368 | *.manifest 369 | *.spec 370 | 371 | # Installer logs 372 | pip-log.txt 373 | pip-delete-this-directory.txt 374 | 375 | # Unit test / coverage reports 376 | htmlcov/ 377 | .tox/ 378 | .nox/ 379 | .coverage 380 | .coverage.* 381 | .cache 382 | nosetests.xml 383 | coverage.xml 384 | *.cover 385 | .hypothesis/ 386 | .pytest_cache/ 387 | 388 | # Translations 389 | *.mo 390 | *.pot 391 | 392 | # Django stuff: 393 | *.log 394 | local_settings.py 395 | db.sqlite3 396 | db.sqlite3-journal 397 | 398 | # Flask stuff: 399 | instance/ 400 | .webassets-cache 401 | 402 | # Scrapy stuff: 403 | .scrapy 404 | 405 | # Sphinx documentation 406 | docs/_build/ 407 | 408 | # PyBuilder 409 | target/ 410 | 411 | # Jupyter Notebook 412 | .ipynb_checkpoints 413 | 414 | # IPython 415 | profile_default/ 416 | ipython_config.py 417 | 418 | # pyenv 419 | .python-version 420 | 421 | # pipenv 422 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 423 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 424 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 425 | # install all needed dependencies. 
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Hany Abdulsamad

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Trajectory Optimization

A toolbox for trajectory optimization of dynamical systems

## Installation

This toolbox uses pybind11 to compile C++ code and use it from Python.
The following setup has only been tested inside Conda environments.

- Install pybind11:
  ```shell
  conda install pybind11
  ```

- Compile OpenBLAS:
  * Prerequisites: libpthread, libgfortran, gcc-fortran
  * https://github.com/xianyi/OpenBLAS.git
  * Change USE_THREAD to (de-)activate multi-threading
  ```shell
  USE_THREAD=0 CC=gcc FC=gfortran NO_AFFINITY=1 NO_SHARED=1 COMMON_OPT=" -O2 -march=native " make
  ```
  * Edit the local CMakeLists.txt to reflect the path of OpenBLAS

- Configure Armadillo:
  * http://arma.sourceforge.net/download.html
  * Only configure, do not make
  ```shell
  ./configure
  ```
  * Edit the local CMakeLists.txt to reflect the path of Armadillo

- Install the Python package:
  ```shell
  pip install -e .
  ```
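- Verify the installation (a quick sanity check, assuming the C++ extensions compiled without errors):
  ```shell
  python -c "import trajopt"
  ```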
--------------------------------------------------------------------------------
/clean.sh:
--------------------------------------------------------------------------------
#!/bin/bash

rm trajopt/bspilqr/*.so
rm -r trajopt/bspilqr/build

rm trajopt/ilqr/*.so
rm -r trajopt/ilqr/build

rm trajopt/gps/*.so
rm -r trajopt/gps/build

rm trajopt/rgps/*.so
rm -r trajopt/rgps/build
--------------------------------------------------------------------------------
/examples/bspilqr/car.py:
--------------------------------------------------------------------------------
import numpy as np

import gym
from trajopt.bspilqr import BSPiLQR

from trajopt.bspilqr.objects import EKF


# car task
env = gym.make('Car-TO-v0')
env._max_episode_steps = 10000

nb_steps = 25

state_dim = env.state_dim
belief_dim = env.belief_dim
obs_dim = env.obs_dim
act_dim = env.act_dim

mu_b = np.zeros((belief_dim, nb_steps + 1))
sigma_b = np.zeros((belief_dim, belief_dim, nb_steps + 1))

state = np.zeros((state_dim, nb_steps + 1))
obs = np.zeros((obs_dim, nb_steps + 1))
act = np.zeros((act_dim, nb_steps))

obs[:, 0] = env.reset()
state[:, 0] = env.state

_mu_b, _sigma_b = env.init()
ekf = EKF(env)
mu_b[:, 0], sigma_b[..., 0] = ekf.innovate(_mu_b, _sigma_b, obs[:, 0])

for t in range(nb_steps):
    solver = BSPiLQR(env=env, init_belief=(mu_b[:, t], sigma_b[..., t]), nb_steps=10)
    trace = solver.run(nb_iter=25, verbose=False)

    act[:, t] = solver.uref[:, 0]
    obs[:, t + 1], _, _, _ = env.step(act[:, t])
    state[:, t + 1] = env.state

    mu_b[:, t + 1], sigma_b[..., t + 1] = ekf.inference(mu_b[:, t], sigma_b[..., t],
                                                        act[:, t], obs[:, t + 1])

    print('Time Step:', t, 'Cost:', trace[-1])
--------------------------------------------------------------------------------
/examples/bspilqr/lightdark.py:
--------------------------------------------------------------------------------
import numpy as np

import gym
from trajopt.bspilqr import BSPiLQR

from trajopt.bspilqr.objects import EKF


# light dark task
env = gym.make('LightDark-TO-v0')
env._max_episode_steps = 10000

nb_steps = 25

state_dim = env.state_dim
belief_dim = env.belief_dim
obs_dim = env.obs_dim
act_dim = env.act_dim

mu_b = np.zeros((belief_dim, nb_steps + 1))
sigma_b = np.zeros((belief_dim, belief_dim, nb_steps + 1))

state = np.zeros((state_dim, nb_steps + 1))
obs = np.zeros((obs_dim, nb_steps + 1))
act = np.zeros((act_dim, nb_steps))

obs[:, 0] = env.reset()
state[:, 0] = env.state

_mu_b, _sigma_b = env.init()
ekf = EKF(env)
mu_b[:, 0], sigma_b[..., 0] = ekf.innovate(_mu_b, _sigma_b, obs[:, 0])

for t in range(nb_steps):
    solver = BSPiLQR(env=env, init_belief=(mu_b[:, t], sigma_b[..., t]), nb_steps=10)
    trace = solver.run(nb_iter=25, verbose=False)

    act[:, t] = solver.uref[:, 0]
    obs[:, t + 1], _, _, _ = env.step(act[:, t])
    state[:, t + 1] = env.state

    mu_b[:, t + 1], sigma_b[..., t + 1] = ekf.inference(mu_b[:, t], sigma_b[..., t],
                                                        act[:, t], obs[:, t + 1])

    print('Time Step:', t, 'Cost:', trace[-1])
--------------------------------------------------------------------------------
/examples/elqr/lqr.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from trajopt.elqr import eLQR 3 | 4 | # lqr task 5 | env = gym.make('LQR-TO-v0') 6 | env._max_episode_steps = 100 7 | 8 | state = env.reset() 9 | alg = eLQR(env, nb_steps=60, 10 | init_state=state) 11 | 12 | # run eLQR 13 | trace = alg.run() 14 | 15 | # plot forward pass 16 | alg.plot() 17 | 18 | # plot objective 19 | import matplotlib.pyplot as plt 20 | 21 | plt.figure() 22 | plt.plot(trace) 23 | plt.show() 24 | -------------------------------------------------------------------------------- /examples/elqr/pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from trajopt.elqr import eLQR 3 | 4 | # pendulum task 5 | env = gym.make('Pendulum-TO-v0') 6 | env._max_episode_steps = 100 7 | 8 | state = env.reset() 9 | alg = eLQR(env, nb_steps=100, 10 | init_state=state) 11 | 12 | # run eLQR 13 | trace = alg.run() 14 | 15 | # plot forward pass 16 | alg.plot() 17 | 18 | # plot objective 19 | import matplotlib.pyplot as plt 20 | 21 | plt.figure() 22 | plt.plot(trace) 23 | plt.show() 24 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_cartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # cartpole env 14 | env = gym.make('Cartpole-TO-v0') 15 | env._max_episode_steps = 10000 16 | env.unwrapped.dt = 0.05 17 | env.unwrapped.umax = np.array([5.]) 18 | env.unwrapped.periodic = True 19 | 20 | env.seed(1337) 21 | 22 | dm_state = env.observation_space.shape[0] 23 | dm_act = env.action_space.shape[0] 24 | 25 | horizon, nb_steps = 20, 100 26 | 27 | env_sigma = env.unwrapped.sigma 28 | 29 | state = np.zeros((dm_state, nb_steps + 1)) 30 | action = np.zeros((dm_act, nb_steps)) 31 | 32 | state[:, 0] = env.reset() 33 | for t in range(nb_steps): 34 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 35 | init_action_sigma=1., nb_steps=horizon, 36 | kl_bound=2., action_penalty=1e-5) 37 | trace = solver.run(nb_iter=10, verbose=False) 38 | 39 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 40 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 41 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 42 | 43 | print('Time Step:', t, 'Cost:', trace[-1]) 44 | 45 | 46 | plt.figure() 47 | 48 | plt.subplot(5, 1, 1) 49 | plt.plot(state[0, :], '-b') 50 | plt.subplot(5, 1, 2) 51 | plt.plot(state[1, :], '-b') 52 | 53 | plt.subplot(5, 1, 3) 54 | plt.plot(state[2, :], '-r') 55 | plt.subplot(5, 1, 4) 56 | plt.plot(state[3, :], '-r') 57 | 58 | plt.subplot(5, 1, 5) 59 | plt.plot(action[0, :], '-g') 60 | 61 | plt.show() 62 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_cartpole_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | from joblib import Parallel, delayed 9 | 10 | import multiprocessing 11 | nb_cores = multiprocessing.cpu_count() 12 | 13 | 14 | def create_job(kwargs): 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | # cartpole env 19 | 
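    # (each worker builds its own env inside the job, presumably because gym
    # environments don't pickle reliably across loky processes; the other
    # parallel examples in this repo follow the same pattern)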
    env = gym.make('Cartpole-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.05
    env.unwrapped.umax = np.array([5.])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 20, 100

    env_sigma = env.unwrapped.sigma

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]),
                       init_action_sigma=1., nb_steps=horizon,
                       kl_bound=2., action_penalty=1e-5)
        trace = solver.run(nb_iter=10, verbose=False)

        _act = solver.ctl.sample(state[:, t], 0, stoch=False)
        action[:, t] = np.clip(_act, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T


def parallel_gps(nb_jobs=50):
    kwargs_list = [{} for _ in range(nb_jobs)]
    results = Parallel(n_jobs=min(nb_jobs, nb_cores),
                       verbose=10, backend='loky')(map(delayed(create_job), kwargs_list))
    obs, act = list(map(list, zip(*results)))
    return obs, act


obs, act = parallel_gps(nb_jobs=12)

fig, ax = plt.subplots(nrows=5, ncols=1, figsize=(12, 4))
for _obs, _act in zip(obs, act):
    ax[0].plot(_obs[:, 0])
    ax[1].plot(_obs[:, 1])
    ax[2].plot(_obs[:, 2])
    ax[3].plot(_obs[:, 3])
    ax[4].plot(_act)
plt.show()

# import pickle
# data = {'obs': obs, 'act': act}
# pickle.dump(data, open("gps_cartpole_cart.pkl", "wb"))
--------------------------------------------------------------------------------
/examples/gps/analytical/mpc/mb_double_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# double cartpole env
env = gym.make('DoubleCartpole-TO-v0')
env._max_episode_steps = 10000
env.unwrapped.dt = 0.05
env.unwrapped.umax = np.array([5.])
env.unwrapped.periodic = True

env.seed(1337)

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 20, 100

env_sigma = env.unwrapped.sigma

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]),
                   init_action_sigma=1., nb_steps=horizon,
                   kl_bound=2., action_penalty=1e-5)
    trace = solver.run(nb_iter=10, verbose=False)

    _act = solver.ctl.sample(state[:, t], 0, stoch=False)
    action[:, t] = np.clip(_act, -env.ulim, env.ulim)
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])


plt.figure()

plt.subplot(7, 1, 1)
plt.plot(state[0, :], '-b')
plt.subplot(7, 1, 2)
plt.plot(state[1, :], '-b')
plt.subplot(7, 1, 3)
plt.plot(state[2, :], '-b')

plt.subplot(7, 1, 4)
plt.plot(state[3, :], '-r')
plt.subplot(7, 1, 5)
plt.plot(state[4, :], '-r')
59 | plt.subplot(7, 1, 6) 60 | plt.plot(state[5, :], '-r') 61 | 62 | plt.subplot(7, 1, 7) 63 | plt.plot(action[0, :], '-g') 64 | 65 | plt.show() 66 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | 12 | # lqr task 13 | env = gym.make('LQR-TO-v0') 14 | env._max_episode_steps = 100000 15 | 16 | dm_state = env.observation_space.shape[0] 17 | dm_act = env.action_space.shape[0] 18 | 19 | horizon, nb_steps = 25, 60 20 | 21 | env_sigma = env.unwrapped.sigma 22 | 23 | state = np.zeros((dm_state, nb_steps + 1)) 24 | action = np.zeros((dm_act, nb_steps)) 25 | 26 | state[:, 0] = env.reset() 27 | for t in range(nb_steps): 28 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 29 | init_action_sigma=100., nb_steps=horizon, kl_bound=5.) 30 | trace = solver.run(nb_iter=10, verbose=False) 31 | 32 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 33 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 34 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 35 | 36 | print('Time Step:', t, 'Cost:', trace[-1]) 37 | 38 | 39 | plt.figure() 40 | 41 | plt.subplot(3, 1, 1) 42 | plt.plot(state[0, :], '-b') 43 | plt.subplot(3, 1, 2) 44 | plt.plot(state[1, :], '-b') 45 | 46 | plt.subplot(3, 1, 3) 47 | plt.plot(action[0, :], '-g') 48 | 49 | plt.show() 50 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | # pendulum env 10 | env = gym.make('Pendulum-TO-v0') 11 | env._max_episode_steps = 10000 12 | env.unwrapped.dt = 0.05 13 | env.unwrapped.umax = np.array([2.5]) 14 | env.unwrapped.periodic = True 15 | 16 | dm_state = env.observation_space.shape[0] 17 | dm_act = env.action_space.shape[0] 18 | 19 | horizon, nb_steps = 25, 100 20 | 21 | env_sigma = env.unwrapped.sigma 22 | 23 | state = np.zeros((dm_state, nb_steps + 1)) 24 | action = np.zeros((dm_act, nb_steps)) 25 | 26 | state[:, 0] = env.reset() 27 | for t in range(nb_steps): 28 | solver = MBGPS(env, init_state=tuple([state[:, t], env_sigma]), 29 | init_action_sigma=25., nb_steps=horizon, 30 | kl_bound=1., action_penalty=1e-5) 31 | trace = solver.run(nb_iter=10, verbose=False) 32 | 33 | _act = solver.ctl.sample(state[:, t], 0, stoch=False) 34 | action[:, t] = np.clip(_act, -env.ulim, env.ulim) 35 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 36 | 37 | print('Time Step:', t, 'Cost:', trace[-1]) 38 | 39 | 40 | import matplotlib.pyplot as plt 41 | 42 | plt.figure() 43 | 44 | plt.subplot(3, 1, 1) 45 | plt.plot(state[0, :], '-b') 46 | plt.subplot(3, 1, 2) 47 | plt.plot(state[1, :], '-b') 48 | 49 | plt.subplot(3, 1, 3) 50 | plt.plot(action[0, :], '-g') 51 | 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/gps/analytical/mpc/mb_pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import 
matplotlib.pyplot as plt

from joblib import Parallel, delayed

import multiprocessing
nb_cores = multiprocessing.cpu_count()


def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.02
    env.unwrapped.umax = np.array([2.5])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 15, 100

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        init_state = tuple([state[:, t], 1e-4 * np.eye(dm_state)])
        solver = MBGPS(env, init_state=init_state,
                       init_action_sigma=2.5, nb_steps=horizon,
                       kl_bound=1., action_penalty=1e-3)
        trace = solver.run(nb_iter=5, verbose=False)

        solver.ctl.sigma = np.dstack([1e-2 * np.eye(dm_act)] * horizon)
        u = solver.ctl.sample(state[:, t], 0, stoch=True)
        action[:, t] = np.clip(u, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        # print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T


def parallel_gps(nb_jobs=50):
    kwargs_list = [{} for _ in range(nb_jobs)]
    results = Parallel(n_jobs=min(nb_jobs, 12),
                       verbose=10, backend='loky')(map(delayed(create_job), kwargs_list))
    obs, act = list(map(list, zip(*results)))
    return obs, act


obs, act = parallel_gps(nb_jobs=50)

fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 4))
for _obs, _act in zip(obs, act):
    ax[0].plot(_obs[:, :-1])
    ax[1].plot(_obs[:, -1])
    ax[2].plot(_act)
plt.show()

import pickle
data = {'obs': obs, 'act': act}
pickle.dump(data, open("gps_pendulum.pkl", "wb"))
--------------------------------------------------------------------------------
/examples/gps/analytical/topt/mb_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# cartpole env
env = gym.make('Cartpole-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

env.seed(1337)

solver = MBGPS(env, nb_steps=100,
               init_state=env.init(),
               init_action_sigma=10.0,
               kl_bound=1e0,
               slew_rate=False,
               action_penalty=1e-5,
               activation={'mult': 1., 'shift': 80})

trace = solver.run(nb_iter=25, verbose=True)

# plot dists
solver.plot()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/gps/analytical/topt/mb_double_cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MBGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# double cartpole env
env = gym.make('DoubleCartpole-TO-v0')
env._max_episode_steps = 10000
env.unwrapped.dt = 0.05

env.seed(1337)

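# A note on the solver settings below, inferred from the sibling examples
# rather than from library docs: kl_bound caps the KL trust region of each
# policy update, action_penalty regularizes control effort, and activation
# with shift=80 appears to up-weight the state cost over the final stretch
# of the 100-step horizon.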
solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=5.0, 23 | kl_bound=1e0, 24 | slew_rate=False, 25 | action_penalty=1e-5, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=50, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum env 14 | env = gym.make('DoublePendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=10.0, 23 | kl_bound=1e1, 24 | slew_rate=False, 25 | action_penalty=1, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=50, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # lqr task 14 | env = gym.make('LQR-TO-v0') 15 | env._max_episode_steps = 60 16 | 17 | env.seed(1337) 18 | 19 | solver = MBGPS(env, nb_steps=60, 20 | init_state=env.init(), 21 | init_action_sigma=100., 22 | kl_bound=5.) 
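# Since this task is linear-quadratic, the GPS solution should coincide with
# the exact Riccati solution; mbgps_riccati.py below makes that comparison
# explicit. run() returns the per-iteration trace of the objective, which is
# plotted at the end.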
23 | 24 | trace = solver.run(nb_iter=10, verbose=True) 25 | 26 | # plot dists 27 | solver.plot() 28 | 29 | # plot objective 30 | plt.figure() 31 | plt.plot(trace) 32 | plt.show() 33 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum env 14 | env = gym.make('Pendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | solver = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=5.0, 23 | kl_bound=1e1, 24 | slew_rate=False, 25 | action_penalty=1, 26 | activation={'mult': 1., 'shift': 80}) 27 | 28 | trace = solver.run(nb_iter=25, verbose=True) 29 | 30 | # plot dists 31 | solver.plot() 32 | 33 | # plot objective 34 | plt.figure() 35 | plt.plot(trace) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | from joblib import Parallel, delayed 9 | 10 | import multiprocessing 11 | nb_cores = multiprocessing.cpu_count() 12 | 13 | 14 | def create_job(kwargs): 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | # pendulum env 19 | env = gym.make('Pendulum-TO-v0') 20 | env._max_episode_steps = 10000 21 | env.unwrapped.dt = 0.02 22 | env.unwrapped.umax = np.array([2.5]) 23 | env.unwrapped.periodic = False 24 | 25 | dm_state = env.observation_space.shape[0] 26 | dm_act = env.action_space.shape[0] 27 | 28 | state = env.reset() 29 | init_state = tuple([state, 1e-4 * np.eye(dm_state)]) 30 | solver = MBGPS(env, init_state=init_state, 31 | init_action_sigma=25., nb_steps=300, 32 | kl_bound=.1, action_penalty=1e-3, 33 | activation={'shift': 250, 'mult': 0.5}) 34 | 35 | solver.run(nb_iter=100, verbose=False) 36 | 37 | solver.ctl.sigma = np.dstack([1e-1 * np.eye(dm_act)] * 300) 38 | data = solver.rollout(nb_episodes=1, stoch=True, init=state) 39 | 40 | obs, act = np.squeeze(data['x'], axis=-1).T, np.squeeze(data['u'], axis=-1).T 41 | return obs, act 42 | 43 | 44 | def parallel_gps(nb_jobs=50): 45 | kwargs_list = [{} for _ in range(nb_jobs)] 46 | results = Parallel(n_jobs=min(nb_jobs, 20), 47 | verbose=10, backend='loky')(map(delayed(create_job), kwargs_list)) 48 | obs, act = list(map(list, zip(*results))) 49 | return obs, act 50 | 51 | 52 | obs, act = parallel_gps(nb_jobs=50) 53 | 54 | plt.figure() 55 | fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 4)) 56 | for _obs, _act in zip(obs, act): 57 | ax[0].plot(_obs[:, :-1]) 58 | ax[1].plot(_obs[:, -1]) 59 | ax[2].plot(_act) 60 | plt.show() 61 | 62 | import pickle 63 | data = {'obs': obs, 'act': act} 64 | pickle.dump(data, open("gps_pendulum_other.pkl", "wb")) 65 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mb_quad_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MBGPS 5 | 6 | 
import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | np.random.seed(1337) 10 | 11 | # pendulum env 12 | env = gym.make('QuadPendulum-TO-v0') 13 | env._max_episode_steps = 100 14 | env.unwrapped.dt = 0.05 15 | 16 | env.seed(1337) 17 | 18 | solver = MBGPS(env, nb_steps=100, 19 | init_state=env.init(), 20 | init_action_sigma=5.0, 21 | kl_bound=10., 22 | slew_rate=False, 23 | action_penalty=1e-5) 24 | 25 | trace = solver.run(nb_iter=50, verbose=True) 26 | 27 | # plot dists 28 | solver.plot() 29 | 30 | # plot objective 31 | import matplotlib.pyplot as plt 32 | 33 | plt.figure() 34 | plt.plot(trace) 35 | plt.show() 36 | -------------------------------------------------------------------------------- /examples/gps/analytical/topt/mbgps_riccati.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | 5 | from trajopt.gps import MBGPS 6 | from trajopt.riccati import Riccati 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | import warnings 11 | warnings.filterwarnings("ignore") 12 | 13 | # lqr task 14 | env = gym.make('LQR-TO-v0') 15 | env.env._max_episode_steps = 100 16 | 17 | dm_state = env.unwrapped.dm_state 18 | dm_act = env.unwrapped.dm_act 19 | 20 | mbgps = MBGPS(env, nb_steps=100, 21 | init_state=env.init(), 22 | init_action_sigma=100., 23 | kl_bound=5.) 24 | 25 | mbgps.run(nb_iter=15, verbose=True) 26 | 27 | riccati = Riccati(env, nb_steps=100, 28 | init_state=env.init()) 29 | 30 | riccati.run() 31 | 32 | np.random.seed(1337) 33 | env.seed(1337) 34 | gps_data = mbgps.rollout(250, stoch=False) 35 | 36 | np.random.seed(1337) 37 | env.seed(1337) 38 | riccati_data = riccati.rollout(250) 39 | 40 | print('GPS Cost: ', np.mean(np.sum(gps_data['c'], axis=0)), 41 | ', Riccati Cost', np.mean(np.sum(riccati_data['c'], axis=0))) 42 | 43 | plt.figure(figsize=(6, 12)) 44 | plt.suptitle("LQR Mean Traj.: Riccati vs GPS") 45 | 46 | for i in range(dm_state): 47 | plt.subplot(dm_state + dm_act, 1, i + 1) 48 | plt.plot(riccati.xref[i, ...], color='k') 49 | plt.plot(mbgps.xdist.mu[i, ...], color='r', linestyle='None', marker='o', markersize=3) 50 | 51 | for i in range(dm_act): 52 | plt.subplot(dm_state + dm_act, 1, dm_state + i + 1) 53 | plt.plot(riccati.uref[i, ...], color='k') 54 | plt.plot(mbgps.udist.mu[i, ...], color='r', linestyle='None', marker='o', markersize=3) 55 | 56 | plt.show() 57 | 58 | plt.figure(figsize=(6, 12)) 59 | plt.suptitle("LQR Controller: Riccati vs GPS") 60 | 61 | for i in range(dm_state): 62 | plt.subplot(dm_state + dm_act, 1, i + 1) 63 | plt.plot(mbgps.ctl.K[0, i, ...], color='r', linestyle='None', marker='o', markersize=3) 64 | plt.plot(riccati.ctl.K[0, i, ...], color='k') 65 | 66 | for i in range(dm_act): 67 | plt.subplot(dm_state + dm_act, 1, dm_state + i + 1) 68 | plt.plot(mbgps.ctl.kff[i, ...], color='r', linestyle='None', marker='o', markersize=3) 69 | plt.plot(riccati.ctl.kff[i, ...], color='k') 70 | 71 | plt.show() 72 | 73 | plt.figure(figsize=(12, 12)) 74 | plt.suptitle("LQR Sample Traj.: Riccati vs GPS") 75 | 76 | for i, j in zip(range(dm_state), range(1, 2 * dm_state, 2)): 77 | plt.subplot(dm_state + dm_act, 2, j) 78 | plt.plot(riccati_data['x'][i, ...]) 79 | 80 | plt.subplot(dm_state + dm_act, 2, j + 1) 81 | plt.plot(gps_data['x'][i, ...]) 82 | 83 | for i, j in zip(range(dm_act), range(2 * dm_state + 1, 2 * dm_state + 2 * dm_act, 2)): 84 | plt.subplot(dm_state + dm_act, 2, j) 85 | plt.plot(riccati_data['u'][i, ...]) 86 | 87 | plt.subplot(dm_state + dm_act, 2, j + 1) 88 | 
plt.plot(gps_data['u'][i, ...]) 89 | 90 | plt.show() 91 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_cartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # cartpole task 14 | env = gym.make('Cartpole-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1} 21 | # prior = {'K': 1e-6} 22 | 23 | solver = MFGPS(env, nb_steps=100, 24 | init_state=env.init(), 25 | init_action_sigma=0.1, 26 | kl_bound=1e0, 27 | action_penalty=1e-3, 28 | prior=prior, 29 | activation={'mult': 1., 'shift': 80}) 30 | 31 | # run gps 32 | trace = solver.run(nb_learning_episodes=25, 33 | nb_iter=50, verbose=True) 34 | 35 | # execute and plot 36 | data = solver.rollout(25, stoch=True) 37 | 38 | plt.figure() 39 | for k in range(solver.dm_state): 40 | plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1) 41 | plt.plot(data['x'][k, ...]) 42 | 43 | for k in range(solver.dm_act): 44 | plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1) 45 | plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...]) 46 | 47 | plt.show() 48 | 49 | # plot objective 50 | plt.figure() 51 | plt.plot(trace) 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # pendulum task 14 | env = gym.make('DoublePendulum-TO-v0') 15 | env._max_episode_steps = 100 16 | env.unwrapped.dt = 0.05 17 | 18 | env.seed(1337) 19 | 20 | # prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1} 21 | prior = {'K': 1e-6} 22 | 23 | solver = MFGPS(env, nb_steps=100, 24 | init_state=env.init(), 25 | init_action_sigma=10., 26 | kl_bound=1e1, 27 | slew_rate=False, 28 | action_penalty=1, 29 | prior=prior, 30 | activation={'mult': 1., 'shift': 80}) 31 | 32 | # run gps 33 | trace = solver.run(nb_learning_episodes=25, 34 | nb_evaluation_episodes=25, 35 | nb_iter=50, verbose=True) 36 | 37 | # execute and plot 38 | data = solver.rollout(25, stoch=True) 39 | 40 | plt.figure() 41 | for k in range(solver.dm_state): 42 | plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1) 43 | plt.plot(data['x'][k, ...]) 44 | 45 | for k in range(solver.dm_act): 46 | plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1) 47 | plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...]) 48 | 49 | plt.show() 50 | 51 | # plot objective 52 | plt.figure() 53 | plt.plot(trace) 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/gps/learned/mf_lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.gps import MFGPS 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | np.random.seed(1337) 12 | 13 | # 
lqr task
env = gym.make('LQR-TO-v1')
env._max_episode_steps = 100000

env.seed(1337)

prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1}

alg = MFGPS(env, nb_steps=60,
            init_state=env.init(),
            init_action_sigma=100.,
            kl_bound=1e0,
            prior=prior)

# run gps
trace = alg.run(nb_learning_episodes=25,
                nb_evaluation_episodes=25,
                nb_iter=25, verbose=True)

# plot dists
alg.plot_distributions()

# execute and plot
data = alg.rollout(25, stoch=True)

plt.figure()
for k in range(alg.dm_state):
    plt.subplot(alg.dm_state + alg.dm_act, 1, k + 1)
    plt.plot(data['x'][k, ...])

for k in range(alg.dm_act):
    plt.subplot(alg.dm_state + alg.dm_act, 1, alg.dm_state + k + 1)
    plt.plot(data['u'][k, ...])

plt.show()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/gps/learned/mf_pendulum.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.gps import MFGPS

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# pendulum task
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

env.seed(1337)

# prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1}
prior = {'K': 1e-6}

solver = MFGPS(env, nb_steps=100,
               init_state=env.init(),
               init_action_sigma=5.,
               kl_bound=1e1,
               slew_rate=False,
               action_penalty=1,
               prior=prior,
               activation={'mult': 1., 'shift': 80})

# run gps
trace = solver.run(nb_learning_episodes=25,
                   nb_evaluation_episodes=25,
                   nb_iter=25, verbose=True)

# execute and plot
data = solver.rollout(25, stoch=True)

plt.figure()
for k in range(solver.dm_state):
    plt.subplot(solver.dm_state + solver.dm_act, 1, k + 1)
    plt.plot(data['x'][k, ...])

for k in range(solver.dm_act):
    plt.subplot(solver.dm_state + solver.dm_act, 1, solver.dm_state + k + 1)
    plt.plot(np.clip(data['u'], - env.ulim[:, None, None], env.ulim[:, None, None])[k, ...])

plt.show()

# plot objective
plt.figure()
plt.plot(trace)
plt.show()
--------------------------------------------------------------------------------
/examples/ilqr/mpc/cartpole.py:
--------------------------------------------------------------------------------
import autograd.numpy as np

import gym
from trajopt.ilqr import iLQR

import warnings
warnings.filterwarnings("ignore")


# cartpole env
env = gym.make('Cartpole-TO-v1')
env._max_episode_steps = 100000
env.unwrapped.dt = 0.02

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 50, 250

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = iLQR(env, init_state=state[:, t],
                  nb_steps=horizon, action_penalty=np.array([1e-5]))
    trace = solver.run(nb_iter=10, verbose=False)

    action[:, t] = solver.uref[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:',
trace[-1]) 33 | 34 | 35 | import matplotlib.pyplot as plt 36 | 37 | plt.figure() 38 | 39 | plt.subplot(5, 1, 1) 40 | plt.plot(state[0, :], '-b') 41 | plt.subplot(5, 1, 2) 42 | plt.plot(state[1, :], '-b') 43 | 44 | plt.subplot(5, 1, 3) 45 | plt.plot(state[2, :], '-r') 46 | plt.subplot(5, 1, 4) 47 | plt.plot(state[3, :], '-r') 48 | 49 | plt.subplot(5, 1, 5) 50 | plt.plot(action[0, :], '-g') 51 | 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/doublecartpole.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # double cartpole env 11 | env = gym.make('DoubleCartpole-TO-v0') 12 | env._max_episode_steps = 100000 13 | env.unwrapped.dt = 0.01 14 | 15 | dm_state = env.observation_space.shape[0] 16 | dm_act = env.action_space.shape[0] 17 | 18 | horizon, nb_steps = 100, 500 19 | 20 | state = np.zeros((dm_state, nb_steps + 1)) 21 | action = np.zeros((dm_act, nb_steps)) 22 | 23 | state[:, 0] = env.reset() 24 | for t in range(nb_steps): 25 | solver = iLQR(env, init_state=state[:, t], 26 | nb_steps=horizon, action_penalty=np.array([1e-5])) 27 | trace = solver.run(nb_iter=10, verbose=False) 28 | 29 | action[:, t] = solver.uref[:, 0] 30 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 31 | 32 | print('Time Step:', t, 'Cost:', trace[-1]) 33 | 34 | 35 | import matplotlib.pyplot as plt 36 | 37 | plt.figure() 38 | 39 | plt.subplot(7, 1, 1) 40 | plt.plot(state[0, :], '-b') 41 | plt.subplot(7, 1, 2) 42 | plt.plot(state[1, :], '-b') 43 | plt.subplot(7, 1, 3) 44 | plt.plot(state[2, :], '-b') 45 | 46 | plt.subplot(7, 1, 4) 47 | plt.plot(state[3, :], '-r') 48 | plt.subplot(7, 1, 5) 49 | plt.plot(state[4, :], '-r') 50 | plt.subplot(7, 1, 6) 51 | plt.plot(state[5, :], '-r') 52 | 53 | plt.subplot(7, 1, 7) 54 | plt.plot(action[0, :], '-g') 55 | 56 | plt.show() 57 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # lqr task 11 | env = gym.make('LQR-TO-v0') 12 | env._max_episode_steps = 100000 13 | 14 | dm_state = env.observation_space.shape[0] 15 | dm_act = env.action_space.shape[0] 16 | 17 | horizon, nb_steps = 25, 100 18 | state = np.zeros((dm_state, nb_steps + 1)) 19 | action = np.zeros((dm_act, nb_steps)) 20 | 21 | state[:, 0] = env.reset() 22 | for t in range(nb_steps): 23 | solver = iLQR(env, init_state=state[:, t], 24 | nb_steps=horizon) 25 | trace = solver.run(nb_iter=5, verbose=False) 26 | 27 | action[:, t] = solver.uref[:, 0] 28 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 29 | 30 | print('Time Step:', t, 'Cost:', trace[-1]) 31 | 32 | 33 | import matplotlib.pyplot as plt 34 | 35 | plt.figure() 36 | 37 | plt.subplot(3, 1, 1) 38 | plt.plot(state[0, :], '-b') 39 | plt.subplot(3, 1, 2) 40 | plt.plot(state[1, :], '-b') 41 | 42 | plt.subplot(3, 1, 3) 43 | plt.plot(action[0, :], '-g') 44 | 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/pendulum.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 
4 | from trajopt.ilqr import iLQR 5 | 6 | import warnings 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | # pendulum env 11 | env = gym.make('Pendulum-TO-v0') 12 | env._max_episode_steps = 100000 13 | env.unwrapped.dt = 0.05 14 | 15 | dm_state = env.observation_space.shape[0] 16 | dm_act = env.action_space.shape[0] 17 | 18 | horizon, nb_steps = 25, 100 19 | state = np.zeros((dm_state, nb_steps + 1)) 20 | action = np.zeros((dm_act, nb_steps)) 21 | 22 | state[:, 0] = env.reset() 23 | for t in range(nb_steps): 24 | solver = iLQR(env, init_state=state[:, t], 25 | nb_steps=horizon, action_penalty=np.array([1e-5])) 26 | trace = solver.run(nb_iter=10, verbose=False) 27 | 28 | action[:, t] = solver.uref[:, 0] 29 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 30 | 31 | print('Time Step:', t, 'Cost:', trace[-1]) 32 | 33 | import matplotlib.pyplot as plt 34 | 35 | plt.figure() 36 | 37 | plt.subplot(3, 1, 1) 38 | plt.plot(state[0, :], '-b') 39 | plt.subplot(3, 1, 2) 40 | plt.plot(state[1, :], '-b') 41 | 42 | plt.subplot(3, 1, 3) 43 | plt.plot(action[0, :], '-g') 44 | 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/ilqr/mpc/pendulum_parallel.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.ilqr import iLQR 5 | 6 | from joblib import Parallel, delayed 7 | 8 | import multiprocessing 9 | nb_cores = multiprocessing.cpu_count() 10 | 11 | 12 | def create_job(kwargs): 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | 16 | # pendulum env 17 | env = gym.make('Pendulum-TO-v1') 18 | env._max_episode_steps = 100000 19 | env.unwrapped.dt = 0.05 20 | 21 | dm_state = env.observation_space.shape[0] 22 | dm_act = env.action_space.shape[0] 23 | 24 | horizon, nb_steps = 25, 100 25 | state = np.zeros((dm_state, nb_steps + 1)) 26 | action = np.zeros((dm_act, nb_steps)) 27 | 28 | state[:, 0] = env.reset() 29 | for t in range(nb_steps): 30 | solver = iLQR(env, init_state=state[:, t], 31 | nb_steps=horizon, action_penalty=np.array([1e-5])) 32 | solver.run(nb_iter=10, verbose=False) 33 | 34 | action[:, t] = solver.uref[:, 0] 35 | state[:, t + 1], _, _, _ = env.step(action[:, t]) 36 | 37 | return state[:, :-1].T, action.T 38 | 39 | 40 | def parallel_ilqr(nb_jobs=50): 41 | kwargs_list = [{} for _ in range(nb_jobs)] 42 | results = Parallel(n_jobs=min(nb_jobs, nb_cores), 43 | verbose=10, backend='loky')(map(delayed(create_job), kwargs_list)) 44 | obs, act = list(map(list, zip(*results))) 45 | return obs, act 46 | 47 | 48 | obs, act = parallel_ilqr(nb_jobs=50) 49 | 50 | import matplotlib.pyplot as plt 51 | 52 | fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 4)) 53 | for _obs, _act in zip(obs, act): 54 | ax[0].plot(_obs[:, 0]) 55 | ax[1].plot(_obs[:, 1]) 56 | ax[2].plot(_act[:, 0]) 57 | plt.show() 58 | 59 | import pickle 60 | data = {'obs': obs, 'act': act} 61 | pickle.dump(data, open("ilqr_pendulum_polar.pkl", "wb")) 62 | -------------------------------------------------------------------------------- /examples/rgps/mb_lqr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | 5 | from trajopt.rgps import LRGPS 6 | 7 | from matplotlib import rc 8 | import matplotlib.pyplot as plt 9 | 10 | import tikzplotlib 11 | 12 | import warnings 13 | warnings.filterwarnings("ignore") 14 | 15 | rc('lines', **{'linewidth': 1}) 16 | rc('text', usetex=True) 17 | 18 | 19 | def 
beautify(ax):
    ax.set_frame_on(True)
    ax.minorticks_on()

    ax.grid(True)
    ax.grid(linestyle='--')

    ax.tick_params(which='both', direction='in',
                   bottom=True, labelbottom=True,
                   top=True, labeltop=False,
                   right=True, labelright=False,
                   left=True, labelleft=True)

    ax.tick_params(which='major', length=6)
    ax.tick_params(which='minor', length=3)

    # ax.autoscale(tight=True)
    # ax.set_aspect('equal')

    if ax.get_legend():
        ax.legend(loc='best')

    return ax


# lqr task
env = gym.make('LQR-TO-v1')
env.env._max_episode_steps = 75
env.env.sigma0 = 1e-2 * np.eye(2)
env.env.sigma = 1e-4 * np.eye(2)

np.random.seed(1337)
env.seed(1337)

rgps = LRGPS(env, nb_steps=75,
             policy_kl_bound=0.25,
             param_nominal_kl_bound=75e1,
             param_regularizer_kl_bound=10,
             init_state=env.init(),
             init_action_sigma=100.)
rgps.run(nb_iter=100, verbose=True)

np.random.seed(1337)
env.seed(1337)

gps = LRGPS(env, nb_steps=75,
            policy_kl_bound=0.25,
            param_nominal_kl_bound=75e1,
            param_regularizer_kl_bound=10,
            init_state=env.init(),
            init_action_sigma=100.)
gps.run(nb_iter=100, verbose=True,
        optimize_adversary=False)

# compute attack on final standard controller
gps.param, gps.eta = gps.reguarlized_parameter_optimization(gps.ctl)
print("Disturbance KL:", gps.parameter_nominal_kldiv(gps.param).sum())

fig = plt.figure(figsize=(6, 12))
plt.suptitle("Standard vs Robust Ctl: Feedback Controller")
for i in range(rgps.dm_state):
    plt.subplot(rgps.dm_state + rgps.dm_act, 1, i + 1)
    plt.plot(rgps.ctl.K[0, i, ...], color='r', marker='x', markersize=2)
    plt.plot(gps.ctl.K[0, i, ...], color='b', marker='o', markersize=2)

for i in range(rgps.dm_act):
    plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + i + 1)
    plt.plot(rgps.ctl.kff[i, ...], color='r', marker='x', markersize=2)
    plt.plot(gps.ctl.kff[i, ...], color='b', marker='o', markersize=2)

axs = fig.get_axes()
axs = [beautify(ax) for ax in axs]
plt.show()

tikzplotlib.save("linear_feedback_gains.tex")

std_xdist, std_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.nominal)
robust_xdist, robust_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.nominal)

cost_nom_env_std_ctl = gps.cost.evaluate(std_xdist, std_udist)
cost_nom_env_rbst_ctl = rgps.cost.evaluate(robust_xdist, robust_udist)

print("Expected Cost of Standard and Robust Control on Nominal Env")
print("Std. Ctl.: ", cost_nom_env_std_ctl, "Rbst. Ctl.", cost_nom_env_rbst_ctl)

std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.param)
robust_worst_xdist, robust_worst_udist, _ = rgps.cubature_forward_pass(rgps.ctl, gps.param)

cost_adv_env_std_ctl = gps.cost.evaluate(std_worst_xdist, std_worst_udist)
cost_adv_env_rbst_ctl = rgps.cost.evaluate(robust_worst_xdist, robust_worst_udist)

print("Expected Cost of Standard and Robust Control on Adversarial Env")
print("Std. Ctl.: ", cost_adv_env_std_ctl, "Rbst.
Ctl.", cost_adv_env_rbst_ctl) 112 | 113 | fig = plt.figure() 114 | plt.suptitle('Standard and Robust Control Without Adversary') 115 | for k in range(rgps.dm_state): 116 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, k + 1) 117 | 118 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 119 | 120 | plt.plot(t, std_xdist.mu[k, :], '-b') 121 | lb = std_xdist.mu[k, :] - 2. * np.sqrt(std_xdist.sigma[k, k, :]) 122 | ub = std_xdist.mu[k, :] + 2. * np.sqrt(std_xdist.sigma[k, k, :]) 123 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 124 | 125 | for k in range(rgps.dm_act): 126 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + k + 1) 127 | 128 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 129 | 130 | plt.plot(t, std_udist.mu[k, :], '-b') 131 | lb = std_udist.mu[k, :] - 2. * np.sqrt(std_udist.sigma[k, k, :]) 132 | ub = std_udist.mu[k, :] + 2. * np.sqrt(std_udist.sigma[k, k, :]) 133 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 134 | 135 | for k in range(gps.dm_state): 136 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 137 | 138 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 139 | 140 | plt.plot(t, robust_xdist.mu[k, :], '-r') 141 | lb = robust_xdist.mu[k, :] - 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 142 | ub = robust_xdist.mu[k, :] + 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 143 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 144 | 145 | for k in range(rgps.dm_act): 146 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 147 | 148 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 149 | 150 | plt.plot(t, robust_udist.mu[k, :], '-r') 151 | lb = robust_udist.mu[k, :] - 2. * np.sqrt(robust_udist.sigma[k, k, :]) 152 | ub = robust_udist.mu[k, :] + 2. * np.sqrt(robust_udist.sigma[k, k, :]) 153 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 154 | 155 | axs = fig.get_axes() 156 | axs = [beautify(ax) for ax in axs] 157 | plt.show() 158 | 159 | # tikzplotlib.save("linear_trajectories_nominal.tex") 160 | 161 | fig = plt.figure() 162 | plt.suptitle('Standard and Robust Control With Adversary') 163 | for k in range(gps.dm_state): 164 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 165 | 166 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 167 | 168 | plt.plot(t, std_worst_xdist.mu[k, :], '-g') 169 | lb = std_worst_xdist.mu[k, :] - 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 170 | ub = std_worst_xdist.mu[k, :] + 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 171 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 172 | 173 | for k in range(rgps.dm_act): 174 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 175 | 176 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 177 | 178 | plt.plot(t, std_worst_udist.mu[k, :], '-g') 179 | lb = std_worst_udist.mu[k, :] - 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 180 | ub = std_worst_udist.mu[k, :] + 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 181 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 182 | 183 | for k in range(gps.dm_state): 184 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 185 | 186 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 187 | 188 | plt.plot(t, robust_worst_xdist.mu[k, :], '-m') 189 | lb = robust_worst_xdist.mu[k, :] - 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 190 | ub = robust_worst_xdist.mu[k, :] + 2. 
* np.sqrt(robust_worst_xdist.sigma[k, k, :]) 191 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 192 | 193 | for k in range(rgps.dm_act): 194 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 195 | 196 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 197 | 198 | plt.plot(t, robust_worst_udist.mu[k, :], '-m') 199 | lb = robust_worst_udist.mu[k, :] - 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 200 | ub = robust_worst_udist.mu[k, :] + 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 201 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 202 | 203 | axs = fig.get_axes() 204 | axs = [beautify(ax) for ax in axs] 205 | plt.show() 206 | 207 | # tikzplotlib.save("linear_trajectories_adversarial.tex") 208 | 209 | from trajopt.rgps.objects import MatrixNormalParameters 210 | interp = MatrixNormalParameters(rgps.dm_state, rgps.dm_act, rgps.nb_steps) 211 | 212 | alphas = np.linspace(0., 2., 21) 213 | 214 | cost_adv_env_std_ctl = [] 215 | cost_adv_env_rbst_ctl = [] 216 | kl_distance = [] 217 | 218 | for alpha in alphas: 219 | print('Alpha:', alpha) 220 | 221 | interp.mu, interp.sigma = gps.interp_gauss_kl(gps.nominal.mu, gps.nominal.sigma, 222 | gps.param.mu, gps.param.sigma, alpha) 223 | 224 | kl_distance.append(np.sum(gps.parameter_nominal_kldiv(interp))) 225 | 226 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, interp) 227 | robust_worst_xdist, robust_worst_udist, _ = gps.cubature_forward_pass(rgps.ctl, interp) 228 | 229 | cost_adv_env_std_ctl.append(gps.cost.evaluate(std_worst_xdist, std_worst_udist)) 230 | cost_adv_env_rbst_ctl.append(gps.cost.evaluate(robust_worst_xdist, robust_worst_udist)) 231 | 232 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 233 | print("Std. Ctl.: ", cost_adv_env_std_ctl[-1], "Rbst. Ctl.", cost_adv_env_rbst_ctl[-1]) 234 | 235 | fig = plt.figure() 236 | plt.plot(kl_distance, cost_adv_env_std_ctl, 'b', marker='o') 237 | plt.xscale('log') 238 | plt.yscale('log') 239 | plt.plot(kl_distance, cost_adv_env_rbst_ctl, 'r', marker='*') 240 | plt.xscale('log') 241 | plt.yscale('log') 242 | 243 | axs = fig.gca() 244 | axs = beautify(axs) 245 | plt.show() 246 | 247 | # tikzplotlib.save("linear_cost_over_distance.tex") 248 | 249 | kl_over_time = rgps.parameter_nominal_kldiv(gps.param) 250 | 251 | fig = plt.figure() 252 | plt.plot(kl_over_time, 'k', marker='.') 253 | plt.yscale('log') 254 | 255 | axs = fig.gca() 256 | axs = beautify(axs) 257 | plt.show() 258 | 259 | # tikzplotlib.save("linear_kl_over_time.tex") 260 | 
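The `alphas` sweep above evaluates both controllers on dynamics parameters interpolated between the nominal model (`alpha = 0`) and the optimized adversarial model (`alpha = 1`), with `alpha > 1` extrapolating beyond the attack budget; `parameter_nominal_kldiv` then supplies the x-axis of the cost-over-distance plot. The interpolant behind `interp_gauss_kl` is defined elsewhere in the package, so the sketch below only illustrates the idea with a plain moment-space interpolation of two Gaussians and a closed-form KL divergence; the function names and the interpolation rule here are assumptions, not the library's implementation.

```python
import numpy as np

def interp_gauss_moments(mu0, sigma0, mu1, sigma1, alpha):
    # hypothetical stand-in for interp_gauss_kl: blend mean and covariance
    # in moment space, alpha = 0 gives the nominal model, alpha = 1 the
    # adversarial one (extrapolating past 1 may lose positive definiteness)
    mu = (1. - alpha) * mu0 + alpha * mu1
    sigma = (1. - alpha) * sigma0 + alpha * sigma1
    return mu, sigma

def gauss_kl(mu0, sigma0, mu1, sigma1):
    # KL(N(mu0, sigma0) || N(mu1, sigma1)) between multivariate Gaussians
    d = mu0.shape[0]
    inv1 = np.linalg.inv(sigma1)
    diff = mu1 - mu0
    return 0.5 * (np.trace(inv1 @ sigma0) + diff @ inv1 @ diff - d
                  + np.log(np.linalg.det(sigma1) / np.linalg.det(sigma0)))

mu0, sigma0 = np.zeros(2), np.eye(2)        # nominal parameters
mu1, sigma1 = np.ones(2), 2. * np.eye(2)    # adversarial parameters
for alpha in np.linspace(0., 1., 5):
    mu, sigma = interp_gauss_moments(mu0, sigma0, mu1, sigma1, alpha)
    print(alpha, gauss_kl(mu, sigma, mu0, sigma0))
```

The printed KL grows with `alpha` in this toy case, which mirrors how the script sums `parameter_nominal_kldiv` over time steps to measure how far an interpolated model sits from the nominal one.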
-------------------------------------------------------------------------------- /examples/rgps/mb_robot.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.rgps import MBRGPS 5 | 6 | from matplotlib import rc 7 | import matplotlib.pyplot as plt 8 | 9 | import tikzplotlib 10 | 11 | import warnings 12 | warnings.filterwarnings("ignore") 13 | 14 | rc('lines', **{'linewidth': 1}) 15 | rc('text', usetex=True) 16 | 17 | 18 | def beautify(ax): 19 | ax.set_frame_on(True) 20 | ax.minorticks_on() 21 | 22 | ax.grid(True) 23 | ax.grid(linestyle=':') 24 | 25 | ax.tick_params(which='both', direction='in', 26 | bottom=True, labelbottom=True, 27 | top=True, labeltop=False, 28 | right=True, labelright=False, 29 | left=True, labelleft=True) 30 | 31 | ax.tick_params(which='major', length=6) 32 | ax.tick_params(which='minor', length=3) 33 | 34 | # ax.autoscale(tight=True) 35 | # ax.set_aspect('equal') 36 | 37 | if ax.get_legend(): 38 | ax.legend(loc='best') 39 | 40 | return ax 41 | 42 | 43 | # robot task 44 | env = gym.make('Robot-TO-v0') 45 | env._max_episode_steps = 100 46 | 47 | np.random.seed(1337) 48 | env.seed(1337) 49 | 50 | rgps = MBRGPS(env, nb_steps=100, 51 | init_state=env.init(), 52 | policy_kl_bound=0.25, 53 | param_nominal_kl_bound=5e2, 54 | param_regularizer_kl_bound=10, 55 | init_action_sigma=0.1, 56 | action_penalty=1e-1, 57 | nominal_variance=1e-6) 58 | rgps.run(nb_iter=900, verbose=True) 59 | 60 | np.random.seed(1337) 61 | env.seed(1337) 62 | 63 | gps = MBRGPS(env, nb_steps=100, 64 | init_state=env.init(), 65 | policy_kl_bound=0.25, 66 | param_nominal_kl_bound=5e2, 67 | param_regularizer_kl_bound=10, 68 | init_action_sigma=0.1, 69 | action_penalty=1e-1, 70 | nominal_variance=1e-6) 71 | gps.run(nb_iter=50, verbose=True, 72 | optimize_adversary=False) 73 | 74 | # compute attack on final standard controller 75 | gps.param_nominal_kl_bound = np.array([1e3]) 76 | gps.param, gps.eta = gps.reguarlized_parameter_optimization(gps.ctl) 77 | print("Disturbance KL:", gps.parameter_nominal_kldiv(gps.param).sum()) 78 | 79 | fig = plt.figure(figsize=(6, 12)) 80 | plt.suptitle("Standard vs Robust Ctl: Feedback Controller") 81 | for i in range(gps.dm_state): 82 | plt.subplot(gps.dm_state + gps.dm_act, 1, i + 1) 83 | plt.plot(gps.ctl.K[0, i, ...], color='b', marker='o', markersize=2) 84 | plt.plot(rgps.ctl.K[0, i, ...], color='r', marker='x', markersize=2) 85 | 86 | for i in range(gps.dm_act): 87 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + i + 1) 88 | plt.plot(gps.ctl.kff[i, ...], color='b', marker='o', markersize=2) 89 | plt.plot(rgps.ctl.kff[i, ...], color='r', marker='x', markersize=2) 90 | 91 | axs = fig.get_axes() 92 | axs = [beautify(ax) for ax in axs] 93 | 
plt.show() 94 | 95 | # tikzplotlib.save("robot_feedback_gains.tex") 96 | 97 | std_xdist, std_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.nominal) 98 | robust_xdist, robust_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.nominal) 99 | 100 | cost_nom_env_std_ctl = gps.cost.evaluate(std_xdist, std_udist) 101 | cost_nom_env_rbst_ctl = rgps.cost.evaluate(robust_xdist, robust_udist) 102 | 103 | print("Expected Cost of Standard and Robust Control on Nominal Env") 104 | print("Std. Ctl.: ", cost_nom_env_std_ctl, "Rbst. Ctl.", cost_nom_env_rbst_ctl) 105 | 106 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, gps.param) 107 | robust_worst_xdist, robust_worst_udist, _ = rgps.cubature_forward_pass(rgps.ctl, rgps.param) 108 | 109 | cost_adv_env_std_ctl = gps.cost.evaluate(std_worst_xdist, std_worst_udist) 110 | cost_adv_env_rbst_ctl = rgps.cost.evaluate(robust_worst_xdist, robust_worst_udist) 111 | 112 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 113 | print("Std. Ctl.: ", cost_adv_env_std_ctl, "Rbst. Ctl.", cost_adv_env_rbst_ctl) 114 | 115 | 116 | fig = plt.figure() 117 | plt.suptitle('Standard and Robust Control Without Adversary') 118 | for k in range(rgps.dm_state): 119 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, k + 1) 120 | 121 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 122 | 123 | plt.plot(t, std_xdist.mu[k, :], '-b') 124 | lb = std_xdist.mu[k, :] - 2. * np.sqrt(std_xdist.sigma[k, k, :]) 125 | ub = std_xdist.mu[k, :] + 2. * np.sqrt(std_xdist.sigma[k, k, :]) 126 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 127 | 128 | for k in range(rgps.dm_act): 129 | plt.subplot(rgps.dm_state + rgps.dm_act, 1, rgps.dm_state + k + 1) 130 | 131 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 132 | 133 | plt.plot(t, std_udist.mu[k, :], '-b') 134 | lb = std_udist.mu[k, :] - 2. * np.sqrt(std_udist.sigma[k, k, :]) 135 | ub = std_udist.mu[k, :] + 2. * np.sqrt(std_udist.sigma[k, k, :]) 136 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 137 | 138 | for k in range(gps.dm_state): 139 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 140 | 141 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 142 | 143 | plt.plot(t, robust_xdist.mu[k, :], '-r') 144 | lb = robust_xdist.mu[k, :] - 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 145 | ub = robust_xdist.mu[k, :] + 2. * np.sqrt(robust_xdist.sigma[k, k, :]) 146 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 147 | 148 | for k in range(rgps.dm_act): 149 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 150 | 151 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 152 | 153 | plt.plot(t, robust_udist.mu[k, :], '-r') 154 | lb = robust_udist.mu[k, :] - 2. * np.sqrt(robust_udist.sigma[k, k, :]) 155 | ub = robust_udist.mu[k, :] + 2. * np.sqrt(robust_udist.sigma[k, k, :]) 156 | plt.fill_between(t, lb, ub, color='red', alpha=0.1) 157 | 158 | axs = fig.get_axes() 159 | axs = [beautify(ax) for ax in axs] 160 | plt.show() 161 | 162 | # tikzplotlib.save("robot_trajectories_nominal.tex") 163 | 164 | fig = plt.figure() 165 | plt.suptitle('Standard and Robust Control With Adversary') 166 | for k in range(gps.dm_state): 167 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 168 | 169 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 170 | 171 | plt.plot(t, std_worst_xdist.mu[k, :], '-g') 172 | lb = std_worst_xdist.mu[k, :] - 2. * np.sqrt(std_worst_xdist.sigma[k, k, :]) 173 | ub = std_worst_xdist.mu[k, :] + 2. 
* np.sqrt(std_worst_xdist.sigma[k, k, :]) 174 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 175 | 176 | for k in range(rgps.dm_act): 177 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 178 | 179 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 180 | 181 | plt.plot(t, std_worst_udist.mu[k, :], '-g') 182 | lb = std_worst_udist.mu[k, :] - 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 183 | ub = std_worst_udist.mu[k, :] + 2. * np.sqrt(std_worst_udist.sigma[k, k, :]) 184 | plt.fill_between(t, lb, ub, color='green', alpha=0.1) 185 | 186 | for k in range(gps.dm_state): 187 | plt.subplot(gps.dm_state + gps.dm_act, 1, k + 1) 188 | 189 | t = np.linspace(0, rgps.nb_steps, rgps.nb_steps + 1) 190 | 191 | plt.plot(t, robust_worst_xdist.mu[k, :], '-m') 192 | lb = robust_worst_xdist.mu[k, :] - 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 193 | ub = robust_worst_xdist.mu[k, :] + 2. * np.sqrt(robust_worst_xdist.sigma[k, k, :]) 194 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 195 | 196 | for k in range(rgps.dm_act): 197 | plt.subplot(gps.dm_state + gps.dm_act, 1, gps.dm_state + k + 1) 198 | 199 | t = np.linspace(0, rgps.nb_steps - 1, rgps.nb_steps) 200 | 201 | plt.plot(t, robust_worst_udist.mu[k, :], '-m') 202 | lb = robust_worst_udist.mu[k, :] - 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 203 | ub = robust_worst_udist.mu[k, :] + 2. * np.sqrt(robust_worst_udist.sigma[k, k, :]) 204 | plt.fill_between(t, lb, ub, color='magenta', alpha=0.1) 205 | 206 | axs = fig.get_axes() 207 | axs = [beautify(ax) for ax in axs] 208 | plt.show() 209 | 210 | # tikzplotlib.save("robot_trajectories_adversarial.tex") 211 | 212 | from trajopt.rgps.objects import MatrixNormalParameters 213 | interp = MatrixNormalParameters(rgps.dm_state, rgps.dm_act, rgps.nb_steps) 214 | 215 | alphas = np.linspace(0., 2., 21) 216 | 217 | cost_adv_env_std_ctl = [] 218 | cost_adv_env_rbst_ctl = [] 219 | kl_distance = [] 220 | 221 | for alpha in alphas: 222 | print('Alpha:', alpha) 223 | 224 | interp.mu, interp.sigma = gps.interp_gauss_kl(gps.nominal.mu, gps.nominal.sigma, 225 | gps.param.mu, gps.param.sigma, alpha) 226 | 227 | kl_distance.append(np.sum(gps.parameter_nominal_kldiv(interp))) 228 | 229 | std_worst_xdist, std_worst_udist, _ = gps.cubature_forward_pass(gps.ctl, interp) 230 | robust_worst_xdist, robust_worst_udist, _ = gps.cubature_forward_pass(rgps.ctl, interp) 231 | 232 | cost_adv_env_std_ctl.append(gps.cost.evaluate(std_worst_xdist, std_worst_udist)) 233 | cost_adv_env_rbst_ctl.append(gps.cost.evaluate(robust_worst_xdist, robust_worst_udist)) 234 | 235 | print("Expected Cost of Standard and Robust Control on Adversarial Env") 236 | print("Std. Ctl.: ", cost_adv_env_std_ctl[-1], "Rbst. 
Ctl.", cost_adv_env_rbst_ctl[-1]) 237 | 238 | fig = plt.figure() 239 | plt.plot(kl_distance, cost_adv_env_std_ctl, 'b', marker='o') 240 | # plt.xscale('log') 241 | plt.yscale('log') 242 | plt.plot(kl_distance, cost_adv_env_rbst_ctl, 'r', marker='*') 243 | # plt.xscale('log') 244 | plt.yscale('log') 245 | 246 | axs = fig.gca() 247 | axs = beautify(axs) 248 | plt.show() 249 | 250 | # tikzplotlib.save("robot_cost_over_distance.tex") 251 | 252 | kl_over_time = gps.parameter_nominal_kldiv(gps.param) 253 | 254 | fig = plt.figure() 255 | plt.plot(kl_over_time, 'k', marker='.') 256 | plt.yscale('log') 257 | 258 | axs = fig.gca() 259 | axs = beautify(axs) 260 | plt.show() 261 | 262 | # tikzplotlib.save("robot_kl_over_time.tex") 263 | -------------------------------------------------------------------------------- /examples/riccati/lqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import gym 4 | from trajopt.riccati import Riccati 5 | 6 | np.random.seed(1337) 7 | 8 | # lqr task 9 | env = gym.make('LQR-TO-v0') 10 | env._max_episode_steps = 100 11 | 12 | env.seed(1337) 13 | 14 | alg = Riccati(env, nb_steps=60, 15 | init_state=env.init()) 16 | 17 | # run Riccati 18 | trace = alg.run() 19 | 20 | alg.plot() 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | 5 | from setuptools import setup, Extension 6 | from setuptools.command.build_ext import build_ext 7 | 8 | 9 | class CMakeExtension(Extension): 10 | def __init__(self, name, sourcedir=''): 11 | Extension.__init__(self, name, sources=[]) 12 | self.sourcedir = os.path.abspath(sourcedir) 13 | 14 | 15 | class CMakeBuild(build_ext): 16 | def run(self): 17 | try: 18 | subprocess.check_output(['cmake', '--version']) 19 | except OSError: 20 | raise RuntimeError("CMake must be installed to build the following extensions: " + 21 | ", ".join(e.name for e in self.extensions)) 22 | 23 | for ext in self.extensions: 24 | self.build_extension(ext) 25 | 26 | def build_extension(self, ext): 27 | cmake_args = ['-DPYTHON_EXECUTABLE=' + sys.executable] 28 | 29 | cfg = 'Debug' if self.debug else 'Release' 30 | build_args = ['--config', cfg] 31 | 32 | cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] 33 | build_args += ['--', '-j4'] 34 | 35 | env = os.environ.copy() 36 | env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), 37 | self.distribution.get_version()) 38 | if not os.path.exists(os.path.join(ext.sourcedir, self.build_temp)): 39 | os.makedirs(os.path.join(ext.sourcedir, self.build_temp)) 40 | subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=os.path.join(ext.sourcedir, self.build_temp), env=env) 41 | subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=os.path.join(ext.sourcedir, self.build_temp)) 42 | 43 | 44 | setup( 45 | name='trajopt', 46 | version='0.0.1', 47 | author='Hany Abdulsamad', 48 | author_email='hany@robot-learning.de', 49 | description='A toolbox for trajectory optimization', 50 | install_requires=['numpy', 'scipy', 'matplotlib', 51 | 'scikit-learn', 'autograd', 'gym'], 52 | ext_modules=[CMakeExtension('gps', './trajopt/gps/'), 53 | CMakeExtension('rgps', './trajopt/rgps/'), 54 | CMakeExtension('ilqr', './trajopt/ilqr/'), 55 | CMakeExtension('bspilqr', './trajopt/bspilqr/')], 56 | cmdclass=dict(build_ext=CMakeBuild), 57 | zip_safe=False, 58 | ) 59 
| -------------------------------------------------------------------------------- /trajopt/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='LQR-TO-v0', 5 | entry_point='trajopt.envs:LQRv0', 6 | max_episode_steps=1000, 7 | ) 8 | 9 | register( 10 | id='LQR-TO-v1', 11 | entry_point='trajopt.envs:LQRv1', 12 | max_episode_steps=1000, 13 | ) 14 | 15 | register( 16 | id='LQR-TO-v2', 17 | entry_point='trajopt.envs:LQRv2', 18 | max_episode_steps=1000, 19 | ) 20 | 21 | register( 22 | id='Robot-TO-v0', 23 | entry_point='trajopt.envs:Robot', 24 | max_episode_steps=1000, 25 | ) 26 | 27 | register( 28 | id='Pendulum-TO-v0', 29 | entry_point='trajopt.envs:Pendulum', 30 | max_episode_steps=1000, 31 | ) 32 | 33 | register( 34 | id='Pendulum-TO-v1', 35 | entry_point='trajopt.envs:PendulumWithCartesianCost', 36 | max_episode_steps=1000, 37 | ) 38 | 39 | register( 40 | id='DoublePendulum-TO-v0', 41 | entry_point='trajopt.envs:DoublePendulum', 42 | max_episode_steps=1000, 43 | ) 44 | 45 | register( 46 | id='DoublePendulum-TO-v1', 47 | entry_point='trajopt.envs:DoublePendulumWithCartesianCost', 48 | max_episode_steps=1000, 49 | ) 50 | 51 | register( 52 | id='QuadPendulum-TO-v0', 53 | entry_point='trajopt.envs:QuadPendulum', 54 | max_episode_steps=1000, 55 | ) 56 | 57 | register( 58 | id='QuadPendulum-TO-v1', 59 | entry_point='trajopt.envs:QuadPendulumWithCartesianCost', 60 | max_episode_steps=1000, 61 | ) 62 | 63 | register( 64 | id='Cartpole-TO-v0', 65 | entry_point='trajopt.envs:Cartpole', 66 | max_episode_steps=1000, 67 | ) 68 | 69 | register( 70 | id='Cartpole-TO-v1', 71 | entry_point='trajopt.envs:CartpoleWithCartesianCost', 72 | max_episode_steps=1000, 73 | ) 74 | 75 | register( 76 | id='DoubleCartpole-TO-v0', 77 | entry_point='trajopt.envs:DoubleCartpole', 78 | max_episode_steps=1000, 79 | ) 80 | 81 | register( 82 | id='DoubleCartpole-TO-v1', 83 | entry_point='trajopt.envs:DoubleCartpoleWithCartesianCost', 84 | max_episode_steps=1000, 85 | ) 86 | 87 | register( 88 | id='LightDark-TO-v0', 89 | entry_point='trajopt.envs:LightDark', 90 | max_episode_steps=1000, 91 | ) 92 | 93 | register( 94 | id='Car-TO-v0', 95 | entry_point='trajopt.envs:Car', 96 | max_episode_steps=1000, 97 | ) 98 | -------------------------------------------------------------------------------- /trajopt/bspilqr/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/bspilqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .bspilqr import BSPiLQR 2 | -------------------------------------------------------------------------------- /trajopt/bspilqr/bspilqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.bspilqr.objects import Gaussian 4 | from trajopt.bspilqr.objects import 
AnalyticalLinearBeliefDynamics, AnalyticalQuadraticCost 5 | from trajopt.bspilqr.objects import QuadraticBeliefValue 6 | from trajopt.bspilqr.objects import LinearControl 7 | 8 | from trajopt.bspilqr.core import backward_pass 9 | 10 | 11 | class BSPiLQR: 12 | 13 | def __init__(self, env, nb_steps, init_belief, 14 | alphas=np.power(10., np.linspace(0, -3, 11)), 15 | lmbda=1., dlmbda=1., min_lmbda=1e-6, 16 | max_lmbda=1e6, mult_lmbda=1.6, tolfun=1e-8, 17 | tolgrad=1e-6, min_imp=0., reg=1): 18 | 19 | self.env = env 20 | 21 | # expose necessary functions 22 | self.env_dyn = self.env.unwrapped.dynamics 23 | self.env_obs = self.env.unwrapped.observe 24 | self.env_cost = self.env.unwrapped.cost 25 | self.env_init = init_belief 26 | 27 | self.env_dyn_noise = self.env.unwrapped.dyn_noise 28 | self.env_obs_noise = self.env.unwrapped.obs_noise 29 | 30 | self.ulim = self.env.action_space.high 31 | 32 | self.belief_dim = self.env.unwrapped.state_space.shape[0] 33 | self.obs_dim = self.env.unwrapped.observation_space.shape[0] 34 | self.act_dim = self.env.action_space.shape[0] 35 | self.nb_steps = nb_steps 36 | 37 | # backtracking 38 | self.alphas = alphas 39 | self.alpha = None 40 | 41 | self.lmbda = lmbda 42 | self.dlmbda = dlmbda 43 | self.min_lmbda = min_lmbda 44 | self.max_lmbda = max_lmbda 45 | self.mult_lmbda = mult_lmbda 46 | 47 | # regularization type 48 | self.reg = reg 49 | 50 | # minimum relative improvement 51 | self.min_imp = min_imp 52 | 53 | # stopping criterion 54 | self.tolfun = tolfun 55 | self.tolgrad = tolgrad 56 | 57 | # reference belief trajectory 58 | self.bref = Gaussian(self.belief_dim, self.nb_steps + 1) 59 | self.bref.mu[..., 0], self.bref.sigma[..., 0] = self.env_init 60 | 61 | self.uref = np.zeros((self.act_dim, self.nb_steps)) 62 | 63 | self.vfunc = QuadraticBeliefValue(self.belief_dim, self.nb_steps + 1) 64 | 65 | self.dyn = AnalyticalLinearBeliefDynamics(self.env_dyn, self.env_obs, 66 | self.env_dyn_noise, self.env_obs_noise, 67 | self.belief_dim, self.obs_dim, self.act_dim, self.nb_steps) 68 | 69 | self.ctl = LinearControl(self.belief_dim, self.act_dim, self.nb_steps) 70 | self.ctl.kff = 1e-8 * np.random.randn(self.act_dim, self.nb_steps) 71 | 72 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.belief_dim, self.act_dim, self.nb_steps + 1) 73 | 74 | self.last_return = - np.inf 75 | 76 | def forward_pass(self, ctl, alpha): 77 | belief = Gaussian(self.belief_dim, self.nb_steps + 1) 78 | action = np.zeros((self.act_dim, self.nb_steps)) 79 | cost = np.zeros((self.nb_steps + 1, )) 80 | 81 | belief.mu[..., 0], belief.sigma[..., 0] = self.env_init 82 | for t in range(self.nb_steps): 83 | action[..., t] = ctl.action(belief, alpha, self.bref, self.uref, t) 84 | cost[..., t] = self.env_cost(belief.mu[..., t], belief.sigma[..., t], action[..., t]) 85 | belief.mu[..., t + 1], belief.sigma[..., t + 1] = self.dyn.forward(belief, action, t) 86 | 87 | cost[..., -1] = self.env_cost(belief.mu[..., -1], belief.sigma[..., -1], np.zeros((self.act_dim, ))) 88 | return belief, action, cost 89 | 90 | def backward_pass(self): 91 | lc = LinearControl(self.belief_dim, self.act_dim, self.nb_steps) 92 | bvalue = QuadraticBeliefValue(self.belief_dim, self.nb_steps + 1) 93 | 94 | bvalue.S, bvalue.s, bvalue.tau,\ 95 | dS, lc.K, lc.kff, diverge = backward_pass(self.cost.Q, self.cost.q, 96 | self.cost.R, self.cost.r, 97 | self.cost.P, self.cost.p, 98 | self.dyn.F, self.dyn.G, 99 | self.dyn.T, self.dyn.U, 100 | self.dyn.V, self.dyn.X, 101 | self.dyn.Y, self.dyn.Z, 102 | self.lmbda, self.reg, 103 | 
self.belief_dim, self.act_dim, self.nb_steps) 104 | return lc, bvalue, dS, diverge 105 | 106 | def plot(self): 107 | import matplotlib.pyplot as plt 108 | 109 | plt.figure() 110 | 111 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 112 | for k in range(self.belief_dim): 113 | plt.subplot(self.belief_dim + self.act_dim, 1, k + 1) 114 | plt.plot(t, self.bref.mu[k, :], '-b') 115 | lb = self.bref.mu[k, :] - 2. * np.sqrt(self.bref.sigma[k, k, :]) 116 | ub = self.bref.mu[k, :] + 2. * np.sqrt(self.bref.sigma[k, k, :]) 117 | plt.fill_between(t, lb, ub, color='blue', alpha=0.1) 118 | 119 | t = np.linspace(0, self.nb_steps, self.nb_steps) 120 | for k in range(self.act_dim): 121 | plt.subplot(self.belief_dim + self.act_dim, 1, self.belief_dim + k + 1) 122 | plt.plot(t, self.uref[k, :], '-g') 123 | 124 | plt.show() 125 | 126 | def run(self, nb_iter=25, verbose=False): 127 | _trace = [] 128 | # init trajectory 129 | for alpha in self.alphas: 130 | _belief, _action, _cost = self.forward_pass(self.ctl, alpha) 131 | if np.all(_belief.mu < 1e8): 132 | self.bref = _belief 133 | self.uref = _action 134 | self.last_return = np.sum(_cost) 135 | break 136 | else: 137 | print("Initial trajectory diverges") 138 | 139 | _trace.append(self.last_return) 140 | 141 | for iter in range(nb_iter): 142 | # get linear system dynamics around ref traj. 143 | self.dyn.taylor_expansion(self.bref, self.uref) 144 | 145 | # get quadratic cost around ref traj. 146 | self.cost.taylor_expansion(self.bref, self.uref) 147 | 148 | bvalue = None 149 | lc, dvalue = None, None 150 | # execute a backward pass 151 | backpass_done = False 152 | while not backpass_done: 153 | lc, bvalue, dvalue, diverge = self.backward_pass() 154 | if np.any(diverge): 155 | # increase lmbda 156 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 157 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 158 | if self.lmbda > self.max_lmbda: 159 | break 160 | else: 161 | continue 162 | else: 163 | backpass_done = True 164 | 165 | # terminate if gradient too small 166 | _g_norm = np.mean(np.max(np.abs(lc.kff) / (np.abs(self.uref) + 1.), axis=1)) 167 | if _g_norm < self.tolgrad and self.lmbda < 1e-5: 168 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. / self.mult_lmbda) 169 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 170 | break 171 | 172 | _belief, _action = None, None 173 | _return, _dreturn = None, None 174 | # execute a forward pass 175 | fwdpass_done = False 176 | if backpass_done: 177 | for alpha in self.alphas: 178 | # apply on actual system 179 | _belief, _action, _cost = self.forward_pass(ctl=lc, alpha=alpha) 180 | 181 | # summed mean return 182 | _return = np.sum(_cost) 183 | 184 | # check return improvement 185 | _dreturn = self.last_return - _return 186 | _expected = - 1. * alpha * (dvalue[0] + alpha * dvalue[1]) 187 | _imp = _dreturn / _expected 188 | if _imp > self.min_imp: 189 | fwdpass_done = True 190 | break 191 | 192 | # accept or reject 193 | if fwdpass_done: 194 | # decrease lmbda 195 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. 
/ self.mult_lmbda) 196 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 197 | 198 | self.bref = _belief 199 | self.uref = _action 200 | self.last_return = _return 201 | 202 | self.vfunc = bvalue 203 | 204 | self.ctl = lc 205 | 206 | _trace.append(self.last_return) 207 | 208 | # terminate if reached objective tolerance 209 | if _dreturn < self.tolfun: 210 | break 211 | else: 212 | # increase lmbda 213 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 214 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 215 | if self.lmbda > self.max_lmbda: 216 | break 217 | else: 218 | continue 219 | 220 | if verbose: 221 | print("iter: ", iter, 222 | " return: ", _return) 223 | 224 | return _trace 225 | -------------------------------------------------------------------------------- /trajopt/bspilqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | from autograd.misc import flatten 4 | 5 | 6 | class Gaussian: 7 | def __init__(self, nb_dim, nb_steps): 8 | self.nb_dim = nb_dim 9 | self.nb_steps = nb_steps 10 | 11 | self.mu = np.zeros((self.nb_dim, self.nb_steps)) 12 | self.sigma = np.zeros((self.nb_dim, self.nb_dim, self.nb_steps)) 13 | for t in range(self.nb_steps): 14 | self.sigma[..., t] = np.eye(self.nb_dim) 15 | 16 | @property 17 | def params(self): 18 | return self.mu, self.sigma 19 | 20 | @params.setter 21 | def params(self, values): 22 | self.mu, self.sigma = values 23 | 24 | class EKF: 25 | 26 | def __init__(self, env): 27 | 28 | self.f = env.unwrapped.dynamics 29 | self.h = env.unwrapped.observe 30 | 31 | self.dfdx = jacobian(self.f, 0) 32 | self.dhdx = jacobian(self.h, 0) 33 | 34 | self.dyn_noise = env.unwrapped.dyn_noise 35 | self.obs_noise = env.unwrapped.obs_noise 36 | 37 | self.belief_dim = env.belief_dim 38 | self.obs_dim = env.obs_dim 39 | self.act_dim = env.act_dim 40 | 41 | def evalf(self, mu_b, u): 42 | return self.f(mu_b, u) 43 | 44 | def evalh(self, mu_b): 45 | return self.h(mu_b) 46 | 47 | def predict(self, mu_b, sigma_b, u): 48 | A = self.dfdx(mu_b, u) 49 | sigma_dyn = self.dyn_noise(mu_b, u) 50 | 51 | sigma_b_pr = A @ sigma_b @ A.T + sigma_dyn 52 | sigma_b_pr = 0.5 * (sigma_b_pr + sigma_b_pr.T) 53 | 54 | mu_b_pr = self.evalf(mu_b, u) 55 | 56 | return mu_b_pr, sigma_b_pr 57 | 58 | def innovate(self, mu_b, sigma_b, z): 59 | H = self.dhdx(mu_b) 60 | sigma_obs = self.obs_noise(mu_b) 61 | 62 | K = sigma_b @ H.T @ np.linalg.inv(H @ sigma_b @ H.T + sigma_obs) 63 | 64 | mu_b_in = mu_b + K @ (z - self.evalh(mu_b)) 65 | sigma_b_in = sigma_b - K @ H @ sigma_b 66 | sigma_b_in = 0.5 * (sigma_b_in + sigma_b_in.T) 67 | 68 | return mu_b_in, sigma_b_in 69 | 70 | def inference(self, mu_b, sigma_b, u, z): 71 | mu_b, sigma_b = self.predict(mu_b, sigma_b, u) 72 | mu_b, sigma_b = self.innovate(mu_b, sigma_b, z) 73 | return mu_b, sigma_b 74 | 75 | 76 | class QuadraticBeliefValue: 77 | def __init__(self, belief_dim, nb_steps): 78 | self.belief_dim = belief_dim 79 | self.nb_steps = nb_steps 80 | 81 | self.S = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 82 | self.s = np.zeros((self.belief_dim, self.nb_steps, )) 83 | self.tau = np.zeros((self.belief_dim, self.nb_steps, )) 84 | 85 | 86 | class QuadraticCost: 87 | def __init__(self, belief_dim, act_dim, nb_steps): 88 | self.belief_dim = belief_dim 89 | self.act_dim = act_dim 90 | 91 | self.nb_steps = nb_steps 92 | 93 | self.Q = np.zeros((self.belief_dim, 
self.belief_dim, self.nb_steps)) 94 | self.q = np.zeros((self.belief_dim, self.nb_steps)) 95 | 96 | self.R = np.zeros((self.act_dim, self.act_dim, self.nb_steps)) 97 | self.r = np.zeros((self.act_dim, self.nb_steps)) 98 | 99 | self.P = np.zeros((self.belief_dim, self.act_dim, self.nb_steps)) 100 | self.p = np.zeros((self.belief_dim * self.belief_dim, self.nb_steps)) 101 | 102 | @property 103 | def params(self): 104 | return self.Q, self.q, self.R, self.r, self.P, self.p 105 | 106 | @params.setter 107 | def params(self, values): 108 | self.Q, self.q, self.R, self.r, self.P, self.p = values 109 | 110 | 111 | class AnalyticalQuadraticCost(QuadraticCost): 112 | def __init__(self, f_cost, belief_dim, act_dim, nb_steps): 113 | super(AnalyticalQuadraticCost, self).__init__(belief_dim, act_dim, nb_steps) 114 | 115 | self.f = f_cost 116 | 117 | self.fQ = hessian(self.f, 0) 118 | self.fq = jacobian(self.f, 0) 119 | 120 | self.fR = hessian(self.f, 2) 121 | self.fr = jacobian(self.f, 2) 122 | 123 | self.fP = jacobian(jacobian(self.f, 0), 2) 124 | self.fp = jacobian(self.f, 1) 125 | 126 | def evalf(self, mu_b, sigma_b, u): 127 | return self.f(mu_b, sigma_b, u) 128 | 129 | def taylor_expansion(self, b, u): 130 | # pad last time step of action traj. 131 | _u = np.hstack((u, np.zeros((self.act_dim, 1)))) 132 | 133 | for t in range(self.nb_steps): 134 | _in = tuple([b.mu[..., t], b.sigma[..., t], _u[..., t]]) 135 | 136 | self.Q[..., t] = self.fQ(*_in) 137 | self.q[..., t] = self.fq(*_in) 138 | 139 | self.R[..., t] = self.fR(*_in) 140 | self.r[..., t] = self.fr(*_in) 141 | 142 | self.P[..., t] = self.fP(*_in) 143 | self.p[..., t] = np.reshape(self.fp(*_in), 144 | (self.belief_dim * self.belief_dim), order='F') 145 | 146 | 147 | class LinearBeliefDynamics: 148 | def __init__(self, belief_dim, obs_dim, act_dim, nb_steps): 149 | self.belief_dim = belief_dim 150 | self.obs_dim = obs_dim 151 | self.act_dim = act_dim 152 | 153 | self.nb_steps = nb_steps 154 | 155 | # Linearization of dynamics 156 | self.A = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 157 | self.H = np.zeros((self.obs_dim, self.obs_dim, self.nb_steps)) 158 | 159 | # EKF matrices 160 | self.K = np.zeros((self.belief_dim, self.obs_dim, self.nb_steps)) 161 | self.D = np.zeros((self.belief_dim, self.obs_dim, self.nb_steps)) 162 | 163 | # Linearization of belief dynamics 164 | self.F = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 165 | self.G = np.zeros((self.belief_dim, self.act_dim, self.nb_steps)) 166 | 167 | self.T = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim, self.nb_steps)) 168 | self.U = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim * self.belief_dim, self.nb_steps)) 169 | self.V = np.zeros((self.belief_dim * self.belief_dim, self.act_dim, self.nb_steps)) 170 | 171 | self.X = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim, self.nb_steps)) 172 | self.Y = np.zeros((self.belief_dim * self.belief_dim, self.belief_dim * self.belief_dim, self.nb_steps)) 173 | self.Z = np.zeros((self.belief_dim * self.belief_dim, self.act_dim, self.nb_steps)) 174 | 175 | self.sigma_x = np.zeros((self.belief_dim, self.belief_dim, self.nb_steps)) 176 | self.sigma_z = np.zeros((self.obs_dim, self.obs_dim, self.nb_steps)) 177 | 178 | @property 179 | def params(self): 180 | return self.A, self.H, self.K, self.D,\ 181 | self.F, self.G, self.T, self.U,\ 182 | self.V, self.X, self.Y, self.Z 183 | 184 | @params.setter 185 | def params(self, values): 186 | self.A, self.H, self.K, self.D,\ 187 | self.F, self.G, self.T, self.U,\ 188 | self.V, self.X, self.Y, self.Z = values 189 | 190 | 
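The `taylor_expansion` in the class below flattens the belief map `(mu_b, sigma_b, u) -> (f, W, phi)` into a single vector so that one Jacobian call produces all linearization blocks at once: the rows for the mean `f` yield `F` and `G`, the rows for the noise term `W` yield `X`, `Y`, `Z`, and the rows for the covariance `phi` yield `T`, `U`, `V`, each split column-wise by `mu`, `sigma`, and `u`. A minimal sketch of that flatten-and-slice pattern on a toy map (the stand-in dynamics and all names here are illustrative only, not part of the package):

```python
import autograd.numpy as np
from autograd import jacobian
from autograd.misc import flatten

n, m = 2, 1  # toy belief and action dimensions

def belief_map(mu, sigma, u):
    # stand-in for AnalyticalLinearBeliefDynamics.dynamics: a mean vector
    # and two covariance-sized terms, in the same order as (f, W, phi)
    mean = mu + 0.1 * np.ones(n) * u[0]
    W = 0.5 * sigma
    phi = sigma + np.outer(mu, mu)
    return mean, W, phi

_in = (np.zeros(n), np.eye(n), np.zeros(m))
_in_flat, _unflatten = flatten(_in)

def _flat(v):
    return flatten(belief_map(*_unflatten(v)))[0]

J = jacobian(_flat)(_in_flat)  # shape (n + 2 * n * n, n + n * n + m)
F = J[:n, :n]                  # d mean / d mu
G = J[:n, -m:]                 # d mean / d u
X = J[n:n + n * n, :n]         # d vec(W) / d mu
T = J[n + n * n:, :n]          # d vec(phi) / d mu
print(F.shape, G.shape, X.shape, T.shape)
```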
191 | class AnalyticalLinearBeliefDynamics(LinearBeliefDynamics): 192 | def __init__(self, f_dyn, f_obs, 193 | dyn_noise, obs_noise, 194 | belief_dim, obs_dim, act_dim, nb_steps): 195 | super(AnalyticalLinearBeliefDynamics, self).__init__(belief_dim, obs_dim, act_dim, nb_steps) 196 | 197 | self.f = f_dyn 198 | self.h = f_obs 199 | 200 | self.dyn_noise = dyn_noise 201 | self.obs_noise = obs_noise 202 | 203 | self.dfdx = jacobian(self.f, 0) 204 | self.dhdx = jacobian(self.h, 0) 205 | 206 | def evalf(self, mu_b, u): 207 | return self.f(mu_b, u) 208 | 209 | def evalh(self, mu_b): 210 | return self.h(mu_b) 211 | 212 | # belief state dynamics 213 | def dynamics(self, mu_b, sigma_b, u): 214 | A = self.dfdx(mu_b, u) 215 | H = self.dhdx(self.evalf(mu_b, u)) 216 | 217 | sigma_dyn = self.dyn_noise(mu_b, u) 218 | sigma_obs = self.obs_noise(self.evalf(mu_b, u)) 219 | 220 | D = A @ sigma_b @ A.T + sigma_dyn 221 | D = 0.5 * (D + D.T) 222 | 223 | K = D @ H.T @ np.linalg.inv(H @ D @ H.T + sigma_obs) 224 | 225 | # stochastic mean dynamics 226 | f = self.evalf(mu_b, u) 227 | W = K @ H @ D 228 | 229 | # covariance dynamics 230 | phi = D - K @ H @ D 231 | phi = 0.5 * (phi + phi.T) 232 | 233 | return f, W, phi 234 | 235 | def taylor_expansion(self, b, u): 236 | for t in range(self.nb_steps): 237 | _in = tuple([b.mu[..., t], b.sigma[..., t], u[..., t]]) 238 | 239 | _in_flat, _unflatten = flatten(_in) 240 | 241 | def _dyn_flat(_in_flat): 242 | return flatten(self.dynamics(*_unflatten(_in_flat)))[0] 243 | 244 | _dyn_jac = jacobian(_dyn_flat) 245 | 246 | _grads = _dyn_jac(_in_flat) 247 | self.F[..., t] = _grads[:self.belief_dim, :self.belief_dim] 248 | self.G[..., t] = _grads[:self.belief_dim, -self.act_dim:] 249 | 250 | self.X[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, :self.belief_dim] 251 | self.Y[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim] 252 | self.Z[..., t] = _grads[self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim, -self.act_dim:] 253 | 254 | self.T[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, :self.belief_dim] 255 | self.U[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, self.belief_dim:self.belief_dim + self.belief_dim * self.belief_dim] 256 | self.V[..., t] = _grads[self.belief_dim + self.belief_dim * self.belief_dim:, -self.act_dim:] 257 | 258 | def forward(self, b, u, t): 259 | mu_bn, _, sigma_bn = self.dynamics(b.mu[..., t], b.sigma[..., t], u[..., t]) 260 | return mu_bn, sigma_bn 261 | 262 | 263 | class LinearControl: 264 | def __init__(self, belief_dim, act_dim, nb_steps): 265 | self.belief_dim = belief_dim 266 | self.act_dim = act_dim 267 | self.nb_steps = nb_steps 268 | 269 | self.K = np.zeros((self.act_dim, self.belief_dim, self.nb_steps)) 270 | self.kff = np.zeros((self.act_dim, self.nb_steps)) 271 | 272 | @property 273 | def params(self): 274 | return self.K, self.kff 275 | 276 | @params.setter 277 | def params(self, values): 278 | self.K, self.kff = values 279 | 280 | def action(self, b, alpha, bref, uref, t): 281 | db = b.mu[:, t] - bref.mu[:, t] 282 | return uref[:, t] + alpha * self.kff[..., t] + self.K[..., t] @ db 283 | 
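`LinearControl.action` above is a time-varying affine law, `u_t = uref_t + alpha * kff_t + K_t @ (mu_t - muref_t)`, where `alpha` scales the feedforward correction during the backtracking line search. A quick usage sketch, assuming the package is importable as laid out in this file:

```python
import autograd.numpy as np
from trajopt.bspilqr.objects import Gaussian, LinearControl

belief_dim, act_dim, nb_steps = 2, 1, 10

ctl = LinearControl(belief_dim, act_dim, nb_steps)
ctl.kff = 0.1 * np.random.randn(act_dim, nb_steps)

# current and reference beliefs coincide here, so the feedback term
# K_t @ db vanishes and only the scaled feedforward remains
b = Gaussian(belief_dim, nb_steps + 1)
bref = Gaussian(belief_dim, nb_steps + 1)
uref = np.zeros((act_dim, nb_steps))

u0 = ctl.action(b, 0.5, bref, uref, 0)
print(u0)  # equals 0.5 * ctl.kff[:, 0]
```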
-------------------------------------------------------------------------------- /trajopt/bspilqr/src/util.cpp: -------------------------------------------------------------------------------- 1 | #include <armadillo> 2 | #include <pybind11/pybind11.h> 3 | #include <pybind11/numpy.h> 4 | 5 | namespace py = pybind11; 6 | 7 | using namespace arma; 8 | 9 | 10 | typedef py::array_t<double, py::array::f_style | py::array::forcecast> array_tf; 11 | typedef py::array_t<double, py::array::c_style | py::array::forcecast> array_tc; 12 | 13 | 14 | cube array_to_cube(array_tf m) { 15 | 16 | py::buffer_info _m_buff = m.request(); 17 | int n_rows = _m_buff.shape[0]; 18 | int n_cols = _m_buff.shape[1]; 19 | int n_slices = _m_buff.shape[2]; 20 | 21 | cube _m_arma((double *)_m_buff.ptr, n_rows, n_cols, n_slices); 22 | 23 | return _m_arma; 24 | } 25 | 26 | 27 | mat array_to_mat(array_tf m) { 28 | 29 | py::buffer_info _m_buff = m.request(); 30 | int n_rows = _m_buff.shape[0]; 31 | int n_cols = _m_buff.shape[1]; 32 | 33 | mat _m_arma((double *)_m_buff.ptr, n_rows, n_cols); 34 | 35 | return _m_arma; 36 | } 37 | 38 | 39 | vec array_to_vec(array_tf m) { 40 | 41 | py::buffer_info _m_buff = m.request(); 42 | int n_rows = _m_buff.shape[0]; 43 | 44 | vec _m_vec((double *)_m_buff.ptr, n_rows); 45 | 46 | return _m_vec; 47 | } 48 | 49 | 50 | array_tf cube_to_array(cube m) { 51 | 52 | auto _m_array = array_tf({m.n_rows, m.n_cols, m.n_slices}); 53 | 54 | py::buffer_info _m_buff = _m_array.request(); 55 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols * m.n_slices); 56 | 57 | return _m_array; 58 | } 59 | 60 | 61 | array_tf mat_to_array(mat m) { 62 | 63 | auto _m_array = array_tf({m.n_rows, m.n_cols}); 64 | 65 | py::buffer_info _m_buff = _m_array.request(); 66 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols); 67 | 68 | return _m_array; 69 | } 70 | 71 | 72 | array_tf vec_to_array(vec m) { 73 | 74 | auto _m_array = array_tf({m.n_rows}); 75 | 76 | py::buffer_info _m_buff = _m_array.request(); 77 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows); 78 | 79 | return _m_array; 80 | } 81 | 82 | 83 | py::tuple backward_pass(array_tf _Q, array_tf _q, 84 | array_tf _R, array_tf _r, 85 | array_tf _P, array_tf _p, 86 | array_tf _F, array_tf _G, 87 | array_tf _T, array_tf _U, 88 | array_tf _V, array_tf _X, 89 | array_tf _Y, array_tf _Z, 90 | double lmbda, int reg, 91 | int dm_belief, int dm_act, int nb_steps) { 92 | 93 | // inputs 94 | cube Q = array_to_cube(_Q); 95 | mat q = array_to_mat(_q); 96 | 97 | cube R = array_to_cube(_R); 98 | mat r = array_to_mat(_r); 99 | 100 | cube P = array_to_cube(_P); 101 | mat p = array_to_mat(_p); 102 | 103 | cube F = array_to_cube(_F); 104 | cube G = array_to_cube(_G); 105 | 106 | cube T = array_to_cube(_T); 107 | cube U = array_to_cube(_U); 108 | cube V = array_to_cube(_V); 109 | 110 | cube X = array_to_cube(_X); 111 | cube Y = array_to_cube(_Y); 112 | cube Z = array_to_cube(_Z); 113 | 114 | // outputs 115 | cube C(dm_belief, dm_belief, nb_steps); 116 | mat c(dm_belief, nb_steps); 117 | 118 | cube D(dm_act, dm_act, nb_steps); 119 | mat d(dm_act, nb_steps); 120 | 121 | cube E(dm_act, dm_belief, nb_steps); 122 | mat e(dm_belief * dm_belief, nb_steps); 123 | 124 | cube Ereg(dm_act, dm_belief, nb_steps); 125 | cube Dreg(dm_act, dm_act, nb_steps); 126 | cube Dinv(dm_act, dm_act, nb_steps); 127 | 128 | cube S(dm_belief, dm_belief, nb_steps + 1); 129 | mat s(dm_belief, nb_steps + 1); 130 | mat tau(dm_belief * dm_belief, nb_steps + 1); 131 | 132 | vec dS(2); 133 | 134 | cube Sreg(dm_belief, dm_belief, nb_steps + 1); 135 | 136 | cube K(dm_act, dm_belief, nb_steps); 137 | mat kff(dm_act, nb_steps); 138 | 139 | int _diverge = 0; 140 | 141 | // init last time step 142 | S.slice(nb_steps) = 
Q.slice(nb_steps); 143 | s.col(nb_steps) = q.col(nb_steps); 144 | tau.col(nb_steps) = p.col(nb_steps); 145 | 146 | for(int i = nb_steps - 1; i>= 0; --i) 147 | { 148 | C.slice(i) = Q.slice(i) + F.slice(i).t() * S.slice(i+1) * F.slice(i); 149 | D.slice(i) = R.slice(i) + G.slice(i).t() * S.slice(i+1) * G.slice(i); 150 | E.slice(i) = (P.slice(i) + F.slice(i).t() * S.slice(i+1) * G.slice(i)).t(); 151 | 152 | c.col(i) = q.col(i) + F.slice(i).t() * s.col(i+1) + T.slice(i).t() * tau.col(i+1) 153 | + 0.5 * X.slice(i).t() * vectorise(S.slice(i+1)); 154 | 155 | d.col(i) = r.col(i) + G.slice(i).t() * s.col(i+1) + V.slice(i).t() * tau.col(i+1) 156 | + 0.5 * Z.slice(i).t() * vectorise(S.slice(i+1)); 157 | 158 | e.col(i) = p.col(i) + U.slice(i).t() * tau.col(i) + 0.5 * Y.slice(i).t() * vectorise(S.slice(i+1)); 159 | 160 | Sreg.slice(i+1) = S.slice(i+1); 161 | if (reg==2) 162 | Sreg.slice(i+1) += lmbda * eye(dm_belief, dm_belief); 163 | 164 | Ereg.slice(i) = (P.slice(i) + F.slice(i).t() * Sreg.slice(i+1) * G.slice(i)).t(); 165 | 166 | Dreg.slice(i) = R.slice(i) + G.slice(i).t() * Sreg.slice(i+1) * G.slice(i); 167 | if (reg==1) 168 | Dreg.slice(i) += lmbda * eye(dm_act, dm_act); 169 | 170 | if (!(Dreg.slice(i)).is_sympd()) { 171 | _diverge = i; 172 | break; 173 | } 174 | 175 | Dinv.slice(i) = inv(Dreg.slice(i)); 176 | K.slice(i) = - Dinv.slice(i) * Ereg.slice(i); 177 | kff.col(i) = - Dinv.slice(i) * d.col(i); 178 | 179 | dS += join_vert(kff.col(i).t() * d.col(i), 0.5 * kff.col(i).t() * D.slice(i) * kff.col(i)); 180 | 181 | tau.col(i) = e.col(i); 182 | 183 | s.col(i) = c.col(i) + K.slice(i).t() * D.slice(i) * kff.col(i) + 184 | K.slice(i).t() * d.col(i) + E.slice(i).t() * kff.col(i); 185 | 186 | S.slice(i) = C.slice(i) + K.slice(i).t() * D.slice(i) * K.slice(i) + 187 | K.slice(i).t() * E.slice(i) + E.slice(i).t() * K.slice(i); 188 | S.slice(i) = 0.5 * (S.slice(i) + S.slice(i).t()); 189 | } 190 | 191 | // transform outputs to numpy 192 | array_tf _S = cube_to_array(S); 193 | array_tf _s = mat_to_array(s); 194 | array_tf _tau = mat_to_array(tau); 195 | 196 | array_tf _dS = vec_to_array(dS); 197 | 198 | array_tf _K = cube_to_array(K); 199 | array_tf _kff = mat_to_array(kff); 200 | 201 | py::tuple output = py::make_tuple(_S, _s, _tau, 202 | _dS, _K, _kff, _diverge); 203 | return output; 204 | } 205 | 206 | 207 | PYBIND11_MODULE(core, m) 208 | { 209 | m.def("backward_pass", &backward_pass); 210 | } 211 | -------------------------------------------------------------------------------- /trajopt/elqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .elqr import eLQR 2 | -------------------------------------------------------------------------------- /trajopt/elqr/elqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.elqr.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.elqr.objects import QuadraticStateValue 5 | from trajopt.elqr.objects import LinearControl 6 | 7 | 8 | class eLQR: 9 | 10 | def __init__(self, env, nb_steps, 11 | init_state): 12 | 13 | self.env = env 14 | 15 | # expose necessary functions 16 | self.env_dyn = self.env.unwrapped.dynamics 17 | self.env_inv_dyn = self.env.unwrapped.inverse_dynamics 18 | self.env_cost = self.env.unwrapped.cost 19 | self.env_goal = self.env.unwrapped.g 20 | self.env_init = init_state 21 | 22 | self.ulim = self.env.action_space.high 23 | 24 | self.dm_state = self.env.observation_space.shape[0] 25 | 
self.dm_act = self.env.action_space.shape[0] 26 | self.nb_steps = nb_steps 27 | 28 | # reference trajectory 29 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 30 | self.xref[..., 0] = self.env_init 31 | 32 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 33 | 34 | self.gocost = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 35 | self.comecost = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 36 | 37 | self.gocost.V[..., 0] += np.eye(self.dm_state) * 1e-16 38 | self.comecost.V[..., 0] += np.eye(self.dm_state) * 1e-16 39 | 40 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 41 | self.idyn = AnalyticalLinearDynamics(self.env_inv_dyn, self.dm_state, self.dm_act, self.nb_steps) 42 | 43 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 44 | self.ctl.kff = np.random.randn(self.dm_act, self.nb_steps) 45 | 46 | self.ictl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 47 | self.ictl.kff = 1e-2 * np.random.randn(self.dm_act, self.nb_steps) 48 | 49 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 50 | 51 | self.last_objective = - np.inf 52 | 53 | def forward_pass(self, ctl): 54 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 55 | action = np.zeros((self.dm_act, self.nb_steps)) 56 | cost = np.zeros((self.nb_steps + 1,)) 57 | 58 | state[..., 0] = self.env_init 59 | for t in range(self.nb_steps): 60 | action[..., t] = ctl.action(state[..., t], t) 61 | cost[..., t] = self.cost.evalf(state[..., t], action[..., t]) 62 | state[..., t + 1] = self.dyn.evalf(state[..., t], action[..., t]) 63 | 64 | cost[..., -1] = self.cost.evalf(state[..., -1], np.zeros((self.dm_act, ))) 65 | return state, action, cost 66 | 67 | def forward_lqr(self, state): 68 | for t in range(self.nb_steps): 69 | _action = self.ctl.action(state, t) 70 | 71 | _state_n = self.dyn.evalf(state, _action) 72 | 73 | # linearize inverse discrete dynamics 74 | _A, _B, _c = self.idyn.taylor_expansion(_state_n, _action) 75 | 76 | # quadratize cost 77 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 = self.cost.taylor_expansion(state, _action) 78 | 79 | # forward value 80 | _Qxx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _A 81 | _Quu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _B + _B.T @ _Cxu + _Cxu.T @ _B + _Cuu 82 | _Qux = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _A + _Cxu.T @ _A 83 | 84 | _qx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _c + _A.T @ (_cx + self.comecost.v[..., t]) 85 | _qu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _c + _Cxu.T @ _c + _B.T @ (_cx + self.comecost.v[..., t]) + _cu 86 | 87 | _q0 = 0.5 * _c.T @ (_Cxx + self.comecost.V[..., t]) @ _c +\ 88 | _c.T @ (_cx + self.comecost.v[..., t]) + _c0 + self.comecost.v0[..., t] 89 | 90 | # backward value 91 | _Qxx = _A.T @ (_Cxx + self.comecost.V[..., t]) @ _A 92 | _Quu = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _B + _B.T @ _Cxu + _Cxu.T @ _B + _Cuu 93 | _Qux = _B.T @ (_Cxx + self.comecost.V[..., t]) @ _A + _Cxu.T @ _A 94 | 95 | self.ictl.K[..., t] = - np.linalg.inv(_Quu) @ _Qux 96 | self.ictl.kff[..., t] = - np.linalg.inv(_Quu) @ _qu 97 | 98 | self.comecost.V[..., t + 1] = _Qxx - _Qux.T @ np.linalg.inv(_Quu) @ _Qux 99 | self.comecost.v[..., t + 1] = _qx - _Qux.T @ np.linalg.inv(_Quu) @ _qu 100 | self.comecost.v0[..., t + 1] = _q0 - 0.5 * _qu.T @ np.linalg.inv(_Quu) @ _qu 101 | 102 | # store matrices 103 | self.idyn.A[..., t] = _A 104 | self.idyn.B[..., t] = _B 105 | self.idyn.c[..., t] = _c 106 | 107 | state = - 
np.linalg.inv(self.gocost.V[..., t + 1] + self.comecost.V[..., t + 1]) @\ 108 | (self.gocost.v[..., t + 1] + self.comecost.v[..., t + 1]) 109 | 110 | return state 111 | 112 | def backward_lqr(self, state): 113 | # quadratize last cost 114 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 =\ 115 | self.cost.taylor_expansion(state, np.zeros((self.dm_act, ))) 116 | 117 | self.gocost.V[..., -1] = _Cxx 118 | self.gocost.v[..., -1] = _cx 119 | self.gocost.v0[..., -1] = _c0 120 | 121 | state = - np.linalg.inv(self.gocost.V[..., -1] + self.comecost.V[..., -1]) @\ 122 | (self.gocost.v[..., -1] + self.comecost.v[..., -1]) 123 | 124 | for t in range(self.nb_steps - 1, -1, -1): 125 | _action = self.ictl.action(state, t) 126 | 127 | _state_n = self.idyn.evalf(state, _action) 128 | # linearize discrete dynamics 129 | _A, _B, _c = self.dyn.taylor_expansion(_state_n, _action) 130 | 131 | # quadratize cost 132 | _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 = self.cost.taylor_expansion(_state_n, _action) 133 | 134 | # backward value 135 | _Qxx = _Cxx + _A.T @ self.gocost.V[..., t + 1] @ _A 136 | _Quu = _Cuu + _B.T @ self.gocost.V[..., t + 1] @ _B 137 | _Qux = _Cxu.T + _B.T @ self.gocost.V[..., t + 1] @ _A 138 | 139 | _qx = _cx + _A.T @ self.gocost.V[..., t + 1] @ _c + _A.T @ self.gocost.v[..., t + 1] 140 | _qu = _cu + _B.T @ self.gocost.V[..., t + 1] @ _c + _B.T @ self.gocost.v[..., t + 1] 141 | 142 | _q0 = _c0 + self.gocost.v0[..., t + 1] + 0.5 * _c.T @ self.gocost.V[..., t + 1] @ _c +\ 143 | _c.T @ self.gocost.v[..., t + 1] 144 | 145 | self.ctl.K[..., t] = - np.linalg.inv(_Quu) @ _Qux 146 | self.ctl.kff[..., t] = - np.linalg.inv(_Quu) @ _qu 147 | 148 | self.gocost.V[..., t] = _Qxx - _Qux.T @ np.linalg.inv(_Quu) @ _Qux 149 | self.gocost.v[..., t] = _qx - _Qux.T @ np.linalg.inv(_Quu) @ _qu 150 | self.gocost.v0[..., t] = _q0 - 0.5 * _qu.T @ np.linalg.inv(_Quu) @ _qu 151 | 152 | # store matrices 153 | self.dyn.A[..., t] = _A 154 | self.dyn.B[..., t] = _B 155 | self.dyn.c[..., t] = _c 156 | 157 | state = - np.linalg.inv(self.gocost.V[..., t] + self.comecost.V[..., t]) @\ 158 | (self.gocost.v[..., t] + self.comecost.v[..., t]) 159 | 160 | return state 161 | 162 | def plot(self): 163 | import matplotlib.pyplot as plt 164 | 165 | plt.figure() 166 | 167 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 168 | for k in range(self.dm_state): 169 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 170 | plt.plot(t, self.xref[k, :], '-b') 171 | 172 | t = np.linspace(0, self.nb_steps, self.nb_steps) 173 | for k in range(self.dm_act): 174 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 175 | plt.plot(t, self.uref[k, :], '-g') 176 | 177 | plt.show() 178 | 179 | def run(self, nb_iter=10): 180 | _trace = [] 181 | 182 | # forward pass to get ref traj. 183 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 184 | # return around current traj. 185 | _trace.append(np.sum(_cost)) 186 | 187 | _state = self.env_init 188 | for _ in range(nb_iter): 189 | # forward lqr 190 | _state = self.forward_lqr(_state) 191 | 192 | # backward lqr 193 | _state = self.backward_lqr(_state) 194 | 195 | # forward pass to get ref traj. 196 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 197 | 198 | # return around current traj. 
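# note: the state handed back and forth between forward_lqr and backward_lqr
# is the running estimate of the optimal state,
# x* = -(V_go + V_come)^{-1} (v_go + v_come),
# so each sweep re-expands dynamics and cost around the latest optimum
# before the rollout above scores the updated controller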
199 | _trace.append(np.sum(_cost)) 200 | 201 | return _trace 202 | -------------------------------------------------------------------------------- /trajopt/elqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | self.v0 = np.zeros((self.nb_steps, )) 13 | 14 | 15 | class QuadraticCost: 16 | def __init__(self, dm_state, dm_act, nb_steps): 17 | self.dm_state = dm_state 18 | self.dm_act = dm_act 19 | 20 | self.nb_steps = nb_steps 21 | 22 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 23 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 24 | 25 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 26 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 27 | 28 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 29 | self.c0 = np.zeros((self.nb_steps, )) 30 | 31 | @property 32 | def params(self): 33 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 34 | 35 | @params.setter 36 | def params(self, values): 37 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 = values 38 | 39 | 40 | class AnalyticalQuadraticCost(QuadraticCost): 41 | def __init__(self, f, dm_state, dm_act, nb_steps): 42 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 43 | 44 | self.f = f 45 | 46 | self.dcdxx = hessian(self.f, 0) 47 | self.dcduu = hessian(self.f, 1) 48 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 49 | 50 | self.dcdx = jacobian(self.f, 0) 51 | self.dcdu = jacobian(self.f, 1) 52 | 53 | def evalf(self, x, u): 54 | return self.f(x, u, 0., 1) 55 | 56 | def taylor_expansion(self, x, u): 57 | _in = tuple([x, u, 0., 1.]) 58 | _Cxx = 0.5 * self.dcdxx(*_in) 59 | _Cuu = 0.5 * self.dcduu(*_in) 60 | _Cxu = self.dcdxu(*_in) 61 | 62 | _cx = self.dcdx(*_in) - self.dcdxx(*_in) @ x - self.dcdxu(*_in) @ u 63 | _cu = self.dcdu(*_in) - self.dcduu(*_in) @ u - x.T @ self.dcdxu(*_in) 64 | 65 | # residual of taylor expansion 66 | _c0 = self.f(*_in)\ 67 | - x.T @ _Cxx @ x\ 68 | - u.T @ _Cuu @ u\ 69 | - x.T @ _Cxu @ u\ 70 | - _cx.T @ x\ 71 | - _cu.T @ u 72 | 73 | return _Cxx, _Cuu, _Cxu, _cx, _cu, _c0 74 | 75 | 76 | class LinearDynamics: 77 | def __init__(self, dm_state, dm_act, nb_steps): 78 | self.dm_state = dm_state 79 | self.dm_act = dm_act 80 | self.nb_steps = nb_steps 81 | 82 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 83 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 84 | self.c = np.zeros((self.dm_state, self.nb_steps)) 85 | 86 | @property 87 | def params(self): 88 | return self.A, self.B, self.c 89 | 90 | @params.setter 91 | def params(self, values): 92 | self.A, self.B, self.c = values 93 | 94 | def sample(self, x, u): 95 | pass 96 | 97 | 98 | class AnalyticalLinearDynamics(LinearDynamics): 99 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 100 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 101 | 102 | self.f = f_dyn 103 | 104 | self.dfdx = jacobian(self.f, 0) 105 | self.dfdu = jacobian(self.f, 1) 106 | 107 | def evalf(self, x, u): 108 | return self.f(x, u) 109 | 110 | def taylor_expansion(self, x, u): 111 | _A = self.dfdx(x, u) 112 | _B = self.dfdu(x, u) 113 | # 
residual of taylor expansion 114 | _c = self.evalf(x, u) - _A @ x - _B @ u 115 | 116 | return _A, _B, _c 117 | 118 | 119 | class LinearControl: 120 | def __init__(self, dm_state, dm_act, nb_steps): 121 | self.dm_state = dm_state 122 | self.dm_act = dm_act 123 | self.nb_steps = nb_steps 124 | 125 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 126 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 127 | 128 | @property 129 | def params(self): 130 | return self.K, self.kff 131 | 132 | @params.setter 133 | def params(self, values): 134 | self.K, self.kff = values 135 | 136 | def action(self, x, t): 137 | return self.kff[..., t] + self.K[..., t] @ x 138 | -------------------------------------------------------------------------------- /trajopt/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .lqr.lqr_v0 import LQRv0 2 | from .lqr.lqr_v1 import LQRv1 3 | from .lqr.lqr_v2 import LQRv2 4 | 5 | from .pendulum.pendulum import Pendulum 6 | from .pendulum.pendulum import PendulumWithCartesianCost 7 | 8 | from .double_pendulum.double_pendulum import DoublePendulum 9 | from .double_pendulum.double_pendulum import DoublePendulumWithCartesianCost 10 | 11 | from .quad_pendulum.quad_pendulum import QuadPendulum 12 | from .quad_pendulum.quad_pendulum import QuadPendulumWithCartesianCost 13 | 14 | from .cartpole.cartpole import Cartpole 15 | from .cartpole.cartpole import CartpoleWithCartesianCost 16 | 17 | from .double_cartpole.double_cartpole import DoubleCartpole 18 | from .double_cartpole.double_cartpole import DoubleCartpoleWithCartesianCost 19 | 20 | from .lightdark.lightdark import LightDark 21 | from .car.car import Car 22 | 23 | from .robot.robot import Robot 24 | -------------------------------------------------------------------------------- /trajopt/envs/car/car.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class Car(gym.Env): 9 | 10 | def __init__(self): 11 | self.state_dim = 4 12 | self.belief_dim = 4 13 | self.act_dim = 2 14 | self.obs_dim = 2 15 | 16 | self.dt = 0.5 17 | 18 | self.dyn_sigma = 1e-8 * np.eye(self.state_dim) 19 | self.obs_sigma = 1e-8 * np.eye(self.obs_dim) 20 | 21 | self.goal = np.array([0., 0., 0., 0.]) 22 | 23 | # car length 24 | self.l = 0.1 25 | 26 | # belief cost weights 27 | self.mu_w = 100. * np.array([1., 1., 1., 1.]) 28 | self.sigma_w = 100. 
* np.array([1., 1., 1., 1.]) 29 | 30 | # action cost weights 31 | self.act_w = np.array([1., 1.]) 32 | 33 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf]) 34 | self.zmax = np.array([np.inf, np.inf]) 35 | self.umax = np.array([np.inf, np.inf]) 36 | 37 | self.state_space = spaces.Box(low=-self.xmax, 38 | high=self.xmax) 39 | 40 | self.observation_space = spaces.Box(low=-self.zmax, 41 | high=self.zmax) 42 | 43 | self.action_space = spaces.Box(low=-self.umax, 44 | high=self.umax) 45 | 46 | self.state = None 47 | self.np_random = None 48 | 49 | self.seed() 50 | 51 | @property 52 | def xlim(self): 53 | return self.xmax 54 | 55 | @property 56 | def ulim(self): 57 | return self.umax 58 | 59 | def dynamics(self, x, u): 60 | _u = np.clip(u, -self.ulim, self.ulim) 61 | 62 | def f(x, u): 63 | return np.hstack([x[3] * np.cos(x[2]), 64 | x[3] * np.sin(x[2]), 65 | x[3] * np.tan(u[1]) / self.l, 66 | u[0]]) 67 | 68 | k1 = f(x, _u) 69 | k2 = f(x + 0.5 * self.dt * k1, _u) 70 | k3 = f(x + 0.5 * self.dt * k2, _u) 71 | k4 = f(x + self.dt * k3, _u) 72 | 73 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 74 | xn = np.clip(xn, -self.xlim, self.xlim) 75 | 76 | return xn 77 | 78 | def observe(self, x): 79 | return np.array([x[0], x[1]]) 80 | 81 | def dyn_noise(self, x=None, u=None): 82 | _u = np.clip(u, -self.ulim, self.ulim) 83 | _x = np.clip(x, -self.xlim, self.xlim) 84 | return self.dyn_sigma 85 | 86 | def obs_noise(self, x=None): 87 | return self.obs_sigma +\ 88 | np.array([[0.5 * (5. - x[0])**2, 0.], 89 | [0., 0.]]) 90 | 91 | # cost over belief 92 | def cost(self, mu_b, sigma_b, u): 93 | return (mu_b - self.goal).T @ np.diag(self.mu_w) @ (mu_b - self.goal) +\ 94 | np.trace(np.diag(self.sigma_w) @ sigma_b) + \ 95 | u.T @ np.diag(self.act_w) @ u 96 | 97 | def seed(self, seed=None): 98 | self.np_random, seed = seeding.np_random(seed) 99 | return [seed] 100 | 101 | def step(self, u): 102 | # state-action dependent dynamics noise 103 | _sigma_dyn = self.dyn_noise(self.state, u) 104 | # evolve deterministic dynamics 105 | self.state = self.dynamics(self.state, u) 106 | # add dynamics noise 107 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma_dyn) 108 | 109 | # state dependent observation noise 110 | _sigma_obs = self.obs_noise(self.state) 111 | # observe state 112 | _z = self.observe(self.state) 113 | # add observation noise 114 | obs = self.np_random.multivariate_normal(mean=_z, cov=_sigma_obs) 115 | 116 | return obs, [], False, {} 117 | 118 | # initial belief 119 | def init(self): 120 | mu = np.array([2., 2., 0., 0.]) 121 | sigma = 1. * np.eye(self.belief_dim) 122 | return mu, sigma 123 | 124 | def reset(self): 125 | self.state = np.array([0., 4., 0., 0.]) 126 | return self.observe(self.state) 127 | -------------------------------------------------------------------------------- /trajopt/envs/cartpole/cartpole.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. 
* np.pi)) - np.pi 13 | 14 | 15 | class Cartpole(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 4 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [x, th, dx, dth] 26 | self.g = np.array([0., 0., 27 | 0., 0.]) 28 | self.gw = np.array([1e1, 1e4, 29 | 1e0, 1e0]) 30 | 31 | # x = [x, th, dx, dth] 32 | self.xmax = np.array([10., np.inf, 33 | np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 10.0 * np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(1,)) 42 | 43 | self.x0 = np.array([0., np.pi, 44 | 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Equations: http://coneural.org/florian/papers/05_cart_pole.pdf 66 | # x = [x, th, dx, dth] 67 | g = 9.81 68 | Mc = 0.37 69 | Mp = 0.127 70 | Mt = Mc + Mp 71 | l = 0.3365 72 | fr = 0.005 73 | 74 | def f(x, u): 75 | q, th, dq, dth = x 76 | 77 | sth = np.sin(th) 78 | cth = np.cos(th) 79 | 80 | # This friction model is not exactly right 81 | # It neglects the influence of the pole 82 | num = g * sth + cth * (- (u - fr * dq) - Mp * l * dth**2 * sth) / Mt 83 | denom = l * ((4. / 3.) - Mp * cth**2 / Mt) 84 | ddth = num / denom 85 | 86 | ddx = (u + Mp * l * (dth**2 * sth - ddth * cth)) / Mt 87 | return np.hstack((dq, dth, ddx, ddth)) 88 | 89 | c1 = f(x, _u) 90 | c2 = f(x + 0.5 * self.dt * c1, _u) 91 | c3 = f(x + 0.5 * self.dt * c2, _u) 92 | c4 = f(x + self.dt * c3, _u) 93 | 94 | xn = x + self.dt / 6. * (c1 + 2. * c2 + 2. * c3 + c4) 95 | xn = np.clip(xn, -self.xlim, self.xlim) 96 | 97 | return xn 98 | 99 | def features(self, x): 100 | return x 101 | 102 | def features_jacobian(self, x): 103 | J = jacobian(self.features, 0) 104 | j = self.features(x) - J(x) @ x 105 | return J, j 106 | 107 | def noise(self, x=None, u=None): 108 | _u = np.clip(u, -self.ulim, self.ulim) 109 | _x = np.clip(x, -self.xlim, self.xlim) 110 | return self.sigma 111 | 112 | def cost(self, x, u, u_last, a): 113 | c = 0. 
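# A sketch of what the branches below assemble (all names as defined above):
#   c(x, u) = du.T @ diag(uw) @ du + a * (z - g).T @ diag(gw) @ (z - g),
# where du is either u itself (absolute action penalty) or the slew-rate
# difference u - u_last, and z is the feature vector linearized around the
# current state.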
114 | 115 | if self.slew_rate: 116 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 117 | else: 118 | c += u.T @ np.diag(self.uw) @ u 119 | 120 | if a: 121 | y = np.hstack((x[0], wrap_angle(x[1]), 122 | x[2], x[3])) if self.periodic else x 123 | J, j = self.features_jacobian(getval(y)) 124 | z = J(getval(y)) @ y + j 125 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 126 | 127 | return c 128 | 129 | def seed(self, seed=None): 130 | self.np_random, seed = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def step(self, u): 134 | # state-action dependent noise 135 | _sigma = self.noise(self.state, u) 136 | # evolve deterministic dynamics 137 | self.state = self.dynamics(self.state, u) 138 | # add noise 139 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 140 | return self.state, [], False, {} 141 | 142 | def init(self): 143 | return self.x0, self.sigma0 144 | 145 | def reset(self): 146 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 147 | return self.state 148 | 149 | 150 | class CartpoleWithCartesianCost(Cartpole): 151 | 152 | def __init__(self): 153 | super(CartpoleWithCartesianCost, self).__init__() 154 | 155 | # g = [x, cs_th, sn_th, dx, dth] 156 | self.g = np.array([0., 157 | 1., 0., 158 | 0., 0.]) 159 | self.gw = np.array([1e1, 160 | 1e4, 1e4, 161 | 1e0, 1e0]) 162 | 163 | def features(self, x): 164 | return np.array([x[0], 165 | np.cos(x[1]), np.sin(x[1]), 166 | x[2], x[3]]) 167 | -------------------------------------------------------------------------------- /trajopt/envs/double_cartpole/double_cartpole.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class DoubleCartpole(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 6 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # x = [x, th1, th2, dx, dth1, dth2] 26 | self.g = np.array([0., 0., 0., 27 | 0., 0., 0.]) 28 | self.gw = np.array([1e1, 1e4, 1e4, 29 | 1e0, 1e0, 1e0]) 30 | 31 | self.xmax = np.array([10., np.inf, np.inf, 32 | np.inf, np.inf, np.inf]) 33 | self.observation_space = spaces.Box(low=-self.xmax, 34 | high=self.xmax) 35 | 36 | self.slew_rate = False 37 | self.uw = 1e-5 * np.ones((self.dm_act, )) 38 | self.umax = 5.0 * np.ones((self.dm_act, )) 39 | self.action_space = spaces.Box(low=-self.umax, 40 | high=self.umax, shape=(1,)) 41 | 42 | self.x0 = np.array([0., np.pi, np.pi, 43 | 0., 0., 0.]) 44 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 45 | 46 | self.periodic = False 47 | 48 | self.state = None 49 | self.np_random = None 50 | 51 | self.seed() 52 | 53 | @property 54 | def xlim(self): 55 | return self.xmax 56 | 57 | @property 58 | def ulim(self): 59 | return self.umax 60 | 61 | def dynamics(self, x, u): 62 | _u = np.clip(u, -self.ulim, self.ulim) 63 | 64 | # import from: https://github.com/JoeMWatson/input-inference-for-control/ 65 | """ 66 | http://www.lirmm.fr/~chemori/Temp/Wafa/double%20pendule%20inverse.pdf 67 | """ 68 | 69 | # x = [x, th1, th2, dx, dth1, dth2] 70 | 71 | g = 9.81 72 | Mc = 0.37 73 | Mp1 = 0.127 74 | Mp2 = 0.127 75 | Mt = Mc + Mp1 + Mp2 76 | L1 = 0.3365 77 | L2 = 0.3365 78 | l1 = L1 / 2. 
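# The nested f below assembles the standard manipulator equation,
#   M(q) @ ddq + C(q, dq) @ dq + G(q) = B @ u,
# and solves it for the accelerations ddq; the RK4 stages further down
# then integrate the resulting ODE over one step of length dt.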
79 | l2 = L2 / 2. 80 | J1 = Mp1 * L1 / 12. 81 | J2 = Mp2 * L2 / 12. 82 | 83 | def f(x, u): 84 | 85 | q = x[0] 86 | th1 = x[1] 87 | th2 = x[2] 88 | dq = x[3] 89 | dth1 = x[4] 90 | dth2 = x[5] 91 | 92 | s1 = np.sin(th1) 93 | c1 = np.cos(th1) 94 | s2 = np.sin(th2) 95 | c2 = np.cos(th2) 96 | sdth = np.sin(th1 - th2) 97 | cdth = np.cos(th1 - th2) 98 | 99 | # helpers 100 | l1_mp1_mp2 = Mp1 * l1 + Mp2 * L1 101 | l1_mp1_mp2_c1 = l1_mp1_mp2 * c1 102 | Mp2_l2 = Mp2 * l2 103 | Mp2_l2_c2 = Mp2_l2 * c2 104 | l1_l2_Mp2 = L1 * l2 * Mp2 105 | l1_l2_Mp2_cdth = l1_l2_Mp2 * cdth 106 | 107 | # inertia 108 | M11 = Mt 109 | M12 = l1_mp1_mp2_c1 110 | M13 = Mp2_l2_c2 111 | M21 = l1_mp1_mp2_c1 112 | M22 = (l1 ** 2) * Mp1 + (L1 ** 2) * Mp2 + J1 113 | M23 = l1_l2_Mp2_cdth 114 | M31 = Mp2_l2_c2 115 | M32 = l1_l2_Mp2_cdth 116 | M33 = (l2 ** 2) * Mp2 + J2 117 | 118 | # Coriolis 119 | C11 = 0. 120 | C12 = - l1_mp1_mp2 * dth1 * s1 121 | C13 = - Mp2_l2 * dth2 * s2 122 | C21 = 0. 123 | C22 = 0. 124 | C23 = l1_l2_Mp2 * dth2 * sdth 125 | C31 = 0. 126 | C32 = - l1_l2_Mp2 * dth1 * sdth 127 | C33 = 0. 128 | 129 | # gravity 130 | G11 = 0. 131 | G21 = - (Mp1 * l1 + Mp2 * L1) * g * s1 132 | G31 = - Mp2 * l2 * g * s2 133 | 134 | # make matrices 135 | M = np.vstack((np.hstack((M11, M12, M13)), np.hstack((M21, M22, M23)), np.hstack((M31, M32, M33)))) 136 | C = np.vstack((np.hstack((C11, C12, C13)), np.hstack((C21, C22, C23)), np.hstack((C31, C32, C33)))) 137 | G = np.vstack((G11, G21, G31)) 138 | 139 | action = np.vstack((u, 0.0, 0.0)) 140 | 141 | M_inv = np.linalg.inv(M) 142 | C_dx = np.dot(C, x[3:].reshape((-1, 1))) 143 | ddx = np.dot(M_inv, action - C_dx - G).squeeze() 144 | 145 | return np.hstack((dq, dth1, dth2, ddx)) 146 | 147 | k1 = f(x, _u) 148 | k2 = f(x + 0.5 * self.dt * k1, _u) 149 | k3 = f(x + 0.5 * self.dt * k2, _u) 150 | k4 = f(x + self.dt * k3, _u) 151 | 152 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 153 | xn = np.clip(xn, -self.xlim, self.xlim) 154 | 155 | return xn 156 | 157 | def features(self, x): 158 | return x 159 | 160 | def features_jacobian(self, x): 161 | J = jacobian(self.features, 0) 162 | j = self.features(x) - J(x) @ x 163 | return J, j 164 | 165 | def noise(self, x=None, u=None): 166 | _u = np.clip(u, -self.ulim, self.ulim) 167 | _x = np.clip(x, -self.xlim, self.xlim) 168 | return self.sigma 169 | 170 | def cost(self, x, u, u_last, a): 171 | c = 0.
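# The scalar a acts as a time-varying activation supplied by the solver:
# when it is zero, the `if a:` guard below skips the state-goal term
# entirely, leaving only the action penalty at that time step.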
172 | 173 | if self.slew_rate: 174 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 175 | else: 176 | c += u.T @ np.diag(self.uw) @ u 177 | 178 | if a: 179 | y = np.hstack((x[0], 180 | wrap_angle(x[1]), 181 | wrap_angle(x[2]), 182 | x[3], x[4], x[5])) if self.periodic else x 183 | J, j = self.features_jacobian(getval(y)) 184 | z = J(getval(y)) @ y + j 185 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 186 | 187 | return c 188 | 189 | def seed(self, seed=None): 190 | self.np_random, seed = seeding.np_random(seed) 191 | return [seed] 192 | 193 | def step(self, u): 194 | # state-action dependent noise 195 | _sigma = self.noise(self.state, u) 196 | # evolve deterministic dynamics 197 | self.state = self.dynamics(self.state, u) 198 | # add noise 199 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 200 | return self.state, [], False, {} 201 | 202 | def init(self): 203 | return self.x0, self.sigma0 204 | 205 | def reset(self): 206 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 207 | return self.state 208 | 209 | 210 | class DoubleCartpoleWithCartesianCost(DoubleCartpole): 211 | 212 | def __init__(self): 213 | super(DoubleCartpoleWithCartesianCost, self).__init__() 214 | 215 | # g = [x, cs_th1, sn_th1, cs_th2, sn_th2, dx, dth1, dth2] 216 | self.g = np.array([0., 217 | 1., 0., 218 | 1., 0., 219 | 0., 0., 0.]) 220 | 221 | self.gw = np.array([1e1, 222 | 1e4, 1e4, 223 | 1e4, 1e4, 224 | 1e0, 1e0, 1e0]) 225 | 226 | def features(self, x): 227 | return np.array([x[0], 228 | np.cos(x[1]), np.sin(x[1]), 229 | np.cos(x[2]), np.sin(x[2]), 230 | x[3], x[4], x[5]]) 231 | -------------------------------------------------------------------------------- /trajopt/envs/double_pendulum/double_pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class DoublePendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 4 19 | self.dm_act = 2 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [th1, th2, dth1, dth2] 26 | self.g = np.array([0., 0., 27 | 0., 0.]) 28 | self.gw = np.array([1e4, 1e4, 29 | 1e0, 1e0]) 30 | 31 | # x = [th1, th2, dth1, dth2] 32 | self.xmax = np.array([np.inf, np.inf, 33 | np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 10.
* np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(2,)) 42 | 43 | self.x0 = np.array([np.pi, 0., 44 | 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Code from PolicySearchToolbox 66 | 67 | masses = np.array([1., 1.]) 68 | lengths = np.array([1., 1.]) 69 | friction = 0.025 * np.array([1., 1.]) 70 | g = 9.81 71 | 72 | inertias = masses * (lengths ** 2 + 1e-4) / 3.0 73 | 74 | def f(x, u): 75 | th1, th2, dth1, dth2 = x 76 | th1 = th1 + np.pi # downward position = PI 77 | 78 | u1, u2 = u 79 | 80 | I1, I2 = inertias 81 | l1, l2 = lengths 82 | m1, m2 = masses 83 | k1, k2 = friction 84 | 85 | l1CM = l1 / 2. 86 | l2CM = l2 / 2. 87 | 88 | s1, c1 = np.sin(th1), np.cos(th1) 89 | s2, c2 = np.sin(th2), np.cos(th2) 90 | 91 | h11 = I1 + I2 + l1CM * l1CM * m1 + l1 * l1 * m2\ 92 | + l2CM * l2CM * m2 + 2. * l1 * l2CM * m2 * c2 93 | h12 = I2 + l2CM * l2CM * m2 + l1 * l2CM * m2 * c2 94 | 95 | b1 = g * l1CM * m1 * s1 + g * l1 * m2 * s1\ 96 | + g * l2CM * m2 * c2 * s1\ 97 | - 2. * dth1 * dth2 * l1 * l2CM * m2 * s2\ 98 | - dth2 * dth2 * l1 * l2CM * m2 * s2\ 99 | + g * l2CM * m2 * c1 * s2 100 | 101 | h21 = I2 + l2CM * l2CM * m2 + l1 * l2CM * m2 * c2 102 | h22 = I2 + l2CM * l2CM * m2 103 | 104 | b2 = g * l2CM * m2 * c2 * s1\ 105 | + dth1 * dth1 * l1 * l2CM * m2 * s2\ 106 | + g * l2CM * m2 * c1 * s2 107 | 108 | u1 = u1 - k1 * dth1 109 | u2 = u2 - k2 * dth2 110 | 111 | det = h11 * h22 - h12 * h21 112 | 113 | ddth1 = (h22 * (u1 - b1) - h12 * (u2 - b2)) / det 114 | ddth2 = (h11 * (u2 - b2) - h21 * (u1 - b1)) / det 115 | 116 | return np.array([dth1, dth2, ddth1, ddth2]) 117 | 118 | k1 = f(x, _u) 119 | k2 = f(x + 0.5 * self.dt * k1, _u) 120 | k3 = f(x + 0.5 * self.dt * k2, _u) 121 | k4 = f(x + self.dt * k3, _u) 122 | 123 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 124 | xn = np.clip(xn, -self.xlim, self.xlim) 125 | 126 | return xn 127 | 128 | def features(self, x): 129 | return x 130 | 131 | def features_jacobian(self, x): 132 | J = jacobian(self.features, 0) 133 | j = self.features(x) - J(x) @ x 134 | return J, j 135 | 136 | def noise(self, x=None, u=None): 137 | _u = np.clip(u, -self.ulim, self.ulim) 138 | _x = np.clip(x, -self.xlim, self.xlim) 139 | return self.sigma 140 | 141 | def cost(self, x, u, u_last, a): 142 | c = 0. 
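# With periodic enabled, both joint angles are wrapped to [-pi, pi) below,
# so the goal term penalizes the shortest angular distance to the target
# rather than the raw, unwrapped joint values.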
143 | 144 | if self.slew_rate: 145 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 146 | else: 147 | c += u.T @ np.diag(self.uw) @ u 148 | 149 | if a: 150 | y = np.hstack((wrap_angle(x[0]), wrap_angle(x[1]), x[2], x[3])) if self.periodic else x 151 | J, j = self.features_jacobian(getval(y)) 152 | z = J(getval(y)) @ y + j 153 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 154 | 155 | return c 156 | 157 | def seed(self, seed=None): 158 | self.np_random, seed = seeding.np_random(seed) 159 | return [seed] 160 | 161 | def step(self, u): 162 | # state-action dependent noise 163 | _sigma = self.noise(self.state, u) 164 | # evolve deterministic dynamics 165 | self.state = self.dynamics(self.state, u) 166 | # add noise 167 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 168 | return self.state, [], False, {} 169 | 170 | def init(self): 171 | return self.x0, self.sigma0 172 | 173 | def reset(self): 174 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 175 | return self.state 176 | 177 | 178 | class DoublePendulumWithCartesianCost(DoublePendulum): 179 | 180 | def __init__(self): 181 | super(DoublePendulumWithCartesianCost, self).__init__() 182 | 183 | # g = [cs_th1, sn_th1, cs_th2, sn_th2, dth1, dth2] 184 | self.g = np.array([1., 0., 185 | 1., 0., 186 | 0., 0.]) 187 | self.gw = np.array([1e4, 1e4, 188 | 1e4, 1e4, 189 | 1e0, 1e0]) 190 | 191 | def features(self, x): 192 | return np.array([np.cos(x[0]), np.sin(x[0]), 193 | np.cos(x[1]), np.sin(x[1]), 194 | x[2], x[3]]) 195 | -------------------------------------------------------------------------------- /trajopt/envs/lightdark/lightdark.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LightDark(gym.Env): 9 | 10 | def __init__(self): 11 | self.state_dim = 2 12 | self.belief_dim = 2 13 | self.act_dim = 2 14 | self.obs_dim = 2 15 | 16 | self.dt = 1. 17 | 18 | self.dyn_sigma = 1e-8 * np.eye(self.state_dim) 19 | self.obs_sigma = 1e-4 * np.eye(self.obs_dim) 20 | 21 | self.goal = np.array([0., 0.]) 22 | 23 | # belief cost weights 24 | self.mu_w = np.array([0.5, 0.5]) 25 | self.sigma_w = np.array([200., 0.]) 26 | 27 | # action cost weights 28 | self.act_w = np.array([0.5, 0.5]) 29 | 30 | self.xmax = np.array([7., 4.]) 31 | self.zmax = np.array([7., 4.]) 32 | self.umax = np.array([np.inf, np.inf]) 33 | 34 | self.state_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.observation_space = spaces.Box(low=-self.zmax, 38 | high=self.zmax) 39 | 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax) 42 | 43 | self.state = None 44 | self.np_random = None 45 | 46 | self.seed() 47 | 48 | @property 49 | def xlim(self): 50 | return self.xmax 51 | 52 | @property 53 | def ulim(self): 54 | return self.umax 55 | 56 | def dynamics(self, x, u): 57 | _u = np.clip(u, -self.ulim, self.ulim) 58 | xn = x + self.dt * _u 59 | xn = np.clip(xn, -self.xlim, self.xlim) 60 | return xn 61 | 62 | def observe(self, x): 63 | return x 64 | 65 | def dyn_noise(self, x=None, u=None): 66 | _u = np.clip(u, -self.ulim, self.ulim) 67 | _x = np.clip(x, -self.xlim, self.xlim) 68 | return self.dyn_sigma 69 | 70 | def obs_noise(self, x=None): 71 | return self.obs_sigma +\ 72 | np.array([[0.5 * (5.
- x[0])**2, 0.], 73 | [0., 0.]]) 74 | 75 | # cost over belief 76 | def cost(self, mu_b, sigma_b, u): 77 | return (mu_b - self.goal).T @ np.diag(self.mu_w) @ (mu_b - self.goal) +\ 78 | np.trace(np.diag(self.sigma_w) @ sigma_b) + \ 79 | u.T @ np.diag(self.act_w) @ u 80 | 81 | def seed(self, seed=None): 82 | self.np_random, seed = seeding.np_random(seed) 83 | return [seed] 84 | 85 | def step(self, u): 86 | # state-action dependent dynamics noise 87 | _sigma_dyn = self.dyn_noise(self.state, u) 88 | # evolve deterministic dynamics 89 | self.state = self.dynamics(self.state, u) 90 | # add dynamics noise 91 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma_dyn) 92 | 93 | # state dependent observation noise 94 | _sigma_obs = self.obs_noise(self.state) 95 | # observe state 96 | _z = self.observe(self.state) 97 | # add observation noise 98 | obs = self.np_random.multivariate_normal(mean=_z, cov=_sigma_obs) 99 | 100 | return obs, [], False, {} 101 | 102 | # initial belief 103 | def init(self): 104 | mu = np.array([2., 2.]) 105 | sigma = np.array([[5., 0.], 106 | [0., 1e-8]]) 107 | return mu, sigma 108 | 109 | def reset(self): 110 | self.state = np.array([2.5, 0.]) 111 | return self.observe(self.state) 112 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v0.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv0(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.1 15 | 16 | self.x0 = np.array([5., 5.]) 17 | self.g = np.array([10., 10.]) 18 | 19 | self.gw = np.array([1e1, 1e1]) 20 | self.uw = np.array([1e0]) 21 | 22 | self.A = np.array([[1.1, 0.], [1.0, 1.0]]) 23 | self.B = np.array([[1.], [0.]]) 24 | self.c = - self.A @ self.g 25 | 26 | self.umax = np.inf 27 | self.action_space = spaces.Box(low=-self.umax, 28 | high=self.umax, shape=(1,)) 29 | 30 | self.xmax = np.array([np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.sigma = 1e-8 * np.eye(self.dm_state) 35 | self.sigma0 = 1e-2 * np.eye(self.dm_state) 36 | 37 | self.state = None 38 | self.np_random = None 39 | 40 | self.seed() 41 | 42 | @property 43 | def xlim(self): 44 | return self.xmax 45 | 46 | @property 47 | def ulim(self): 48 | return self.umax 49 | 50 | def dynamics(self, x, u): 51 | _u = np.clip(u, -self.ulim, self.ulim) 52 | 53 | xn = self.A @ x + self.B @ _u + self.c 54 | 55 | # def f(x, u): 56 | # return self.A @ x + self.B @ u + self.c 57 | # 58 | # k1 = f(x, _u) 59 | # k2 = f(x + 0.5 * self.dt * k1, _u) 60 | # k3 = f(x + 0.5 * self.dt * k2, _u) 61 | # k4 = f(x + self.dt * k3, _u) 62 | # 63 | # xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 64 | # xn = np.clip(xn, -self.xlim, self.xlim) 65 | 66 | return xn 67 | 68 | def inverse_dynamics(self, x, u): 69 | _u = np.clip(u, -self.ulim, self.ulim) 70 | 71 | def f(x, u): 72 | return self.A @ x + self.B @ u + self.c 73 | 74 | k1 = f(x, _u) 75 | k2 = f(x - 0.5 * self.dt * k1, _u) 76 | k3 = f(x - 0.5 * self.dt * k2, _u) 77 | k4 = f(x - self.dt * k3, _u) 78 | 79 | xn = x - self.dt / 6. * (k1 + 2. * k2 + 2.
* k3 + k4) 80 | xn = np.clip(xn, -self.xlim, self.xlim) 81 | 82 | return xn 83 | 84 | def noise(self, x=None, u=None): 85 | _u = np.clip(u, -self.ulim, self.ulim) 86 | _x = np.clip(x, -self.xlim, self.xlim) 87 | return self.sigma 88 | 89 | def cost(self, x, u, u_last, a): 90 | c = u.T @ np.diag(self.uw) @ u 91 | if a: 92 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 93 | 94 | return self.dt * c 95 | 96 | def seed(self, seed=None): 97 | self.np_random, seed = seeding.np_random(seed) 98 | return [seed] 99 | 100 | def step(self, u): 101 | # state-action dependent noise 102 | _sigma = self.noise(self.state, u) 103 | # evolve deterministic dynamics 104 | self.state = self.dynamics(self.state, u) 105 | # add noise 106 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 107 | return self.state, [], False, {} 108 | 109 | def init(self): 110 | return self.x0, self.sigma0 111 | 112 | def reset(self): 113 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 114 | return self.state 115 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v1.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv1(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.01 15 | 16 | self.x0 = np.array([0., 0.]) 17 | self.g = np.array([1., 0.]) 18 | 19 | self.gw = np.array([1e2, 1e0]) 20 | self.uw = np.array([1e-3]) 21 | 22 | m, k, d = 1., 1e-2, 1e-1 23 | self.A = np.array([[0. , 1e0 ], 24 | [- k / m, - d / m]]) * self.dt + np.eye(self.dm_state) 25 | 26 | self.B = np.array([[0.], [1. 
/ m]]) * self.dt 27 | self.c = np.array([0., 0.]) * self.dt 28 | 29 | self.umax = np.inf 30 | self.action_space = spaces.Box(low=-self.umax, 31 | high=self.umax, shape=(1,)) 32 | 33 | self.xmax = np.array([np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 38 | self.sigma = 1e-8 * np.eye(self.dm_state) 39 | 40 | self.state = None 41 | self.np_random = None 42 | 43 | self.seed() 44 | self.reset() 45 | 46 | @property 47 | def xlim(self): 48 | return self.xmax 49 | 50 | @property 51 | def ulim(self): 52 | return self.umax 53 | 54 | def dynamics(self, x, u, dist=None): 55 | _u = np.clip(u, -self.ulim, self.ulim) 56 | 57 | if dist is None: 58 | xn = self.A @ x + self.B @ _u + self.c 59 | else: 60 | mu, sigma = dist['mu'], dist['sigma'] 61 | params = self.np_random.multivariate_normal(mean=mu, cov=sigma) 62 | 63 | A = np.reshape(params[:self.dm_state**2], (self.dm_state, self.dm_state), order='F') 64 | B = np.reshape(params[self.dm_state ** 2:self.dm_state**2 + self.dm_state * self.dm_act], (self.dm_state, self.dm_act), order='F') 65 | c = np.reshape(params[-self.dm_state:], (self.dm_state, ), order='F') 66 | 67 | xn = A @ x + B @ _u + c 68 | return xn 69 | 70 | def noise(self, x=None, u=None): 71 | _u = np.clip(u, -self.ulim, self.ulim) 72 | _x = np.clip(x, -self.xlim, self.xlim) 73 | return self.sigma 74 | 75 | def cost(self, x, u, u_last, a): 76 | c = u.T @ np.diag(self.uw) @ u 77 | if a: 78 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 79 | 80 | return self.dt * c 81 | 82 | def seed(self, seed=None): 83 | self.np_random, seed = seeding.np_random(seed) 84 | return [seed] 85 | 86 | def step(self, u): 87 | # state-action dependent noise 88 | _sigma = self.noise(self.state, u) 89 | # evolve deterministic dynamics 90 | self.state = self.dynamics(self.state, u) 91 | # add noise 92 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 93 | return self.state, [], False, {} 94 | 95 | def evolve(self, u, dist): 96 | # state-action dependent noise 97 | _sigma = self.noise(self.state, u) 98 | # evolve deterministic dynamics 99 | self.state = self.dynamics(self.state, u, dist) 100 | # add noise 101 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 102 | return self.state, [], False, {} 103 | 104 | def init(self): 105 | return self.x0, self.sigma0 106 | 107 | def reset(self): 108 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 109 | return self.state 110 | -------------------------------------------------------------------------------- /trajopt/envs/lqr/lqr_v2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class LQRv2(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 2 12 | self.dm_act = 1 13 | 14 | self.dt = 0.01 15 | 16 | self.x0 = np.array([5., 5.]) 17 | self.g = np.array([1., 0.]) 18 | 19 | self.gw = np.array([1e1, 1e0]) 20 | self.uw = np.array([1e-3]) 21 | 22 | self.A = np.array([[1.0, 0.], 23 | [0.1, 1.1]]) 24 | self.B = np.array([[0.05], [0.]]) 25 | self.c = np.array([0., 0.]) 26 | 27 | self.umax = np.inf 28 | self.action_space = spaces.Box(low=-self.umax, 29 | high=self.umax, shape=(1,)) 30 | 31 | self.xmax = np.array([np.inf, np.inf]) 32 | self.observation_space = spaces.Box(low=-self.xmax, 33 | high=self.xmax) 34 | 35 | self.sigma0 
= 1e-2 * np.eye(self.dm_state) 36 | self.sigma = 1e-8 * np.eye(self.dm_state) 37 | 38 | self.state = None 39 | self.np_random = None 40 | 41 | self.seed() 42 | 43 | @property 44 | def xlim(self): 45 | return self.xmax 46 | 47 | @property 48 | def ulim(self): 49 | return self.umax 50 | 51 | def dynamics(self, x, u, dist=None): 52 | _u = np.clip(u, -self.ulim, self.ulim) 53 | 54 | if dist is None: 55 | xn = self.A @ x + self.B @ _u + self.c 56 | else: 57 | mu, sigma = dist['mu'], dist['sigma'] 58 | params = self.np_random.multivariate_normal(mean=mu, cov=sigma) 59 | 60 | A = np.reshape(params[:self.dm_state**2], (self.dm_state, self.dm_state), order='F') 61 | B = np.reshape(params[self.dm_state ** 2:self.dm_state**2 + self.dm_state * self.dm_act], (self.dm_state, self.dm_act), order='F') 62 | c = np.reshape(params[-self.dm_state:], (self.dm_state, ), order='F') 63 | 64 | xn = A @ x + B @ _u + c 65 | return xn 66 | 67 | def noise(self, x=None, u=None): 68 | _u = np.clip(u, -self.ulim, self.ulim) 69 | _x = np.clip(x, -self.xlim, self.xlim) 70 | return self.sigma 71 | 72 | def cost(self, x, u, u_last, a): 73 | c = u.T @ np.diag(self.uw) @ u 74 | if a: 75 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 76 | 77 | return self.dt * c 78 | 79 | def seed(self, seed=None): 80 | self.np_random, seed = seeding.np_random(seed) 81 | return [seed] 82 | 83 | def step(self, u): 84 | # state-action dependent noise 85 | _sigma = self.noise(self.state, u) 86 | # evolve deterministic dynamics 87 | self.state = self.dynamics(self.state, u) 88 | # add noise 89 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 90 | return self.state, [], False, {} 91 | 92 | def evolve(self, u, dist): 93 | # state-action dependent noise 94 | _sigma = self.noise(self.state, u) 95 | # evolve deterministic dynamics 96 | self.state = self.dynamics(self.state, u, dist) 97 | # add noise 98 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 99 | return self.state, [], False, {} 100 | 101 | def init(self): 102 | return self.x0, self.sigma0 103 | 104 | def reset(self): 105 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 106 | return self.state 107 | -------------------------------------------------------------------------------- /trajopt/envs/pendulum/pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class Pendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 2 19 | self.dm_act = 1 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-4 * np.eye(self.dm_state) 24 | 25 | # g = [th, thd] 26 | self.g = np.array([0., 0.]) 27 | self.gw = np.array([1e0, 1e-1]) 28 | 29 | # x = [th, thd] 30 | self.xmax = np.array([np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.slew_rate = False 35 | self.uw = 1e-3 * np.ones((self.dm_act, )) 36 | self.umax = 10. 
* np.ones((self.dm_act, )) 37 | self.action_space = spaces.Box(low=-self.umax, 38 | high=self.umax, shape=(1,)) 39 | 40 | self.x0 = np.array([np.pi, 0.]) 41 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 42 | 43 | self.periodic = False 44 | 45 | self.state = None 46 | self.np_random = None 47 | 48 | self.seed() 49 | 50 | @property 51 | def xlim(self): 52 | return self.xmax 53 | 54 | @property 55 | def ulim(self): 56 | return self.umax 57 | 58 | def dynamics(self, x, u): 59 | _u = np.clip(u, -self.ulim, self.ulim) 60 | 61 | g, m, l, k = 9.81, 1., 1., 1e-3 62 | 63 | def f(x, u): 64 | th, dth = x 65 | return np.hstack((dth, - 3. * g / (2. * l) * np.sin(th + np.pi) + 66 | 3. / (m * l ** 2) * (u - k * dth))) 67 | 68 | k1 = f(x, _u) 69 | k2 = f(x + 0.5 * self.dt * k1, _u) 70 | k3 = f(x + 0.5 * self.dt * k2, _u) 71 | k4 = f(x + self.dt * k3, _u) 72 | 73 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 74 | xn = np.clip(xn, -self.xlim, self.xlim) 75 | 76 | return xn 77 | 78 | def inverse_dynamics(self, x, u): 79 | _u = np.clip(u, -self.ulim, self.ulim) 80 | 81 | g, m, l, k = 9.81, 1., 1., 1e-3 82 | 83 | def f(x, u): 84 | th, dth = x 85 | return np.hstack((dth, - 3. * g / (2. * l) * np.sin(th + np.pi) + 86 | 3. / (m * l ** 2) * (u - k * dth))) 87 | 88 | k1 = f(x, _u) 89 | k2 = f(x - 0.5 * self.dt * k1, _u) 90 | k3 = f(x - 0.5 * self.dt * k2, _u) 91 | k4 = f(x - self.dt * k3, _u) 92 | 93 | xn = x - self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 94 | xn = np.clip(xn, -self.xlim, self.xlim) 95 | 96 | return xn 97 | 98 | def features(self, x): 99 | return x 100 | 101 | def features_jacobian(self, x): 102 | J = jacobian(self.features, 0) 103 | j = self.features(x) - J(x) @ x 104 | return J, j 105 | 106 | def noise(self, x=None, u=None): 107 | _u = np.clip(u, -self.ulim, self.ulim) 108 | _x = np.clip(x, -self.xlim, self.xlim) 109 | return self.sigma 110 | 111 | def cost(self, x, u, u_last, a): 112 | c = 0. 
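# getval strips the autograd tracer, so the Jacobian J and offset j below
# are treated as constants of the expansion point; z is then linear in y
# and the goal penalty exactly quadratic, which is presumably what keeps
# the autograd Hessians of this cost well behaved for the Gauss-Newton
# style solvers in this repo.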
113 | 114 | if self.slew_rate: 115 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 116 | else: 117 | c += u.T @ np.diag(self.uw) @ u 118 | 119 | if a: 120 | y = np.hstack((wrap_angle(x[0]), x[1])) if self.periodic else x 121 | J, j = self.features_jacobian(getval(y)) 122 | z = J(getval(y)) @ y + j 123 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 124 | 125 | return c 126 | 127 | def seed(self, seed=None): 128 | self.np_random, seed = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def step(self, u): 132 | # state-action dependent noise 133 | _sigma = self.noise(self.state, u) 134 | # evolve deterministic dynamics 135 | self.state = self.dynamics(self.state, u) 136 | # add noise 137 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 138 | return self.state, [], False, {} 139 | 140 | def init(self): 141 | return self.x0, self.sigma0 142 | 143 | def reset(self): 144 | # self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 145 | low, high = np.array([np.pi - np.pi / 10., -0.75]), \ 146 | np.array([np.pi + np.pi / 10., 0.75]) 147 | x0 = self.np_random.uniform(low=low, high=high) 148 | self.state = np.hstack((wrap_angle(x0[0]), x0[1])) 149 | return self.state 150 | 151 | 152 | class PendulumWithCartesianCost(Pendulum): 153 | 154 | def __init__(self): 155 | super(PendulumWithCartesianCost, self).__init__() 156 | 157 | # g = [cs_th, sn_th, dth] 158 | self.g = np.array([1., 0., 0.]) 159 | self.gw = np.array([1e4, 1e4, 1e0]) 160 | 161 | def features(self, x): 162 | return np.array([np.cos(x[0]), np.sin(x[0]), x[1]]) 163 | -------------------------------------------------------------------------------- /trajopt/envs/quad_pendulum/quad_pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | from autograd import jacobian 7 | from autograd.tracer import getval 8 | 9 | 10 | def wrap_angle(x): 11 | # wraps angle between [-pi, pi] 12 | return ((x + np.pi) % (2. * np.pi)) - np.pi 13 | 14 | 15 | class QuadPendulum(gym.Env): 16 | 17 | def __init__(self): 18 | self.dm_state = 8 19 | self.dm_act = 4 20 | 21 | self.dt = 0.01 22 | 23 | self.sigma = 1e-8 * np.eye(self.dm_state) 24 | 25 | # g = [th1, th2, th3, th4, dth1, dth2, dth3, dth4] 26 | self.g = np.array([0., 0., 0., 0., 27 | 0., 0., 0., 0.]) 28 | self.gw = np.array([1e4, 1e4, 1e4, 1e4, 29 | 1e0, 1e0, 1e0, 1e0]) 30 | 31 | # x = [th, dth] 32 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf, 33 | np.inf, np.inf, np.inf, np.inf]) 34 | self.observation_space = spaces.Box(low=-self.xmax, 35 | high=self.xmax) 36 | 37 | self.slew_rate = False 38 | self.uw = 1e-5 * np.ones((self.dm_act, )) 39 | self.umax = 25. 
* np.ones((self.dm_act, )) 40 | self.action_space = spaces.Box(low=-self.umax, 41 | high=self.umax, shape=(4,)) 42 | 43 | self.x0 = np.array([np.pi, 0., 0., 0., 44 | 0., 0., 0., 0.]) 45 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 46 | 47 | self.periodic = False 48 | 49 | self.state = None 50 | self.np_random = None 51 | 52 | self.seed() 53 | 54 | @property 55 | def xlim(self): 56 | return self.xmax 57 | 58 | @property 59 | def ulim(self): 60 | return self.umax 61 | 62 | def dynamics(self, x, u): 63 | _u = np.clip(u, -self.ulim, self.ulim) 64 | 65 | # Code from PolicySearchToolbox 66 | # Code from Chris Atkeson's (http://www.cs.cmu.edu/~cga/kdc/dynamics-2d/dynamics4.c) 67 | 68 | masses = np.array([1., 1., 1., 1.]) 69 | lengths = np.array([1., 1., 1., 1.]) 70 | friction = 0.025 * np.array([1., 1., 1., 1.]) 71 | G = 9.81 72 | 73 | inertias = masses * (lengths ** 2 + 1e-4) / 3.0 74 | 75 | def f(x, u): 76 | th1, th2, th3, th4,\ 77 | dth1, dth2, dth3, dth4 = x 78 | th1 = th1 + np.pi # downward position = PI 79 | 80 | u1, u2, u3, u4 = u 81 | 82 | I1, I2, I3, I4 = inertias 83 | l1, l2, l3, l4 = lengths 84 | m1, m2, m3, m4 = masses 85 | fr1, fr2, fr3, fr4 = friction 86 | 87 | l1cm = l1 / 2. 88 | l2cm = l2 / 2. 89 | l3cm = l3 / 2. 90 | l4cm = l4 / 2. 91 | 92 | s1, c1 = np.sin(th1), np.cos(th1) 93 | s2, c2 = np.sin(th2), np.cos(th2) 94 | s3, c3 = np.sin(th3), np.cos(th3) 95 | s4, c4 = np.sin(th4), np.cos(th4) 96 | 97 | s12 = s1 * c2 + c1 * s2 98 | c12 = c1 * c2 - s1 * s2 99 | s23 = s2 * c3 + c2 * s3 100 | c23 = c2 * c3 - s2 * s3 101 | s34 = s3 * c4 + c3 * s4 102 | c34 = c3 * c4 - s3 * s4 103 | 104 | s1234 = s12 * c34 + c12 * s34 105 | s123 = s12 * c3 + c12 * s3 106 | s234 = s2 * c34 + c2 * s34 107 | c234 = c2 * c34 - s2 * s34 108 | 109 | dth1_dth1 = dth1 * dth1 110 | dth2_dth2 = dth2 * dth2 111 | dth3_dth3 = dth3 * dth3 112 | dth4_dth4 = dth4 * dth4 113 | dth1_p_dth2_2 = (dth1 + dth2) * (dth1 + dth2) 114 | 115 | l4cm_m4 = l4cm * m4 116 | l3_l4cm_m4 = l3 * l4cm_m4 117 | l2_l4cm_m4 = l2 * l4cm_m4 118 | l2_l4cm_m4_c34 = l2_l4cm_m4 * c34 119 | l1_l4cm_m4 = l1 * l4cm_m4 120 | l3_m4 = l3 * m4 121 | l3cm_m3 = l3cm * m3 122 | l3cm_m3_l3_m4 = l3cm_m3 + l3_m4 123 | l2cm_m2 = l2cm * m2 124 | l2cm_m2_p_l2_m3_p_m4 = l2cm_m2 + l2 * (m3 + m4) 125 | l2_l3cm_m3_l3_m4 = l2 * l3cm_m3_l3_m4 126 | l1_l3cm_m3_l3_m4 = l1 * l3cm_m3_l3_m4 127 | a123d = dth1 + dth2 + dth3 128 | l1_l3cm_m3_l3_m4_s23 = l1_l3cm_m3_l3_m4 * s23 129 | l2_l4cm_m4_s34 = l2_l4cm_m4 * s34 130 | 131 | expr1 = G * (s123 * l3cm_m3_l3_m4 + s1234 * l4cm_m4) 132 | expr2 = (2 * a123d + dth4) * dth4 * l3_l4cm_m4 * s4 133 | expr3 = G * l2cm_m2_p_l2_m3_p_m4 * s12 134 | expr4a = 2 * dth1 * dth4 + 2 * dth2 * dth4 + 2 * dth3 * dth4 + dth4_dth4 135 | expr4b = 2 * dth1 * dth3 + 2 * dth2 * dth3 + dth3_dth3 136 | expr4 = (expr4b + expr4a) * l2_l4cm_m4_s34 137 | expr5a = dth1_dth1 * l1 * s234 138 | expr5 = l4cm_m4 * expr5a 139 | expr6 = expr4b * l2_l3cm_m3_l3_m4 * s3 140 | expr7 = l1 * l2cm_m2_p_l2_m3_p_m4 141 | expr8 = l1 * (m2 + m3 + m4) 142 | expr9a = 2 * dth1 * dth2 + dth2_dth2 143 | expr9 = (expr9a + expr4b) 144 | 145 | p = I4 + l4cm * l4cm_m4 146 | o = p + l3_l4cm_m4 * c4 147 | n = o + l2_l4cm_m4_c34 148 | m = n + l1_l4cm_m4 * c234 149 | 150 | t = u4 - fr4 * dth4 \ 151 | - (l4cm_m4 * (a123d * a123d * l3 * s4 + 152 | dth1_p_dth2_2 * l2 * s34 + 153 | expr5a + G * s1234)) 154 | 155 | l = o 156 | k = I3 + o + l3cm * l3cm_m3 + l3 * l3_m4 + l3_l4cm_m4 * c4 157 | j = k + l2_l3cm_m3_l3_m4 * c3 + l2_l4cm_m4_c34 158 | i = j + l1_l3cm_m3_l3_m4 * c23 + l1_l4cm_m4 * c234 159 
| 160 | s = u3 - fr3 * dth3 \ 161 | - ((dth1_p_dth2_2 * l2_l3cm_m3_l3_m4 * s3 + dth1_dth1 * l1_l3cm_m3_l3_m4_s23) 162 | - expr2 + dth1_p_dth2_2 * l2_l4cm_m4_s34 + expr5 + expr1) 163 | 164 | h = n 165 | g = j 166 | 167 | f = j + I2 + l2cm * l2cm_m2 + (l2 * l2) * (m3 + m4)\ 168 | + l2_l3cm_m3_l3_m4 * c3 + l2_l4cm_m4_c34 169 | 170 | e = f + i - j + expr7 * c2 171 | 172 | r = u2 - fr2 * dth2\ 173 | - (dth1_dth1 * expr7 * s2 174 | - expr6 + dth1_dth1 * l1_l3cm_m3_l3_m4_s23 175 | - expr2 - expr4 + expr5 + expr3 + expr1) 176 | 177 | d = m 178 | c = i 179 | b = e 180 | a = 2 * e + I1 - f + (l1cm * l1cm) * m1 + l1 * expr8 181 | 182 | q = u1 - fr1 * dth1\ 183 | - (- expr9a * expr7 * s2 - expr6 - expr9 * l1_l3cm_m3_l3_m4_s23 184 | - expr2 - expr4 - (expr9 + expr4a) * l1_l4cm_m4 * s234 185 | + expr3 + G * (l1cm * m1 + expr8) * s1 + expr1) 186 | 187 | det = (d * g * j * m - c * h * j * m - d * f * k * m + b * h * k * m + c * f * l * m - b * g * l * m - d * g * i * n + 188 | c * h * i * n + d * e * k * n - a * h * k * n - c * e * l * n + a * g * l * n + d * f * i * o - b * h * i * o - 189 | d * e * j * o + a * h * j * o + b * e * l * o - a * f * l * o - c * f * i * p + b * g * i * p + c * e * j * p - 190 | a * g * j * p - b * e * k * p + a * f * k * p) 191 | 192 | ddth1 = q * (-(h * k * n) + g * l * n + h * j * o - f * l * o - g * j * p + f * k * p)\ 193 | + r * (d * k * n - c * l * n - d * j * o + b * l * o + c * j * p - b * k * p)\ 194 | + s * (-(d * g * n) + c * h * n + d * f * o - b * h * o - c * f * p + b * g * p)\ 195 | + t * (d * g * j - c * h * j - d * f * k + b * h * k + c * f * l - b * g * l) 196 | 197 | ddth2 = q * (h * k * m - g * l * m - h * i * o + e * l * o + g * i * p - e * k * p)\ 198 | + r * (-(d * k * m) + c * l * m + d * i * o - a * l * o - c * i * p + a * k * p)\ 199 | + s * (d * g * m - c * h * m - d * e * o + a * h * o + c * e * p - a * g * p)\ 200 | + t * (-(d * g * i) + c * h * i + d * e * k - a * h * k - c * e * l + a * g * l) 201 | 202 | ddth3 = q * (-(h * j * m) + f * l * m + h * i * n - e * l * n - f * i * p + e * j * p)\ 203 | + r * (d * j * m - b * l * m - d * i * n + a * l * n + b * i * p - a * j * p)\ 204 | + s * (-(d * f * m) + b * h * m + d * e * n - a * h * n - b * e * p + a * f * p)\ 205 | + t * (d * f * i - b * h * i - d * e * j + a * h * j + b * e * l - a * f * l) 206 | 207 | ddth4 = q * (g * j * m - f * k * m - g * i * n + e * k * n + f * i * o - e * j * o)\ 208 | + r * (-(c * j * m) + b * k * m + c * i * n - a * k * n - b * i * o + a * j * o)\ 209 | + s * (c * f * m - b * g * m - c * e * n + a * g * n + b * e * o - a * f * o)\ 210 | + t * (-(c * f * i) + b * g * i + c * e * j - a * g * j - b * e * k + a * f * k) 211 | 212 | ddth1 = ddth1 / det 213 | ddth2 = ddth2 / det 214 | ddth3 = ddth3 / det 215 | ddth4 = ddth4 / det 216 | 217 | return np.array([dth1, dth2, dth3, dth4, 218 | ddth1, ddth2, ddth3, ddth4]) 219 | 220 | k1 = f(x, _u) 221 | k2 = f(x + 0.5 * self.dt * k1, _u) 222 | k3 = f(x + 0.5 * self.dt * k2, _u) 223 | k4 = f(x + self.dt * k3, _u) 224 | 225 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. 
* k3 + k4) 226 | xn = np.clip(xn, -self.xlim, self.xlim) 227 | 228 | return xn 229 | 230 | def features(self, x): 231 | return x 232 | 233 | def features_jacobian(self, x): 234 | J = jacobian(self.features, 0) 235 | j = self.features(x) - J(x) @ x 236 | return J, j 237 | 238 | def noise(self, x=None, u=None): 239 | _u = np.clip(u, -self.ulim, self.ulim) 240 | _x = np.clip(x, -self.xlim, self.xlim) 241 | return self.sigma 242 | 243 | def cost(self, x, u, u_last, a): 244 | c = 0. 245 | 246 | if self.slew_rate: 247 | c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last) 248 | else: 249 | c += u.T @ np.diag(self.uw) @ u 250 | 251 | if a: 252 | y = np.hstack((wrap_angle(x[0]), wrap_angle(x[1]), 253 | wrap_angle(x[2]), wrap_angle(x[3]), 254 | x[4], x[5], x[6], x[7])) if self.periodic else x 255 | J, j = self.features_jacobian(getval(y)) 256 | z = J(getval(y)) @ y + j 257 | c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g) 258 | 259 | return c 260 | 261 | def seed(self, seed=None): 262 | self.np_random, seed = seeding.np_random(seed) 263 | return [seed] 264 | 265 | def step(self, u): 266 | # state-action dependent noise 267 | _sigma = self.noise(self.state, u) 268 | # evolve deterministic dynamics 269 | self.state = self.dynamics(self.state, u) 270 | # add noise 271 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 272 | return self.state, [], False, {} 273 | 274 | def init(self): 275 | return self.x0, self.sigma0 276 | 277 | def reset(self): 278 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 279 | return self.state 280 | 281 | 282 | class QuadPendulumWithCartesianCost(QuadPendulum): 283 | 284 | def __init__(self): 285 | super(QuadPendulumWithCartesianCost, self).__init__() 286 | 287 | self.g = np.array([1., 0., 288 | 1., 0., 289 | 1., 0., 290 | 1., 0., 291 | 0., 0., 0., 0.]) 292 | self.gw = np.array([1e4, 1e4, 293 | 1e4, 1e4, 294 | 1e4, 1e4, 295 | 1e4, 1e4, 296 | 1e0, 1e0, 1e0, 1e0]) 297 | 298 | def features(self, x): 299 | return np.array([np.cos(x[0]), np.sin(x[0]), 300 | np.cos(x[1]), np.sin(x[1]), 301 | np.cos(x[2]), np.sin(x[2]), 302 | np.cos(x[3]), np.sin(x[3]), 303 | x[4], x[5], x[6], x[7]]) 304 | -------------------------------------------------------------------------------- /trajopt/envs/robot/robot.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | import autograd.numpy as np 6 | 7 | 8 | class Robot(gym.Env): 9 | 10 | def __init__(self): 11 | self.dm_state = 4 12 | self.dm_act = 2 13 | 14 | self.dt = 0.025 15 | 16 | # car length 17 | self.l = 0.1 18 | 19 | self.x0 = np.array([5., 5., 0., 0.]) 20 | self.g = np.array([0., 0., 0., 0.]) 21 | 22 | self.gw = np.array([1e1, 1e1, 1., 1.]) 23 | self.uw = np.array([1e-3, 1e-3]) 24 | 25 | self.slew_rate = False 26 | self.umax = np.array([np.inf, np.inf]) 27 | self.action_space = spaces.Box(low=-self.umax, 28 | high=self.umax) 29 | 30 | self.xmax = np.array([np.inf, np.inf, np.inf, np.inf]) 31 | self.observation_space = spaces.Box(low=-self.xmax, 32 | high=self.xmax) 33 | 34 | self.sigma0 = 1e-4 * np.eye(self.dm_state) 35 | self.sigma = 1e-8 * np.eye(self.dm_state) 36 | 37 | self.state = None 38 | self.np_random = None 39 | 40 | self.seed() 41 | self.reset() 42 | 43 | @property 44 | def xlim(self): 45 | return self.xmax 46 | 47 | @property 48 | def ulim(self): 49 | return self.umax 50 | 51 | def dynamics(self, x, u): 52 | _u = np.clip(u, -self.ulim, self.ulim) 
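# f below is the standard kinematic car model: state (x, y, th, v),
# controls (a, phi) for acceleration and steering angle, and turn rate
# v * tan(phi) / l with wheelbase l.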
53 | 54 | def f(x, u): 55 | # x, y, th, v 56 | x, y, th, v = x 57 | a, phi = u 58 | return np.array([v * np.cos(th), 59 | v * np.sin(th), 60 | v * np.tan(phi) / self.l, 61 | a]) 62 | k1 = f(x, _u) 63 | k2 = f(x + 0.5 * self.dt * k1, _u) 64 | k3 = f(x + 0.5 * self.dt * k2, _u) 65 | k4 = f(x + self.dt * k3, _u) 66 | 67 | xn = x + self.dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4) 68 | xn = np.clip(xn, -self.xlim, self.xlim) 69 | 70 | return xn 71 | 72 | def noise(self, x=None, u=None): 73 | _u = np.clip(u, -self.ulim, self.ulim) 74 | _x = np.clip(x, -self.xlim, self.xlim) 75 | return self.sigma 76 | 77 | def cost(self, x, u, u_last, a): 78 | c = u.T @ np.diag(self.uw) @ u 79 | if a: 80 | c += a * (x - self.g).T @ np.diag(self.gw) @ (x - self.g) 81 | 82 | return self.dt * c 83 | 84 | def seed(self, seed=None): 85 | self.np_random, seed = seeding.np_random(seed) 86 | return [seed] 87 | 88 | def step(self, u): 89 | # state-action dependent noise 90 | _sigma = self.noise(self.state, u) 91 | # evolve deterministic dynamics 92 | self.state = self.dynamics(self.state, u) 93 | # add noise 94 | self.state = self.np_random.multivariate_normal(mean=self.state, cov=_sigma) 95 | return self.state, [], False, {} 96 | 97 | def init(self): 98 | return self.x0, self.sigma0 99 | 100 | def reset(self): 101 | self.state = self.np_random.multivariate_normal(mean=self.x0, cov=self.sigma0) 102 | return self.state 103 | -------------------------------------------------------------------------------- /trajopt/gps/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/gps/__init__.py: -------------------------------------------------------------------------------- 1 | from .mbgps import MBGPS 2 | from .mfgps import MFGPS 3 | -------------------------------------------------------------------------------- /trajopt/gps/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | from mimo.distributions import MatrixNormalWishart 5 | from mimo.distributions import LinearGaussianWithMatrixNormalWishart 6 | from mimo.distributions import MatrixNormalWithKnownPrecision 7 | from mimo.distributions import LinearGaussianWithMatrixNormal 8 | from mimo.distributions import LinearGaussianWithKnownPrecision 9 | 10 | 11 | class Gaussian: 12 | def __init__(self, nb_dim, nb_steps): 13 | self.nb_dim = nb_dim 14 | self.nb_steps = nb_steps 15 | 16 | self.mu = np.zeros((self.nb_dim, self.nb_steps)) 17 | self.sigma = np.zeros((self.nb_dim, self.nb_dim, self.nb_steps)) 18 | for t in range(self.nb_steps): 19 | self.sigma[..., t] = np.eye(self.nb_dim) 20 | 21 | @property 22 | def params(self): 23 | return self.mu, self.sigma 24 | 25 | @params.setter 26 | def params(self, values): 27 | self.mu, self.sigma = values 28 | 29 | def sample(self, x): 30 | pass 31 | 32 | 33 | class QuadraticStateValue: 34 | def __init__(self, dm_state, nb_steps):
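# Containers for a time-indexed quadratic state-value function, stored as
#   V_t(x) = x.T @ V[..., t] @ x + v[..., t].T @ x + v0[t],
# with one slice per time step of the backward pass.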
35 | self.dm_state = dm_state 36 | self.nb_steps = nb_steps 37 | 38 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 39 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 40 | self.v0 = np.zeros((self.nb_steps, )) 41 | 42 | 43 | class QuadraticStateActionValue: 44 | def __init__(self, dm_state, dm_act, nb_steps): 45 | self.dm_state = dm_state 46 | self.dm_act = dm_act 47 | self.nb_steps = nb_steps 48 | 49 | self.Qxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 50 | self.Quu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 51 | self.Qux = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 52 | 53 | self.qx = np.zeros((self.dm_state, self.nb_steps, )) 54 | self.qu = np.zeros((self.dm_act, self.nb_steps, )) 55 | 56 | self.q0 = np.zeros((self.nb_steps, )) 57 | 58 | 59 | class QuadraticCost: 60 | def __init__(self, dm_state, dm_act, nb_steps): 61 | self.dm_state = dm_state 62 | self.dm_act = dm_act 63 | 64 | self.nb_steps = nb_steps 65 | 66 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 67 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 68 | 69 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 70 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 71 | 72 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 73 | self.c0 = np.zeros((self.nb_steps, )) 74 | 75 | @property 76 | def params(self): 77 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 78 | 79 | @params.setter 80 | def params(self, values): 81 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu, self.c0 = values 82 | 83 | def evaluate(self, x, u): 84 | _ret = 0. 85 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 86 | for t in range(self.nb_steps): 87 | _ret += x[..., t].T @ self.Cxx[..., t] @ x[..., t] +\ 88 | _u[..., t].T @ self.Cuu[..., t] @ _u[..., t] +\ 89 | x[..., t].T @ self.Cxu[..., t] @ _u[..., t] +\ 90 | self.cx[..., t].T @ x[..., t] +\ 91 | self.cu[..., t].T @ _u[..., t] + self.c0[..., t] 92 | return _ret 93 | 94 | 95 | class AnalyticalQuadraticCost(QuadraticCost): 96 | def __init__(self, f, dm_state, dm_act, nb_steps): 97 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 98 | 99 | self.f = f 100 | 101 | self.dcdxx = hessian(self.f, 0) 102 | self.dcduu = hessian(self.f, 1) 103 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 104 | 105 | self.dcdx = jacobian(self.f, 0) 106 | self.dcdu = jacobian(self.f, 1) 107 | 108 | def evalf(self, x, u, u_last, a): 109 | return self.f(x, u, u_last, a) 110 | 111 | def taylor_expansion(self, x, u, a): 112 | # pad last time step of action traj. 113 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 114 | 115 | for t in range(self.nb_steps): 116 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 117 | self.Cxx[..., t] = 0.5 * self.dcdxx(*_in) 118 | self.Cuu[..., t] = 0.5 * self.dcduu(*_in) 119 | self.Cxu[..., t] = 0.5 * self.dcdxu(*_in) 120 | 121 | self.cx[..., t] = self.dcdx(*_in) - self.dcdxx(*_in) @ x[..., t] - 2. * self.dcdxu(*_in) @ _u[..., t] 122 | self.cu[..., t] = self.dcdu(*_in) - self.dcduu(*_in) @ _u[..., t] - 2. * x[..., t].T @ self.dcdxu(*_in) 123 | 124 | # residual of taylor expansion 125 | self.c0[..., t] = self.f(*_in)\ 126 | - x[..., t].T @ self.Cxx[..., t] @ x[..., t]\ 127 | - _u[..., t].T @ self.Cuu[..., t] @ _u[..., t]\ 128 | - 2.
* x[..., t].T @ self.Cxu[..., t] @ _u[..., t]\ 129 | - self.cx[..., t].T @ x[..., t]\ 130 | - self.cu[..., t].T @ _u[..., t] 131 | 132 | 133 | class LinearGaussianDynamics: 134 | def __init__(self, dm_state, dm_act, nb_steps): 135 | self.dm_state = dm_state 136 | self.dm_act = dm_act 137 | self.nb_steps = nb_steps 138 | 139 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 140 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 141 | self.c = np.zeros((self.dm_state, self.nb_steps)) 142 | self.sigma = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 143 | for t in range(self.nb_steps): 144 | self.sigma[..., t] = 1e-8 * np.eye(self.dm_state) 145 | 146 | @property 147 | def params(self): 148 | return self.A, self.B, self.c, self.sigma 149 | 150 | @params.setter 151 | def params(self, values): 152 | self.A, self.B, self.c, self.sigma = values 153 | 154 | def sample(self, x, u): 155 | pass 156 | 157 | 158 | class AnalyticalLinearGaussianDynamics(LinearGaussianDynamics): 159 | def __init__(self, f_dyn, noise, dm_state, dm_act, nb_steps): 160 | super(AnalyticalLinearGaussianDynamics, self).__init__(dm_state, dm_act, nb_steps) 161 | 162 | self.f = f_dyn 163 | self.noise = noise 164 | 165 | self.dfdx = jacobian(self.f, 0) 166 | self.dfdu = jacobian(self.f, 1) 167 | 168 | def evalf(self, x, u): 169 | return self.f(x, u) 170 | 171 | def taylor_expansion(self, x, u): 172 | A = self.dfdx(x, u) 173 | B = self.dfdu(x, u) 174 | # residual of taylor expansion 175 | c = self.evalf(x, u) - A @ x - B @ u 176 | sigma = self.noise(x, u) 177 | return A, B, c, sigma 178 | 179 | def extended_kalman(self, init_state, lgc, ulim): 180 | lgd = LinearGaussianDynamics(self.dm_state, self.dm_act, self.nb_steps) 181 | 182 | xdist = Gaussian(self.dm_state, self.nb_steps + 1) 183 | udist = Gaussian(self.dm_act, self.nb_steps) 184 | 185 | # forward propagation of mean dynamics 186 | xdist.mu[..., 0], xdist.sigma[..., 0] = init_state 187 | for t in range(self.nb_steps): 188 | udist.mu[..., t] = np.clip(lgc.K[..., t] @ xdist.mu[..., t] + lgc.kff[..., t], -ulim, ulim) 189 | xdist.mu[..., t + 1] = self.evalf(xdist.mu[..., t], udist.mu[..., t]) 190 | 191 | for t in range(self.nb_steps): 192 | lgd.A[..., t], lgd.B[..., t], lgd.c[..., t], lgd.sigma[..., t] =\ 193 | self.taylor_expansion(xdist.mu[..., t], udist.mu[..., t]) 194 | 195 | # construct variance of the next time step with extended Kalman filtering 196 | mu_x, sigma_x = xdist.mu[..., t], xdist.sigma[..., t] 197 | K, kff, ctl_sigma = lgc.K[..., t], lgc.kff[..., t], lgc.sigma[..., t] 198 | 199 | # propagate variance of action dist.
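# For u = K @ x + kff + eps with eps ~ N(0, ctl_sigma), the action marginal
# is Cov(u) = K @ sigma_x @ K.T + ctl_sigma; the joint state-action
# covariance sigma_xu below stacks sigma_x with the cross term
# sigma_x @ K.T before it is pushed through [A B].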
200 | u_sigma = ctl_sigma + K @ sigma_x @ K.T 201 | u_sigma = 0.5 * (u_sigma + u_sigma.T) 202 | udist.sigma[..., t] = u_sigma 203 | 204 | AB = np.hstack((lgd.A[..., t], lgd.B[..., t])) 205 | sigma_xu = np.vstack((np.hstack((sigma_x, sigma_x @ K.T)), 206 | np.hstack((K @ sigma_x, u_sigma)))) 207 | 208 | sigma_xn = lgd.sigma[..., t] + AB @ sigma_xu @ AB.T 209 | sigma_xn = 0.5 * (sigma_xn + sigma_xn.T) 210 | xdist.sigma[..., t + 1] = sigma_xn 211 | 212 | return xdist, udist, lgd 213 | 214 | 215 | class LearnedLinearGaussianDynamics(LinearGaussianDynamics): 216 | def __init__(self, dm_state, dm_act, nb_steps, prior): 217 | super(LearnedLinearGaussianDynamics, self).__init__(dm_state, dm_act, nb_steps) 218 | 219 | hypparams = dict(M=np.zeros((self.dm_state, self.dm_state + self.dm_act + 1)), 220 | K=prior['K'] * np.eye(self.dm_state + self.dm_act + 1), 221 | psi=prior['psi'] * np.eye(self.dm_state), 222 | nu=self.dm_state + prior['nu']) 223 | self.prior = MatrixNormalWishart(**hypparams) 224 | 225 | def learn(self, data): 226 | for t in range(self.nb_steps): 227 | input = np.hstack((data['x'][:, t, :].T, data['u'][:, t, :].T)) 228 | target = data['xn'][:, t, :].T 229 | 230 | model = LinearGaussianWithMatrixNormalWishart(self.prior, affine=True) 231 | model = model.max_aposteriori(y=target, x=input) 232 | 233 | self.A[..., t] = model.likelihood.A[:, :self.dm_state] 234 | self.B[..., t] = model.likelihood.A[:, self.dm_state:self.dm_state + self.dm_act] 235 | self.c[..., t] = model.likelihood.A[:, -1] 236 | self.sigma[..., t] = model.likelihood.sigma 237 | 238 | 239 | class LearnedLinearGaussianDynamicsWithKnownNoise(LinearGaussianDynamics): 240 | def __init__(self, dm_state, dm_act, nb_steps, noise, prior): 241 | super(LearnedLinearGaussianDynamicsWithKnownNoise, self).__init__(dm_state, dm_act, nb_steps) 242 | 243 | hypparams = dict(M=np.zeros((self.dm_state, self.dm_state + self.dm_act + 1)), 244 | K=prior['K'] * np.eye(self.dm_state + self.dm_act + 1), 245 | V=np.linalg.inv(noise)) 246 | self.prior = MatrixNormalWithKnownPrecision(**hypparams) 247 | self.noise = noise # assumed stationary over all time steps 248 | 249 | def learn(self, data): 250 | for t in range(self.nb_steps): 251 | input = np.hstack((data['x'][:, t, :].T, data['u'][:, t, :].T)) 252 | target = data['xn'][:, t, :].T 253 | 254 | likelihood = LinearGaussianWithKnownPrecision(lmbda=np.linalg.inv(self.noise), affine=True) 255 | model = LinearGaussianWithMatrixNormal(self.prior, likelihood=likelihood, affine=True) 256 | model = model.max_aposteriori(y=target, x=input) 257 | 258 | self.A[..., t] = model.likelihood.A[:, :self.dm_state] 259 | self.B[..., t] = model.likelihood.A[:, self.dm_state:self.dm_state + self.dm_act] 260 | self.c[..., t] = model.likelihood.A[:, -1] 261 | self.sigma[..., t] = model.likelihood.sigma 262 | 263 | 264 | class LinearGaussianControl: 265 | def __init__(self, dm_state, dm_act, nb_steps, init_ctl_sigma=1.): 266 | self.dm_state = dm_state 267 | self.dm_act = dm_act 268 | self.nb_steps = nb_steps 269 | 270 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 271 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 272 | 273 | self.sigma = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 274 | for t in range(self.nb_steps): 275 | self.sigma[..., t] = init_ctl_sigma * np.eye(self.dm_act) 276 | 277 | @property 278 | def params(self): 279 | return self.K, self.kff, self.sigma 280 | 281 | @params.setter 282 | def params(self, values): 283 | self.K, self.kff, self.sigma = values 284 | 285 | def 
mean(self, x, t): 286 | return np.einsum('kh,h->k', self.K[..., t], x) + self.kff[..., t] 287 | 288 | def sample(self, x, t, stoch=True): 289 | mu = self.mean(x, t) 290 | if stoch: 291 | return np.random.multivariate_normal(mean=mu, cov=self.sigma[..., t]) 292 | else: 293 | return mu 294 | 295 | def forward(self, xdist, t): 296 | x_mu, x_sigma = xdist.mu[..., t], xdist.sigma[..., t] 297 | K, kff, ctl_sigma = self.K[..., t], self.kff[..., t], self.sigma[..., t] 298 | 299 | u_mu = K @ x_mu + kff 300 | u_sigma = ctl_sigma + K @ x_sigma @ K.T 301 | u_sigma = 0.5 * (u_sigma + u_sigma.T) 302 | 303 | return u_mu, u_sigma 304 | 305 | 306 | def pass_alpha_as_vector(f): 307 | def wrapper(self, alpha, *args): 308 | assert alpha is not None 309 | 310 | if alpha.shape[0] == 1: 311 | alpha = alpha * np.ones((self.nb_steps, )) 312 | 313 | return f(self, alpha, *args) 314 | return wrapper 315 | -------------------------------------------------------------------------------- /trajopt/ilqr/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) -------------------------------------------------------------------------------- /trajopt/ilqr/__init__.py: -------------------------------------------------------------------------------- 1 | from .ilqr import iLQR 2 | -------------------------------------------------------------------------------- /trajopt/ilqr/ilqr.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.ilqr.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.ilqr.objects import QuadraticStateValue, QuadraticStateActionValue 5 | from trajopt.ilqr.objects import LinearControl 6 | 7 | from trajopt.ilqr.core import backward_pass 8 | 9 | 10 | class iLQR: 11 | 12 | def __init__(self, env, nb_steps, init_state, 13 | activation=None, slew_rate=False, 14 | action_penalty=None, 15 | alphas=np.power(10., np.linspace(0, -3, 11)), 16 | lmbda=1., dlmbda=1., min_lmbda=1e-6, 17 | max_lmbda=1e6, mult_lmbda=1.6, tolfun=1e-6, 18 | tolgrad=1e-4, min_imp=0., reg=1): 19 | 20 | self.env = env 21 | 22 | # expose necessary functions 23 | self.env_dyn = self.env.unwrapped.dynamics 24 | self.env_cost = self.env.unwrapped.cost 25 | self.env_init = init_state 26 | 27 | self.ulim = self.env.action_space.high 28 | 29 | self.dm_state = self.env.observation_space.shape[0] 30 | self.dm_act = self.env.action_space.shape[0] 31 | self.nb_steps = nb_steps 32 | 33 | # use slew rate penalty or not 34 | self.env.unwrapped.slew_rate = slew_rate 35 | if action_penalty is not None: 36 | self.env.unwrapped.uw = action_penalty * np.ones((self.dm_act, )) 37 | 38 | # backtracking 39 | self.alphas = alphas 40 | self.alpha = None 41 | 42 | self.lmbda = lmbda 43 | self.dlmbda = dlmbda 44 | self.min_lmbda = min_lmbda 45 | self.max_lmbda = max_lmbda 46 | self.mult_lmbda = mult_lmbda 47 | 48 | # regularization type 49 | self.reg = reg 50 | 51 | # minimum relative improvement 52 | self.min_imp = min_imp 53 | 54 | # stopping criterion
55 | self.tolfun = tolfun 56 | self.tolgrad = tolgrad 57 | 58 | # reference trajectory 59 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 60 | self.xref[..., 0] = self.env_init 61 | 62 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 63 | 64 | self.vfunc = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 65 | self.qfunc = QuadraticStateActionValue(self.dm_state, self.dm_act, self.nb_steps) 66 | 67 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 68 | 69 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 70 | self.ctl.kff = 1e-4 * np.random.randn(self.dm_act, self.nb_steps) 71 | 72 | # temporal activation of the cost function, e.g. sigmoid shaped or discounted 73 | if activation is None: 74 | self.weighting = np.ones((self.nb_steps + 1, )) 75 | elif "mult" in activation and "shift" in activation: 76 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 77 | self.weighting = 1. / (1. + np.exp(- activation['mult'] * (t - activation['shift']))) 78 | elif "discount" in activation: 79 | self.weighting = np.ones((self.nb_steps + 1,)) 80 | gamma = activation["discount"] * np.ones((self.nb_steps, )) 81 | self.weighting[1:] = np.cumprod(gamma) 82 | else: 83 | raise NotImplementedError 84 | 85 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 86 | 87 | self.last_return = - np.inf 88 | 89 | def forward_pass(self, ctl, alpha): 90 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 91 | action = np.zeros((self.dm_act, self.nb_steps)) 92 | cost = np.zeros((self.nb_steps + 1, )) 93 | 94 | state[..., 0] = self.env_init 95 | for t in range(self.nb_steps): 96 | _act = ctl.action(state, alpha, self.xref, self.uref, t) 97 | action[..., t] = np.clip(_act, -self.ulim, self.ulim) 98 | cost[..., t] = self.env_cost(state[..., t], action[..., t], action[..., t - 1], self.weighting[t]) 99 | state[..., t + 1] = self.env_dyn(state[..., t], action[..., t]) 100 | 101 | cost[..., -1] = self.env_cost(state[..., -1], np.zeros((self.dm_act, )), np.zeros((self.dm_act, )), self.weighting[-1]) 102 | return state, action, cost 103 | 104 | def backward_pass(self): 105 | lc = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 106 | xvalue = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 107 | xuvalue = QuadraticStateActionValue(self.dm_state, self.dm_act, self.nb_steps) 108 | 109 | xuvalue.Qxx, xuvalue.Qux, xuvalue.Quu,\ 110 | xuvalue.qx, xuvalue.qu,\ 111 | xvalue.V, xvalue.v, dV,\ 112 | lc.K, lc.kff, diverge = backward_pass(self.cost.Cxx, self.cost.cx, self.cost.Cuu, 113 | self.cost.cu, self.cost.Cxu, 114 | self.dyn.A, self.dyn.B, 115 | self.lmbda, self.reg, 116 | self.dm_state, self.dm_act, self.nb_steps) 117 | return lc, xvalue, xuvalue, dV, diverge 118 | 119 | def plot(self): 120 | import matplotlib.pyplot as plt 121 | 122 | plt.figure() 123 | 124 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 125 | for k in range(self.dm_state): 126 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 127 | plt.plot(t, self.xref[k, :], '-b') 128 | 129 | t = np.linspace(0, self.nb_steps, self.nb_steps) 130 | for k in range(self.dm_act): 131 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 132 | plt.plot(t, self.uref[k, :], '-g') 133 | 134 | plt.show() 135 | 136 | def run(self, nb_iter=25, verbose=False): 137 | _trace = [] 138 | # init trajectory 139 | for alpha in self.alphas: 140 | _state, _action, _cost = self.forward_pass(self.ctl, alpha) 141 | if np.all(_state < 1e8): 142 | self.xref = _state 143 | self.uref
= _action 144 | self.last_return = np.sum(_cost) 145 | break 146 | else: 147 | print("Initial trajectory diverges") 148 | 149 | _trace.append(self.last_return) 150 | 151 | for iter in range(nb_iter): 152 | # get linear system dynamics around ref traj. 153 | self.dyn.taylor_expansion(self.xref, self.uref) 154 | 155 | # get quadratic cost around ref traj. 156 | self.cost.taylor_expansion(self.xref, self.uref, self.weighting) 157 | 158 | xvalue, xuvalue = None, None 159 | lc, dvalue = None, None 160 | # execute a backward pass 161 | backpass_done = False 162 | while not backpass_done: 163 | lc, xvalue, xuvalue, dvalue, diverge = self.backward_pass() 164 | if np.any(diverge): 165 | # increase lmbda 166 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 167 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 168 | if self.lmbda > self.max_lmbda: 169 | break 170 | else: 171 | continue 172 | else: 173 | backpass_done = True 174 | 175 | # terminate if gradient too small 176 | _g_norm = np.mean(np.max(np.abs(lc.kff) / (np.abs(self.uref) + 1.), axis=1)) 177 | if _g_norm < self.tolgrad and self.lmbda < 1e-5: 178 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. / self.mult_lmbda) 179 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 180 | break 181 | 182 | _state, _action = None, None 183 | _return, _dreturn = None, None 184 | # execute a forward pass 185 | fwdpass_done = False 186 | if backpass_done: 187 | for alpha in self.alphas: 188 | self.alpha = alpha 189 | 190 | # apply on actual system 191 | _state, _action, _cost = self.forward_pass(ctl=lc, alpha=self.alpha) 192 | 193 | # summed mean return 194 | _return = np.sum(_cost) 195 | 196 | # check return improvement 197 | _dreturn = self.last_return - _return 198 | _expected = - 1. * alpha * (dvalue[0] + alpha * dvalue[1]) 199 | _imp = _dreturn / _expected 200 | if _imp >= self.min_imp: 201 | fwdpass_done = True 202 | break 203 | 204 | # accept or reject 205 | if fwdpass_done: 206 | # decrease lmbda 207 | self.dlmbda = np.minimum(self.dlmbda / self.mult_lmbda, 1. 
/ self.mult_lmbda) 208 | self.lmbda = self.lmbda * self.dlmbda * (self.lmbda > self.min_lmbda) 209 | 210 | self.xref = _state 211 | self.uref = _action 212 | self.last_return = _return 213 | 214 | self.vfunc = xvalue 215 | self.qfunc = xuvalue 216 | 217 | self.ctl = lc 218 | 219 | _trace.append(self.last_return) 220 | 221 | # terminate if objective tolerance is reached 222 | if _dreturn < self.tolfun: 223 | break 224 | else: 225 | # increase lmbda 226 | self.dlmbda = np.maximum(self.dlmbda * self.mult_lmbda, self.mult_lmbda) 227 | self.lmbda = np.maximum(self.lmbda * self.dlmbda, self.min_lmbda) 228 | if self.lmbda > self.max_lmbda: 229 | break 230 | else: 231 | continue 232 | 233 | if verbose: 234 | print("iter: ", iter, 235 | " return: ", _return) 236 | 237 | return _trace 238 | -------------------------------------------------------------------------------- /trajopt/ilqr/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | 13 | 14 | class QuadraticStateActionValue: 15 | def __init__(self, dm_state, dm_act, nb_steps): 16 | self.dm_state = dm_state 17 | self.dm_act = dm_act 18 | self.nb_steps = nb_steps 19 | 20 | self.Qxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 21 | self.Quu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 22 | self.Qux = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 23 | 24 | self.qx = np.zeros((self.dm_state, self.nb_steps, )) 25 | self.qu = np.zeros((self.dm_act, self.nb_steps, )) 26 | 27 | 28 | class QuadraticCost: 29 | def __init__(self, dm_state, dm_act, nb_steps): 30 | self.dm_state = dm_state 31 | self.dm_act = dm_act 32 | 33 | self.nb_steps = nb_steps 34 | 35 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 36 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 37 | 38 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 39 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 40 | 41 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 42 | 43 | @property 44 | def params(self): 45 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu 46 | 47 | @params.setter 48 | def params(self, values): 49 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu = values 50 | 51 | 52 | class AnalyticalQuadraticCost(QuadraticCost): 53 | def __init__(self, f, dm_state, dm_act, nb_steps): 54 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 55 | 56 | self.f = f 57 | 58 | self.dcdxx = hessian(self.f, 0) 59 | self.dcduu = hessian(self.f, 1) 60 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 61 | 62 | self.dcdx = jacobian(self.f, 0) 63 | self.dcdu = jacobian(self.f, 1) 64 | 65 | def evalf(self, x, u, u_last, a): 66 | return self.f(x, u, u_last, a) 67 | 68 | def taylor_expansion(self, x, u, a): 69 | # pad last time step of action traj.
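# the cost signature is (x, u, u_last, a): the appended zero column stands in for
# the non-existent action at the terminal time step, and through _u[..., t - 1]
# it also acts as the "previous action" at t = 0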
70 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 71 | 72 | for t in range(self.nb_steps): 73 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 74 | self.Cxx[..., t] = self.dcdxx(*_in) 75 | self.Cuu[..., t] = self.dcduu(*_in) 76 | self.Cxu[..., t] = self.dcdxu(*_in) 77 | self.cx[..., t] = self.dcdx(*_in) 78 | self.cu[..., t] = self.dcdu(*_in) 79 | 80 | 81 | class LinearDynamics: 82 | def __init__(self, dm_state, dm_act, nb_steps): 83 | self.dm_state = dm_state 84 | self.dm_act = dm_act 85 | self.nb_steps = nb_steps 86 | 87 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 88 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 89 | 90 | @property 91 | def params(self): 92 | return self.A, self.B 93 | 94 | @params.setter 95 | def params(self, values): 96 | self.A, self.B = values 97 | 98 | def sample(self, x, u): 99 | pass 100 | 101 | 102 | class AnalyticalLinearDynamics(LinearDynamics): 103 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 104 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 105 | 106 | self.f = f_dyn 107 | 108 | self.dfdx = jacobian(self.f, 0) 109 | self.dfdu = jacobian(self.f, 1) 110 | 111 | def evalf(self, x, u): 112 | return self.f(x, u) 113 | 114 | def taylor_expansion(self, x, u): 115 | for t in range(self.nb_steps): 116 | self.A[..., t] = self.dfdx(x[..., t], u[..., t]) 117 | self.B[..., t] = self.dfdu(x[..., t], u[..., t]) 118 | 119 | 120 | class LinearControl: 121 | def __init__(self, dm_state, dm_act, nb_steps): 122 | self.dm_state = dm_state 123 | self.dm_act = dm_act 124 | self.nb_steps = nb_steps 125 | 126 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 127 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 128 | 129 | @property 130 | def params(self): 131 | return self.K, self.kff 132 | 133 | @params.setter 134 | def params(self, values): 135 | self.K, self.kff = values 136 | 137 | def action(self, x, alpha, xref, uref, t): 138 | dx = x[..., t] - xref[:, t] 139 | return uref[:, t] + alpha * self.kff[..., t] + self.K[..., t] @ dx 140 | -------------------------------------------------------------------------------- /trajopt/ilqr/src/util.cpp: -------------------------------------------------------------------------------- 1 | #include <armadillo> 2 | #include <pybind11/pybind11.h> 3 | #include <pybind11/numpy.h> 4 | 5 | namespace py = pybind11; 6 | 7 | using namespace arma; 8 | 9 | 10 | typedef py::array_t<double, py::array::f_style | py::array::forcecast> array_tf; 11 | typedef py::array_t<double, py::array::c_style | py::array::forcecast> array_tc; 12 | 13 | 14 | cube array_to_cube(array_tf m) { 15 | 16 | py::buffer_info _m_buff = m.request(); 17 | int n_rows = _m_buff.shape[0]; 18 | int n_cols = _m_buff.shape[1]; 19 | int n_slices = _m_buff.shape[2]; 20 | 21 | cube _m_arma((double *)_m_buff.ptr, n_rows, n_cols, n_slices); 22 | 23 | return _m_arma; 24 | } 25 | 26 | 27 | mat array_to_mat(array_tf m) { 28 | 29 | py::buffer_info _m_buff = m.request(); 30 | int n_rows = _m_buff.shape[0]; 31 | int n_cols = _m_buff.shape[1]; 32 | 33 | mat _m_arma((double *)_m_buff.ptr, n_rows, n_cols); 34 | 35 | return _m_arma; 36 | } 37 | 38 | 39 | vec array_to_vec(array_tf m) { 40 | 41 | py::buffer_info _m_buff = m.request(); 42 | int n_rows = _m_buff.shape[0]; 43 | 44 | vec _m_vec((double *)_m_buff.ptr, n_rows); 45 | 46 | return _m_vec; 47 | } 48 | 49 | 50 | array_tf cube_to_array(cube m) { 51 | 52 | auto _m_array = array_tf({m.n_rows, m.n_cols, m.n_slices}); 53 | 54 | py::buffer_info _m_buff = _m_array.request(); 55 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols * m.n_slices); 56 | 57 | return _m_array; 58 | }
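// armadillo stores data column-major (Fortran order); the f_style flag on the
// array typedefs above matches that layout, so the raw memcpy calls in these
// converters round-trip between numpy and armadillo without any reordering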
59 | 60 | 61 | array_tf mat_to_array(mat m) { 62 | 63 | auto _m_array = array_tf({m.n_rows, m.n_cols}); 64 | 65 | py::buffer_info _m_buff = _m_array.request(); 66 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows * m.n_cols); 67 | 68 | return _m_array; 69 | } 70 | 71 | 72 | array_tf vec_to_array(vec m) { 73 | 74 | auto _m_array = array_tf({m.n_rows}); 75 | 76 | py::buffer_info _m_buff = _m_array.request(); 77 | std::memcpy(_m_buff.ptr, m.memptr(), sizeof(double) * m.n_rows); 78 | 79 | return _m_array; 80 | } 81 | 82 | 83 | py::tuple backward_pass(array_tf _Cxx, array_tf _cx, array_tf _Cuu, 84 | array_tf _cu, array_tf _Cxu, 85 | array_tf _A, array_tf _B, 86 | double lmbda, int reg, 87 | int dm_state, int dm_act, int nb_steps) { 88 | 89 | // inputs 90 | cube Cxx = array_to_cube(_Cxx); 91 | mat cx = array_to_mat(_cx); 92 | cube Cuu = array_to_cube(_Cuu); 93 | mat cu = array_to_mat(_cu); 94 | cube Cxu = array_to_cube(_Cxu); 95 | 96 | cube A = array_to_cube(_A); 97 | cube B = array_to_cube(_B); 98 | 99 | // outputs 100 | cube Q(dm_state + dm_act, dm_state + dm_act, nb_steps); 101 | cube Qxx(dm_state, dm_state, nb_steps); 102 | cube Qux(dm_act, dm_state, nb_steps); 103 | cube Quu(dm_act, dm_act, nb_steps); 104 | mat qx(dm_state, nb_steps); 105 | mat qu(dm_act, nb_steps); 106 | 107 | cube Qux_reg(dm_act, dm_state, nb_steps); 108 | cube Quu_reg(dm_act, dm_act, nb_steps); 109 | cube Quu_inv(dm_act, dm_act, nb_steps); 110 | 111 | cube V(dm_state, dm_state, nb_steps + 1); 112 | mat v(dm_state, nb_steps + 1); 113 | vec dV(2, fill::zeros); 114 | 115 | cube V_reg(dm_state, dm_state, nb_steps + 1); 116 | 117 | cube K(dm_act, dm_state, nb_steps); 118 | mat kff(dm_act, nb_steps); 119 | 120 | int _diverge = 0; 121 | 122 | // last time step 123 | V.slice(nb_steps) = Cxx.slice(nb_steps); 124 | v.col(nb_steps) = cx.col(nb_steps); 125 | 126 | for(int i = nb_steps - 1; i >= 0; --i) 127 | { 128 | Qxx.slice(i) = Cxx.slice(i) + A.slice(i).t() * V.slice(i+1) * A.slice(i); 129 | Quu.slice(i) = Cuu.slice(i) + B.slice(i).t() * V.slice(i+1) * B.slice(i); 130 | Qux.slice(i) = (Cxu.slice(i) + A.slice(i).t() * V.slice(i+1) * B.slice(i)).t(); 131 | 132 | qu.col(i) = cu.col(i) + B.slice(i).t() * v.col(i+1); 133 | qx.col(i) = cx.col(i) + A.slice(i).t() * v.col(i+1); 134 | 135 | V_reg.slice(i+1) = V.slice(i+1); 136 | if (reg==2) 137 | V_reg.slice(i+1) += lmbda * eye(dm_state, dm_state); 138 | 139 | Qux_reg.slice(i) = (Cxu.slice(i) + A.slice(i).t() * V_reg.slice(i+1) * B.slice(i)).t(); 140 | 141 | Quu_reg.slice(i) = Cuu.slice(i) + B.slice(i).t() * V_reg.slice(i+1) * B.slice(i); 142 | if (reg==1) 143 | Quu_reg.slice(i) += lmbda * eye(dm_act, dm_act); 144 | 145 | if (!(Quu_reg.slice(i)).is_sympd()) { 146 | _diverge = i; 147 | break; 148 | } 149 | 150 | Quu_inv.slice(i) = inv(Quu_reg.slice(i)); 151 | K.slice(i) = - Quu_inv.slice(i) * Qux_reg.slice(i); 152 | kff.col(i) = - Quu_inv.slice(i) * qu.col(i); 153 | 154 | dV += join_vert(kff.col(i).t() * qu.col(i), 0.5 * kff.col(i).t() * Quu.slice(i) * kff.col(i)); 155 | 156 | v.col(i) = qx.col(i) + K.slice(i).t() * Quu.slice(i) * kff.col(i) + 157 | K.slice(i).t() * qu.col(i) + Qux.slice(i).t() * kff.col(i); 158 | 159 | V.slice(i) = Qxx.slice(i) + K.slice(i).t() * Quu.slice(i) * K.slice(i) + 160 | K.slice(i).t() * Qux.slice(i) + Qux.slice(i).t() * K.slice(i); 161 | V.slice(i) = 0.5 * (V.slice(i) + V.slice(i).t()); 162 | } 163 | 164 | // transform outputs to numpy 165 | array_tf _Qxx = cube_to_array(Qxx); 166 | array_tf _Qux = cube_to_array(Qux); 167 | array_tf _Quu =
cube_to_array(Quu); 168 | 169 | array_tf _qx = mat_to_array(qx); 170 | array_tf _qu = mat_to_array(qu); 171 | 172 | array_tf _V = cube_to_array(V); 173 | array_tf _v = mat_to_array(v); 174 | array_tf _dV = vec_to_array(dV); 175 | 176 | array_tf _K = cube_to_array(K); 177 | array_tf _kff = mat_to_array(kff); 178 | 179 | py::tuple output = py::make_tuple(_Qxx, _Qux, _Quu, _qx, _qu, 180 | _V, _v, _dV, _K, _kff, _diverge); 181 | return output; 182 | } 183 | 184 | 185 | PYBIND11_MODULE(core, m) 186 | { 187 | m.def("backward_pass", &backward_pass); 188 | } 189 | -------------------------------------------------------------------------------- /trajopt/rgps/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(core) 3 | 4 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/") 5 | 6 | set(ARMADILLO_LIBRARY "$ENV{HOME}/libs/armadillo/") 7 | include_directories(${ARMADILLO_LIBRARY}/include) 8 | 9 | find_package(pybind11) 10 | pybind11_add_module(core src/util.cpp) 11 | 12 | set(OPENBLAS_LIBRARY "$ENV{HOME}/libs/OpenBLAS/") 13 | target_link_libraries(core PRIVATE ${OPENBLAS_LIBRARY}/libopenblas.a pthread gfortran) 14 | -------------------------------------------------------------------------------- /trajopt/rgps/__init__.py: -------------------------------------------------------------------------------- 1 | from .mfrgps import MFRGPS 2 | from .mbrgps import MBRGPS 3 | from .lrgps import LRGPS 4 | -------------------------------------------------------------------------------- /trajopt/riccati/__init__.py: -------------------------------------------------------------------------------- 1 | from .riccati import Riccati 2 | -------------------------------------------------------------------------------- /trajopt/riccati/objects.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import jacobian, hessian 3 | 4 | 5 | class QuadraticStateValue: 6 | def __init__(self, dm_state, nb_steps): 7 | self.dm_state = dm_state 8 | self.nb_steps = nb_steps 9 | 10 | self.V = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 11 | self.v = np.zeros((self.dm_state, self.nb_steps, )) 12 | 13 | 14 | class QuadraticCost: 15 | def __init__(self, dm_state, dm_act, nb_steps): 16 | self.dm_state = dm_state 17 | self.dm_act = dm_act 18 | 19 | self.nb_steps = nb_steps 20 | 21 | self.Cxx = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 22 | self.cx = np.zeros((self.dm_state, self.nb_steps)) 23 | 24 | self.Cuu = np.zeros((self.dm_act, self.dm_act, self.nb_steps)) 25 | self.cu = np.zeros((self.dm_act, self.nb_steps)) 26 | 27 | self.Cxu = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 28 | 29 | @property 30 | def params(self): 31 | return self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu 32 | 33 | @params.setter 34 | def params(self, values): 35 | self.Cxx, self.cx, self.Cuu, self.cu, self.Cxu = values 36 | 37 | 38 | class AnalyticalQuadraticCost(QuadraticCost): 39 | def __init__(self, f, dm_state, dm_act, nb_steps): 40 | super(AnalyticalQuadraticCost, self).__init__(dm_state, dm_act, nb_steps) 41 | 42 | self.f = f 43 | 44 | self.dcdxx = hessian(self.f, 0) 45 | self.dcduu = hessian(self.f, 1) 46 | self.dcdxu = jacobian(jacobian(self.f, 0), 1) 47 | 48 | self.dcdx = jacobian(self.f, 0) 49 | self.dcdu = jacobian(self.f, 1) 50 | 51 | def evalf(self, x, u, u_last, a): 52 | return self.f(x, u, u_last, a) 53 | 54 | def 
taylor_expansion(self, x, u, a): 55 | # pad last time step of action traj. 56 | _u = np.hstack((u, np.zeros((self.dm_act, 1)))) 57 | 58 | for t in range(self.nb_steps): 59 | _in = tuple([x[..., t], _u[..., t], _u[..., t - 1], a[t]]) 60 | self.Cxx[..., t] = 0.5 * self.dcdxx(*_in) 61 | self.Cuu[..., t] = 0.5 * self.dcduu(*_in) 62 | self.Cxu[..., t] = 0.5 * self.dcdxu(*_in) 63 | 64 | self.cx[..., t] = self.dcdx(*_in) - self.dcdxx(*_in) @ x[..., t] - 2. * self.dcdxu(*_in) @ _u[..., t] 65 | self.cu[..., t] = self.dcdu(*_in) - self.dcduu(*_in) @ _u[..., t] - 2. * x[..., t].T @ self.dcdxu(*_in) 66 | 67 | 68 | class LinearDynamics: 69 | def __init__(self, dm_state, dm_act, nb_steps): 70 | self.dm_state = dm_state 71 | self.dm_act = dm_act 72 | self.nb_steps = nb_steps 73 | 74 | self.A = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) 75 | self.B = np.zeros((self.dm_state, self.dm_act, self.nb_steps)) 76 | self.c = np.zeros((self.dm_state, self.nb_steps)) 77 | 78 | @property 79 | def params(self): 80 | return self.A, self.B, self.c 81 | 82 | @params.setter 83 | def params(self, values): 84 | self.A, self.B, self.c = values 85 | 86 | def sample(self, x, u): 87 | pass 88 | 89 | 90 | class AnalyticalLinearDynamics(LinearDynamics): 91 | def __init__(self, f_dyn, dm_state, dm_act, nb_steps): 92 | super(AnalyticalLinearDynamics, self).__init__(dm_state, dm_act, nb_steps) 93 | 94 | self.f = f_dyn 95 | 96 | self.dfdx = jacobian(self.f, 0) 97 | self.dfdu = jacobian(self.f, 1) 98 | 99 | def evalf(self, x, u): 100 | return self.f(x, u) 101 | 102 | def taylor_expansion(self, x, u): 103 | for t in range(self.nb_steps): 104 | self.A[..., t] = self.dfdx(x[..., t], u[..., t]) 105 | self.B[..., t] = self.dfdu(x[..., t], u[..., t]) 106 | # residual of the Taylor expansion 107 | self.c[..., t] = self.evalf(x[..., t], u[..., t]) -\ 108 | self.A[..., t] @ x[..., t] - self.B[..., t] @ u[..., t] 109 | 110 | 111 | class LinearControl: 112 | def __init__(self, dm_state, dm_act, nb_steps): 113 | self.dm_state = dm_state 114 | self.dm_act = dm_act 115 | self.nb_steps = nb_steps 116 | 117 | self.K = np.zeros((self.dm_act, self.dm_state, self.nb_steps)) 118 | self.kff = np.zeros((self.dm_act, self.nb_steps)) 119 | 120 | @property 121 | def params(self): 122 | return self.K, self.kff 123 | 124 | @params.setter 125 | def params(self, values): 126 | self.K, self.kff = values 127 | 128 | def action(self, x, t): 129 | return self.kff[..., t] + self.K[..., t] @ x 130 | -------------------------------------------------------------------------------- /trajopt/riccati/riccati.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from trajopt.riccati.objects import AnalyticalLinearDynamics, AnalyticalQuadraticCost 4 | from trajopt.riccati.objects import QuadraticStateValue 5 | from trajopt.riccati.objects import LinearControl 6 | 7 | 8 | class Riccati: 9 | 10 | def __init__(self, env, nb_steps, 11 | init_state, activation=None): 12 | 13 | self.env = env 14 | 15 | # expose necessary functions 16 | self.env_dyn = self.env.unwrapped.dynamics 17 | self.env_noise = self.env.unwrapped.sigma 18 | self.env_cost = self.env.unwrapped.cost 19 | self.env_init = init_state 20 | 21 | self.ulim = self.env.action_space.high 22 | 23 | self.dm_state = self.env.observation_space.shape[0] 24 | self.dm_act = self.env.action_space.shape[0] 25 | self.nb_steps = nb_steps 26 | 27 | # reference trajectory 28 | self.xref = np.zeros((self.dm_state, self.nb_steps + 1)) 29 | self.xref[...,
0] = self.env_init[0] 30 | 31 | self.uref = np.zeros((self.dm_act, self.nb_steps)) 32 | 33 | self.vfunc = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 34 | self.dyn = AnalyticalLinearDynamics(self.env_dyn, self.dm_state, self.dm_act, self.nb_steps) 35 | self.ctl = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 36 | 37 | # temporal activation of the cost function, e.g. sigmoid shaped or discounted 38 | if activation is None: 39 | self.weighting = np.ones((self.nb_steps + 1, )) 40 | elif "mult" in activation and "shift" in activation: 41 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 42 | self.weighting = 1. / (1. + np.exp(- activation['mult'] * (t - activation['shift']))) 43 | elif "discount" in activation: 44 | self.weighting = np.ones((self.nb_steps + 1,)) 45 | gamma = activation["discount"] * np.ones((self.nb_steps, )) 46 | self.weighting[1:] = np.cumprod(gamma) 47 | else: 48 | raise NotImplementedError 49 | 50 | self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) 51 | 52 | def rollout(self, nb_episodes, env=None): 53 | if env is None: 54 | env = self.env 55 | env_cost = self.env_cost 56 | else: 57 | env = env 58 | env_cost = env.unwrapped.cost 59 | 60 | data = {'x': np.zeros((self.dm_state, self.nb_steps, nb_episodes)), 61 | 'u': np.zeros((self.dm_act, self.nb_steps, nb_episodes)), 62 | 'xn': np.zeros((self.dm_state, self.nb_steps, nb_episodes)), 63 | 'c': np.zeros((self.nb_steps + 1, nb_episodes))} 64 | 65 | for n in range(nb_episodes): 66 | x = env.reset() 67 | 68 | for t in range(self.nb_steps): 69 | u = self.ctl.action(x, t) 70 | data['u'][..., t, n] = u 71 | 72 | # evaluate the true cost of the rollout env 73 | c = env_cost(x, u, data['u'][..., t - 1, n], self.weighting[t]) 74 | data['c'][t, n] = c 75 | 76 | data['x'][..., t, n] = x 77 | x, _, _, _ = env.step(u) 78 | data['xn'][..., t, n] = x 79 | 80 | c = env_cost(x, np.zeros((self.dm_act, )), np.zeros((self.dm_act, )), self.weighting[-1]) 81 | data['c'][-1, n] = c 82 | 83 | return data 84 | 85 | def forward_pass(self, ctl): 86 | state = np.zeros((self.dm_state, self.nb_steps + 1)) 87 | action = np.zeros((self.dm_act, self.nb_steps)) 88 | cost = np.zeros((self.nb_steps + 1, )) 89 | 90 | state[..., 0] = self.env.reset() 91 | for t in range(self.nb_steps): 92 | action[..., t] = ctl.action(state[..., t], t) 93 | cost[..., t] = self.env_cost(state[..., t], action[..., t], action[..., t - 1], self.weighting[t]) 94 | state[..., t + 1], _, _, _ = self.env.step(action[..., t]) 95 | 96 | cost[..., -1] = self.env_cost(state[..., -1], np.zeros((self.dm_act, )), 97 | np.zeros((self.dm_act, )), self.weighting[-1]) 98 | return state, action, cost 99 | 100 | def backward_pass(self): 101 | lc = LinearControl(self.dm_state, self.dm_act, self.nb_steps) 102 | xvalue = QuadraticStateValue(self.dm_state, self.nb_steps + 1) 103 | 104 | xvalue.V[..., -1] = self.cost.Cxx[..., -1] 105 | xvalue.v[..., -1] = self.cost.cx[..., -1] 106 | 107 | for t in range(self.nb_steps - 1, -1, -1): 108 | Qxx = self.cost.Cxx[..., t] + self.dyn.A[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.A[..., t] 109 | Quu = self.cost.Cuu[..., t] + self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.B[..., t] 110 | Qux = self.cost.Cxu[..., t].T + self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.A[..., t] 111 | 112 | qx = self.cost.cx[..., t] + 2.0 * self.dyn.A[..., t].T @ xvalue.V[..., t + 1] @ self.dyn.c[..., t] +\ 113 | self.dyn.A[..., t].T @ xvalue.v[..., t + 1] 114 | 115 | qu = self.cost.cu[..., t] + 2.0 * self.dyn.B[..., t].T @ xvalue.V[..., t + 1] @
self.dyn.c[..., t] +\ 116 | self.dyn.B[..., t].T @ xvalue.v[..., t + 1] 117 | 118 | Quu_inv = np.linalg.inv(Quu) 119 | 120 | lc.K[..., t] = - Quu_inv @ Qux 121 | lc.kff[..., t] = - 0.5 * Quu_inv @ qu 122 | 123 | xvalue.V[..., t] = Qxx + Qux.T @ lc.K[..., t] 124 | xvalue.v[..., t] = qx + 2. * lc.kff[..., t].T @ Qux 125 | 126 | return lc, xvalue 127 | 128 | def plot(self, xref=None, uref=None): 129 | xref = self.xref if xref is None else xref 130 | uref = self.uref if uref is None else uref 131 | 132 | import matplotlib.pyplot as plt 133 | 134 | plt.figure() 135 | 136 | t = np.linspace(0, self.nb_steps, self.nb_steps + 1) 137 | for k in range(self.dm_state): 138 | plt.subplot(self.dm_state + self.dm_act, 1, k + 1) 139 | plt.plot(t, xref[k, :], '-b') 140 | 141 | t = np.linspace(0, self.nb_steps, self.nb_steps) 142 | for k in range(self.dm_act): 143 | plt.subplot(self.dm_state + self.dm_act, 1, self.dm_state + k + 1) 144 | plt.plot(t, uref[k, :], '-g') 145 | 146 | plt.show() 147 | 148 | def run(self): 149 | # get linear system dynamics around ref traj. 150 | self.dyn.taylor_expansion(self.xref, self.uref) 151 | 152 | # get quadratic cost around ref traj. 153 | self.cost.taylor_expansion(self.xref, self.uref, self.weighting) 154 | 155 | # backward pass to get ctrl. 156 | self.ctl, self.vfunc = self.backward_pass() 157 | 158 | # forward pass to get cost and traj. 159 | self.xref, self.uref, _cost = self.forward_pass(self.ctl) 160 | 161 | return np.sum(_cost) 162 | --------------------------------------------------------------------------------
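A minimal driver sketch for the Riccati solver above — an illustration, not a file from the repository. The environment id 'LQR-TO-v0' is hypothetical, and importing trajopt is assumed to register the custom environments (see trajopt/envs, outside this excerpt); the wrapped env must expose .dynamics, .sigma and .cost on env.unwrapped, as read in Riccati.__init__.

import gym
import autograd.numpy as np

import trajopt  # assumed to register the custom environments on import

from trajopt.riccati import Riccati

# hypothetical id; the actual registered names live in trajopt/envs/__init__.py
env = gym.make('LQR-TO-v0')

dm_state = env.observation_space.shape[0]

# Riccati reads the mean of the initial state from init_state[0]
init_state = (np.zeros((dm_state, )), 1e-2 * np.eye(dm_state))

solver = Riccati(env, nb_steps=60, init_state=init_state)
total_cost = solver.run()  # linearize, backward pass, forward pass on the env
solver.plot()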