├── gym_brt ├── quanser │ ├── quanser_wrapper │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── quanser_types.pxd │ │ ├── hil.pxd │ │ └── quanser_wrapper.pyx │ ├── __init__.py │ ├── qube_simulator.py │ └── qube_interfaces.py ├── envs │ ├── rendering │ │ ├── __init__.py │ │ └── qube_render.py │ ├── __init__.py │ ├── qube_rotor_env.py │ ├── qube_test_env.py │ ├── qube_dampen_env.py │ ├── qube_swingup_env.py │ ├── qube_balance_env.py │ └── qube_base_env.py ├── control │ ├── __init__.py │ └── control.py └── __init__.py ├── .gitattributes ├── QUBE-Servo_2_angled_pendulum.jpg ├── docs ├── README.md ├── alternatives.md ├── control.md ├── quanser.md └── envs.md ├── Pipfile ├── LICENSE ├── .gitignore ├── setup.py ├── environment.yaml ├── README.md └── tests ├── lqr.py ├── test.py └── notebooks └── ODE vs Euler speedtest.ipynb /gym_brt/quanser/quanser_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /gym_brt/envs/rendering/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_brt.envs.rendering.qube_render import QubeRendererVypthon as QubeRenderer 2 | -------------------------------------------------------------------------------- /QUBE-Servo_2_angled_pendulum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlueRiverTech/quanser-openai-driver/HEAD/QUBE-Servo_2_angled_pendulum.jpg -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Table of contents 3 | - [Setup 
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[dev-packages]
Cython = "==0.29.32"
autograd = "*"

[packages]
gym = "==0.21.0"
numpy = "==1.21.6"
scipy = "==1.7.3"
vpython = "==7.6.4"
gym-brt = {path = ".", editable = true}
gym 8 | from gym_brt import QubeSwingupEnv 9 | 10 | num_episodes = 10 11 | num_steps = 250 12 | 13 | env = QubeSwingupEnv() 14 | try: 15 | for episode in range(num_episodes): 16 | state = env.reset() 17 | for step in range(num_steps): 18 | action = env.action_space.sample() 19 | state, reward, done, _ = env.step(action) 20 | finally: 21 | env.close() 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/control.md: -------------------------------------------------------------------------------- 1 | 2 | # Controllers 3 | 4 | ### Control 5 | Control base class. 6 | 7 | ### NoControl 8 | Applies no action (sets voltages to zero). 9 | 10 | ### RandomControl 11 | Applies a random action (samples from the action space). 12 | 13 | ### AeroControl 14 | Uses PID control to minimize (pitch - reference pitch) + (yaw - reference yaw), where reference is the original position. 15 | 16 | ### QubeFlipUpControl 17 | Uses a mixed mode controller that uses gains found from LQR to do the flip up when the pendulum angle is over than 20 degrees off upright, and uses PID control and filtering to hold the pendulum upright when under 20 degrees. 18 | 19 | ### QubeHoldControl 20 | Holding control uses PID with filtering, and outside of 20 degrees use no control. 
21 | -------------------------------------------------------------------------------- /gym_brt/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_brt.envs.qube_swingup_env import ( 2 | QubeSwingupEnv, 3 | QubeSwingupSparseEnv, 4 | QubeSwingupFollowEnv, 5 | QubeSwingupFollowSparseEnv, 6 | ) 7 | from gym_brt.envs.qube_balance_env import ( 8 | QubeBalanceEnv, 9 | QubeBalanceSparseEnv, 10 | QubeBalanceFollowEnv, 11 | QubeBalanceFollowSparseEnv, 12 | ) 13 | from gym_brt.envs.qube_dampen_env import ( 14 | QubeDampenEnv, 15 | QubeDampenSparseEnv, 16 | QubeDampenFollowEnv, 17 | QubeDampenFollowSparseEnv, 18 | ) 19 | from gym_brt.envs.qube_test_env import ( 20 | QubeBalanceFollowSineWaveEnv, 21 | QubeSwingupFollowSineWaveEnv, 22 | QubeRotorFollowSineWaveEnv, 23 | QubeDampenFollowSineWaveEnv, 24 | ) 25 | from gym_brt.envs.qube_rotor_env import QubeRotorEnv, QubeRotorFollowEnv 26 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension( 8 | "quanser_wrapper", 9 | ["quanser_wrapper.pyx"], 10 | include_dirs=["/opt/quanser/hil_sdk/include", numpy.get_include()], 11 | libraries=[ 12 | "hil", 13 | "quanser_runtime", 14 | "quanser_common", 15 | "rt", 16 | "pthread", 17 | "dl", 18 | "m", 19 | "c", 20 | ], 21 | library_dirs=["/opt/quanser/hil_sdk/lib"], 22 | ) 23 | ] 24 | cy_extensions = cythonize(extensions, compiler_directives={"language_level" : "3"}) 25 | setup(ext_modules=cy_extensions, include_dirs=[numpy.get_include()]) 26 | -------------------------------------------------------------------------------- /docs/quanser.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Quanser 3 | The OpenAI Gym environments use one of the following to calculate each step. 4 | 5 | Quanser wrapper is a hardware wrapper that allows the Qube Servo 2 and Quanser Aero to be used directly in OpenAI Gym. 6 | Quanser Simulator simulates the Qube Servo 2 only (Aero is currently not supported for simulation). 7 | 8 | # Quanser Wrapper 9 | The implementation of a Python wrapper around Quanser's C-based HIL SDK. 10 | This is written in Cython. 11 | 12 | All changes to this are recompiled if you install the gym_brt package with Cython installed. 13 | 14 | # Quanser Simulator 15 | A simulator and wrapper to use a simulator directly in the Qube environments instead of using the real hardware. 16 | This is useful for transfer learning from simulation to real hardware and for people who don't have access to the Qube Servo 2. 17 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/quanser_types.pxd: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | 3 | cdef extern from "/opt/quanser/hil_sdk/include/quanser_types.h": 4 | ctypedef int t_error 5 | 6 | ctypedef np.npy_int8 t_boolean # must always be 8 bits 7 | ctypedef np.npy_int8 t_byte # must always be 8 bits 8 | ctypedef np.npy_uint8 t_ubyte # must always be 8 bits 9 | ctypedef np.npy_int16 t_short # must always be 16 bits 10 | ctypedef np.npy_uint16 t_ushort # must always be 16 bits 11 | ctypedef np.npy_int32 t_int # must always be 32 bits 12 | ctypedef np.npy_uint32 t_uint # must always be 32 bits 13 | ctypedef np.npy_float64 t_double # must always be 64 bits 14 | ctypedef np.npy_int64 t_long # must always be 64 bits 15 | 16 | ctypedef np.npy_int8 t_int8 17 | ctypedef np.npy_uint8 t_uint8 18 | ctypedef np.npy_int16 t_int16 19 | ctypedef np.npy_uint16 t_uint16 20 | ctypedef np.npy_int32 t_int32 21 | 
ctypedef np.npy_uint32 t_uint32 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Blue River Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/envs.md: -------------------------------------------------------------------------------- 1 | 2 | # Environments 3 | Quanser OpenAI Driver currently supports two pieces of hardware: the Qube Servo 2 USB and the Quanser Aero. 4 | 5 | # Qube 6 | For more information about the Qube click [here](https://www.quanser.com/products/qube-servo-2/) 7 | 8 | ### QubeBaseEnv 9 | The base class for Qube environments. 10 | Info: 11 | 12 | - Reset starts the pendulum from the bottom (at rest). 13 | - Has no reward function. 
### QubeBeginUprightEnv
Info:

- Reset starts the pendulum from the top (flipped up/inverted).
- The task is to hold the pendulum upright.
- Episode ends once the alpha angle is greater than 20 degrees or the theta angle is greater than 90 degrees.
- Reward is a function of the angles theta (arm angle) and alpha (pendulum), and the alpha angular velocity.
- Encourages the arm to stay centered, the pendulum to stay upright, and the system to stay stationary.
class build_ext(_build_ext):
    """Custom build_ext that defers the numpy include-dir lookup to build time.

    This lets ``setup_requires=["numpy"]`` install numpy first, so numpy's
    headers can be located even when numpy was not installed before running
    this setup.py.
    """

    def finalize_options(self):
        _build_ext.finalize_options(self)
        # Prevent numpy from thinking it is still in its setup process:
        # NOTE(review): ``__builtins__`` is a module when this file runs as a
        # script but a dict when imported as a module — this attribute-style
        # access presumably relies on the script case; confirm if this setup.py
        # is ever imported rather than executed.
        __builtins__.__NUMPY_SETUP__ = False
        import numpy

        # Make numpy's C headers visible to the Cython extension build.
        self.include_dirs.append(numpy.get_include())
| "dl", 28 | "m", 29 | "c", 30 | ], 31 | library_dirs=["/opt/quanser/hil_sdk/lib"], 32 | ) 33 | ] 34 | 35 | # If Cython is installed build from source otherwise use the precompiled version 36 | try: 37 | from Cython.Build import cythonize 38 | 39 | extensions = cythonize(extensions, compiler_directives={"language_level" : "3"}) 40 | except ImportError: 41 | pass 42 | 43 | 44 | # Hacky way to check if the HIL SDK is installed (allows to run on Mac OS) 45 | is_hil_sdk_installed = False 46 | if os.path.isdir("/opt/quanser/hil_sdk/lib"): 47 | is_hil_sdk_installed = True 48 | 49 | setup( 50 | name="gym_brt", 51 | version=0.1, 52 | cmdclass={"build_ext": build_ext} if is_hil_sdk_installed else {}, 53 | install_requires=["numpy", "gym", "scipy", "vpython"], 54 | setup_requires=["numpy"], 55 | ext_modules=extensions if is_hil_sdk_installed else None, 56 | description="Blue River's OpenAI Gym wrapper around Quanser hardware.", 57 | url="https://github.com/BlueRiverTech/quanser-openai-driver/", 58 | author="Blue River Technology", 59 | license="MIT", 60 | ) 61 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_rotor_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | 10 | class QubeRotorEnv(QubeBaseEnv): 11 | def _reward(self): 12 | # Few options for reward: 13 | # - high reward for large alpha_dot and small theta 14 | # - reward for matching the action of the RPM controller 15 | # - reward for how close the pendulum matches a clock hand going at a certain RPM 16 | 17 | theta_dist = (1 - np.abs(self._target_angle - self._theta)) / np.pi 18 | return np.clip(theta_dist * self._alpha_dot / 20, 0, 20) 19 | 20 | def _isdone(self): 21 | done = False 22 | done 
class QubeRotorFollowEnv(QubeRotorEnv):
    """Rotor environment whose observation additionally exposes the target angle."""

    def __init__(self, **kwargs):
        super(QubeRotorFollowEnv, self).__init__(**kwargs)
        # Observation is [theta, alpha, theta_dot, alpha_dot, target_angle];
        # the target is limited to +/-80 degrees.
        limits = np.asarray(
            [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
        )
        self.observation_space = spaces.Box(-limits, limits)

    def _get_state(self):
        """Return the 5-element observation including the current target angle."""
        return np.array(
            [
                self._theta,
                self._alpha,
                self._theta_dot,
                self._alpha_dot,
                self._target_angle,
            ],
            dtype=np.float64,
        )

    def _next_target_angle(self):
        """Resample the target roughly twice a second, otherwise keep it."""
        # Update the target angle twice a second on average at random intervals
        if np.random.randint(1, self._frequency / 2) != 1:
            return self._target_angle
        max_angle = 80 * (np.pi / 180)  # 80 degrees
        return np.random.uniform(-max_angle, max_angle)
autobahn==19.9.3 27 | - backcall==0.1.0 28 | - bleach==3.1.0 29 | - cffi==1.12.3 30 | - cloudpickle==1.2.2 31 | - cryptography==2.7 32 | - decorator==4.4.0 33 | - defusedxml==0.6.0 34 | - entrypoints==0.3 35 | - future==0.17.1 36 | - gym==0.14.0 37 | - ipykernel==5.1.2 38 | - ipython==7.8.0 39 | - ipython-genutils==0.2.0 40 | - ipywidgets==7.5.1 41 | - jedi==0.15.1 42 | - jinja2==2.10.1 43 | - jsonschema==3.0.2 44 | - jupyter==1.0.0 45 | - jupyter-client==5.3.3 46 | - jupyter-console==6.0.0 47 | - jupyter-core==4.5.0 48 | - markupsafe==1.1.1 49 | - mistune==0.8.4 50 | - nbconvert==5.6.0 51 | - nbformat==4.4.0 52 | - notebook==6.0.1 53 | - numpy==1.17.2 54 | - pandocfilters==1.4.2 55 | - parso==0.5.1 56 | - pexpect==4.7.0 57 | - pickleshare==0.7.5 58 | - prometheus-client==0.7.1 59 | - prompt-toolkit==2.0.9 60 | - ptyprocess==0.6.0 61 | - pycparser==2.19 62 | - pyglet==1.3.2 63 | - pygments==2.4.2 64 | - pyrsistent==0.15.4 65 | - python-dateutil==2.8.0 66 | - pyzmq==18.1.0 67 | - qtconsole==4.5.5 68 | - scipy==1.3.1 69 | - send2trash==1.5.0 70 | - six==1.12.0 71 | - terminado==0.8.2 72 | - testpath==0.4.2 73 | - tornado==6.0.3 74 | - traitlets==4.3.2 75 | - txaio==18.8.1 76 | - vpnotebook==0.1.3 77 | - vpython==7.5.1 78 | - wcwidth==0.1.7 79 | - webencodings==0.5.1 80 | - widgetsnbextension==3.5.1 81 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_test_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | from gym_brt.envs.qube_swingup_env import QubeSwingupFollowEnv 9 | from gym_brt.envs.qube_balance_env import QubeBalanceFollowEnv 10 | from gym_brt.envs.qube_dampen_env import QubeDampenFollowEnv 11 | from gym_brt.envs.qube_rotor_env import 
"""
A series of testing environments to visualize how a policy learned in each of
the `Follow` environments behaves when the target angle follows a sine wave.
"""


class QubeBalanceFollowSineWaveEnv(QubeBalanceFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3), advancing by one step per environment step.
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeSwingupFollowSineWaveEnv(QubeSwingupFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeRotorFollowSineWaveEnv(QubeRotorFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeDampenFollowSineWaveEnv(QubeDampenFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -180 and +180 degrees
        # (amplitude pi).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi * np.sin(self._episode_steps / self._frequency)
class QubeDampenSparseEnv(QubeDampenEnv):
    def _reward(self):
        """Sparse reward: 1 only when the pendulum hangs straight down
        (within 1 degree of alpha = +/-pi) with the arm centered, else 0."""
        within_range = True
        # Assume -pi < alpha < pi: alpha is near +/-pi when hanging down.
        # Bug fix: the original threshold (189 deg) could never be met since
        # |alpha| <= 180 deg, so this sparse reward was always 0. Use a
        # 1-degree tolerance (179 deg), mirroring the 1-degree theta check.
        within_range &= np.abs(self._alpha) > (179 * np.pi / 180)
        within_range &= np.abs(self._theta) < (1 * np.pi / 180)
        return 1 if within_range else 0
class QubeDampenFollowSparseEnv(QubeDampenFollowEnv):
    def _reward(self):
        """Sparse reward: 1 only when the pendulum hangs straight down
        (within 1 degree of alpha = +/-pi) with the arm centered, else 0."""
        within_range = True
        # Assume -pi < alpha < pi: alpha is near +/-pi when hanging down.
        # Bug fix: 189 deg was unreachable for |alpha| <= 180 deg; use a
        # 1-degree tolerance (179 deg) like the sibling QubeDampenSparseEnv.
        within_range &= np.abs(self._alpha) > (179 * np.pi / 180)
        within_range &= np.abs(self._theta) < (1 * np.pi / 180)
        # Bug fix: the original method fell off the end and implicitly
        # returned None instead of the 0/1 sparse reward.
        return 1 if within_range else 0
14 | 15 | 16 | ### Prerequisites 17 | Install the HIL SDK from Quanser.
18 | A mirror is available at https://github.com/quanser/hil_sdk_linux_x86_64.
19 | 20 | You can install the driver by: 21 | ```bash 22 | git clone https://github.com/quanser/hil_sdk_linux_x86_64.git 23 | sudo chmod a+x ./hil_sdk_linux_x86_64/setup_hil_sdk ./hil_sdk_linux_x86_64/uninstall_hil_sdk 24 | sudo ./hil_sdk_linux_x86_64/setup_hil_sdk 25 | ``` 26 | 27 | You also must have pip installed: 28 | ```bash 29 | sudo apt-get install python3-pip 30 | ``` 31 | 32 | 33 | ### Installation 34 | We recommend that you use a virtual environment such as [conda (recommended)](https://conda.io/docs/user-guide/getting-started.html), [virtualenv](https://virtualenv.pypa.io/en/stable/), or [Pipenv](https://pipenv.readthedocs.io/en/latest/) 35 | 36 | You can install the driver by cloning and pip-installing: 37 | ```bash 38 | git clone https://github.com/BlueRiverTech/quanser-openai-driver.git 39 | cd quanser-openai-driver 40 | pip3 install -e . 41 | ``` 42 | 43 | Once you have that setup: Run the classical control baseline (ensure the Qube is connected to your computer)
44 | ```bash 45 | python tests/test.py --env QubeSwingupEnv --controller flip 46 | ``` 47 | 48 | 49 | # Usage 50 | Usage is very similar to most OpenAI gym environments but **requires** that you close the environment when finished. 51 | Without safely closing the Env, bad things may happen. Usually you will not be able to reopen the board. 52 | 53 | This can be done with a context manager using a `with` statement 54 | ```python 55 | import gym 56 | from gym_brt import QubeSwingupEnv 57 | 58 | num_episodes = 10 59 | num_steps = 250 60 | 61 | with QubeSwingupEnv() as env: 62 | for episode in range(num_episodes): 63 | state = env.reset() 64 | for step in range(num_steps): 65 | action = env.action_space.sample() 66 | state, reward, done, _ = env.step(action) 67 | ``` 68 | 69 | Or can be closed manually by using `env.close()`. You can see an [example here](docs/alternatives.md#usage). 70 | 71 | 72 | # Environments 73 | Information about various environments can be found in [docs/envs](docs/envs.md) and our [whitepaper](https://arxiv.org/abs/2001.02254). 74 | 75 | 76 | # Control 77 | Information about baselines can be found in [docs/control](docs/control.md). 78 | 79 | 80 | # Hardware Wrapper 81 | Information about the Python wrapper for Quanser hardware and Qube Servo 2 simulator can be found in [docs/quanser](docs/quanser.md) and our [whitepaper](https://arxiv.org/abs/2001.02254). 
82 | 83 | 84 | # Citing 85 | If you use this in your research please cite the following [whitepaper](https://arxiv.org/abs/2001.02254): 86 | 87 | ``` 88 | @misc{2001.02254, 89 | author = {{Polzounov}, Kirill and {Sundar}, Ramitha and {Redden}, Lee}, 90 | title = "{Blue River Controls: A toolkit for Reinforcement Learning Control Systems on Hardware}", 91 | year = {2019}, 92 | eprint = {arXiv:2001.02254}, 93 | howpublished = {Accepted at the Workshop on Deep Reinforcement Learning at the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada.} 94 | } 95 | ``` 96 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_swingup_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | """ 10 | Description: 11 | A pendulum is attached to an un-actuated joint to a horizontal arm, 12 | which is actuated by a rotary motor. The pendulum begins 13 | downwards and the goal is flip the pendulum up and then to keep it from 14 | falling by applying a voltage on the motor which causes a torque on the 15 | horizontal arm. 16 | 17 | Source: 18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole 19 | problem described by Barto, Sutton, and Anderson, and implemented in 20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py 21 | This description is also modified from the description by the OpenAI 22 | team. 
Observation:
    Type: Box(4)
    Num Observation                          Min         Max
    0   Rotary arm angle (theta)             -90 deg     90 deg
    1   Pendulum angle (alpha)               -180 deg    180 deg
    2   Arm angular velocity (theta_dot)     -Inf        Inf
    3   Pendulum angular velocity (alpha_dot) -Inf       Inf
    Note: the velocities are limited by the physical system.
_next_target_angle(self): 97 | # Update the target angle twice a second on average at random intervals 98 | if np.random.randint(1, self._frequency / 2) == 1: 99 | max_angle = 80 * (np.pi / 180) # 80 degrees 100 | angle = np.random.uniform(-max_angle, max_angle) 101 | else: 102 | angle = self._target_angle 103 | return angle 104 | 105 | 106 | class QubeSwingupFollowSparseEnv(QubeSwingupFollowEnv): 107 | def _reward(self): 108 | within_range = True 109 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 110 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 111 | return 1 if within_range else 0 112 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_balance_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | 10 | """ 11 | Description: 12 | A pendulum is attached to an un-actuated joint to a horizontal arm, 13 | which is actuated by a rotary motor. The pendulum begins 14 | upright/inverted and the goal is to keep it from falling by applying a 15 | voltage on the motor which causes a torque on the horizontal arm. 16 | 17 | Source: 18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole 19 | problem described by Barto, Sutton, and Anderson, and implemented in 20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py 21 | This description is also modified from the description by the OpenAI 22 | team. 23 | 24 | Observation: 25 | Type: Box(4) 26 | Num Observation Min Max 27 | 0 Rotary arm angle (theta) -90 deg 90 deg 28 | 1 Pendulum angle (alpha) -20 deg 20 deg 29 | 2 Cart Velocity -Inf Inf 30 | 3 Pole Velocity -Inf Inf 31 | Note: the velocities are limited by the physical system. 
32 | 33 | Actions: 34 | Type: Real number (1-D Continuous) (voltage applied to motor) 35 | 36 | Reward: 37 | r(s_t, a_t) = 1 - (0.8 * abs(alpha) + 0.2 * abs(theta)) / pi 38 | 39 | Starting State: 40 | Theta = 0 + noise, alpha = 0 + noise 41 | 42 | Episode Termination: 43 | Alpha is greater than ±20° from upright, theta is greater than ±90°, or 44 | after 2048 steps 45 | """ 46 | 47 | 48 | class QubeBalanceEnv(QubeBaseEnv): 49 | def _reward(self): 50 | reward = 1 - ( 51 | (0.8 * np.abs(self._alpha) + 0.2 * np.abs(self._target_angle - self._theta)) 52 | / np.pi 53 | ) 54 | return max(reward, 0) # Clip for the follow env case 55 | 56 | def _isdone(self): 57 | done = False 58 | done |= self._episode_steps >= self._max_episode_steps 59 | done |= abs(self._theta) > (90 * np.pi / 180) 60 | done |= abs(self._alpha) > (20 * np.pi / 180) 61 | return done 62 | 63 | def reset(self): 64 | super(QubeBalanceEnv, self).reset() 65 | # Start the pendulum stationary at the top (stable point) 66 | state = self._reset_up() 67 | return state 68 | 69 | 70 | class QubeBalanceSparseEnv(QubeBalanceEnv): 71 | def _reward(self): 72 | within_range = True 73 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 74 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 75 | return 1 if within_range else 0 76 | 77 | 78 | class QubeBalanceFollowEnv(QubeBalanceEnv): 79 | def __init__(self, **kwargs): 80 | super(QubeBalanceFollowEnv, self).__init__(**kwargs) 81 | obs_max = np.asarray( 82 | [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64 83 | ) 84 | self.observation_space = spaces.Box(-obs_max, obs_max) 85 | 86 | def _get_state(self): 87 | state = np.array( 88 | [ 89 | self._theta, 90 | self._alpha, 91 | self._theta_dot, 92 | self._alpha_dot, 93 | self._target_angle, 94 | ], 95 | dtype=np.float64, 96 | ) 97 | return state 98 | 99 | def _next_target_angle(self): 100 | # Update the target angle twice a second on average at random intervals 101 | if np.random.randint(1, 
self._frequency / 2) == 1: 102 | max_angle = 80 * (np.pi / 180) # 80 degrees 103 | angle = np.random.uniform(-max_angle, max_angle) 104 | else: 105 | angle = self._target_angle 106 | return angle 107 | 108 | 109 | class QubeBalanceFollowSparseEnv(QubeBalanceFollowEnv): 110 | def _reward(self): 111 | within_range = True 112 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 113 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 114 | return 1 if within_range else 0 115 | -------------------------------------------------------------------------------- /gym_brt/quanser/qube_simulator.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | from scipy.integrate import odeint 6 | import numpy as np 7 | import math 8 | 9 | 10 | # Motor 11 | Rm = 8.4 # Resistance 12 | kt = 0.042 # Current-torque (N-m/A) 13 | km = 0.042 # Back-emf constant (V-s/rad) 14 | 15 | # Rotary Arm 16 | mr = 0.095 # Mass (kg) 17 | Lr = 0.085 # Total length (m) 18 | Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 19 | Dr = 0.00027 # Equivalent viscous damping coefficient (N-m-s/rad) 20 | 21 | # Pendulum Link 22 | mp = 0.024 # Mass (kg) 23 | Lp = 0.129 # Total length (m) 24 | Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 25 | Dp = 0.00005 # Equivalent viscous damping coefficient (N-m-s/rad) 26 | 27 | g = 9.81 # Gravity constant 28 | 29 | 30 | def diff_forward_model_ode(state, t, action, dt): 31 | theta, alpha, theta_dot, alpha_dot = state 32 | Vm = action 33 | tau = -(km * (Vm - km * theta_dot)) / Rm # torque 34 | 35 | # fmt: off 36 | # From Rotary Pendulum Workbook 37 | theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 
2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 38 | alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 39 | # fmt: on 40 | 41 | diff_state = np.array([theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot]).reshape( 42 | (4,) 43 | ) 44 | diff_state = np.array(diff_state, dtype="float64") 45 | return diff_state 46 | 47 | 48 | def forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps): 49 | t = np.linspace(0.0, dt, 2) # TODO: add and check integration steps here 50 | 51 | state = np.array([theta, alpha, theta_dot, alpha_dot]) 52 | next_state = np.array(odeint(diff_forward_model_ode, state, t, args=(Vm, dt)))[1, :] 53 | theta, alpha, theta_dot, alpha_dot = next_state 54 | 55 | theta = ((theta + np.pi) % (2 * np.pi)) - np.pi 56 | alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi 57 | 58 | return theta, alpha, theta_dot, alpha_dot 59 | 60 | 61 | def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps): 62 | dt /= integration_steps 63 | for step in range(integration_steps): 64 | tau = -(km * (Vm - km * theta_dot)) / Rm # torque 65 | 66 | # fmt: off 67 | # From Rotary Pendulum Workbook 68 | theta_dot_dot = float((-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr 
+ Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))) 69 | alpha_dot_dot = float((2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))) 70 | # fmt: on 71 | 72 | if True: # semi-implicit euler (more accurate) 73 | theta_dot += theta_dot_dot * dt 74 | alpha_dot += alpha_dot_dot * dt 75 | theta += theta_dot * dt 76 | alpha += alpha_dot * dt 77 | else: # classic euler 78 | theta += theta_dot * dt 79 | alpha += alpha_dot * dt 80 | theta_dot += theta_dot_dot * dt 81 | alpha_dot += alpha_dot_dot * dt 82 | 83 | theta = ((theta + np.pi) % (2 * np.pi)) - np.pi 84 | alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi 85 | 86 | return theta, alpha, theta_dot, alpha_dot 87 | -------------------------------------------------------------------------------- /tests/lqr.py: -------------------------------------------------------------------------------- 1 | try: 2 | import autograd.numpy as np 3 | from autograd import grad, jacobian 4 | except: 5 | raise ImportError("Please install autograd.") 6 | try: 7 | import scipy.linalg as sp_linalg 8 | except: 9 | raise ImportError("Please install scipy.") 10 | 11 | 12 | # Motor 13 | Rm = 8.4 # Resistance 14 | kt = 0.042 # Current-torque (N-m/A) 15 | km = 0.042 # Back-emf constant (V-s/rad) 16 | 17 | # Rotary Arm 18 | mr = 0.095 # Mass (kg) 19 | Lr = 0.085 # Total length (m) 20 | Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 21 | Br = Dr = 0.0015 # Equivalent viscous damping coefficient (N-m-s/rad) 22 | 23 | # Pendulum Link 24 | mp = 0.024 # Mass (kg) 25 | Lp = 0.129 # Total length (m) 26 | Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 27 | Dp = Bp = 0.0005 # 
Equivalent viscous damping coefficient (N-m-s/rad) 28 | 29 | g = 9.81 # Gravity constant 30 | 31 | 32 | def forward_model(state, action, dt=1 / 300.0): 33 | theta = state[0] 34 | alpha = state[1] 35 | theta_dot = state[2] 36 | alpha_dot = state[3] 37 | Vm = action 38 | 39 | tau = (km * (Vm - km * theta_dot)) / Rm # torque 40 | 41 | # fmt: off 42 | alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau))*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 43 | theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 44 | # fmt: on 45 | 46 | # Works around a single operating point 47 | theta += theta_dot * dt # TODO: Shouldn't the angles be ahead of the velocities? 
48 | alpha += alpha_dot * dt 49 | theta_dot += theta_dot_dot * dt 50 | alpha_dot += alpha_dot_dot * dt 51 | 52 | theta %= 2 * np.pi 53 | alpha %= 2 * np.pi 54 | 55 | # For continuous version of LQR 56 | # state = np.array([theta_dot,alpha_dot,theta_dot_dot,alpha_dot_dot]).reshape((4,)) 57 | 58 | # For discrete version of LQR 59 | state = np.array([theta, alpha, theta_dot, alpha_dot]).reshape((4,)) 60 | return state 61 | 62 | 63 | def computeAB(current_state, current_control): 64 | # Linearizing Dynamics 65 | forward_dynamics_model = lambda state, action: forward_model(state, action) 66 | a_mat = jacobian(forward_dynamics_model, 0) 67 | b_mat = jacobian(forward_dynamics_model, 1) 68 | A = a_mat(current_state, current_control) 69 | B = b_mat(current_state, current_control) 70 | 71 | # Correct continuous time linearization from Quanser Workbook - 72 | # A = np.array( [[0,0,1.0000,0],[0,0,0 ,1.0000], [0,149.2751,-0.0104,0],[0,261.6091,-0.0103,0]]).reshape((4,4)) 73 | # B = np.array([ 0,0,49.7275,49.1493]).reshape((4,1)) 74 | 75 | return A, B 76 | 77 | 78 | def LQR_control(): 79 | # Cost matrices for LQR 80 | Q = np.diag(np.array([1, 1, 1, 1])) # state_dimension = 4 81 | R = np.eye(1) # control_dimension = 1 82 | 83 | A, B = computeAB(np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0])) 84 | 85 | # Use if discrete forward dynamics is used 86 | X = sp_linalg.solve_discrete_are(A, B, Q, R) 87 | K = np.dot(np.linalg.pinv(R + np.dot(B.T, np.dot(X, B))), np.dot(B.T, np.dot(X, A))) 88 | 89 | # Use for continuous version of LQR 90 | # X = sp_linalg.solve_continuous_are(A, B, Q, R) 91 | # K = np.dot(np.linalg.pinv(R), np.dot(B.T, X)) 92 | return np.squeeze(K, 0) 93 | 94 | 95 | def main(): 96 | """ 97 | K obtained from dicrete dynamics + discrete LQR and continuous dynamics + continuous LQR should approximately match 98 | quanser workbook and more importantly achieve balance on the Qube Hardware 99 | """ 100 | K_real = [-2.0, 35.0, -1.5, 3.0] # Correct K from quanser workbook 101 | 
K_calc = LQR_control().tolist() 102 | print("The two following should be close to each other") 103 | print("\tThe gains from Quanser are:", K_real) 104 | print("\tThe calculated gains are: ", K_calc) 105 | 106 | 107 | if __name__ == "__main__": 108 | main() 109 | -------------------------------------------------------------------------------- /gym_brt/envs/rendering/qube_render.py: -------------------------------------------------------------------------------- 1 | try: 2 | # If you use a device such as a headless raspberry pi this may not work 3 | from gym.envs.classic_control import rendering 4 | except: 5 | print("Warning: Can not import classic_control rendering in qube_render.py") 6 | 7 | import vpython as vp 8 | import numpy as np 9 | 10 | 11 | class QubeRendererVypthon: 12 | def __init__(self, theta, alpha, frequency): 13 | self.frequency = frequency 14 | vp.scene.width, vp.scene.height = 1000, 600 15 | vp.scene.range = 0.25 16 | vp.scene.title = "QubeServo2-USB rotary pendulum" 17 | 18 | # Dimensions of the rotary pendulum parts 19 | base_w, base_h, base_d = 0.102, 0.101, 0.102 # width, height, & len of base 20 | rotor_d, rotor_h = 0.0202, 0.01 # height, diameter of the rotor platform 21 | rotary_top_l, rotary_top_d = 0.032, 0.012 # height, diameter of the rotary top 22 | self.arm_l, arm_d = 0.085, 0.00325 # height, diameter of the arm 23 | self.pen_l, self.pen_d = 0.129, 0.00475 # height, diameter of the pendulum 24 | 25 | # Origin of parts 26 | arm_origin = vp.vec(0, 0, 0) 27 | self.rotary_top_origin = vp.vec(0, 0, -rotary_top_l / 2) 28 | rotor_origin = arm_origin - vp.vec(0, rotor_h + rotary_top_d / 2 - 0.0035, 0) 29 | base_origin = rotor_origin - vp.vec(0, base_h / 2, 0) 30 | 31 | # Create the part objects 32 | base = vp.box( 33 | pos=base_origin, 34 | size=vp.vec(base_w, base_h, base_d), 35 | color=vp.vec(0.45, 0.45, 0.45), 36 | ) 37 | rotor = vp.cylinder( 38 | pos=rotor_origin, 39 | axis=vp.vec(0, 1, 0), 40 | size=vp.vec(rotor_h, rotor_d, rotor_d), 41 
| color=vp.color.yellow, 42 | ) 43 | self._rotary_top = vp.cylinder( 44 | pos=self.rotary_top_origin, 45 | axis=vp.vec(0, 0, 1), 46 | size=vp.vec(rotary_top_l, rotary_top_d, rotary_top_d), 47 | color=vp.color.red, 48 | ) 49 | self._arm = vp.cylinder( 50 | pos=arm_origin, 51 | axis=vp.vec(0, 0, 1), 52 | size=vp.vec(self.arm_l, arm_d, arm_d), 53 | color=vp.vec(0.7, 0.7, 0.7), 54 | ) 55 | self._pendulum = vp.cylinder( 56 | pos=self.pendulum_origin(theta), 57 | axis=vp.vec(0, 1, 0), 58 | size=vp.vec(self.pen_l, self.pen_d, self.pen_d), 59 | color=vp.color.red, 60 | ) 61 | 62 | # Rotate parts to their init orientations 63 | self._rotary_top.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin) 64 | self._arm.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin) 65 | self._pendulum.rotate( 66 | angle=alpha, 67 | axis=self.pendulum_axis(theta), 68 | origin=self.pendulum_origin(theta), 69 | ) 70 | self.theta, self.alpha = theta, alpha 71 | 72 | def pendulum_origin(self, theta): 73 | x = self.arm_l * np.sin(theta) 74 | y = 0 75 | z = self.arm_l * np.cos(theta) 76 | return vp.vec(x, y, z) 77 | 78 | def pendulum_axis(self, theta): 79 | x = np.sin(theta) 80 | y = 0 81 | z = np.cos(theta) 82 | return vp.vec(x, y, z) 83 | 84 | def render(self, theta, alpha): 85 | dtheta = theta - self.theta 86 | self.theta = theta 87 | 88 | self._arm.rotate(angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0)) 89 | self._rotary_top.rotate( 90 | angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0) 91 | ) 92 | self._pendulum.origin = self.pendulum_origin(theta) 93 | self._pendulum.pos = self.pendulum_origin(theta) 94 | self._pendulum.axis = vp.vec(0, 1, 0) 95 | self._pendulum.size = vp.vec(self.pen_l, self.pen_d, self.pen_d) 96 | self._pendulum.rotate( 97 | angle=alpha, 98 | axis=self.pendulum_axis(theta), 99 | origin=self.pendulum_origin(theta), 100 | ) 101 | vp.rate(self.frequency) 102 | 103 | def close(self, *args, **kwargs): 104 | pass 105 | 106 | 107 | class 
QubeRenderer2D: 108 | def __init__(self, theta, alpha, frequency): 109 | width, height = (640, 240) 110 | self._viewer = rendering.Viewer(width, height) 111 | l, r, t, b = (2, -2, 0, 100) 112 | theta_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)]) 113 | l, r, t, b = (2, -2, 0, 100) 114 | alpha_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)]) 115 | theta_circle = rendering.make_circle(radius=100, res=64, filled=False) 116 | theta_circle.set_color(0.5, 0.5, 0.5) # Theta is grey 117 | alpha_circle = rendering.make_circle(radius=100, res=64, filled=False) 118 | alpha_circle.set_color(0.8, 0.0, 0.0) # Alpha is red 119 | theta_origin = (width / 2 - 150, height / 2) 120 | alpha_origin = (width / 2 + 150, height / 2) 121 | self._theta_tx = rendering.Transform(translation=theta_origin) 122 | self._alpha_tx = rendering.Transform(translation=alpha_origin) 123 | theta_poly.add_attr(self._theta_tx) 124 | alpha_poly.add_attr(self._alpha_tx) 125 | theta_circle.add_attr(self._theta_tx) 126 | alpha_circle.add_attr(self._alpha_tx) 127 | self._viewer.add_geom(theta_poly) 128 | self._viewer.add_geom(alpha_poly) 129 | self._viewer.add_geom(theta_circle) 130 | self._viewer.add_geom(alpha_circle) 131 | 132 | def render(self, theta, alpha): 133 | self._theta_tx.set_rotation(self._theta + np.pi) 134 | self._alpha_tx.set_rotation(self._alpha) 135 | return self._viewer.render(return_rgb_array=mode == "rgb_array") 136 | 137 | def close(self): 138 | self._viewer.close() 139 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_base_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import math 8 | import numpy as np 9 | 10 | from gym import spaces 11 | from gym.utils import seeding 12 | 13 | # For other platforms where 
it's impossible to install the HIL SDK 14 | try: 15 | from gym_brt.quanser import QubeHardware 16 | except ImportError: 17 | print("Warning: Can not import QubeHardware in qube_base_env.py") 18 | 19 | from gym_brt.quanser import QubeSimulator 20 | from gym_brt.envs.rendering import QubeRenderer 21 | 22 | 23 | MAX_MOTOR_VOLTAGE = 3 24 | ACT_MAX = np.asarray([MAX_MOTOR_VOLTAGE], dtype=np.float64) 25 | # OBS_MAX = [theta, alpha, theta_dot, alpha_dot] 26 | OBS_MAX = np.asarray([np.pi / 2, np.pi, np.inf, np.inf], dtype=np.float64) 27 | 28 | 29 | class QubeBaseEnv(gym.Env): 30 | """A base class for all qube-based environments.""" 31 | 32 | def __init__( 33 | self, 34 | frequency=250, 35 | batch_size=2048, 36 | use_simulator=False, 37 | encoder_reset_steps=int(1e8), 38 | ): 39 | self.observation_space = spaces.Box(-OBS_MAX, OBS_MAX) 40 | self.action_space = spaces.Box(-ACT_MAX, ACT_MAX) 41 | 42 | self._frequency = frequency 43 | # Ensures that samples in episode are the same as batch size 44 | # Reset every batch_size steps (2048 ~= 8.192 seconds) 45 | self._max_episode_steps = batch_size 46 | self._episode_steps = 0 47 | self._encoder_reset_steps = encoder_reset_steps 48 | self._steps_since_encoder_reset = 0 49 | self._target_angle = 0 50 | 51 | self._theta, self._alpha, self._theta_dot, self._alpha_dot = 0, 0, 0, 0 52 | self._dtheta, self._dalpha = 0, 0 53 | 54 | # Open the Qube 55 | if use_simulator: 56 | # TODO: Check assumption: ODE integration should be ~ once per ms 57 | integration_steps = int(np.ceil(1000 / self._frequency)) 58 | self.qube = QubeSimulator( 59 | forward_model="ode", 60 | frequency=self._frequency, 61 | integration_steps=integration_steps, 62 | max_voltage=MAX_MOTOR_VOLTAGE, 63 | ) 64 | else: 65 | self.qube = QubeHardware( 66 | frequency=self._frequency, max_voltage=MAX_MOTOR_VOLTAGE 67 | ) 68 | self.qube.__enter__() 69 | 70 | self.seed() 71 | self._viewer = None 72 | 73 | def __enter__(self): 74 | return self 75 | 76 | def __exit__(self, type, 
value, traceback): 77 | self.close(type=type, value=value, traceback=traceback) 78 | 79 | def seed(self, seed=None): 80 | self.np_random, seed = seeding.np_random(seed) 81 | return [seed] 82 | 83 | def _step(self, action): 84 | led = self._led() 85 | 86 | action = np.clip(np.array(action, dtype=np.float64), -ACT_MAX, ACT_MAX) 87 | state = self.qube.step(action, led=led) 88 | 89 | self._dtheta = state[0] - self._theta 90 | self._dalpha = state[1] - self._alpha 91 | self._theta, self._alpha, self._theta_dot, self._alpha_dot = state 92 | 93 | def reset(self): 94 | self._episode_steps = 0 95 | # Occasionaly reset the enocoders to remove sensor drift 96 | if self._steps_since_encoder_reset >= self._encoder_reset_steps: 97 | self.qube.reset_encoders() 98 | self._steps_since_encoder_reset = 0 99 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 100 | self._step(action) 101 | return self._get_state() 102 | 103 | def _reset_up(self): 104 | self.qube.reset_up() 105 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 106 | self._step(action) 107 | return self._get_state() 108 | 109 | def _reset_down(self): 110 | self.qube.reset_down() 111 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 112 | self._step(action) 113 | return self._get_state() 114 | 115 | def _get_state(self): 116 | return np.array( 117 | [self._theta, self._alpha, self._theta_dot, self._alpha_dot], 118 | dtype=np.float64, 119 | ) 120 | 121 | def _next_target_angle(self): 122 | return 0 123 | 124 | def _reward(self): 125 | raise NotImplementedError 126 | 127 | def _isdone(self): 128 | raise NotImplementedError 129 | 130 | def _led(self): 131 | if self._isdone(): # Doing reset 132 | led = [1.0, 1.0, 0.0] # Yellow 133 | else: 134 | if abs(self._alpha) > (20 * np.pi / 180): 135 | led = [1.0, 0.0, 0.0] # Red 136 | elif abs(self._theta) > (90 * np.pi / 180): 137 | led = [1.0, 0.0, 0.0] # Red 138 | else: 139 | led = [0.0, 
1.0, 0.0] # Green 140 | return led 141 | 142 | def step(self, action): 143 | self._step(action) 144 | state = self._get_state() 145 | reward = self._reward() 146 | done = self._isdone() 147 | info = { 148 | "theta": self._theta, 149 | "alpha": self._alpha, 150 | "theta_dot": self._theta_dot, 151 | "alpha_dot": self._alpha_dot, 152 | } 153 | 154 | self._episode_steps += 1 155 | self._steps_since_encoder_reset += 1 156 | self._target_angle = self._next_target_angle() 157 | 158 | return state, reward, done, info 159 | 160 | def render(self, mode="human"): 161 | if self._viewer is None: 162 | self._viewer = QubeRenderer(self._theta, self._alpha, self._frequency) 163 | self._viewer.render(self._theta, self._alpha) 164 | 165 | def close(self, type=None, value=None, traceback=None): 166 | # Safely close the Qube (important on hardware) 167 | self.qube.close(type=type, value=value, traceback=traceback) 168 | if self._viewer is not None: 169 | self._viewer.close() 170 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import argparse 8 | import numpy as np 9 | 10 | from gym_brt.envs import ( 11 | QubeSwingupEnv, 12 | QubeSwingupSparseEnv, 13 | QubeSwingupFollowEnv, 14 | QubeSwingupFollowSparseEnv, 15 | QubeBalanceEnv, 16 | QubeBalanceSparseEnv, 17 | QubeBalanceFollowEnv, 18 | QubeBalanceFollowSparseEnv, 19 | QubeDampenEnv, 20 | QubeDampenSparseEnv, 21 | QubeDampenFollowEnv, 22 | QubeDampenFollowSparseEnv, 23 | QubeRotorEnv, 24 | QubeRotorFollowEnv, 25 | QubeBalanceFollowSineWaveEnv, 26 | QubeSwingupFollowSineWaveEnv, 27 | QubeRotorFollowSineWaveEnv, 28 | QubeDampenFollowSineWaveEnv, 29 | ) 30 | 31 | from gym_brt.control import ( 32 | zero_policy, 33 | constant_policy, 34 | random_policy, 35 
| square_wave_policy, 36 | energy_control_policy, 37 | pd_control_policy, 38 | flip_and_hold_policy, 39 | square_wave_flip_and_hold_policy, 40 | dampen_policy, 41 | pd_tracking_control_policy, 42 | ) 43 | 44 | 45 | def print_info(state_info, action, reward): 46 | theta = state_info["theta"] 47 | alpha = state_info["alpha"] 48 | theta_dot = state_info["theta_dot"] 49 | alpha_dot = state_info["alpha_dot"] 50 | print( 51 | "State: theta={:06.3f}, alpha={:06.3f}, theta_dot={:06.3f}, alpha_dot={:06.3f}".format( 52 | theta, alpha, theta_dot, alpha_dot 53 | ) 54 | ) 55 | print("Action={}, Reward={}".format(action, reward)) 56 | 57 | 58 | def test_env( 59 | env_name, 60 | policy, 61 | num_episodes=10, 62 | num_steps=250, 63 | frequency=250, 64 | state_keys=None, 65 | verbose=False, 66 | use_simulator=False, 67 | render=False, 68 | ): 69 | 70 | with env_name(use_simulator=use_simulator, frequency=frequency) as env: 71 | for episode in range(num_episodes): 72 | state = env.reset() 73 | state, reward, done, info = env.step(np.array([0], dtype=np.float64)) 74 | for step in range(num_steps): 75 | action = policy(state, step=step, frequency=frequency) 76 | state, reward, done, info = env.step(action) 77 | if done: 78 | break 79 | if verbose: 80 | print_info(info, action, reward) 81 | if render: 82 | env.render() 83 | 84 | 85 | def main(): 86 | envs = { 87 | "QubeSwingupEnv": QubeSwingupEnv, 88 | "QubeSwingupSparseEnv": QubeSwingupSparseEnv, 89 | "QubeSwingupFollowEnv": QubeSwingupFollowEnv, 90 | "QubeSwingupFollowSparseEnv": QubeSwingupFollowSparseEnv, 91 | "QubeBalanceEnv": QubeBalanceEnv, 92 | "QubeBalanceSparseEnv": QubeBalanceSparseEnv, 93 | "QubeBalanceFollowEnv": QubeBalanceFollowEnv, 94 | "QubeBalanceFollowSparseEnv": QubeBalanceFollowSparseEnv, 95 | "QubeDampenEnv": QubeDampenEnv, 96 | "QubeDampenSparseEnv": QubeDampenSparseEnv, 97 | "QubeDampenFollowEnv": QubeDampenFollowEnv, 98 | "QubeDampenFollowSparseEnv": QubeDampenFollowSparseEnv, 99 | "QubeRotorEnv": QubeRotorEnv, 
100 | "QubeRotorFollowEnv": QubeRotorFollowEnv, 101 | "QubeBalanceFollowSineWaveEnv": QubeBalanceFollowSineWaveEnv, 102 | "QubeSwingupFollowSineWaveEnv": QubeSwingupFollowSineWaveEnv, 103 | "QubeRotorFollowSineWaveEnv": QubeRotorFollowSineWaveEnv, 104 | "QubeDampenFollowSineWaveEnv": QubeDampenFollowSineWaveEnv, 105 | } 106 | policies = { 107 | "none": zero_policy, 108 | "zero": zero_policy, 109 | "const": constant_policy, 110 | "rand": random_policy, 111 | "random": random_policy, 112 | "sw": square_wave_policy, 113 | "energy": energy_control_policy, 114 | "pd": pd_control_policy, 115 | "hold": pd_control_policy, 116 | "flip": flip_and_hold_policy, 117 | "sw-hold": square_wave_flip_and_hold_policy, 118 | "damp": dampen_policy, 119 | "track": pd_tracking_control_policy, 120 | } 121 | 122 | # Parse command line args 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument( 125 | "-e", 126 | "--env", 127 | default="QubeSwingupEnv", 128 | choices=list(envs.keys()), 129 | help="Enviroment to run.", 130 | ) 131 | parser.add_argument( 132 | "-c", 133 | "--controller", 134 | default="random", 135 | choices=list(policies.keys()), 136 | help="Select what type of action to take.", 137 | ) 138 | parser.add_argument( 139 | "-ne", 140 | "--num-episodes", 141 | default="10", 142 | type=int, 143 | help="Number of episodes to run.", 144 | ) 145 | parser.add_argument( 146 | "-ns", 147 | "--num-steps", 148 | default="10000", 149 | type=int, 150 | help="Number of step to run per episode.", 151 | ) 152 | parser.add_argument( 153 | "-f", 154 | "--frequency", 155 | "--sample-frequency", 156 | default="250", 157 | type=float, 158 | help="The frequency of samples on the Quanser hardware.", 159 | ) 160 | parser.add_argument("-v", "--verbose", action="store_true") 161 | parser.add_argument("-r", "--render", action="store_true") 162 | parser.add_argument("-s", "--use_simulator", action="store_true") 163 | args, _ = parser.parse_known_args() 164 | 165 | print("Testing Env: 
{}".format(args.env)) 166 | print("Controller: {}".format(args.controller)) 167 | print("{} steps over {} episodes".format(args.num_steps, args.num_episodes)) 168 | print("Samples freq: {}".format(args.frequency)) 169 | test_env( 170 | envs[args.env], 171 | policies[args.controller], 172 | num_episodes=args.num_episodes, 173 | num_steps=args.num_steps, 174 | frequency=args.frequency, 175 | verbose=args.verbose, 176 | use_simulator=args.use_simulator, 177 | render=args.render, 178 | ) 179 | 180 | 181 | if __name__ == "__main__": 182 | main() 183 | -------------------------------------------------------------------------------- /gym_brt/control/control.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | 7 | 8 | def _convert_state(state): 9 | state = np.asarray(state) 10 | if state.shape == (4,): 11 | return state 12 | if state.shape == (5,): 13 | return state 14 | elif state.shape == (6,): 15 | # Get the angles 16 | theta_x, theta_y = state[0], state[1] 17 | alpha_x, alpha_y = state[2], state[3] 18 | theta = np.arctan2(theta_y, theta_x) 19 | alpha = np.arctan2(alpha_y, alpha_x) 20 | theta_dot, alpha_dot = state[4], state[5] 21 | return theta, alpha, theta_dot, alpha_dot 22 | else: 23 | raise ValueError( 24 | "State shape was expected to be one of: (4,1) or (6,1), found: {}".format( 25 | state.shape 26 | ) 27 | ) 28 | 29 | 30 | # No input 31 | def zero_policy(state, **kwargs): 32 | return np.array([0.0]) 33 | 34 | 35 | # Constant input 36 | def constant_policy(state, **kwargs): 37 | return np.array([3.0]) 38 | 39 | 40 | # Rand input 41 | def random_policy(state, **kwargs): 42 | return np.asarray([np.random.randn()]) 43 | 44 | 45 | # Square wave, switch every 85 ms 46 | def square_wave_policy(state, step, frequency=250, **kwargs): 47 | steps_until_85ms = int(85 * (frequency / 300)) 48 | 
state = _convert_state(state) 49 | # Switch between positive and negative every 85 ms 50 | mod_170ms = step % (2 * steps_until_85ms) 51 | if mod_170ms < steps_until_85ms: 52 | action = 3.0 53 | else: 54 | action = -3.0 55 | return np.array([action]) 56 | 57 | 58 | # Flip policy 59 | def energy_control_policy(state, **kwargs): 60 | state = _convert_state(state) 61 | # Run energy-based control to flip up the pendulum 62 | theta, alpha, theta_dot, alpha_dot = state 63 | # alpha_dot += alpha_dot + 1e-15 64 | 65 | """Implements a energy based swing-up controller""" 66 | mu = 50.0 # in m/s/J 67 | ref_energy = 30.0 / 1000.0 # Er in joules 68 | 69 | max_u = 6 # Max action is 6m/s^2 70 | 71 | # System parameters 72 | jp = 3.3282e-5 73 | lp = 0.129 74 | lr = 0.085 75 | mp = 0.024 76 | mr = 0.095 77 | rm = 8.4 78 | g = 9.81 79 | kt = 0.042 80 | 81 | pend_torque = (1 / 2) * mp * g * lp * (1 + np.cos(alpha)) 82 | energy = pend_torque + (jp / 2.0) * alpha_dot * alpha_dot 83 | 84 | u = mu * (energy - ref_energy) * np.sign(-1 * np.cos(alpha) * alpha_dot) 85 | u = np.clip(u, -max_u, max_u) 86 | 87 | torque = (mr * lr) * u 88 | voltage = (rm / kt) * torque 89 | return np.array([-voltage]) 90 | 91 | 92 | # Hold policy 93 | def pd_control_policy(state, **kwargs): 94 | state = _convert_state(state) 95 | theta, alpha, theta_dot, alpha_dot = state 96 | # multiply by proportional and derivative gains 97 | kp_theta = -2.0 98 | kp_alpha = 35.0 99 | kd_theta = -1.5 100 | kd_alpha = 3.0 101 | 102 | # If pendulum is within 20 degrees of upright, enable balance control, else zero 103 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 104 | action = ( 105 | theta * kp_theta 106 | + alpha * kp_alpha 107 | + theta_dot * kd_theta 108 | + alpha_dot * kd_alpha 109 | ) 110 | else: 111 | action = 0.0 112 | action = np.clip(action, -3.0, 3.0) 113 | return np.array([action]) 114 | 115 | 116 | # Flip and Hold 117 | def flip_and_hold_policy(state, **kwargs): 118 | state = _convert_state(state) 119 | theta, 
alpha, theta_dot, alpha_dot = state 120 | 121 | # If pendulum is within 20 degrees of upright, enable balance control 122 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 123 | action = pd_control_policy(state) 124 | else: 125 | action = energy_control_policy(state) 126 | return action 127 | 128 | 129 | # Square wave instead of energy controller flip and hold 130 | def square_wave_flip_and_hold_policy(state, **kwargs): 131 | state = _convert_state(state) 132 | theta, alpha, theta_dot, alpha_dot = state 133 | 134 | # If pendulum is within 20 degrees of upright, enable balance control 135 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 136 | action = pd_control_policy(state) 137 | else: 138 | action = square_wave_policy(state, **kwargs) 139 | return action 140 | 141 | 142 | def dampen_policy(state, **kwargs): 143 | state = _convert_state(state) 144 | theta, alpha, theta_dot, alpha_dot = state 145 | 146 | # Alt alpha angle: -pi to +pi, where 0 is the pendulum facing down (at rest) 147 | alt_alpha = (alpha + 2 * np.pi) % (2 * np.pi) - np.pi 148 | if np.abs(alt_alpha) < (20.0 * np.pi / 180.0) and np.abs(theta) < (np.pi / 4): 149 | kp_theta = 0.9 150 | kp_alpha = -3.6 151 | kd_theta = 0.7 152 | kd_alpha = -0.285 153 | if alpha >= 0: 154 | action = ( 155 | theta * kp_theta 156 | + (alpha - np.pi) * kp_alpha 157 | + theta_dot * kd_theta 158 | + alpha_dot * kd_alpha 159 | ) 160 | else: 161 | action = ( 162 | theta * kp_theta 163 | + (alpha + np.pi) * kp_alpha 164 | + theta_dot * kd_theta 165 | + alpha_dot * kd_alpha 166 | ) 167 | else: 168 | action = 0 169 | 170 | action = np.clip(action, -3.0, 3.0) 171 | return np.array([action], dtype=np.float64) 172 | 173 | 174 | # Hold policy 175 | def pd_tracking_control_policy(state, **kwargs): 176 | state = _convert_state(state) 177 | theta, alpha, theta_dot, alpha_dot, theta_target = state 178 | # multiply by proportional and derivative gains 179 | kp_theta = -2.0 180 | kp_alpha = 35.0 181 | kd_theta = -1.5 182 | kd_alpha = 3.0 183 | 184 
| # If pendulum is within 20 degrees of upright, enable balance control, else zero 185 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 186 | action = ( 187 | (theta - theta_target) * kp_theta 188 | + alpha * kp_alpha 189 | + theta_dot * kd_theta 190 | + alpha_dot * kd_alpha 191 | ) 192 | else: 193 | action = 0.0 194 | action = np.clip(action, -3.0, 3.0) 195 | return np.array([action]) 196 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/hil.pxd: -------------------------------------------------------------------------------- 1 | include "quanser_types.pxd" 2 | 3 | cdef extern from "/opt/quanser/hil_sdk/include/hil.h": 4 | ctypedef struct t_card: 5 | pass 6 | ctypedef struct t_task: 7 | pass 8 | ctypedef struct t_monitor: 9 | pass 10 | cdef enum t_clock: 11 | SYSTEM_CLOCK_4 = -4 12 | SYSTEM_CLOCK_3 = -3 13 | SYSTEM_CLOCK_2 = -2 14 | SYSTEM_CLOCK_1 = -1 15 | HARDWARE_CLOCK_0 = 0 16 | HARDWARE_CLOCK_1 = 1 17 | HARDWARE_CLOCK_2 = 2 18 | HARDWARE_CLOCK_3 = 3 19 | HARDWARE_CLOCK_4 = 4 20 | HARDWARE_CLOCK_5 = 5 21 | HARDWARE_CLOCK_6 = 6 22 | HARDWARE_CLOCK_7 = 7 23 | HARDWARE_CLOCK_8 = 8 24 | HARDWARE_CLOCK_9 = 9 25 | HARDWARE_CLOCK_10 = 10 26 | HARDWARE_CLOCK_11 = 11 27 | HARDWARE_CLOCK_12 = 12 28 | HARDWARE_CLOCK_13 = 13 29 | HARDWARE_CLOCK_14 = 14 30 | HARDWARE_CLOCK_15 = 15 31 | HARDWARE_CLOCK_16 = 16 32 | HARDWARE_CLOCK_17 = 17 33 | HARDWARE_CLOCK_18 = 18 34 | HARDWARE_CLOCK_19 = 19 35 | cdef enum t_buffer_overflow_mode: 36 | BUFFER_MODE_ERROR_ON_OVERFLOW, # return an error on buffer overflow (default). 37 | BUFFER_MODE_OVERWRITE_ON_OVERFLOW, # overwrite old samples on buffer overflow. 38 | BUFFER_MODE_DISCARD_ON_OVERFLOW # discard new samples on buffer overflow. 
39 | 40 | t_error hil_open( 41 | const char * card_type, 42 | const char * card_identifier, 43 | t_card * card 44 | ) 45 | t_error hil_close(t_card card) 46 | 47 | # For debugging 48 | t_error hil_close_all() # Sometimes the board doesn't safely close 49 | 50 | # Synchronous reads/writes 51 | t_error hil_set_encoder_counts( 52 | t_card card, 53 | const t_uint32 encoder_channels[], 54 | t_uint32 num_channels, 55 | const t_int32 buffer[] 56 | ) 57 | t_error hil_write_analog( 58 | t_card card, 59 | const t_uint32 analog_channels[], 60 | t_uint32 num_channels, 61 | const t_double buffer[] 62 | ) 63 | t_error hil_write_digital( 64 | t_card card, 65 | const t_uint32 digital_lines[], 66 | t_uint32 num_lines, 67 | const t_boolean buffer[] 68 | ) 69 | t_error hil_write_other( 70 | t_card card, 71 | const t_uint32 other_channels[], 72 | t_uint32 num_channels, 73 | const t_double buffer[] 74 | ) 75 | 76 | t_error hil_read_analog( 77 | t_card card, 78 | const t_uint32 analog_channels[], 79 | t_uint32 num_channels, 80 | t_double buffer[] 81 | ) 82 | t_error hil_read_encoder( 83 | t_card card, 84 | const t_uint32 encoder_channels[], 85 | t_uint32 num_channels, 86 | t_int32 buffer[] 87 | ) 88 | t_error hil_read_digital( 89 | t_card card, 90 | const t_uint32 digital_lines[], 91 | t_uint32 num_lines, 92 | t_boolean buffer[] 93 | ) 94 | t_error hil_read_other( 95 | t_card card, 96 | const t_uint32 other_channels[], 97 | t_uint32 num_channels, 98 | t_double buffer[] 99 | ) 100 | t_error hil_read( 101 | t_card card, 102 | const t_uint32 analog_channels[], t_uint32 num_analog_channels, 103 | const t_uint32 encoder_channels[], t_uint32 num_encoder_channels, 104 | const t_uint32 digital_lines[], t_uint32 num_digital_lines, 105 | const t_uint32 other_channels[], t_uint32 num_other_channels, 106 | t_double analog_buffer[], 107 | t_int32 encoder_buffer[], 108 | t_boolean digital_buffer[], 109 | t_double other_buffer[] 110 | ) 111 | 112 | # Async 113 | t_error hil_task_create_reader_writer( 
114 | t_card card, t_uint32 samples_in_buffer, 115 | const t_uint32 analog_input_channels[], 116 | t_uint32 num_analog_input_channels, 117 | const t_uint32 encoder_input_channels[], 118 | t_uint32 num_encoder_input_channels, 119 | const t_uint32 digital_input_lines[], 120 | t_uint32 num_digital_input_lines, 121 | const t_uint32 other_input_channels[], 122 | t_uint32 num_other_input_channels, 123 | 124 | const t_uint32 analog_output_channels[], 125 | t_uint32 num_analog_output_channels, 126 | const t_uint32 pwm_output_channels[], 127 | t_uint32 num_pwm_output_channels, 128 | const t_uint32 digital_output_lines[], 129 | t_uint32 num_digital_output_lines, 130 | const t_uint32 other_output_channels[], 131 | t_uint32 num_other_output_channels, 132 | t_task *task 133 | ) 134 | t_error hil_task_create_reader( 135 | t_card card, t_uint32 samples_in_buffer, 136 | const t_uint32 analog_channels[], 137 | t_uint32 num_analog_channels, 138 | const t_uint32 encoder_channels[], 139 | t_uint32 num_encoder_channels, 140 | const t_uint32 digital_lines[], 141 | t_uint32 num_digital_lines, 142 | const t_uint32 other_channels[], 143 | t_uint32 num_other_channels, 144 | t_task *task 145 | ) 146 | 147 | t_error hil_task_start( 148 | t_task task, 149 | t_clock clock, 150 | t_double frequency, 151 | t_uint32 num_samples 152 | ) 153 | t_error hil_task_flush(t_task task) 154 | t_error hil_task_stop(t_task task) 155 | t_error hil_task_delete(t_task task) 156 | 157 | t_error hil_task_read_write( 158 | t_task task, t_uint32 num_samples, 159 | t_double analog_input_buffer[], 160 | t_int32 encoder_input_buffer[], 161 | t_boolean digital_input_buffer[], 162 | t_double other_input_buffer[], 163 | 164 | const t_double analog_output_buffer[], 165 | const t_double pwm_output_buffer[], 166 | const t_boolean digital_output_buffer[], 167 | const t_double other_output_buffer[] 168 | ) 169 | t_error hil_task_write( 170 | t_task task, t_uint32 num_samples, 171 | const t_double analog_buffer[], 172 | const 
t_double pwm_buffer[], 173 | const t_boolean digital_buffer[], 174 | const t_double other_buffer[] 175 | ) 176 | t_error hil_task_read( 177 | t_task task, t_uint32 num_samples, 178 | t_double analog_buffer[], 179 | t_int32 encoder_buffer[], 180 | t_boolean digital_buffer[], 181 | t_double other_buffer[] 182 | ) 183 | 184 | # Allow buffer overflow 185 | t_error hil_task_set_buffer_overflow_mode( 186 | t_task task, 187 | t_buffer_overflow_mode mode) 188 | t_int hil_task_get_buffer_overflows(t_task task) 189 | -------------------------------------------------------------------------------- /gym_brt/quanser/qube_interfaces.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import math 8 | import numpy as np 9 | 10 | from gym import spaces 11 | from gym.utils import seeding 12 | 13 | from gym_brt.control import flip_and_hold_policy, dampen_policy 14 | 15 | # For other platforms where it's impossible to install the HIL SDK 16 | try: 17 | from gym_brt.quanser.quanser_wrapper.quanser_wrapper import QubeServo2 18 | except ImportError: 19 | print("Warning: Can not import QubeServo2 in qube_interface.py") 20 | 21 | from gym_brt.quanser.qube_simulator import forward_model_euler, forward_model_ode 22 | 23 | 24 | class QubeHardware(object): 25 | """Simplify the interface between the environment and """ 26 | 27 | def __init__(self, frequency=250, max_voltage=18.0): 28 | self._theta_dot_cstate = 0 29 | self._alpha_dot_cstate = 0 30 | self._frequency = frequency 31 | 32 | # Open the Qube 33 | self.qube = QubeServo2(frequency=frequency) # TODO: max_voltage=max_voltage 34 | self.qube.__enter__() 35 | 36 | self.state = np.array([0, 0, 0, 0], dtype=np.float64) 37 | 38 | def __enter__(self): 39 | return self 40 | 41 | def __exit__(self, type, value, traceback): 42 | self.close() 43 | 44 | def 
_match_state(s): 45 | theta, alpha, theta_dot, alpha_dot = s 46 | return np.array([]) 47 | 48 | def step(self, action, led=None): 49 | theta, alpha, theta_dot, alpha_dot = self.state 50 | currents, encoders, others = self.qube.action(action, led_w=led) 51 | 52 | # Calculate alpha, theta, alpha_dot, and theta_dot 53 | theta = encoders[0] * (-2.0 * np.pi / 2048) 54 | # Alpha without normalizing 55 | alpha_un = encoders[1] * (2.0 * np.pi / 2048) 56 | # Normalized and shifted alpha 57 | alpha = (alpha_un % (2.0 * np.pi)) - np.pi 58 | 59 | # fmt: off 60 | theta_dot = -2500 * self._theta_dot_cstate + 50 * theta 61 | alpha_dot = -2500 * self._alpha_dot_cstate + 50 * alpha_un 62 | self._theta_dot_cstate += (-50 * self._theta_dot_cstate + theta) / self._frequency 63 | self._alpha_dot_cstate += (-50 * self._alpha_dot_cstate + alpha_un) / self._frequency 64 | # fmt: on 65 | 66 | state = np.array([theta, alpha, theta_dot, alpha_dot], dtype=np.float64) 67 | return state 68 | 69 | def reset_up(self): 70 | """Run classic control for flip-up until the pendulum is inverted for 71 | a set amount of time. Assumes that initial state is stationary 72 | downwards. 
73 | """ 74 | time_hold = 1.0 * self._frequency # Hold upright 1 second 75 | sample = 0 # Samples since control system started 76 | samples_upright = 0 # Consecutive samples pendulum is upright 77 | 78 | action = np.array([1], dtype=np.float64) 79 | state = self.step([1.0]) 80 | while True: 81 | action = flip_and_hold_policy(state) 82 | state = self.step(action) 83 | 84 | # Break if pendulum is inverted 85 | alpha = state[1] 86 | if alpha < (10 * np.pi / 180): 87 | if samples_upright > time_hold: 88 | break 89 | samples_upright += 1 90 | else: 91 | samples_upright = 0 92 | sample += 1 93 | 94 | return state 95 | 96 | def reset_down(self): 97 | action = np.array([0], dtype=np.float64) 98 | time_hold = 0.5 * self._frequency # 0.5 seconds 99 | samples_downwards = 0 # Consecutive samples pendulum is stationary 100 | 101 | state = self.step([1.0]) 102 | while True: 103 | action = dampen_policy(state) 104 | state = self.step(action) 105 | 106 | # Break if pendulum is stationary 107 | alpha = state[1] 108 | if abs(alpha) > (178 * np.pi / 180): 109 | if samples_downwards > time_hold: 110 | break 111 | samples_downwards += 1 112 | else: 113 | samples_downwards = 0 114 | return state 115 | 116 | def reset_encoders(self): 117 | """Fully stop the pendulum at the bottom. 
Then reset the alpha encoder""" 118 | self.reset_down() 119 | 120 | self.step(np.array([0], dtype=np.float64)) 121 | print("Doing a hard reset and, reseting the alpha encoder") 122 | time.sleep(25) # Do nothing for 3 seconds to ensure pendulum is stopped 123 | 124 | # This is needed to prevent sensor drift on the alpha/pendulum angle 125 | # We ONLY reset the alpha channel because the dampen function stops the 126 | # pendulum from moving but does not perfectly center the pendulum at the 127 | # bottom (this way alpha is very close to perfect and theta does not 128 | # drift much) 129 | print("Pre encoder reset:", self.qube.action(np.array([0], dtype=np.float64))) 130 | self.qube.reset_encoders(channels=[1]) # Alpha channel only 131 | print("After encoder reset:", self.qube.action(np.array([0], dtype=np.float64))) 132 | 133 | def close(self, type=None, value=None, traceback=None): 134 | # Safely close the Qube 135 | self.qube.__exit__(type=type, value=value, traceback=traceback) 136 | 137 | 138 | class QubeSimulator(object): 139 | """Simulator that has the same interface as the hardware wrapper.""" 140 | 141 | def __init__( 142 | self, forward_model="ode", frequency=250, integration_steps=1, max_voltage=18.0 143 | ): 144 | if isinstance(forward_model, str): 145 | if forward_model == "ode": 146 | self._forward_model = forward_model_ode 147 | elif forward_model == "euler": 148 | self._forward_model = forward_model_euler 149 | else: 150 | raise ValueError( 151 | "'forward_model' must be one of ['ode', 'euler'] or a callable." 152 | ) 153 | elif callable(forward_model): 154 | self._forward_model = forward_model 155 | else: 156 | raise ValueError( 157 | "'forward_model' must be one of ['ode', 'euler'] or a callable." 
158 | ) 159 | 160 | self._dt = 1.0 / frequency 161 | self._integration_steps = integration_steps 162 | self._max_voltage = max_voltage 163 | self.state = ( 164 | np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 165 | ) 166 | 167 | def __enter__(self): 168 | return self 169 | 170 | def __exit__(self, type, value, traceback): 171 | self.close() 172 | 173 | def step(self, action, led=None): 174 | action = np.clip(action, -self._max_voltage, self._max_voltage) 175 | self.state = self._forward_model( 176 | *self.state, action, self._dt, self._integration_steps 177 | ) 178 | return self.state 179 | 180 | def reset_up(self): 181 | self.state = ( 182 | np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 183 | ) 184 | return self.state 185 | 186 | def reset_down(self): 187 | self.state = ( 188 | np.array([0, np.pi, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 189 | ) 190 | return self.state 191 | 192 | def reset_encoders(self): 193 | pass 194 | 195 | def close(self, type=None, value=None, traceback=None): 196 | pass 197 | -------------------------------------------------------------------------------- /gym_brt/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | ### Hardware Environments ###################################################### 4 | # Swingup Environments 5 | register( 6 | id='QubeSwingupEnv-v0', 7 | entry_point='gym_brt.envs:QubeSwingupEnv', 8 | nondeterministic=True, 9 | max_episode_steps=2048, 10 | kwargs={'use_simulator': False} 11 | ) 12 | register( 13 | id='QubeSwingupSparseEnv-v0', 14 | entry_point='gym_brt.envs:QubeSwingupSparseEnv', 15 | nondeterministic=True, 16 | max_episode_steps=2048, 17 | kwargs={'use_simulator': False} 18 | ) 19 | register( 20 | id='QubeSwingupFollowEnv-v0', 21 | entry_point='gym_brt.envs:QubeSwingupFollowEnv', 22 | nondeterministic=True, 23 | max_episode_steps=2048, 24 | 
kwargs={'use_simulator': False} 25 | ) 26 | register( 27 | id='QubeSwingupFollowSparseEnv-v0', 28 | entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv', 29 | nondeterministic=True, 30 | max_episode_steps=2048, 31 | kwargs={'use_simulator': False} 32 | ) 33 | # Balance Environments 34 | register( 35 | id='QubeBalanceEnv-v0', 36 | entry_point='gym_brt.envs:QubeBalanceEnv', 37 | nondeterministic=True, 38 | max_episode_steps=2048, 39 | kwargs={'use_simulator': False} 40 | ) 41 | register( 42 | id='QubeBalanceSparseEnv-v0', 43 | entry_point='gym_brt.envs:QubeBalanceSparseEnv', 44 | nondeterministic=True, 45 | max_episode_steps=2048, 46 | kwargs={'use_simulator': False} 47 | ) 48 | register( 49 | id='QubeBalanceFollowEnv-v0', 50 | entry_point='gym_brt.envs:QubeBalanceFollowEnv', 51 | nondeterministic=True, 52 | max_episode_steps=2048, 53 | kwargs={'use_simulator': False} 54 | ) 55 | register( 56 | id='QubeBalanceFollowSparseEnv-v0', 57 | entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv', 58 | nondeterministic=True, 59 | max_episode_steps=2048, 60 | kwargs={'use_simulator': False} 61 | ) 62 | # Dampen Environments 63 | register( 64 | id='QubeDampenEnv-v0', 65 | entry_point='gym_brt.envs:QubeDampenEnv', 66 | nondeterministic=True, 67 | max_episode_steps=2048, 68 | kwargs={'use_simulator': False} 69 | ) 70 | register( 71 | id='QubeDampenSparseEnv-v0', 72 | entry_point='gym_brt.envs:QubeDampenSparseEnv', 73 | nondeterministic=True, 74 | max_episode_steps=2048, 75 | kwargs={'use_simulator': False} 76 | ) 77 | register( 78 | id='QubeDampenFollowEnv-v0', 79 | entry_point='gym_brt.envs:QubeDampenFollowEnv', 80 | nondeterministic=True, 81 | max_episode_steps=2048, 82 | kwargs={'use_simulator': False} 83 | ) 84 | register( 85 | id='QubeDampenFollowSparseEnv-v0', 86 | entry_point='gym_brt.envs:QubeDampenFollowSparseEnv', 87 | nondeterministic=True, 88 | max_episode_steps=2048, 89 | kwargs={'use_simulator': False} 90 | ) 91 | register( 92 | 
id='QubeBalanceFollowSineWaveEnv-v0', 93 | entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv', 94 | nondeterministic=True, 95 | max_episode_steps=2048, 96 | kwargs={'use_simulator': False} 97 | ) 98 | # Sin wave environments 99 | register( 100 | id='QubeSwingupFollowSineWaveEnv-v0', 101 | entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv', 102 | nondeterministic=True, 103 | max_episode_steps=2048, 104 | kwargs={'use_simulator': False} 105 | ) 106 | register( 107 | id='QubeRotorFollowSineWaveEnv-v0', 108 | entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv', 109 | nondeterministic=True, 110 | max_episode_steps=2048, 111 | kwargs={'use_simulator': False} 112 | ) 113 | register( 114 | id='QubeDampenFollowSineWaveEnv-v0', 115 | entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv', 116 | nondeterministic=True, 117 | max_episode_steps=2048, 118 | kwargs={'use_simulator': False} 119 | ) 120 | # Rotor Evironments 121 | # Note: the rotor environments have issues with the rewards. 122 | # register( 123 | # id='QubeRotorEnv-v0', 124 | # entry_point='gym_brt.envs:QubeRotorEnv', 125 | # nondeterministic=True, 126 | # max_episode_steps=2048, 127 | # kwargs={'use_simulator': False} 128 | # ) 129 | # register( 130 | # id='QubeRotorFollowEnv-v0', 131 | # entry_point='gym_brt.envs:QubeRotorFollowEnv', 132 | # nondeterministic=True, 133 | # max_episode_steps=2048, 134 | # kwargs={'use_simulator': False} 135 | # ) 136 | 137 | ### Simulated Environments ##################################################### 138 | # Swingup Environments 139 | register( 140 | id='QubeSwingupSimEnv-v0', 141 | entry_point='gym_brt.envs:QubeSwingupEnv', 142 | max_episode_steps=2048, 143 | kwargs={'use_simulator': True} 144 | ) 145 | register( 146 | id='QubeSwingupSparseSimEnv-v0', 147 | entry_point='gym_brt.envs:QubeSwingupSparseEnv', 148 | max_episode_steps=2048, 149 | kwargs={'use_simulator': True} 150 | ) 151 | register( 152 | id='QubeSwingupFollowSimEnv-v0', 153 | 
entry_point='gym_brt.envs:QubeSwingupFollowEnv', 154 | max_episode_steps=2048, 155 | kwargs={'use_simulator': True} 156 | ) 157 | register( 158 | id='QubeSwingupFollowSparseSimEnv-v0', 159 | entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv', 160 | max_episode_steps=2048, 161 | kwargs={'use_simulator': True} 162 | ) 163 | # Balance Environments 164 | register( 165 | id='QubeBalanceSimEnv-v0', 166 | entry_point='gym_brt.envs:QubeBalanceEnv', 167 | max_episode_steps=2048, 168 | kwargs={'use_simulator': True} 169 | ) 170 | register( 171 | id='QubeBalanceSparseSimEnv-v0', 172 | entry_point='gym_brt.envs:QubeBalanceSparseEnv', 173 | max_episode_steps=2048, 174 | kwargs={'use_simulator': True} 175 | ) 176 | register( 177 | id='QubeBalanceFollowSimEnv-v0', 178 | entry_point='gym_brt.envs:QubeBalanceFollowEnv', 179 | max_episode_steps=2048, 180 | kwargs={'use_simulator': True} 181 | ) 182 | register( 183 | id='QubeBalanceFollowSparseSimEnv-v0', 184 | entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv', 185 | max_episode_steps=2048, 186 | kwargs={'use_simulator': True} 187 | ) 188 | # Dampen Environments 189 | register( 190 | id='QubeDampenSimEnv-v0', 191 | entry_point='gym_brt.envs:QubeDampenEnv', 192 | max_episode_steps=2048, 193 | kwargs={'use_simulator': True} 194 | ) 195 | register( 196 | id='QubeDampenSparseSimEnv-v0', 197 | entry_point='gym_brt.envs:QubeDampenSparseEnv', 198 | max_episode_steps=2048, 199 | kwargs={'use_simulator': True} 200 | ) 201 | register( 202 | id='QubeDampenFollowSimEnv-v0', 203 | entry_point='gym_brt.envs:QubeDampenFollowEnv', 204 | max_episode_steps=2048, 205 | kwargs={'use_simulator': True} 206 | ) 207 | register( 208 | id='QubeDampenFollowSparseSimEnv-v0', 209 | entry_point='gym_brt.envs:QubeDampenFollowSparseEnv', 210 | max_episode_steps=2048, 211 | kwargs={'use_simulator': True} 212 | ) 213 | register( 214 | id='QubeBalanceFollowSineWaveSimEnv-v0', 215 | entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv', 216 | 
max_episode_steps=2048, 217 |     kwargs={'use_simulator': True} 218 | ) 219 | # Sine wave environments 220 | register( 221 |     id='QubeSwingupFollowSineWaveSimEnv-v0', 222 |     entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv', 223 |     max_episode_steps=2048, 224 |     kwargs={'use_simulator': True} 225 | ) 226 | register( 227 |     id='QubeRotorFollowSineWaveSimEnv-v0', 228 |     entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv', 229 |     max_episode_steps=2048, 230 |     kwargs={'use_simulator': True} 231 | ) 232 | register( 233 |     id='QubeDampenFollowSineWaveSimEnv-v0', 234 |     entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv', 235 |     max_episode_steps=2048, 236 |     kwargs={'use_simulator': True} 237 | ) 238 | # Rotor Environments 239 | # Note: the rotor environments have issues with the rewards. 240 | # register( 241 | #     id='QubeRotorSimEnv-v0', 242 | #     entry_point='gym_brt.envs:QubeRotorEnv', 243 | #     max_episode_steps=2048, 244 | #     kwargs={'use_simulator': True} 245 | # ) 246 | # register( 247 | #     id='QubeRotorFollowSimEnv-v0', 248 | #     entry_point='gym_brt.envs:QubeRotorFollowEnv', 249 | #     max_episode_steps=2048, 250 | #     kwargs={'use_simulator': True} 251 | # ) 252 | 253 | 254 | -------------------------------------------------------------------------------- /tests/notebooks/ODE vs Euler speedtest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numba\n", 10 | "import numpy as np\n", 11 | "from scipy.integrate import odeint" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Motor\n", 21 | "Rm = 8.4 # Resistance\n", 22 | "kt = 0.042 # Current-torque (N-m/A)\n", 23 | "km = 0.042 # Back-emf constant (V-s/rad)\n", 24 | "\n", 25 | "# Rotary Arm\n", 26 | "mr = 0.095 # Mass (kg)\n", 27 | "Lr = 0.085 # Total length 
(m)\n", 28 | "Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n", 29 | "\n", 30 | "# Pendulum Link\n", 31 | "mp = 0.024 # Mass (kg)\n", 32 | "Lp = 0.129 # Total length (m)\n", 33 | "Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n", 34 | "\n", 35 | "Br = Dr = 0.0005 # Equivalent viscous damping coefficient (N-m-s/rad)\n", 36 | "Bp = Dp = 0.00005 # Equivalent viscous damping coefficient (N-m-s/rad)\n", 37 | "g = 9.81 # Gravity constant\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def diff_forward_model_ode(state, t, action, dt):\n", 47 | " theta = state[0]\n", 48 | " alpha = state[1]\n", 49 | " theta_dot = state[2]\n", 50 | " alpha_dot = state[3]\n", 51 | " Vm = action\n", 52 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n", 53 | "\n", 54 | " # fmt: off\n", 55 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 56 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 57 | " # fmt: on\n", 58 | "\n", 59 | " diff_state = np.array([theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot])\n", 60 | " diff_state = np.array(diff_state, dtype=\"float64\")\n", 61 | " return diff_state\n", 62 | "\n", 63 | "\n", 64 | "def 
forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n", 65 | " t = np.linspace(0.0, dt, integration_steps)\n", 66 | "\n", 67 | " state = np.array([theta, alpha, theta_dot, alpha_dot])\n", 68 | " next_state = np.array(odeint(diff_forward_model_ode, state, t, args=(Vm, dt))).reshape(4,)\n", 69 | " \n", 70 | " theta = next_state[0]\n", 71 | " alpha = next_state[1]\n", 72 | " theta_dot = next_state[2]\n", 73 | " alpha_dot = next_state[3]\n", 74 | "\n", 75 | " theta = ((theta + np.pi) % (2 * np.pi)) - np.pi\n", 76 | " alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi\n", 77 | "\n", 78 | " return theta, alpha, theta_dot, alpha_dot\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n", 88 | " dt /= integration_steps\n", 89 | " for step in range(integration_steps):\n", 90 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n", 91 | "\n", 92 | " # fmt: off\n", 93 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 94 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 95 | " # fmt: on\n", 96 | "\n", 97 | " theta_dot += theta_dot_dot * dt\n", 98 | " 
alpha_dot += alpha_dot_dot * dt\n", 99 | "\n", 100 | " theta += theta_dot * dt\n", 101 | " alpha += alpha_dot * dt\n", 102 | "\n", 103 | " theta %= 2 * np.pi\n", 104 | " alpha %= 2 * np.pi\n", 105 | "\n", 106 | " return theta, alpha, theta_dot, alpha_dot\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "def f(forward_model, n=1000):\n", 116 | " for i in range(n):\n", 117 | " theta, alpha, theta_dot, alpha_dot = np.random.randn(4,) * 0.1\n", 118 | " Vm = np.random.randn()\n", 119 | " theta, alpha, theta_dot, alpha_dot = forward_model(\n", 120 | " theta, alpha, theta_dot, alpha_dot, Vm, dt=0.001, integration_steps=1\n", 121 | " )\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Without numba" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "41.4 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "%timeit -n 100 f(forward_model_ode)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "54.9 ms ± 12.5 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "%timeit -n 100 f(forward_model_euler)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "# With numba" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "forward_model_ode = numba.jit(forward_model_ode)\n", 179 | "forward_model_euler = numba.jit(forward_model_euler)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "49.4 ms ± 2.37 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "%timeit -n 100 f(forward_model_ode)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "5.62 ms ± 1.41 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "%timeit -n 100 f(forward_model_euler)" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "TensorFlow (3.6.6)", 220 | "language": "python", 221 | "name": "tf" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.6.7" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/quanser_wrapper.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | cimport gym_brt.quanser.quanser_wrapper.quanser_types as qt 6 | cimport gym_brt.quanser.quanser_wrapper.hil as hil 7 | cimport numpy as np 8 | 9 | from gym_brt.quanser.quanser_wrapper.error_codes import error_codes 10 | import numpy as np 11 | 12 | 13 | cdef print_possible_error(int result): 14 | """If there is an error, print the error code""" 15 | if result < 0: 16 | print(error_codes[-result]) 17 | 18 | 19 | cdef class QuanserWrapper: 20 | cdef hil.t_card board 21 | cdef hil.t_task task 22 | 23 | cdef qt.t_uint32[::] analog_r_channels 24 | cdef qt.t_uint32 num_analog_r_channels 25 | cdef qt.t_double[::] currents_r 26 | 27 | cdef qt.t_uint32[::] analog_w_channels 28 | cdef qt.t_uint32 num_analog_w_channels 29 | cdef qt.t_double[::] voltages_w 30 | 31 | cdef qt.t_uint32[::] digital_w_channels 32 | cdef qt.t_uint32 num_digital_w_channels 33 | cdef np.npy_int8[::] enables_w # Uses npy_int8 due to arm problem 34 | 35 | cdef qt.t_uint32[::] encoder_r_channels 36 | cdef qt.t_uint32 
num_encoder_r_channels 37 | cdef qt.t_int32[::] encoder_r_buffer 38 | 39 | cdef qt.t_uint32[::] other_r_channels 40 | cdef qt.t_uint32 num_other_r_channels 41 | cdef qt.t_double[::] other_r_buffer 42 | 43 | cdef qt.t_uint32[::] led_w_channels 44 | cdef qt.t_uint32 num_led_w_channels 45 | cdef qt.t_double[::] led_w_buffer 46 | 47 | cdef qt.t_double frequency, max_voltage 48 | cdef qt.t_int samples_overflowed 49 | cdef bint task_started 50 | 51 | def __init__( 52 | self, 53 | max_voltage, 54 | analog_r_channels, 55 | analog_w_channels, 56 | digital_w_channels, 57 | encoder_r_channels, 58 | other_r_channels, 59 | led_w_channels, 60 | frequency=1000 61 | ): 62 | self.max_voltage = max_voltage 63 | # Convert the channels into numpy arrays which are then stored in 64 | # memoryviews (to pass C buffers to the HIL API) 65 | self.num_analog_r_channels = len(analog_r_channels) 66 | self.num_analog_w_channels = len(analog_w_channels) 67 | self.num_digital_w_channels = len(digital_w_channels) 68 | self.num_encoder_r_channels = len(encoder_r_channels) 69 | self.num_other_r_channels = len(other_r_channels) 70 | self.num_led_w_channels = len(led_w_channels) 71 | self.analog_r_channels = np.array(analog_r_channels, dtype=np.uint32) 72 | self.analog_w_channels = np.array(analog_w_channels, dtype=np.uint32) 73 | self.digital_w_channels = np.array(digital_w_channels, dtype=np.uint32) 74 | self.encoder_r_channels = np.array(encoder_r_channels, dtype=np.uint32) 75 | self.other_r_channels = np.array(other_r_channels, dtype=np.uint32) 76 | self.led_w_channels = np.array(led_w_channels, dtype=np.uint32) 77 | 78 | self.frequency = frequency 79 | self.task_started = False 80 | 81 | def __enter__(self): 82 | """Start the hardware in a deterministic way (all motors, 83 | encoders, etc at 0) 84 | """ 85 | # Create memoryviews for read buffers 86 | self.currents_r = np.zeros( 87 | self.num_analog_r_channels, dtype=np.float64 88 | ) # t_double is 64 bits 89 | self.other_r_buffer = np.zeros( 90 | 
self.num_other_r_channels, dtype=np.float64 91 | ) # t_double is 64 bits 92 | 93 | # Set motor voltages_w and encoders to 0 94 | self.voltages_w = np.zeros( 95 | self.num_analog_w_channels, dtype=np.float64 96 | ) # t_double is 64 bits 97 | result = hil.hil_write_analog( 98 | self.board, 99 | &self.analog_w_channels[0], 100 | self.num_analog_w_channels, 101 | &self.voltages_w[0] 102 | ) 103 | print_possible_error(result) 104 | self.encoder_r_buffer = np.zeros( 105 | self.num_encoder_r_channels, dtype=np.int32 106 | ) # t_int32 is 32 bits 107 | result = hil.hil_set_encoder_counts( 108 | self.board, 109 | &self.encoder_r_channels[0], 110 | self.num_encoder_r_channels, 111 | &self.encoder_r_buffer[0] 112 | ) 113 | print_possible_error(result) 114 | 115 | # Set LED on to white (represents opening qube) 116 | self.led_w_buffer = np.ones( 117 | self.num_led_w_channels, dtype=np.float64 118 | ) # t_double is 64 bits 119 | result = hil.hil_write_other( 120 | self.board, 121 | &self.led_w_channels[0], 122 | self.num_led_w_channels, 123 | &self.led_w_buffer[0] 124 | ) 125 | print_possible_error(result) 126 | 127 | # Enables_r all the motors 128 | self.enables_w = np.ones( 129 | self.num_digital_w_channels, dtype=np.int8 130 | ) # t_bool is char 8 bits 131 | result = hil.hil_write_digital( 132 | self.board, 133 | &self.digital_w_channels[0], 134 | self.num_digital_w_channels, 135 | &self.enables_w[0] 136 | ) 137 | print_possible_error(result) 138 | 139 | return self 140 | 141 | def __exit__(self, type, value, traceback): 142 | """Make sure hardware turns off safely""" 143 | self._stop_task() 144 | 145 | # Set the motor voltages_w to 0 146 | self.voltages_w = np.zeros( 147 | self.num_analog_w_channels, dtype=np.float64 148 | ) # t_double is 64 bits 149 | hil.hil_write_analog( 150 | self.board, 151 | &self.analog_w_channels[0], 152 | self.num_analog_w_channels, 153 | &self.voltages_w[0] 154 | ) 155 | 156 | # Set LED on to default color (red) 157 | self.led_w_buffer = np.array( 
158 | [1.0, 0.0, 0.0], dtype=np.float64 159 | ) # t_double is 64 bits 160 | result = hil.hil_write_other( 161 | self.board, 162 | &self.led_w_channels[0], 163 | self.num_led_w_channels, 164 | &self.led_w_buffer[0] 165 | ) 166 | print_possible_error(result) 167 | 168 | 169 | # Disable all the motors 170 | self.enables_w = np.zeros( 171 | self.num_digital_w_channels, dtype=np.int8 172 | ) # t_bool is char 8 bits 173 | hil.hil_write_digital( 174 | self.board, 175 | &self.digital_w_channels[0], 176 | self.num_digital_w_channels, 177 | &self.enables_w[0] 178 | ) 179 | 180 | hil.hil_close(self.board) # Safely close the board 181 | 182 | def _create_task(self): 183 | """Start a task reads and writes at fixed intervals""" 184 | result = hil.hil_task_create_reader( 185 | self.board, 186 | 1, # Read 1 sample at a time 187 | &self.analog_r_channels[0], self.num_analog_r_channels, 188 | &self.encoder_r_channels[0], self.num_encoder_r_channels, 189 | NULL, 0, 190 | &self.other_r_channels[0], self.num_other_r_channels, 191 | &self.task 192 | ) 193 | print_possible_error(result) 194 | 195 | # Allow discarding of old samples after missed reads 196 | hil.hil_task_set_buffer_overflow_mode( 197 | self.task, hil.BUFFER_MODE_OVERWRITE_ON_OVERFLOW 198 | ) 199 | 200 | # Start the task 201 | result = hil.hil_task_start( 202 | self.task, 203 | hil.HARDWARE_CLOCK_0, 204 | self.frequency, 205 | -1 # Read continuously 206 | ) 207 | print_possible_error(result) 208 | if result < 0: 209 | raise ValueError("Could not start hil task") 210 | 211 | self.task_started = True 212 | 213 | def _stop_task(self): 214 | if self.task_started: 215 | self.task_started = False 216 | hil.hil_task_flush(self.task) 217 | hil.hil_task_stop(self.task) 218 | hil.hil_task_delete(self.task) 219 | 220 | def reset_encoders(self, channels=None): 221 | """Reset all or a few of the encoders""" 222 | if channels is None: 223 | # Set the entire encoder encoder_r_buffer to 0 224 | self.encoder_r_buffer = np.zeros( 225 | 
self.num_encoder_r_channels, dtype=np.int32 226 | ) # t_int32 is 32 bits 227 | else: 228 | # Set only specific encoders to 0, while leaving the others 229 | for channel in channels: 230 | # Check if the channel is valid (in the available encoder 231 | # channels for the hardware) 232 | if channel not in self.encoder_r_channels: 233 | raise ValueError( 234 | "Channel: {} is not a possible channel on this hardware.".format(channel) 235 | ) 236 | self.encoder_r_buffer[channel] = 0 237 | 238 | result = hil.hil_set_encoder_counts( 239 | self.board, 240 | &self.encoder_r_channels[0], 241 | self.num_encoder_r_channels, 242 | &self.encoder_r_buffer[0] 243 | ) 244 | print_possible_error(result) 245 | 246 | def action(self, voltages_w, led_w=None): 247 | # If it's the first time running action, then start the background r/w 248 | # task 249 | if not self.task_started: 250 | self._create_task() 251 | 252 | # Ensure safe voltage data 253 | if isinstance(voltages_w, list): 254 | voltages_w = np.array(voltages_w, dtype=np.float64) 255 | assert isinstance(voltages_w, np.ndarray) 256 | assert voltages_w.shape == (self.num_analog_w_channels,) 257 | assert voltages_w.dtype == np.float64 258 | # Clip voltages into the range of +- max_voltage 259 | voltages_w = np.clip(voltages_w, -self.max_voltage, self.max_voltage) 260 | 261 | if led_w is not None: 262 | # Ensure safe LED data 263 | if isinstance(led_w, list): 264 | led_w = np.array(led_w, dtype=np.float64) 265 | assert led_w.shape == (self.num_led_w_channels,) 266 | assert led_w.dtype == np.float64 267 | for i in range(self.num_led_w_channels): 268 | assert 0.0 <= led_w[i] <= 1.0 # HIL uses RGB scaled from 0-1 269 | self._set_led(led_w) # An immediate write to LED (not timed task) 270 | 271 | return self._action(voltages_w) 272 | 273 | def _action( 274 | self, np.ndarray[qt.t_double, ndim=1, mode="c"] voltages_w not None 275 | ): 276 | samples_read = hil.hil_task_read( 277 | self.task, 278 | 1, # Number of samples to read 279 |
&self.currents_r[0], 280 | &self.encoder_r_buffer[0], 281 | NULL, 282 | &self.other_r_buffer[0] 283 | ) 284 | if samples_read < 0: 285 | print_possible_error(samples_read) 286 | 287 | samples_overflowed = hil.hil_task_get_buffer_overflows(self.task) 288 | if samples_overflowed > self.samples_overflowed: 289 | print( 290 | "Missed {} samples".format(samples_overflowed - self.samples_overflowed) 291 | ) 292 | self.samples_overflowed = samples_overflowed 293 | 294 | # Then write voltages_w calculated for previous time step 295 | self.voltages_w = voltages_w 296 | result_write = hil.hil_write_analog( 297 | self.board, 298 | &self.analog_w_channels[0], 299 | self.num_analog_w_channels, 300 | &self.voltages_w[0] 301 | ) 302 | if result_write < 0: 303 | print_possible_error(result_write) 304 | 305 | return ( 306 | np.asarray(self.currents_r), 307 | np.asarray(self.encoder_r_buffer), 308 | np.asarray(self.other_r_buffer), 309 | ) 310 | 311 | def _set_led( 312 | self, np.ndarray[qt.t_double, ndim=1, mode="c"] led_w not None 313 | ): 314 | self.led_w_buffer = led_w 315 | result = hil.hil_write_other( 316 | self.board, 317 | &self.led_w_channels[0], 318 | self.num_led_w_channels, 319 | &self.led_w_buffer[0] 320 | ) 321 | if result < 0: 322 | print_possible_error(result) 323 | 324 | 325 | cdef class QuanserAero(QuanserWrapper): 326 | def __cinit__(self): 327 | board_type = b"quanser_aero_usb" 328 | board_identifier = b"0" 329 | result = hil.hil_open(board_type, board_identifier, &self.board) 330 | print_possible_error(result) 331 | if result < 0: 332 | raise IOError("Board could not be opened.") 333 | 334 | def __init__(self, frequency=250, max_voltage=18.0): 335 | analog_r_channels = [0, 1] 336 | analog_w_channels = [0, 1] 337 | digital_w_channels = [0, 1] 338 | encoder_r_channels = [0, 1, 2, 3] 339 | other_r_channels = [ 340 | 3000, 3001, 3002, 4000, 4001, 4002, 14000, 14001, 14002, 14003 341 | ] 342 | led_w_channels = [11000, 11001, 11002] 343 | 344 | super(QuanserAero, 
self).__init__( 345 | max_voltage=max_voltage, 346 | analog_r_channels=analog_r_channels, 347 | analog_w_channels=analog_w_channels, 348 | digital_w_channels=digital_w_channels, 349 | encoder_r_channels=encoder_r_channels, 350 | other_r_channels=other_r_channels, 351 | led_w_channels=led_w_channels, 352 | frequency=frequency 353 | ) 354 | 355 | 356 | cdef class QubeServo2(QuanserWrapper): 357 | def __cinit__(self): 358 | board_type = b"qube_servo2_usb" 359 | board_identifier = b"0" 360 | result = hil.hil_open(board_type, board_identifier, &self.board) 361 | print_possible_error(result) 362 | if result < 0: 363 | raise IOError("Board could not be opened.") 364 | 365 | def __init__(self, frequency=250, max_voltage=18.0): 366 | analog_r_channels = [0] 367 | analog_w_channels = [0] 368 | digital_w_channels = [0] 369 | encoder_r_channels = [0, 1] 370 | other_r_channels = [14000] 371 | led_w_channels = [11000, 11001, 11002] 372 | 373 | super(QubeServo2, self).__init__( 374 | max_voltage=max_voltage, 375 | analog_r_channels=analog_r_channels, 376 | analog_w_channels=analog_w_channels, 377 | digital_w_channels=digital_w_channels, 378 | encoder_r_channels=encoder_r_channels, 379 | other_r_channels=other_r_channels, 380 | led_w_channels=led_w_channels, 381 | frequency=frequency 382 | ) 383 | --------------------------------------------------------------------------------