├── gym_brt
├── quanser
│ ├── quanser_wrapper
│ │ ├── __init__.py
│ │ ├── setup.py
│ │ ├── quanser_types.pxd
│ │ ├── hil.pxd
│ │ └── quanser_wrapper.pyx
│ ├── __init__.py
│ ├── qube_simulator.py
│ └── qube_interfaces.py
├── envs
│ ├── rendering
│ │ ├── __init__.py
│ │ └── qube_render.py
│ ├── __init__.py
│ ├── qube_rotor_env.py
│ ├── qube_test_env.py
│ ├── qube_dampen_env.py
│ ├── qube_swingup_env.py
│ ├── qube_balance_env.py
│ └── qube_base_env.py
├── control
│ ├── __init__.py
│ └── control.py
└── __init__.py
├── .gitattributes
├── QUBE-Servo_2_angled_pendulum.jpg
├── docs
├── README.md
├── alternatives.md
├── control.md
├── quanser.md
└── envs.md
├── Pipfile
├── LICENSE
├── .gitignore
├── setup.py
├── environment.yaml
├── README.md
└── tests
├── lqr.py
├── test.py
└── notebooks
└── ODE vs Euler speedtest.ipynb
/gym_brt/quanser/quanser_wrapper/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-vendored
2 |
--------------------------------------------------------------------------------
/gym_brt/envs/rendering/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_brt.envs.rendering.qube_render import QubeRendererVypthon as QubeRenderer
2 |
--------------------------------------------------------------------------------
/QUBE-Servo_2_angled_pendulum.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BlueRiverTech/quanser-openai-driver/HEAD/QUBE-Servo_2_angled_pendulum.jpg
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Table of contents
3 | - [Setup alternatives](./alternatives.md)
4 | - [Installation](./alternatives.md#installation)
5 | - [Usage](./alternatives.md#usage)
6 | - [control](./control.md)
7 | - [envs](./envs.md)
8 | - [quanser](./quanser.md)
--------------------------------------------------------------------------------
/gym_brt/quanser/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_brt.quanser.qube_interfaces import QubeSimulator
2 |
3 | try:
4 | from gym_brt.quanser.qube_interfaces import QubeHardware
5 | except ImportError:
6 | print("Warning: Can not import QubeHardware in quanser/__init__.py")
7 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [dev-packages]
7 | Cython = "==0.29.32"
8 | autograd = "*"
9 |
10 | [packages]
11 | gym = "==0.21.0"
12 | numpy = "==1.21.6"
13 | scipy = "==1.7.3"
14 | vpython = "==7.6.4"
15 | gym-brt = {path = ".", editable = true}
16 |
--------------------------------------------------------------------------------
/gym_brt/control/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_brt.control.control import (
2 | zero_policy,
3 | constant_policy,
4 | random_policy,
5 | square_wave_policy,
6 | energy_control_policy,
7 | pd_control_policy,
8 | flip_and_hold_policy,
9 | square_wave_flip_and_hold_policy,
10 | dampen_policy,
11 | pd_tracking_control_policy,
12 | )
13 |
--------------------------------------------------------------------------------
/docs/alternatives.md:
--------------------------------------------------------------------------------
1 | # Alternatives
2 |
3 | ### Usage
4 | In addition to a [context manager (recommended)](../README.md#usage), the environment can also be closed manually by using `env.close()`
5 |
6 | ```python
7 | import gym
8 | from gym_brt import QubeSwingupEnv
9 |
10 | num_episodes = 10
11 | num_steps = 250
12 |
13 | env = QubeSwingupEnv()
14 | try:
15 | for episode in range(num_episodes):
16 | state = env.reset()
17 | for step in range(num_steps):
18 | action = env.action_space.sample()
19 | state, reward, done, _ = env.step(action)
20 | finally:
21 | env.close()
22 | ```
23 |
--------------------------------------------------------------------------------
/docs/control.md:
--------------------------------------------------------------------------------
1 |
2 | # Controllers
3 |
4 | ### Control
5 | Control base class.
6 |
7 | ### NoControl
8 | Applies no action (sets voltages to zero).
9 |
10 | ### RandomControl
11 | Applies a random action (samples from the action space).
12 |
13 | ### AeroControl
14 | Uses PID control to minimize (pitch - reference pitch) + (yaw - reference yaw), where reference is the original position.
15 |
16 | ### QubeFlipUpControl
17 | Uses a mixed mode controller that uses gains found from LQR to do the flip up when the pendulum angle is more than 20 degrees off upright, and uses PID control and filtering to hold the pendulum upright when under 20 degrees.
18 |
19 | ### QubeHoldControl
20 | Holding control uses PID with filtering, and outside of 20 degrees use no control.
21 |
--------------------------------------------------------------------------------
/gym_brt/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_brt.envs.qube_swingup_env import (
2 | QubeSwingupEnv,
3 | QubeSwingupSparseEnv,
4 | QubeSwingupFollowEnv,
5 | QubeSwingupFollowSparseEnv,
6 | )
7 | from gym_brt.envs.qube_balance_env import (
8 | QubeBalanceEnv,
9 | QubeBalanceSparseEnv,
10 | QubeBalanceFollowEnv,
11 | QubeBalanceFollowSparseEnv,
12 | )
13 | from gym_brt.envs.qube_dampen_env import (
14 | QubeDampenEnv,
15 | QubeDampenSparseEnv,
16 | QubeDampenFollowEnv,
17 | QubeDampenFollowSparseEnv,
18 | )
19 | from gym_brt.envs.qube_test_env import (
20 | QubeBalanceFollowSineWaveEnv,
21 | QubeSwingupFollowSineWaveEnv,
22 | QubeRotorFollowSineWaveEnv,
23 | QubeDampenFollowSineWaveEnv,
24 | )
25 | from gym_brt.envs.qube_rotor_env import QubeRotorEnv, QubeRotorFollowEnv
26 |
--------------------------------------------------------------------------------
/gym_brt/quanser/quanser_wrapper/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 | import numpy
5 |
6 | extensions = [
7 | Extension(
8 | "quanser_wrapper",
9 | ["quanser_wrapper.pyx"],
10 | include_dirs=["/opt/quanser/hil_sdk/include", numpy.get_include()],
11 | libraries=[
12 | "hil",
13 | "quanser_runtime",
14 | "quanser_common",
15 | "rt",
16 | "pthread",
17 | "dl",
18 | "m",
19 | "c",
20 | ],
21 | library_dirs=["/opt/quanser/hil_sdk/lib"],
22 | )
23 | ]
24 | cy_extensions = cythonize(extensions, compiler_directives={"language_level" : "3"})
25 | setup(ext_modules=cy_extensions, include_dirs=[numpy.get_include()])
26 |
--------------------------------------------------------------------------------
/docs/quanser.md:
--------------------------------------------------------------------------------
1 |
2 | # Quanser
3 | The OpenAI Gym environments use one of the following to calculate each step.
4 |
5 | Quanser wrapper is a hardware wrapper that allows the Qube Servo 2 and Quanser Aero to be used directly in OpenAI Gym.
6 | Quanser Simulator simulates the Qube Servo 2 only (Aero is currently not supported for simulation).
7 |
8 | # Quanser Wrapper
9 | The implementation of a Python wrapper around Quanser's C-based HIL SDK.
10 | This is written in Cython.
11 |
12 | All changes to this are recompiled if you install the gym_brt package with Cython installed.
13 |
14 | # Quanser Simulator
15 | A simulator and wrapper to use a simulator directly in the Qube environments instead of using the real hardware.
16 | This is useful for transfer learning from simulation to real hardware and for people who don't have access to the Qube Servo 2.
17 |
--------------------------------------------------------------------------------
/gym_brt/quanser/quanser_wrapper/quanser_types.pxd:
--------------------------------------------------------------------------------
1 | cimport numpy as np
2 |
3 | cdef extern from "/opt/quanser/hil_sdk/include/quanser_types.h":
4 | ctypedef int t_error
5 |
6 | ctypedef np.npy_int8 t_boolean # must always be 8 bits
7 | ctypedef np.npy_int8 t_byte # must always be 8 bits
8 | ctypedef np.npy_uint8 t_ubyte # must always be 8 bits
9 | ctypedef np.npy_int16 t_short # must always be 16 bits
10 | ctypedef np.npy_uint16 t_ushort # must always be 16 bits
11 | ctypedef np.npy_int32 t_int # must always be 32 bits
12 | ctypedef np.npy_uint32 t_uint # must always be 32 bits
13 | ctypedef np.npy_float64 t_double # must always be 64 bits
14 | ctypedef np.npy_int64 t_long # must always be 64 bits
15 |
16 | ctypedef np.npy_int8 t_int8
17 | ctypedef np.npy_uint8 t_uint8
18 | ctypedef np.npy_int16 t_int16
19 | ctypedef np.npy_uint16 t_uint16
20 | ctypedef np.npy_int32 t_int32
21 | ctypedef np.npy_uint32 t_uint32
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Blue River Technology
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docs/envs.md:
--------------------------------------------------------------------------------
1 |
2 | # Environments
3 | Quanser OpenAI Driver currently supports two pieces of hardware: the Qube Servo 2 USB and the Quanser Aero.
4 |
5 | # Qube
6 | For more information about the Qube click [here](https://www.quanser.com/products/qube-servo-2/)
7 |
8 | ### QubeBaseEnv
9 | The base class for Qube environments.
10 | Info:
11 |
12 | - Reset starts the pendulum from the bottom (at rest).
13 | - Has no reward function.
14 |
15 |
16 | ### QubeBeginDownEnv
17 | Info:
18 |
19 | - Reset starts the pendulum from the bottom (at rest).
20 | - The task is to flip up the pendulum and hold it upright.
21 | - Episode ends once the theta angle is greater than 90 degrees.
22 | - Reward is a function of the angles theta (arm angle) and alpha (pendulum), and the alpha angular velocity.
23 | - Encourages the arm to stay centered, the pendulum to stay upright, and to stay stationary.
24 |
25 |
26 | ### QubeBeginUprightEnv
27 | Info:
28 |
29 | - Reset starts the pendulum from the top (flipped up/inverted).
30 | - The task is to hold the pendulum upright.
31 | - Episode ends once the alpha angle is greater than 20 degrees or theta angle is greater than 90 degrees.
32 | - Reward is a function of the angles theta (arm angle) and alpha (pendulum), and the alpha angular velocity.
33 | - Encourages the arm to stay centered, the pendulum to stay upright, and to stay stationary.
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | .idea/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 | *.pyc
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # dotenv
87 | .env
88 |
89 | # virtualenv
90 | .venv
91 | venv/
92 | ENV/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, Extension
3 | from setuptools.command.build_ext import build_ext as _build_ext
4 |
5 |
6 | class build_ext(_build_ext):
7 | def finalize_options(self):
8 | _build_ext.finalize_options(self)
9 | # Prevent numpy from thinking it is still in its setup process:
10 | __builtins__.__NUMPY_SETUP__ = False
11 | import numpy
12 |
13 | self.include_dirs.append(numpy.get_include())
14 |
15 |
16 | extensions = [
17 | Extension(
18 | "gym_brt.quanser.quanser_wrapper.quanser_wrapper",
19 | ["gym_brt/quanser/quanser_wrapper/quanser_wrapper.pyx"],
20 | include_dirs=["/opt/quanser/hil_sdk/include"],
21 | libraries=[
22 | "hil",
23 | "quanser_runtime",
24 | "quanser_common",
25 | "rt",
26 | "pthread",
27 | "dl",
28 | "m",
29 | "c",
30 | ],
31 | library_dirs=["/opt/quanser/hil_sdk/lib"],
32 | )
33 | ]
34 |
35 | # If Cython is installed build from source otherwise use the precompiled version
36 | try:
37 | from Cython.Build import cythonize
38 |
39 | extensions = cythonize(extensions, compiler_directives={"language_level" : "3"})
40 | except ImportError:
41 | pass
42 |
43 |
44 | # Hacky way to check if the HIL SDK is installed (allows to run on Mac OS)
45 | is_hil_sdk_installed = False
46 | if os.path.isdir("/opt/quanser/hil_sdk/lib"):
47 | is_hil_sdk_installed = True
48 |
49 | setup(
50 | name="gym_brt",
51 | version=0.1,
52 | cmdclass={"build_ext": build_ext} if is_hil_sdk_installed else {},
53 | install_requires=["numpy", "gym", "scipy", "vpython"],
54 | setup_requires=["numpy"],
55 | ext_modules=extensions if is_hil_sdk_installed else None,
56 | description="Blue River's OpenAI Gym wrapper around Quanser hardware.",
57 | url="https://github.com/BlueRiverTech/quanser-openai-driver/",
58 | author="Blue River Technology",
59 | license="MIT",
60 | )
61 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_rotor_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import numpy as np
6 | from gym import spaces
7 | from gym_brt.envs.qube_base_env import QubeBaseEnv
8 |
9 |
10 | class QubeRotorEnv(QubeBaseEnv):
11 | def _reward(self):
12 | # Few options for reward:
13 | # - high reward for large alpha_dot and small theta
14 | # - reward for matching the action of the RPM controller
15 | # - reward for how close the pendulum matches a clock hand going at a certain RPM
16 |
17 | theta_dist = (1 - np.abs(self._target_angle - self._theta)) / np.pi
18 | return np.clip(theta_dist * self._alpha_dot / 20, 0, 20)
19 |
20 | def _isdone(self):
21 | done = False
22 | done |= self._episode_steps >= self._max_episode_steps
23 | done |= abs(self._theta) > (90 * np.pi / 180)
24 | return done
25 |
26 | def reset(self):
27 | super(QubeRotorEnv, self).reset()
28 | state = self._reset_down()
29 | return state
30 |
31 |
32 | class QubeRotorFollowEnv(QubeRotorEnv):
33 | def __init__(self, **kwargs):
34 | super(QubeRotorFollowEnv, self).__init__(**kwargs)
35 | obs_max = np.asarray(
36 | [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
37 | )
38 | self.observation_space = spaces.Box(-obs_max, obs_max)
39 |
40 | def _get_state(self):
41 | state = np.array(
42 | [
43 | self._theta,
44 | self._alpha,
45 | self._theta_dot,
46 | self._alpha_dot,
47 | self._target_angle,
48 | ],
49 | dtype=np.float64,
50 | )
51 | return state
52 |
53 | def _next_target_angle(self):
54 | # Update the target angle twice a second on average at random intervals
55 | if np.random.randint(1, self._frequency / 2) == 1:
56 | max_angle = 80 * (np.pi / 180) # 80 degrees
57 | angle = np.random.uniform(-max_angle, max_angle)
58 | else:
59 | angle = self._target_angle
60 | return angle
61 |
--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
1 | name: qube
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - ca-certificates=2019.8.28=0
7 | - certifi=2019.9.11=py37_0
8 | - libedit=3.1.20181209=hc058e9b_0
9 | - libffi=3.2.1=hd88cf55_4
10 | - libgcc-ng=9.1.0=hdf63c60_0
11 | - libstdcxx-ng=9.1.0=hdf63c60_0
12 | - ncurses=6.1=he6710b0_1
13 | - openssl=1.1.1d=h7b6447c_1
14 | - pip=19.2.3=py37_0
15 | - python=3.7.4=h265db76_1
16 | - readline=7.0=h7b6447c_5
17 | - setuptools=41.2.0=py37_0
18 | - sqlite=3.29.0=h7b6447c_0
19 | - tk=8.6.8=hbc83047_0
20 | - wheel=0.33.6=py37_0
21 | - xz=5.2.4=h14c3975_4
22 | - zlib=1.2.11=h7b6447c_3
23 | - pip:
24 | - asn1crypto==0.24.0
25 | - attrs==19.1.0
26 | - autobahn==19.9.3
27 | - backcall==0.1.0
28 | - bleach==3.1.0
29 | - cffi==1.12.3
30 | - cloudpickle==1.2.2
31 | - cryptography==2.7
32 | - decorator==4.4.0
33 | - defusedxml==0.6.0
34 | - entrypoints==0.3
35 | - future==0.17.1
36 | - gym==0.14.0
37 | - ipykernel==5.1.2
38 | - ipython==7.8.0
39 | - ipython-genutils==0.2.0
40 | - ipywidgets==7.5.1
41 | - jedi==0.15.1
42 | - jinja2==2.10.1
43 | - jsonschema==3.0.2
44 | - jupyter==1.0.0
45 | - jupyter-client==5.3.3
46 | - jupyter-console==6.0.0
47 | - jupyter-core==4.5.0
48 | - markupsafe==1.1.1
49 | - mistune==0.8.4
50 | - nbconvert==5.6.0
51 | - nbformat==4.4.0
52 | - notebook==6.0.1
53 | - numpy==1.17.2
54 | - pandocfilters==1.4.2
55 | - parso==0.5.1
56 | - pexpect==4.7.0
57 | - pickleshare==0.7.5
58 | - prometheus-client==0.7.1
59 | - prompt-toolkit==2.0.9
60 | - ptyprocess==0.6.0
61 | - pycparser==2.19
62 | - pyglet==1.3.2
63 | - pygments==2.4.2
64 | - pyrsistent==0.15.4
65 | - python-dateutil==2.8.0
66 | - pyzmq==18.1.0
67 | - qtconsole==4.5.5
68 | - scipy==1.3.1
69 | - send2trash==1.5.0
70 | - six==1.12.0
71 | - terminado==0.8.2
72 | - testpath==0.4.2
73 | - tornado==6.0.3
74 | - traitlets==4.3.2
75 | - txaio==18.8.1
76 | - vpnotebook==0.1.3
77 | - vpython==7.5.1
78 | - wcwidth==0.1.7
79 | - webencodings==0.5.1
80 | - widgetsnbextension==3.5.1
81 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_test_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import numpy as np
6 | from gym import spaces
7 | from gym_brt.envs.qube_base_env import QubeBaseEnv
8 | from gym_brt.envs.qube_swingup_env import QubeSwingupFollowEnv
9 | from gym_brt.envs.qube_balance_env import QubeBalanceFollowEnv
10 | from gym_brt.envs.qube_dampen_env import QubeDampenFollowEnv
11 | from gym_brt.envs.qube_rotor_env import QubeRotorFollowEnv
12 |
13 |
14 | """
15 | A series of testing environments to visualize what a policy learned in
16 | each of the `Follow` environments does.
17 | """
18 |
19 |
20 | class QubeBalanceFollowSineWaveEnv(QubeBalanceFollowEnv):
21 | def _next_target_angle(self):
22 | # Sine wave between -180 to +180 degrees
23 | print(
24 | "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
25 | self._target_angle * (57.3),
26 | 57.3 * self._theta,
27 | 57.3 * (self._target_angle - self._theta),
28 | )
29 | )
30 | return np.pi / 3 * np.sin(self._episode_steps / self._frequency)
31 |
32 |
33 | class QubeSwingupFollowSineWaveEnv(QubeSwingupFollowEnv):
34 | def _next_target_angle(self):
35 | # Sine wave between -180 to +180 degrees
36 | print(
37 | "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
38 | self._target_angle * (57.3),
39 | 57.3 * self._theta,
40 | 57.3 * (self._target_angle - self._theta),
41 | )
42 | )
43 | return np.pi / 3 * np.sin(self._episode_steps / self._frequency)
44 |
45 |
46 | class QubeRotorFollowSineWaveEnv(QubeRotorFollowEnv):
47 | def _next_target_angle(self):
48 | # Sine wave between -180 to +180 degrees
49 | print(
50 | "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
51 | self._target_angle * (57.3),
52 | 57.3 * self._theta,
53 | 57.3 * (self._target_angle - self._theta),
54 | )
55 | )
56 | return np.pi / 3 * np.sin(self._episode_steps / self._frequency)
57 |
58 |
59 | class QubeDampenFollowSineWaveEnv(QubeDampenFollowEnv):
60 | def _next_target_angle(self):
61 | # Sine wave between -180 to +180 degrees
62 | print(
63 | "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
64 | self._target_angle * (57.3),
65 | 57.3 * self._theta,
66 | 57.3 * (self._target_angle - self._theta),
67 | )
68 | )
69 | return np.pi * np.sin(self._episode_steps / self._frequency)
70 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_dampen_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import time
6 | import numpy as np
7 | from gym import spaces
8 | from gym_brt.envs.qube_base_env import QubeBaseEnv
9 |
10 |
11 | class QubeDampenEnv(QubeBaseEnv):
12 | def _reward(self):
13 | alpha = self._alpha
14 | alpha = (alpha + 2 * np.pi) % (2 * np.pi) - np.pi
15 | reward = 1 - (
16 | (0.8 * np.abs(alpha) + 0.2 * np.abs(self._target_angle - self._theta))
17 | / np.pi
18 | )
19 | return max(reward, 0) # Clip for the follow env case
20 |
21 | def _isdone(self):
22 | done = False
23 | done |= self._episode_steps >= self._max_episode_steps
24 | done |= abs(self._theta) > (90 * np.pi / 180)
25 | return done
26 |
27 | def reset(self):
28 | super(QubeDampenEnv, self).reset()
29 | state = self._reset_up()
30 | time.sleep(np.random.randint(1, 3000) / 1000) # Sleep between 1-3000ms
31 | return state
32 |
33 |
34 | class QubeDampenSparseEnv(QubeDampenEnv):
35 | def _reward(self):
36 | within_range = True
37 | # Assume -pi < alpha < pi
38 | within_range &= np.abs(self._alpha) > (189 * np.pi / 180)
39 | within_range &= np.abs(self._theta) < (1 * np.pi / 180)
40 | return 1 if within_range else 0
41 |
42 |
43 | class QubeDampenFollowEnv(QubeDampenEnv):
44 | def __init__(self, **kwargs):
45 | super(QubeDampenFollowEnv, self).__init__(**kwargs)
46 | obs_max = np.asarray(
47 | [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
48 | )
49 | self.observation_space = spaces.Box(-obs_max, obs_max)
50 |
51 | def _get_state(self):
52 | state = np.array(
53 | [
54 | self._theta,
55 | self._alpha,
56 | self._theta_dot,
57 | self._alpha_dot,
58 | self._target_angle,
59 | ],
60 | dtype=np.float64,
61 | )
62 | return state
63 |
64 | def _next_target_angle(self):
65 | # Update the target angle twice a second on average at random intervals
66 | if np.random.randint(1, self._frequency / 2) == 1:
67 | max_angle = 80 * (np.pi / 180) # 80 degrees
68 | angle = np.random.uniform(-max_angle, max_angle)
69 | else:
70 | angle = self._target_angle
71 | return angle
72 |
73 |
74 | class QubeDampenFollowSparseEnv(QubeDampenFollowEnv):
75 | def _reward(self):
76 | within_range = True
77 | # Assume -pi < alpha < pi
78 | within_range &= np.abs(self._alpha) > (189 * np.pi / 180)
79 | within_range &= np.abs(self._theta) < (1 * np.pi / 180)
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Quanser OpenAI Driver
2 | Has an OpenAI Gym wrapper for the Quanser Qube Servo 2 and Quanser Aero
3 |
4 | - [Setup](#setup)
5 | - [Prerequisites](#prerequisites)
6 | - [Installation](#installation)
7 | - [Recompiling Cython](#recompiling-cython-code)
8 | - [Basic Usage](#usage)
9 | - [Warning](#warning)
10 |
11 |
12 | # Setup
13 | We have tested on Ubuntu 16.04 LTS and Ubuntu 18.04 LTS using Python 2.7 and Python 3.6.5
14 |
15 |
16 | ### Prerequisites
17 | Install the HIL SDK from Quanser.
18 | A mirror is available at https://github.com/quanser/hil_sdk_linux_x86_64.
19 |
20 | You can install the driver by:
21 | ```bash
22 | git clone https://github.com/quanser/hil_sdk_linux_x86_64.git
23 | sudo chmod a+x ./hil_sdk_linux_x86_64/setup_hil_sdk ./hil_sdk_linux_x86_64/uninstall_hil_sdk
24 | sudo ./hil_sdk_linux_x86_64/setup_hil_sdk
25 | ```
26 |
27 | You also must have pip installed:
28 | ```bash
29 | sudo apt-get install python3-pip
30 | ```
31 |
32 |
33 | ### Installation
34 | We recommend that you use a virtual environment such as [conda (recommended)](https://conda.io/docs/user-guide/getting-started.html), [virtualenv](https://virtualenv.pypa.io/en/stable/), or [Pipenv](https://pipenv.readthedocs.io/en/latest/)
35 |
36 | You can install the driver by cloning and pip-installing:
37 | ```bash
38 | git clone https://github.com/BlueRiverTech/quanser-openai-driver.git
39 | cd quanser-openai-driver
40 | pip3 install -e .
41 | ```
42 |
43 | Once you have that setup: Run the classical control baseline (ensure the Qube is connected to your computer)
44 | ```bash
45 | python tests/test.py --env QubeSwingupEnv --controller flip
46 | ```
47 |
48 |
49 | # Usage
50 | Usage is very similar to most OpenAI gym environments but **requires** that you close the environment when finished.
51 | Without safely closing the Env, bad things may happen. Usually you will not be able to reopen the board.
52 |
53 | This can be done with a context manager using a `with` statement
54 | ```python
55 | import gym
56 | from gym_brt import QubeSwingupEnv
57 |
58 | num_episodes = 10
59 | num_steps = 250
60 |
61 | with QubeSwingupEnv() as env:
62 | for episode in range(num_episodes):
63 | state = env.reset()
64 | for step in range(num_steps):
65 | action = env.action_space.sample()
66 | state, reward, done, _ = env.step(action)
67 | ```
68 |
69 | Or can be closed manually by using `env.close()`. You can see an [example here](docs/alternatives.md#usage).
70 |
71 |
72 | # Environments
73 | Information about various environments can be found in [docs/envs](docs/envs.md) and our [whitepaper](https://arxiv.org/abs/2001.02254).
74 |
75 |
76 | # Control
77 | Information about baselines can be found in [docs/control](docs/control.md).
78 |
79 |
80 | # Hardware Wrapper
81 | Information about the Python wrapper for Quanser hardware and Qube Servo 2 simulator can be found in [docs/quanser](docs/quanser.md) and our [whitepaper](https://arxiv.org/abs/2001.02254).
82 |
83 |
84 | # Citing
85 | If you use this in your research please cite the following [whitepaper](https://arxiv.org/abs/2001.02254):
86 |
87 | ```
88 | @misc{2001.02254,
89 | author = {{Polzounov}, Kirill and {Sundar}, Ramitha and {Redden}, Lee},
90 | title = "{Blue River Controls: A toolkit for Reinforcement Learning Control Systems on Hardware}",
91 | year = {2019},
92 | eprint = {arXiv:2001.02254},
93 | howpublished = {Accepted at the Workshop on Deep Reinforcement Learning at the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada.}
94 | }
95 | ```
96 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_swingup_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import numpy as np
6 | from gym import spaces
7 | from gym_brt.envs.qube_base_env import QubeBaseEnv
8 |
9 | """
10 | Description:
11 | A pendulum is attached to an un-actuated joint to a horizontal arm,
12 | which is actuated by a rotary motor. The pendulum begins
13 | downwards and the goal is flip the pendulum up and then to keep it from
14 | falling by applying a voltage on the motor which causes a torque on the
15 | horizontal arm.
16 |
17 | Source:
18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole
19 | problem described by Barto, Sutton, and Anderson, and implemented in
20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
21 | This description is also modified from the description by the OpenAI
22 | team.
23 |
24 | Observation:
25 | Type: Box(4)
26 | Num Observation Min Max
27 | 0 Rotary arm angle (theta) -90 deg 90 deg
28 | 1 Pendulum angle (alpha) -180 deg 180 deg
29 | 2 Cart Velocity -Inf Inf
30 | 3 Pole Velocity -Inf Inf
31 | Note: the velocities are limited by the physical system.
32 |
33 | Actions:
34 | Type: Real number (1-D Continuous) (voltage applied to motor)
35 |
36 | Reward:
37 | r(s_t, a_t) = 1 - (0.8 * abs(alpha) + 0.2 * abs(theta)) / pi
38 |
39 | Starting State:
40 | Theta = 0 + noise, alpha = pi + noise
41 |
42 | Episode Termination:
43 | When theta is greater than ±90° or after 2048 steps
44 | """
45 |
46 |
47 | class QubeSwingupEnv(QubeBaseEnv):
48 | def _reward(self):
49 | reward = 1 - (
50 | (0.8 * np.abs(self._alpha) + 0.2 * np.abs(self._target_angle - self._theta))
51 | / np.pi
52 | )
53 | return max(reward, 0) # Clip for the follow env case
54 |
55 | def _isdone(self):
56 | done = False
57 | done |= self._episode_steps >= self._max_episode_steps
58 | done |= abs(self._theta) > (90 * np.pi / 180)
59 | return done
60 |
61 | def reset(self):
62 | super(QubeSwingupEnv, self).reset()
63 | state = self._reset_down()
64 | return state
65 |
66 |
67 | class QubeSwingupSparseEnv(QubeSwingupEnv):
68 | def _reward(self):
69 | within_range = True
70 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180)
71 | within_range &= np.abs(self._theta) < (1 * np.pi / 180)
72 | return 1 if within_range else 0
73 |
74 |
class QubeSwingupFollowEnv(QubeSwingupEnv):
    """Swing-up task where the arm must also track a moving target angle.

    The observation gains a 5th element: the current target angle (rad),
    bounded by +/-80 degrees.
    """

    def __init__(self, **kwargs):
        super(QubeSwingupFollowEnv, self).__init__(**kwargs)
        # Observation: [theta, alpha, theta_dot, alpha_dot, target_angle]
        obs_max = np.asarray(
            [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
        )
        self.observation_space = spaces.Box(-obs_max, obs_max)

    def _get_state(self):
        """Return the 5-element observation including the target angle."""
        state = np.array(
            [
                self._theta,
                self._alpha,
                self._theta_dot,
                self._alpha_dot,
                self._target_angle,
            ],
            dtype=np.float64,
        )
        return state

    def _next_target_angle(self):
        # Update the target angle twice a second on average at random intervals.
        # Bug fix: np.random.randint requires integer bounds (float bounds are
        # deprecated/removed in newer numpy, and test.py passes frequency as a
        # float). Also keep the upper bound > 1 so frequencies below 4 Hz do
        # not make randint raise.
        high = max(int(self._frequency // 2), 2)
        if np.random.randint(1, high) == 1:
            max_angle = 80 * (np.pi / 180)  # 80 degrees
            angle = np.random.uniform(-max_angle, max_angle)
        else:
            angle = self._target_angle
        return angle
104 |
105 |
class QubeSwingupFollowSparseEnv(QubeSwingupFollowEnv):
    """Follow variant with a sparse reward: 1 only within 1 degree of goal."""

    def _reward(self):
        one_degree = np.pi / 180
        at_goal = np.abs(self._alpha) < one_degree and np.abs(self._theta) < one_degree
        return 1 if at_goal else 0
112 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_balance_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import numpy as np
6 | from gym import spaces
7 | from gym_brt.envs.qube_base_env import QubeBaseEnv
8 |
9 |
10 | """
11 | Description:
12 | A pendulum is attached to an un-actuated joint to a horizontal arm,
13 | which is actuated by a rotary motor. The pendulum begins
14 | upright/inverted and the goal is to keep it from falling by applying a
15 | voltage on the motor which causes a torque on the horizontal arm.
16 |
17 | Source:
18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole
19 | problem described by Barto, Sutton, and Anderson, and implemented in
20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
21 | This description is also modified from the description by the OpenAI
22 | team.
23 |
24 | Observation:
25 | Type: Box(4)
26 | Num Observation Min Max
27 | 0 Rotary arm angle (theta) -90 deg 90 deg
28 | 1 Pendulum angle (alpha) -20 deg 20 deg
            2   Arm angular velocity          -Inf            Inf
            3   Pendulum angular velocity     -Inf            Inf
31 | Note: the velocities are limited by the physical system.
32 |
33 | Actions:
34 | Type: Real number (1-D Continuous) (voltage applied to motor)
35 |
36 | Reward:
37 | r(s_t, a_t) = 1 - (0.8 * abs(alpha) + 0.2 * abs(theta)) / pi
38 |
39 | Starting State:
40 | Theta = 0 + noise, alpha = 0 + noise
41 |
42 | Episode Termination:
43 | Alpha is greater than ±20° from upright, theta is greater than ±90°, or
44 | after 2048 steps
45 | """
46 |
47 |
class QubeBalanceEnv(QubeBaseEnv):
    """Balance the pendulum starting from the upright (inverted) position."""

    def _reward(self):
        # 1 at the perfect pose, decreasing linearly with the weighted error;
        # the pendulum angle is weighted heavier than the arm angle.
        err = 0.8 * np.abs(self._alpha) + 0.2 * np.abs(self._target_angle - self._theta)
        # Clip for the follow env case
        return max(1 - err / np.pi, 0)

    def _isdone(self):
        # Terminate on the step limit, the arm leaving +/-90 degrees, or the
        # pendulum falling more than 20 degrees from upright.
        if self._episode_steps >= self._max_episode_steps:
            return True
        if abs(self._theta) > (90 * np.pi / 180):
            return True
        return abs(self._alpha) > (20 * np.pi / 180)

    def reset(self):
        super(QubeBalanceEnv, self).reset()
        # Start the pendulum stationary at the top (stable point)
        return self._reset_up()
68 |
69 |
class QubeBalanceSparseEnv(QubeBalanceEnv):
    """Balance task with a sparse reward: 1 only within 1 degree of the goal."""

    def _reward(self):
        one_degree = np.pi / 180
        at_goal = np.abs(self._alpha) < one_degree and np.abs(self._theta) < one_degree
        return 1 if at_goal else 0
76 |
77 |
class QubeBalanceFollowEnv(QubeBalanceEnv):
    """Balance task where the arm must also track a moving target angle.

    The observation gains a 5th element: the current target angle (rad),
    bounded by +/-80 degrees.
    """

    def __init__(self, **kwargs):
        super(QubeBalanceFollowEnv, self).__init__(**kwargs)
        # Observation: [theta, alpha, theta_dot, alpha_dot, target_angle]
        obs_max = np.asarray(
            [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
        )
        self.observation_space = spaces.Box(-obs_max, obs_max)

    def _get_state(self):
        """Return the 5-element observation including the target angle."""
        state = np.array(
            [
                self._theta,
                self._alpha,
                self._theta_dot,
                self._alpha_dot,
                self._target_angle,
            ],
            dtype=np.float64,
        )
        return state

    def _next_target_angle(self):
        # Update the target angle twice a second on average at random intervals.
        # Bug fix: np.random.randint requires integer bounds (float bounds are
        # deprecated/removed in newer numpy, and test.py passes frequency as a
        # float). Also keep the upper bound > 1 so frequencies below 4 Hz do
        # not make randint raise.
        high = max(int(self._frequency // 2), 2)
        if np.random.randint(1, high) == 1:
            max_angle = 80 * (np.pi / 180)  # 80 degrees
            angle = np.random.uniform(-max_angle, max_angle)
        else:
            angle = self._target_angle
        return angle
107 |
108 |
class QubeBalanceFollowSparseEnv(QubeBalanceFollowEnv):
    """Follow variant with a sparse reward: 1 only within 1 degree of goal."""

    def _reward(self):
        one_degree = np.pi / 180
        at_goal = np.abs(self._alpha) < one_degree and np.abs(self._theta) < one_degree
        return 1 if at_goal else 0
115 |
--------------------------------------------------------------------------------
/gym_brt/quanser/qube_simulator.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | from scipy.integrate import odeint
6 | import numpy as np
7 | import math
8 |
9 |
# Physical constants of the Quanser QUBE-Servo 2, used by the forward models
# below. Values mirror the Quanser documentation for this device.

# Motor
Rm = 8.4  # Resistance (Ohm)
kt = 0.042  # Current-torque (N-m/A)
km = 0.042  # Back-emf constant (V-s/rad)

# Rotary Arm
mr = 0.095  # Mass (kg)
Lr = 0.085  # Total length (m)
# NOTE(review): m*L^2/12 is the inertia of a uniform rod about its CENTER;
# the "about pivot" label is kept from the original — confirm against the
# Quanser workbook before changing.
Jr = mr * Lr ** 2 / 12  # Moment of inertia about pivot (kg-m^2)
Dr = 0.00027  # Equivalent viscous damping coefficient (N-m-s/rad)

# Pendulum Link
mp = 0.024  # Mass (kg)
Lp = 0.129  # Total length (m)
Jp = mp * Lp ** 2 / 12  # Moment of inertia about pivot (kg-m^2)
Dp = 0.00005  # Equivalent viscous damping coefficient (N-m-s/rad)

g = 9.81  # Gravity constant (m/s^2)
28 |
29 |
def diff_forward_model_ode(state, t, action, dt):
    """Time-derivative of the Qube state, in the form scipy's odeint expects.

    Args:
        state: (theta, alpha, theta_dot, alpha_dot).
        t: integration time (unused; the dynamics are time-invariant).
        action: motor voltage Vm.
        dt: step size (unused here; kept so odeint's `args` tuple matches).

    Returns:
        float64 ndarray of shape (4,): the state's time-derivative.
    """
    theta, alpha, theta_dot, alpha_dot = state
    Vm = action
    tau = -(km * (Vm - km * theta_dot)) / Rm  # torque

    # fmt: off
    # From Rotary Pendulum Workbook
    theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))
    alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))
    # fmt: on

    return np.array(
        [theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot], dtype="float64"
    )
46 |
47 |
def forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):
    """Advance the simulated Qube by one control period using ODE integration.

    Returns (theta, alpha, theta_dot, alpha_dot) with both angles wrapped
    into [-pi, pi). `integration_steps` is currently ignored (see TODO).
    """
    t = np.linspace(0.0, dt, 2)  # TODO: add and check integration steps here

    x0 = np.array([theta, alpha, theta_dot, alpha_dot])
    trajectory = np.array(odeint(diff_forward_model_ode, x0, t, args=(Vm, dt)))
    theta, alpha, theta_dot, alpha_dot = trajectory[1, :]

    # Wrap the angles into [-pi, pi)
    theta = ((theta + np.pi) % (2 * np.pi)) - np.pi
    alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi

    return theta, alpha, theta_dot, alpha_dot
59 |
60 |
def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):
    """Advance the simulated Qube one control period with semi-implicit Euler.

    The full step `dt` is divided into `integration_steps` sub-steps for
    stability. Velocities are updated before positions (semi-implicit /
    symplectic Euler), which the original comment notes is more accurate
    for this system than classic explicit Euler.

    Args:
        theta: rotary arm angle (rad).
        alpha: pendulum angle (rad).
        theta_dot: arm angular velocity (rad/s).
        alpha_dot: pendulum angular velocity (rad/s).
        Vm: motor voltage.
        dt: total step duration (s).
        integration_steps: number of Euler sub-steps.

    Returns:
        (theta, alpha, theta_dot, alpha_dot) with angles wrapped to [-pi, pi).
    """
    dt /= integration_steps
    for step in range(integration_steps):
        tau = -(km * (Vm - km * theta_dot)) / Rm  # torque

        # fmt: off
        # From Rotary Pendulum Workbook
        theta_dot_dot = float((-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)))
        alpha_dot_dot = float((2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)))
        # fmt: on

        # Semi-implicit Euler: velocities first, then positions using the
        # updated velocities. (The original guarded this with `if True:` and
        # carried an unreachable classic-Euler branch; the dead code is
        # removed here — behavior is unchanged.)
        theta_dot += theta_dot_dot * dt
        alpha_dot += alpha_dot_dot * dt
        theta += theta_dot * dt
        alpha += alpha_dot * dt

    # Wrap the angles into [-pi, pi)
    theta = ((theta + np.pi) % (2 * np.pi)) - np.pi
    alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi

    return theta, alpha, theta_dot, alpha_dot
87 |
--------------------------------------------------------------------------------
/tests/lqr.py:
--------------------------------------------------------------------------------
# Import guards: re-raise with an actionable message when the optional
# dependencies are missing. Bug fix: the original used bare `except:`, which
# also intercepts KeyboardInterrupt/SystemExit and masks unrelated errors;
# only ImportError should trigger these messages.
try:
    import autograd.numpy as np
    from autograd import grad, jacobian
except ImportError:
    raise ImportError("Please install autograd.")
try:
    import scipy.linalg as sp_linalg
except ImportError:
    raise ImportError("Please install scipy.")
10 |
11 |
# Physical constants of the Quanser QUBE-Servo 2 used to derive the LQR gains.
# NOTE(review): the damping coefficients here (Dr=0.0015, Dp=0.0005) differ
# from the ones in gym_brt/quanser/qube_simulator.py (Dr=0.00027, Dp=0.00005);
# confirm which set matches the hardware before unifying.

# Motor
Rm = 8.4  # Resistance (Ohm)
kt = 0.042  # Current-torque (N-m/A)
km = 0.042  # Back-emf constant (V-s/rad)

# Rotary Arm
mr = 0.095  # Mass (kg)
Lr = 0.085  # Total length (m)
Jr = mr * Lr ** 2 / 12  # Moment of inertia about pivot (kg-m^2)
Br = Dr = 0.0015  # Equivalent viscous damping coefficient (N-m-s/rad)

# Pendulum Link
mp = 0.024  # Mass (kg)
Lp = 0.129  # Total length (m)
Jp = mp * Lp ** 2 / 12  # Moment of inertia about pivot (kg-m^2)
Dp = Bp = 0.0005  # Equivalent viscous damping coefficient (N-m-s/rad)

g = 9.81  # Gravity constant (m/s^2)
30 |
31 |
def forward_model(state, action, dt=1 / 300.0):
    """One discrete-time step of the Qube dynamics (explicit Euler).

    Written with autograd-compatible numpy ops only, so it can be
    differentiated by `jacobian` to linearize the system.

    Args:
        state: array [theta, alpha, theta_dot, alpha_dot].
        action: motor voltage Vm (1-element array or scalar).
        dt: step duration in seconds.

    Returns:
        The next state as a shape-(4,) array, with angles wrapped to [0, 2*pi).
    """
    theta, alpha, theta_dot, alpha_dot = state[0], state[1], state[2], state[3]
    Vm = action

    tau = (km * (Vm - km * theta_dot)) / Rm  # torque

    # fmt: off
    alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau))*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))
    theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))
    # fmt: on

    # Works around a single operating point
    # Classic (explicit) Euler: positions first, then velocities.
    theta = theta + theta_dot * dt  # TODO: Shouldn't the angles be ahead of the velocities?
    alpha = alpha + alpha_dot * dt
    theta_dot = theta_dot + theta_dot_dot * dt
    alpha_dot = alpha_dot + alpha_dot_dot * dt

    theta = theta % (2 * np.pi)
    alpha = alpha % (2 * np.pi)

    # For continuous version of LQR
    # state = np.array([theta_dot,alpha_dot,theta_dot_dot,alpha_dot_dot]).reshape((4,))

    # For discrete version of LQR
    return np.array([theta, alpha, theta_dot, alpha_dot]).reshape((4,))
61 |
62 |
def computeAB(current_state, current_control):
    """Linearize the forward dynamics around the given state and control.

    Returns the discrete-time Jacobians A = df/dstate and B = df/daction,
    computed with autograd.
    """
    # Wrap forward_model so the default dt is used during differentiation.
    def dynamics(state, action):
        return forward_model(state, action)

    A = jacobian(dynamics, 0)(current_state, current_control)
    B = jacobian(dynamics, 1)(current_state, current_control)

    # Correct continuous time linearization from Quanser Workbook -
    # A = np.array( [[0,0,1.0000,0],[0,0,0 ,1.0000], [0,149.2751,-0.0104,0],[0,261.6091,-0.0103,0]]).reshape((4,4))
    # B = np.array([ 0,0,49.7275,49.1493]).reshape((4,1))

    return A, B
76 |
77 |
def LQR_control():
    """Compute the LQR gain vector K for the linearized Qube dynamics.

    Linearizes around the upright equilibrium (zero state, zero control),
    solves the discrete algebraic Riccati equation, and returns the
    corresponding feedback gains as a 1-D array.
    """
    # Cost matrices for LQR: identity weights on the 4 states / 1 control.
    Q = np.diag(np.array([1, 1, 1, 1]))  # state_dimension = 4
    R = np.eye(1)  # control_dimension = 1

    A, B = computeAB(np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0]))

    # Discrete LQR: solve the discrete ARE, then K = (R + B'XB)^-1 B'XA.
    X = sp_linalg.solve_discrete_are(A, B, Q, R)
    btxb = np.dot(B.T, np.dot(X, B))
    K = np.dot(np.linalg.pinv(R + btxb), np.dot(B.T, np.dot(X, A)))

    # Use for continuous version of LQR
    # X = sp_linalg.solve_continuous_are(A, B, Q, R)
    # K = np.dot(np.linalg.pinv(R), np.dot(B.T, X))
    return np.squeeze(K, 0)
93 |
94 |
def main():
    """Print the computed LQR gains next to the Quanser reference gains.

    K from discrete dynamics + discrete LQR (or continuous dynamics +
    continuous LQR) should approximately match the Quanser workbook values
    and, more importantly, achieve balance on the Qube hardware.
    """
    reference_gains = [-2.0, 35.0, -1.5, 3.0]  # Correct K from quanser workbook
    computed_gains = LQR_control().tolist()

    print("The two following should be close to each other")
    print("\tThe gains from Quanser are:", reference_gains)
    print("\tThe calculated gains are: ", computed_gains)


if __name__ == "__main__":
    main()
109 |
--------------------------------------------------------------------------------
/gym_brt/envs/rendering/qube_render.py:
--------------------------------------------------------------------------------
try:
    # If you use a device such as a headless raspberry pi this may not work
    from gym.envs.classic_control import rendering
except Exception:
    # Kept broad on purpose: on headless machines the failure here can be a
    # display error from the rendering backend, not an ImportError. Bug fix:
    # the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; `except Exception:` does not.
    print("Warning: Can not import classic_control rendering in qube_render.py")

import vpython as vp
import numpy as np
9 |
10 |
class QubeRendererVypthon:
    """3D vpython renderer for the QubeServo2-USB rotary pendulum.

    Builds a static base and rotor plus three moving parts (rotary top, arm,
    and pendulum), then rotates them to follow (theta, alpha) on each call to
    :meth:`render`.

    NOTE(review): the "Vypthon" spelling is part of the public name (it is
    re-exported as ``QubeRenderer`` in the package ``__init__``), so it is
    left unchanged.
    """

    def __init__(self, theta, alpha, frequency):
        # theta: initial rotary arm angle (rad)
        # alpha: initial pendulum angle (rad)
        # frequency: frame rate cap passed to vp.rate() in render()
        self.frequency = frequency
        vp.scene.width, vp.scene.height = 1000, 600
        vp.scene.range = 0.25
        vp.scene.title = "QubeServo2-USB rotary pendulum"

        # Dimensions of the rotary pendulum parts (meters)
        base_w, base_h, base_d = 0.102, 0.101, 0.102  # width, height, & len of base
        rotor_d, rotor_h = 0.0202, 0.01  # height, diameter of the rotor platform
        rotary_top_l, rotary_top_d = 0.032, 0.012  # height, diameter of the rotary top
        self.arm_l, arm_d = 0.085, 0.00325  # height, diameter of the arm
        self.pen_l, self.pen_d = 0.129, 0.00475  # height, diameter of the pendulum

        # Origin of parts (arm pivot is the scene origin; the rest hang below)
        arm_origin = vp.vec(0, 0, 0)
        self.rotary_top_origin = vp.vec(0, 0, -rotary_top_l / 2)
        rotor_origin = arm_origin - vp.vec(0, rotor_h + rotary_top_d / 2 - 0.0035, 0)
        base_origin = rotor_origin - vp.vec(0, base_h / 2, 0)

        # Create the part objects
        base = vp.box(
            pos=base_origin,
            size=vp.vec(base_w, base_h, base_d),
            color=vp.vec(0.45, 0.45, 0.45),
        )
        rotor = vp.cylinder(
            pos=rotor_origin,
            axis=vp.vec(0, 1, 0),
            size=vp.vec(rotor_h, rotor_d, rotor_d),
            color=vp.color.yellow,
        )
        self._rotary_top = vp.cylinder(
            pos=self.rotary_top_origin,
            axis=vp.vec(0, 0, 1),
            size=vp.vec(rotary_top_l, rotary_top_d, rotary_top_d),
            color=vp.color.red,
        )
        self._arm = vp.cylinder(
            pos=arm_origin,
            axis=vp.vec(0, 0, 1),
            size=vp.vec(self.arm_l, arm_d, arm_d),
            color=vp.vec(0.7, 0.7, 0.7),
        )
        self._pendulum = vp.cylinder(
            pos=self.pendulum_origin(theta),
            axis=vp.vec(0, 1, 0),
            size=vp.vec(self.pen_l, self.pen_d, self.pen_d),
            color=vp.color.red,
        )

        # Rotate parts to their init orientations
        self._rotary_top.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin)
        self._arm.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin)
        self._pendulum.rotate(
            angle=alpha,
            axis=self.pendulum_axis(theta),
            origin=self.pendulum_origin(theta),
        )
        self.theta, self.alpha = theta, alpha

    def pendulum_origin(self, theta):
        """Position of the pendulum pivot (the free end of the arm) at `theta`."""
        x = self.arm_l * np.sin(theta)
        y = 0
        z = self.arm_l * np.cos(theta)
        return vp.vec(x, y, z)

    def pendulum_axis(self, theta):
        """Unit vector along the arm at `theta`; the pendulum swings about it."""
        x = np.sin(theta)
        y = 0
        z = np.cos(theta)
        return vp.vec(x, y, z)

    def render(self, theta, alpha):
        """Update the scene to the new (theta, alpha) and cap the frame rate."""
        # Arm and rotary top are rotated incrementally by the change in theta.
        dtheta = theta - self.theta
        self.theta = theta

        self._arm.rotate(angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0))
        self._rotary_top.rotate(
            angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0)
        )
        # The pendulum is re-posed from scratch each frame: reset its position,
        # axis, and size, then apply the absolute alpha rotation.
        self._pendulum.origin = self.pendulum_origin(theta)
        self._pendulum.pos = self.pendulum_origin(theta)
        self._pendulum.axis = vp.vec(0, 1, 0)
        self._pendulum.size = vp.vec(self.pen_l, self.pen_d, self.pen_d)
        self._pendulum.rotate(
            angle=alpha,
            axis=self.pendulum_axis(theta),
            origin=self.pendulum_origin(theta),
        )
        vp.rate(self.frequency)

    def close(self, *args, **kwargs):
        """No resources to release; kept for renderer interface parity."""
        pass
105 |
106 |
class QubeRenderer2D:
    """2D renderer drawing theta and alpha as two rotating dials.

    Uses gym's classic_control rendering: a grey dial for theta (left) and a
    red dial for alpha (right).
    """

    def __init__(self, theta, alpha, frequency):
        """Build the viewer and the two dial geometries.

        Note: `theta`, `alpha`, and `frequency` are accepted for interface
        parity with the vpython renderer; the initial pose is not drawn until
        render() is called.
        """
        width, height = (640, 240)
        self._viewer = rendering.Viewer(width, height)
        # Needle polygons for each dial (thin vertical bars).
        l, r, t, b = (2, -2, 0, 100)
        theta_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)])
        l, r, t, b = (2, -2, 0, 100)
        alpha_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)])
        theta_circle = rendering.make_circle(radius=100, res=64, filled=False)
        theta_circle.set_color(0.5, 0.5, 0.5)  # Theta is grey
        alpha_circle = rendering.make_circle(radius=100, res=64, filled=False)
        alpha_circle.set_color(0.8, 0.0, 0.0)  # Alpha is red
        theta_origin = (width / 2 - 150, height / 2)
        alpha_origin = (width / 2 + 150, height / 2)
        self._theta_tx = rendering.Transform(translation=theta_origin)
        self._alpha_tx = rendering.Transform(translation=alpha_origin)
        theta_poly.add_attr(self._theta_tx)
        alpha_poly.add_attr(self._alpha_tx)
        theta_circle.add_attr(self._theta_tx)
        alpha_circle.add_attr(self._alpha_tx)
        self._viewer.add_geom(theta_poly)
        self._viewer.add_geom(alpha_poly)
        self._viewer.add_geom(theta_circle)
        self._viewer.add_geom(alpha_circle)

    def render(self, theta, alpha, mode="human"):
        """Draw the dials at the given angles and return the viewer's output.

        Bug fix: the original referenced undefined `self._theta`,
        `self._alpha`, and `mode`, raising NameError on every call. The
        passed-in angles are used instead, and `mode` is now an explicit
        keyword argument (pass "rgb_array" to get a pixel array back).
        """
        self._theta_tx.set_rotation(theta + np.pi)
        self._alpha_tx.set_rotation(alpha)
        return self._viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the underlying viewer window."""
        self._viewer.close()
139 |
--------------------------------------------------------------------------------
/gym_brt/envs/qube_base_env.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import gym
6 | import time
7 | import math
8 | import numpy as np
9 |
10 | from gym import spaces
11 | from gym.utils import seeding
12 |
13 | # For other platforms where it's impossible to install the HIL SDK
14 | try:
15 | from gym_brt.quanser import QubeHardware
16 | except ImportError:
17 | print("Warning: Can not import QubeHardware in qube_base_env.py")
18 |
19 | from gym_brt.quanser import QubeSimulator
20 | from gym_brt.envs.rendering import QubeRenderer
21 |
22 |
# Actions are clipped to +/- this motor voltage before being sent to the Qube.
MAX_MOTOR_VOLTAGE = 3
ACT_MAX = np.asarray([MAX_MOTOR_VOLTAGE], dtype=np.float64)
# OBS_MAX = [theta, alpha, theta_dot, alpha_dot]
OBS_MAX = np.asarray([np.pi / 2, np.pi, np.inf, np.inf], dtype=np.float64)
27 |
28 |
class QubeBaseEnv(gym.Env):
    """A base class for all qube-based environments.

    Owns the connection to either the physical Qube (QubeHardware) or the
    simulator (QubeSimulator) and implements the gym step/reset/render/close
    lifecycle. Subclasses implement `_reward` and `_isdone`, and may override
    `_get_state` and `_next_target_angle`.
    """

    def __init__(
        self,
        frequency=250,
        batch_size=2048,
        use_simulator=False,
        encoder_reset_steps=int(1e8),
    ):
        # frequency: control/sample rate in Hz
        # batch_size: steps per episode (also the RL batch size)
        # use_simulator: run against the ODE simulator instead of hardware
        # encoder_reset_steps: hardware encoder re-zeroing interval (in steps)
        self.observation_space = spaces.Box(-OBS_MAX, OBS_MAX)
        self.action_space = spaces.Box(-ACT_MAX, ACT_MAX)

        self._frequency = frequency
        # Ensures that samples in episode are the same as batch size
        # Reset every batch_size steps (2048 ~= 8.192 seconds)
        self._max_episode_steps = batch_size
        self._episode_steps = 0
        self._encoder_reset_steps = encoder_reset_steps
        self._steps_since_encoder_reset = 0
        self._target_angle = 0

        # Latest measured state and the per-step angle deltas.
        self._theta, self._alpha, self._theta_dot, self._alpha_dot = 0, 0, 0, 0
        self._dtheta, self._dalpha = 0, 0

        # Open the Qube
        if use_simulator:
            # TODO: Check assumption: ODE integration should be ~ once per ms
            integration_steps = int(np.ceil(1000 / self._frequency))
            self.qube = QubeSimulator(
                forward_model="ode",
                frequency=self._frequency,
                integration_steps=integration_steps,
                max_voltage=MAX_MOTOR_VOLTAGE,
            )
        else:
            self.qube = QubeHardware(
                frequency=self._frequency, max_voltage=MAX_MOTOR_VOLTAGE
            )
        self.qube.__enter__()

        self.seed()
        self._viewer = None

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Forward the context-manager exit so the Qube is always closed.
        self.close(type=type, value=value, traceback=traceback)

    def seed(self, seed=None):
        """Seed the env's RNG; returns the seed list per the gym convention."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Send one clipped action to the Qube and cache the returned state."""
        led = self._led()

        action = np.clip(np.array(action, dtype=np.float64), -ACT_MAX, ACT_MAX)
        state = self.qube.step(action, led=led)

        # Track raw per-step angle deltas alongside the measured velocities.
        self._dtheta = state[0] - self._theta
        self._dalpha = state[1] - self._alpha
        self._theta, self._alpha, self._theta_dot, self._alpha_dot = state

    def reset(self):
        """Reset episode counters and return a fresh observation.

        Subclasses typically call this and then `_reset_up`/`_reset_down` to
        move the pendulum into the desired start position.
        """
        self._episode_steps = 0
        # Occasionally reset the encoders to remove sensor drift
        if self._steps_since_encoder_reset >= self._encoder_reset_steps:
            self.qube.reset_encoders()
            self._steps_since_encoder_reset = 0
        action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype)
        self._step(action)
        return self._get_state()

    def _reset_up(self):
        """Move the pendulum to the upright position and return the state."""
        self.qube.reset_up()
        action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype)
        self._step(action)
        return self._get_state()

    def _reset_down(self):
        """Move the pendulum to the hanging-down position and return the state."""
        self.qube.reset_down()
        action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype)
        self._step(action)
        return self._get_state()

    def _get_state(self):
        """Return the 4-element observation; subclasses may extend this."""
        return np.array(
            [self._theta, self._alpha, self._theta_dot, self._alpha_dot],
            dtype=np.float64,
        )

    def _next_target_angle(self):
        # Base envs have a fixed target; "follow" subclasses randomize it.
        return 0

    def _reward(self):
        raise NotImplementedError

    def _isdone(self):
        raise NotImplementedError

    def _led(self):
        """Pick the Qube LED color from the current state (subclass _isdone)."""
        if self._isdone():  # Doing reset
            led = [1.0, 1.0, 0.0]  # Yellow
        else:
            if abs(self._alpha) > (20 * np.pi / 180):
                led = [1.0, 0.0, 0.0]  # Red
            elif abs(self._theta) > (90 * np.pi / 180):
                led = [1.0, 0.0, 0.0]  # Red
            else:
                led = [0.0, 1.0, 0.0]  # Green
        return led

    def step(self, action):
        """Standard gym step: returns (state, reward, done, info).

        info carries the raw angles/velocities keyed by name.
        """
        self._step(action)
        state = self._get_state()
        reward = self._reward()
        done = self._isdone()
        info = {
            "theta": self._theta,
            "alpha": self._alpha,
            "theta_dot": self._theta_dot,
            "alpha_dot": self._alpha_dot,
        }

        self._episode_steps += 1
        self._steps_since_encoder_reset += 1
        self._target_angle = self._next_target_angle()

        return state, reward, done, info

    def render(self, mode="human"):
        # Lazily create the viewer on first use.
        if self._viewer is None:
            self._viewer = QubeRenderer(self._theta, self._alpha, self._frequency)
        self._viewer.render(self._theta, self._alpha)

    def close(self, type=None, value=None, traceback=None):
        # Safely close the Qube (important on hardware)
        self.qube.close(type=type, value=value, traceback=traceback)
        if self._viewer is not None:
            self._viewer.close()
170 |
--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import gym
6 | import time
7 | import argparse
8 | import numpy as np
9 |
10 | from gym_brt.envs import (
11 | QubeSwingupEnv,
12 | QubeSwingupSparseEnv,
13 | QubeSwingupFollowEnv,
14 | QubeSwingupFollowSparseEnv,
15 | QubeBalanceEnv,
16 | QubeBalanceSparseEnv,
17 | QubeBalanceFollowEnv,
18 | QubeBalanceFollowSparseEnv,
19 | QubeDampenEnv,
20 | QubeDampenSparseEnv,
21 | QubeDampenFollowEnv,
22 | QubeDampenFollowSparseEnv,
23 | QubeRotorEnv,
24 | QubeRotorFollowEnv,
25 | QubeBalanceFollowSineWaveEnv,
26 | QubeSwingupFollowSineWaveEnv,
27 | QubeRotorFollowSineWaveEnv,
28 | QubeDampenFollowSineWaveEnv,
29 | )
30 |
31 | from gym_brt.control import (
32 | zero_policy,
33 | constant_policy,
34 | random_policy,
35 | square_wave_policy,
36 | energy_control_policy,
37 | pd_control_policy,
38 | flip_and_hold_policy,
39 | square_wave_flip_and_hold_policy,
40 | dampen_policy,
41 | pd_tracking_control_policy,
42 | )
43 |
44 |
def print_info(state_info, action, reward):
    """Pretty-print one step's state, action, and reward to stdout."""
    state_fmt = (
        "State: theta={:06.3f}, alpha={:06.3f}, "
        "theta_dot={:06.3f}, alpha_dot={:06.3f}"
    )
    print(
        state_fmt.format(
            state_info["theta"],
            state_info["alpha"],
            state_info["theta_dot"],
            state_info["alpha_dot"],
        )
    )
    print("Action={}, Reward={}".format(action, reward))
56 |
57 |
def test_env(
    env_name,
    policy,
    num_episodes=10,
    num_steps=250,
    frequency=250,
    state_keys=None,
    verbose=False,
    use_simulator=False,
    render=False,
):
    """Run `policy` inside the env class `env_name` for several episodes.

    The env is opened as a context manager so the Qube is closed safely even
    if an episode raises.
    """
    with env_name(use_simulator=use_simulator, frequency=frequency) as env:
        for _episode in range(num_episodes):
            state = env.reset()
            # Prime with one zero-voltage step so `state` has the post-step
            # observation shape the policies expect.
            state, reward, done, info = env.step(np.array([0], dtype=np.float64))
            for step in range(num_steps):
                action = policy(state, step=step, frequency=frequency)
                state, reward, done, info = env.step(action)
                if done:
                    break
                if verbose:
                    print_info(info, action, reward)
                if render:
                    env.render()
83 |
84 |
def main():
    """Parse CLI options and run the chosen policy in the chosen environment."""
    # Name -> environment class.
    envs = {
        "QubeSwingupEnv": QubeSwingupEnv,
        "QubeSwingupSparseEnv": QubeSwingupSparseEnv,
        "QubeSwingupFollowEnv": QubeSwingupFollowEnv,
        "QubeSwingupFollowSparseEnv": QubeSwingupFollowSparseEnv,
        "QubeBalanceEnv": QubeBalanceEnv,
        "QubeBalanceSparseEnv": QubeBalanceSparseEnv,
        "QubeBalanceFollowEnv": QubeBalanceFollowEnv,
        "QubeBalanceFollowSparseEnv": QubeBalanceFollowSparseEnv,
        "QubeDampenEnv": QubeDampenEnv,
        "QubeDampenSparseEnv": QubeDampenSparseEnv,
        "QubeDampenFollowEnv": QubeDampenFollowEnv,
        "QubeDampenFollowSparseEnv": QubeDampenFollowSparseEnv,
        "QubeRotorEnv": QubeRotorEnv,
        "QubeRotorFollowEnv": QubeRotorFollowEnv,
        "QubeBalanceFollowSineWaveEnv": QubeBalanceFollowSineWaveEnv,
        "QubeSwingupFollowSineWaveEnv": QubeSwingupFollowSineWaveEnv,
        "QubeRotorFollowSineWaveEnv": QubeRotorFollowSineWaveEnv,
        "QubeDampenFollowSineWaveEnv": QubeDampenFollowSineWaveEnv,
    }
    # Name -> policy function; several aliases share one policy.
    policies = {
        "none": zero_policy,
        "zero": zero_policy,
        "const": constant_policy,
        "rand": random_policy,
        "random": random_policy,
        "sw": square_wave_policy,
        "energy": energy_control_policy,
        "pd": pd_control_policy,
        "hold": pd_control_policy,
        "flip": flip_and_hold_policy,
        "sw-hold": square_wave_flip_and_hold_policy,
        "damp": dampen_policy,
        "track": pd_tracking_control_policy,
    }

    # Parse command line args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--env",
        default="QubeSwingupEnv",
        choices=list(envs.keys()),
        help="Enviroment to run.",
    )
    parser.add_argument(
        "-c",
        "--controller",
        default="random",
        choices=list(policies.keys()),
        help="Select what type of action to take.",
    )
    parser.add_argument(
        "-ne",
        "--num-episodes",
        default="10",
        type=int,
        help="Number of episodes to run.",
    )
    parser.add_argument(
        "-ns",
        "--num-steps",
        default="10000",
        type=int,
        help="Number of step to run per episode.",
    )
    parser.add_argument(
        "-f",
        "--frequency",
        "--sample-frequency",
        default="250",
        type=float,
        help="The frequency of samples on the Quanser hardware.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument("-r", "--render", action="store_true")
    parser.add_argument("-s", "--use_simulator", action="store_true")
    args, _ = parser.parse_known_args()

    # Echo the chosen configuration, then run.
    print("Testing Env: {}".format(args.env))
    print("Controller: {}".format(args.controller))
    print("{} steps over {} episodes".format(args.num_steps, args.num_episodes))
    print("Samples freq: {}".format(args.frequency))

    test_env(
        envs[args.env],
        policies[args.controller],
        num_episodes=args.num_episodes,
        num_steps=args.num_steps,
        frequency=args.frequency,
        verbose=args.verbose,
        use_simulator=args.use_simulator,
        render=args.render,
    )


if __name__ == "__main__":
    main()
183 |
--------------------------------------------------------------------------------
/gym_brt/control/control.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import numpy as np
6 |
7 |
8 | def _convert_state(state):
9 | state = np.asarray(state)
10 | if state.shape == (4,):
11 | return state
12 | if state.shape == (5,):
13 | return state
14 | elif state.shape == (6,):
15 | # Get the angles
16 | theta_x, theta_y = state[0], state[1]
17 | alpha_x, alpha_y = state[2], state[3]
18 | theta = np.arctan2(theta_y, theta_x)
19 | alpha = np.arctan2(alpha_y, alpha_x)
20 | theta_dot, alpha_dot = state[4], state[5]
21 | return theta, alpha, theta_dot, alpha_dot
22 | else:
23 | raise ValueError(
24 | "State shape was expected to be one of: (4,1) or (6,1), found: {}".format(
25 | state.shape
26 | )
27 | )
28 |
29 |
# No input
def zero_policy(state, **kwargs):
    """Always apply zero voltage, regardless of state."""
    return np.zeros(1)
33 |
34 |
# Constant input
def constant_policy(state, **kwargs):
    """Always apply a fixed 3.0 V action, regardless of state."""
    return np.full(1, 3.0)
38 |
39 |
# Rand input
def random_policy(state, **kwargs):
    """Sample the action voltage from a standard normal distribution."""
    return np.array([np.random.randn()])
43 |
44 |
# Square wave, switch every 85 ms
def square_wave_policy(state, step, frequency=250, **kwargs):
    """Alternate between +3 V and -3 V on a fixed step schedule."""
    half_period = int(85 * (frequency / 300))
    state = _convert_state(state)  # validates the observation shape
    # First half of each full period drives positive, second half negative.
    phase = step % (2 * half_period)
    action = 3.0 if phase < half_period else -3.0
    return np.array([action])
56 |
57 |
# Flip policy
def energy_control_policy(state, **kwargs):
    """Energy-based swing-up controller.

    Drives the pendulum's total energy toward a reference energy, pushing in
    the direction that pumps energy in (or out) so it swings toward upright.
    """
    theta, alpha, theta_dot, alpha_dot = _convert_state(state)

    mu = 50.0  # in m/s/J
    ref_energy = 30.0 / 1000.0  # Er in joules
    max_u = 6  # Max action is 6m/s^2

    # System parameters
    jp = 3.3282e-5
    lp = 0.129
    lr = 0.085
    mp = 0.024
    mr = 0.095
    rm = 8.4
    g = 9.81
    kt = 0.042

    # Total pendulum energy: potential (relative to hanging down) + kinetic.
    pend_torque = (1 / 2) * mp * g * lp * (1 + np.cos(alpha))
    energy = pend_torque + (jp / 2.0) * alpha_dot * alpha_dot

    # Accelerate in the direction that moves energy toward the reference.
    u = mu * (energy - ref_energy) * np.sign(-1 * np.cos(alpha) * alpha_dot)
    u = np.clip(u, -max_u, max_u)

    # Convert the commanded arm acceleration into a motor voltage.
    torque = (mr * lr) * u
    voltage = (rm / kt) * torque
    return np.array([-voltage])
90 |
91 |
# Hold policy
def pd_control_policy(state, **kwargs):
    """PD balance controller.

    Holds the pendulum upright once it is within 20 degrees of vertical;
    outputs zero voltage otherwise. Output is clipped to +/-3 V.
    """
    theta, alpha, theta_dot, alpha_dot = _convert_state(state)
    # Proportional and derivative gains
    kp_theta, kd_theta = -2.0, -1.5
    kp_alpha, kd_alpha = 35.0, 3.0

    # Only engage balance control near upright
    if np.abs(alpha) > (20.0 * np.pi / 180.0):
        action = 0.0
    else:
        action = (
            theta * kp_theta
            + alpha * kp_alpha
            + theta_dot * kd_theta
            + alpha_dot * kd_alpha
        )
    return np.array([np.clip(action, -3.0, 3.0)])
114 |
115 |
# Flip and Hold
def flip_and_hold_policy(state, **kwargs):
    """Full swing-up-and-balance controller.

    Uses energy control to flip the pendulum up, then hands over to the
    PD balance controller within 20 degrees of upright.
    """
    state = _convert_state(state)
    alpha = state[1]
    near_upright = np.abs(alpha) <= (20.0 * np.pi / 180.0)
    if near_upright:
        return pd_control_policy(state)
    return energy_control_policy(state)
127 |
128 |
# Square wave instead of energy controller flip and hold
def square_wave_flip_and_hold_policy(state, **kwargs):
    """Swing-up-and-balance variant that flips up with an open-loop
    square wave instead of the energy controller, then balances with PD
    control within 20 degrees of upright.
    """
    state = _convert_state(state)
    alpha = state[1]
    near_upright = np.abs(alpha) <= (20.0 * np.pi / 180.0)
    if near_upright:
        return pd_control_policy(state)
    return square_wave_policy(state, **kwargs)
140 |
141 |
def dampen_policy(state, **kwargs):
    """Damping controller: actively brakes the pendulum as it passes near
    the bottom so it comes to rest hanging straight down.
    Output is clipped to +/-3 V.
    """
    theta, alpha, theta_dot, alpha_dot = _convert_state(state)

    # Remap alpha onto [-pi, +pi) with 0 at the downward (rest) position
    alt_alpha = (alpha + 2 * np.pi) % (2 * np.pi) - np.pi

    near_bottom = np.abs(alt_alpha) < (20.0 * np.pi / 180.0)
    arm_centered = np.abs(theta) < (np.pi / 4)
    if near_bottom and arm_centered:
        kp_theta, kd_theta = 0.9, 0.7
        kp_alpha, kd_alpha = -3.6, -0.285
        # Measure the alpha error from the bottom (alpha = +/-pi)
        alpha_err = (alpha - np.pi) if alpha >= 0 else (alpha + np.pi)
        action = (
            theta * kp_theta
            + alpha_err * kp_alpha
            + theta_dot * kd_theta
            + alpha_dot * kd_alpha
        )
    else:
        action = 0

    return np.array([np.clip(action, -3.0, 3.0)], dtype=np.float64)
172 |
173 |
# Hold policy
def pd_tracking_control_policy(state, **kwargs):
    """PD tracking controller.

    Balances the pendulum upright while steering the arm angle theta
    toward `theta_target` (fifth element of the state). Zero voltage when
    more than 20 degrees from upright; output clipped to +/-3 V.
    """
    theta, alpha, theta_dot, alpha_dot, theta_target = _convert_state(state)
    # Proportional and derivative gains
    kp_theta, kd_theta = -2.0, -1.5
    kp_alpha, kd_alpha = 35.0, 3.0

    # Only engage balance control near upright
    if np.abs(alpha) <= (20.0 * np.pi / 180.0):
        action = (
            (theta - theta_target) * kp_theta
            + alpha * kp_alpha
            + theta_dot * kd_theta
            + alpha_dot * kd_alpha
        )
    else:
        action = 0.0
    return np.array([np.clip(action, -3.0, 3.0)])
196 |
--------------------------------------------------------------------------------
/gym_brt/quanser/quanser_wrapper/hil.pxd:
--------------------------------------------------------------------------------
1 | include "quanser_types.pxd"
2 |
3 | cdef extern from "/opt/quanser/hil_sdk/include/hil.h":
4 | ctypedef struct t_card:
5 | pass
6 | ctypedef struct t_task:
7 | pass
8 | ctypedef struct t_monitor:
9 | pass
10 | cdef enum t_clock:
11 | SYSTEM_CLOCK_4 = -4
12 | SYSTEM_CLOCK_3 = -3
13 | SYSTEM_CLOCK_2 = -2
14 | SYSTEM_CLOCK_1 = -1
15 | HARDWARE_CLOCK_0 = 0
16 | HARDWARE_CLOCK_1 = 1
17 | HARDWARE_CLOCK_2 = 2
18 | HARDWARE_CLOCK_3 = 3
19 | HARDWARE_CLOCK_4 = 4
20 | HARDWARE_CLOCK_5 = 5
21 | HARDWARE_CLOCK_6 = 6
22 | HARDWARE_CLOCK_7 = 7
23 | HARDWARE_CLOCK_8 = 8
24 | HARDWARE_CLOCK_9 = 9
25 | HARDWARE_CLOCK_10 = 10
26 | HARDWARE_CLOCK_11 = 11
27 | HARDWARE_CLOCK_12 = 12
28 | HARDWARE_CLOCK_13 = 13
29 | HARDWARE_CLOCK_14 = 14
30 | HARDWARE_CLOCK_15 = 15
31 | HARDWARE_CLOCK_16 = 16
32 | HARDWARE_CLOCK_17 = 17
33 | HARDWARE_CLOCK_18 = 18
34 | HARDWARE_CLOCK_19 = 19
35 | cdef enum t_buffer_overflow_mode:
36 | BUFFER_MODE_ERROR_ON_OVERFLOW, # return an error on buffer overflow (default).
37 | BUFFER_MODE_OVERWRITE_ON_OVERFLOW, # overwrite old samples on buffer overflow.
38 | BUFFER_MODE_DISCARD_ON_OVERFLOW # discard new samples on buffer overflow.
39 |
40 | t_error hil_open(
41 | const char * card_type,
42 | const char * card_identifier,
43 | t_card * card
44 | )
45 | t_error hil_close(t_card card)
46 |
47 | # For debugging
48 | t_error hil_close_all() # Sometimes the board doesn't safely close
49 |
50 | # Synchronous reads/writes
51 | t_error hil_set_encoder_counts(
52 | t_card card,
53 | const t_uint32 encoder_channels[],
54 | t_uint32 num_channels,
55 | const t_int32 buffer[]
56 | )
57 | t_error hil_write_analog(
58 | t_card card,
59 | const t_uint32 analog_channels[],
60 | t_uint32 num_channels,
61 | const t_double buffer[]
62 | )
63 | t_error hil_write_digital(
64 | t_card card,
65 | const t_uint32 digital_lines[],
66 | t_uint32 num_lines,
67 | const t_boolean buffer[]
68 | )
69 | t_error hil_write_other(
70 | t_card card,
71 | const t_uint32 other_channels[],
72 | t_uint32 num_channels,
73 | const t_double buffer[]
74 | )
75 |
76 | t_error hil_read_analog(
77 | t_card card,
78 | const t_uint32 analog_channels[],
79 | t_uint32 num_channels,
80 | t_double buffer[]
81 | )
82 | t_error hil_read_encoder(
83 | t_card card,
84 | const t_uint32 encoder_channels[],
85 | t_uint32 num_channels,
86 | t_int32 buffer[]
87 | )
88 | t_error hil_read_digital(
89 | t_card card,
90 | const t_uint32 digital_lines[],
91 | t_uint32 num_lines,
92 | t_boolean buffer[]
93 | )
94 | t_error hil_read_other(
95 | t_card card,
96 | const t_uint32 other_channels[],
97 | t_uint32 num_channels,
98 | t_double buffer[]
99 | )
100 | t_error hil_read(
101 | t_card card,
102 | const t_uint32 analog_channels[], t_uint32 num_analog_channels,
103 | const t_uint32 encoder_channels[], t_uint32 num_encoder_channels,
104 | const t_uint32 digital_lines[], t_uint32 num_digital_lines,
105 | const t_uint32 other_channels[], t_uint32 num_other_channels,
106 | t_double analog_buffer[],
107 | t_int32 encoder_buffer[],
108 | t_boolean digital_buffer[],
109 | t_double other_buffer[]
110 | )
111 |
112 | # Async
113 | t_error hil_task_create_reader_writer(
114 | t_card card, t_uint32 samples_in_buffer,
115 | const t_uint32 analog_input_channels[],
116 | t_uint32 num_analog_input_channels,
117 | const t_uint32 encoder_input_channels[],
118 | t_uint32 num_encoder_input_channels,
119 | const t_uint32 digital_input_lines[],
120 | t_uint32 num_digital_input_lines,
121 | const t_uint32 other_input_channels[],
122 | t_uint32 num_other_input_channels,
123 |
124 | const t_uint32 analog_output_channels[],
125 | t_uint32 num_analog_output_channels,
126 | const t_uint32 pwm_output_channels[],
127 | t_uint32 num_pwm_output_channels,
128 | const t_uint32 digital_output_lines[],
129 | t_uint32 num_digital_output_lines,
130 | const t_uint32 other_output_channels[],
131 | t_uint32 num_other_output_channels,
132 | t_task *task
133 | )
134 | t_error hil_task_create_reader(
135 | t_card card, t_uint32 samples_in_buffer,
136 | const t_uint32 analog_channels[],
137 | t_uint32 num_analog_channels,
138 | const t_uint32 encoder_channels[],
139 | t_uint32 num_encoder_channels,
140 | const t_uint32 digital_lines[],
141 | t_uint32 num_digital_lines,
142 | const t_uint32 other_channels[],
143 | t_uint32 num_other_channels,
144 | t_task *task
145 | )
146 |
147 | t_error hil_task_start(
148 | t_task task,
149 | t_clock clock,
150 | t_double frequency,
151 | t_uint32 num_samples
152 | )
153 | t_error hil_task_flush(t_task task)
154 | t_error hil_task_stop(t_task task)
155 | t_error hil_task_delete(t_task task)
156 |
157 | t_error hil_task_read_write(
158 | t_task task, t_uint32 num_samples,
159 | t_double analog_input_buffer[],
160 | t_int32 encoder_input_buffer[],
161 | t_boolean digital_input_buffer[],
162 | t_double other_input_buffer[],
163 |
164 | const t_double analog_output_buffer[],
165 | const t_double pwm_output_buffer[],
166 | const t_boolean digital_output_buffer[],
167 | const t_double other_output_buffer[]
168 | )
169 | t_error hil_task_write(
170 | t_task task, t_uint32 num_samples,
171 | const t_double analog_buffer[],
172 | const t_double pwm_buffer[],
173 | const t_boolean digital_buffer[],
174 | const t_double other_buffer[]
175 | )
176 | t_error hil_task_read(
177 | t_task task, t_uint32 num_samples,
178 | t_double analog_buffer[],
179 | t_int32 encoder_buffer[],
180 | t_boolean digital_buffer[],
181 | t_double other_buffer[]
182 | )
183 |
184 | # Allow buffer overflow
185 | t_error hil_task_set_buffer_overflow_mode(
186 | t_task task,
187 | t_buffer_overflow_mode mode)
188 | t_int hil_task_get_buffer_overflows(t_task task)
189 |
--------------------------------------------------------------------------------
/gym_brt/quanser/qube_interfaces.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import gym
6 | import time
7 | import math
8 | import numpy as np
9 |
10 | from gym import spaces
11 | from gym.utils import seeding
12 |
13 | from gym_brt.control import flip_and_hold_policy, dampen_policy
14 |
15 | # For other platforms where it's impossible to install the HIL SDK
16 | try:
17 | from gym_brt.quanser.quanser_wrapper.quanser_wrapper import QubeServo2
18 | except ImportError:
19 | print("Warning: Can not import QubeServo2 in qube_interface.py")
20 |
21 | from gym_brt.quanser.qube_simulator import forward_model_euler, forward_model_ode
22 |
23 |
class QubeHardware(object):
    """Simplify the interface between the environment and the physical
    Qube Servo 2 board (driven through the Cython HIL SDK wrapper).

    State layout throughout is [theta, alpha, theta_dot, alpha_dot]:
    theta is the rotary-arm angle, alpha the pendulum angle (0 = upright
    after the shift in `step`), both in radians.
    """

    def __init__(self, frequency=250, max_voltage=18.0):
        # Internal states of the derivative filters used to estimate
        # theta_dot and alpha_dot from the encoder angles (see `step`)
        self._theta_dot_cstate = 0
        self._alpha_dot_cstate = 0
        self._frequency = frequency  # control/sample frequency in Hz

        # Open the Qube
        # NOTE(review): `max_voltage` is accepted but not yet forwarded
        self.qube = QubeServo2(frequency=frequency) # TODO: max_voltage=max_voltage
        self.qube.__enter__()

        # Last known state; NOTE(review): `step` returns a fresh state but
        # never writes it back here -- confirm whether that is intended.
        self.state = np.array([0, 0, 0, 0], dtype=np.float64)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def _match_state(s):
        # NOTE(review): this method is missing `self` and returns an empty
        # array -- it looks like dead/unfinished code; confirm before use.
        theta, alpha, theta_dot, alpha_dot = s
        return np.array([])

    def step(self, action, led=None):
        """Apply `action` (motor voltage) for one period and return the new
        state [theta, alpha, theta_dot, alpha_dot] estimated from encoders.

        `led` is forwarded to the board's RGB LED (may be None).
        """
        theta, alpha, theta_dot, alpha_dot = self.state
        currents, encoders, others = self.qube.action(action, led_w=led)

        # Calculate alpha, theta, alpha_dot, and theta_dot
        # 2048 encoder counts per revolution; theta's sign is flipped by
        # convention
        theta = encoders[0] * (-2.0 * np.pi / 2048)
        # Alpha without normalizing
        alpha_un = encoders[1] * (2.0 * np.pi / 2048)
        # Normalized and shifted alpha
        alpha = (alpha_un % (2.0 * np.pi)) - np.pi

        # Angular-velocity estimates.
        # NOTE(review): this looks like a discretized derivative filter
        # (constants 50 and 2500 = 50^2 suggest a ~50 rad/s pole) -- confirm
        # against the controller derivation before relying on this reading.
        # fmt: off
        theta_dot = -2500 * self._theta_dot_cstate + 50 * theta
        alpha_dot = -2500 * self._alpha_dot_cstate + 50 * alpha_un
        self._theta_dot_cstate += (-50 * self._theta_dot_cstate + theta) / self._frequency
        self._alpha_dot_cstate += (-50 * self._alpha_dot_cstate + alpha_un) / self._frequency
        # fmt: on

        state = np.array([theta, alpha, theta_dot, alpha_dot], dtype=np.float64)
        return state

    def reset_up(self):
        """Run classic control for flip-up until the pendulum is inverted for
        a set amount of time. Assumes that initial state is stationary
        downwards.
        """
        time_hold = 1.0 * self._frequency  # Hold upright 1 second
        sample = 0  # Samples since control system started
        samples_upright = 0  # Consecutive samples pendulum is upright

        action = np.array([1], dtype=np.float64)
        state = self.step([1.0])
        while True:
            action = flip_and_hold_policy(state)
            state = self.step(action)

            # Break if pendulum is inverted
            alpha = state[1]
            # NOTE(review): no abs() here, so large *negative* alpha (e.g.
            # near -pi) also satisfies this test -- confirm intended.
            if alpha < (10 * np.pi / 180):
                if samples_upright > time_hold:
                    break
                samples_upright += 1
            else:
                samples_upright = 0
            sample += 1

        return state

    def reset_down(self):
        """Dampen the pendulum until it hangs stationary at the bottom
        (within 2 degrees of straight down for 0.5 s). Returns the state.
        """
        action = np.array([0], dtype=np.float64)
        time_hold = 0.5 * self._frequency  # 0.5 seconds
        samples_downwards = 0  # Consecutive samples pendulum is stationary

        state = self.step([1.0])
        while True:
            action = dampen_policy(state)
            state = self.step(action)

            # Break if pendulum is stationary
            alpha = state[1]
            if abs(alpha) > (178 * np.pi / 180):
                if samples_downwards > time_hold:
                    break
                samples_downwards += 1
            else:
                samples_downwards = 0
        return state

    def reset_encoders(self):
        """Fully stop the pendulum at the bottom. Then reset the alpha encoder"""
        self.reset_down()

        self.step(np.array([0], dtype=np.float64))
        print("Doing a hard reset and, reseting the alpha encoder")
        time.sleep(25)  # Do nothing for 25 seconds to ensure pendulum is stopped

        # This is needed to prevent sensor drift on the alpha/pendulum angle
        # We ONLY reset the alpha channel because the dampen function stops the
        # pendulum from moving but does not perfectly center the pendulum at the
        # bottom (this way alpha is very close to perfect and theta does not
        # drift much)
        print("Pre encoder reset:", self.qube.action(np.array([0], dtype=np.float64)))
        self.qube.reset_encoders(channels=[1]) # Alpha channel only
        print("After encoder reset:", self.qube.action(np.array([0], dtype=np.float64)))

    def close(self, type=None, value=None, traceback=None):
        # Safely close the Qube
        self.qube.__exit__(type=type, value=value, traceback=traceback)
136 |
137 |
class QubeSimulator(object):
    """Simulator exposing the same interface as the hardware wrapper.

    State layout is [theta, alpha, theta_dot, alpha_dot]; resets add a
    small Gaussian perturbation so episodes are not perfectly identical.
    """

    def __init__(
        self, forward_model="ode", frequency=250, integration_steps=1, max_voltage=18.0
    ):
        """Create a simulator.

        Args:
            forward_model: "ode", "euler", or a callable with signature
                (theta, alpha, theta_dot, alpha_dot, Vm, dt, steps) -> state.
            frequency: control frequency in Hz (sets the integration dt).
            integration_steps: sub-steps per control period.
            max_voltage: actions are clipped to +/- this value.

        Raises:
            ValueError: if `forward_model` is neither a known string nor
                a callable.
        """
        if callable(forward_model):
            self._forward_model = forward_model
        elif forward_model == "ode":
            self._forward_model = forward_model_ode
        elif forward_model == "euler":
            self._forward_model = forward_model_euler
        else:
            raise ValueError(
                "'forward_model' must be one of ['ode', 'euler'] or a callable."
            )

        self._dt = 1.0 / frequency
        self._integration_steps = integration_steps
        self._max_voltage = max_voltage
        # Start near upright with a small random perturbation
        self.state = (
            np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01
        )

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def step(self, action, led=None):
        """Clip the action to the voltage limit and advance the state by
        one control period. `led` is accepted for interface parity only.
        """
        clipped = np.clip(action, -self._max_voltage, self._max_voltage)
        self.state = self._forward_model(
            *self.state, clipped, self._dt, self._integration_steps
        )
        return self.state

    def reset_up(self):
        """Reset to (approximately) the upright equilibrium."""
        self.state = (
            np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01
        )
        return self.state

    def reset_down(self):
        """Reset to (approximately) hanging straight down (alpha = pi)."""
        self.state = (
            np.array([0, np.pi, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01
        )
        return self.state

    def reset_encoders(self):
        # No physical encoders to reset in simulation
        pass

    def close(self, type=None, value=None, traceback=None):
        # Nothing to clean up in simulation
        pass
197 |
--------------------------------------------------------------------------------
/gym_brt/__init__.py:
--------------------------------------------------------------------------------
"""Register all Qube Gym environments.

Each environment is registered twice: a hardware version (nondeterministic,
use_simulator=False) and a "...Sim..." version (use_simulator=True). All
episodes are capped at 2048 steps.
"""
from gym.envs.registration import register

### Hardware Environments ######################################################
# Swingup Environments
register(
    id='QubeSwingupEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeSwingupSparseEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeSwingupFollowEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeSwingupFollowSparseEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
# Balance Environments
register(
    id='QubeBalanceEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeBalanceSparseEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeBalanceFollowEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeBalanceFollowSparseEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
# Dampen Environments
register(
    id='QubeDampenEnv-v0',
    entry_point='gym_brt.envs:QubeDampenEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeDampenSparseEnv-v0',
    entry_point='gym_brt.envs:QubeDampenSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeDampenFollowEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeDampenFollowSparseEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowSparseEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
# NOTE: this is a sine-wave environment; it is registered here, ahead of the
# "Sine wave environments" comment below.
register(
    id='QubeBalanceFollowSineWaveEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
# Sine wave environments
register(
    id='QubeSwingupFollowSineWaveEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeRotorFollowSineWaveEnv-v0',
    entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
register(
    id='QubeDampenFollowSineWaveEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv',
    nondeterministic=True,
    max_episode_steps=2048,
    kwargs={'use_simulator': False}
)
# Rotor Environments
# Note: the rotor environments have issues with the rewards.
# register(
#     id='QubeRotorEnv-v0',
#     entry_point='gym_brt.envs:QubeRotorEnv',
#     nondeterministic=True,
#     max_episode_steps=2048,
#     kwargs={'use_simulator': False}
# )
# register(
#     id='QubeRotorFollowEnv-v0',
#     entry_point='gym_brt.envs:QubeRotorFollowEnv',
#     nondeterministic=True,
#     max_episode_steps=2048,
#     kwargs={'use_simulator': False}
# )

### Simulated Environments #####################################################
# Swingup Environments
register(
    id='QubeSwingupSimEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeSwingupSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeSwingupFollowSimEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeSwingupFollowSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
# Balance Environments
register(
    id='QubeBalanceSimEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeBalanceSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeBalanceFollowSimEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeBalanceFollowSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
# Dampen Environments
register(
    id='QubeDampenSimEnv-v0',
    entry_point='gym_brt.envs:QubeDampenEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeDampenSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeDampenSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeDampenFollowSimEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeDampenFollowSparseSimEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowSparseEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
# NOTE: this is a sine-wave environment; it is registered here, ahead of the
# "Sine wave environments" comment below.
register(
    id='QubeBalanceFollowSineWaveSimEnv-v0',
    entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
# Sine wave environments
register(
    id='QubeSwingupFollowSineWaveSimEnv-v0',
    entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeRotorFollowSineWaveSimEnv-v0',
    entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
register(
    id='QubeDampenFollowSineWaveSimEnv-v0',
    entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv',
    max_episode_steps=2048,
    kwargs={'use_simulator': True}
)
# Rotor Environments
# Note: the rotor environments have issues with the rewards.
# register(
#     id='QubeRotorSimEnv-v0',
#     entry_point='gym_brt.envs:QubeRotorEnv',
#     max_episode_steps=2048,
#     kwargs={'use_simulator': True}
# )
# register(
#     id='QubeRotorFollowSimEnv-v0',
#     entry_point='gym_brt.envs:QubeRotorFollowEnv',
#     max_episode_steps=2048,
#     kwargs={'use_simulator': True}
# )
--------------------------------------------------------------------------------
/tests/notebooks/ODE vs Euler speedtest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numba\n",
10 | "import numpy as np\n",
11 | "from scipy.integrate import odeint"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# Motor\n",
21 | "Rm = 8.4 # Resistance\n",
22 | "kt = 0.042 # Current-torque (N-m/A)\n",
23 | "km = 0.042 # Back-emf constant (V-s/rad)\n",
24 | "\n",
25 | "# Rotary Arm\n",
26 | "mr = 0.095 # Mass (kg)\n",
27 | "Lr = 0.085 # Total length (m)\n",
28 | "Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n",
29 | "\n",
30 | "# Pendulum Link\n",
31 | "mp = 0.024 # Mass (kg)\n",
32 | "Lp = 0.129 # Total length (m)\n",
33 | "Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n",
34 | "\n",
35 | "Br = Dr = 0.0005 # Equivalent viscous damping coefficient (N-m-s/rad)\n",
36 | "Bp = Dp = 0.00005 # Equivalent viscous damping coefficient (N-m-s/rad)\n",
37 | "g = 9.81 # Gravity constant\n"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "def diff_forward_model_ode(state, t, action, dt):\n",
47 | " theta = state[0]\n",
48 | " alpha = state[1]\n",
49 | " theta_dot = state[2]\n",
50 | " alpha_dot = state[3]\n",
51 | " Vm = action\n",
52 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n",
53 | "\n",
54 | " # fmt: off\n",
55 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n",
56 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n",
57 | " # fmt: on\n",
58 | "\n",
59 | " diff_state = np.array([theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot])\n",
60 | " diff_state = np.array(diff_state, dtype=\"float64\")\n",
61 | " return diff_state\n",
62 | "\n",
63 | "\n",
64 | "def forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n",
65 | " t = np.linspace(0.0, dt, integration_steps)\n",
66 | "\n",
67 | " state = np.array([theta, alpha, theta_dot, alpha_dot])\n",
68 | " next_state = np.array(odeint(diff_forward_model_ode, state, t, args=(Vm, dt))).reshape(4,)\n",
69 | " \n",
70 | " theta = next_state[0]\n",
71 | " alpha = next_state[1]\n",
72 | " theta_dot = next_state[2]\n",
73 | " alpha_dot = next_state[3]\n",
74 | "\n",
75 | " theta = ((theta + np.pi) % (2 * np.pi)) - np.pi\n",
76 | " alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi\n",
77 | "\n",
78 | " return theta, alpha, theta_dot, alpha_dot\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 4,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n",
88 | " dt /= integration_steps\n",
89 | " for step in range(integration_steps):\n",
90 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n",
91 | "\n",
92 | " # fmt: off\n",
93 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n",
94 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n",
95 | " # fmt: on\n",
96 | "\n",
97 | " theta_dot += theta_dot_dot * dt\n",
98 | " alpha_dot += alpha_dot_dot * dt\n",
99 | "\n",
100 | " theta += theta_dot * dt\n",
101 | " alpha += alpha_dot * dt\n",
102 | "\n",
103 | " theta %= 2 * np.pi\n",
104 | " alpha %= 2 * np.pi\n",
105 | "\n",
106 | " return theta, alpha, theta_dot, alpha_dot\n"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 5,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": [
115 | "def f(forward_model, n=1000):\n",
116 | " for i in range(n):\n",
117 | " theta, alpha, theta_dot, alpha_dot = np.random.randn(4,) * 0.1\n",
118 | " Vm = np.random.randn()\n",
119 | " theta, alpha, theta_dot, alpha_dot = forward_model(\n",
120 | " theta, alpha, theta_dot, alpha_dot, Vm, dt=0.001, integration_steps=1\n",
121 | " )\n"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "# Without numba"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 6,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "41.4 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "%timeit -n 100 f(forward_model_ode)"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 7,
151 | "metadata": {},
152 | "outputs": [
153 | {
154 | "name": "stdout",
155 | "output_type": "stream",
156 | "text": [
157 | "54.9 ms ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "%timeit -n 100 f(forward_model_euler)"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "# With numba"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 8,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "forward_model_ode = numba.jit(forward_model_ode)\n",
179 | "forward_model_euler = numba.jit(forward_model_euler)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 9,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "49.4 ms ± 2.37 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
192 | ]
193 | }
194 | ],
195 | "source": [
196 | "%timeit -n 100 f(forward_model_ode)"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 10,
202 | "metadata": {},
203 | "outputs": [
204 | {
205 | "name": "stdout",
206 | "output_type": "stream",
207 | "text": [
208 | "5.62 ms ± 1.41 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "%timeit -n 100 f(forward_model_euler)"
214 | ]
215 | }
216 | ],
217 | "metadata": {
218 | "kernelspec": {
219 | "display_name": "TensorFlow (3.6.6)",
220 | "language": "python",
221 | "name": "tf"
222 | },
223 | "language_info": {
224 | "codemirror_mode": {
225 | "name": "ipython",
226 | "version": 3
227 | },
228 | "file_extension": ".py",
229 | "mimetype": "text/x-python",
230 | "name": "python",
231 | "nbconvert_exporter": "python",
232 | "pygments_lexer": "ipython3",
233 | "version": "3.6.7"
234 | }
235 | },
236 | "nbformat": 4,
237 | "nbformat_minor": 2
238 | }
239 |
--------------------------------------------------------------------------------
/gym_brt/quanser/quanser_wrapper/quanser_wrapper.pyx:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | cimport gym_brt.quanser.quanser_wrapper.quanser_types as qt
6 | cimport gym_brt.quanser.quanser_wrapper.hil as hil
7 | cimport numpy as np
8 |
9 | from gym_brt.quanser.quanser_wrapper.error_codes import error_codes
10 | import numpy as np
11 |
12 |
cdef print_possible_error(int result):
    """Print the HIL error message matching a negative result code."""
    if result >= 0:
        return
    print(error_codes[-result])
17 |
18 |
cdef class QuanserWrapper:
    """Base wrapper around the Quanser HIL API for a single board.

    Typed memoryviews hold the C buffers handed to the HIL read/write calls.
    Intended to be used as a context manager: entering zeroes the motor
    voltages and encoder counts, turns the LED white and enables the motors;
    exiting disables the motors, zeroes the voltages, sets the LED red and
    closes the board.
    """
    cdef hil.t_card board
    cdef hil.t_task task

    # Analog reads: motor currents
    cdef qt.t_uint32[::] analog_r_channels
    cdef qt.t_uint32 num_analog_r_channels
    cdef qt.t_double[::] currents_r

    # Analog writes: motor voltages
    cdef qt.t_uint32[::] analog_w_channels
    cdef qt.t_uint32 num_analog_w_channels
    cdef qt.t_double[::] voltages_w

    # Digital writes: motor enables
    cdef qt.t_uint32[::] digital_w_channels
    cdef qt.t_uint32 num_digital_w_channels
    cdef np.npy_int8[::] enables_w # Uses npy_int8 due to arm problem

    # Encoder reads
    cdef qt.t_uint32[::] encoder_r_channels
    cdef qt.t_uint32 num_encoder_r_channels
    cdef qt.t_int32[::] encoder_r_buffer

    # "Other" reads (board-specific sensor channels)
    cdef qt.t_uint32[::] other_r_channels
    cdef qt.t_uint32 num_other_r_channels
    cdef qt.t_double[::] other_r_buffer

    # LED writes (RGB, each component scaled 0-1)
    cdef qt.t_uint32[::] led_w_channels
    cdef qt.t_uint32 num_led_w_channels
    cdef qt.t_double[::] led_w_buffer

    cdef qt.t_double frequency, max_voltage
    cdef qt.t_int samples_overflowed  # Running count of missed samples so far
    cdef bint task_started

    def __init__(
        self,
        max_voltage,
        analog_r_channels,
        analog_w_channels,
        digital_w_channels,
        encoder_r_channels,
        other_r_channels,
        led_w_channels,
        frequency=1000
    ):
        """Store channel lists and configuration for the board.

        Args:
            max_voltage: Voltages passed to `action` are clipped to
                [-max_voltage, +max_voltage].
            analog_r_channels/analog_w_channels/digital_w_channels/
            encoder_r_channels/other_r_channels/led_w_channels:
                HIL channel numbers for each channel group.
            frequency: Sample rate (Hz) of the background read task.
        """
        self.max_voltage = max_voltage
        # Convert the channels into numpy arrays which are then stored in
        # memoryviews (to pass C buffers to the HIL API)
        self.num_analog_r_channels = len(analog_r_channels)
        self.num_analog_w_channels = len(analog_w_channels)
        self.num_digital_w_channels = len(digital_w_channels)
        self.num_encoder_r_channels = len(encoder_r_channels)
        self.num_other_r_channels = len(other_r_channels)
        self.num_led_w_channels = len(led_w_channels)
        self.analog_r_channels = np.array(analog_r_channels, dtype=np.uint32)
        self.analog_w_channels = np.array(analog_w_channels, dtype=np.uint32)
        self.digital_w_channels = np.array(digital_w_channels, dtype=np.uint32)
        self.encoder_r_channels = np.array(encoder_r_channels, dtype=np.uint32)
        self.other_r_channels = np.array(other_r_channels, dtype=np.uint32)
        self.led_w_channels = np.array(led_w_channels, dtype=np.uint32)

        self.frequency = frequency
        self.task_started = False

    def __enter__(self):
        """Start the hardware in a deterministic way (all motors,
        encoders, etc at 0)
        """
        # Create memoryviews for read buffers
        self.currents_r = np.zeros(
            self.num_analog_r_channels, dtype=np.float64
        ) # t_double is 64 bits
        self.other_r_buffer = np.zeros(
            self.num_other_r_channels, dtype=np.float64
        ) # t_double is 64 bits

        # Set motor voltages_w and encoders to 0
        self.voltages_w = np.zeros(
            self.num_analog_w_channels, dtype=np.float64
        ) # t_double is 64 bits
        result = hil.hil_write_analog(
            self.board,
            &self.analog_w_channels[0],
            self.num_analog_w_channels,
            &self.voltages_w[0]
        )
        print_possible_error(result)
        self.encoder_r_buffer = np.zeros(
            self.num_encoder_r_channels, dtype=np.int32
        ) # t_int32 is 32 bits
        result = hil.hil_set_encoder_counts(
            self.board,
            &self.encoder_r_channels[0],
            self.num_encoder_r_channels,
            &self.encoder_r_buffer[0]
        )
        print_possible_error(result)

        # Set LED on to white (represents opening qube)
        self.led_w_buffer = np.ones(
            self.num_led_w_channels, dtype=np.float64
        ) # t_double is 64 bits
        result = hil.hil_write_other(
            self.board,
            &self.led_w_channels[0],
            self.num_led_w_channels,
            &self.led_w_buffer[0]
        )
        print_possible_error(result)

        # Enables_r all the motors
        self.enables_w = np.ones(
            self.num_digital_w_channels, dtype=np.int8
        ) # t_bool is char 8 bits
        result = hil.hil_write_digital(
            self.board,
            &self.digital_w_channels[0],
            self.num_digital_w_channels,
            &self.enables_w[0]
        )
        print_possible_error(result)

        return self

    def __exit__(self, type, value, traceback):
        """Make sure hardware turns off safely"""
        self._stop_task()

        # Set the motor voltages_w to 0
        self.voltages_w = np.zeros(
            self.num_analog_w_channels, dtype=np.float64
        ) # t_double is 64 bits
        hil.hil_write_analog(
            self.board,
            &self.analog_w_channels[0],
            self.num_analog_w_channels,
            &self.voltages_w[0]
        )

        # Set LED on to default color (red)
        self.led_w_buffer = np.array(
            [1.0, 0.0, 0.0], dtype=np.float64
        ) # t_double is 64 bits
        result = hil.hil_write_other(
            self.board,
            &self.led_w_channels[0],
            self.num_led_w_channels,
            &self.led_w_buffer[0]
        )
        print_possible_error(result)

        # Disable all the motors
        self.enables_w = np.zeros(
            self.num_digital_w_channels, dtype=np.int8
        ) # t_bool is char 8 bits
        hil.hil_write_digital(
            self.board,
            &self.digital_w_channels[0],
            self.num_digital_w_channels,
            &self.enables_w[0]
        )

        hil.hil_close(self.board) # Safely close the board

    def _create_task(self):
        """Create and start a background task that reads samples at a
        fixed rate (`self.frequency`).

        Raises:
            ValueError: If the task could not be started.
        """
        result = hil.hil_task_create_reader(
            self.board,
            1, # Read 1 sample at a time
            &self.analog_r_channels[0], self.num_analog_r_channels,
            &self.encoder_r_channels[0], self.num_encoder_r_channels,
            NULL, 0,
            &self.other_r_channels[0], self.num_other_r_channels,
            &self.task
        )
        print_possible_error(result)

        # Allow discarding of old samples after missed reads
        hil.hil_task_set_buffer_overflow_mode(
            self.task, hil.BUFFER_MODE_OVERWRITE_ON_OVERFLOW
        )

        # Start the task
        result = hil.hil_task_start(
            self.task,
            hil.HARDWARE_CLOCK_0,
            self.frequency,
            -1 # Read continuously
        )
        print_possible_error(result)
        if result < 0:
            raise ValueError("Could not start hil task")

        self.task_started = True

    def _stop_task(self):
        """Flush, stop and delete the background read task (no-op if it was
        never started)."""
        if self.task_started:
            self.task_started = False
            hil.hil_task_flush(self.task)
            hil.hil_task_stop(self.task)
            hil.hil_task_delete(self.task)

    def reset_encoders(self, channels=None):
        """Reset all or a few of the encoders.

        Args:
            channels: Iterable of encoder channel numbers to zero, or None
                to zero every encoder channel.

        Raises:
            ValueError: If a requested channel is not one of this
                hardware's encoder channels.
        """
        if channels is None:
            # Set the entire encoder encoder_r_buffer to 0
            self.encoder_r_buffer = np.zeros(
                self.num_encoder_r_channels, dtype=np.int32
            ) # t_int32 is 32 bits
        else:
            # Set only specific encoders to 0, while leaving the others
            for channel in channels:
                # Check if the channel is valid (in the available encoder
                # channels for the hardware)
                if channel not in self.encoder_r_channels:
                    raise ValueError(
                        "Channel: {} is not a possible channel on this "
                        "hardware.".format(channel)  # was missing .format()
                    )
                # NOTE(review): indexes the buffer by channel number, which
                # assumes channel i sits at buffer position i — true for the
                # boards defined below; verify if channel lists ever change.
                self.encoder_r_buffer[channel] = 0

        result = hil.hil_set_encoder_counts(
            self.board,
            &self.encoder_r_channels[0],
            self.num_encoder_r_channels,
            &self.encoder_r_buffer[0]
        )
        print_possible_error(result)

    def action(self, voltages_w, led_w=None):
        """Write motor voltages (clipped to +-max_voltage) and optionally the
        LED color, and return the latest sensor readings.

        Returns:
            Tuple of (currents, encoder counts, other-channel values) as
            numpy arrays.
        """
        # If it's the first time running action, then start the background
        # r/w task
        if not self.task_started:
            self._create_task()

        # Ensure safe voltage data
        if isinstance(voltages_w, list):
            voltages_w = np.array(voltages_w, dtype=np.float64)
        assert isinstance(voltages_w, np.ndarray)
        assert voltages_w.shape == (self.num_analog_w_channels,)
        assert voltages_w.dtype == np.float64
        # Clip voltages into the range of +- max_voltage
        voltages_w = np.clip(voltages_w, -self.max_voltage, self.max_voltage)

        if led_w is not None:
            # Ensure safe LED data
            if isinstance(led_w, list):
                led_w = np.array(led_w, dtype=np.float64)
            assert led_w.shape == (self.num_led_w_channels,)
            assert led_w.dtype == np.float64
            for i in range(self.num_led_w_channels):
                assert 0.0 <= led_w[i] <= 1.0 # HIL uses RGB scaled from 0-1
            self._set_led(led_w) # An immediate write to LED (not timed task)

        return self._action(voltages_w)

    def _action(
        self, np.ndarray[qt.t_double, ndim=1, mode="c"] voltages_w not None
    ):
        """Read one sample from the background task, then write the given
        voltages. Warns when samples were missed since the last read."""
        samples_read = hil.hil_task_read(
            self.task,
            1, # Number of samples to read
            &self.currents_r[0],
            &self.encoder_r_buffer[0],
            NULL,
            &self.other_r_buffer[0]
        )
        if samples_read < 0:
            print_possible_error(samples_read)

        samples_overflowed = hil.hil_task_get_buffer_overflows(self.task)
        if samples_overflowed > self.samples_overflowed:
            print(
                "Missed {} samples".format(samples_overflowed - self.samples_overflowed)
            )
            self.samples_overflowed = samples_overflowed

        # Then write voltages_w calculated for previous time step
        self.voltages_w = voltages_w
        result_write = hil.hil_write_analog(
            self.board,
            &self.analog_w_channels[0],
            self.num_analog_w_channels,
            &self.voltages_w[0]
        )
        if result_write < 0:
            print_possible_error(result_write)

        return (
            np.asarray(self.currents_r),
            np.asarray(self.encoder_r_buffer),
            np.asarray(self.other_r_buffer),
        )

    def _set_led(
        self, np.ndarray[qt.t_double, ndim=1, mode="c"] led_w not None
    ):
        """Immediately write an RGB value (each component 0-1) to the LED."""
        self.led_w_buffer = led_w
        result = hil.hil_write_other(
            self.board,
            &self.led_w_channels[0],
            self.num_led_w_channels,
            &self.led_w_buffer[0]
        )
        if result < 0:
            print_possible_error(result)
323 |
324 |
cdef class QuanserAero(QuanserWrapper):
    """Wrapper for the USB-connected Quanser Aero board."""

    def __cinit__(self):
        # Open the board before __init__ runs; fail loudly if unavailable
        result = hil.hil_open(b"quanser_aero_usb", b"0", &self.board)
        print_possible_error(result)
        if result < 0:
            raise IOError("Board could not be opened.")

    def __init__(self, frequency=250, max_voltage=18.0):
        super(QuanserAero, self).__init__(
            max_voltage=max_voltage,
            analog_r_channels=[0, 1],
            analog_w_channels=[0, 1],
            digital_w_channels=[0, 1],
            encoder_r_channels=[0, 1, 2, 3],
            other_r_channels=[
                3000, 3001, 3002, 4000, 4001, 4002, 14000, 14001, 14002, 14003
            ],
            led_w_channels=[11000, 11001, 11002],
            frequency=frequency
        )
354 |
355 |
cdef class QubeServo2(QuanserWrapper):
    """Wrapper for the USB-connected Quanser Qube Servo 2 board."""

    def __cinit__(self):
        # Open the board before __init__ runs; fail loudly if unavailable
        result = hil.hil_open(b"qube_servo2_usb", b"0", &self.board)
        print_possible_error(result)
        if result < 0:
            raise IOError("Board could not be opened.")

    def __init__(self, frequency=250, max_voltage=18.0):
        super(QubeServo2, self).__init__(
            max_voltage=max_voltage,
            analog_r_channels=[0],
            analog_w_channels=[0],
            digital_w_channels=[0],
            encoder_r_channels=[0, 1],
            other_r_channels=[14000],
            led_w_channels=[11000, 11001, 11002],
            frequency=frequency
        )
383 |
--------------------------------------------------------------------------------